From 2d179af557d13946395634967c1b61da0508fa71 Mon Sep 17 00:00:00 2001 From: Steve Cliff Date: Thu, 2 Apr 2026 16:17:35 +0100 Subject: [PATCH] Fix search human-mode output to match engine API response The Go client struct expected a nested document object and top-level page/section fields, but the engine returns flat results with metadata in chunk_metadata. This caused empty display for title, type, tags, page, and section in human output mode. Co-Authored-By: Claude Opus 4.6 (1M context) --- SKILL.md | 28 +++++++------ client/cmd/search.go | 39 +++++++++--------- .../.openspec.yaml | 2 + .../design.md | 41 +++++++++++++++++++ .../proposal.md | 24 +++++++++++ .../specs/go-client/spec.md | 40 ++++++++++++++++++ .../tasks.md | 14 +++++++ openspec/specs/go-client/spec.md | 19 +++++++++ 8 files changed, 174 insertions(+), 33 deletions(-) create mode 100644 openspec/changes/archive/2026-04-02-fix-search-human-output/.openspec.yaml create mode 100644 openspec/changes/archive/2026-04-02-fix-search-human-output/design.md create mode 100644 openspec/changes/archive/2026-04-02-fix-search-human-output/proposal.md create mode 100644 openspec/changes/archive/2026-04-02-fix-search-human-output/specs/go-client/spec.md create mode 100644 openspec/changes/archive/2026-04-02-fix-search-human-output/tasks.md diff --git a/SKILL.md b/SKILL.md index 06ee8bc..68164e6 100644 --- a/SKILL.md +++ b/SKILL.md @@ -79,6 +79,12 @@ kb jobs --status failed --format json # filter by status kb jobs --format json # job details ``` +## Examples + +```bash +kb examples # show common usage examples +``` + ## Engine status and maintenance ```bash @@ -102,18 +108,14 @@ All commands support: { "chunk_id": 1423, "score": 0.031, - "score_breakdown": {"fts": 0.016, "vector": 0.015}, "text": "To install the latest version of git from source...", - "source": { - "document_id": 42, - "title": "Git Admin Guide", - "path": "/home/user/docs/git-admin.pdf", - "type": "pdf", - "page": 12, - "chunk_index": 3, - 
"total_chunks": 28, - "tags": ["git", "admin"] - } + "chunk_index": 3, + "chunk_metadata": {"page": 12}, + "title": "Git Admin Guide", + "doc_type": "pdf", + "source_path": "/home/user/docs/git-admin.pdf", + "created_at": "2026-03-15T10:30:00", + "tags": ["git", "admin"] } ], "total_matches": 47, @@ -160,7 +162,7 @@ Use filters when the question implies a specific domain: - Always use `--format json` for machine parsing - The `score` field is relative, not absolute — compare scores within a result set -- `source.page` is only present for PDF documents -- `source.section_header` is only present for markdown documents with headers +- `chunk_metadata.page` is only present for PDF documents +- `chunk_metadata.section_header` is only present for markdown documents with headers - Results are already ranked by relevance (hybrid FTS + vector search) - Duplicate files are detected at upload time (HTTP 409) — the client handles this gracefully diff --git a/client/cmd/search.go b/client/cmd/search.go index bf7e00d..902072f 100644 --- a/client/cmd/search.go +++ b/client/cmd/search.go @@ -67,15 +67,12 @@ func runSearch(cmd *cobra.Command, args []string) error { var result struct { Results []struct { - Score float64 `json:"score"` - Document struct { - Title string `json:"title"` - Type string `json:"doc_type"` - Tags []string `json:"tags"` - } `json:"document"` - Page interface{} `json:"page"` - Section string `json:"section"` - Text string `json:"text"` + Score float64 `json:"score"` + Title string `json:"title"` + DocType string `json:"doc_type"` + Tags []string `json:"tags"` + ChunkMetadata map[string]interface{} `json:"chunk_metadata"` + Text string `json:"text"` } `json:"results"` } @@ -103,26 +100,28 @@ func runSearch(cmd *cobra.Command, args []string) error { snippet = snippet[:200] + "..." } - fmt.Printf("\n%d. [%.4f] %s\n", i+1, r.Score, r.Document.Title) + fmt.Printf("\n%d. 
[%.4f] %s\n", i+1, r.Score, r.Title) location := "" - if r.Page != nil { - location = fmt.Sprintf("Page %v", r.Page) + if page, ok := r.ChunkMetadata["page"]; ok && page != nil { + location = fmt.Sprintf("Page %v", page) } - if r.Section != "" { - if location != "" { - location += " / " + if section, ok := r.ChunkMetadata["section_header"]; ok && section != nil { + if s, ok := section.(string); ok && s != "" { + if location != "" { + location += " / " + } + location += s } - location += r.Section } if location != "" { fmt.Printf(" Location: %s\n", location) } - if r.Document.Type != "" { - fmt.Printf(" Type: %s\n", r.Document.Type) + if r.DocType != "" { + fmt.Printf(" Type: %s\n", r.DocType) } - if len(r.Document.Tags) > 0 { - fmt.Printf(" Tags: %s\n", joinStrings(r.Document.Tags)) + if len(r.Tags) > 0 { + fmt.Printf(" Tags: %s\n", joinStrings(r.Tags)) } fmt.Printf(" %s\n", snippet) } diff --git a/openspec/changes/archive/2026-04-02-fix-search-human-output/.openspec.yaml b/openspec/changes/archive/2026-04-02-fix-search-human-output/.openspec.yaml new file mode 100644 index 0000000..6a5db8c --- /dev/null +++ b/openspec/changes/archive/2026-04-02-fix-search-human-output/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-02 diff --git a/openspec/changes/archive/2026-04-02-fix-search-human-output/design.md b/openspec/changes/archive/2026-04-02-fix-search-human-output/design.md new file mode 100644 index 0000000..9987a1b --- /dev/null +++ b/openspec/changes/archive/2026-04-02-fix-search-human-output/design.md @@ -0,0 +1,41 @@ +## Context + +The engine's `/api/v1/search` endpoint returns flat result objects: + +```json +{ + "chunk_id": 123, + "score": 0.031, + "text": "...", + "chunk_index": 3, + "chunk_metadata": {"page": 12, "section_header": "Installation"}, + "title": "Git Admin Guide", + "doc_type": "pdf", + "source_path": "/home/user/docs/git-admin.pdf", + "created_at": "2026-03-15T10:30:00", + "tags": ["git", "admin"] +} +``` + +The Go client's 
human-mode struct in `client/cmd/search.go` incorrectly expects a nested `document` object and top-level `page`/`section` fields. This causes all metadata to display as zero values. + +## Goals / Non-Goals + +**Goals:** +- Fix the search result struct to match the flat engine response +- Extract `page` and `section_header` from `chunk_metadata` for human display +- Maintain identical JSON output (already passes through raw response) + +**Non-Goals:** +- Changing the engine API response format +- Adding new display fields beyond what was originally intended + +## Decisions + +**Flatten the struct to match API response.** The result struct will have `Title`, `DocType`, `Tags` as top-level fields (matching `title`, `doc_type`, `tags` JSON keys). `ChunkMetadata` will be decoded as `map[string]interface{}` to extract `page` and `section_header` dynamically, since its contents vary by document type. + +**Why not a typed ChunkMetadata struct?** The metadata keys depend on the ingestion pipeline (PDFs have `page`, markdown has `section_header`, code may have others in future). A map is more resilient to engine-side additions. + +## Risks / Trade-offs + +- [Minimal risk] If the engine adds new top-level fields, the Go struct silently ignores them — this is existing behavior and acceptable for human-mode display. diff --git a/openspec/changes/archive/2026-04-02-fix-search-human-output/proposal.md b/openspec/changes/archive/2026-04-02-fix-search-human-output/proposal.md new file mode 100644 index 0000000..227c383 --- /dev/null +++ b/openspec/changes/archive/2026-04-02-fix-search-human-output/proposal.md @@ -0,0 +1,24 @@ +## Why + +The Go client's human-mode search output struct expects a nested `document` object and top-level `page`/`section` fields, but the engine API returns flat results with `title`, `doc_type`, `tags` at the result level and `page`/`section_header` inside `chunk_metadata`. 
This means human-mode display shows empty values for title, type, tags, page, and section. + +## What Changes + +- Fix the Go client search result struct to match the flat engine API response format +- Extract `page` and `section_header` from the `chunk_metadata` map instead of expecting them as top-level fields +- Human-mode output will correctly display document title, type, tags, page number, and section header + +## Capabilities + +### New Capabilities + +(none) + +### Modified Capabilities + +- `go-client`: Fix search result parsing to match actual engine API response shape + +## Impact + +- `client/cmd/search.go` — struct definition and display logic +- No API changes, no breaking changes — this is a bug fix aligning the client with the existing API contract diff --git a/openspec/changes/archive/2026-04-02-fix-search-human-output/specs/go-client/spec.md b/openspec/changes/archive/2026-04-02-fix-search-human-output/specs/go-client/spec.md new file mode 100644 index 0000000..e2e9073 --- /dev/null +++ b/openspec/changes/archive/2026-04-02-fix-search-human-output/specs/go-client/spec.md @@ -0,0 +1,40 @@ +## MODIFIED Requirements + +### Requirement: Search command + +The client SHALL provide a `kb search <query>` command that sends the query to the engine and displays results. 
+ +#### Scenario: Human-readable search output +- **WHEN** the user runs `kb search "how to change oil"` +- **THEN** the client SHALL POST to `/api/v1/search`, and display results in a human-readable format showing rank, score, document title, page/section, doc type, tags, and a text snippet +- **THEN** the client SHALL parse search results as flat objects with top-level `title`, `doc_type`, `tags`, `score`, `text`, `chunk_index` fields +- **THEN** the client SHALL extract `page` from `chunk_metadata` when present (PDF documents) +- **THEN** the client SHALL extract `section_header` from `chunk_metadata` when present (markdown documents) + +#### Scenario: JSON search output +- **WHEN** the user runs `kb search "query" --format json` +- **THEN** the client SHALL output the raw JSON response from the engine + +#### Scenario: Search with filters +- **WHEN** the user runs `kb search "brakes" --tags maintenance --type pdf --top 3` +- **THEN** the client SHALL include the filters in the API request body + +#### Scenario: Search mode flags +- **WHEN** the user runs `kb search "error" --fts-only` +- **THEN** the client SHALL set `fts_only: true` in the request body + +#### Scenario: PDF result with page number +- **WHEN** a search result has `chunk_metadata` containing `{"page": 12}` +- **THEN** the human output SHALL display "Page 12" in the location line + +#### Scenario: Markdown result with section header +- **WHEN** a search result has `chunk_metadata` containing `{"section_header": "Installation > Prerequisites"}` +- **THEN** the human output SHALL display "Installation > Prerequisites" in the location line + +#### Scenario: Result with both page and section +- **WHEN** a search result has `chunk_metadata` containing both `page` and `section_header` +- **THEN** the human output SHALL display both separated by " / " + +#### Scenario: Result with no location metadata +- **WHEN** a search result has empty `chunk_metadata` or no page/section keys +- **THEN** the human 
output SHALL omit the location line entirely diff --git a/openspec/changes/archive/2026-04-02-fix-search-human-output/tasks.md b/openspec/changes/archive/2026-04-02-fix-search-human-output/tasks.md new file mode 100644 index 0000000..919f12a --- /dev/null +++ b/openspec/changes/archive/2026-04-02-fix-search-human-output/tasks.md @@ -0,0 +1,14 @@ +## 1. Fix search result struct + +- [x] 1.1 Replace nested `Document` struct with flat fields (`Title`, `DocType`, `Tags`) matching engine JSON keys +- [x] 1.2 Add `ChunkMetadata map[string]interface{}` field to capture `chunk_metadata` + +## 2. Fix display logic + +- [x] 2.1 Update title/type/tags references in the display loop to use the new flat fields +- [x] 2.2 Extract `page` from `ChunkMetadata` map (replacing top-level `Page` field) +- [x] 2.3 Extract `section_header` from `ChunkMetadata` map (replacing top-level `Section` field) + +## 3. Verify + +- [x] 3.1 Build the client and verify it compiles cleanly diff --git a/openspec/specs/go-client/spec.md b/openspec/specs/go-client/spec.md index 3957949..d229d16 100644 --- a/openspec/specs/go-client/spec.md +++ b/openspec/specs/go-client/spec.md @@ -53,6 +53,9 @@ The client SHALL provide a `kb search ` command that sends the query to t #### Scenario: Human-readable search output - **WHEN** the user runs `kb search "how to change oil"` - **THEN** the client SHALL POST to `/api/v1/search`, and display results in a human-readable format showing rank, score, document title, page/section, doc type, tags, and a text snippet +- **THEN** the client SHALL parse search results as flat objects with top-level `title`, `doc_type`, `tags`, `score`, `text`, `chunk_index` fields +- **THEN** the client SHALL extract `page` from `chunk_metadata` when present (PDF documents) +- **THEN** the client SHALL extract `section_header` from `chunk_metadata` when present (markdown documents) #### Scenario: JSON search output - **WHEN** the user runs `kb search "query" --format json` @@ -66,6 +69,22 
@@ The client SHALL provide a `kb search ` command that sends the query to t - **WHEN** the user runs `kb search "error" --fts-only` - **THEN** the client SHALL set `fts_only: true` in the request body +#### Scenario: PDF result with page number +- **WHEN** a search result has `chunk_metadata` containing `{"page": 12}` +- **THEN** the human output SHALL display "Page 12" in the location line + +#### Scenario: Markdown result with section header +- **WHEN** a search result has `chunk_metadata` containing `{"section_header": "Installation > Prerequisites"}` +- **THEN** the human output SHALL display "Installation > Prerequisites" in the location line + +#### Scenario: Result with both page and section +- **WHEN** a search result has `chunk_metadata` containing both `page` and `section_header` +- **THEN** the human output SHALL display both separated by " / " + +#### Scenario: Result with no location metadata +- **WHEN** a search result has empty `chunk_metadata` or no page/section keys +- **THEN** the human output SHALL omit the location line entirely + --- ### Requirement: Add note command