Upload-time duplicate detection, FTS5 query sanitization, release guard

- Reject duplicate uploads at the API boundary (HTTP 409) instead of
  silently skipping them in the background worker. The check covers both
  ingested documents and in-flight jobs via content_hash on the jobs table.
- Go client handles 409 with distinct messages for already-imported
  documents vs already-queued jobs.
- Sanitize FTS5 search queries by quoting each token to prevent syntax
  errors from special characters like ?, *, ", (), AND, OR, NOT.
- Add a try/except safety net around the FTS5 execute call for edge cases.
- Add main branch guard to release.sh to prevent releasing from
  feature branches.
- Update specs and README to reflect new behaviour.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-26 23:05:07 +00:00
parent 63654a59b8
commit 6fec627503
20 changed files with 536 additions and 30 deletions
+81 -7
View File
@@ -1,7 +1,9 @@
package cmd
import (
"encoding/json"
"fmt"
"net/http"
"os"
"path/filepath"
"strings"
@@ -11,6 +13,21 @@ import (
"github.com/spf13/cobra"
)
// uploadResult is what uploadFile hands back for a single upload attempt.
type uploadResult struct {
	// Raw is the decoded JSON body of the server response; it is what gets
	// passed to output.PrintJSON in JSON mode.
	Raw interface{}
	// Duplicate is set when the server answered with HTTP 409 Conflict.
	Duplicate bool
	// DocID and JobID are filled from the "document_id" and "job_id" fields
	// of a conflict response. They are kept as float64 and rendered without
	// a fractional part.
	DocID, JobID float64
	// Title is filled from the "title" field of a conflict response.
	Title string
}

// duplicateMsg builds the one-line, human-readable notice for a duplicate
// upload. A positive DocID yields the "Already imported" wording; any other
// value falls through to the "Already queued" wording using JobID.
func (r *uploadResult) duplicateMsg() string {
	// Start from the queued wording and switch to the imported wording only
	// when a document ID is present.
	status, label, id := "Already queued", "job ID", r.JobID
	if r.DocID > 0 {
		status, label, id = "Already imported", "doc ID", r.DocID
	}
	return fmt.Sprintf("%s: %s (%s: %.0f)", status, r.Title, label, id)
}
var supportedExts = map[string]bool{
".pdf": true,
".docx": true,
@@ -67,6 +84,26 @@ func runAdd(cmd *cobra.Command, args []string) error {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
if resp.StatusCode == http.StatusConflict {
var result interface{}
if err := api.DecodeJSON(resp, &result); err != nil {
return fmt.Errorf("failed to decode response: %w", err)
}
if output.IsJSON() {
output.PrintJSON(result)
} else {
if m, ok := result.(map[string]interface{}); ok {
if docID, ok := m["document_id"].(float64); ok {
fmt.Printf("Already imported: %s (doc ID: %.0f)\n", m["title"], docID)
} else if jobID, ok := m["job_id"].(float64); ok {
fmt.Printf("Already queued: %s (job ID: %.0f)\n", m["title"], jobID)
}
}
}
return nil
}
if err := api.CheckError(resp); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
@@ -104,7 +141,9 @@ func runAdd(cmd *cobra.Command, args []string) error {
}
if output.IsJSON() {
output.PrintJSON([]interface{}{result})
output.PrintJSON([]interface{}{result.Raw})
} else if result.Duplicate {
fmt.Println(result.duplicateMsg())
} else {
fmt.Printf("Queued: %s\n", filepath.Base(path))
}
@@ -135,27 +174,39 @@ func runAdd(cmd *cobra.Command, args []string) error {
}
var results []interface{}
queued := 0
duplicates := 0
for _, f := range files {
result, err := uploadFile(client, f, tags, docType)
if err != nil {
fmt.Fprintf(os.Stderr, "Error uploading %s: %v\n", f, err)
continue
}
results = append(results, result)
if !output.IsJSON() {
fmt.Printf("Queued: %s\n", filepath.Base(f))
results = append(results, result.Raw)
if result.Duplicate {
duplicates++
if !output.IsJSON() {
fmt.Println(result.duplicateMsg())
}
} else {
queued++
if !output.IsJSON() {
fmt.Printf("Queued: %s\n", filepath.Base(f))
}
}
}
if output.IsJSON() {
output.PrintJSON(results)
} else if duplicates > 0 {
fmt.Printf("Queued: %d files, %d duplicates skipped\n", queued, duplicates)
} else {
fmt.Printf("Queued: %d files\n", len(results))
fmt.Printf("Queued: %d files\n", queued)
}
return nil
}
func uploadFile(client *api.Client, path, tags, docType string) (interface{}, error) {
func uploadFile(client *api.Client, path, tags, docType string) (*uploadResult, error) {
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("cannot open %s: %w", path, err)
@@ -180,6 +231,29 @@ func uploadFile(client *api.Client, path, tags, docType string) (interface{}, er
if err != nil {
return nil, err
}
if resp.StatusCode == http.StatusConflict {
var raw json.RawMessage
if err := api.DecodeJSON(resp, &raw); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
var dupResp struct {
DocumentID float64 `json:"document_id"`
JobID float64 `json:"job_id"`
Title string `json:"title"`
}
json.Unmarshal(raw, &dupResp)
var rawIface interface{}
json.Unmarshal(raw, &rawIface)
return &uploadResult{
Raw: rawIface,
Duplicate: true,
DocID: dupResp.DocumentID,
JobID: dupResp.JobID,
Title: dupResp.Title,
}, nil
}
if err := api.CheckError(resp); err != nil {
return nil, err
}
@@ -188,5 +262,5 @@ func uploadFile(client *api.Client, path, tags, docType string) (interface{}, er
if err := api.DecodeJSON(resp, &result); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
return result, nil
return &uploadResult{Raw: result}, nil
}