Upload-time duplicate detection, FTS5 query sanitization, release guard
- Reject duplicate uploads at the API boundary (HTTP 409) instead of silently skipping them in the background worker. The check covers both ingested documents and in-flight jobs via content_hash on the jobs table.
- Go client handles 409 with distinct messages for already-imported documents vs. already-queued jobs.
- Sanitize FTS5 search queries by quoting each token to prevent syntax errors from special characters and operators such as ?, *, ", (), AND, OR, NOT.
- Add a try/except safety net around the FTS5 execute call for edge cases.
- Add a main-branch guard to release.sh to prevent releasing from feature branches.
- Update specs and README to reflect the new behaviour.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+81
-7
@@ -1,7 +1,9 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -11,6 +13,21 @@ import (
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// uploadResult is the outcome of uploading a single file.
//
// For a normal upload only Raw is populated. When the server rejects the
// upload as a duplicate (HTTP 409), Duplicate is true and the ID/title fields
// carry whatever the conflict payload provided.
type uploadResult struct {
	// Raw is the decoded response body; it is what gets emitted in JSON
	// output mode.
	Raw interface{}
	// Duplicate reports whether the server answered with a 409 conflict.
	Duplicate bool
	// DocID is the "document_id" of the conflict payload. It stays zero when
	// the payload has no such field. Kept as float64 to match how the payload
	// is decoded.
	DocID float64
	// JobID is the "job_id" of the conflict payload. It stays zero when the
	// payload has no such field.
	JobID float64
	// Title is the "title" of the conflict payload.
	Title string
}

// duplicateMsg returns the human-readable line describing a duplicate upload.
//
// A document ID takes precedence over a job ID. If the conflict payload
// carried neither, a neutral message is returned instead of reporting a
// misleading "job ID: 0".
func (r *uploadResult) duplicateMsg() string {
	switch {
	case r.DocID > 0:
		return fmt.Sprintf("Already imported: %s (doc ID: %.0f)", r.Title, r.DocID)
	case r.JobID > 0:
		return fmt.Sprintf("Already queued: %s (job ID: %.0f)", r.Title, r.JobID)
	default:
		// Neither ID was present (unexpected payload shape or a failed
		// decode); do not invent an ID.
		return fmt.Sprintf("Already uploaded: %s", r.Title)
	}
}
|
||||
|
||||
var supportedExts = map[string]bool{
|
||||
".pdf": true,
|
||||
".docx": true,
|
||||
@@ -67,6 +84,26 @@ func runAdd(cmd *cobra.Command, args []string) error {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if resp.StatusCode == http.StatusConflict {
|
||||
var result interface{}
|
||||
if err := api.DecodeJSON(resp, &result); err != nil {
|
||||
return fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
if output.IsJSON() {
|
||||
output.PrintJSON(result)
|
||||
} else {
|
||||
if m, ok := result.(map[string]interface{}); ok {
|
||||
if docID, ok := m["document_id"].(float64); ok {
|
||||
fmt.Printf("Already imported: %s (doc ID: %.0f)\n", m["title"], docID)
|
||||
} else if jobID, ok := m["job_id"].(float64); ok {
|
||||
fmt.Printf("Already queued: %s (job ID: %.0f)\n", m["title"], jobID)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := api.CheckError(resp); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
@@ -104,7 +141,9 @@ func runAdd(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
if output.IsJSON() {
|
||||
output.PrintJSON([]interface{}{result})
|
||||
output.PrintJSON([]interface{}{result.Raw})
|
||||
} else if result.Duplicate {
|
||||
fmt.Println(result.duplicateMsg())
|
||||
} else {
|
||||
fmt.Printf("Queued: %s\n", filepath.Base(path))
|
||||
}
|
||||
@@ -135,27 +174,39 @@ func runAdd(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
var results []interface{}
|
||||
queued := 0
|
||||
duplicates := 0
|
||||
for _, f := range files {
|
||||
result, err := uploadFile(client, f, tags, docType)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error uploading %s: %v\n", f, err)
|
||||
continue
|
||||
}
|
||||
results = append(results, result)
|
||||
if !output.IsJSON() {
|
||||
fmt.Printf("Queued: %s\n", filepath.Base(f))
|
||||
results = append(results, result.Raw)
|
||||
if result.Duplicate {
|
||||
duplicates++
|
||||
if !output.IsJSON() {
|
||||
fmt.Println(result.duplicateMsg())
|
||||
}
|
||||
} else {
|
||||
queued++
|
||||
if !output.IsJSON() {
|
||||
fmt.Printf("Queued: %s\n", filepath.Base(f))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if output.IsJSON() {
|
||||
output.PrintJSON(results)
|
||||
} else if duplicates > 0 {
|
||||
fmt.Printf("Queued: %d files, %d duplicates skipped\n", queued, duplicates)
|
||||
} else {
|
||||
fmt.Printf("Queued: %d files\n", len(results))
|
||||
fmt.Printf("Queued: %d files\n", queued)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func uploadFile(client *api.Client, path, tags, docType string) (interface{}, error) {
|
||||
func uploadFile(client *api.Client, path, tags, docType string) (*uploadResult, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot open %s: %w", path, err)
|
||||
@@ -180,6 +231,29 @@ func uploadFile(client *api.Client, path, tags, docType string) (interface{}, er
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if resp.StatusCode == http.StatusConflict {
|
||||
var raw json.RawMessage
|
||||
if err := api.DecodeJSON(resp, &raw); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
var dupResp struct {
|
||||
DocumentID float64 `json:"document_id"`
|
||||
JobID float64 `json:"job_id"`
|
||||
Title string `json:"title"`
|
||||
}
|
||||
json.Unmarshal(raw, &dupResp)
|
||||
var rawIface interface{}
|
||||
json.Unmarshal(raw, &rawIface)
|
||||
return &uploadResult{
|
||||
Raw: rawIface,
|
||||
Duplicate: true,
|
||||
DocID: dupResp.DocumentID,
|
||||
JobID: dupResp.JobID,
|
||||
Title: dupResp.Title,
|
||||
}, nil
|
||||
}
|
||||
|
||||
if err := api.CheckError(resp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -188,5 +262,5 @@ func uploadFile(client *api.Client, path, tags, docType string) (interface{}, er
|
||||
if err := api.DecodeJSON(resp, &result); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
return result, nil
|
||||
return &uploadResult{Raw: result}, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user