e7136a4a20
New MCP server (mcp/) exposes kb operations as native MCP tools over
Streamable HTTP with Bearer token auth. Supports collections via tag
conventions, chunked file uploads, and agent-side search patterns.
Engine gains PATCH /api/v1/notes/{id} for in-place note updates with
transactional re-chunk/re-embed, and updated_at column on documents.
Go client adds updatenote command and Patch HTTP method.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
209 lines
6.3 KiB
Python
209 lines
6.3 KiB
Python
"""Document management endpoints — list, view, download, and delete documents."""
|
|
|
|
import json
|
|
import logging
|
|
import mimetypes
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from fastapi import HTTPException, Query
|
|
from fastapi.responses import FileResponse
|
|
|
|
from main import app
|
|
from kb.config import cfg
|
|
from kb.database import get_connection
|
|
|
|
# Module-level logger for the document routes; the name is hard-coded
# rather than derived from __name__.
logger = logging.getLogger("kb.routes.documents")
|
|
|
|
|
|
@app.get("/api/v1/documents")
async def list_documents(
    type: Optional[str] = Query(None),
    tags: Optional[str] = Query(None),
):
    """List documents, most recently updated (or created) first.

    Args:
        type: When given, keep only documents whose ``doc_type`` equals it.
        tags: Comma-separated tag names. Blank entries are ignored, and a
            document must carry every remaining tag to be included.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``doc_type``,
        ``tags`` (names sorted alphabetically), ``chunk_count``,
        ``created_at`` and ``updated_at``.
    """
    conn = get_connection(cfg.db_path)
    try:
        join_clauses: list[str] = []
        conditions: list[str] = []
        bindings: list = []

        if type:
            conditions.append("d.doc_type = ?")
            bindings.append(type)

        # Each requested tag gets its own aliased join pair, so the WHERE
        # clause requires all of them to match at once.
        wanted = (
            [part.strip() for part in tags.split(",") if part.strip()]
            if tags
            else []
        )
        for idx, tag_name in enumerate(wanted):
            join_clauses.extend(
                (
                    f"JOIN document_tags dt{idx} ON d.id = dt{idx}.document_id",
                    f"JOIN tags t{idx} ON dt{idx}.tag_id = t{idx}.id",
                )
            )
            conditions.append(f"t{idx}.name = ?")
            bindings.append(tag_name)

        query = """
            SELECT d.id, d.title, d.doc_type,
                   (SELECT COUNT(*) FROM chunks c WHERE c.document_id = d.id) AS chunk_count,
                   d.created_at, d.updated_at
            FROM documents d
        """
        if join_clauses:
            query += " " + " ".join(join_clauses)
        if conditions:
            query += " WHERE " + " AND ".join(conditions)
        query += " ORDER BY COALESCE(d.updated_at, d.created_at) DESC"

        tag_query = """
            SELECT t.name FROM tags t
            JOIN document_tags dt ON t.id = dt.tag_id
            WHERE dt.document_id = ?
            ORDER BY t.name
        """

        listing = []
        for record in conn.execute(query, bindings).fetchall():
            # Tag names are looked up per document with a separate query.
            names = [
                tag["name"]
                for tag in conn.execute(tag_query, (record["id"],)).fetchall()
            ]
            listing.append(
                {
                    "id": record["id"],
                    "title": record["title"],
                    "doc_type": record["doc_type"],
                    "tags": names,
                    "chunk_count": record["chunk_count"],
                    "created_at": record["created_at"],
                    "updated_at": record["updated_at"],
                }
            )
        return listing
    finally:
        conn.close()
|
|
|
|
|
|
@app.get("/api/v1/documents/{doc_id}")
async def get_document(doc_id: int):
    """Return one document with its tags, chunks and file availability.

    Args:
        doc_id: Primary key of the document to fetch.

    Returns:
        Every column of the ``documents`` row, plus ``has_file`` (whether
        ``stored_path`` is set and exists on disk), ``tags`` (names sorted
        alphabetically) and ``chunks`` (full chunk rows ordered by
        ``chunk_index``).

    Raises:
        HTTPException: 404 when no document has the given id.
    """
    conn = get_connection(cfg.db_path)
    try:
        record = conn.execute(
            "SELECT * FROM documents WHERE id = ?", (doc_id,)
        ).fetchone()
        if not record:
            raise HTTPException(status_code=404, detail="Document not found.")

        chunk_rows = conn.execute(
            "SELECT * FROM chunks WHERE document_id = ? ORDER BY chunk_index",
            (doc_id,),
        ).fetchall()

        tag_query = """
            SELECT t.name FROM tags t
            JOIN document_tags dt ON t.id = dt.tag_id
            WHERE dt.document_id = ?
            ORDER BY t.name
        """
        tag_names = [
            tag["name"] for tag in conn.execute(tag_query, (doc_id,)).fetchall()
        ]

        # Only touch the filesystem when a path was recorded at all.
        location = record["stored_path"]
        file_present = False
        if location:
            file_present = Path(location).exists()

        # Start from the raw row; the derived keys override any same-named column.
        payload = dict(record)
        payload["has_file"] = file_present
        payload["tags"] = tag_names
        payload["chunks"] = [dict(chunk) for chunk in chunk_rows]
        return payload
    finally:
        conn.close()
|
|
|
|
|
|
@app.get("/api/v1/documents/{doc_id}/file")
async def download_document_file(doc_id: int):
    """Serve the stored original file of a document as a download.

    The download name is the recorded ``original_filename``; when that is
    empty it falls back to the document title (or ``"document"``) plus the
    stored file's extension. The media type is guessed from that name.

    Args:
        doc_id: Primary key of the document whose file is requested.

    Returns:
        A ``FileResponse`` for the stored file.

    Raises:
        HTTPException: 404 when the document does not exist, has no
            ``stored_path``, or the stored path is missing on disk.
    """
    conn = get_connection(cfg.db_path)
    try:
        record = conn.execute(
            "SELECT id, title, stored_path, original_filename FROM documents WHERE id = ?",
            (doc_id,),
        ).fetchone()
        if not record:
            raise HTTPException(status_code=404, detail="Document not found.")

        location = record["stored_path"]
        if not location:
            raise HTTPException(
                status_code=404,
                detail="Original file not available - ingested before document storage was enabled.",
            )

        target = Path(location)
        if not target.exists():
            raise HTTPException(
                status_code=404,
                detail="Stored file not found on disk.",
            )

        # Prefer the recorded name; otherwise derive one from the title.
        download_name = record["original_filename"] or (
            (record["title"] or "document") + target.suffix
        )

        guessed_type, _ = mimetypes.guess_type(download_name)
        return FileResponse(
            path=str(target),
            media_type=guessed_type or "application/octet-stream",
            filename=download_name,
        )
    finally:
        conn.close()
|
|
|
|
|
|
@app.delete("/api/v1/documents/{doc_id}")
async def delete_document(doc_id: int):
    """Delete a document, its chunk embeddings and its stored file.

    The database rows are removed and committed first. Removing the stored
    file afterwards is best-effort: any failure is logged as a warning and
    does not change the response.

    Args:
        doc_id: Primary key of the document to delete.

    Returns:
        A dict with ``status`` (``"deleted"``), ``document_id`` and the
        deleted document's ``title``.

    Raises:
        HTTPException: 404 when no document has the given id.
    """
    conn = get_connection(cfg.db_path)
    try:
        doc = conn.execute(
            "SELECT id, title, stored_path FROM documents WHERE id = ?", (doc_id,)
        ).fetchone()
        if not doc:
            raise HTTPException(status_code=404, detail="Document not found.")

        # Collect chunk IDs first: the embeddings in chunks_vec are removed
        # explicitly, one chunk at a time, before the document row goes.
        chunk_ids = conn.execute(
            "SELECT id FROM chunks WHERE document_id = ?", (doc_id,)
        ).fetchall()
        for row in chunk_ids:
            conn.execute(
                "DELETE FROM chunks_vec WHERE chunk_id = ?", (row["id"],)
            )

        # Delete document (cascades to chunks, document_tags)
        conn.execute("DELETE FROM documents WHERE id = ?", (doc_id,))
        conn.commit()

        # Delete stored file from disk. Unlink directly instead of checking
        # exists() first, so a file that vanishes concurrently is still
        # reported as "already missing" rather than as a failure.
        stored_path = doc["stored_path"]
        if stored_path:
            try:
                Path(stored_path).unlink()
            except FileNotFoundError:
                logger.warning("Stored file already missing: %s", stored_path)
            except (OSError, ValueError) as exc:
                # ValueError covers paths rejected before reaching the OS
                # (e.g. an embedded NUL byte); cleanup stays best-effort.
                logger.warning("Failed to delete stored file %s: %s", stored_path, exc)
            else:
                logger.info("Deleted stored file: %s", stored_path)

        return {
            "status": "deleted",
            "document_id": doc_id,
            "title": doc["title"],
        }
    finally:
        conn.close()
|