# kb/engine/kb/routes/notes.py

"""Note mutation endpoint — update existing notes in place."""

import hashlib
import logging

from fastapi import HTTPException
from pydantic import BaseModel

from main import app
from kb.config import cfg
from kb.database import (
    get_connection,
    build_enriched_text,
    insert_chunk,
    insert_embedding,
)
from kb.embeddings import embed_texts
from kb.ingest.note import chunk_note

logger = logging.getLogger("kb.routes.notes")


class NoteUpdateRequest(BaseModel):
    """Request body accepted by the note update endpoint."""

    # Replacement note text; the update handler chunks, embeds and hashes it.
    text: str


@app.patch("/api/v1/notes/{doc_id}")
async def update_note(doc_id: int, req: NoteUpdateRequest):
    """Replace the text of an existing note and rebuild its chunks and embeddings.

    The new text is chunked and embedded *before* any row is modified, so a
    chunking or embedding failure leaves the database untouched and the write
    transaction is not held open while embeddings are computed. The old
    chunks and vectors are then swapped for the new ones, and the document's
    ``content_hash`` / ``updated_at`` are refreshed, in a single transaction.

    Args:
        doc_id: Primary key of the document to update.
        req: Request body carrying the replacement note text.

    Returns:
        A dict with every column of the updated ``documents`` row, plus
        ``"tags"`` (tag names sorted alphabetically) and ``"chunks"`` (the new
        chunk rows ordered by ``chunk_index``).

    Raises:
        HTTPException: 404 if the document does not exist, 422 if it is not a
            note, 500 if anything else fails (all pending writes are rolled
            back first).
    """
    # NOTE(review): the database and embedding calls below are made without
    # ``await``, so they run on the event loop for their whole duration —
    # confirm that is acceptable for this deployment.
    conn = get_connection(cfg.db_path)
    try:
        doc = conn.execute(
            "SELECT id, title, doc_type FROM documents WHERE id = ?", (doc_id,)
        ).fetchone()
        if not doc:
            raise HTTPException(status_code=404, detail="Document not found.")
        if doc["doc_type"] != "note":
            raise HTTPException(
                status_code=422,
                detail="Only notes can be updated via this endpoint.",
            )

        title = doc["title"]

        # Run the note chunking pipeline on the new text. Everything except
        # the "text" key of a chunk is treated as its metadata (None if empty).
        chunks = chunk_note(req.text)
        chunk_texts = [c["text"] for c in chunks]
        chunk_metas = [
            {k: v for k, v in c.items() if k != "text"} or None for c in chunks
        ]
        enriched_texts = [
            build_enriched_text(title, ct, cm)
            for ct, cm in zip(chunk_texts, chunk_metas)
        ]

        # Embed before touching the database: no write lock is held during
        # this call, and a failure here needs no cleanup.
        vectors = list(embed_texts(enriched_texts))
        if len(vectors) != len(enriched_texts):
            # zip() below would otherwise silently drop the unmatched chunks.
            raise RuntimeError(
                f"embed_texts returned {len(vectors)} vectors "
                f"for {len(enriched_texts)} chunks"
            )

        # First write: refresh content_hash / updated_at. Doing this first
        # also confirms the row still exists after the embedding step.
        content_hash = hashlib.sha256(req.text.encode("utf-8")).hexdigest()
        touched = conn.execute(
            "UPDATE documents SET content_hash = ?, updated_at = current_timestamp WHERE id = ?",
            (content_hash, doc_id),
        )
        if touched.rowcount == 0:
            raise HTTPException(status_code=404, detail="Document not found.")

        # Delete existing chunks and their embeddings.
        old_chunk_rows = conn.execute(
            "SELECT id FROM chunks WHERE document_id = ?", (doc_id,)
        ).fetchall()
        for row in old_chunk_rows:
            conn.execute("DELETE FROM chunks_vec WHERE chunk_id = ?", (row["id"],))
        conn.execute("DELETE FROM chunks WHERE document_id = ?", (doc_id,))

        # Insert the replacement chunks together with their vectors.
        for idx, (chunk_text, enriched, vector) in enumerate(
            zip(chunk_texts, enriched_texts, vectors)
        ):
            chunk_id = insert_chunk(
                conn,
                document_id=doc_id,
                chunk_index=idx,
                text=chunk_text,
                enriched_text=enriched,
                metadata=chunk_metas[idx],
            )
            insert_embedding(conn, chunk_id, vector)

        conn.commit()

        # Read back the committed state for the response.
        updated_doc = conn.execute(
            "SELECT * FROM documents WHERE id = ?", (doc_id,)
        ).fetchone()

        new_chunks = conn.execute(
            "SELECT * FROM chunks WHERE document_id = ? ORDER BY chunk_index",
            (doc_id,),
        ).fetchall()

        tag_rows = conn.execute(
            """
            SELECT t.name FROM tags t
            JOIN document_tags dt ON t.id = dt.tag_id
            WHERE dt.document_id = ?
            ORDER BY t.name
            """,
            (doc_id,),
        ).fetchall()

        return {
            **dict(updated_doc),
            "tags": [t["name"] for t in tag_rows],
            "chunks": [dict(c) for c in new_chunks],
        }
    except HTTPException:
        # Deliberate client-facing errors pass through unchanged; discard any
        # write that may already be pending.
        conn.rollback()
        raise
    except Exception as exc:
        conn.rollback()
        logger.exception("Failed to update note %d", doc_id)
        raise HTTPException(
            status_code=500, detail="Failed to update note."
        ) from exc
    finally:
        conn.close()