Add bulk operations and remove collections abstraction
- Add bulk delete, bulk tags, and bulk set-tags engine endpoints
  (POST /api/v1/bulk/delete, /bulk/tags, /bulk/set-tags)
- Filter-based selection: by tags, doc_type, ID list, ID range
- Safety threshold (KB_BULK_SAFETY_PERCENT, default 70%) prevents accidental
  mass operations unless force=true
- Synchronous execution with audit trail via jobs table
- Add kb_bulk_delete, kb_bulk_tags, kb_bulk_set_tags MCP tools
- Add kb bulk-remove, bulk-tag, bulk-set-tags CLI commands
- Remove collection abstraction from MCP server (use tags instead)
- Remove kb_set_collection MCP tool
- Update SKILL.md, MCP.md, README.md documentation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -106,6 +106,93 @@ def update_tags(doc_id: int, add: list[str] | None = None,
|
||||
return r.json()
|
||||
|
||||
|
||||
def delete_document(doc_id: int) -> dict:
    """Delete a single document via the engine API.

    Sends ``DELETE /api/v1/documents/{doc_id}`` using the client returned by
    ``_client()`` and returns the decoded JSON body of the response.

    Args:
        doc_id: ID of the document to delete.

    Returns:
        The parsed JSON response.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response.
    """
    endpoint = f"/api/v1/documents/{doc_id}"
    with _client() as http:
        response = http.delete(endpoint)
        response.raise_for_status()
        payload = response.json()
    return payload
|
||||
|
||||
|
||||
def _bulk_body(
|
||||
document_ids: list[int] | None = None,
|
||||
tags: list[str] | None = None,
|
||||
doc_type: str | None = None,
|
||||
from_id: int | None = None,
|
||||
to_id: int | None = None,
|
||||
force: bool = False,
|
||||
**extra,
|
||||
) -> dict:
|
||||
body: dict = {}
|
||||
if document_ids:
|
||||
body["document_ids"] = document_ids
|
||||
if tags:
|
||||
body["tags"] = tags
|
||||
if doc_type:
|
||||
body["doc_type"] = doc_type
|
||||
if from_id is not None:
|
||||
body["from_id"] = from_id
|
||||
if to_id is not None:
|
||||
body["to_id"] = to_id
|
||||
if force:
|
||||
body["force"] = True
|
||||
body.update(extra)
|
||||
return body
|
||||
|
||||
|
||||
def bulk_delete(
    document_ids: list[int] | None = None,
    tags: list[str] | None = None,
    doc_type: str | None = None,
    from_id: int | None = None,
    to_id: int | None = None,
    force: bool = False,
) -> dict:
    """Delete every document matching the given filters via the engine API.

    The filters are packed by ``_bulk_body`` and posted to
    ``/api/v1/bulk/delete``.

    Args:
        document_ids: Explicit document IDs to select.
        tags: Tag filter for selection.
        doc_type: Document-type filter for selection.
        from_id: Lower ID bound for selection.
        to_id: Upper ID bound for selection.
        force: Forwarded as ``force: true`` in the body when truthy.

    Returns:
        The parsed JSON response.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response.
    """
    payload = _bulk_body(
        document_ids=document_ids,
        tags=tags,
        doc_type=doc_type,
        from_id=from_id,
        to_id=to_id,
        force=force,
    )
    with _client() as http:
        response = http.post("/api/v1/bulk/delete", json=payload)
        response.raise_for_status()
        return response.json()
|
||||
|
||||
|
||||
def bulk_tags(
    document_ids: list[int] | None = None,
    tags: list[str] | None = None,
    doc_type: str | None = None,
    from_id: int | None = None,
    to_id: int | None = None,
    add: list[str] | None = None,
    remove: list[str] | None = None,
    force: bool = False,
) -> dict:
    """Add and/or remove tags on every document matching the given filters.

    ``tags`` selects documents; ``add`` and ``remove`` describe the change.
    The body is built by ``_bulk_body`` and posted to ``/api/v1/bulk/tags``.
    Empty or missing ``add`` / ``remove`` lists are left out of the body.

    Returns:
        The parsed JSON response.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response.
    """
    # Only forward the change lists that actually contain something.
    changes = {
        key: names
        for key, names in (("add", add), ("remove", remove))
        if names
    }
    payload = _bulk_body(
        document_ids=document_ids,
        tags=tags,
        doc_type=doc_type,
        from_id=from_id,
        to_id=to_id,
        force=force,
        **changes,
    )
    with _client() as http:
        response = http.post("/api/v1/bulk/tags", json=payload)
        response.raise_for_status()
        return response.json()
|
||||
|
||||
|
||||
def bulk_set_tags(
    document_ids: list[int] | None = None,
    tags: list[str] | None = None,
    doc_type: str | None = None,
    from_id: int | None = None,
    to_id: int | None = None,
    new_tags: list[str] | None = None,
    force: bool = False,
) -> dict:
    """Replace the tag set on every document matching the given filters.

    ``tags`` selects documents; ``new_tags`` is the replacement set. The body
    is built by ``_bulk_body`` and posted to ``/api/v1/bulk/set-tags``.
    ``new_tags`` is always present in the body, as an empty list when
    omitted or empty.

    Returns:
        The parsed JSON response.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response.
    """
    replacement = new_tags if new_tags else []
    payload = _bulk_body(
        document_ids=document_ids,
        tags=tags,
        doc_type=doc_type,
        from_id=from_id,
        to_id=to_id,
        force=force,
        new_tags=replacement,
    )
    with _client() as http:
        response = http.post("/api/v1/bulk/set-tags", json=payload)
        response.raise_for_status()
        return response.json()
|
||||
|
||||
|
||||
def upload_file(filename: str, file_bytes: bytes,
|
||||
tags: list[str] | None = None) -> dict:
|
||||
fields: dict = {}
|
||||
|
||||
+136
-93
@@ -20,68 +20,6 @@ import uploads
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||
logger = logging.getLogger("kb.mcp")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Collection helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
COLLECTION_TAG_PREFIX = "collection:"
|
||||
DEFAULT_COLLECTION = "documents"
|
||||
|
||||
|
||||
def _collection_tag(collection: str | None) -> str:
|
||||
return f"{COLLECTION_TAG_PREFIX}{collection or DEFAULT_COLLECTION}"
|
||||
|
||||
|
||||
def _strip_collection_tags(tags: list[str]) -> tuple[str | None, list[str]]:
    """Separate collection tags from ordinary tags.

    Returns ``(collection, remaining_tags)``. ``collection`` is the name taken
    from the last tag that starts with ``COLLECTION_TAG_PREFIX`` (prefix
    removed), or ``None`` when no such tag exists. ``remaining_tags`` keeps
    the other tags in their original order.
    """
    prefix_len = len(COLLECTION_TAG_PREFIX)
    found = None
    kept = []
    for tag in tags:
        if not tag.startswith(COLLECTION_TAG_PREFIX):
            kept.append(tag)
            continue
        # A later collection tag overrides an earlier one.
        found = tag[prefix_len:]
    return found, kept
|
||||
|
||||
|
||||
def _process_document(doc: dict) -> dict:
    """Move collection information out of a document's tags.

    Rewrites ``doc["tags"]`` without collection tags and stores the extracted
    collection name (or ``None``) under ``doc["collection"]``. The dict is
    modified in place and also returned.
    """
    collection, clean_tags = _strip_collection_tags(doc.get("tags", []))
    doc["tags"], doc["collection"] = clean_tags, collection
    return doc
|
||||
|
||||
|
||||
def _process_search_results(results: list[dict]) -> list[dict]:
    """Strip collection tags from search results, in place.

    For each result that has a ``"tags"`` key, and for its nested
    ``"document"`` dict when that has a ``"tags"`` key, the collection tags
    are removed and the extracted name is stored under ``"collection"``.
    The same list object is returned.
    """

    def _split_in_place(target: dict) -> None:
        # Replace target's tags with the cleaned list and record the collection.
        collection, clean_tags = _strip_collection_tags(target["tags"])
        target["tags"] = clean_tags
        target["collection"] = collection

    for hit in results:
        if "tags" in hit:
            _split_in_place(hit)
        if "document" in hit and "tags" in hit["document"]:
            _split_in_place(hit["document"])
    return results
|
||||
|
||||
|
||||
async def _ensure_exclusive_collection(doc_id: int, collection: str) -> None:
    """Make ``collection`` the only collection tag on a document.

    Fetches the document, and unless its collection tags are already exactly
    the desired one, removes the existing collection tags (if any) and then
    adds the new tag.
    """
    current = engine.get_document(doc_id)
    stale = [
        tag
        for tag in current.get("tags", [])
        if tag.startswith(COLLECTION_TAG_PREFIX)
    ]
    wanted = _collection_tag(collection)

    if stale != [wanted]:
        if stale:
            engine.update_tags(doc_id, remove=stale)
        engine.update_tags(doc_id, add=[wanted])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Transport security — DNS rebinding protection with configurable allowed hosts
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -107,9 +45,10 @@ mcp = FastMCP(
|
||||
"kb",
|
||||
instructions=(
|
||||
"Knowledge base MCP server. Provides tools for searching, adding, and "
|
||||
"managing documents and notes. This server requires Bearer token "
|
||||
"authentication — all requests are authenticated via the Authorization "
|
||||
"header at the HTTP transport layer."
|
||||
"managing documents and notes. Use tags to organise and filter documents "
|
||||
"(e.g. tag notes with 'agent:mybot' and filter searches by that tag). "
|
||||
"This server requires Bearer token authentication — all requests are "
|
||||
"authenticated via the Authorization header at the HTTP transport layer."
|
||||
),
|
||||
transport_security=_transport_security,
|
||||
)
|
||||
@@ -121,7 +60,6 @@ async def kb_search(
|
||||
top: int = 10,
|
||||
tags: list[str] | None = None,
|
||||
doc_type: str | None = None,
|
||||
collection: str | None = None,
|
||||
fts_only: bool = False,
|
||||
) -> str:
|
||||
"""Search the knowledge base for relevant documents and notes.
|
||||
@@ -134,7 +72,6 @@ async def kb_search(
|
||||
top: Maximum number of results to return (default 10).
|
||||
tags: Filter results to documents with ALL of these tags.
|
||||
doc_type: Filter by document type (e.g. "note", "pdf", "markdown", "code").
|
||||
collection: Filter by collection name (e.g. "documents", "memory", "workspace").
|
||||
fts_only: If true, use only full-text search (no vector similarity).
|
||||
|
||||
Tips for complex queries:
|
||||
@@ -144,27 +81,21 @@ async def kb_search(
|
||||
- For precision, rerank the returned results using your own judgement based on
|
||||
relevance to the original question.
|
||||
"""
|
||||
search_tags = list(tags) if tags else []
|
||||
if collection:
|
||||
search_tags.append(_collection_tag(collection))
|
||||
|
||||
result = engine.search(
|
||||
query=query,
|
||||
top=top,
|
||||
tags=search_tags or None,
|
||||
tags=tags or None,
|
||||
doc_type=doc_type,
|
||||
fts_only=fts_only,
|
||||
)
|
||||
|
||||
results_list = result if isinstance(result, list) else result.get("results", [])
|
||||
processed = _process_search_results(results_list)
|
||||
return json.dumps(processed, indent=2)
|
||||
return json.dumps(results_list, indent=2)
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
async def kb_addnote(
|
||||
text: str,
|
||||
collection: str | None = None,
|
||||
tags: list[str] | None = None,
|
||||
title: str | None = None,
|
||||
) -> str:
|
||||
@@ -175,15 +106,10 @@ async def kb_addnote(
|
||||
|
||||
Args:
|
||||
text: The note text content.
|
||||
collection: Collection to add the note to (default "documents").
|
||||
Standard collections: "documents", "memory", "workspace".
|
||||
tags: Additional tags to apply to the note.
|
||||
tags: Tags to apply to the note.
|
||||
title: Optional title (auto-derived from first line if omitted).
|
||||
"""
|
||||
all_tags = list(tags) if tags else []
|
||||
all_tags.append(_collection_tag(collection))
|
||||
|
||||
result = engine.add_note(text=text, tags=all_tags, title=title)
|
||||
result = engine.add_note(text=text, tags=tags or None, title=title)
|
||||
return json.dumps(result, indent=2)
|
||||
|
||||
|
||||
@@ -203,7 +129,7 @@ async def kb_update_note(
|
||||
text: The new text content for the note.
|
||||
"""
|
||||
result = engine.update_note(document_id, text)
|
||||
return json.dumps(_process_document(result), indent=2)
|
||||
return json.dumps(result, indent=2)
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
@@ -222,14 +148,14 @@ async def kb_get(
|
||||
"""
|
||||
if document_id is not None:
|
||||
result = engine.get_document(document_id)
|
||||
return json.dumps(_process_document(result), indent=2)
|
||||
return json.dumps(result, indent=2)
|
||||
elif source_path is not None:
|
||||
docs = engine.list_documents()
|
||||
matches = [d for d in docs if d.get("source_path") == source_path]
|
||||
if not matches:
|
||||
return json.dumps({"error": "No document found with that source_path"})
|
||||
doc = engine.get_document(matches[0]["id"])
|
||||
return json.dumps(_process_document(doc), indent=2)
|
||||
return json.dumps(doc, indent=2)
|
||||
else:
|
||||
return json.dumps({"error": "Provide either document_id or source_path"})
|
||||
|
||||
@@ -262,12 +188,27 @@ async def kb_jobs(
|
||||
return json.dumps(result, indent=2)
|
||||
|
||||
|
||||
@mcp.tool()
async def kb_delete(
    document_id: int,
) -> str:
    """Permanently delete a document from the knowledge base.

    Removes the document and all associated data (chunks, embeddings, tags,
    stored files). This action cannot be undone.

    Args:
        document_id: The ID of the document to delete.
    """
    # Hand the engine's reply back to the caller as pretty-printed JSON text.
    return json.dumps(engine.delete_document(document_id), indent=2)
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
async def kb_upload_start(
|
||||
filename: str,
|
||||
total_size: int,
|
||||
tags: list[str] | None = None,
|
||||
collection: str | None = None,
|
||||
) -> str:
|
||||
"""Start a chunked file upload to the knowledge base.
|
||||
|
||||
@@ -277,7 +218,7 @@ async def kb_upload_start(
|
||||
3. Call kb_upload_finish to submit the file for ingestion
|
||||
|
||||
Example for a 3MB file:
|
||||
upload = kb_upload_start(filename="report.pdf", total_size=3145728, collection="documents")
|
||||
upload = kb_upload_start(filename="report.pdf", total_size=3145728, tags=["project:x"])
|
||||
kb_upload_chunk(upload_id=upload["upload_id"], data="<base64 chunk 0>", chunk_index=0)
|
||||
kb_upload_chunk(upload_id=upload["upload_id"], data="<base64 chunk 1>", chunk_index=1)
|
||||
kb_upload_chunk(upload_id=upload["upload_id"], data="<base64 chunk 2>", chunk_index=2)
|
||||
@@ -286,13 +227,9 @@ async def kb_upload_start(
|
||||
Args:
|
||||
filename: Original filename (used for type detection).
|
||||
total_size: Total file size in bytes.
|
||||
tags: Additional tags to apply.
|
||||
collection: Collection name (default "documents").
|
||||
tags: Tags to apply to the uploaded document.
|
||||
"""
|
||||
all_tags = list(tags) if tags else []
|
||||
all_tags.append(_collection_tag(collection))
|
||||
|
||||
upload_id = uploads.start_upload(filename, total_size, all_tags)
|
||||
upload_id = uploads.start_upload(filename, total_size, tags or [])
|
||||
return json.dumps({"upload_id": upload_id})
|
||||
|
||||
|
||||
@@ -338,6 +275,112 @@ async def kb_upload_finish(
|
||||
return json.dumps({"error": str(e)})
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk operation tools
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@mcp.tool()
async def kb_bulk_delete(
    document_ids: list[int] | None = None,
    tags: list[str] | None = None,
    doc_type: str | None = None,
    from_id: int | None = None,
    to_id: int | None = None,
    force: bool = False,
) -> str:
    """Permanently delete multiple documents matching a filter.

    Removes matched documents and all associated data (chunks, embeddings, tags,
    stored files). This action cannot be undone.

    Selection filters combine with AND logic — at least one is required.

    A safety threshold applies: if the operation would affect more than 70% of
    all documents, it is rejected unless force=true.

    Args:
        document_ids: Delete documents with these specific IDs.
        tags: Delete documents that have ALL of these tags (selection filter).
        doc_type: Delete documents of this type (e.g. "note", "pdf").
        from_id: Delete documents with id >= this value.
        to_id: Delete documents with id <= this value.
        force: Override the safety threshold if it would block the operation.
    """
    # Group the selection filters, then forward them unchanged to the engine.
    selection = {
        "document_ids": document_ids,
        "tags": tags,
        "doc_type": doc_type,
        "from_id": from_id,
        "to_id": to_id,
    }
    outcome = engine.bulk_delete(**selection, force=force)
    return json.dumps(outcome, indent=2)
|
||||
|
||||
|
||||
@mcp.tool()
async def kb_bulk_tags(
    document_ids: list[int] | None = None,
    tags: list[str] | None = None,
    doc_type: str | None = None,
    from_id: int | None = None,
    to_id: int | None = None,
    add: list[str] | None = None,
    remove: list[str] | None = None,
    force: bool = False,
) -> str:
    """Add and/or remove tags on multiple documents matching a filter.

    Selection filters combine with AND logic — at least one is required.
    Note: the 'tags' parameter is a SELECTION FILTER (which documents to target),
    while 'add' and 'remove' specify the TAG CHANGES to apply to those documents.

    Args:
        document_ids: Target documents with these specific IDs.
        tags: Target documents that have ALL of these tags (selection filter).
        doc_type: Target documents of this type.
        from_id: Target documents with id >= this value.
        to_id: Target documents with id <= this value.
        add: Tags to add to matched documents.
        remove: Tags to remove from matched documents.
        force: Override the safety threshold if it would block the operation.
    """
    # Keep "which documents" separate from "what to change" for readability.
    selection = {
        "document_ids": document_ids,
        "tags": tags,
        "doc_type": doc_type,
        "from_id": from_id,
        "to_id": to_id,
    }
    outcome = engine.bulk_tags(**selection, add=add, remove=remove, force=force)
    return json.dumps(outcome, indent=2)
|
||||
|
||||
|
||||
@mcp.tool()
async def kb_bulk_set_tags(
    document_ids: list[int] | None = None,
    tags: list[str] | None = None,
    doc_type: str | None = None,
    from_id: int | None = None,
    to_id: int | None = None,
    new_tags: list[str] | None = None,
    force: bool = False,
) -> str:
    """Replace all tags on multiple documents with a new set.

    Removes ALL existing tags from matched documents, then applies the new tag set.
    Selection filters combine with AND logic — at least one is required.
    Note: the 'tags' parameter is a SELECTION FILTER (which documents to target),
    while 'new_tags' is the REPLACEMENT tag set to apply.

    Args:
        document_ids: Target documents with these specific IDs.
        tags: Target documents that have ALL of these tags (selection filter).
        doc_type: Target documents of this type.
        from_id: Target documents with id >= this value.
        to_id: Target documents with id <= this value.
        new_tags: The replacement tag set to apply to all matched documents.
        force: Override the safety threshold if it would block the operation.
    """
    # Keep "which documents" separate from the replacement tag set.
    selection = {
        "document_ids": document_ids,
        "tags": tags,
        "doc_type": doc_type,
        "from_id": from_id,
        "to_id": to_id,
    }
    outcome = engine.bulk_set_tags(**selection, new_tags=new_tags, force=force)
    return json.dumps(outcome, indent=2)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth middleware
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user