"""Tests for output formatters."""

import json

from kb_search.output import (
    _human_size,
    format_doc_info,
    format_document_list,
    format_search_results,
    format_status,
    format_tags,
)

# Shared search-response fixture passed to format_search_results by the
# search-output tests: one query with a single result and its source metadata.
SAMPLE_SEARCH = {
    "query": "install git",
    "results": [
        {
            "chunk_id": 1,
            "score": 0.031,
            "score_breakdown": {"fts": 0.016, "vector": 0.015},
            "text": "To install git from source...",
            "source": {
                "document_id": 42,
                "title": "Git Admin Guide",
                "path": "/docs/git.pdf",
                "type": "pdf",
                "page": 12,
                "section_header": None,
                "chunk_index": 3,
                "total_chunks": 28,
                "tags": ["git", "admin"],
            },
        }
    ],
    "total_matches": 47,
    "returned": 1,
}


class TestSearchOutput:
    """Checks format_search_results in its "json" and "human" modes."""

    def test_json_format(self) -> None:
        """JSON output parses and keeps the query, result and score breakdown."""
        output = format_search_results(SAMPLE_SEARCH, "json")
        parsed = json.loads(output)
        assert parsed["query"] == "install git"
        assert len(parsed["results"]) == 1
        assert parsed["results"][0]["chunk_id"] == 1
        assert "fts" in parsed["results"][0]["score_breakdown"]
        assert "vector" in parsed["results"][0]["score_breakdown"]

    def test_json_schema_fields(self) -> None:
        """Each JSON result exposes the expected result and source keys."""
        output = format_search_results(SAMPLE_SEARCH, "json")
        parsed = json.loads(output)
        r = parsed["results"][0]
        assert "chunk_id" in r
        assert "score" in r
        assert "text" in r
        assert "source" in r
        src = r["source"]
        assert "document_id" in src
        assert "title" in src
        assert "type" in src
        assert "tags" in src

    def test_human_format(self) -> None:
        """Human output contains the query, title, page marker and score."""
        output = format_search_results(SAMPLE_SEARCH, "human")
        assert "install git" in output
        assert "Git Admin Guide" in output
        assert "p.12" in output
        assert "0.031" in output


class TestDocList:
    """Checks format_document_list in its "json" and "human" modes."""

    def test_json(self) -> None:
        """JSON output parses to a list with one entry per document."""
        docs = [
            {
                "id": 1,
                "title": "Test",
                "type": "pdf",
                "tags": ["a"],
                "chunk_count": 5,
                "created_at": "2024-01-01",
            }
        ]
        parsed = json.loads(format_document_list(docs, "json"))
        assert len(parsed) == 1

    def test_human_empty(self) -> None:
        """An empty list renders text containing "No documents"."""
        assert "No documents" in format_document_list([], "human")

    def test_human(self) -> None:
        """Human output contains the document title."""
        docs = [
            {
                "id": 1,
                "title": "Test",
                "type": "pdf",
                "tags": ["a"],
                "chunk_count": 5,
            }
        ]
        output = format_document_list(docs, "human")
        assert "Test" in output


class TestTags:
    """Checks format_tags in its "json" and "human" modes."""

    def test_json(self) -> None:
        """JSON output parses to a list whose first entry keeps the tag name."""
        tags = [{"name": "git", "count": 15}]
        parsed = json.loads(format_tags(tags, "json"))
        assert parsed[0]["name"] == "git"

    def test_human_empty(self) -> None:
        """An empty tag list renders text containing "No tags"."""
        assert "No tags" in format_tags([], "human")


class TestStatus:
    """Checks format_status in its "json" and "human" modes."""

    def test_json(self) -> None:
        """JSON output parses and keeps the model name."""
        status = {
            "model_name": "test",
            "embedding_dim": 384,
            "schema_version": 1,
            "db_size_bytes": 1024,
            "documents": {"pdf": 5},
            "total_documents": 5,
            "total_chunks": 50,
        }
        parsed = json.loads(format_status(status, "json"))
        assert parsed["model_name"] == "test"

    def test_human(self) -> None:
        """Human output contains the model name and embedding dimension values."""
        status = {
            "model_name": "test",
            "embedding_dim": 384,
            "schema_version": 1,
            "db_size_bytes": 1024000,
            "documents": {"pdf": 5},
            "total_documents": 5,
            "total_chunks": 50,
        }
        output = format_status(status, "human")
        assert "test" in output
        assert "384" in output


class TestHumanSize:
    """Checks the strings _human_size returns for B, KB and MB magnitudes."""

    def test_bytes(self) -> None:
        """512 is rendered as "512.0 B"."""
        assert _human_size(512) == "512.0 B"

    def test_kb(self) -> None:
        """2048 is rendered as "2.0 KB"."""
        assert _human_size(2048) == "2.0 KB"

    def test_mb(self) -> None:
        """5 * 1024 * 1024 is rendered as "5.0 MB"."""
        assert _human_size(5 * 1024 * 1024) == "5.0 MB"