"""Tests for Docling ingestion (fixed-size chunking logic, mocked Docling).""" from kb_search.ingest.docling import _fixed_chunk_text class TestFixedChunkText: def test_short_text_single_chunk(self): chunks = _fixed_chunk_text("Hello world", {}) assert len(chunks) == 1 assert chunks[0]["text"] == "Hello world" assert chunks[0]["chunk_index"] == 0 def test_long_text_multiple_chunks(self): text = "word " * 2000 # ~10000 chars chunks = _fixed_chunk_text(text, {"max_tokens": 512, "overlap_tokens": 50}) assert len(chunks) > 1 # Chunks should overlap for i, c in enumerate(chunks): assert c["chunk_index"] == i def test_empty_text(self): chunks = _fixed_chunk_text("", {}) assert len(chunks) == 0 def test_whitespace_only(self): chunks = _fixed_chunk_text(" \n\n ", {}) assert len(chunks) == 0 def test_custom_max_tokens(self): text = "a " * 500 chunks = _fixed_chunk_text(text, {"max_tokens": 100}) # 100 tokens * 4 chars = 400 chars window, 1000 chars total assert len(chunks) > 1