"""Tests for code chunking — Python, Bash, Go."""

from kb_search.ingest.code import chunk_code, _chunk_python, _chunk_bash, _chunk_go, _fixed_chunk

# Default configuration handed to ``chunk_code`` by the fallback tests.
CFG = {"chunking": {"code": {"strategy": "ast", "include_context": True, "max_tokens": 1024}}}


def _symbol_names(chunks):
    """Return the ``metadata.symbol_name`` of every chunk, in order."""
    return [chunk["metadata"]["symbol_name"] for chunk in chunks]


class TestPythonChunking:
    """Expectations for ``_chunk_python``."""

    def test_functions(self):
        """Two module-level functions are expected to yield two named chunks."""
        source = '''
def hello():
    """Say hello."""
    print("hello")

def goodbye():
    """Say goodbye."""
    print("bye")
'''
        result = _chunk_python(source, include_context=True)
        assert _symbol_names(result) == ["hello", "goodbye"]

    def test_class_with_methods(self):
        """Methods are expected as ``Class.method`` chunks carrying class context."""
        source = '''
class MyClass:
    """A test class."""

    def method_a(self):
        pass

    def method_b(self):
        pass
'''
        result = _chunk_python(source, include_context=True)
        assert _symbol_names(result) == ["MyClass.method_a", "MyClass.method_b"]
        # Context should include class docstring
        first_text = result[0]["text"]
        assert "A test class" in first_text

    def test_class_without_methods(self):
        """A class with no methods is expected to yield a single chunk named after it."""
        source = '''
class Config:
    """Configuration."""
    DEBUG = True
    PORT = 8080
'''
        result = _chunk_python(source, include_context=True)
        assert _symbol_names(result) == ["Config"]

    def test_syntax_error_returns_empty(self):
        """Unparseable input is expected to produce an empty list."""
        result = _chunk_python("def broken(:\n pass", include_context=True)
        assert result == []

    def test_no_context(self):
        """With ``include_context=False`` the class docstring must not appear in the text."""
        source = '''
class Foo:
    """Docstring."""
    def bar(self):
        pass
'''
        result = _chunk_python(source, include_context=False)
        assert len(result) == 1
        only_text = result[0]["text"]
        assert "Docstring" not in only_text


class TestBashChunking:
    """Expectations for ``_chunk_bash``."""

    def test_function_keyword(self):
        """Functions declared with the ``function`` keyword are expected as named chunks."""
        script = '''#!/bin/bash

function deploy() {
    echo "deploying"
}

function rollback() {
    echo "rolling back"
}
'''
        result = _chunk_bash(script, include_context=True)
        assert _symbol_names(result) == ["deploy", "rollback"]

    def test_shorthand_syntax(self):
        """Functions declared as ``name() { ... }`` are expected to be chunked too."""
        script = '''
setup() {
    echo "setup"
}

cleanup() {
    echo "cleanup"
}
'''
        result = _chunk_bash(script, include_context=True)
        assert len(result) == 2

    def test_no_functions(self):
        """A script without function definitions is expected to yield no chunks."""
        script = "#!/bin/bash\necho hello\nexit 0"
        result = _chunk_bash(script, include_context=True)
        assert result == []

    def test_with_preceding_comments(self):
        """Comment lines directly above a function are expected in its chunk text."""
        script = '''
# Deploy to production
# Requires valid credentials
function deploy() {
    echo "deploying"
}
'''
        result = _chunk_bash(script, include_context=True)
        assert len(result) == 1
        only_text = result[0]["text"]
        assert "Deploy to production" in only_text


class TestGoChunking:
    """Expectations for ``_chunk_go``."""

    def test_basic_funcs(self):
        """Plain ``func`` declarations are expected as chunks named after the function."""
        program = '''package main

func main() {
    fmt.Println("hello")
}

func helper() string {
    return "help"
}
'''
        result = _chunk_go(program, include_context=True)
        assert _symbol_names(result) == ["main", "helper"]

    def test_method_receiver(self):
        """A method with a receiver is expected to be named by the method name alone."""
        program = '''
func (s *Server) Start() error {
    return nil
}

func (s *Server) Stop() {
}
'''
        result = _chunk_go(program, include_context=True)
        assert len(result) == 2
        # Only the first chunk's name is pinned by this test.
        first_name = result[0]["metadata"]["symbol_name"]
        assert first_name == "Start"

    def test_no_funcs(self):
        """A file with no ``func`` declarations is expected to yield no chunks."""
        program = "package main\n\nvar x = 1"
        result = _chunk_go(program, include_context=True)
        assert result == []


class TestFallback:
    """Expectations for ``chunk_code`` when AST chunking does not apply."""

    def test_unknown_language_uses_fixed(self):
        """An unrecognised language is still expected to produce at least one chunk."""
        source = "line1\nline2\nline3"
        result = chunk_code(source, "ruby", CFG)
        assert len(result) > 0

    def test_python_no_functions_uses_fixed(self):
        """Python code without definitions is still expected to produce at least one chunk."""
        source = "x = 1\ny = 2\nprint(x + y)"
        result = chunk_code(source, "python", CFG)
        assert len(result) > 0

    def test_fixed_strategy_config(self):
        """The ``fixed`` strategy with a small token budget is expected to split 50 lines."""
        fixed_cfg = {"chunking": {"code": {"strategy": "fixed", "max_tokens": 10}}}
        lines = [f"x_{i} = {i}" for i in range(50)]
        source = "\n".join(lines)
        result = chunk_code(source, "python", fixed_cfg)
        assert len(result) >= 2

    def test_empty_code(self):
        """Empty input is expected to produce zero chunks."""
        result = chunk_code("", "python", CFG)
        assert len(result) == 0