173 lines
4.4 KiB
Python
173 lines
4.4 KiB
Python
"""Tests for code chunking — Python, Bash, Go."""
|
|
|
|
from kb_search.ingest.code import chunk_code, _chunk_python, _chunk_bash, _chunk_go, _fixed_chunk
|
|
|
|
# Shared configuration handed to chunk_code() by the tests in this module.
CFG = {
    "chunking": {
        "code": {
            "strategy": "ast",
            "include_context": True,
            "max_tokens": 1024,
        },
    },
}
|
|
|
|
|
|
class TestPythonChunking:
    """Checks for ``_chunk_python`` against small inline Python snippets."""

    def test_functions(self):
        """Two module-level functions give two chunks named after them."""
        source = '''
def hello():
    """Say hello."""
    print("hello")

def goodbye():
    """Say goodbye."""
    print("bye")
'''
        found = _chunk_python(source, include_context=True)
        assert len(found) == 2
        names = [item["metadata"]["symbol_name"] for item in found]
        assert names == ["hello", "goodbye"]

    def test_class_with_methods(self):
        """Each method is its own chunk, named ``Class.method``."""
        source = '''
class MyClass:
    """A test class."""

    def method_a(self):
        pass

    def method_b(self):
        pass
'''
        found = _chunk_python(source, include_context=True)
        assert len(found) == 2
        names = [item["metadata"]["symbol_name"] for item in found]
        assert names == ["MyClass.method_a", "MyClass.method_b"]
        # With context enabled, the class docstring must appear in the chunk text.
        first_text = found[0]["text"]
        assert "A test class" in first_text

    def test_class_without_methods(self):
        """A class with only attributes is emitted as one chunk."""
        source = '''
class Config:
    """Configuration."""
    DEBUG = True
    PORT = 8080
'''
        found = _chunk_python(source, include_context=True)
        assert len(found) == 1
        only = found[0]
        assert only["metadata"]["symbol_name"] == "Config"

    def test_syntax_error_returns_empty(self):
        """Unparseable input produces an empty list."""
        broken = "def broken(:\n pass"
        found = _chunk_python(broken, include_context=True)
        assert found == []

    def test_no_context(self):
        """With context disabled, the class docstring is absent from the chunk."""
        source = '''
class Foo:
    """Docstring."""
    def bar(self):
        pass
'''
        found = _chunk_python(source, include_context=False)
        assert len(found) == 1
        method_text = found[0]["text"]
        assert "Docstring" not in method_text
|
|
|
|
|
|
class TestBashChunking:
    """Checks for ``_chunk_bash`` against small inline shell scripts."""

    def test_function_keyword(self):
        """Functions declared with the ``function`` keyword are each chunked."""
        script = '''#!/bin/bash

function deploy() {
    echo "deploying"
}

function rollback() {
    echo "rolling back"
}
'''
        found = _chunk_bash(script, include_context=True)
        assert len(found) == 2
        names = [item["metadata"]["symbol_name"] for item in found]
        assert names == ["deploy", "rollback"]

    def test_shorthand_syntax(self):
        """Functions declared as ``name() { ... }`` are each chunked."""
        script = '''
setup() {
    echo "setup"
}

cleanup() {
    echo "cleanup"
}
'''
        found = _chunk_bash(script, include_context=True)
        assert len(found) == 2

    def test_no_functions(self):
        """A script without any function definition produces an empty list."""
        script = "\n".join(["#!/bin/bash", "echo hello", "exit 0"])
        found = _chunk_bash(script, include_context=True)
        assert found == []

    def test_with_preceding_comments(self):
        """Comment lines directly above a function appear in its chunk text."""
        script = '''
# Deploy to production
# Requires valid credentials
function deploy() {
    echo "deploying"
}
'''
        found = _chunk_bash(script, include_context=True)
        assert len(found) == 1
        chunk_text = found[0]["text"]
        assert "Deploy to production" in chunk_text
|
|
|
|
|
|
class TestGoChunking:
    """Checks for ``_chunk_go`` against small inline Go sources."""

    def test_basic_funcs(self):
        """Two plain functions give two chunks named after them."""
        source = '''package main

func main() {
    fmt.Println("hello")
}

func helper() string {
    return "help"
}
'''
        found = _chunk_go(source, include_context=True)
        assert len(found) == 2
        names = [item["metadata"]["symbol_name"] for item in found]
        assert names == ["main", "helper"]

    def test_method_receiver(self):
        """Methods with a receiver are chunked; the first is named ``Start``."""
        source = '''
func (s *Server) Start() error {
    return nil
}

func (s *Server) Stop() {
}
'''
        found = _chunk_go(source, include_context=True)
        assert len(found) == 2
        first = found[0]
        assert first["metadata"]["symbol_name"] == "Start"

    def test_no_funcs(self):
        """A source file without any ``func`` produces an empty list."""
        source = "\n".join(["package main", "", "var x = 1"])
        found = _chunk_go(source, include_context=True)
        assert found == []
|
|
|
|
|
|
class TestFallback:
    """Checks for the public ``chunk_code`` entry point on edge-case inputs.

    The test names refer to a "fixed" fallback; the assertions themselves only
    verify how many chunks come back.
    """

    def test_unknown_language_uses_fixed(self):
        """A language label of ``"ruby"`` still yields at least one chunk."""
        text = "\n".join(["line1", "line2", "line3"])
        pieces = chunk_code(text, "ruby", CFG)
        assert 1 <= len(pieces)

    def test_python_no_functions_uses_fixed(self):
        """Python source with no definitions still yields at least one chunk."""
        text = "\n".join(["x = 1", "y = 2", "print(x + y)"])
        pieces = chunk_code(text, "python", CFG)
        assert 1 <= len(pieces)

    def test_fixed_strategy_config(self):
        """The ``fixed`` strategy with a small token budget splits 50 lines."""
        code_options = {"strategy": "fixed", "max_tokens": 10}
        settings = {"chunking": {"code": code_options}}
        lines = [f"x_{i} = {i}" for i in range(50)]
        text = "\n".join(lines)
        pieces = chunk_code(text, "python", settings)
        assert len(pieces) >= 2

    def test_empty_code(self):
        """An empty string produces no chunks."""
        pieces = chunk_code("", "python", CFG)
        assert len(pieces) == 0
|