Initial MVP
This commit is contained in:
@@ -0,0 +1,172 @@
|
||||
"""Tests for code chunking — Python, Bash, Go."""
|
||||
|
||||
from kb_search.ingest.code import chunk_code, _chunk_python, _chunk_bash, _chunk_go, _fixed_chunk
|
||||
|
||||
# Shared chunking configuration passed to chunk_code() by the fallback tests.
CFG = {
    "chunking": {
        "code": {
            "strategy": "ast",
            "include_context": True,
            "max_tokens": 1024,
        },
    },
}
|
||||
|
||||
|
||||
class TestPythonChunking:
    """Checks for _chunk_python on small inline Python snippets.

    The snippets are deliberately written at column 0 inside the
    triple-quoted strings so that they are valid top-level Python.
    """

    def test_functions(self):
        """Two top-level functions give two chunks, in source order."""
        code = '''
def hello():
    """Say hello."""
    print("hello")

def goodbye():
    """Say goodbye."""
    print("bye")
'''
        chunks = _chunk_python(code, include_context=True)
        assert len(chunks) == 2
        assert chunks[0]["metadata"]["symbol_name"] == "hello"
        assert chunks[1]["metadata"]["symbol_name"] == "goodbye"

    def test_class_with_methods(self):
        """Each method is its own chunk, named ``Class.method``."""
        code = '''
class MyClass:
    """A test class."""

    def method_a(self):
        pass

    def method_b(self):
        pass
'''
        chunks = _chunk_python(code, include_context=True)
        assert len(chunks) == 2
        assert chunks[0]["metadata"]["symbol_name"] == "MyClass.method_a"
        assert chunks[1]["metadata"]["symbol_name"] == "MyClass.method_b"
        # Context should include class docstring
        assert "A test class" in chunks[0]["text"]

    def test_class_without_methods(self):
        """A class with only attributes is emitted as a single chunk."""
        code = '''
class Config:
    """Configuration."""
    DEBUG = True
    PORT = 8080
'''
        chunks = _chunk_python(code, include_context=True)
        assert len(chunks) == 1
        assert chunks[0]["metadata"]["symbol_name"] == "Config"

    def test_syntax_error_returns_empty(self):
        """Unparsable input yields an empty list."""
        chunks = _chunk_python("def broken(:\n pass", include_context=True)
        assert chunks == []

    def test_no_context(self):
        """With include_context=False the class docstring is left out."""
        code = '''
class Foo:
    """Docstring."""
    def bar(self):
        pass
'''
        chunks = _chunk_python(code, include_context=False)
        assert len(chunks) == 1
        assert "Docstring" not in chunks[0]["text"]
|
||||
|
||||
|
||||
class TestBashChunking:
    """Checks for _chunk_bash on small inline shell scripts."""

    def test_function_keyword(self):
        """Functions declared with the ``function`` keyword are chunked by name."""
        code = '''#!/bin/bash

function deploy() {
    echo "deploying"
}

function rollback() {
    echo "rolling back"
}
'''
        chunks = _chunk_bash(code, include_context=True)
        assert len(chunks) == 2
        assert chunks[0]["metadata"]["symbol_name"] == "deploy"
        assert chunks[1]["metadata"]["symbol_name"] == "rollback"

    def test_shorthand_syntax(self):
        """Functions declared as ``name() { ... }`` are chunked as well."""
        code = '''
setup() {
    echo "setup"
}

cleanup() {
    echo "cleanup"
}
'''
        chunks = _chunk_bash(code, include_context=True)
        assert len(chunks) == 2

    def test_no_functions(self):
        """A script without any function definition yields an empty list."""
        code = "#!/bin/bash\necho hello\nexit 0"
        chunks = _chunk_bash(code, include_context=True)
        assert chunks == []

    def test_with_preceding_comments(self):
        """Comment lines directly above a function appear in its chunk text."""
        code = '''
# Deploy to production
# Requires valid credentials
function deploy() {
    echo "deploying"
}
'''
        chunks = _chunk_bash(code, include_context=True)
        assert len(chunks) == 1
        assert "Deploy to production" in chunks[0]["text"]
|
||||
|
||||
|
||||
class TestGoChunking:
    """Checks for _chunk_go on small inline Go sources."""

    def test_basic_funcs(self):
        """Plain ``func`` declarations are chunked by name, in source order."""
        code = '''package main

func main() {
    fmt.Println("hello")
}

func helper() string {
    return "help"
}
'''
        chunks = _chunk_go(code, include_context=True)
        assert len(chunks) == 2
        assert chunks[0]["metadata"]["symbol_name"] == "main"
        assert chunks[1]["metadata"]["symbol_name"] == "helper"

    def test_method_receiver(self):
        """Methods with a receiver are chunked under the bare method name."""
        code = '''
func (s *Server) Start() error {
    return nil
}

func (s *Server) Stop() {
}
'''
        chunks = _chunk_go(code, include_context=True)
        assert len(chunks) == 2
        assert chunks[0]["metadata"]["symbol_name"] == "Start"

    def test_no_funcs(self):
        """A source file without any ``func`` yields an empty list."""
        code = "package main\n\nvar x = 1"
        chunks = _chunk_go(code, include_context=True)
        assert chunks == []
|
||||
|
||||
|
||||
class TestFallback:
    """Checks for the public chunk_code entry point and its fallback paths."""

    def test_unknown_language_uses_fixed(self):
        """A language other than Python/Bash/Go still produces chunks."""
        code = "line1\nline2\nline3"
        chunks = chunk_code(code, "ruby", CFG)
        assert len(chunks) >= 1

    def test_python_no_functions_uses_fixed(self):
        """Python code with no functions or classes still produces chunks."""
        code = "x = 1\ny = 2\nprint(x + y)"
        chunks = chunk_code(code, "python", CFG)
        assert len(chunks) >= 1

    def test_fixed_strategy_config(self):
        """The "fixed" strategy with a small max_tokens splits into several chunks."""
        cfg = {"chunking": {"code": {"strategy": "fixed", "max_tokens": 10}}}
        code = "\n".join(f"x_{i} = {i}" for i in range(50))
        chunks = chunk_code(code, "python", cfg)
        assert len(chunks) > 1

    def test_empty_code(self):
        """Empty input produces no chunks."""
        chunks = chunk_code("", "python", CFG)
        assert len(chunks) == 0
|
||||
Reference in New Issue
Block a user