92 lines
2.7 KiB
Python
92 lines
2.7 KiB
Python
"""Tests for hybrid search, RRF merging, and filtering."""
|
|
|
|
import pytest
|
|
|
|
from kb_search.search import (
|
|
_escape_fts_query,
|
|
_rank_results,
|
|
_rrf_merge,
|
|
_single_source_results,
|
|
)
|
|
|
|
|
|
class TestEscapeFtsQuery:
    """Tests for ``_escape_fts_query``, which prepares raw text for an FTS query."""

    def test_plain_query(self):
        """A query without special characters is returned unchanged."""
        assert _escape_fts_query("install git") == "install git"

    def test_special_chars(self):
        """Double quotes and parentheses must not survive escaping."""
        result = _escape_fts_query('install "git" (latest)')
        assert '"' not in result
        assert "(" not in result
        assert ")" not in result

    def test_collapses_spaces(self):
        """Leading, trailing and repeated inner whitespace is normalised."""
        # The input needs runs of several spaces; with single spaces only,
        # this test would pass on trimming alone and never check collapsing.
        assert _escape_fts_query("  too   many   spaces  ") == "too many spaces"

    def test_empty(self):
        """An empty query stays empty."""
        assert _escape_fts_query("") == ""
|
|
|
|
|
|
class TestRankResults:
    """Tests for ``_rank_results``, which maps ids to rank positions."""

    def test_basic_ranking(self):
        """The highest score gets rank 1, the next rank 2, and so on."""
        scores_by_id = {1: 0.9, 2: 0.5, 3: 0.7}
        ranks = _rank_results(scores_by_id)

        # Expected order by descending score: id 1 (0.9), id 3 (0.7), id 2 (0.5).
        assert ranks[1] == 1  # highest score = rank 1
        assert ranks[3] == 2
        assert ranks[2] == 3

    def test_empty(self):
        """No input scores produce an empty mapping."""
        ranks = _rank_results({})
        assert ranks == {}
|
|
|
|
|
|
class TestRRFMerge:
    """Tests for ``_rrf_merge``, called with FTS results, vector results and ``k=60``.

    Both inputs are dicts keyed by chunk id. Each merged entry is read through
    its ``chunk_id``, ``score`` and ``score_breakdown`` keys.
    """

    def test_basic_merge(self):
        """A chunk found by both sources outscores chunks found by only one."""
        fts = {1: 0.9, 2: 0.5}
        vec = {1: 0.8, 3: 0.7}
        merged = _rrf_merge(fts, vec, k=60)

        scores = {r["chunk_id"]: r["score"] for r in merged}
        # Chunk 1 appears in both sources, so it should have the highest score.
        assert scores[1] > scores[2]
        assert scores[1] > scores[3]

    def test_no_overlap(self):
        """Disjoint sources contribute one merged entry per chunk."""
        fts = {1: 0.9}
        vec = {2: 0.8}
        merged = _rrf_merge(fts, vec, k=60)
        assert len(merged) == 2

    def test_score_breakdown(self):
        """A chunk present in both sources reports both breakdown components."""
        fts = {1: 0.9}
        vec = {1: 0.8}
        merged = _rrf_merge(fts, vec, k=60)
        assert len(merged) == 1
        assert merged[0]["score_breakdown"]["fts"] is not None
        assert merged[0]["score_breakdown"]["vector"] is not None

    def test_single_source_fts(self):
        """With no vector results, every entry has only an FTS component."""
        fts = {1: 0.9, 2: 0.5}
        merged = _rrf_merge(fts, {}, k=60)
        # Guard against a vacuous pass: without this, an empty result would
        # skip the loop below and the test would succeed without checking anything.
        assert len(merged) == len(fts)
        for r in merged:
            assert r["score_breakdown"]["vector"] is None
            assert r["score_breakdown"]["fts"] is not None

    def test_empty_both(self):
        """Two empty sources merge to an empty list."""
        merged = _rrf_merge({}, {}, k=60)
        assert merged == []
|
|
|
|
|
|
class TestSingleSourceResults:
    """Tests for ``_single_source_results`` with a single named source."""

    def test_fts_only(self):
        """Results built from FTS alone carry no vector component."""
        rows = _single_source_results({1: 0.9, 2: 0.5}, "fts")
        assert len(rows) == 2
        for row in rows:
            breakdown = row["score_breakdown"]
            assert breakdown["vector"] is None
            assert breakdown["fts"] is not None

    def test_vec_only(self):
        """Results built from vector search alone carry no FTS component."""
        rows = _single_source_results({1: 0.8}, "vector")
        assert len(rows) == 1
        breakdown = rows[0]["score_breakdown"]
        assert breakdown["fts"] is None
        assert breakdown["vector"] is not None
|