132 lines
4.1 KiB
Python
132 lines
4.1 KiB
Python
"""Tests for configuration loading, merging, and ENV overrides."""
|
|
|
|
import os
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
from kb_search.config import (
|
|
DEFAULTS,
|
|
_deep_merge,
|
|
_get_nested,
|
|
_set_nested,
|
|
config_with_sources,
|
|
load_config,
|
|
save_config_value,
|
|
)
|
|
|
|
|
|
def test_deep_merge_basic():
    """Overriding one nested key keeps its siblings and the top-level keys."""
    merged = _deep_merge(
        {"a": 1, "b": {"c": 2, "d": 3}},
        {"b": {"c": 99}},
    )
    expected = {"a": 1, "b": {"c": 99, "d": 3}}
    assert merged == expected
|
|
|
|
|
|
def test_deep_merge_new_keys():
    """Keys that exist only in the override show up in the merged result."""
    merged = _deep_merge({"a": 1}, {"b": 2})
    assert merged == {"a": 1, "b": 2}
|
|
|
|
|
|
def test_deep_merge_does_not_mutate():
    """The nested value in the base mapping is unchanged after a merge."""
    original = {"a": {"b": 1}}
    # The return value is deliberately ignored; only the input is inspected.
    _deep_merge(original, {"a": {"b": 2}})
    assert original["a"]["b"] == 1
|
|
|
|
|
|
def test_set_nested():
    """Setting a dotted path on an empty dict builds the nested structure."""
    target = {}
    _set_nested(target, "a.b.c", 42)
    expected = {"a": {"b": {"c": 42}}}
    assert target == expected
|
|
|
|
|
|
def test_get_nested():
    """Dotted lookups give the leaf, the supplied default, or None if absent."""
    tree = {"a": {"b": {"c": 42}}}

    # Existing path resolves to the stored leaf.
    found = _get_nested(tree, "a.b.c")
    assert found == 42

    # Missing leaf with an explicit default.
    fallback = _get_nested(tree, "a.b.x", "missing")
    assert fallback == "missing"

    # Entirely missing path with no default supplied.
    absent = _get_nested(tree, "x.y.z")
    assert absent is None
|
|
|
|
|
|
def test_load_config_defaults(tmp_path):
    """Loading from a path that does not exist yields the default values."""
    missing_file = tmp_path / "nonexistent.yaml"
    loaded = load_config(missing_file)

    embedding_model = loaded["embedding"]["model"]
    default_top = loaded["search"]["default_top"]
    pdf_strategy = loaded["chunking"]["pdf"]["strategy"]

    assert embedding_model == "all-MiniLM-L6-v2"
    assert default_top == 10
    assert pdf_strategy == "hierarchy"
|
|
|
|
|
|
def test_load_config_yaml_override(tmp_path):
    """A value set in the YAML file replaces the corresponding default."""
    yaml_file = tmp_path / "config.yaml"
    overrides = {"embedding": {"model": "nomic-embed-text"}}
    yaml_file.write_text(yaml.dump(overrides))

    loaded = load_config(yaml_file)

    assert loaded["embedding"]["model"] == "nomic-embed-text"
    # Keys absent from the file still carry their default value.
    assert loaded["search"]["default_top"] == 10
|
|
|
|
|
|
def test_load_config_env_override(tmp_path, monkeypatch):
    """An environment variable wins over both the YAML file and the defaults."""
    yaml_file = tmp_path / "config.yaml"
    file_settings = {"search": {"default_top": 20}}
    yaml_file.write_text(yaml.dump(file_settings))

    # The env value is a string; the loaded setting is expected as an int.
    monkeypatch.setenv("KB_DEFAULT_TOP", "50")
    loaded = load_config(yaml_file)

    assert loaded["search"]["default_top"] == 50
|
|
|
|
|
|
def test_load_config_env_model(tmp_path, monkeypatch):
    """KB_MODEL sets the embedding model even when no config file exists."""
    monkeypatch.setenv("KB_MODEL", "bge-small-en-v1.5")
    missing_file = tmp_path / "nonexistent.yaml"

    loaded = load_config(missing_file)

    assert loaded["embedding"]["model"] == "bge-small-en-v1.5"
|
|
|
|
|
|
def test_save_config_value(tmp_path):
    """Saving the string "2048" to a fresh file stores it as the integer 2048."""
    target = tmp_path / "config.yaml"
    save_config_value(target, "chunking.pdf.max_tokens", "2048")

    # Re-read the file from disk to check what was actually persisted.
    stored = yaml.safe_load(target.read_text())

    assert stored["chunking"]["pdf"]["max_tokens"] == 2048
|
|
|
|
|
|
def test_save_config_value_bool(tmp_path):
    """Saving the string "false" stores a real boolean False in the file."""
    target = tmp_path / "config.yaml"
    save_config_value(target, "chunking.code.include_context", "false")

    # Re-read the file from disk to check what was actually persisted.
    stored = yaml.safe_load(target.read_text())

    assert stored["chunking"]["code"]["include_context"] is False
|
|
|
|
|
|
def test_save_config_preserves_existing(tmp_path):
    """Saving a new key keeps the entries already present in the file."""
    target = tmp_path / "config.yaml"
    existing = {"embedding": {"model": "custom"}}
    target.write_text(yaml.dump(existing))

    save_config_value(target, "search.default_top", "20")
    stored = yaml.safe_load(target.read_text())

    # The pre-existing entry survives alongside the newly saved one.
    assert stored["embedding"]["model"] == "custom"
    assert stored["search"]["default_top"] == 20
|
|
|
|
|
|
def test_config_with_sources_defaults(tmp_path, monkeypatch):
    """With no config file and no env override, the source is "default".

    The env-override tests in this module show that ``KB_MODEL`` takes
    precedence for ``embedding.model``. It is cleared here so that a value
    exported in the developer's shell or CI environment cannot make this
    test fail.
    """
    monkeypatch.delenv("KB_MODEL", raising=False)

    entries = config_with_sources(tmp_path / "nonexistent.yaml")
    # Each entry unpacks as (dotted key, value, source label).
    sources = {key: source for key, _, source in entries}

    assert sources["embedding.model"] == "default"
|
|
|
|
|
|
def test_config_with_sources_yaml(tmp_path):
    """A value taken from the YAML file is attributed to "config.yaml"."""
    yaml_file = tmp_path / "config.yaml"
    file_settings = {"embedding": {"model": "custom"}}
    yaml_file.write_text(yaml.dump(file_settings))

    # Each entry unpacks as (dotted key, value, source label).
    origin_by_key = {
        key: origin for key, _, origin in config_with_sources(yaml_file)
    }

    assert origin_by_key["embedding.model"] == "config.yaml"
|
|
|
|
|
|
def test_config_with_sources_env(tmp_path, monkeypatch):
    """A value taken from KB_MODEL is attributed to "env (KB_MODEL)"."""
    monkeypatch.setenv("KB_MODEL", "from-env")
    missing_file = tmp_path / "nonexistent.yaml"

    # Each entry unpacks as (dotted key, value, source label).
    origin_by_key = {
        key: origin for key, _, origin in config_with_sources(missing_file)
    }

    assert origin_by_key["embedding.model"] == "env (KB_MODEL)"
|