"""Tests for configuration loading, merging, and ENV overrides."""

import os
from pathlib import Path

import pytest
import yaml

from kb_search.config import (
    DEFAULTS,
    _deep_merge,
    _get_nested,
    _set_nested,
    config_with_sources,
    load_config,
    save_config_value,
)

# Environment overrides exercised by the tests in this module.
# NOTE(review): kb_search.config may honour further KB_* variables; add them
# here if a default-value assertion below ever depends on one.
_KB_ENV_VARS = ("KB_MODEL", "KB_DEFAULT_TOP")


@pytest.fixture(autouse=True)
def _clean_kb_env(monkeypatch):
    """Remove known KB_* overrides so every test starts from a clean env.

    Without this, a KB_MODEL or KB_DEFAULT_TOP value exported in the
    developer's shell (or CI) would leak into tests that assert default or
    YAML-sourced values. Tests that need an override set it themselves after
    this fixture has run; monkeypatch restores the original environment on
    teardown.
    """
    for name in _KB_ENV_VARS:
        monkeypatch.delenv(name, raising=False)


def _read_yaml(path: Path):
    """Parse the YAML file at *path* and return the loaded data."""
    with open(path, encoding="utf-8") as f:
        return yaml.safe_load(f)


def test_deep_merge_basic():
    """Nested override replaces only the overlapping leaf."""
    base = {"a": 1, "b": {"c": 2, "d": 3}}
    override = {"b": {"c": 99}}
    result = _deep_merge(base, override)
    assert result == {"a": 1, "b": {"c": 99, "d": 3}}


def test_deep_merge_new_keys():
    """Keys present only in the override are added to the result."""
    base = {"a": 1}
    override = {"b": 2}
    result = _deep_merge(base, override)
    assert result == {"a": 1, "b": 2}


def test_deep_merge_does_not_mutate():
    """The base mapping is left untouched by a merge."""
    base = {"a": {"b": 1}}
    override = {"a": {"b": 2}}
    _deep_merge(base, override)
    assert base["a"]["b"] == 1


def test_set_nested():
    """A dotted key creates the intermediate dicts it needs."""
    d = {}
    _set_nested(d, "a.b.c", 42)
    assert d == {"a": {"b": {"c": 42}}}


def test_get_nested():
    """Dotted lookup returns the value, the given default, or None."""
    d = {"a": {"b": {"c": 42}}}
    assert _get_nested(d, "a.b.c") == 42
    assert _get_nested(d, "a.b.x", "missing") == "missing"
    assert _get_nested(d, "x.y.z") is None


def test_load_config_defaults(tmp_path):
    """With no config file, returns defaults."""
    cfg = load_config(tmp_path / "nonexistent.yaml")
    assert cfg["embedding"]["model"] == "all-MiniLM-L6-v2"
    assert cfg["search"]["default_top"] == 10
    assert cfg["chunking"]["pdf"]["strategy"] == "hierarchy"


def test_load_config_yaml_override(tmp_path):
    """YAML values override defaults."""
    config_path = tmp_path / "config.yaml"
    config_path.write_text(yaml.dump({"embedding": {"model": "nomic-embed-text"}}))
    cfg = load_config(config_path)
    assert cfg["embedding"]["model"] == "nomic-embed-text"
    # Other defaults preserved
    assert cfg["search"]["default_top"] == 10


def test_load_config_env_override(tmp_path, monkeypatch):
    """ENV overrides both YAML and defaults."""
    config_path = tmp_path / "config.yaml"
    config_path.write_text(yaml.dump({"search": {"default_top": 20}}))
    monkeypatch.setenv("KB_DEFAULT_TOP", "50")
    cfg = load_config(config_path)
    assert cfg["search"]["default_top"] == 50


def test_load_config_env_model(tmp_path, monkeypatch):
    """KB_MODEL overrides the default embedding model."""
    monkeypatch.setenv("KB_MODEL", "bge-small-en-v1.5")
    cfg = load_config(tmp_path / "nonexistent.yaml")
    assert cfg["embedding"]["model"] == "bge-small-en-v1.5"


def test_save_config_value(tmp_path):
    """A numeric string is persisted to YAML as an integer."""
    config_path = tmp_path / "config.yaml"
    save_config_value(config_path, "chunking.pdf.max_tokens", "2048")
    data = _read_yaml(config_path)
    assert data["chunking"]["pdf"]["max_tokens"] == 2048


def test_save_config_value_bool(tmp_path):
    """The string "false" is persisted to YAML as a boolean False."""
    config_path = tmp_path / "config.yaml"
    save_config_value(config_path, "chunking.code.include_context", "false")
    data = _read_yaml(config_path)
    assert data["chunking"]["code"]["include_context"] is False


def test_save_config_preserves_existing(tmp_path):
    """Saving one key keeps unrelated keys already in the file."""
    config_path = tmp_path / "config.yaml"
    config_path.write_text(yaml.dump({"embedding": {"model": "custom"}}))
    save_config_value(config_path, "search.default_top", "20")
    data = _read_yaml(config_path)
    assert data["embedding"]["model"] == "custom"
    assert data["search"]["default_top"] == 20


def test_config_with_sources_defaults(tmp_path, monkeypatch):
    """With no file and no env override, the source is reported as default."""
    # Explicitly drop the override this assertion is sensitive to, so the
    # test stays correct even if run without the module's autouse fixture.
    monkeypatch.delenv("KB_MODEL", raising=False)
    entries = config_with_sources(tmp_path / "nonexistent.yaml")
    sources = {k: s for k, _, s in entries}
    assert sources["embedding.model"] == "default"


def test_config_with_sources_yaml(tmp_path):
    """A value set in the YAML file is attributed to config.yaml."""
    config_path = tmp_path / "config.yaml"
    config_path.write_text(yaml.dump({"embedding": {"model": "custom"}}))
    entries = config_with_sources(config_path)
    sources = {k: s for k, _, s in entries}
    assert sources["embedding.model"] == "config.yaml"


def test_config_with_sources_env(tmp_path, monkeypatch):
    """A value set through KB_MODEL is attributed to the env variable."""
    monkeypatch.setenv("KB_MODEL", "from-env")
    entries = config_with_sources(tmp_path / "nonexistent.yaml")
    sources = {k: s for k, _, s in entries}
    assert sources["embedding.model"] == "env (KB_MODEL)"