"""Engine entry point — FastAPI server with eager model loading.""" import asyncio import logging import os from contextlib import asynccontextmanager from pathlib import Path from fastapi import FastAPI _version_file = Path(__file__).parent / "VERSION" __version__ = _version_file.read_text().strip() if _version_file.exists() else "dev" from kb.config import cfg from kb.embeddings import load_model from kb.database import get_connection, init_schema from kb.worker import ingestion_worker logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") log = logging.getLogger("kb.engine") # Track readiness for health endpoint ready = False worker_task: asyncio.Task | None = None @asynccontextmanager async def lifespan(app: FastAPI): global ready, worker_task # Set HF cache before any model imports os.environ["HF_HOME"] = str(cfg.hf_cache) log.info("Starting engine...") cfg.ensure_dirs() # Initialise database conn = get_connection(cfg.db_path) model_dim = load_model(cfg.model, cfg.device) init_schema(conn, model_dim) conn.close() # Start background ingestion worker worker_task = asyncio.create_task(ingestion_worker()) ready = True log.info("Engine ready — model: %s, device: %s", cfg.model, cfg.device) yield # Shutdown ready = False if worker_task: worker_task.cancel() try: await worker_task except asyncio.CancelledError: pass log.info("Engine stopped.") app = FastAPI(title="kb-engine", version=__version__, lifespan=lifespan) # Import routes after app is created from kb.routes import health, search, jobs, documents, tags, status, reindex, auth, notes # noqa: E402, F401 if __name__ == "__main__": import uvicorn uvicorn.run("main:app", host=cfg.host, port=cfg.port, log_level="info")