v2 restructure: Go client, Docker engine, release tooling
- Remove v1 Python CLI (src/kb_search/, tests/, root pyproject.toml, uv.lock, .venv)
- Add Go client with cross-platform build (client/)
- Add FastAPI engine with NVIDIA and multi-stage ROCm Dockerfiles (engine/)
- Add VERSION files for client and engine, wired into builds
- Add release.sh for automated build, tag, release, and Docker push
- Update README with build/release docs and ROCm migration note
- Clean up .gitignore for v2 project structure

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,69 @@
|
||||
"""Engine entry point — FastAPI server with eager model loading."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
# Engine version, read from the VERSION file that sits next to this module.
_version_file = Path(__file__).with_name("VERSION")
if _version_file.exists():
    __version__ = _version_file.read_text().strip()
else:
    # No VERSION file beside the module: fall back to a placeholder.
    __version__ = "dev"
|
||||
|
||||
from kb.config import cfg
|
||||
from kb.embeddings import load_model
|
||||
from kb.database import get_connection, init_schema
|
||||
from kb.worker import ingestion_worker
|
||||
|
||||
# Root logging configuration plus the logger used throughout this module.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("kb.engine")
|
||||
|
||||
# Track readiness for health endpoint
|
||||
ready = False
|
||||
worker_task: asyncio.Task | None = None
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run engine startup before the app serves and shutdown afterwards.

    Startup points ``HF_HOME`` at ``cfg.hf_cache``, creates the configured
    directories, loads the embedding model, initialises the database schema
    with the dimension returned by ``load_model``, and starts the background
    ingestion worker. The module-level ``ready`` flag is set only after all
    of these steps have succeeded.

    Shutdown clears ``ready``, cancels the worker task and waits for it to
    finish. It runs in a ``finally`` block, so it also executes when an
    exception or cancellation is thrown in at the ``yield`` point.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    global ready, worker_task

    # Set HF cache before any model imports
    # NOTE(review): kb.embeddings is already imported at module load, before
    # this assignment runs — confirm nothing reads HF_HOME at import time.
    os.environ["HF_HOME"] = str(cfg.hf_cache)

    log.info("Starting engine...")
    cfg.ensure_dirs()

    # Initialise database
    conn = get_connection(cfg.db_path)
    try:
        model_dim = load_model(cfg.model, cfg.device)
        init_schema(conn, model_dim)
    finally:
        # Release the connection even if model loading or schema setup fails.
        conn.close()

    # Start background ingestion worker
    worker_task = asyncio.create_task(ingestion_worker())

    try:
        ready = True
        log.info("Engine ready — model: %s, device: %s", cfg.model, cfg.device)

        yield
    finally:
        # Shutdown
        ready = False
        if worker_task is not None:
            worker_task.cancel()
            try:
                await worker_task
            except asyncio.CancelledError:
                # Expected outcome of the cancel() call above.
                pass
        log.info("Engine stopped.")
|
||||
|
||||
|
||||
# ASGI application object; startup and shutdown are handled by `lifespan`.
app = FastAPI(
    title="kb-engine",
    version=__version__,
    lifespan=lifespan,
)
|
||||
|
||||
# Import routes after app is created
|
||||
from kb.routes import health, search, jobs, documents, tags, status, reindex, auth # noqa: E402, F401
|
||||
|
||||
if __name__ == "__main__":
    # uvicorn is imported only when this module is executed directly.
    import uvicorn

    # Bind address and port come from the project configuration object.
    server_options = {
        "host": cfg.host,
        "port": cfg.port,
        "log_level": "info",
    }
    uvicorn.run("main:app", **server_options)
|
||||
Reference in New Issue
Block a user