diff --git a/README.md b/README.md index b2bb01a..d3ef97e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Personal knowledge base with hybrid search (full-text + semantic vector search). -v2 uses a client-server architecture: a **FastAPI engine** running in Docker (with GPU acceleration) and a lightweight **Go CLI client** that talks to it over HTTP. +v2 uses a client-server architecture: a **FastAPI engine** running in Docker (with optional GPU acceleration) and a lightweight **Go CLI client** that talks to it over HTTP. ## Architecture @@ -10,7 +10,7 @@ v2 uses a client-server architecture: a **FastAPI engine** running in Docker (wi Go CLI (kb) ──HTTP──▶ FastAPI Engine (Docker) ──▶ SQLite + GPU ``` -- **Engine**: Keeps the embedding model warm in GPU memory. Handles search, ingestion, and document management via REST API. Runs in Docker with NVIDIA or AMD GPU support. +- **Engine**: Keeps the embedding model warm in memory. Handles search, ingestion, and document management via REST API. Runs in Docker with NVIDIA GPU, AMD GPU (ROCm), or CPU-only support. - **Client**: Single static Go binary. No Python, no ML dependencies, instant startup. Talks to the engine over HTTP. - **Storage**: Single SQLite database with FTS5 (keyword search) and sqlite-vec (vector search). Portable via bind mount — just copy the data directory between hosts. 
@@ -43,49 +43,33 @@ docker run -d --name kb-engine \ -e KB_API_KEY=your-secret-key \ --restart unless-stopped \ docker.dcglab.co.uk/dcg/kb/engine:latest-rocm + +# CPU only (no GPU required — smaller image) +docker run -d --name kb-engine \ + -p 8000:8000 \ + -v ~/kb-data:/data \ + -e KB_MODEL=all-MiniLM-L6-v2 \ + -e KB_API_KEY=your-secret-key \ + --restart unless-stopped \ + docker.dcglab.co.uk/dcg/kb/engine:latest-cpu ``` -Or use a compose file — create `compose.yaml`: - -```yaml -services: - kb-engine: - image: docker.dcglab.co.uk/dcg/kb/engine:latest-nvidia # or latest-rocm - runtime: nvidia # remove for ROCm - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - # For ROCm, replace the above runtime/deploy block with: - # devices: - # - "/dev/kfd" - # - "/dev/dri" - # group_add: - # - "video" - ports: - - "${KB_PORT:-8000}:8000" - volumes: - - ${KB_DATA_PATH:-./data}:/data - environment: - - KB_MODEL=${KB_MODEL:-all-MiniLM-L6-v2} - - KB_DEVICE=${KB_DEVICE:-auto} - - KB_INGEST_DEVICE=${KB_INGEST_DEVICE:-auto} - - KB_API_KEY=${KB_API_KEY:-} - - KB_SEARCH_THRESHOLD=${KB_SEARCH_THRESHOLD:-0.01} - - HF_HUB_OFFLINE=${HF_HUB_OFFLINE:-} - restart: unless-stopped -``` +Or use a compose file from the repo: ```bash -KB_DATA_PATH=~/kb-data docker compose up -d +# NVIDIA GPU +KB_DATA_PATH=~/kb-data docker compose -f engine/compose.nvidia.yaml up -d + +# AMD GPU (ROCm) +KB_DATA_PATH=~/kb-data docker compose -f engine/compose.rocm.yaml up -d + +# CPU only +KB_DATA_PATH=~/kb-data docker compose -f engine/compose.cpu.yaml up -d ``` See [DEVELOPER.md](DEVELOPER.md) to run the engine from source. -The engine will download the embedding model on first start (~90MB) and load it onto the GPU. Check readiness: +The engine will download the embedding model on first start (~90MB) and load it into memory (GPU or CPU). 
Check readiness: ```bash curl http://localhost:8000/api/v1/health @@ -196,7 +180,7 @@ rsync -a ~/kb-data/ user@target:/home/user/kb-data/ KB_DATA_PATH=~/kb-data docker compose -f compose.nvidia.yaml up -d ``` -Data is GPU-vendor-agnostic — you can ingest on NVIDIA and serve from AMD (or vice versa) with the same data directory. +Data is device-agnostic — you can ingest on NVIDIA and serve from AMD or CPU (or any combination) with the same data directory. ## Claude Code skill diff --git a/engine/Dockerfile.cpu b/engine/Dockerfile.cpu new file mode 100644 index 0000000..cb79b59 --- /dev/null +++ b/engine/Dockerfile.cpu @@ -0,0 +1,38 @@ +FROM ubuntu:24.04 + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.12 python3.12-venv python3.12-dev python3-pip \ + libpoppler-cpp-dev poppler-utils \ + libgl1 libglib2.0-0 \ + build-essential curl \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv + +WORKDIR /app + +COPY pyproject.toml ./ +COPY kb/ kb/ +COPY main.py ./ +COPY VERSION ./ + +# Install the CPU-only PyTorch wheels first so the later installs reuse them instead of pulling the +# default PyPI build (CUDA-enabled on Linux x86_64) and its nvidia-* wheels; --no-cache keeps uv's download cache out of the layer. +RUN uv venv .venv && \ + . .venv/bin/activate && \ + uv pip install --no-cache torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ + uv pip install --no-cache -e . && \ + uv pip install --no-cache "sentence-transformers[onnx]" + +ENV PATH="/app/.venv/bin:$PATH" +ENV VIRTUAL_ENV="/app/.venv" +ENV KB_DEVICE=cpu +ENV KB_INGEST_DEVICE=cpu +ENV KB_DATA_DIR=/data + +EXPOSE 8000 +VOLUME ["/data"] + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/engine/compose.cpu.yaml b/engine/compose.cpu.yaml new file mode 100644 index 0000000..2d54879 --- /dev/null +++ b/engine/compose.cpu.yaml @@ -0,0 +1,17 @@ +services: + kb-engine: + build: + context: .
+ dockerfile: Dockerfile.cpu + ports: + - "${KB_PORT:-8000}:8000" + volumes: + - ${KB_DATA_PATH:-./data}:/data + environment: + - KB_MODEL=${KB_MODEL:-all-MiniLM-L6-v2} + - KB_DEVICE=cpu + - KB_INGEST_DEVICE=cpu + - KB_API_KEY=${KB_API_KEY:-} + - KB_SEARCH_THRESHOLD=${KB_SEARCH_THRESHOLD:-0.01} + - HF_HUB_OFFLINE=${HF_HUB_OFFLINE:-} + restart: unless-stopped diff --git a/release-engine.sh b/release-engine.sh index 453fb3d..6f8fdcb 100755 --- a/release-engine.sh +++ b/release-engine.sh @@ -111,9 +111,11 @@ else echo "==> Engine version: $VERSION (no increment)" fi -TAG="engine-v${VERSION}" +GIT_TAG="engine-v${VERSION}" +DOCKER_TAG="v${VERSION}" -echo " Tag: $TAG" +echo " Git tag: $GIT_TAG" +echo " Image tag: $DOCKER_TAG" echo " Registry: $IMAGE_BASE" echo " Forge CLI: $FORGE" echo " Dry run: $DRY_RUN" @@ -125,8 +127,8 @@ echo "" echo "==> Pre-flight checks" if [[ "$DRY_RUN" == false ]]; then - if git -C "$SCRIPT_DIR" rev-parse "$TAG" &>/dev/null; then - echo "Error: tag $TAG already exists" + if git -C "$SCRIPT_DIR" rev-parse "$GIT_TAG" &>/dev/null; then + echo "Error: tag $GIT_TAG already exists" exit 1 fi fi @@ -148,29 +150,32 @@ fi #────────────────────────────────────────────────────────────────────── echo "==> Building Docker engine images ($VERSION)" -NVIDIA_IMAGE="${IMAGE_BASE}/engine:${TAG}-nvidia" -ROCM_IMAGE="${IMAGE_BASE}/engine:${TAG}-rocm" +NVIDIA_IMAGE="${IMAGE_BASE}/engine:${DOCKER_TAG}-nvidia" +ROCM_IMAGE="${IMAGE_BASE}/engine:${DOCKER_TAG}-rocm" +CPU_IMAGE="${IMAGE_BASE}/engine:${DOCKER_TAG}-cpu" NVIDIA_LATEST="${IMAGE_BASE}/engine:latest-nvidia" ROCM_LATEST="${IMAGE_BASE}/engine:latest-rocm" +CPU_LATEST="${IMAGE_BASE}/engine:latest-cpu" run docker build -t "$NVIDIA_IMAGE" -t "$NVIDIA_LATEST" -f "$ENGINE_DIR/Dockerfile.nvidia" "$ENGINE_DIR" run docker build -t "$ROCM_IMAGE" -t "$ROCM_LATEST" -f "$ENGINE_DIR/Dockerfile.rocm" "$ENGINE_DIR" +run docker build -t "$CPU_IMAGE" -t "$CPU_LATEST" -f "$ENGINE_DIR/Dockerfile.cpu" "$ENGINE_DIR" echo "" 
#────────────────────────────────────────────────────────────────────── # 4. Commit, tag, and push #────────────────────────────────────────────────────────────────────── -echo "==> Committing and tagging $TAG" +echo "==> Committing and tagging $GIT_TAG" if [[ "$INCREMENT" == true ]]; then run git -C "$SCRIPT_DIR" add "$VERSION_FILE" run git -C "$SCRIPT_DIR" commit -m "Bump engine version to $VERSION" fi -run git -C "$SCRIPT_DIR" tag -a "$TAG" -m "Release $TAG" +run git -C "$SCRIPT_DIR" tag -a "$GIT_TAG" -m "Release $GIT_TAG" run git -C "$SCRIPT_DIR" push origin HEAD -run git -C "$SCRIPT_DIR" push origin "$TAG" +run git -C "$SCRIPT_DIR" push origin "$GIT_TAG" echo "" @@ -179,7 +184,7 @@ echo "" #────────────────────────────────────────────────────────────────────── echo "==> Creating release via $FORGE" -RELEASE_TITLE="Engine $TAG" +RELEASE_TITLE="Engine $GIT_TAG" RELEASE_NOTES="## Docker images \`\`\`bash @@ -188,16 +193,19 @@ docker pull ${NVIDIA_IMAGE} # AMD GPU (ROCm) docker pull ${ROCM_IMAGE} + +# CPU only +docker pull ${CPU_IMAGE} \`\`\`" if [[ "$FORGE" == "gh" ]]; then - run gh release create "$TAG" \ + run gh release create "$GIT_TAG" \ --title "$RELEASE_TITLE" \ --notes "$RELEASE_NOTES" elif [[ "$FORGE" == "tea" ]]; then run tea release create \ - --tag "$TAG" \ + --tag "$GIT_TAG" \ --title "$RELEASE_TITLE" \ --note "$RELEASE_NOTES" fi @@ -213,10 +221,13 @@ run docker push "$NVIDIA_IMAGE" run docker push "$NVIDIA_LATEST" run docker push "$ROCM_IMAGE" run docker push "$ROCM_LATEST" +run docker push "$CPU_IMAGE" +run docker push "$CPU_LATEST" echo "" -echo "==> Release $TAG complete!" +echo "==> Release $GIT_TAG complete!" echo "" echo " Images:" echo " $NVIDIA_IMAGE" echo " $ROCM_IMAGE" +echo " $CPU_IMAGE"