# Base image: NVIDIA CUDA 13.0.1 "runtime" flavour on Ubuntu 24.04.
# NOTE(review): onnxruntime-gpu is installed later in this file; confirm that
# the CUDA/cuDNN libraries it needs are actually provided by this image (the
# tag is the plain runtime variant, not a cudnn one) — TODO verify on a GPU host.
FROM nvidia/cuda:13.0.1-runtime-ubuntu24.04

# Build-time only: keeps apt/debconf from prompting during the image build.
# Declared as ARG (not ENV) so it is visible to RUN steps in this stage but is
# not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# OS-level packages needed by docling and the Python toolchain:
#   - python3.12 (+ venv, dev headers) and pip
#   - poppler (library headers + CLI utilities) for PDF handling
#   - libgl1 / libglib2.0-0 shared libraries
#   - build-essential for compiling native wheels
#   - curl
# The apt package lists are removed in the same layer to keep it small.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libgl1 \
        libglib2.0-0 \
        libpoppler-cpp-dev \
        poppler-utils \
        python3-pip \
        python3.12 \
        python3.12-dev \
        python3.12-venv \
 && rm -rf /var/lib/apt/lists/*

# Install uv (used below for dependency resolution and installation).
# Pinned to an explicit release rather than :latest so rebuilds are
# reproducible; bump the tag deliberately when upgrading uv.
COPY --from=ghcr.io/astral-sh/uv:0.8.22 /uv /usr/local/bin/uv

WORKDIR /app

# Install the locked third-party dependencies first, using only the manifests,
# so this expensive layer is reused when only files under src/ change.
#   --locked              fail instead of silently rewriting a stale uv.lock
#   --no-install-project  skip the project itself; src/ is not copied yet
#   --no-cache            do not leave uv's download cache in the image layer
COPY pyproject.toml uv.lock ./
RUN uv sync --locked --no-install-project --no-cache

# Copy the project sources, then install the project itself into /app/.venv.
# `uv sync` creates and targets .venv in the project directory on its own, so
# no separate `uv venv` / activate step is needed.
COPY src/ src/

# Optional override for pinning the GPU wheel at build time, e.g.
#   docker build --build-arg ONNXRUNTIME_GPU_SPEC="onnxruntime-gpu==<version>" .
# The default keeps the previous behaviour (latest available release).
# Declared just before its only use so changing it does not invalidate the
# dependency layer above.
ARG ONNXRUNTIME_GPU_SPEC=onnxruntime-gpu

# Overlay onnxruntime-gpu on top of the synced environment; --no-deps leaves
# the locked dependency set untouched, and --python targets the project venv
# explicitly instead of relying on an activated shell.
RUN uv sync --locked --no-cache && \
    uv pip install --no-cache --no-deps \
        --python /app/.venv/bin/python \
        "${ONNXRUNTIME_GPU_SPEC}"

# Make the project virtual environment the default Python environment:
# VIRTUAL_ENV names its root and its bin/ directory is searched first on PATH.
ENV VIRTUAL_ENV="/app/.venv" \
    PATH="/app/.venv/bin:$PATH"

# Device defaults inside the container: both the general and the ingest
# device settings are "auto", so the GPU is enabled by default.
ENV KB_DEVICE=auto \
    KB_INGEST_DEVICE=auto

# Keep the Hugging Face model cache and the KB data directory under /data so
# they can be persisted through a mounted volume.
ENV HF_HOME=/data/hf_cache \
    KB_DATA_DIR=/data/kb

# Declare /data as a volume mount point; HF_HOME and KB_DATA_DIR above both
# live under it.
# NOTE(review): no USER instruction is set in this file, so the container
# runs as the base image's default user (presumably root) — consider a
# dedicated non-root user once write access to /data is confirmed.
VOLUME ["/data"]

# Run the `kb` executable (resolved via PATH; presumably the console script
# installed into /app/.venv/bin by the project — confirm in pyproject.toml).
# CMD supplies the default argument and is replaced by any arguments passed
# to `docker run`.
ENTRYPOINT ["kb"]
CMD ["--help"]
