# src/infra/Dockerfile.inference
# 1,416 bytes · 44 lines · capsule://quake0day/[email protected]
# raw on github
# ---- Build stage: install the Python dependencies and generate gRPC stubs ----
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04 AS builder

# System packages. The apt index is deleted in the same layer that downloaded
# it so it never takes up space in the layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ffmpeg \
        python3-pip \
        python3.10 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Only the project manifest is copied before installing, so this layer is
# rebuilt when pyproject.toml changes rather than on every source edit.
COPY pyproject.toml ./
RUN pip install --no-cache-dir -e ".[inference,rag]" \
    && pip install --no-cache-dir \
        grpcio-tools \
        grpcio-health-checking \
        scipy

# Compile every .proto file under proto/ into Python message modules
# (--python_out) and gRPC modules (--grpc_python_out) in inference/generated.
COPY proto/ ./proto/
RUN mkdir -p inference/generated \
    && python3 -m grpc_tools.protoc \
        --proto_path=proto/ \
        --python_out=inference/generated \
        --grpc_python_out=inference/generated \
        proto/*.proto
# ---- Runtime stage: image that runs the gRPC inference server ----
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04

# Flush Python stdout/stderr immediately so server output reaches the
# container log without being held in a buffer.
ENV PYTHONUNBUFFERED=1

# System packages. The apt index is deleted in the same layer that downloaded
# it so it never takes up space in the layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ffmpeg \
        python3-pip \
        python3.10 \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged account for the server process. The UID/GID are fixed so that
# runtimes enforcing a non-root policy can verify them numerically. /app is
# created up front and handed to that account so the server can write there.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --create-home app \
    && install -d -o app -g app /app

WORKDIR /app

# Bring over what the builder stage placed under /usr/local (its pip installs
# are expected to land there); nothing is installed with pip in this stage.
COPY --from=builder /usr/local/lib/python3.10 /usr/local/lib/python3.10
COPY --from=builder /usr/local/bin /usr/local/bin

COPY --chown=app:app pyproject.toml ./

# Copied ahead of the source tree so that editing the source does not
# invalidate this layer.
# NOTE(review): assumes the model files change less often than the code.
COPY --chown=app:app models/flash_head/ ./models/flash_head/

COPY --chown=app:app inference/ ./inference/

# The generated gRPC modules are copied AFTER the source tree on purpose: a
# directory COPY merges into its destination, so any inference/generated/
# present in the build context would otherwise overwrite the stubs that the
# builder stage just produced from proto/.
COPY --from=builder --chown=app:app /app/inference/generated ./inference/generated/

COPY --chown=app:app cyberverse_config.yaml ./

# Everything that needed root is done; run the server and the health probe
# as the unprivileged account.
USER app

EXPOSE 50051

# The probe succeeds once a gRPC channel to localhost:50051 becomes ready
# within 3 seconds. Probe failures during --start-period do not count toward
# --retries, which gives the server time to start before it can be marked
# unhealthy.
# NOTE(review): 60s is a guess at the start-up time; tune it to the real value.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
    CMD python3 -c "import grpc; ch = grpc.insecure_channel('localhost:50051'); grpc.channel_ready_future(ch).result(timeout=3)" || exit 1

CMD ["python3", "-m", "inference.server", "--config", "cyberverse_config.yaml"]