# Dockerfile for svc_ocr - uses the shared base-ml image.
# Per the project's notes, the base image contains FastAPI, database drivers,
# transformers, PyTorch, numpy, etc. This Dockerfile adds only the
# OCR-specific system packages, Python requirements and application code.

# Coordinates of the base image. These ARGs are declared before FROM, so they
# are only visible to the FROM line; override them with --build-arg.
ARG REGISTRY=gitea.harkon.co.uk
ARG OWNER=harkon
ARG BASE_VERSION=v1.0.1

FROM ${REGISTRY}/${OWNER}/base-ml:${BASE_VERSION}

# Switch to root to install system and service-specific dependencies.
USER root

# Install OCR runtime dependencies (Tesseract, poppler).
# update + install + list cleanup share one layer so the apt index can never
# go stale in the cache and never ends up in the image.
# poppler-data is listed explicitly because --no-install-recommends would
# otherwise skip it (it is normally pulled in only as a recommended package).
# NOTE(review): confirm poppler-data is actually needed for the PDFs handled.
RUN apt-get update && apt-get install -y --no-install-recommends \
        poppler-data \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Set working directory (created if it does not exist yet).
WORKDIR /app

# Copy only the requirements file first so the dependency layer stays cached
# until the requirements themselves change.
COPY apps/svc_ocr/requirements.txt /tmp/service-requirements.txt
RUN pip install --no-cache-dir -r /tmp/service-requirements.txt

# Hand /app to the non-root user BEFORE the application code is copied in.
# Doing the recursive chown on the (near-)empty tree keeps this layer tiny,
# whereas chown-ing after COPY would duplicate every copied file in a new layer.
# NOTE(review): assumes the base image already defines user/group "appuser".
RUN mkdir -p /app/apps /app/libs \
    && chown -R appuser:appuser /app

# Copy application code, owned by the runtime user from the start.
COPY --chown=appuser:appuser libs/ ./libs/
COPY --chown=appuser:appuser apps/svc_ocr/ ./apps/svc_ocr/

# Drop privileges for everything that follows, including the running service.
USER appuser

# Health check against the service's /healthz endpoint.
# NOTE(review): assumes curl is available in the base image - it is not
# installed by this Dockerfile.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/healthz || exit 1

# Expose port (documentation only; does not publish the port).
EXPOSE 8000

# Run the application (exec form, so uvicorn receives signals directly).
CMD ["python", "-m", "uvicorn", "apps.svc_ocr.main:app", "--host", "0.0.0.0", "--port", "8000"]