completed local setup with compose
Some checks failed
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Some checks failed
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
This commit is contained in:
@@ -7,13 +7,14 @@ import os
|
||||
|
||||
# Import shared libraries
|
||||
import sys
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime
|
||||
from typing import Any, cast
|
||||
|
||||
import pytesseract
|
||||
import structlog
|
||||
import ulid
|
||||
from fastapi import BackgroundTasks, Depends, HTTPException, Request
|
||||
from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from pdf2image import convert_from_bytes
|
||||
from PIL import Image
|
||||
@@ -78,6 +79,8 @@ settings: OCRSettings
|
||||
async def init_dependencies(app_settings: OCRSettings) -> None:
|
||||
"""Initialize service dependencies"""
|
||||
global storage_client, document_storage, event_bus, settings, vision_processor
|
||||
# Larger delay to ensure NATS is fully ready before attempting connection
|
||||
await asyncio.sleep(10)
|
||||
|
||||
settings = app_settings
|
||||
logger.info("Starting OCR service")
|
||||
@@ -89,17 +92,35 @@ async def init_dependencies(app_settings: OCRSettings) -> None:
|
||||
minio_client = create_minio_client(settings)
|
||||
storage_client = StorageClient(minio_client)
|
||||
document_storage = DocumentStorage(storage_client)
|
||||
# Initialize event bus
|
||||
event_bus = create_event_bus(settings)
|
||||
if not event_bus:
|
||||
raise HTTPException(status_code=500, detail="Event bus not initialized")
|
||||
|
||||
eb = event_bus
|
||||
# mypy: event_bus is Optional, so use local alias after check
|
||||
await eb.start()
|
||||
|
||||
# Subscribe to document ingestion events
|
||||
await eb.subscribe(EventTopics.DOC_INGESTED, _handle_document_ingested)
|
||||
# Initialize event bus with retry logic
|
||||
max_retries = 20
|
||||
delay = 5
|
||||
for attempt in range(1, max_retries + 1):
|
||||
logger.info(
|
||||
"Attempting NATS connection", url=settings.nats_servers, attempt=attempt
|
||||
)
|
||||
event_bus = create_event_bus(settings)
|
||||
if not event_bus:
|
||||
raise HTTPException(status_code=500, detail="Event bus not initialized")
|
||||
eb = event_bus
|
||||
try:
|
||||
# Attempt to start and subscribe
|
||||
await eb.start()
|
||||
await eb.subscribe(EventTopics.DOC_INGESTED, _handle_document_ingested)
|
||||
logger.info("NATS connection established on attempt", attempt=attempt)
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to connect to NATS, retrying",
|
||||
attempt=attempt,
|
||||
error=str(e),
|
||||
)
|
||||
if attempt == max_retries:
|
||||
raise HTTPException(
|
||||
status_code=500, detail="Failed to connect to NATS after retries"
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
delay *= 2 # exponential backoff
|
||||
|
||||
# Initialize shared OCRProcessor for vision strategy
|
||||
try:
|
||||
@@ -114,7 +135,26 @@ async def init_dependencies(app_settings: OCRSettings) -> None:
|
||||
logger.info("OCR service started successfully")
|
||||
|
||||
|
||||
# Create app and settings
|
||||
async def shutdown_dependencies() -> None:
    """Stop the module-level event bus, if one exists, and log the shutdown."""
    logger.info("Shutting down OCR service")

    # Take a single snapshot of the global so the None check and the stop()
    # call operate on the same object.
    bus = event_bus
    if bus is None:
        logger.info("OCR service shutdown complete")
        return

    await bus.stop()
    logger.info("OCR service shutdown complete")
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):  # type: ignore
    """FastAPI lifespan event handler.

    Runs ``init_dependencies`` with the module-level ``_settings`` before the
    application starts serving, then yields control to the application.
    ``shutdown_dependencies`` is awaited when the context exits, whether the
    exit is normal or caused by an exception or cancellation raised at the
    ``yield``.

    If ``init_dependencies`` itself raises, the exception propagates and the
    shutdown step is not run.
    """
    # Startup
    await init_dependencies(cast(OCRSettings, _settings))
    try:
        yield
    finally:
        # Shutdown: in a ``finally`` so the event bus is stopped even when the
        # application exits through an error instead of a clean resume.
        await shutdown_dependencies()
|
||||
|
||||
|
||||
# Create app and settings with lifespan
|
||||
app, _settings = create_app(
|
||||
service_name="svc-ocr",
|
||||
title="Tax Agent OCR Service",
|
||||
@@ -122,8 +162,8 @@ app, _settings = create_app(
|
||||
settings_class=OCRSettings,
|
||||
) # fmt: skip
|
||||
|
||||
# Initialize dependencies immediately
|
||||
asyncio.run(init_dependencies(cast(OCRSettings, _settings)))
|
||||
# Override app's lifespan
|
||||
app.router.lifespan_context = lifespan
|
||||
|
||||
tracer = get_tracer("svc-ocr")
|
||||
metrics = get_metrics()
|
||||
|
||||
@@ -14,3 +14,12 @@ opencv-python-headless>=4.12.0.88 # Headless version is smaller
|
||||
|
||||
# Computer vision (torchvision not in base-ml)
|
||||
torchvision>=0.23.0
|
||||
|
||||
# OpenTelemetry (required by libs/observability)
|
||||
opentelemetry-api>=1.21.0
|
||||
opentelemetry-sdk>=1.21.0
|
||||
opentelemetry-exporter-otlp-proto-grpc>=1.21.0
|
||||
opentelemetry-instrumentation-fastapi>=0.42b0
|
||||
opentelemetry-instrumentation-httpx>=0.42b0
|
||||
opentelemetry-instrumentation-psycopg2>=0.42b0
|
||||
opentelemetry-instrumentation-redis>=0.42b0
|
||||
|
||||
Reference in New Issue
Block a user