The full ingestion -> OCR -> extraction flow is now working correctly.
Some checks failed
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Some checks failed
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
|
||||
# FILE: libs/app_factory.py
|
||||
|
||||
from collections.abc import AsyncIterator
|
||||
from collections.abc import AsyncIterator, Awaitable, Callable
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any
|
||||
|
||||
@@ -36,6 +36,8 @@ def create_app( # pylint: disable=too-many-arguments,too-many-positional-argume
|
||||
version: str = "1.0.0",
|
||||
settings_class: type[BaseAppSettings] = BaseAppSettings,
|
||||
custom_settings: dict[str, Any] | None = None,
|
||||
startup_hooks: list[Callable[[], Awaitable[None]]] | None = None,
|
||||
shutdown_hooks: list[Callable[[], Awaitable[None]]] | None = None,
|
||||
) -> tuple[FastAPI, BaseAppSettings]:
|
||||
"""Create a FastAPI application with standard configuration"""
|
||||
|
||||
@@ -56,8 +58,14 @@ def create_app( # pylint: disable=too-many-arguments,too-many-positional-argume
|
||||
) -> AsyncIterator[None]: # pylint: disable=unused-argument
|
||||
# Startup
|
||||
setup_observability(settings)
|
||||
if startup_hooks:
|
||||
for hook in startup_hooks:
|
||||
await hook()
|
||||
yield
|
||||
# Shutdown
|
||||
if shutdown_hooks:
|
||||
for hook in shutdown_hooks:
|
||||
await hook()
|
||||
|
||||
# Create FastAPI app
|
||||
app = FastAPI(
|
||||
|
||||
@@ -4,15 +4,15 @@
|
||||
class EventTopics: # pylint: disable=too-few-public-methods
|
||||
"""Standard event topic names"""
|
||||
|
||||
DOC_INGESTED = "doc.ingested"
|
||||
DOC_OCR_READY = "doc.ocr_ready"
|
||||
DOC_EXTRACTED = "doc.extracted"
|
||||
KG_UPSERT_READY = "kg.upsert.ready"
|
||||
KG_UPSERTED = "kg.upserted"
|
||||
RAG_INDEXED = "rag.indexed"
|
||||
CALC_SCHEDULE_READY = "calc.schedule_ready"
|
||||
FORM_FILLED = "form.filled"
|
||||
HMRC_SUBMITTED = "hmrc.submitted"
|
||||
REVIEW_REQUESTED = "review.requested"
|
||||
REVIEW_COMPLETED = "review.completed"
|
||||
FIRM_SYNC_COMPLETED = "firm.sync.completed"
|
||||
DOC_INGESTED = "doc_ingested"
|
||||
DOC_OCR_READY = "doc_ocr_ready"
|
||||
DOC_EXTRACTED = "doc_extracted"
|
||||
KG_UPSERT_READY = "kg_upsert_ready"
|
||||
KG_UPSERTED = "kg_upserted"
|
||||
RAG_INDEXED = "rag_indexed"
|
||||
CALC_SCHEDULE_READY = "calc_schedule_ready"
|
||||
FORM_FILLED = "form_filled"
|
||||
HMRC_SUBMITTED = "hmrc_submitted"
|
||||
REVIEW_REQUESTED = "review_requested"
|
||||
REVIEW_COMPLETED = "review_completed"
|
||||
FIRM_SYNC_COMPLETED = "firm_sync_completed"
|
||||
|
||||
@@ -11,7 +11,7 @@ psycopg2-binary>=2.9.11
|
||||
neo4j>=6.0.2
|
||||
redis[hiredis]>=6.4.0
|
||||
|
||||
minio>=7.2.18
|
||||
minio==7.2.18
|
||||
boto3>=1.34.0
|
||||
qdrant-client>=1.15.1
|
||||
|
||||
|
||||
@@ -72,22 +72,23 @@ class DocumentExtractedEventData(BaseEventData):
|
||||
"""Event emitted when field extraction is complete."""
|
||||
|
||||
doc_id: str = Field(..., description="Document identifier")
|
||||
tenant_id: str = Field(..., description="Tenant identifier")
|
||||
extraction_id: str = Field(..., description="Unique extraction run identifier")
|
||||
strategy: Literal["llm", "rules", "hybrid"] = Field(
|
||||
..., description="Extraction strategy used"
|
||||
)
|
||||
fields_extracted: int = Field(..., ge=0, description="Number of fields extracted")
|
||||
confidence_avg: float = Field(
|
||||
..., ge=0.0, le=1.0, description="Average extraction confidence"
|
||||
field_count: int = Field(..., ge=0, description="Number of fields extracted")
|
||||
confidence: float = Field(
|
||||
..., ge=0.0, le=1.0, description="Extraction confidence score"
|
||||
)
|
||||
calibrated_confidence: float = Field(
|
||||
..., ge=0.0, le=1.0, description="Calibrated confidence score"
|
||||
extraction_results: dict[str, Any] = Field(
|
||||
..., description="Full extraction results including provenance"
|
||||
)
|
||||
model_name: str | None = Field(None, description="LLM model used (if applicable)")
|
||||
processing_time_ms: int = Field(
|
||||
..., ge=0, description="Processing time in milliseconds"
|
||||
processing_time_ms: int | None = Field(
|
||||
None, ge=0, description="Processing time in milliseconds"
|
||||
)
|
||||
storage_path: str = Field(..., description="Path to extraction results")
|
||||
storage_path: str | None = Field(None, description="Path to extraction results")
|
||||
|
||||
|
||||
# Knowledge Graph events
|
||||
|
||||
@@ -41,6 +41,11 @@ def get_current_tenant(request: Request) -> str | None:
|
||||
if role.startswith("tenant:"):
|
||||
return str(role.split(":", 1)[1])
|
||||
|
||||
# Check for explicit tenant header (useful for testing/API keys)
|
||||
tenant_header = request.headers.get("X-Tenant-ID")
|
||||
if tenant_header:
|
||||
return tenant_header
|
||||
|
||||
# Default tenant for development
|
||||
return "default"
|
||||
|
||||
|
||||
@@ -19,17 +19,13 @@ class StorageClient:
|
||||
async def ensure_bucket(self, bucket_name: str, region: str = "us-east-1") -> bool:
|
||||
"""Ensure bucket exists, create if not"""
|
||||
try:
|
||||
# Check if bucket exists
|
||||
if self.client.bucket_exists(bucket_name):
|
||||
logger.debug("Bucket already exists", bucket=bucket_name)
|
||||
return True
|
||||
|
||||
# Create bucket
|
||||
self.client.make_bucket(bucket_name, location=region)
|
||||
self.client.make_bucket(bucket_name=bucket_name, location=region)
|
||||
logger.info("Created bucket", bucket=bucket_name, region=region)
|
||||
return True
|
||||
|
||||
except S3Error as e:
|
||||
if e.code in ("BucketAlreadyOwnedByYou", "BucketAlreadyExists"):
|
||||
logger.debug("Bucket already exists", bucket=bucket_name)
|
||||
return True
|
||||
logger.error("Failed to ensure bucket", bucket=bucket_name, error=str(e))
|
||||
return False
|
||||
|
||||
|
||||
Reference in New Issue
Block a user