"""Prometheus metrics setup and business metrics.""" from typing import Any from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram, Info from prometheus_fastapi_instrumentator import Instrumentator def init_prometheus_metrics( # pylint: disable=unused-argument app: Any, service_name: str ) -> Any: """Initialize Prometheus metrics for FastAPI app""" # Create instrumentator instrumentator = Instrumentator( should_group_status_codes=False, should_ignore_untemplated=True, should_respect_env_var=True, should_instrument_requests_inprogress=True, excluded_handlers=["/metrics", "/healthz", "/readyz", "/livez"], env_var_name="ENABLE_METRICS", inprogress_name="http_requests_inprogress", inprogress_labels=True, ) # Add custom metrics instrumentator.add( lambda info: info.modified_duration < 0.1, # type: ignore lambda info: Counter( "http_requests_fast_total", "Number of fast HTTP requests (< 100ms)", ["method", "endpoint"], ) .labels(method=info.method, endpoint=info.modified_handler) .inc(), ) instrumentator.add( lambda info: info.modified_duration > 1.0, # type: ignore lambda info: Counter( "http_requests_slow_total", "Number of slow HTTP requests (> 1s)", ["method", "endpoint"], ) .labels(method=info.method, endpoint=info.modified_handler) .inc(), ) # Instrument the app instrumentator.instrument(app) instrumentator.expose(app, endpoint="/metrics") return instrumentator # Global registry for business metrics to avoid duplicates _business_metrics_registry: dict[str, Any] = {} # Custom metrics for business logic class BusinessMetrics: # pylint: disable=too-many-instance-attributes """Custom business metrics for the application""" def __init__(self, service_name: str): self.service_name = service_name # Sanitize service name for Prometheus metrics (replace hyphens with underscores) self.sanitized_name = service_name.replace("-", "_") # Create a custom registry for this service to avoid conflicts self.registry = CollectorRegistry() # Document processing metrics self.documents_processed = Counter( "documents_processed_total", "Total number of documents processed", ["service", "document_type", "status"], registry=self.registry, ) # Add active connections metric for tests self.active_connections = Gauge( "active_connections", "Number of active connections", ["service"], registry=self.registry, ) # Dynamic counters for forms service self._dynamic_counters: dict[str, Any] = {} self.document_processing_duration = Histogram( f"document_processing_duration_seconds_{self.sanitized_name}", "Time spent processing documents", ["service", "document_type"], buckets=[0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0], registry=self.registry, ) # Field extraction metrics self.field_extractions = Counter( f"field_extractions_total_{self.sanitized_name}", "Total number of field extractions", ["service", "field_type", "status"], registry=self.registry, ) self.extraction_confidence = Histogram( f"extraction_confidence_score_{self.sanitized_name}", "Confidence scores for extractions", ["service", "extraction_type"], buckets=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], registry=self.registry, ) # Tax calculation metrics self.tax_calculations = Counter( f"tax_calculations_total_{self.sanitized_name}", "Total number of tax calculations", ["service", "calculation_type", "status"], registry=self.registry, ) self.calculation_confidence = Histogram( f"calculation_confidence_score_{self.sanitized_name}", "Confidence scores for tax calculations", ["service", "calculation_type"], buckets=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], registry=self.registry, ) # RAG metrics self.rag_searches = Counter( f"rag_searches_total_{self.sanitized_name}", "Total number of RAG searches", ["service", "collection", "status"], registry=self.registry, ) self.rag_search_duration = Histogram( f"rag_search_duration_seconds_{self.sanitized_name}", "Time spent on RAG searches", ["service", "collection"], buckets=[0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0], registry=self.registry, ) self.rag_relevance_score = Histogram( f"rag_relevance_score_{self.sanitized_name}", "RAG search relevance scores", ["service", "collection"], buckets=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], registry=self.registry, ) # Knowledge graph metrics self.kg_operations = Counter( f"kg_operations_total_{self.sanitized_name}", "Total number of KG operations", ["service", "operation", "status"], registry=self.registry, ) self.kg_query_duration = Histogram( f"kg_query_duration_seconds_{self.sanitized_name}", "Time spent on KG queries", ["service", "query_type"], buckets=[0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0], registry=self.registry, ) # HMRC submission metrics self.hmrc_submissions = Counter( f"hmrc_submissions_total_{self.sanitized_name}", "Total number of HMRC submissions", ["service", "submission_type", "status"], registry=self.registry, ) # Service health metrics self.service_info = Info( f"service_info_{self.sanitized_name}", "Service information", registry=self.registry, ) try: self.service_info.info({"service": service_name, "version": "1.0.0"}) except (AttributeError, ValueError): # Handle prometheus_client version compatibility or registry conflicts pass def counter(self, name: str, labelnames: list[str] | None = None) -> Any: """Get or create a counter metric with dynamic labels""" # Use provided labelnames or default ones if labelnames is None: labelnames = ["tenant_id", "form_id", "scope", "error_type"] # Create a unique key based on name and labelnames label_key = f"{name}_{','.join(sorted(labelnames))}" if label_key not in self._dynamic_counters: self._dynamic_counters[label_key] = Counter( name, f"Dynamic counter: {name}", labelnames=labelnames, registry=self.registry, ) return self._dynamic_counters[label_key] def histogram(self, name: str, labelnames: list[str] | None = None) -> Any: """Get or create a histogram metric with dynamic labels""" # Use provided labelnames or default ones if labelnames is None: labelnames = ["tenant_id", "kind"] # Create a unique key based on name and labelnames label_key = f"{name}_{','.join(sorted(labelnames))}" histogram_key = f"_histogram_{label_key}" if not hasattr(self, histogram_key): histogram = Histogram( name, f"Dynamic histogram: {name}", labelnames=labelnames, registry=self.registry, ) setattr(self, histogram_key, histogram) return getattr(self, histogram_key) def get_business_metrics(service_name: str) -> BusinessMetrics: """Get business metrics instance for service""" # Use singleton pattern to avoid registry conflicts if service_name not in _business_metrics_registry: _business_metrics_registry[service_name] = BusinessMetrics(service_name) return _business_metrics_registry[service_name] # type: ignore