Files
ai-tax-agent/libs/calibration/calibrator.py
harkon b324ff09ef
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Initial commit
2025-10-11 08:41:36 +01:00

191 lines
6.6 KiB
Python

"""Confidence calibrator using various methods."""
import pickle
import numpy as np
import structlog
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression
logger = structlog.get_logger()
class ConfidenceCalibrator:
    """Calibrate raw confidence scores using one of several methods.

    Supported methods:

    * ``"temperature"`` - divide the logit of each score by a single fitted
      temperature and map the result back through a sigmoid.
    * ``"platt"`` - logistic regression fitted on the logit of each score.
    * ``"isotonic"`` - isotonic regression fitted on the raw scores.
    """

    # Scores are clipped into [_EPSILON, 1 - _EPSILON] before the logit is
    # taken so that exact 0.0 / 1.0 inputs do not produce infinities.
    _EPSILON = 1e-7

    def __init__(self, method: str = "temperature"):
        """
        Initialize calibrator

        Args:
            method: Calibration method ('temperature', 'platt', 'isotonic')
        """
        self.method = method
        # Fitted scikit-learn estimator for 'platt' / 'isotonic'; stays None
        # for 'temperature', which only needs ``self.temperature``.
        self.calibrator = None
        # A temperature of 1.0 leaves scores unchanged (up to clipping).
        self.temperature = 1.0
        self.is_fitted = False

    @classmethod
    def _to_logits(cls, scores: np.ndarray) -> np.ndarray:
        """Clip scores away from 0 and 1 and return their log-odds."""
        clipped = np.clip(scores, cls._EPSILON, 1 - cls._EPSILON)
        return np.log(clipped / (1 - clipped))

    @staticmethod
    def _sigmoid(logits: np.ndarray) -> np.ndarray:
        """Map log-odds back to probabilities."""
        return 1 / (1 + np.exp(-logits))

    def fit(self, scores: list[float], labels: list[bool]) -> None:
        """
        Fit calibration model

        Args:
            scores: Raw confidence scores (0-1)
            labels: True labels (True/False for correct/incorrect)

        Raises:
            ValueError: If either input is empty, the lengths differ, or
                ``self.method`` is not a known calibration method.
        """
        # Validate inputs. ``len()`` is used instead of truthiness so that
        # array-like inputs are also accepted here.
        if len(scores) == 0 or len(labels) == 0:
            raise ValueError("Scores and labels cannot be empty")
        if len(scores) != len(labels):
            raise ValueError("Scores and labels must have the same length")

        scores_array = np.array(scores).reshape(-1, 1)
        labels_array = np.array(labels, dtype=int)

        if self.method == "temperature":
            self._fit_temperature_scaling(scores_array, labels_array)
        elif self.method == "platt":
            self._fit_platt_scaling(scores_array, labels_array)
        elif self.method == "isotonic":
            self._fit_isotonic_regression(scores_array, labels_array)
        else:
            raise ValueError(f"Unknown calibration method: {self.method}")

        # Only reached when the selected fit routine completed without raising.
        self.is_fitted = True
        logger.info("Calibrator fitted", method=self.method)

    def _fit_temperature_scaling(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit the temperature by minimising negative log likelihood.

        Args:
            scores: Raw scores; flattened to 1-D before use.
            labels: Integer (0/1) labels aligned with ``scores``.
        """
        # pylint: disable=import-outside-toplevel
        from scipy.optimize import minimize_scalar

        # The logits do not depend on the temperature, so compute them once
        # instead of on every objective evaluation.
        logits = self._to_logits(scores.flatten())
        epsilon = self._EPSILON

        def negative_log_likelihood(temperature: float) -> float:
            # Apply temperature scaling
            calibrated_probs = self._sigmoid(logits / temperature)
            # Calculate negative log likelihood; epsilon keeps log() finite
            # if a calibrated probability saturates at 0 or 1.
            nll = -np.mean(
                labels * np.log(calibrated_probs + epsilon)
                + (1 - labels) * np.log(1 - calibrated_probs + epsilon)
            )
            return float(nll)

        # Find optimal temperature within [0.1, 10.0]
        result = minimize_scalar(  # type: ignore
            negative_log_likelihood,
            bounds=(0.1, 10.0),
            method="bounded",  # fmt: skip # pyright: ignore[reportArgumentType]
        )
        # Store a plain Python float, matching the type set in __init__.
        self.temperature = float(result.x)
        logger.debug("Temperature scaling fitted", temperature=self.temperature)

    def _fit_platt_scaling(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit Platt scaling (logistic regression on the logit of each score).

        Args:
            scores: Raw scores; flattened and converted to logits.
            labels: Integer (0/1) labels aligned with ``scores``.
        """
        # The estimator expects a 2-D feature matrix, hence the column reshape.
        logits = self._to_logits(scores.flatten()).reshape(-1, 1)
        self.calibrator = LogisticRegression()
        self.calibrator.fit(logits, labels)  # type: ignore
        logger.debug("Platt scaling fitted")

    def _fit_isotonic_regression(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit isotonic regression on the raw (un-logited) scores.

        Args:
            scores: Raw scores; flattened to 1-D before fitting.
            labels: Integer (0/1) labels aligned with ``scores``.
        """
        self.calibrator = IsotonicRegression(out_of_bounds="clip")
        self.calibrator.fit(scores.flatten(), labels)  # type: ignore
        logger.debug("Isotonic regression fitted")

    def calibrate(self, scores: list[float]) -> list[float]:
        """
        Calibrate confidence scores

        Args:
            scores: Raw confidence scores

        Returns:
            Calibrated confidence scores. The input is returned unchanged
            when the calibrator has not been fitted or the method is unknown.
        """
        if not self.is_fitted:
            logger.warning("Calibrator not fitted, returning original scores")
            return scores

        scores_array = np.array(scores)
        # Nothing to calibrate; this also avoids handing a zero-sample array
        # to a fitted estimator.
        if scores_array.size == 0:
            return []

        if self.method == "temperature":
            return self._calibrate_temperature(scores_array)
        if self.method == "platt":
            return self._calibrate_platt(scores_array)
        if self.method == "isotonic":
            return self._calibrate_isotonic(scores_array)

        # ``fit`` rejects unknown methods, so this is only reachable if
        # ``self.method`` was changed afterwards (e.g. by ``load_model``).
        logger.warning(
            "Unknown calibration method, returning original scores",
            method=self.method,
        )
        return scores

    def _calibrate_temperature(self, scores: np.ndarray) -> list[float]:
        """Apply temperature scaling using ``self.temperature``."""
        calibrated_logits = self._to_logits(scores) / self.temperature
        calibrated_probs = self._sigmoid(calibrated_logits)
        return calibrated_probs.tolist()  # type: ignore

    def _calibrate_platt(self, scores: np.ndarray) -> list[float]:
        """Apply Platt scaling using the fitted logistic regression."""
        logits = self._to_logits(scores).reshape(-1, 1)
        # Column 1 of predict_proba is taken as the positive-class probability.
        calibrated_probs = self.calibrator.predict_proba(logits)[:, 1]  # type: ignore
        return calibrated_probs.tolist()  # type: ignore

    def _calibrate_isotonic(self, scores: np.ndarray) -> list[float]:
        """Apply the fitted isotonic regression to the raw scores."""
        calibrated_probs = self.calibrator.predict(scores)  # type: ignore
        return calibrated_probs.tolist()  # type: ignore

    def save_model(self, filepath: str) -> None:
        """Save calibration model

        Args:
            filepath: Destination path; the state is written as a pickle.
        """
        model_data = {
            "method": self.method,
            "temperature": self.temperature,
            "calibrator": self.calibrator,
            "is_fitted": self.is_fitted,
        }
        with open(filepath, "wb") as f:
            pickle.dump(model_data, f)
        logger.info("Calibration model saved", filepath=filepath)

    def load_model(self, filepath: str) -> None:
        """Load calibration model

        Args:
            filepath: Path to a pickle previously written by ``save_model``.

        Warning:
            Unpickling can execute arbitrary code. Only load files that come
            from a trusted source.
        """
        # SECURITY NOTE(review): pickle.load on an attacker-controlled file is
        # a code-execution vector; confirm every caller passes trusted paths.
        with open(filepath, "rb") as f:
            model_data = pickle.load(f)
        self.method = model_data["method"]
        self.temperature = model_data["temperature"]
        self.calibrator = model_data["calibrator"]
        self.is_fitted = model_data["is_fitted"]
        logger.info("Calibration model loaded", filepath=filepath, method=self.method)