Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
191 lines
6.6 KiB
Python
191 lines
6.6 KiB
Python
"""Confidence calibrator using various methods."""
|
|
|
|
import pickle
|
|
|
|
import numpy as np
|
|
import structlog
|
|
from sklearn.isotonic import IsotonicRegression
|
|
from sklearn.linear_model import LogisticRegression
|
|
|
|
# Module-level structlog logger; ConfidenceCalibrator emits its log events through it.
logger = structlog.get_logger()
|
|
|
|
|
|
class ConfidenceCalibrator:
    """Calibrate raw confidence scores using one of several methods.

    Supported values of ``method``:

    * ``"temperature"`` - divide the logit of every score by a single fitted
      scalar and map the result back through a sigmoid.
    * ``"platt"`` - fit a logistic regression on the logit of every score.
    * ``"isotonic"`` - fit an isotonic regression on the raw scores.

    While ``is_fitted`` is false, :meth:`calibrate` returns its input unchanged.
    """

    # Scores are clipped to [_EPSILON, 1 - _EPSILON] before taking the logit so
    # that log() and the division never see exactly 0 or 1.
    _EPSILON = 1e-7

    def __init__(self, method: str = "temperature"):
        """
        Initialize calibrator

        Args:
            method: Calibration method ('temperature', 'platt', 'isotonic').
                The value is not validated here; an unknown method is rejected
                by :meth:`fit`.
        """
        self.method = method
        # Fitted scikit-learn estimator for 'platt' / 'isotonic'; stays None
        # for 'temperature'.
        self.calibrator = None
        # 1.0 leaves the logits unchanged, i.e. the identity calibration.
        self.temperature = 1.0
        self.is_fitted = False

    def _to_logits(self, scores: np.ndarray) -> np.ndarray:
        """Clip scores away from 0 and 1 and return their log-odds."""
        clipped = np.clip(scores, self._EPSILON, 1 - self._EPSILON)
        return np.log(clipped / (1 - clipped))

    def fit(self, scores: list[float], labels: list[bool]) -> None:
        """
        Fit calibration model

        Args:
            scores: Raw confidence scores (0-1)
            labels: True labels (True/False for correct/incorrect)

        Raises:
            ValueError: If either input is empty, if the inputs differ in
                length, or if ``self.method`` is not a known method.
        """
        # Validate inputs
        if len(scores) == 0 or len(labels) == 0:
            raise ValueError("Scores and labels cannot be empty")

        if len(scores) != len(labels):
            raise ValueError("Scores and labels must have the same length")

        # Column vector of scores; boolean labels become 0/1 integers.
        scores_array = np.array(scores).reshape(-1, 1)
        labels_array = np.array(labels, dtype=int)

        if self.method == "temperature":
            self._fit_temperature_scaling(scores_array, labels_array)
        elif self.method == "platt":
            self._fit_platt_scaling(scores_array, labels_array)
        elif self.method == "isotonic":
            self._fit_isotonic_regression(scores_array, labels_array)
        else:
            raise ValueError(f"Unknown calibration method: {self.method}")

        self.is_fitted = True
        logger.info("Calibrator fitted", method=self.method)

    def _fit_temperature_scaling(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit temperature scaling parameter

        Searches the interval [0.1, 10.0] for the temperature that minimises
        the mean binary negative log-likelihood of ``labels`` and stores it in
        ``self.temperature``.
        """
        # pylint: disable=import-outside-toplevel
        from scipy.optimize import minimize_scalar

        epsilon = self._EPSILON
        # The logits do not depend on the temperature, so compute them once
        # instead of on every objective evaluation.
        logits = self._to_logits(scores.flatten())

        def negative_log_likelihood(temperature: float) -> float:
            # Apply temperature scaling
            calibrated_logits = logits / temperature
            calibrated_probs = 1 / (1 + np.exp(-calibrated_logits))

            # Calculate negative log likelihood; epsilon keeps log() finite
            # when a probability rounds to exactly 0 or 1.
            nll = -np.mean(
                labels * np.log(calibrated_probs + epsilon)
                + (1 - labels) * np.log(1 - calibrated_probs + epsilon)
            )
            return float(nll)

        # Find optimal temperature
        result = minimize_scalar(  # type: ignore
            negative_log_likelihood,
            bounds=(0.1, 10.0),
            method="bounded",  # fmt: skip # pyright: ignore[reportArgumentType]
        )
        # Store a builtin float rather than whatever scalar type the optimiser
        # returns, so logging and pickling see a plain Python number.
        self.temperature = float(result.x)

        logger.debug("Temperature scaling fitted", temperature=self.temperature)

    def _fit_platt_scaling(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit Platt scaling (logistic regression)

        The regression is fitted on the logit of each score, using a
        ``LogisticRegression`` constructed with its default settings.
        """
        # Convert scores to logits, as a single-feature column for scikit-learn
        logits = self._to_logits(scores.flatten()).reshape(-1, 1)

        # Fit logistic regression
        self.calibrator = LogisticRegression()
        self.calibrator.fit(logits, labels)  # type: ignore

        logger.debug("Platt scaling fitted")

    def _fit_isotonic_regression(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit isotonic regression on the raw (non-logit) scores"""
        # out_of_bounds="clip": per scikit-learn, inputs outside the fitted
        # range are mapped to the nearest end of that range at predict time.
        self.calibrator = IsotonicRegression(out_of_bounds="clip")
        self.calibrator.fit(scores.flatten(), labels)  # type: ignore

        logger.debug("Isotonic regression fitted")

    def calibrate(self, scores: list[float]) -> list[float]:
        """
        Calibrate confidence scores

        Args:
            scores: Raw confidence scores

        Returns:
            Calibrated confidence scores. The input is returned unchanged when
            the calibrator is not fitted or ``self.method`` is not a known
            method. An empty input yields an empty list.
        """
        if not self.is_fitted:
            logger.warning("Calibrator not fitted, returning original scores")
            return scores

        # Nothing to calibrate; also avoids handing a zero-sample array to the
        # scikit-learn estimators.
        if len(scores) == 0:
            return []

        scores_array = np.array(scores)

        if self.method == "temperature":
            return self._calibrate_temperature(scores_array)
        if self.method == "platt":
            return self._calibrate_platt(scores_array)
        if self.method == "isotonic":
            return self._calibrate_isotonic(scores_array)
        return scores

    def _calibrate_temperature(self, scores: np.ndarray) -> list[float]:
        """Apply temperature scaling"""
        # Convert to logits
        logits = self._to_logits(scores)

        # Apply temperature scaling
        calibrated_logits = logits / self.temperature
        calibrated_probs = 1 / (1 + np.exp(-calibrated_logits))

        return calibrated_probs.tolist()  # type: ignore

    def _calibrate_platt(self, scores: np.ndarray) -> list[float]:
        """Apply Platt scaling"""
        # Convert to logits, as a single-feature column for scikit-learn
        logits = self._to_logits(scores).reshape(-1, 1)

        # Apply Platt scaling; column 1 of predict_proba is the positive class
        calibrated_probs = self.calibrator.predict_proba(logits)[:, 1]  # type: ignore

        return calibrated_probs.tolist()  # type: ignore

    def _calibrate_isotonic(self, scores: np.ndarray) -> list[float]:
        """Apply isotonic regression"""
        calibrated_probs = self.calibrator.predict(scores)  # type: ignore
        return calibrated_probs.tolist()  # type: ignore

    def save_model(self, filepath: str) -> None:
        """Save calibration model

        Pickles a dict with the keys ``method``, ``temperature``,
        ``calibrator`` and ``is_fitted`` to ``filepath``.

        Args:
            filepath: Destination path; the file is opened in binary write mode.
        """
        model_data = {
            "method": self.method,
            "temperature": self.temperature,
            "calibrator": self.calibrator,
            "is_fitted": self.is_fitted,
        }

        with open(filepath, "wb") as f:
            pickle.dump(model_data, f)

        logger.info("Calibration model saved", filepath=filepath)

    def load_model(self, filepath: str) -> None:
        """Load calibration model

        Reads a pickle written by :meth:`save_model` and copies its four
        entries onto this instance.

        Args:
            filepath: Path of the pickle file to read.

        Raises:
            KeyError: If the loaded dict lacks one of the expected keys. The
                instance is left unmodified in that case.
        """
        # SECURITY: pickle.load can execute arbitrary code while unpickling.
        # Only load calibration files that come from a trusted source.
        with open(filepath, "rb") as f:
            model_data = pickle.load(f)

        # Read every key before mutating self so that a malformed file cannot
        # leave the calibrator half-updated.
        method = model_data["method"]
        temperature = model_data["temperature"]
        calibrator = model_data["calibrator"]
        is_fitted = model_data["is_fitted"]

        self.method = method
        self.temperature = temperature
        self.calibrator = calibrator
        self.is_fitted = is_fitted

        logger.info("Calibration model loaded", filepath=filepath, method=self.method)
|