"""Confidence calibrator using various methods."""

import pickle

import numpy as np
import structlog
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression

logger = structlog.get_logger()

# Scores are clipped to [_EPSILON, 1 - _EPSILON] before the logit transform so
# that exact 0.0 / 1.0 inputs do not produce infinite logits. The same value is
# added inside the log terms of the temperature-scaling objective.
_EPSILON = 1e-7


class ConfidenceCalibrator:
    """Calibrate confidence scores using various methods.

    Supported methods:

    * ``"temperature"`` - divide the logit of each score by a single scalar
      fitted by minimising the negative log likelihood.
    * ``"platt"`` - logistic regression fitted on the logit of each score.
    * ``"isotonic"`` - isotonic regression fitted on the raw scores.
    """

    def __init__(self, method: str = "temperature"):
        """
        Initialize calibrator

        Args:
            method: Calibration method ('temperature', 'platt', 'isotonic').
                The value is not validated here; an unknown method is
                reported by ``fit``.
        """
        self.method = method
        # Fitted scikit-learn estimator for 'platt' / 'isotonic'; stays None
        # for 'temperature'.
        self.calibrator = None
        # A temperature of 1.0 leaves scores unchanged (up to clipping).
        self.temperature = 1.0
        self.is_fitted = False

    @staticmethod
    def _to_logits(scores: np.ndarray) -> np.ndarray:
        """Clip scores away from 0 and 1 and return their logits.

        The shape of ``scores`` is preserved.
        """
        clipped = np.clip(scores, _EPSILON, 1 - _EPSILON)
        return np.log(clipped / (1 - clipped))

    def fit(self, scores: list[float], labels: list[bool]) -> None:
        """
        Fit calibration model

        Args:
            scores: Raw confidence scores (0-1)
            labels: True labels (True/False for correct/incorrect)

        Raises:
            ValueError: If either input is empty, the lengths differ, the
                scores contain NaN or infinite values, or ``self.method`` is
                not a known calibration method.
        """
        # Validate inputs
        if len(scores) == 0 or len(labels) == 0:
            raise ValueError("Scores and labels cannot be empty")

        if len(scores) != len(labels):
            raise ValueError("Scores and labels must have the same length")

        scores_array = np.asarray(scores, dtype=float).reshape(-1, 1)
        labels_array = np.array(labels, dtype=int)

        # NaN/inf would otherwise pass through the clipping step and leave
        # the temperature objective undefined.
        if not np.all(np.isfinite(scores_array)):
            raise ValueError("Scores must be finite numbers")

        if self.method == "temperature":
            self._fit_temperature_scaling(scores_array, labels_array)
        elif self.method == "platt":
            self._fit_platt_scaling(scores_array, labels_array)
        elif self.method == "isotonic":
            self._fit_isotonic_regression(scores_array, labels_array)
        else:
            raise ValueError(f"Unknown calibration method: {self.method}")

        self.is_fitted = True
        logger.info("Calibrator fitted", method=self.method)

    def _fit_temperature_scaling(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit temperature scaling parameter.

        Searches the interval [0.1, 10.0] for the temperature that minimises
        the negative log likelihood of ``labels`` and stores it in
        ``self.temperature`` as a plain ``float``.
        """
        # pylint: disable=import-outside-toplevel
        from scipy.optimize import minimize_scalar

        # The logits do not depend on the temperature, so compute them once
        # instead of on every objective evaluation.
        logits = self._to_logits(scores.ravel())

        def negative_log_likelihood(temperature: float) -> float:
            # Apply temperature scaling
            calibrated_logits = logits / temperature
            calibrated_probs = 1 / (1 + np.exp(-calibrated_logits))

            # Calculate negative log likelihood
            nll = -np.mean(
                labels * np.log(calibrated_probs + _EPSILON)
                + (1 - labels) * np.log(1 - calibrated_probs + _EPSILON)
            )
            return float(nll)

        # Find optimal temperature
        result = minimize_scalar(  # type: ignore
            negative_log_likelihood,
            bounds=(0.1, 10.0),
            method="bounded",
        )

        if not getattr(result, "success", True):
            logger.warning(
                "Temperature optimisation did not converge",
                message=str(getattr(result, "message", "")),
            )

        # Store a built-in float rather than a NumPy scalar.
        self.temperature = float(result.x)
        logger.debug("Temperature scaling fitted", temperature=self.temperature)

    def _fit_platt_scaling(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit Platt scaling (logistic regression) on the logits of ``scores``."""
        # Convert scores to a single-feature matrix of logits
        logits = self._to_logits(scores.ravel()).reshape(-1, 1)

        # Fit into a local first so a failed fit (e.g. only one class present
        # in ``labels``) does not replace a previously fitted calibrator with
        # an unfitted one.
        model = LogisticRegression()
        model.fit(logits, labels)  # type: ignore
        self.calibrator = model

        logger.debug("Platt scaling fitted")

    def _fit_isotonic_regression(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit isotonic regression on the raw scores."""
        # Same publish-on-success pattern as Platt scaling.
        model = IsotonicRegression(out_of_bounds="clip")
        model.fit(scores.ravel(), labels)  # type: ignore
        self.calibrator = model

        logger.debug("Isotonic regression fitted")

    def calibrate(self, scores: list[float]) -> list[float]:
        """
        Calibrate confidence scores

        Args:
            scores: Raw confidence scores

        Returns:
            Calibrated confidence scores. The input is returned unchanged
            when the calibrator has not been fitted or ``self.method`` is not
            a known method. An empty input yields an empty list.
        """
        if not self.is_fitted:
            logger.warning("Calibrator not fitted, returning original scores")
            return scores

        # scikit-learn estimators reject empty arrays; answer directly so all
        # three methods behave the same on empty input.
        if len(scores) == 0:
            return []

        scores_array = np.asarray(scores, dtype=float)

        if self.method == "temperature":
            return self._calibrate_temperature(scores_array)
        if self.method == "platt":
            return self._calibrate_platt(scores_array)
        if self.method == "isotonic":
            return self._calibrate_isotonic(scores_array)
        return scores

    def _calibrate_temperature(self, scores: np.ndarray) -> list[float]:
        """Apply temperature scaling"""
        # Convert to logits, rescale, and map back through the sigmoid
        calibrated_logits = self._to_logits(scores) / self.temperature
        calibrated_probs = 1 / (1 + np.exp(-calibrated_logits))

        return calibrated_probs.tolist()  # type: ignore

    def _calibrate_platt(self, scores: np.ndarray) -> list[float]:
        """Apply Platt scaling"""
        # Convert to a single-feature matrix of logits
        logits = self._to_logits(scores).reshape(-1, 1)

        # Column 1 of predict_proba is the probability of the positive class
        calibrated_probs = self.calibrator.predict_proba(logits)[:, 1]  # type: ignore

        return calibrated_probs.tolist()  # type: ignore

    def _calibrate_isotonic(self, scores: np.ndarray) -> list[float]:
        """Apply isotonic regression"""
        calibrated_probs = self.calibrator.predict(scores)  # type: ignore
        return calibrated_probs.tolist()  # type: ignore

    def save_model(self, filepath: str) -> None:
        """Save calibration model

        Pickles the method name, temperature, fitted estimator (if any) and
        the fitted flag to ``filepath``.
        """
        model_data = {
            "method": self.method,
            "temperature": self.temperature,
            "calibrator": self.calibrator,
            "is_fitted": self.is_fitted,
        }

        with open(filepath, "wb") as f:
            pickle.dump(model_data, f)

        logger.info("Calibration model saved", filepath=filepath)

    def load_model(self, filepath: str) -> None:
        """Load calibration model

        Restores the state written by ``save_model``.

        Raises:
            KeyError: If the stored data lacks one of the expected keys. The
                instance is left unmodified in that case.
        """
        # SECURITY: pickle.load can execute arbitrary code while
        # deserialising. Only load files from a trusted source.
        with open(filepath, "rb") as f:
            model_data = pickle.load(f)

        # Read every key before assigning anything so a malformed payload
        # cannot leave the instance half-updated.
        method = model_data["method"]
        temperature = model_data["temperature"]
        calibrator = model_data["calibrator"]
        is_fitted = model_data["is_fitted"]

        self.method = method
        self.temperature = temperature
        self.calibrator = calibrator
        self.is_fitted = is_fitted

        logger.info("Calibration model loaded", filepath=filepath, method=self.method)