# ai-tax-agent/libs/calibration/calibrator.py

"""Confidence calibrator using various methods."""

import pickle

import numpy as np
import structlog
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression

# Module-level structlog logger used by ConfidenceCalibrator for fit/save/load events.
logger = structlog.get_logger()


class ConfidenceCalibrator:
    """Calibrate confidence scores using various methods.

    Supported methods:

    * ``"temperature"`` - fit a single temperature ``T`` and divide the logit
      of every score by ``T``.
    * ``"platt"`` - fit a logistic regression on the logit of every score.
    * ``"isotonic"`` - fit an isotonic regression on the raw scores.

    Until ``fit`` (or ``load_model`` with a fitted model) has succeeded,
    ``calibrate`` is a pass-through that returns its input unchanged.
    """

    # Scores are clipped to [_EPSILON, 1 - _EPSILON] before the logit
    # transform so exact 0.0 / 1.0 inputs do not produce +/-inf.
    _EPSILON = 1e-7

    def __init__(self, method: str = "temperature"):
        """
        Initialize calibrator

        The method name is not validated here; an unknown method is reported
        by ``fit``.

        Args:
            method: Calibration method ('temperature', 'platt', 'isotonic')
        """
        self.method = method
        self.calibrator = None
        self.temperature = 1.0
        self.is_fitted = False

    @classmethod
    def _to_logits(cls, scores: np.ndarray) -> np.ndarray:
        """Clip probabilities away from 0/1 and return their log-odds."""
        clipped = np.clip(scores, cls._EPSILON, 1 - cls._EPSILON)
        return np.log(clipped / (1 - clipped))

    def fit(self, scores: list[float], labels: list[bool]) -> None:
        """
        Fit calibration model

        Args:
            scores: Raw confidence scores (0-1)
            labels: True labels (True/False for correct/incorrect)

        Raises:
            ValueError: If either input is empty, the lengths differ, a score
                is NaN/infinite, or the configured method is unknown.
        """
        # Validate inputs
        if len(scores) == 0 or len(labels) == 0:
            raise ValueError("Scores and labels cannot be empty")

        if len(scores) != len(labels):
            raise ValueError("Scores and labels must have the same length")

        scores_array = np.asarray(scores, dtype=float).reshape(-1, 1)
        labels_array = np.asarray(labels, dtype=int)

        # Temperature scaling would otherwise silently optimise over NaNs and
        # store a meaningless temperature; reject bad scores for every method.
        if not np.all(np.isfinite(scores_array)):
            raise ValueError("Scores must be finite numbers")

        if self.method == "temperature":
            self._fit_temperature_scaling(scores_array, labels_array)
        elif self.method == "platt":
            self._fit_platt_scaling(scores_array, labels_array)
        elif self.method == "isotonic":
            self._fit_isotonic_regression(scores_array, labels_array)
        else:
            raise ValueError(f"Unknown calibration method: {self.method}")

        # Only mark as fitted once the selected fitter has succeeded.
        self.is_fitted = True
        logger.info("Calibrator fitted", method=self.method)

    def _fit_temperature_scaling(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit temperature scaling parameter

        Searches for the temperature in [0.1, 10.0] that minimises the mean
        negative log-likelihood of ``labels`` under the rescaled scores and
        stores it in ``self.temperature``.
        """
        # pylint: disable=import-outside-toplevel
        from scipy.optimize import minimize_scalar

        epsilon = self._EPSILON
        # The logits do not depend on the temperature, so compute them once
        # instead of on every objective evaluation.
        logits = self._to_logits(scores.flatten())

        def negative_log_likelihood(temperature: float) -> float:
            # Apply temperature scaling
            calibrated_logits = logits / temperature
            calibrated_probs = 1 / (1 + np.exp(-calibrated_logits))

            # epsilon keeps log() finite if a probability saturates at 0 or 1
            nll = -np.mean(
                labels * np.log(calibrated_probs + epsilon)
                + (1 - labels) * np.log(1 - calibrated_probs + epsilon)
            )
            return float(nll)

        # Find optimal temperature
        result = minimize_scalar(  # type: ignore
            negative_log_likelihood,
            bounds=(0.1, 10.0),
            method="bounded",  # fmt: skip # pyright: ignore[reportArgumentType]
        )
        # Store a plain Python float rather than a NumPy scalar.
        self.temperature = float(result.x)

        logger.debug("Temperature scaling fitted", temperature=self.temperature)

    def _fit_platt_scaling(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit Platt scaling (logistic regression on the score logits)"""
        logits = self._to_logits(scores.flatten()).reshape(-1, 1)

        # Fit logistic regression
        self.calibrator = LogisticRegression()
        self.calibrator.fit(logits, labels)  # type: ignore

        logger.debug("Platt scaling fitted")

    def _fit_isotonic_regression(self, scores: np.ndarray, labels: np.ndarray) -> None:
        """Fit isotonic regression on the raw scores"""
        self.calibrator = IsotonicRegression(out_of_bounds="clip")
        self.calibrator.fit(scores.flatten(), labels)  # type: ignore

        logger.debug("Isotonic regression fitted")

    def calibrate(self, scores: list[float]) -> list[float]:
        """
        Calibrate confidence scores

        Args:
            scores: Raw confidence scores

        Returns:
            Calibrated confidence scores. The input is returned unchanged if
            the calibrator is not fitted or the method is unknown; an empty
            input yields an empty list.
        """
        if not self.is_fitted:
            logger.warning("Calibrator not fitted, returning original scores")
            return scores

        scores_array = np.asarray(scores, dtype=float)

        # The scikit-learn estimators reject zero-sample input, so answer the
        # trivial case here for all methods.
        if scores_array.size == 0:
            return []

        if self.method == "temperature":
            return self._calibrate_temperature(scores_array)
        if self.method == "platt":
            return self._calibrate_platt(scores_array)
        if self.method == "isotonic":
            return self._calibrate_isotonic(scores_array)
        return scores

    def _calibrate_temperature(self, scores: np.ndarray) -> list[float]:
        """Apply temperature scaling"""
        # Divide the logits by the fitted temperature, then map back to (0, 1)
        calibrated_logits = self._to_logits(scores) / self.temperature
        calibrated_probs = 1 / (1 + np.exp(-calibrated_logits))

        return calibrated_probs.tolist()  # type: ignore

    def _calibrate_platt(self, scores: np.ndarray) -> list[float]:
        """Apply Platt scaling"""
        logits = self._to_logits(scores).reshape(-1, 1)

        # Column 1 of predict_proba is the probability of the positive class
        calibrated_probs = self.calibrator.predict_proba(logits)[:, 1]  # type: ignore

        return calibrated_probs.tolist()  # type: ignore

    def _calibrate_isotonic(self, scores: np.ndarray) -> list[float]:
        """Apply isotonic regression"""
        calibrated_probs = self.calibrator.predict(scores)  # type: ignore
        return calibrated_probs.tolist()  # type: ignore

    def save_model(self, filepath: str) -> None:
        """Save calibration model

        Pickles the method name, temperature, fitted estimator (if any) and
        fitted flag to ``filepath``.
        """
        model_data = {
            "method": self.method,
            "temperature": self.temperature,
            "calibrator": self.calibrator,
            "is_fitted": self.is_fitted,
        }

        with open(filepath, "wb") as f:
            pickle.dump(model_data, f)

        logger.info("Calibration model saved", filepath=filepath)

    def load_model(self, filepath: str) -> None:
        """Load calibration model

        Restores the state written by ``save_model``.

        Raises:
            KeyError: If the file lacks one of the expected entries; in that
                case the calibrator's current state is left untouched.
        """
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only load calibration files from trusted locations.
        with open(filepath, "rb") as f:
            model_data = pickle.load(f)

        # Read every entry before assigning so a malformed file cannot leave
        # the instance half-updated.
        method = model_data["method"]
        temperature = model_data["temperature"]
        calibrator = model_data["calibrator"]
        is_fitted = model_data["is_fitted"]

        self.method = method
        self.temperature = temperature
        self.calibrator = calibrator
        self.is_fitted = is_fitted

        logger.info("Calibration model loaded", filepath=filepath, method=self.method)