ai-tax-agent/tests/unit/multi-model-calibration.py

"""Unit tests for multi-model calibration."""
from unittest.mock import MagicMock, patch
import pytest
from libs.calibration.multi_model import MultiModelCalibrator
# pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement
# pylint: disable=raise-missing-from,unused-argument,too-many-arguments,too-many-positional-arguments
# pylint: disable=too-many-locals,import-outside-toplevel
# mypy: disable-error-code=union-attr
# mypy: disable-error-code=no-untyped-def
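
# NOTE: The tests below describe the MultiModelCalibrator surface as it is
# exercised in this file (a per-model `calibrators` dict, add_calibrator(),
# fit() with auto-add, calibrate() falling back to the raw scores for unknown
# or unfitted models, save_models()/load_models(), get_model_names(),
# remove_calibrator(), has_model(), and is_fitted()). See
# libs.calibration.multi_model for the authoritative implementation.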


class TestMultiModelCalibrator:
    """Test MultiModelCalibrator"""

    @pytest.fixture
    def sample_data(self):
        """Create sample training data"""
        scores = [0.1, 0.3, 0.5, 0.7, 0.9]
        labels = [False, False, True, True, True]
        return scores, labels

    def test_init(self):
        """Test initialization"""
        calibrator = MultiModelCalibrator()
        assert isinstance(calibrator.calibrators, dict)
        assert len(calibrator.calibrators) == 0

    def test_add_calibrator_default_method(self):
        """Test adding calibrator with default method"""
        calibrator = MultiModelCalibrator()
        calibrator.add_calibrator("model_a")
        assert "model_a" in calibrator.calibrators
        assert calibrator.calibrators["model_a"].method == "temperature"

    def test_add_calibrator_custom_method(self):
        """Test adding calibrator with custom method"""
        calibrator = MultiModelCalibrator()
        calibrator.add_calibrator("model_b", method="platt")
        assert "model_b" in calibrator.calibrators
        assert calibrator.calibrators["model_b"].method == "platt"

    def test_fit_existing_calibrator(self, sample_data):
        """Test fitting existing calibrator"""
        scores, labels = sample_data
        calibrator = MultiModelCalibrator()
        calibrator.add_calibrator("model_a")
        calibrator.fit("model_a", scores, labels)
        assert calibrator.calibrators["model_a"].is_fitted

    def test_fit_auto_add_calibrator(self, sample_data):
        """Test that fitting auto-adds a calibrator if one does not exist"""
        scores, labels = sample_data
        calibrator = MultiModelCalibrator()
        # Should auto-add calibrator
        calibrator.fit("model_new", scores, labels)
        assert "model_new" in calibrator.calibrators
        assert calibrator.calibrators["model_new"].is_fitted

    def test_calibrate_existing_model(self, sample_data):
        """Test calibrating with existing fitted model"""
        scores, labels = sample_data
        calibrator = MultiModelCalibrator()
        calibrator.fit("model_a", scores, labels)
        test_scores = [0.2, 0.6, 0.8]
        result = calibrator.calibrate("model_a", test_scores)
        assert len(result) == len(test_scores)
        assert all(0 <= p <= 1 for p in result)

    def test_calibrate_nonexistent_model_returns_original(self):
        """Test calibrating nonexistent model returns original scores"""
        calibrator = MultiModelCalibrator()
        scores = [0.1, 0.5, 0.9]
        # Should return original scores and log warning
        result = calibrator.calibrate("nonexistent", scores)
        assert result == scores

    def test_calibrate_unfitted_model_returns_original(self, sample_data):
        """Test calibrating unfitted model returns original scores"""
        calibrator = MultiModelCalibrator()
        calibrator.add_calibrator("model_a")  # Add but don't fit
        test_scores = [0.2, 0.6, 0.8]
        result = calibrator.calibrate("model_a", test_scores)
        # Should return original scores since not fitted
        assert result == test_scores
    def test_save_models_creates_directory(self, sample_data):
        """Test saving models creates directory"""
        scores, labels = sample_data
        calibrator = MultiModelCalibrator()
        calibrator.fit("model_a", scores, labels)
        calibrator.fit("model_b", scores, labels)
        with (
            patch("os.makedirs") as mock_makedirs,
            patch.object(
                calibrator.calibrators["model_a"], "save_model"
            ) as mock_save_a,
            patch.object(
                calibrator.calibrators["model_b"], "save_model"
            ) as mock_save_b,
        ):
            calibrator.save_models("test_dir")
            mock_makedirs.assert_called_once_with("test_dir", exist_ok=True)
            mock_save_a.assert_called_once()
            mock_save_b.assert_called_once()

    def test_load_models_from_directory(self):
        """Test loading models from directory"""
        calibrator = MultiModelCalibrator()
        # Mock glob to return some model files
        mock_files = [
            "test_dir/model_a_calibrator.pkl",
            "test_dir/model_b_calibrator.pkl",
        ]
        with (
            patch("libs.calibration.multi_model.glob.glob", return_value=mock_files),
            patch(
                "libs.calibration.multi_model.ConfidenceCalibrator"
            ) as mock_calibrator_class,
        ):
            mock_calibrator_instance = MagicMock()
            mock_calibrator_class.return_value = mock_calibrator_instance
            calibrator.load_models("test_dir")
            # Should have loaded two models
            assert len(calibrator.calibrators) == 2
            assert "model_a" in calibrator.calibrators
            assert "model_b" in calibrator.calibrators
            # Should have called load_model on each
            assert mock_calibrator_instance.load_model.call_count == 2

    def test_load_models_empty_directory(self):
        """Test loading from empty directory"""
        calibrator = MultiModelCalibrator()
        with patch("glob.glob", return_value=[]):
            calibrator.load_models("empty_dir")
            assert len(calibrator.calibrators) == 0
    def test_get_model_names(self, sample_data):
        """Test getting model names"""
        scores, labels = sample_data
        calibrator = MultiModelCalibrator()
        calibrator.fit("model_a", scores, labels)
        calibrator.fit("model_b", scores, labels)
        names = calibrator.get_model_names()
        assert set(names) == {"model_a", "model_b"}

    def test_get_model_names_empty(self):
        """Test getting model names when empty"""
        calibrator = MultiModelCalibrator()
        names = calibrator.get_model_names()
        assert names == []

    def test_remove_calibrator(self, sample_data):
        """Test removing calibrator"""
        scores, labels = sample_data
        calibrator = MultiModelCalibrator()
        calibrator.fit("model_a", scores, labels)
        calibrator.fit("model_b", scores, labels)
        assert len(calibrator.calibrators) == 2
        calibrator.remove_calibrator("model_a")
        assert len(calibrator.calibrators) == 1
        assert "model_a" not in calibrator.calibrators
        assert "model_b" in calibrator.calibrators

    def test_remove_nonexistent_calibrator_raises_error(self):
        """Test removing nonexistent calibrator raises error"""
        calibrator = MultiModelCalibrator()
        with pytest.raises(ValueError, match="Model 'nonexistent' not found"):
            calibrator.remove_calibrator("nonexistent")

    def test_has_model(self, sample_data):
        """Test checking if model exists"""
        scores, labels = sample_data
        calibrator = MultiModelCalibrator()
        calibrator.fit("model_a", scores, labels)
        assert calibrator.has_model("model_a")
        assert not calibrator.has_model("model_b")

    def test_is_fitted(self, sample_data):
        """Test checking if model is fitted"""
        scores, labels = sample_data
        calibrator = MultiModelCalibrator()
        calibrator.add_calibrator("model_a")  # Add but don't fit
        calibrator.fit("model_b", scores, labels)  # Add and fit
        assert not calibrator.is_fitted("model_a")
        assert calibrator.is_fitted("model_b")

    def test_is_fitted_nonexistent_model_raises_error(self):
        """Test checking fitted status of nonexistent model raises error"""
        calibrator = MultiModelCalibrator()
        with pytest.raises(ValueError, match="Model 'nonexistent' not found"):
            calibrator.is_fitted("nonexistent")
    def test_multiple_models_workflow(self, sample_data):
        """Test complete workflow with multiple models"""
        scores, labels = sample_data
        calibrator = MultiModelCalibrator()
        # Add different models with different methods
        calibrator.add_calibrator("temperature_model", "temperature")
        calibrator.add_calibrator("platt_model", "platt")
        calibrator.add_calibrator("isotonic_model", "isotonic")
        # Fit all models
        calibrator.fit("temperature_model", scores, labels)
        calibrator.fit("platt_model", scores, labels)
        calibrator.fit("isotonic_model", scores, labels)
        # Test calibration for all models
        test_scores = [0.2, 0.6, 0.8]
        temp_result = calibrator.calibrate("temperature_model", test_scores)
        platt_result = calibrator.calibrate("platt_model", test_scores)
        isotonic_result = calibrator.calibrate("isotonic_model", test_scores)
        # All should return valid probabilities
        for result in [temp_result, platt_result, isotonic_result]:
            assert len(result) == len(test_scores)
            assert all(0 <= p <= 1 for p in result)
        # Results should be different (unless by coincidence)
        assert not (temp_result == platt_result == isotonic_result)

    def test_fit_with_different_data_per_model(self):
        """Test fitting different models with different data"""
        calibrator = MultiModelCalibrator()
        # Different data for different models
        scores_a = [0.1, 0.3, 0.7, 0.9]
        labels_a = [False, False, True, True]
        scores_b = [0.2, 0.4, 0.6, 0.8]
        labels_b = [False, True, False, True]
        calibrator.fit("model_a", scores_a, labels_a)
        calibrator.fit("model_b", scores_b, labels_b)
        assert calibrator.is_fitted("model_a")
        assert calibrator.is_fitted("model_b")
        # Both should be able to calibrate
        result_a = calibrator.calibrate("model_a", [0.5])
        result_b = calibrator.calibrate("model_b", [0.5])
        assert len(result_a) == 1
        assert len(result_b) == 1
        assert 0 <= result_a[0] <= 1
        assert 0 <= result_b[0] <= 1
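

# Illustrative usage sketch (comments only, not collected as a test; the model
# name and directory are placeholders, and the calls mirror those exercised above):
#
#     calibrator = MultiModelCalibrator()
#     calibrator.fit("primary_model", raw_scores, correctness_labels)
#     calibrated = calibrator.calibrate("primary_model", new_scores)
#     calibrator.save_models("models/calibration")
#     calibrator.load_models("models/calibration")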