"""Unit tests for evidence status classification."""

# FILE: tests/unit/coverage/test_status_classifier.py

from datetime import datetime, timezone

import pytest

from libs.coverage.evaluator import CoverageEvaluator
from libs.schemas import (
    CompiledCoveragePolicy,
    CoveragePolicy,
    Defaults,
    FoundEvidence,
    Status,
    StatusClassifier,
    StatusClassifierConfig,
    TaxYearBoundary,
)
from libs.schemas.coverage.core import ConflictRules, Privacy, QuestionTemplates

# pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement
# pylint: disable=raise-missing-from,unused-argument,too-many-arguments,too-many-positional-arguments
# pylint: disable=too-many-locals,import-outside-toplevel
# mypy: disable-error-code=union-attr
# mypy: disable-error-code=no-untyped-def


class TestStatusClassifier:
    """Test evidence status classification logic.

    Exercises CoverageEvaluator.classify_status against a mock UK 2024-25
    policy: missing / present-unverified / present-verified / conflicting
    outcomes, threshold edges, and tax-year date boundaries.
    """

    @pytest.fixture
    def mock_policy(self):
        """Create a mock compiled policy for testing.

        Thresholds: verified requires ocr >= 0.82 and extract >= 0.85 with a
        date inside the tax year; unverified requires ocr >= 0.60 and
        extract >= 0.70 with a date in-year or within tolerance.
        """
        policy = CoveragePolicy(
            version="1.0",
            jurisdiction="UK",
            tax_year="2024-25",
            tax_year_boundary=TaxYearBoundary(start="2024-04-06", end="2025-04-05"),
            defaults=Defaults(
                confidence_thresholds={"ocr": 0.82, "extract": 0.85},
                date_tolerance_days=30,
            ),
            document_kinds=["P60"],
            status_classifier=StatusClassifierConfig(
                present_verified=StatusClassifier(
                    min_ocr=0.82,
                    min_extract=0.85,
                    date_in_year=True,
                ),
                present_unverified=StatusClassifier(
                    min_ocr=0.60,
                    min_extract=0.70,
                    date_in_year_or_tolerance=True,
                ),
                conflicting=StatusClassifier(
                    conflict_rules=["Same doc kind, different totals"]
                ),
                missing=StatusClassifier(),
            ),
            conflict_resolution=ConflictRules(precedence=["P60"]),
            question_templates=QuestionTemplates(
                default={"text": "test", "why": "test"}
            ),
            privacy=Privacy(vector_pii_free=True, redact_patterns=[]),
        )

        return CompiledCoveragePolicy(
            policy=policy,
            compiled_predicates={},
            # datetime.utcnow() is deprecated (Python 3.12+) and returns a
            # naive datetime; use an explicit timezone-aware UTC timestamp.
            compiled_at=datetime.now(timezone.utc),
            hash="test-hash",
            source_files=["test.yaml"],
        )

    @pytest.fixture
    def evaluator(self):
        """Create coverage evaluator for testing."""
        return CoverageEvaluator()

    def test_classify_missing_evidence(self, evaluator, mock_policy):
        """Test classification when no evidence found."""
        found = []
        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.MISSING

    def test_classify_verified_evidence(self, evaluator, mock_policy):
        """Test classification of verified evidence."""
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2024-05-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

    def test_classify_unverified_evidence(self, evaluator, mock_policy):
        """Test classification of unverified evidence."""
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.70,  # Below verified threshold
                extract_confidence=0.75,  # Below verified threshold
                date="2024-05-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED

    def test_classify_low_confidence_evidence(self, evaluator, mock_policy):
        """Test classification of very low confidence evidence."""
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.50,  # Below unverified threshold
                extract_confidence=0.55,  # Below unverified threshold
                date="2024-05-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.MISSING

    def test_classify_conflicting_evidence(self, evaluator, mock_policy):
        """Test classification when multiple conflicting documents found."""
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2024-05-15T10:00:00Z",
            ),
            FoundEvidence(
                doc_id="DOC-002",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2024-05-20T10:00:00Z",
            ),
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.CONFLICTING

    def test_classify_evidence_outside_tax_year(self, evaluator, mock_policy):
        """Test classification of evidence outside tax year."""
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2023-03-15T10:00:00Z",  # Outside tax year
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        # Evidence outside tax year should be unverified even with high confidence
        # This is correct business logic - date validation is part of verification
        assert status == Status.PRESENT_UNVERIFIED

    def test_classify_evidence_no_date(self, evaluator, mock_policy):
        """Test classification of evidence without date."""
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date=None,
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        # Evidence without date cannot be fully verified, even with high confidence
        # This is correct business logic - date validation is required for verification
        assert status == Status.PRESENT_UNVERIFIED

    def test_parse_tax_year_bounds(self, evaluator):
        """Test parsing of tax year boundary strings."""
        start_str = "2024-04-06"
        end_str = "2025-04-05"

        start, end = evaluator._parse_tax_year_bounds(start_str, end_str)

        assert isinstance(start, datetime)
        assert isinstance(end, datetime)
        assert start.year == 2024
        assert start.month == 4
        assert start.day == 6
        assert end.year == 2025
        assert end.month == 4
        assert end.day == 5

    def test_evidence_within_tax_year(self, evaluator, mock_policy):
        """Test evidence date validation within tax year."""
        # Evidence within tax year
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2024-06-15T10:00:00Z",  # Within 2024-25 tax year
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

    def test_evidence_boundary_dates(self, evaluator, mock_policy):
        """Test evidence on tax year boundary dates."""
        # Test start boundary
        found_start = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2024-04-06T00:00:00Z",  # Exact start date
            )
        ]

        status = evaluator.classify_status(found_start, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

        # Test end boundary
        found_end = [
            FoundEvidence(
                doc_id="DOC-002",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2025-04-05T23:59:59Z",  # Exact end date
            )
        ]

        status = evaluator.classify_status(found_end, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

    def test_threshold_edge_cases(self, evaluator, mock_policy):
        """Test classification at threshold boundaries."""
        # Exactly at verified threshold
        found_exact = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.82,  # Exactly at threshold
                extract_confidence=0.85,  # Exactly at threshold
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found_exact, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

        # Just below verified threshold
        found_below = [
            FoundEvidence(
                doc_id="DOC-002",
                kind="P60",
                ocr_confidence=0.81,  # Just below threshold
                extract_confidence=0.84,  # Just below threshold
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found_below, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED

    def test_mixed_confidence_levels(self, evaluator, mock_policy):
        """Test classification with mixed OCR and extract confidence."""
        # High OCR, low extract
        found_mixed1 = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.90,  # High
                extract_confidence=0.70,  # Low
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found_mixed1, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED  # Both must meet threshold

        # Low OCR, high extract
        found_mixed2 = [
            FoundEvidence(
                doc_id="DOC-002",
                kind="P60",
                ocr_confidence=0.70,  # Low
                extract_confidence=0.90,  # High
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found_mixed2, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED  # Both must meet threshold

    def test_zero_confidence_evidence(self, evaluator, mock_policy):
        """Test classification of zero confidence evidence."""
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.0,
                extract_confidence=0.0,
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.MISSING

    def test_perfect_confidence_evidence(self, evaluator, mock_policy):
        """Test classification of perfect confidence evidence."""
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=1.0,
                extract_confidence=1.0,
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED