Initial commit
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
This commit is contained in:
338
tests/unit/coverage/test_status_classifier.py
Normal file
338
tests/unit/coverage/test_status_classifier.py
Normal file
@@ -0,0 +1,338 @@
|
||||
"""Unit tests for evidence status classification."""
|
||||
|
||||
# FILE: tests/unit/coverage/test_status_classifier.py
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
|
||||
from libs.coverage.evaluator import CoverageEvaluator
|
||||
from libs.schemas import (
|
||||
CompiledCoveragePolicy,
|
||||
CoveragePolicy,
|
||||
Defaults,
|
||||
FoundEvidence,
|
||||
Status,
|
||||
StatusClassifier,
|
||||
StatusClassifierConfig,
|
||||
TaxYearBoundary,
|
||||
)
|
||||
from libs.schemas.coverage.core import ConflictRules, Privacy, QuestionTemplates
|
||||
|
||||
# pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement
|
||||
# pylint: disable=raise-missing-from,unused-argument,too-many-arguments,too-many-positional-arguments
|
||||
# pylint: disable=too-many-locals,import-outside-toplevel
|
||||
# mypy: disable-error-code=union-attr
|
||||
# mypy: disable-error-code=no-untyped-def
|
||||
|
||||
|
||||
class TestStatusClassifier:
    """Test evidence status classification logic.

    Apart from ``test_parse_tax_year_bounds``, every test passes a list of
    ``FoundEvidence`` records, the ``mock_policy`` fixture and the tax-year
    label ``"2024-25"`` to ``CoverageEvaluator.classify_status`` and asserts
    on the returned ``Status``.
    """

    @pytest.fixture
    def mock_policy(self):
        """Create mock compiled policy for testing.

        The policy describes the UK 2024-25 tax year (2024-04-06 to
        2025-04-05) with ``P60`` as its only document kind and a 30-day date
        tolerance. The status classifier is configured with:

        * ``present_verified``: min OCR 0.82, min extract 0.85, date in year.
        * ``present_unverified``: min OCR 0.60, min extract 0.70, date in
          year or within tolerance.
        * ``conflicting``: one free-text conflict rule.
        * ``missing``: no constraints.
        """
        # Function-scope import: the module only imports ``datetime`` from
        # the ``datetime`` package, and this fixture is the sole user of
        # ``timezone``.
        from datetime import timezone

        policy = CoveragePolicy(
            version="1.0",
            jurisdiction="UK",
            tax_year="2024-25",
            tax_year_boundary=TaxYearBoundary(start="2024-04-06", end="2025-04-05"),
            defaults=Defaults(
                confidence_thresholds={"ocr": 0.82, "extract": 0.85},
                date_tolerance_days=30,
            ),
            document_kinds=["P60"],
            status_classifier=StatusClassifierConfig(
                present_verified=StatusClassifier(
                    min_ocr=0.82,
                    min_extract=0.85,
                    date_in_year=True,
                ),
                present_unverified=StatusClassifier(
                    min_ocr=0.60,
                    min_extract=0.70,
                    date_in_year_or_tolerance=True,
                ),
                conflicting=StatusClassifier(
                    conflict_rules=["Same doc kind, different totals"]
                ),
                missing=StatusClassifier(),
            ),
            conflict_resolution=ConflictRules(precedence=["P60"]),
            question_templates=QuestionTemplates(
                default={"text": "test", "why": "test"}
            ),
            privacy=Privacy(vector_pii_free=True, redact_patterns=[]),
        )

        return CompiledCoveragePolicy(
            policy=policy,
            compiled_predicates={},
            # ``datetime.utcnow()`` is deprecated since Python 3.12 and yields
            # a naive value; use an explicit timezone-aware UTC timestamp.
            compiled_at=datetime.now(timezone.utc),
            hash="test-hash",
            source_files=["test.yaml"],
        )

    @pytest.fixture
    def evaluator(self):
        """Create coverage evaluator for testing."""
        return CoverageEvaluator()

    def test_classify_missing_evidence(self, evaluator, mock_policy):
        """Test classification when no evidence found."""
        found = []
        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.MISSING

    def test_classify_verified_evidence(self, evaluator, mock_policy):
        """Test classification of verified evidence.

        Both confidences are above the verified thresholds and the date lies
        inside the tax year.
        """
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2024-05-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

    def test_classify_unverified_evidence(self, evaluator, mock_policy):
        """Test classification of unverified evidence.

        Both confidences sit between the unverified and verified thresholds.
        """
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.70,  # Below verified threshold
                extract_confidence=0.75,  # Below verified threshold
                date="2024-05-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED

    def test_classify_low_confidence_evidence(self, evaluator, mock_policy):
        """Test classification of very low confidence evidence.

        Evidence below the unverified thresholds is expected to be treated
        the same as no evidence at all.
        """
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.50,  # Below unverified threshold
                extract_confidence=0.55,  # Below unverified threshold
                date="2024-05-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.MISSING

    def test_classify_conflicting_evidence(self, evaluator, mock_policy):
        """Test classification when multiple conflicting documents found.

        Two P60 documents with identical, verified-level confidences but
        different IDs and dates are supplied together.
        """
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2024-05-15T10:00:00Z",
            ),
            FoundEvidence(
                doc_id="DOC-002",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2024-05-20T10:00:00Z",
            ),
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.CONFLICTING

    def test_classify_evidence_outside_tax_year(self, evaluator, mock_policy):
        """Test classification of evidence outside tax year.

        NOTE(review): 2023-03-15 is more than a year before the 2024-04-06
        start and far outside the policy's 30-day tolerance; confirm the
        evaluator is meant to return PRESENT_UNVERIFIED (not MISSING) here.
        """
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2023-03-15T10:00:00Z",  # Outside tax year
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        # Evidence outside tax year should be unverified even with high confidence
        # This is correct business logic - date validation is part of verification
        assert status == Status.PRESENT_UNVERIFIED

    def test_classify_evidence_no_date(self, evaluator, mock_policy):
        """Test classification of evidence without date."""
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date=None,
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        # Evidence without date cannot be fully verified, even with high confidence
        # This is correct business logic - date validation is required for verification
        assert status == Status.PRESENT_UNVERIFIED

    def test_parse_tax_year_bounds(self, evaluator):
        """Test parsing of tax year boundary strings.

        Calls the evaluator's private ``_parse_tax_year_bounds`` helper
        directly and checks that both results are ``datetime`` objects with
        the expected year, month and day.
        """
        start_str = "2024-04-06"
        end_str = "2025-04-05"

        start, end = evaluator._parse_tax_year_bounds(start_str, end_str)

        assert isinstance(start, datetime)
        assert isinstance(end, datetime)
        assert start.year == 2024
        assert start.month == 4
        assert start.day == 6
        assert end.year == 2025
        assert end.month == 4
        assert end.day == 5

    def test_evidence_within_tax_year(self, evaluator, mock_policy):
        """Test evidence date validation within tax year."""
        # Evidence within tax year
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2024-06-15T10:00:00Z",  # Within 2024-25 tax year
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

    def test_evidence_boundary_dates(self, evaluator, mock_policy):
        """Test evidence on tax year boundary dates.

        Both the first instant of the start date and the last second of the
        end date are expected to count as inside the tax year.
        """
        # Test start boundary
        found_start = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2024-04-06T00:00:00Z",  # Exact start date
            )
        ]

        status = evaluator.classify_status(found_start, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

        # Test end boundary
        found_end = [
            FoundEvidence(
                doc_id="DOC-002",
                kind="P60",
                ocr_confidence=0.85,
                extract_confidence=0.90,
                date="2025-04-05T23:59:59Z",  # Exact end date
            )
        ]

        status = evaluator.classify_status(found_end, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

    def test_threshold_edge_cases(self, evaluator, mock_policy):
        """Test classification at threshold boundaries.

        Confidences exactly equal to the verified thresholds are expected to
        qualify as verified; values 0.01 below them are not.
        """
        # Exactly at verified threshold
        found_exact = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.82,  # Exactly at threshold
                extract_confidence=0.85,  # Exactly at threshold
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found_exact, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

        # Just below verified threshold
        found_below = [
            FoundEvidence(
                doc_id="DOC-002",
                kind="P60",
                ocr_confidence=0.81,  # Just below threshold
                extract_confidence=0.84,  # Just below threshold
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found_below, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED

    def test_mixed_confidence_levels(self, evaluator, mock_policy):
        """Test classification with mixed OCR and extract confidence.

        One score above and one below the verified thresholds must not be
        enough for a verified status, whichever of the two is the low one.
        """
        # High OCR, low extract
        found_mixed1 = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.90,  # High
                extract_confidence=0.70,  # Low
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found_mixed1, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED  # Both must meet threshold

        # Low OCR, high extract
        found_mixed2 = [
            FoundEvidence(
                doc_id="DOC-002",
                kind="P60",
                ocr_confidence=0.70,  # Low
                extract_confidence=0.90,  # High
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found_mixed2, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED  # Both must meet threshold

    def test_zero_confidence_evidence(self, evaluator, mock_policy):
        """Test classification of zero confidence evidence."""
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=0.0,
                extract_confidence=0.0,
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.MISSING

    def test_perfect_confidence_evidence(self, evaluator, mock_policy):
        """Test classification of perfect confidence evidence."""
        found = [
            FoundEvidence(
                doc_id="DOC-001",
                kind="P60",
                ocr_confidence=1.0,
                extract_confidence=1.0,
                date="2024-06-15T10:00:00Z",
            )
        ]

        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED
|
||||
Reference in New Issue
Block a user