Initial commit
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled

This commit is contained in:
harkon
2025-10-11 08:41:36 +01:00
commit b324ff09ef
276 changed files with 55220 additions and 0 deletions

View File

@@ -0,0 +1,338 @@
"""Unit tests for evidence status classification."""
# FILE: tests/unit/coverage/test_status_classifier.py
from datetime import datetime, timezone

import pytest

from libs.coverage.evaluator import CoverageEvaluator
from libs.schemas import (
    CompiledCoveragePolicy,
    CoveragePolicy,
    Defaults,
    FoundEvidence,
    Status,
    StatusClassifier,
    StatusClassifierConfig,
    TaxYearBoundary,
)
from libs.schemas.coverage.core import ConflictRules, Privacy, QuestionTemplates
# pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement
# pylint: disable=raise-missing-from,unused-argument,too-many-arguments,too-many-positional-arguments
# pylint: disable=too-many-locals,import-outside-toplevel
# mypy: disable-error-code=union-attr
# mypy: disable-error-code=no-untyped-def
class TestStatusClassifier:
    """Test evidence status classification logic."""

    @staticmethod
    def _evidence(
        doc_id="DOC-001",
        ocr=0.85,
        extract=0.90,
        date="2024-05-15T10:00:00Z",
    ):
        """Build a P60 FoundEvidence record with overridable fields.

        Defaults describe a high-confidence document dated inside the
        2024-25 UK tax year, so tests only override what they exercise.
        """
        return FoundEvidence(
            doc_id=doc_id,
            kind="P60",
            ocr_confidence=ocr,
            extract_confidence=extract,
            date=date,
        )

    @pytest.fixture
    def mock_policy(self):
        """Create mock compiled policy for testing.

        Thresholds: verified needs ocr >= 0.82 and extract >= 0.85;
        unverified needs ocr >= 0.60 and extract >= 0.70.
        """
        policy = CoveragePolicy(
            version="1.0",
            jurisdiction="UK",
            tax_year="2024-25",
            tax_year_boundary=TaxYearBoundary(start="2024-04-06", end="2025-04-05"),
            defaults=Defaults(
                confidence_thresholds={"ocr": 0.82, "extract": 0.85},
                date_tolerance_days=30,
            ),
            document_kinds=["P60"],
            status_classifier=StatusClassifierConfig(
                present_verified=StatusClassifier(
                    min_ocr=0.82,
                    min_extract=0.85,
                    date_in_year=True,
                ),
                present_unverified=StatusClassifier(
                    min_ocr=0.60,
                    min_extract=0.70,
                    date_in_year_or_tolerance=True,
                ),
                conflicting=StatusClassifier(
                    conflict_rules=["Same doc kind, different totals"]
                ),
                missing=StatusClassifier(),
            ),
            conflict_resolution=ConflictRules(precedence=["P60"]),
            question_templates=QuestionTemplates(
                default={"text": "test", "why": "test"}
            ),
            privacy=Privacy(vector_pii_free=True, redact_patterns=[]),
        )
        return CompiledCoveragePolicy(
            policy=policy,
            compiled_predicates={},
            # datetime.utcnow() is deprecated since Python 3.12; use an
            # explicit timezone-aware UTC timestamp instead.
            compiled_at=datetime.now(timezone.utc),
            hash="test-hash",
            source_files=["test.yaml"],
        )

    @pytest.fixture
    def evaluator(self):
        """Create coverage evaluator for testing."""
        return CoverageEvaluator()

    def test_classify_missing_evidence(self, evaluator, mock_policy):
        """Test classification when no evidence found."""
        status = evaluator.classify_status([], mock_policy, "2024-25")
        assert status == Status.MISSING

    def test_classify_verified_evidence(self, evaluator, mock_policy):
        """Test classification of verified evidence."""
        found = [self._evidence()]
        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

    def test_classify_unverified_evidence(self, evaluator, mock_policy):
        """Test classification of unverified evidence."""
        # Both confidences sit below the verified thresholds but above the
        # unverified ones.
        found = [self._evidence(ocr=0.70, extract=0.75)]
        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED

    def test_classify_low_confidence_evidence(self, evaluator, mock_policy):
        """Test classification of very low confidence evidence."""
        # Below even the unverified thresholds -> treated as missing.
        found = [self._evidence(ocr=0.50, extract=0.55)]
        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.MISSING

    def test_classify_conflicting_evidence(self, evaluator, mock_policy):
        """Test classification when multiple conflicting documents found."""
        # Two high-confidence P60s for the same year trigger the conflict rule.
        found = [
            self._evidence(),
            self._evidence(doc_id="DOC-002", date="2024-05-20T10:00:00Z"),
        ]
        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.CONFLICTING

    def test_classify_evidence_outside_tax_year(self, evaluator, mock_policy):
        """Test classification of evidence outside tax year."""
        found = [self._evidence(date="2023-03-15T10:00:00Z")]  # Outside tax year
        status = evaluator.classify_status(found, mock_policy, "2024-25")
        # Evidence outside tax year should be unverified even with high
        # confidence - date validation is part of verification.
        assert status == Status.PRESENT_UNVERIFIED

    def test_classify_evidence_no_date(self, evaluator, mock_policy):
        """Test classification of evidence without date."""
        found = [self._evidence(date=None)]
        status = evaluator.classify_status(found, mock_policy, "2024-25")
        # Evidence without a date cannot be fully verified, even with high
        # confidence - date validation is required for verification.
        assert status == Status.PRESENT_UNVERIFIED

    def test_parse_tax_year_bounds(self, evaluator):
        """Test parsing of tax year boundary strings."""
        start, end = evaluator._parse_tax_year_bounds("2024-04-06", "2025-04-05")
        assert isinstance(start, datetime)
        assert isinstance(end, datetime)
        assert (start.year, start.month, start.day) == (2024, 4, 6)
        assert (end.year, end.month, end.day) == (2025, 4, 5)

    def test_evidence_within_tax_year(self, evaluator, mock_policy):
        """Test evidence date validation within tax year."""
        found = [self._evidence(date="2024-06-15T10:00:00Z")]  # Within 2024-25
        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

    def test_evidence_boundary_dates(self, evaluator, mock_policy):
        """Test evidence on tax year boundary dates."""
        # Exact start of the tax year is inclusive.
        found_start = [self._evidence(date="2024-04-06T00:00:00Z")]
        status = evaluator.classify_status(found_start, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED
        # Exact end of the tax year is inclusive.
        found_end = [self._evidence(doc_id="DOC-002", date="2025-04-05T23:59:59Z")]
        status = evaluator.classify_status(found_end, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED

    def test_threshold_edge_cases(self, evaluator, mock_policy):
        """Test classification at threshold boundaries."""
        # Exactly at the verified thresholds -> verified (inclusive compare).
        found_exact = [
            self._evidence(ocr=0.82, extract=0.85, date="2024-06-15T10:00:00Z")
        ]
        status = evaluator.classify_status(found_exact, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED
        # Just below the verified thresholds -> unverified.
        found_below = [
            self._evidence(
                doc_id="DOC-002", ocr=0.81, extract=0.84, date="2024-06-15T10:00:00Z"
            )
        ]
        status = evaluator.classify_status(found_below, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED

    def test_mixed_confidence_levels(self, evaluator, mock_policy):
        """Test classification with mixed OCR and extract confidence."""
        # High OCR, low extract: both must meet the verified threshold.
        found_mixed1 = [
            self._evidence(ocr=0.90, extract=0.70, date="2024-06-15T10:00:00Z")
        ]
        status = evaluator.classify_status(found_mixed1, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED
        # Low OCR, high extract: same rule applies symmetrically.
        found_mixed2 = [
            self._evidence(
                doc_id="DOC-002", ocr=0.70, extract=0.90, date="2024-06-15T10:00:00Z"
            )
        ]
        status = evaluator.classify_status(found_mixed2, mock_policy, "2024-25")
        assert status == Status.PRESENT_UNVERIFIED

    def test_zero_confidence_evidence(self, evaluator, mock_policy):
        """Test classification of zero confidence evidence."""
        found = [self._evidence(ocr=0.0, extract=0.0, date="2024-06-15T10:00:00Z")]
        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.MISSING

    def test_perfect_confidence_evidence(self, evaluator, mock_policy):
        """Test classification of perfect confidence evidence."""
        found = [self._evidence(ocr=1.0, extract=1.0, date="2024-06-15T10:00:00Z")]
        status = evaluator.classify_status(found, mock_policy, "2024-25")
        assert status == Status.PRESENT_VERIFIED