Initial commit
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled

This commit is contained in:
harkon
2025-10-11 08:41:36 +01:00
commit b324ff09ef
276 changed files with 55220 additions and 0 deletions

View File

View File

@@ -0,0 +1,146 @@
"""Core coverage policy models."""
from collections.abc import Callable
from datetime import datetime
from typing import Any
from pydantic import BaseModel, Field
from ..enums import Role
class Validity(BaseModel):
    """Validity constraints for evidence"""

    # Every field has a default, so Validity() with no arguments is valid.
    # Boolean flag, off by default. Presumably demands that the evidence is
    # dated inside the tax year — TODO confirm against the evaluator.
    within_tax_year: bool = False
    # Optional free-form string; no date format is enforced by this model.
    available_by: str | None = None
    # Integer number of days, default 30.
    # NOTE(review): Defaults.date_tolerance_days carries the same default.
    date_tolerance_days: int = 30
class StatusClassifier(BaseModel):
    """Rules for classifying evidence status"""

    # Float thresholds, defaults 0.82 and 0.85. The names suggest minimum OCR
    # and extraction confidence; no range check is applied here
    # — TODO confirm they are on a 0..1 scale.
    min_ocr: float = 0.82
    min_extract: float = 0.85
    # Both date flags default to True; how they interact is not defined in
    # this model.
    date_in_year: bool = True
    date_in_year_or_tolerance: bool = True
    # List of strings; each instance gets its own empty list by default.
    # The rule syntax is not visible from here.
    conflict_rules: list[str] = Field(default_factory=list)
class StatusClassifierConfig(BaseModel):
    """Complete status classifier configuration"""

    # Required: one StatusClassifier per status bucket.
    present_verified: StatusClassifier
    present_unverified: StatusClassifier
    conflicting: StatusClassifier
    # Optional: when omitted, a fresh StatusClassifier built from its own
    # field defaults is used. The class itself is the factory, matching the
    # `Field(default_factory=Validity)` spelling used by EvidenceItem.
    missing: StatusClassifier = Field(default_factory=StatusClassifier)
class EvidenceItem(BaseModel):
    """Evidence requirement definition"""

    # Required fields. Role is imported from ..enums.
    id: str
    role: Role
    # Optional string; its syntax and evaluation are not visible from here.
    condition: str | None = None
    # String lists; each instance gets its own empty list by default.
    boxes: list[str] = Field(default_factory=list)
    acceptable_alternatives: list[str] = Field(default_factory=list)
    # Defaults to a Validity() built from that model's own field defaults.
    validity: Validity = Field(default_factory=Validity)
    # str -> str mapping, empty by default. Presumably reason code -> message
    # text — TODO confirm against the policy files.
    reasons: dict[str, str] = Field(default_factory=dict)
class CrossCheck(BaseModel):
    """Cross-validation rule"""

    # Both fields are required plain strings. The format of `logic` is not
    # defined or validated by this model.
    name: str
    logic: str
class SchedulePolicy(BaseModel):
    """Policy for a specific tax schedule"""

    # Every field is optional, so SchedulePolicy() with no arguments is valid.
    # Optional string. Presumably a key into CoveragePolicy.guidance_refs
    # — TODO confirm.
    guidance_hint: str | None = None
    # Nested models, validated as EvidenceItem / CrossCheck instances.
    evidence: list[EvidenceItem] = Field(default_factory=list)
    cross_checks: list[CrossCheck] = Field(default_factory=list)
    # str -> str mapping; the meaning of keys and values is not visible here.
    selection_rule: dict[str, str] = Field(default_factory=dict)
    # Free-form mapping; values are not type-checked (Any).
    notes: dict[str, Any] = Field(default_factory=dict)
class Trigger(BaseModel):
    """Schedule trigger condition"""

    # Two optional string lists, both empty by default. The names suggest
    # "at least one" vs "every" semantics, but the evaluation happens outside
    # this model — TODO confirm, including the behaviour when both are empty.
    any_of: list[str] = Field(default_factory=list)
    all_of: list[str] = Field(default_factory=list)
class GuidanceRef(BaseModel):
    """Reference to guidance document"""

    # Both fields are required plain strings; `kind` is not restricted to a
    # fixed set of values by this model.
    doc_id: str
    kind: str
class QuestionTemplates(BaseModel):
    """Templates for generating clarifying questions"""

    # Two optional str -> str mappings, both empty by default. The key names
    # and the template placeholder syntax are not visible from here.
    default: dict[str, str] = Field(default_factory=dict)
    reasons: dict[str, str] = Field(default_factory=dict)
class ConflictRules(BaseModel):
    """Rules for handling conflicting evidence"""

    # List of strings, empty by default. Presumably ordered from highest to
    # lowest priority — TODO confirm the ordering convention.
    precedence: list[str] = Field(default_factory=list)
    # Free-form mapping; values are not type-checked (Any).
    escalation: dict[str, Any] = Field(default_factory=dict)
class TaxYearBoundary(BaseModel):
    """Tax year date boundaries"""

    # Both fields are required and typed as plain strings, so no date parsing
    # or start <= end check happens in this model.
    # NOTE(review): looks like ISO dates are expected — confirm with the loader.
    start: str
    end: str
class Defaults(BaseModel):
    """Default configuration values"""

    # str -> float mapping, empty by default; key names are not fixed here.
    confidence_thresholds: dict[str, float] = Field(default_factory=dict)
    # Integer number of days, default 30 (same default as
    # Validity.date_tolerance_days).
    date_tolerance_days: int = 30
    # Boolean switches, both on by default; they are consumed outside this
    # model.
    require_lineage_bbox: bool = True
    allow_bank_substantiation: bool = True
class Privacy(BaseModel):
    """Privacy and PII handling configuration"""

    # Boolean flag, on by default.
    vector_pii_free: bool = True
    # List of strings, empty by default. Presumably regular expressions; they
    # are neither compiled nor validated here — TODO confirm.
    redact_patterns: list[str] = Field(default_factory=list)
class CoveragePolicy(BaseModel):
    """Complete coverage policy definition"""

    # Required identification strings; no format is enforced for any of them.
    version: str
    jurisdiction: str
    tax_year: str
    # Required nested models.
    tax_year_boundary: TaxYearBoundary
    defaults: Defaults
    # Optional collections; each defaults to a fresh empty list or dict.
    document_kinds: list[str] = Field(default_factory=list)
    guidance_refs: dict[str, GuidanceRef] = Field(default_factory=dict)
    # Presumably both mappings are keyed by schedule id — TODO confirm that
    # `triggers` and `schedules` share the same key space.
    triggers: dict[str, Trigger] = Field(default_factory=dict)
    schedules: dict[str, SchedulePolicy] = Field(default_factory=dict)
    # Required nested models (no defaults, so input must supply them).
    status_classifier: StatusClassifierConfig
    conflict_resolution: ConflictRules
    question_templates: QuestionTemplates
    privacy: Privacy
class CompiledCoveragePolicy(BaseModel):
    """Coverage policy with compiled predicates"""

    # Required: the declarative policy the predicates belong to.
    policy: CoveragePolicy
    # Maps a string key to a callable taking two strings and returning bool;
    # empty by default. What the two string arguments stand for is not visible
    # here.
    # NOTE(review): callables cannot be JSON-encoded, so dumping this model to
    # JSON will likely fail unless this field is excluded — confirm how the
    # compiled policy is cached or persisted.
    compiled_predicates: dict[str, Callable[[str, str], bool]] = Field(
        default_factory=dict
    )
    # Required; must be supplied by whoever builds the compiled policy.
    compiled_at: datetime
    # Required string; the hashing algorithm and hashed input are not visible
    # here.
    hash: str
    # List of strings, empty by default.
    source_files: list[str] = Field(default_factory=list)

View File

@@ -0,0 +1,112 @@
"""Coverage evaluation models."""
from datetime import datetime
from pydantic import BaseModel, Field
from ..enums import OverallStatus, Role, Status
class FoundEvidence(BaseModel):
    """Evidence found in the knowledge graph"""

    # Required plain strings.
    doc_id: str
    kind: str
    # Three float scores, each defaulting to 0.0; no range is enforced here
    # — TODO confirm they are on a 0..1 scale.
    confidence: float = 0.0
    # List of ints, empty by default. Whether page numbers are 0- or 1-based
    # is not visible here.
    pages: list[int] = Field(default_factory=list)
    # Optional str -> float mapping; the key names and units are not fixed by
    # this model.
    bbox: dict[str, float] | None = None
    ocr_confidence: float = 0.0
    extract_confidence: float = 0.0
    # Optional plain string; no date parsing happens in this model.
    date: str | None = None
class Citation(BaseModel):
    """Citation reference"""

    # Every field is optional and defaults to None, so an empty Citation() is
    # valid; this model does not require any particular combination to be set.
    rule_id: str | None = None
    doc_id: str | None = None
    url: str | None = None
    locator: str | None = None
    section_id: str | None = None
    page: int | None = None
    # Optional str -> float mapping (same type as FoundEvidence.bbox).
    bbox: dict[str, float] | None = None
class CoverageItem(BaseModel):
    """Coverage evaluation for a single evidence item"""

    # Required fields. Role and Status are imported from ..enums.
    id: str
    role: Role
    status: Status
    # Optional collections; each defaults to a fresh empty list.
    # `id`, `role`, `boxes` and `acceptable_alternatives` have the same names
    # and types as on EvidenceItem — presumably copied from the policy entry,
    # TODO confirm.
    boxes: list[str] = Field(default_factory=list)
    found: list[FoundEvidence] = Field(default_factory=list)
    acceptable_alternatives: list[str] = Field(default_factory=list)
    # Plain string, empty by default (not None).
    reason: str = ""
    citations: list[Citation] = Field(default_factory=list)
class ScheduleCoverage(BaseModel):
    """Coverage evaluation for a schedule"""

    # Required fields. OverallStatus is imported from ..enums.
    schedule_id: str
    status: OverallStatus
    # List of per-item results, empty by default.
    evidence: list[CoverageItem] = Field(default_factory=list)
class BlockingItem(BaseModel):
    """Item that blocks completion"""

    # Both fields are required plain strings; this model does not check that
    # they refer to an existing schedule or evidence item.
    schedule_id: str
    evidence_id: str
def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive datetime.

    Replaces ``datetime.utcnow()``, which is deprecated since Python 3.12.
    The tzinfo is stripped so the value has the same shape the old call
    produced (naive, UTC wall-clock).
    """
    # Function-scope import: this module only imports the `datetime` class.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class CoverageReport(BaseModel):
    """Complete coverage evaluation report"""

    # Required plain strings.
    tax_year: str
    taxpayer_id: str
    # List of strings, empty by default.
    schedules_required: list[str] = Field(default_factory=list)
    # Required. OverallStatus is imported from ..enums.
    overall_status: OverallStatus
    # Optional collections; each defaults to a fresh empty list.
    coverage: list[ScheduleCoverage] = Field(default_factory=list)
    blocking_items: list[BlockingItem] = Field(default_factory=list)
    # Filled at instantiation time when not supplied. The value is a naive
    # datetime expressed in UTC.
    # NOTE(review): consider a timezone-aware value once consumers can take it.
    evaluated_at: datetime = Field(default_factory=_utcnow_naive)
    # Plain string, empty by default (not None).
    policy_version: str = ""
class CoverageGap(BaseModel):
    """Gap in coverage requiring clarification"""

    # Required fields. Role is imported from ..enums. Unlike
    # CoverageItem.reason, `reason` has no default here.
    schedule_id: str
    evidence_id: str
    role: Role
    reason: str
    # Optional collections; each defaults to a fresh empty list.
    boxes: list[str] = Field(default_factory=list)
    citations: list[Citation] = Field(default_factory=list)
    acceptable_alternatives: list[str] = Field(default_factory=list)
class ClarifyContext(BaseModel):
    """Context for clarifying question"""

    # All three fields are required plain strings; no format is enforced.
    tax_year: str
    taxpayer_id: str
    jurisdiction: str
class UploadOption(BaseModel):
    """Upload option for user"""

    # Required plain string.
    label: str
    # List of strings, empty by default. Whether entries are file extensions
    # or MIME types is not visible here.
    accepted_formats: list[str] = Field(default_factory=list)
    # Required plain string; it is not validated as a URL or path by this
    # model.
    upload_endpoint: str
class ClarifyResponse(BaseModel):
    """Response to clarifying question request"""

    # Required plain strings.
    question_text: str
    why_it_is_needed: str
    # Optional collections; each defaults to a fresh empty list.
    citations: list[Citation] = Field(default_factory=list)
    options_to_provide: list[UploadOption] = Field(default_factory=list)
    # Boolean flag, off by default.
    blocking: bool = False
    boxes_affected: list[str] = Field(default_factory=list)

View File

@@ -0,0 +1,48 @@
"""Utility models for coverage system."""
from datetime import datetime
from typing import Any
from pydantic import BaseModel, Field
from ..enums import OverallStatus
class PolicyError(Exception):
    """Error raised for policy loading or validation failures.

    Adds nothing to ``Exception``; it exists so callers can catch policy
    problems by type.
    """
class ValidationResult(BaseModel):
    """Policy validation result"""

    # Required boolean. This model does not tie it to `errors` being empty;
    # keeping the two consistent is up to whoever builds the result.
    ok: bool
    # Lists of strings; each defaults to a fresh empty list.
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
class PolicyVersion(BaseModel):
    """Policy version record"""

    # Optional integer, None by default. Presumably a database key assigned
    # on insert — TODO confirm.
    id: int | None = None
    # Required plain strings (same names and types as on CoveragePolicy).
    version: str
    jurisdiction: str
    tax_year: str
    # Optional string, None by default.
    tenant_id: str | None = None
    # List of strings, empty by default.
    source_files: list[str] = Field(default_factory=list)
    # Required (same names and types as on CompiledCoveragePolicy).
    compiled_at: datetime
    hash: str
def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive datetime.

    Replaces ``datetime.utcnow()``, which is deprecated since Python 3.12.
    The tzinfo is stripped so the value has the same shape the old call
    produced (naive, UTC wall-clock).
    """
    # Function-scope import: this module only imports the `datetime` class.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class CoverageAudit(BaseModel):
    """Coverage audit record"""

    # Optional integer, None by default. Presumably a database key assigned
    # on insert — TODO confirm.
    id: int | None = None
    # Required fields. OverallStatus is imported from ..enums.
    taxpayer_id: str
    tax_year: str
    policy_version: str
    overall_status: OverallStatus
    # List of free-form dicts (values untyped), empty by default.
    blocking_items: list[dict[str, Any]] = Field(default_factory=list)
    # Filled at instantiation time when not supplied. The value is a naive
    # datetime expressed in UTC.
    # NOTE(review): consider a timezone-aware value once consumers can take it.
    created_at: datetime = Field(default_factory=_utcnow_naive)
    # Optional string, None by default.
    trace_id: str | None = None