Initial commit
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
This commit is contained in:
175
libs/schemas/__init__.py
Normal file
175
libs/schemas/__init__.py
Normal file
@@ -0,0 +1,175 @@
|
||||
"""Shared Pydantic models mirroring ontology entities."""
|
||||
|
||||
# Enums are imported further below (see the "from .enums import ..." statement)
|
||||
# Import coverage models
|
||||
from .coverage.core import (
|
||||
CompiledCoveragePolicy,
|
||||
ConflictRules,
|
||||
CoveragePolicy,
|
||||
CrossCheck,
|
||||
Defaults,
|
||||
EvidenceItem,
|
||||
GuidanceRef,
|
||||
Privacy,
|
||||
QuestionTemplates,
|
||||
SchedulePolicy,
|
||||
StatusClassifier,
|
||||
StatusClassifierConfig,
|
||||
TaxYearBoundary,
|
||||
Trigger,
|
||||
Validity,
|
||||
)
|
||||
from .coverage.evaluation import (
|
||||
BlockingItem,
|
||||
Citation,
|
||||
ClarifyContext,
|
||||
ClarifyResponse,
|
||||
CoverageGap,
|
||||
CoverageItem,
|
||||
CoverageReport,
|
||||
FoundEvidence,
|
||||
ScheduleCoverage,
|
||||
UploadOption,
|
||||
)
|
||||
from .coverage.utils import CoverageAudit, PolicyError, PolicyVersion, ValidationResult
|
||||
|
||||
# Import all entities
|
||||
from .entities import (
|
||||
Account,
|
||||
BaseEntity,
|
||||
Calculation,
|
||||
Document,
|
||||
Evidence,
|
||||
ExpenseItem,
|
||||
FormBox,
|
||||
IncomeItem,
|
||||
Party,
|
||||
Payment,
|
||||
PropertyAsset,
|
||||
Rule,
|
||||
TaxpayerProfile,
|
||||
)
|
||||
from .enums import (
|
||||
DocumentKind,
|
||||
ExpenseType,
|
||||
HealthStatus,
|
||||
IncomeType,
|
||||
OverallStatus,
|
||||
PartySubtype,
|
||||
PropertyUsage,
|
||||
Role,
|
||||
Status,
|
||||
TaxpayerType,
|
||||
)
|
||||
|
||||
# Import error models
|
||||
from .errors import ErrorResponse, ValidationError, ValidationErrorResponse
|
||||
|
||||
# Import health models
|
||||
from .health import HealthCheck, ServiceHealth
|
||||
|
||||
# Import request models
|
||||
from .requests import (
|
||||
DocumentUploadRequest,
|
||||
ExtractionRequest,
|
||||
FirmSyncRequest,
|
||||
HMRCSubmissionRequest,
|
||||
RAGSearchRequest,
|
||||
ScheduleComputeRequest,
|
||||
)
|
||||
|
||||
# Import response models
|
||||
from .responses import (
|
||||
DocumentUploadResponse,
|
||||
ExtractionResponse,
|
||||
FirmSyncResponse,
|
||||
HMRCSubmissionResponse,
|
||||
RAGSearchResponse,
|
||||
ScheduleComputeResponse,
|
||||
)
|
||||
|
||||
# Import utility functions
|
||||
from .utils import get_entity_schemas
|
||||
|
||||
__all__ = [
|
||||
# Enums
|
||||
"DocumentKind",
|
||||
"ExpenseType",
|
||||
"HealthStatus",
|
||||
"IncomeType",
|
||||
"OverallStatus",
|
||||
"PartySubtype",
|
||||
"PropertyUsage",
|
||||
"Role",
|
||||
"Status",
|
||||
"TaxpayerType",
|
||||
# Entities
|
||||
"Account",
|
||||
"BaseEntity",
|
||||
"Calculation",
|
||||
"Document",
|
||||
"Evidence",
|
||||
"ExpenseItem",
|
||||
"FormBox",
|
||||
"IncomeItem",
|
||||
"Party",
|
||||
"Payment",
|
||||
"PropertyAsset",
|
||||
"Rule",
|
||||
"TaxpayerProfile",
|
||||
# Errors
|
||||
"ErrorResponse",
|
||||
"ValidationError",
|
||||
"ValidationErrorResponse",
|
||||
# Health
|
||||
"HealthCheck",
|
||||
"ServiceHealth",
|
||||
# Requests
|
||||
"DocumentUploadRequest",
|
||||
"ExtractionRequest",
|
||||
"FirmSyncRequest",
|
||||
"HMRCSubmissionRequest",
|
||||
"RAGSearchRequest",
|
||||
"ScheduleComputeRequest",
|
||||
# Responses
|
||||
"DocumentUploadResponse",
|
||||
"ExtractionResponse",
|
||||
"FirmSyncResponse",
|
||||
"HMRCSubmissionResponse",
|
||||
"RAGSearchResponse",
|
||||
"ScheduleComputeResponse",
|
||||
# Utils
|
||||
"get_entity_schemas",
|
||||
# Coverage core models
|
||||
"Validity",
|
||||
"StatusClassifier",
|
||||
"StatusClassifierConfig",
|
||||
"EvidenceItem",
|
||||
"CrossCheck",
|
||||
"SchedulePolicy",
|
||||
"Trigger",
|
||||
"GuidanceRef",
|
||||
"QuestionTemplates",
|
||||
"ConflictRules",
|
||||
"TaxYearBoundary",
|
||||
"Defaults",
|
||||
"Privacy",
|
||||
"CoveragePolicy",
|
||||
"CompiledCoveragePolicy",
|
||||
# Coverage evaluation models
|
||||
"FoundEvidence",
|
||||
"Citation",
|
||||
"CoverageItem",
|
||||
"ScheduleCoverage",
|
||||
"BlockingItem",
|
||||
"CoverageReport",
|
||||
"CoverageGap",
|
||||
"ClarifyContext",
|
||||
"UploadOption",
|
||||
"ClarifyResponse",
|
||||
# Coverage utility models
|
||||
"PolicyError",
|
||||
"ValidationResult",
|
||||
"PolicyVersion",
|
||||
"CoverageAudit",
|
||||
]
|
||||
0
libs/schemas/coverage/__init__.py
Normal file
0
libs/schemas/coverage/__init__.py
Normal file
146
libs/schemas/coverage/core.py
Normal file
146
libs/schemas/coverage/core.py
Normal file
@@ -0,0 +1,146 @@
|
||||
"""Core coverage policy models."""
|
||||
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..enums import Role
|
||||
|
||||
|
||||
class Validity(BaseModel):
    """Validity constraints for evidence"""

    # Every field has a default, so the model can be built with no arguments.
    within_tax_year: bool = False
    available_by: str | None = None
    date_tolerance_days: int = 30
|
||||
|
||||
|
||||
class StatusClassifier(BaseModel):
    """Rules for classifying evidence status"""

    # Confidence thresholds (no range validation is applied here).
    min_ocr: float = 0.82
    min_extract: float = 0.85

    # Date-related switches, both enabled by default.
    date_in_year: bool = True
    date_in_year_or_tolerance: bool = True

    # A fresh empty list is created per instance.
    conflict_rules: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class StatusClassifierConfig(BaseModel):
    """Complete status classifier configuration"""

    # Required classifiers, one per status.
    present_verified: StatusClassifier
    present_unverified: StatusClassifier
    conflicting: StatusClassifier

    # Optional: falls back to a StatusClassifier built from its own defaults.
    missing: StatusClassifier = Field(default_factory=StatusClassifier)
|
||||
|
||||
|
||||
class EvidenceItem(BaseModel):
    """Evidence requirement definition"""

    # Required fields.
    id: str
    role: Role

    # Optional fields; container defaults are created fresh per instance.
    condition: str | None = None
    boxes: list[str] = Field(default_factory=list)
    acceptable_alternatives: list[str] = Field(default_factory=list)
    validity: Validity = Field(default_factory=Validity)
    reasons: dict[str, str] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class CrossCheck(BaseModel):
    """Cross-validation rule"""

    # Both fields are required strings.
    name: str
    logic: str
|
||||
|
||||
|
||||
class SchedulePolicy(BaseModel):
    """Policy for a specific tax schedule"""

    # All fields are optional; containers default to new empty instances.
    guidance_hint: str | None = None
    evidence: list[EvidenceItem] = Field(default_factory=list)
    cross_checks: list[CrossCheck] = Field(default_factory=list)
    selection_rule: dict[str, str] = Field(default_factory=dict)
    notes: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class Trigger(BaseModel):
    """Schedule trigger condition"""

    # Two independent lists of strings; each defaults to an empty list.
    any_of: list[str] = Field(default_factory=list)
    all_of: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class GuidanceRef(BaseModel):
    """Reference to guidance document"""

    # Both fields are required strings.
    doc_id: str
    kind: str
|
||||
|
||||
|
||||
class QuestionTemplates(BaseModel):
    """Templates for generating clarifying questions"""

    # String-to-string mappings; each defaults to an empty dict.
    default: dict[str, str] = Field(default_factory=dict)
    reasons: dict[str, str] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class ConflictRules(BaseModel):
    """Rules for handling conflicting evidence"""

    # Both fields are optional and default to empty containers.
    precedence: list[str] = Field(default_factory=list)
    escalation: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class TaxYearBoundary(BaseModel):
    """Tax year date boundaries"""

    # Stored as plain strings; no date parsing or format check happens here.
    start: str
    end: str
|
||||
|
||||
|
||||
class Defaults(BaseModel):
    """Default configuration values"""

    # Named float thresholds; empty dict when not supplied.
    confidence_thresholds: dict[str, float] = Field(default_factory=dict)
    date_tolerance_days: int = 30

    # Feature switches, both enabled by default.
    require_lineage_bbox: bool = True
    allow_bank_substantiation: bool = True
|
||||
|
||||
|
||||
class Privacy(BaseModel):
    """Privacy and PII handling configuration"""

    vector_pii_free: bool = True
    # List of pattern strings; empty by default.
    redact_patterns: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class CoveragePolicy(BaseModel):
    """Complete coverage policy definition"""

    # Required identification and baseline settings.
    version: str
    jurisdiction: str
    tax_year: str
    tax_year_boundary: TaxYearBoundary
    defaults: Defaults

    # Optional collections; each defaults to a new empty container.
    document_kinds: list[str] = Field(default_factory=list)
    guidance_refs: dict[str, GuidanceRef] = Field(default_factory=dict)
    triggers: dict[str, Trigger] = Field(default_factory=dict)
    schedules: dict[str, SchedulePolicy] = Field(default_factory=dict)

    # Required nested configuration sections.
    status_classifier: StatusClassifierConfig
    conflict_resolution: ConflictRules
    question_templates: QuestionTemplates
    privacy: Privacy
|
||||
|
||||
|
||||
class CompiledCoveragePolicy(BaseModel):
    """Coverage policy with compiled predicates"""

    policy: CoveragePolicy

    # Maps a string key to a callable taking two strings and returning a bool.
    compiled_predicates: dict[str, Callable[[str, str], bool]] = Field(
        default_factory=dict,
    )

    # Required metadata about the compilation.
    compiled_at: datetime
    hash: str
    source_files: list[str] = Field(default_factory=list)
|
||||
112
libs/schemas/coverage/evaluation.py
Normal file
112
libs/schemas/coverage/evaluation.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""Coverage evaluation models."""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..enums import OverallStatus, Role, Status
|
||||
|
||||
|
||||
class FoundEvidence(BaseModel):
    """Evidence found in the knowledge graph"""

    # Required identification.
    doc_id: str
    kind: str

    # Confidence scores default to 0.0; no range validation is applied here.
    confidence: float = 0.0
    pages: list[int] = Field(default_factory=list)
    bbox: dict[str, float] | None = None
    ocr_confidence: float = 0.0
    extract_confidence: float = 0.0

    # Kept as a plain string; not parsed into a date by this model.
    date: str | None = None
|
||||
|
||||
|
||||
class Citation(BaseModel):
    """Citation reference"""

    # Every field is optional and defaults to None.
    rule_id: str | None = None
    doc_id: str | None = None
    url: str | None = None
    locator: str | None = None
    section_id: str | None = None
    page: int | None = None
    bbox: dict[str, float] | None = None
|
||||
|
||||
|
||||
class CoverageItem(BaseModel):
    """Coverage evaluation for a single evidence item"""

    # Required fields.
    id: str
    role: Role
    status: Status

    # Optional details; lists default to new empty lists, reason to "".
    boxes: list[str] = Field(default_factory=list)
    found: list[FoundEvidence] = Field(default_factory=list)
    acceptable_alternatives: list[str] = Field(default_factory=list)
    reason: str = ""
    citations: list[Citation] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ScheduleCoverage(BaseModel):
    """Coverage evaluation for a schedule"""

    schedule_id: str
    status: OverallStatus
    # Per-item results; empty list when not supplied.
    evidence: list[CoverageItem] = Field(default_factory=list)
|
||||
|
||||
|
||||
class BlockingItem(BaseModel):
    """Item that blocks completion"""

    # Both identifiers are required strings.
    schedule_id: str
    evidence_id: str
|
||||
|
||||
|
||||
class CoverageReport(BaseModel):
    """Complete coverage evaluation report"""

    tax_year: str
    taxpayer_id: str
    schedules_required: list[str] = Field(default_factory=list)
    overall_status: OverallStatus
    coverage: list[ScheduleCoverage] = Field(default_factory=list)
    blocking_items: list[BlockingItem] = Field(default_factory=list)

    # NOTE(review): datetime.utcnow returns a naive datetime and is
    # deprecated since Python 3.12.
    evaluated_at: datetime = Field(default_factory=datetime.utcnow)
    policy_version: str = ""
|
||||
|
||||
|
||||
class CoverageGap(BaseModel):
    """Gap in coverage requiring clarification"""

    # Required fields.
    schedule_id: str
    evidence_id: str
    role: Role
    reason: str

    # Optional lists; each defaults to a new empty list.
    boxes: list[str] = Field(default_factory=list)
    citations: list[Citation] = Field(default_factory=list)
    acceptable_alternatives: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ClarifyContext(BaseModel):
    """Context for clarifying question"""

    # All three fields are required strings.
    tax_year: str
    taxpayer_id: str
    jurisdiction: str
|
||||
|
||||
|
||||
class UploadOption(BaseModel):
    """Upload option for user"""

    label: str
    # Empty list when not supplied.
    accepted_formats: list[str] = Field(default_factory=list)
    upload_endpoint: str
|
||||
|
||||
|
||||
class ClarifyResponse(BaseModel):
    """Response to clarifying question request"""

    # Required text fields.
    question_text: str
    why_it_is_needed: str

    # Optional details; lists default to empty, blocking to False.
    citations: list[Citation] = Field(default_factory=list)
    options_to_provide: list[UploadOption] = Field(default_factory=list)
    blocking: bool = False
    boxes_affected: list[str] = Field(default_factory=list)
|
||||
48
libs/schemas/coverage/utils.py
Normal file
48
libs/schemas/coverage/utils.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""Utility models for coverage system."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..enums import OverallStatus
|
||||
|
||||
|
||||
class PolicyError(Exception):
    """Policy loading or validation error"""
|
||||
|
||||
|
||||
class ValidationResult(BaseModel):
    """Policy validation result"""

    ok: bool
    # Message lists; each defaults to a new empty list.
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class PolicyVersion(BaseModel):
    """Policy version record"""

    # Optional integer id; None when not supplied.
    id: int | None = None

    version: str
    jurisdiction: str
    tax_year: str
    tenant_id: str | None = None
    source_files: list[str] = Field(default_factory=list)

    # Required compilation metadata.
    compiled_at: datetime
    hash: str
|
||||
|
||||
|
||||
class CoverageAudit(BaseModel):
    """Coverage audit record"""

    # Optional integer id; None when not supplied.
    id: int | None = None

    taxpayer_id: str
    tax_year: str
    policy_version: str
    overall_status: OverallStatus

    # Blocking items are held as free-form dicts in this record.
    blocking_items: list[dict[str, Any]] = Field(default_factory=list)

    # NOTE(review): datetime.utcnow returns a naive datetime and is
    # deprecated since Python 3.12.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    trace_id: str | None = None
|
||||
230
libs/schemas/entities.py
Normal file
230
libs/schemas/entities.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""Core business entities with temporal modeling."""
|
||||
|
||||
from datetime import date, datetime
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from .enums import (
|
||||
DocumentKind,
|
||||
ExpenseType,
|
||||
IncomeType,
|
||||
PartySubtype,
|
||||
PropertyUsage,
|
||||
TaxpayerType,
|
||||
)
|
||||
|
||||
|
||||
class BaseEntity(BaseModel):
    """Base entity with temporal fields"""

    # Shared settings: strip surrounding whitespace from strings, validate
    # on attribute assignment, and store enum fields as their plain values.
    model_config = ConfigDict(
        str_strip_whitespace=True,
        validate_assignment=True,
        use_enum_values=True,
    )

    # Temporal fields (bitemporal modeling)
    valid_from: datetime = Field(
        ...,
        description="When the fact became valid in reality",
    )
    valid_to: datetime | None = Field(
        default=None,
        description="When the fact ceased to be valid",
    )
    # NOTE(review): datetime.utcnow returns a naive datetime and is
    # deprecated since Python 3.12.
    asserted_at: datetime = Field(
        default_factory=datetime.utcnow,
        description="When recorded in system",
    )
    retracted_at: datetime | None = Field(
        default=None,
        description="When retracted from system",
    )

    # Provenance fields, both required.
    source: str = Field(..., description="Source of the information")
    extractor_version: str = Field(..., description="Version of extraction system")
|
||||
|
||||
|
||||
class TaxpayerProfile(BaseEntity):
    """Taxpayer profile entity"""

    taxpayer_id: str = Field(..., description="Unique taxpayer identifier")
    type: TaxpayerType = Field(..., description="Type of taxpayer")

    # Optional identifiers, each validated against a regex when supplied.
    utr: str | None = Field(
        default=None,
        pattern=r"^\d{10}$",
        description="Unique Taxpayer Reference",
    )
    ni_number: str | None = Field(
        default=None,
        pattern=r"^[A-CEGHJ-PR-TW-Z]{2}\d{6}[A-D]$",
        description="National Insurance Number",
    )
    residence: str | None = Field(default=None, description="Tax residence")
|
||||
|
||||
|
||||
class Document(BaseEntity):
    """Document entity"""

    # Must be "doc_" followed by 16 lowercase hex characters.
    doc_id: str = Field(
        ...,
        pattern=r"^doc_[a-f0-9]{16}$",
        description="Document identifier",
    )
    kind: DocumentKind = Field(..., description="Type of document")
    # Redeclares the inherited `source` field with a document-specific description.
    source: str = Field(..., description="Source of document")
    mime: str = Field(..., description="MIME type")
    # Must be 64 lowercase hex characters.
    checksum: str = Field(
        ...,
        pattern=r"^[a-f0-9]{64}$",
        description="SHA-256 checksum",
    )

    # Optional metadata.
    file_size: int | None = Field(default=None, ge=0, description="File size in bytes")
    pages: int | None = Field(default=None, ge=1, description="Number of pages")
    date_range: dict[str, date] | None = Field(
        default=None,
        description="Document date range",
    )
|
||||
|
||||
|
||||
class Evidence(BaseEntity):
    """Evidence entity linking to document snippets"""

    snippet_id: str = Field(..., description="Evidence snippet identifier")
    doc_ref: str = Field(..., description="Reference to source document")
    # Page numbers start at 1.
    page: int = Field(..., ge=1, description="Page number")
    # The list length is not validated by this model.
    bbox: list[float] | None = Field(
        default=None,
        description="Bounding box coordinates [x1, y1, x2, y2]",
    )
    # Must be 64 lowercase hex characters.
    text_hash: str = Field(
        ...,
        pattern=r"^[a-f0-9]{64}$",
        description="SHA-256 hash of extracted text",
    )
    # Constrained to the closed interval [0.0, 1.0] when supplied.
    ocr_confidence: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="OCR confidence score",
    )
|
||||
|
||||
|
||||
class IncomeItem(BaseEntity):
    """Income item entity"""

    income_id: str = Field(..., description="Income item identifier")
    type: IncomeType = Field(..., description="Type of income")

    # Monetary amounts are non-negative Decimals.
    gross: Decimal = Field(..., ge=0, description="Gross amount")
    net: Decimal | None = Field(default=None, ge=0, description="Net amount")
    tax_withheld: Decimal | None = Field(default=None, ge=0, description="Tax withheld")
    # Exactly three uppercase letters.
    currency: str = Field(..., pattern=r"^[A-Z]{3}$", description="Currency code")

    # Optional period and free-text description.
    period_start: date | None = Field(default=None, description="Income period start")
    period_end: date | None = Field(default=None, description="Income period end")
    description: str | None = Field(default=None, description="Income description")
|
||||
|
||||
|
||||
class ExpenseItem(BaseEntity):
    """Expense item entity"""

    expense_id: str = Field(..., description="Expense item identifier")
    type: ExpenseType = Field(..., description="Type of expense")
    amount: Decimal = Field(..., ge=0, description="Expense amount")
    # Exactly three uppercase letters.
    currency: str = Field(..., pattern=r"^[A-Z]{3}$", description="Currency code")

    # Optional descriptive fields and flags.
    description: str | None = Field(default=None, description="Expense description")
    category: str | None = Field(default=None, description="Expense category")
    allowable: bool | None = Field(
        default=None,
        description="Whether expense is allowable",
    )
    capitalizable_flag: bool | None = Field(
        default=None,
        description="Whether expense should be capitalized",
    )

    # Optional non-negative amounts.
    vat_amount: Decimal | None = Field(default=None, ge=0, description="VAT amount")
    net_amount: Decimal | None = Field(
        default=None,
        ge=0,
        description="Net amount excluding VAT",
    )
|
||||
|
||||
|
||||
class Party(BaseEntity):
    """Party entity (person or organization)"""

    party_id: str = Field(..., description="Party identifier")
    # The name must contain at least one character.
    name: str = Field(..., min_length=1, description="Party name")
    subtype: PartySubtype | None = Field(default=None, description="Party subtype")
    address: str | None = Field(default=None, description="Party address")

    # "GB" followed by either 9 or 12 digits.
    vat_number: str | None = Field(
        default=None,
        pattern=r"^GB\d{9}$|^GB\d{12}$",
        description="UK VAT number",
    )
    # Exactly 10 digits.
    utr: str | None = Field(
        default=None,
        pattern=r"^\d{10}$",
        description="Unique Taxpayer Reference",
    )
    reg_no: str | None = Field(default=None, description="Registration number")
    paye_reference: str | None = Field(default=None, description="PAYE reference")
|
||||
|
||||
|
||||
class Account(BaseEntity):
    """Bank account entity"""

    account_id: str = Field(..., description="Account identifier")

    # Optional bank identifiers, each checked against a regex when supplied.
    iban: str | None = Field(
        default=None,
        pattern=r"^GB\d{2}[A-Z]{4}\d{14}$",
        description="UK IBAN",
    )
    sort_code: str | None = Field(
        default=None,
        pattern=r"^\d{2}-\d{2}-\d{2}$",
        description="Sort code",
    )
    account_no: str | None = Field(
        default=None,
        pattern=r"^\d{8}$",
        description="Account number",
    )

    institution: str | None = Field(default=None, description="Financial institution")
    account_type: str | None = Field(default=None, description="Account type")
    # Defaults to "GBP"; unlike the other entities' currency fields, no
    # pattern is enforced on this one.
    currency: str = Field(default="GBP", description="Account currency")
|
||||
|
||||
|
||||
class PropertyAsset(BaseEntity):
    """Property asset entity"""

    property_id: str = Field(..., description="Property identifier")
    # The address must be at least 10 characters long.
    address: str = Field(..., min_length=10, description="Property address")
    # Uppercase only; whitespace between the two parts is optional.
    postcode: str | None = Field(
        default=None,
        pattern=r"^[A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2}$",
        description="UK postcode",
    )
    tenure: str | None = Field(default=None, description="Property tenure")
    # A fraction in the closed interval [0.0, 1.0] when supplied.
    ownership_share: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Ownership share",
    )
    usage: PropertyUsage | None = Field(default=None, description="Property usage type")
|
||||
|
||||
|
||||
class Payment(BaseEntity):
    """Payment transaction entity"""

    payment_id: str = Field(..., description="Payment identifier")
    payment_date: date = Field(..., description="Payment date")
    # Signed amount: no lower bound is applied.
    amount: Decimal = Field(
        ...,
        description="Payment amount (positive for credit, negative for debit)",
    )
    # Exactly three uppercase letters.
    currency: str = Field(..., pattern=r"^[A-Z]{3}$", description="Currency code")
    # Free-form string; the allowed values are not enforced by this model.
    direction: str = Field(..., description="Payment direction (credit/debit)")

    description: str | None = Field(default=None, description="Payment description")
    reference: str | None = Field(default=None, description="Payment reference")
    balance_after: Decimal | None = Field(
        default=None,
        description="Account balance after payment",
    )
|
||||
|
||||
|
||||
class Calculation(BaseEntity):
    """Tax calculation entity"""

    calculation_id: str = Field(..., description="Calculation identifier")
    schedule: str = Field(..., description="Tax schedule (SA100, SA103, etc.)")
    # Four digits, a hyphen, then two digits.
    tax_year: str = Field(
        ...,
        pattern=r"^\d{4}-\d{2}$",
        description="Tax year (e.g., 2023-24)",
    )

    # Totals are non-negative; net_profit has no lower bound.
    total_income: Decimal | None = Field(default=None, ge=0, description="Total income")
    total_expenses: Decimal | None = Field(
        default=None,
        ge=0,
        description="Total expenses",
    )
    net_profit: Decimal | None = Field(default=None, description="Net profit/loss")

    # NOTE(review): datetime.utcnow returns a naive datetime and is
    # deprecated since Python 3.12.
    calculated_at: datetime = Field(
        default_factory=datetime.utcnow,
        description="Calculation timestamp",
    )
|
||||
|
||||
|
||||
class FormBox(BaseEntity):
    """Form box entity"""

    form: str = Field(..., description="Form identifier (SA100, SA103, etc.)")
    box: str = Field(..., description="Box identifier")
    # Accepts a Decimal, a string or a bool.
    value: Decimal | str | bool = Field(..., description="Box value")
    description: str | None = Field(default=None, description="Box description")
    # Constrained to the closed interval [0.0, 1.0] when supplied.
    confidence: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Confidence score",
    )
|
||||
|
||||
|
||||
class Rule(BaseEntity):
    """Tax rule entity"""

    rule_id: str = Field(..., description="Rule identifier")
    name: str = Field(..., description="Rule name")
    description: str | None = Field(default=None, description="Rule description")
    # Defaults to "UK" when not supplied.
    jurisdiction: str = Field(default="UK", description="Tax jurisdiction")
    # Required list; the format of each entry is not validated here.
    tax_years: list[str] = Field(..., description="Applicable tax years")
    formula: str | None = Field(default=None, description="Rule formula")
    conditions: dict[str, Any] | None = Field(
        default=None,
        description="Rule conditions",
    )
|
||||
102
libs/schemas/enums.py
Normal file
102
libs/schemas/enums.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""Enumeration types for the tax system."""
|
||||
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class TaxpayerType(str, Enum):
    """Taxpayer types"""

    # Values are capitalised; members also behave as plain strings.
    INDIVIDUAL = "Individual"
    PARTNERSHIP = "Partnership"
    COMPANY = "Company"
|
||||
|
||||
|
||||
class DocumentKind(str, Enum):
    """Document types"""

    # Each value is the snake_case form of its member name.
    BANK_STATEMENT = "bank_statement"
    INVOICE = "invoice"
    RECEIPT = "receipt"
    P_AND_L = "p_and_l"
    BALANCE_SHEET = "balance_sheet"
    PAYSLIP = "payslip"
    DIVIDEND_VOUCHER = "dividend_voucher"
    PROPERTY_STATEMENT = "property_statement"
    PRIOR_RETURN = "prior_return"
    LETTER = "letter"
    CERTIFICATE = "certificate"
|
||||
|
||||
|
||||
class IncomeType(str, Enum):
    """Income types"""

    # Each value is the lowercase form of its member name.
    EMPLOYMENT = "employment"
    SELF_EMPLOYMENT = "self_employment"
    PROPERTY = "property"
    DIVIDEND = "dividend"
    INTEREST = "interest"
    OTHER = "other"
|
||||
|
||||
|
||||
class ExpenseType(str, Enum):
    """Expense types"""

    # Each value is the lowercase form of its member name.
    BUSINESS = "business"
    PROPERTY = "property"
    CAPITAL = "capital"
    PERSONAL = "personal"
|
||||
|
||||
|
||||
class PartySubtype(str, Enum):
    """Party subtypes"""

    # Values are capitalised.
    EMPLOYER = "Employer"
    PAYER = "Payer"
    BANK = "Bank"
    LANDLORD = "Landlord"
    TENANT = "Tenant"
    SUPPLIER = "Supplier"
    CLIENT = "Client"
|
||||
|
||||
|
||||
class PropertyUsage(str, Enum):
    """Property usage types"""

    # Each value is the lowercase form of its member name.
    RESIDENTIAL = "residential"
    FURNISHED_HOLIDAY_LETTING = "furnished_holiday_letting"
    COMMERCIAL = "commercial"
    MIXED = "mixed"
|
||||
|
||||
|
||||
class HealthStatus(str, Enum):
    """Health status values"""

    # Each value is the lowercase form of its member name.
    HEALTHY = "healthy"
    UNHEALTHY = "unhealthy"
    DEGRADED = "degraded"
|
||||
|
||||
|
||||
# Coverage evaluation enums
|
||||
class Role(str, Enum):
    """Evidence role in coverage evaluation"""

    # Values are uppercase and identical to the member names.
    REQUIRED = "REQUIRED"
    CONDITIONALLY_REQUIRED = "CONDITIONALLY_REQUIRED"
    OPTIONAL = "OPTIONAL"
|
||||
|
||||
|
||||
class Status(str, Enum):
    """Evidence status classification"""

    # Each value is the lowercase form of its member name.
    PRESENT_VERIFIED = "present_verified"
    PRESENT_UNVERIFIED = "present_unverified"
    MISSING = "missing"
    CONFLICTING = "conflicting"
|
||||
|
||||
|
||||
class OverallStatus(str, Enum):
    """Overall coverage status"""

    # Each value is the lowercase form of its member name.
    OK = "ok"
    PARTIAL = "partial"
    BLOCKING = "blocking"
|
||||
30
libs/schemas/errors.py
Normal file
30
libs/schemas/errors.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""Error response models."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ErrorResponse(BaseModel):
    """RFC7807 Problem+JSON error response"""

    # Required problem-details members.
    type: str = Field(..., description="Error type URI")
    title: str = Field(..., description="Error title")
    status: int = Field(..., description="HTTP status code")
    detail: str = Field(..., description="Error detail")
    instance: str = Field(..., description="Error instance URI")

    # Optional extension member.
    trace_id: str | None = Field(default=None, description="Trace identifier")
|
||||
|
||||
|
||||
# NOTE(review): this model shares its name with pydantic's own
# ValidationError exception; take care when importing both.
class ValidationError(BaseModel):
    """Validation error details"""

    field: str = Field(..., description="Field name")
    message: str = Field(..., description="Error message")
    # Required, but may hold a value of any type.
    value: Any = Field(..., description="Invalid value")
|
||||
|
||||
|
||||
class ValidationErrorResponse(ErrorResponse):
    """Validation error response with field details"""

    # Required list of per-field errors, added to the ErrorResponse fields.
    errors: list[ValidationError] = Field(
        ...,
        description="Validation errors",
    )
|
||||
32
libs/schemas/health.py
Normal file
32
libs/schemas/health.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""Health check models."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .enums import HealthStatus
|
||||
|
||||
|
||||
class HealthCheck(BaseModel):
    """Health check response"""

    # Required: overall status reported for the service.
    status: HealthStatus = Field(..., description="Overall health status")

    # Defaults to the moment the model is instantiated. ``datetime.utcnow`` is
    # deprecated since Python 3.12 and returns a *naive* datetime, so the
    # default is a timezone-aware UTC timestamp instead.
    # NOTE(review): callers comparing this value with naive datetimes must
    # switch to aware ones.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="Check timestamp",
    )

    # Required: version string of the reporting service.
    version: str = Field(..., description="Service version")

    # Mapping of check name to a free-form detail dict; a fresh empty dict is
    # created per instance when omitted.
    checks: dict[str, dict[str, Any]] = Field(
        default_factory=dict,
        description="Individual checks",
    )
|
||||
|
||||
|
||||
class ServiceHealth(BaseModel):
    """Individual service health status"""

    # Required identification and status of the service.
    name: str = Field(..., description="Service name")
    status: HealthStatus = Field(..., description="Service health status")

    # Optional diagnostics; both default to None when not supplied.
    response_time_ms: float | None = Field(
        default=None,
        description="Response time in milliseconds",
    )
    error: str | None = Field(
        default=None,
        description="Error message if unhealthy",
    )
|
||||
65
libs/schemas/requests.py
Normal file
65
libs/schemas/requests.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""API request models."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from .enums import DocumentKind
|
||||
|
||||
|
||||
class DocumentUploadRequest(BaseModel):
    """Request model for document upload"""

    # All three members are required; ``kind`` must be a DocumentKind value.
    tenant_id: str = Field(..., description="Tenant identifier")
    kind: DocumentKind = Field(..., description="Document type")
    source: str = Field(..., description="Document source")
|
||||
|
||||
|
||||
class ExtractionRequest(BaseModel):
    """Request model for document extraction"""

    # Free-form strategy name; "hybrid" is used when the caller omits it.
    # No set of allowed values is enforced by this model.
    strategy: str = Field(
        default="hybrid",
        description="Extraction strategy",
    )
|
||||
|
||||
|
||||
class RAGSearchRequest(BaseModel):
    """Request model for RAG search"""

    # Required, and must contain at least one character.
    query: str = Field(..., min_length=1, description="Search query")

    # Optional filters; None means the filter was not supplied.
    tax_year: str | None = Field(default=None, description="Tax year filter")
    jurisdiction: str | None = Field(
        default=None,
        description="Jurisdiction filter",
    )

    # Result count, constrained to 1..100 inclusive (10 when omitted).
    k: int = Field(default=10, ge=1, le=100, description="Number of results")
|
||||
|
||||
|
||||
class ScheduleComputeRequest(BaseModel):
    """Request model for schedule computation"""

    # Must be four digits, a hyphen, then two digits (e.g. "2023-24").
    tax_year: str = Field(
        ...,
        pattern=r"^\d{4}-\d{2}$",
        description="Tax year",
    )

    # Required identifiers of the taxpayer and the schedule to compute.
    taxpayer_id: str = Field(..., description="Taxpayer identifier")
    schedule_id: str = Field(..., description="Schedule identifier")
|
||||
|
||||
|
||||
class HMRCSubmissionRequest(BaseModel):
    """Request model for HMRC submission"""

    # Must be four digits, a hyphen, then two digits (e.g. "2023-24").
    tax_year: str = Field(
        ...,
        pattern=r"^\d{4}-\d{2}$",
        description="Tax year",
    )
    taxpayer_id: str = Field(..., description="Taxpayer identifier")

    # Defaults to True, so callers must pass dry_run=False explicitly to
    # turn the dry-run flag off.
    dry_run: bool = Field(default=True, description="Dry run flag")
|
||||
|
||||
|
||||
class FirmSyncRequest(BaseModel):
    """Request to sync firm data"""

    # Unknown fields are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    # Required: which firm to sync and which system to sync it with.
    firm_id: str = Field(..., description="Firm identifier")
    system: str = Field(
        ...,
        description="Practice management system to sync with",
    )

    # Plain string; the description lists "full" and "incremental", but the
    # model itself does not restrict the value.
    sync_type: str = Field(
        default="full",
        description="Type of sync: full, incremental",
    )
    force_refresh: bool = Field(
        default=False,
        description="Force refresh of cached data",
    )

    # Required free-form mapping; its keys are not validated by this model.
    connection_config: dict[str, Any] = Field(
        ...,
        description="Configuration for connecting to the practice management system",
    )
|
||||
69
libs/schemas/responses.py
Normal file
69
libs/schemas/responses.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""API response models."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class DocumentUploadResponse(BaseModel):
    """Response model for document upload"""

    # All three members are required strings.
    doc_id: str = Field(..., description="Document identifier")
    s3_url: str = Field(..., description="S3 URL")
    checksum: str = Field(..., description="Document checksum")
|
||||
|
||||
|
||||
class ExtractionResponse(BaseModel):
    """Response model for document extraction"""

    extraction_id: str = Field(..., description="Extraction identifier")

    # Constrained to the closed interval [0.0, 1.0].
    confidence: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Overall confidence",
    )

    # Free-form payloads; their inner structure is not validated here.
    extracted_fields: dict[str, Any] = Field(
        ...,
        description="Extracted fields",
    )
    provenance: list[dict[str, Any]] = Field(
        ...,
        description="Provenance information",
    )
|
||||
|
||||
|
||||
class RAGSearchResponse(BaseModel):
    """Response model for RAG search"""

    # Three required lists of free-form dicts; item structure is not
    # validated by this model.
    chunks: list[dict[str, Any]] = Field(..., description="Retrieved chunks")
    citations: list[dict[str, Any]] = Field(
        ...,
        description="Source citations",
    )
    kg_hints: list[dict[str, Any]] = Field(
        ...,
        description="Knowledge graph hints",
    )

    # Constrained to the closed interval [0.0, 1.0].
    calibrated_confidence: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Calibrated confidence",
    )
|
||||
|
||||
|
||||
class ScheduleComputeResponse(BaseModel):
    """Response model for schedule computation"""

    # Required identifiers of the calculation and of the schedule.
    calculation_id: str = Field(..., description="Calculation identifier")
    schedule: str = Field(..., description="Schedule identifier")

    # Mapping of string key to a free-form dict for each computed box.
    form_boxes: dict[str, dict[str, Any]] = Field(
        ...,
        description="Computed form boxes",
    )

    # Required list of free-form evidence records.
    evidence_trail: list[dict[str, Any]] = Field(
        ...,
        description="Evidence trail",
    )
|
||||
|
||||
|
||||
class HMRCSubmissionResponse(BaseModel):
    """Response model for HMRC submission"""

    submission_id: str = Field(..., description="Submission identifier")

    # Plain string; no set of allowed status values is enforced here.
    status: str = Field(..., description="Submission status")

    # Optional; None when no reference is supplied.
    hmrc_reference: str | None = Field(
        default=None,
        description="HMRC reference",
    )

    # Required; unlike the reference above, there is no default.
    submission_timestamp: datetime = Field(
        ...,
        description="Submission timestamp",
    )
    validation_results: dict[str, Any] = Field(
        ...,
        description="Validation results",
    )
|
||||
|
||||
|
||||
class FirmSyncResponse(BaseModel):
    """Response from firm sync operation"""

    # Unknown fields are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    firm_id: str = Field(..., description="Firm identifier")

    # Plain string; the description lists success, error and partial, but
    # the model itself does not restrict the value.
    status: str = Field(
        ...,
        description="Sync status: success, error, partial",
    )
    message: str = Field(..., description="Status message")

    # Both default to "nothing": zero entities and a fresh empty list.
    synced_entities: int = Field(
        default=0,
        description="Number of entities synced",
    )
    errors: list[str] = Field(
        default_factory=list,
        description="List of errors encountered",
    )
|
||||
69
libs/schemas/utils.py
Normal file
69
libs/schemas/utils.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""Utility functions for schema export."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .entities import (
|
||||
Account,
|
||||
Calculation,
|
||||
Document,
|
||||
Evidence,
|
||||
ExpenseItem,
|
||||
FormBox,
|
||||
IncomeItem,
|
||||
Party,
|
||||
Payment,
|
||||
PropertyAsset,
|
||||
Rule,
|
||||
TaxpayerProfile,
|
||||
)
|
||||
from .requests import (
|
||||
DocumentUploadRequest,
|
||||
ExtractionRequest,
|
||||
FirmSyncRequest,
|
||||
HMRCSubmissionRequest,
|
||||
RAGSearchRequest,
|
||||
ScheduleComputeRequest,
|
||||
)
|
||||
from .responses import (
|
||||
DocumentUploadResponse,
|
||||
ExtractionResponse,
|
||||
FirmSyncResponse,
|
||||
HMRCSubmissionResponse,
|
||||
RAGSearchResponse,
|
||||
ScheduleComputeResponse,
|
||||
)
|
||||
|
||||
|
||||
def get_entity_schemas() -> dict[str, dict[str, Any]]:
    """Export JSON schemas for all models.

    Returns:
        A dict mapping each exported model's name to the result of its
        ``model_json_schema()`` call. Core entities come first, followed by
        the request/response models in request-then-response pairs.
    """
    # Insertion order of this table is the order of the returned dict.
    exported_models = {
        # Core entities
        "TaxpayerProfile": TaxpayerProfile,
        "Document": Document,
        "Evidence": Evidence,
        "IncomeItem": IncomeItem,
        "ExpenseItem": ExpenseItem,
        "Party": Party,
        "Account": Account,
        "PropertyAsset": PropertyAsset,
        "Payment": Payment,
        "Calculation": Calculation,
        "FormBox": FormBox,
        "Rule": Rule,
        # Request/Response models
        "DocumentUploadRequest": DocumentUploadRequest,
        "DocumentUploadResponse": DocumentUploadResponse,
        "ExtractionRequest": ExtractionRequest,
        "ExtractionResponse": ExtractionResponse,
        "RAGSearchRequest": RAGSearchRequest,
        "RAGSearchResponse": RAGSearchResponse,
        "ScheduleComputeRequest": ScheduleComputeRequest,
        "ScheduleComputeResponse": ScheduleComputeResponse,
        "HMRCSubmissionRequest": HMRCSubmissionRequest,
        "HMRCSubmissionResponse": HMRCSubmissionResponse,
        "FirmSyncRequest": FirmSyncRequest,
        "FirmSyncResponse": FirmSyncResponse,
    }

    return {
        model_name: model_cls.model_json_schema()
        for model_name, model_cls in exported_models.items()
    }
|
||||
Reference in New Issue
Block a user