Initial commit
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled

This commit is contained in:
harkon
2025-10-11 08:41:36 +01:00
commit b324ff09ef
276 changed files with 55220 additions and 0 deletions

175
libs/schemas/__init__.py Normal file
View File

@@ -0,0 +1,175 @@
"""Shared Pydantic models mirroring ontology entities."""
# Import coverage models (the enums are imported further down, from .enums)
from .coverage.core import (
CompiledCoveragePolicy,
ConflictRules,
CoveragePolicy,
CrossCheck,
Defaults,
EvidenceItem,
GuidanceRef,
Privacy,
QuestionTemplates,
SchedulePolicy,
StatusClassifier,
StatusClassifierConfig,
TaxYearBoundary,
Trigger,
Validity,
)
from .coverage.evaluation import (
BlockingItem,
Citation,
ClarifyContext,
ClarifyResponse,
CoverageGap,
CoverageItem,
CoverageReport,
FoundEvidence,
ScheduleCoverage,
UploadOption,
)
from .coverage.utils import CoverageAudit, PolicyError, PolicyVersion, ValidationResult
# Import all entities
from .entities import (
Account,
BaseEntity,
Calculation,
Document,
Evidence,
ExpenseItem,
FormBox,
IncomeItem,
Party,
Payment,
PropertyAsset,
Rule,
TaxpayerProfile,
)
from .enums import (
DocumentKind,
ExpenseType,
HealthStatus,
IncomeType,
OverallStatus,
PartySubtype,
PropertyUsage,
Role,
Status,
TaxpayerType,
)
# Import error models
from .errors import ErrorResponse, ValidationError, ValidationErrorResponse
# Import health models
from .health import HealthCheck, ServiceHealth
# Import request models
from .requests import (
DocumentUploadRequest,
ExtractionRequest,
FirmSyncRequest,
HMRCSubmissionRequest,
RAGSearchRequest,
ScheduleComputeRequest,
)
# Import response models
from .responses import (
DocumentUploadResponse,
ExtractionResponse,
FirmSyncResponse,
HMRCSubmissionResponse,
RAGSearchResponse,
ScheduleComputeResponse,
)
# Import utility functions
from .utils import get_entity_schemas
__all__ = [
# Enums
"DocumentKind",
"ExpenseType",
"HealthStatus",
"IncomeType",
"OverallStatus",
"PartySubtype",
"PropertyUsage",
"Role",
"Status",
"TaxpayerType",
# Entities
"Account",
"BaseEntity",
"Calculation",
"Document",
"Evidence",
"ExpenseItem",
"FormBox",
"IncomeItem",
"Party",
"Payment",
"PropertyAsset",
"Rule",
"TaxpayerProfile",
# Errors
"ErrorResponse",
"ValidationError",
"ValidationErrorResponse",
# Health
"HealthCheck",
"ServiceHealth",
# Requests
"DocumentUploadRequest",
"ExtractionRequest",
"FirmSyncRequest",
"HMRCSubmissionRequest",
"RAGSearchRequest",
"ScheduleComputeRequest",
# Responses
"DocumentUploadResponse",
"ExtractionResponse",
"FirmSyncResponse",
"HMRCSubmissionResponse",
"RAGSearchResponse",
"ScheduleComputeResponse",
# Utils
"get_entity_schemas",
# Coverage core models
"Validity",
"StatusClassifier",
"StatusClassifierConfig",
"EvidenceItem",
"CrossCheck",
"SchedulePolicy",
"Trigger",
"GuidanceRef",
"QuestionTemplates",
"ConflictRules",
"TaxYearBoundary",
"Defaults",
"Privacy",
"CoveragePolicy",
"CompiledCoveragePolicy",
# Coverage evaluation models
"FoundEvidence",
"Citation",
"CoverageItem",
"ScheduleCoverage",
"BlockingItem",
"CoverageReport",
"CoverageGap",
"ClarifyContext",
"UploadOption",
"ClarifyResponse",
# Coverage utility models
"PolicyError",
"ValidationResult",
"PolicyVersion",
"CoverageAudit",
]

View File

View File

@@ -0,0 +1,146 @@
"""Core coverage policy models."""
from collections.abc import Callable
from datetime import datetime
from typing import Any
from pydantic import BaseModel, Field
from ..enums import Role
class Validity(BaseModel):
    """Constraints on when a piece of evidence counts as valid.

    Every field is defaulted, so ``Validity()`` can be used as-is.
    """

    within_tax_year: bool = False
    # Optional; stored as a plain string and None when unset.
    available_by: str | None = None
    date_tolerance_days: int = 30
class StatusClassifier(BaseModel):
    """Thresholds and flags used when classifying the status of evidence.

    All fields carry defaults: ``min_ocr`` 0.82, ``min_extract`` 0.85, both
    date flags True, and an empty ``conflict_rules`` list.
    """

    min_ocr: float = 0.82
    min_extract: float = 0.85
    date_in_year: bool = True
    date_in_year_or_tolerance: bool = True
    # default_factory gives every instance its own list object.
    conflict_rules: list[str] = Field(default_factory=list)
class StatusClassifierConfig(BaseModel):
    """Full status-classifier configuration: one classifier per status.

    ``present_verified``, ``present_unverified`` and ``conflicting`` must be
    supplied; ``missing`` falls back to a default-constructed classifier.
    """

    present_verified: StatusClassifier
    present_unverified: StatusClassifier
    conflicting: StatusClassifier
    # The class itself is the zero-argument factory.
    missing: StatusClassifier = Field(default_factory=StatusClassifier)
class EvidenceItem(BaseModel):
    """Definition of one evidence requirement.

    Only ``id`` and ``role`` are required; the collections default to empty
    and ``validity`` to a default-constructed ``Validity``.
    """

    id: str
    role: Role
    # Optional condition expression kept as a string; None when absent.
    condition: str | None = None
    boxes: list[str] = Field(default_factory=list)
    acceptable_alternatives: list[str] = Field(default_factory=list)
    validity: Validity = Field(default_factory=Validity)
    reasons: dict[str, str] = Field(default_factory=dict)
class CrossCheck(BaseModel):
    """A named cross-validation rule; both fields are required strings."""

    name: str
    logic: str
class SchedulePolicy(BaseModel):
    """Policy attached to a single tax schedule.

    Nothing is required: the hint defaults to None and every collection to
    an empty list/dict.
    """

    guidance_hint: str | None = None
    evidence: list[EvidenceItem] = Field(default_factory=list)
    cross_checks: list[CrossCheck] = Field(default_factory=list)
    selection_rule: dict[str, str] = Field(default_factory=dict)
    # Free-form: values may be of any type.
    notes: dict[str, Any] = Field(default_factory=dict)
class Trigger(BaseModel):
    """Condition that triggers a schedule.

    Holds two string lists, ``any_of`` and ``all_of``; both default to empty.
    """

    any_of: list[str] = Field(default_factory=list)
    all_of: list[str] = Field(default_factory=list)
class GuidanceRef(BaseModel):
    """Pointer to a guidance document: its ``doc_id`` and its ``kind``."""

    doc_id: str
    kind: str
class QuestionTemplates(BaseModel):
    """Text templates for generating clarifying questions.

    Both mappings are string-to-string and default to empty.
    """

    default: dict[str, str] = Field(default_factory=dict)
    reasons: dict[str, str] = Field(default_factory=dict)
class ConflictRules(BaseModel):
    """Rules for dealing with conflicting evidence.

    ``precedence`` is a list of strings and ``escalation`` a free-form
    mapping; both default to empty.
    """

    precedence: list[str] = Field(default_factory=list)
    escalation: dict[str, Any] = Field(default_factory=dict)
class TaxYearBoundary(BaseModel):
    """Start and end of a tax year, both required and stored as strings."""

    # NOTE(review): the string format is not validated by this model.
    start: str
    end: str
class Defaults(BaseModel):
    """Policy-wide default configuration values.

    Every field is defaulted: an empty threshold mapping, a 30-day date
    tolerance, and both boolean switches enabled.
    """

    confidence_thresholds: dict[str, float] = Field(default_factory=dict)
    date_tolerance_days: int = 30
    require_lineage_bbox: bool = True
    allow_bank_substantiation: bool = True
class Privacy(BaseModel):
    """Privacy / PII handling configuration.

    ``vector_pii_free`` defaults to True; ``redact_patterns`` defaults to an
    empty list of strings.
    """

    vector_pii_free: bool = True
    redact_patterns: list[str] = Field(default_factory=list)
class CoveragePolicy(BaseModel):
    """Top-level coverage policy document.

    Identification fields and the nested configuration sections are
    required; the list/dict collections default to empty.
    """

    # --- identification (required) ---
    version: str
    jurisdiction: str
    tax_year: str
    tax_year_boundary: TaxYearBoundary
    defaults: Defaults

    # --- collections (default to empty) ---
    document_kinds: list[str] = Field(default_factory=list)
    guidance_refs: dict[str, GuidanceRef] = Field(default_factory=dict)
    triggers: dict[str, Trigger] = Field(default_factory=dict)
    schedules: dict[str, SchedulePolicy] = Field(default_factory=dict)

    # --- nested configuration sections (required) ---
    status_classifier: StatusClassifierConfig
    conflict_resolution: ConflictRules
    question_templates: QuestionTemplates
    privacy: Privacy
class CompiledCoveragePolicy(BaseModel):
    """A ``CoveragePolicy`` bundled with its compiled predicates.

    ``policy``, ``compiled_at`` and ``hash`` are required; the predicate
    mapping and the source-file list default to empty.
    """

    policy: CoveragePolicy
    # Maps a string key to a callable taking two strings and returning bool.
    compiled_predicates: dict[str, Callable[[str, str], bool]] = Field(
        default_factory=dict,
    )
    compiled_at: datetime
    hash: str
    source_files: list[str] = Field(default_factory=list)

View File

@@ -0,0 +1,112 @@
"""Coverage evaluation models."""
from datetime import datetime
from pydantic import BaseModel, Field
from ..enums import OverallStatus, Role, Status
class FoundEvidence(BaseModel):
    """A piece of evidence located in the knowledge graph.

    ``doc_id`` and ``kind`` are required; the three confidence values
    default to 0.0, ``pages`` to an empty list, ``bbox`` and ``date`` to None.
    """

    doc_id: str
    kind: str
    confidence: float = 0.0
    pages: list[int] = Field(default_factory=list)
    # Optional mapping of string keys to floats; key names are not fixed here.
    bbox: dict[str, float] | None = None
    ocr_confidence: float = 0.0
    extract_confidence: float = 0.0
    date: str | None = None
class Citation(BaseModel):
    """Reference to a cited source.

    Every field is optional and defaults to None, so a citation may carry
    any combination of rule, document, URL and location details.
    """

    rule_id: str | None = None
    doc_id: str | None = None
    url: str | None = None
    locator: str | None = None
    section_id: str | None = None
    page: int | None = None
    bbox: dict[str, float] | None = None
class CoverageItem(BaseModel):
    """Coverage result for one evidence item.

    ``id``, ``role`` and ``status`` are required; ``reason`` defaults to an
    empty string and all list fields to empty lists.
    """

    id: str
    role: Role
    status: Status
    boxes: list[str] = Field(default_factory=list)
    found: list[FoundEvidence] = Field(default_factory=list)
    acceptable_alternatives: list[str] = Field(default_factory=list)
    reason: str = ""
    citations: list[Citation] = Field(default_factory=list)
class ScheduleCoverage(BaseModel):
    """Coverage result for one schedule: its id, status and item results."""

    schedule_id: str
    status: OverallStatus
    evidence: list[CoverageItem] = Field(default_factory=list)
class BlockingItem(BaseModel):
    """Identifies an item that blocks completion by schedule and evidence id."""

    schedule_id: str
    evidence_id: str
def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stands in for ``datetime.utcnow()``, which is deprecated since Python
    3.12. The tzinfo is stripped on purpose so the default keeps the naive
    shape that ``utcnow()`` produced.
    """
    # Function-scope import: the module header only imports ``datetime``.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class CoverageReport(BaseModel):
    """Complete coverage evaluation report.

    ``tax_year``, ``taxpayer_id`` and ``overall_status`` are required. List
    fields default to empty, ``policy_version`` to an empty string, and
    ``evaluated_at`` to the current UTC time (naive) at construction.
    """

    tax_year: str
    taxpayer_id: str
    schedules_required: list[str] = Field(default_factory=list)
    overall_status: OverallStatus
    coverage: list[ScheduleCoverage] = Field(default_factory=list)
    blocking_items: list[BlockingItem] = Field(default_factory=list)
    # Naive UTC timestamp, obtained without the deprecated utcnow().
    evaluated_at: datetime = Field(default_factory=_utcnow_naive)
    policy_version: str = ""
class CoverageGap(BaseModel):
    """A coverage gap that needs clarification.

    The schedule/evidence ids, ``role`` and ``reason`` are required; the
    three list fields default to empty.
    """

    schedule_id: str
    evidence_id: str
    role: Role
    reason: str
    boxes: list[str] = Field(default_factory=list)
    citations: list[Citation] = Field(default_factory=list)
    acceptable_alternatives: list[str] = Field(default_factory=list)
class ClarifyContext(BaseModel):
    """Context accompanying a clarifying question; all fields are required."""

    tax_year: str
    taxpayer_id: str
    jurisdiction: str
class UploadOption(BaseModel):
    """One upload option offered to the user.

    ``label`` and ``upload_endpoint`` are required; ``accepted_formats``
    defaults to an empty list.
    """

    label: str
    accepted_formats: list[str] = Field(default_factory=list)
    upload_endpoint: str
class ClarifyResponse(BaseModel):
    """Response to a clarifying-question request.

    The question text and its justification are required; ``blocking``
    defaults to False and the list fields to empty.
    """

    question_text: str
    why_it_is_needed: str
    citations: list[Citation] = Field(default_factory=list)
    options_to_provide: list[UploadOption] = Field(default_factory=list)
    blocking: bool = False
    boxes_affected: list[str] = Field(default_factory=list)

View File

@@ -0,0 +1,48 @@
"""Utility models for coverage system."""
from datetime import datetime
from typing import Any
from pydantic import BaseModel, Field
from ..enums import OverallStatus
class PolicyError(Exception):
    """Raised when a policy cannot be loaded or fails validation."""
class ValidationResult(BaseModel):
    """Outcome of validating a policy.

    ``ok`` is required; ``errors`` and ``warnings`` default to empty lists.
    """

    ok: bool
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
class PolicyVersion(BaseModel):
    """Record describing one version of a policy.

    ``id`` and ``tenant_id`` are optional (None by default) and
    ``source_files`` defaults to empty; everything else is required.
    """

    id: int | None = None
    version: str
    jurisdiction: str
    tax_year: str
    tenant_id: str | None = None
    source_files: list[str] = Field(default_factory=list)
    compiled_at: datetime
    hash: str
def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stands in for ``datetime.utcnow()``, which is deprecated since Python
    3.12. The tzinfo is stripped on purpose so the default keeps the naive
    shape that ``utcnow()`` produced.
    """
    # Function-scope import: the module header only imports ``datetime``.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class CoverageAudit(BaseModel):
    """Audit record of a coverage evaluation.

    ``taxpayer_id``, ``tax_year``, ``policy_version`` and ``overall_status``
    are required. ``id`` and ``trace_id`` default to None, ``blocking_items``
    to an empty list, and ``created_at`` to the current UTC time (naive).
    """

    id: int | None = None
    taxpayer_id: str
    tax_year: str
    policy_version: str
    overall_status: OverallStatus
    # Free-form dicts rather than typed models.
    blocking_items: list[dict[str, Any]] = Field(default_factory=list)
    # Naive UTC timestamp, obtained without the deprecated utcnow().
    created_at: datetime = Field(default_factory=_utcnow_naive)
    trace_id: str | None = None

230
libs/schemas/entities.py Normal file
View File

@@ -0,0 +1,230 @@
"""Core business entities with temporal modeling."""
from datetime import date, datetime
from decimal import Decimal
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from .enums import (
DocumentKind,
ExpenseType,
IncomeType,
PartySubtype,
PropertyUsage,
TaxpayerType,
)
def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stands in for ``datetime.utcnow()``, which is deprecated since Python
    3.12. The tzinfo is stripped on purpose so the default keeps the naive
    shape that ``utcnow()`` produced.
    """
    # Function-scope import: the module header only imports date/datetime.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class BaseEntity(BaseModel):
    """Common base for business entities, carrying bitemporal fields.

    ``valid_from``/``valid_to`` describe when a fact holds in reality, while
    ``asserted_at``/``retracted_at`` describe when it was recorded in or
    retracted from the system. ``valid_from``, ``source`` and
    ``extractor_version`` are required.
    """

    # Strip surrounding whitespace from strings, re-validate on attribute
    # assignment, and store enum fields as their underlying values.
    model_config = ConfigDict(
        str_strip_whitespace=True, validate_assignment=True, use_enum_values=True
    )

    # Temporal fields (bitemporal modeling)
    valid_from: datetime = Field(
        ..., description="When the fact became valid in reality"
    )
    valid_to: datetime | None = Field(
        default=None, description="When the fact ceased to be valid"
    )
    # Naive UTC timestamp, obtained without the deprecated utcnow().
    asserted_at: datetime = Field(
        default_factory=_utcnow_naive, description="When recorded in system"
    )
    retracted_at: datetime | None = Field(
        default=None, description="When retracted from system"
    )

    # Provenance
    source: str = Field(..., description="Source of the information")
    extractor_version: str = Field(..., description="Version of extraction system")
class TaxpayerProfile(BaseEntity):
    """Profile of a taxpayer.

    ``taxpayer_id`` and ``type`` are required. ``utr`` (10 digits) and
    ``ni_number`` are optional but pattern-checked when supplied.
    """

    taxpayer_id: str = Field(..., description="Unique taxpayer identifier")
    type: TaxpayerType = Field(..., description="Type of taxpayer")
    utr: str | None = Field(
        default=None,
        pattern=r"^\d{10}$",
        description="Unique Taxpayer Reference",
    )
    ni_number: str | None = Field(
        default=None,
        pattern=r"^[A-CEGHJ-PR-TW-Z]{2}\d{6}[A-D]$",
        description="National Insurance Number",
    )
    residence: str | None = Field(default=None, description="Tax residence")
class Document(BaseEntity):
    """A stored document.

    ``doc_id`` must look like ``doc_`` plus 16 lowercase hex characters and
    ``checksum`` like 64 lowercase hex characters. ``file_size``, ``pages``
    and ``date_range`` are optional.
    """

    doc_id: str = Field(
        ...,
        pattern=r"^doc_[a-f0-9]{16}$",
        description="Document identifier",
    )
    kind: DocumentKind = Field(..., description="Type of document")
    # Re-declares the inherited ``source`` field with a document-specific
    # description; the type stays ``str`` and it stays required.
    source: str = Field(..., description="Source of document")
    mime: str = Field(..., description="MIME type")
    checksum: str = Field(
        ...,
        pattern=r"^[a-f0-9]{64}$",
        description="SHA-256 checksum",
    )
    file_size: int | None = Field(
        default=None, ge=0, description="File size in bytes"
    )
    pages: int | None = Field(default=None, ge=1, description="Number of pages")
    date_range: dict[str, date] | None = Field(
        default=None, description="Document date range"
    )
class Evidence(BaseEntity):
    """Evidence snippet tied to a location in a source document.

    ``snippet_id``, ``doc_ref``, ``page`` (>= 1) and ``text_hash`` (64
    lowercase hex characters) are required; ``bbox`` and ``ocr_confidence``
    (0.0 to 1.0) are optional.
    """

    snippet_id: str = Field(..., description="Evidence snippet identifier")
    doc_ref: str = Field(..., description="Reference to source document")
    page: int = Field(..., ge=1, description="Page number")
    # The list length is not enforced by the model.
    bbox: list[float] | None = Field(
        default=None, description="Bounding box coordinates [x1, y1, x2, y2]"
    )
    text_hash: str = Field(
        ...,
        pattern=r"^[a-f0-9]{64}$",
        description="SHA-256 hash of extracted text",
    )
    ocr_confidence: float | None = Field(
        default=None, ge=0.0, le=1.0, description="OCR confidence score"
    )
class IncomeItem(BaseEntity):
    """A single income item.

    ``income_id``, ``type``, ``gross`` and ``currency`` (three upper-case
    letters) are required. Monetary amounts are ``Decimal`` and must be >= 0.
    """

    income_id: str = Field(..., description="Income item identifier")
    type: IncomeType = Field(..., description="Type of income")

    # Amounts
    gross: Decimal = Field(..., ge=0, description="Gross amount")
    net: Decimal | None = Field(default=None, ge=0, description="Net amount")
    tax_withheld: Decimal | None = Field(
        default=None, ge=0, description="Tax withheld"
    )
    currency: str = Field(..., pattern=r"^[A-Z]{3}$", description="Currency code")

    # Period and free text
    period_start: date | None = Field(
        default=None, description="Income period start"
    )
    period_end: date | None = Field(default=None, description="Income period end")
    description: str | None = Field(default=None, description="Income description")
class ExpenseItem(BaseEntity):
    """A single expense item.

    ``expense_id``, ``type``, ``amount`` and ``currency`` (three upper-case
    letters) are required. All ``Decimal`` amounts must be >= 0.
    """

    expense_id: str = Field(..., description="Expense item identifier")
    type: ExpenseType = Field(..., description="Type of expense")
    amount: Decimal = Field(..., ge=0, description="Expense amount")
    currency: str = Field(..., pattern=r"^[A-Z]{3}$", description="Currency code")

    # Optional descriptive fields
    description: str | None = Field(
        default=None, description="Expense description"
    )
    category: str | None = Field(default=None, description="Expense category")

    # Optional tax-treatment flags; None means "not set".
    allowable: bool | None = Field(
        default=None, description="Whether expense is allowable"
    )
    capitalizable_flag: bool | None = Field(
        default=None, description="Whether expense should be capitalized"
    )

    # Optional VAT breakdown
    vat_amount: Decimal | None = Field(default=None, ge=0, description="VAT amount")
    net_amount: Decimal | None = Field(
        default=None, ge=0, description="Net amount excluding VAT"
    )
class Party(BaseEntity):
    """A party: a person or an organization.

    ``party_id`` and a non-empty ``name`` are required. ``vat_number`` (GB
    plus 9 or 12 digits) and ``utr`` (10 digits) are pattern-checked when
    supplied.
    """

    party_id: str = Field(..., description="Party identifier")
    name: str = Field(..., min_length=1, description="Party name")
    subtype: PartySubtype | None = Field(default=None, description="Party subtype")
    address: str | None = Field(default=None, description="Party address")
    vat_number: str | None = Field(
        default=None,
        pattern=r"^GB\d{9}$|^GB\d{12}$",
        description="UK VAT number",
    )
    utr: str | None = Field(
        default=None,
        pattern=r"^\d{10}$",
        description="Unique Taxpayer Reference",
    )
    reg_no: str | None = Field(default=None, description="Registration number")
    paye_reference: str | None = Field(default=None, description="PAYE reference")
class Account(BaseEntity):
    """A bank account.

    Only ``account_id`` is required. ``iban``, ``sort_code`` (``NN-NN-NN``)
    and ``account_no`` (8 digits) are pattern-checked when supplied;
    ``currency`` defaults to ``"GBP"``.
    """

    account_id: str = Field(..., description="Account identifier")
    iban: str | None = Field(
        default=None,
        pattern=r"^GB\d{2}[A-Z]{4}\d{14}$",
        description="UK IBAN",
    )
    sort_code: str | None = Field(
        default=None,
        pattern=r"^\d{2}-\d{2}-\d{2}$",
        description="Sort code",
    )
    account_no: str | None = Field(
        default=None,
        pattern=r"^\d{8}$",
        description="Account number",
    )
    institution: str | None = Field(
        default=None, description="Financial institution"
    )
    account_type: str | None = Field(default=None, description="Account type")
    # Unlike the other entities' currency fields, no pattern is applied here.
    currency: str = Field(default="GBP", description="Account currency")
class PropertyAsset(BaseEntity):
    """A property asset.

    ``property_id`` and ``address`` (at least 10 characters) are required.
    ``postcode`` is pattern-checked when supplied and ``ownership_share``
    must lie between 0.0 and 1.0.
    """

    property_id: str = Field(..., description="Property identifier")
    address: str = Field(..., min_length=10, description="Property address")
    postcode: str | None = Field(
        default=None,
        pattern=r"^[A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2}$",
        description="UK postcode",
    )
    tenure: str | None = Field(default=None, description="Property tenure")
    ownership_share: float | None = Field(
        default=None, ge=0.0, le=1.0, description="Ownership share"
    )
    usage: PropertyUsage | None = Field(
        default=None, description="Property usage type"
    )
class Payment(BaseEntity):
    """A payment transaction.

    ``payment_id``, ``payment_date``, ``amount``, ``currency`` (three
    upper-case letters) and ``direction`` are required.
    """

    payment_id: str = Field(..., description="Payment identifier")
    payment_date: date = Field(..., description="Payment date")
    # Signed amount: no ge/le bound, so negative values are accepted.
    amount: Decimal = Field(
        ..., description="Payment amount (positive for credit, negative for debit)"
    )
    currency: str = Field(..., pattern=r"^[A-Z]{3}$", description="Currency code")
    # Plain string: the model does not restrict it to "credit"/"debit".
    direction: str = Field(..., description="Payment direction (credit/debit)")
    description: str | None = Field(
        default=None, description="Payment description"
    )
    reference: str | None = Field(default=None, description="Payment reference")
    balance_after: Decimal | None = Field(
        default=None, description="Account balance after payment"
    )
def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stands in for ``datetime.utcnow()``, which is deprecated since Python
    3.12. The tzinfo is stripped on purpose so the default keeps the naive
    shape that ``utcnow()`` produced.
    """
    # Function-scope import: the module header only imports date/datetime.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class Calculation(BaseEntity):
    """A tax calculation.

    ``calculation_id``, ``schedule`` and ``tax_year`` (``YYYY-YY``) are
    required. Totals are optional and non-negative; ``net_profit`` has no
    bound so it can be negative. ``calculated_at`` defaults to the current
    UTC time (naive).
    """

    calculation_id: str = Field(..., description="Calculation identifier")
    schedule: str = Field(..., description="Tax schedule (SA100, SA103, etc.)")
    tax_year: str = Field(
        ..., pattern=r"^\d{4}-\d{2}$", description="Tax year (e.g., 2023-24)"
    )
    total_income: Decimal | None = Field(
        default=None, ge=0, description="Total income"
    )
    total_expenses: Decimal | None = Field(
        default=None, ge=0, description="Total expenses"
    )
    net_profit: Decimal | None = Field(default=None, description="Net profit/loss")
    # Naive UTC timestamp, obtained without the deprecated utcnow().
    calculated_at: datetime = Field(
        default_factory=_utcnow_naive, description="Calculation timestamp"
    )
class FormBox(BaseEntity):
    """A single box on a tax form.

    ``form``, ``box`` and ``value`` are required. ``value`` may be a
    ``Decimal``, a string or a bool; ``confidence`` (0.0 to 1.0) is optional.
    """

    form: str = Field(..., description="Form identifier (SA100, SA103, etc.)")
    box: str = Field(..., description="Box identifier")
    value: Decimal | str | bool = Field(..., description="Box value")
    description: str | None = Field(default=None, description="Box description")
    confidence: float | None = Field(
        default=None, ge=0.0, le=1.0, description="Confidence score"
    )
class Rule(BaseEntity):
    """A tax rule.

    ``rule_id``, ``name`` and ``tax_years`` are required; ``jurisdiction``
    defaults to ``"UK"`` and the remaining fields are optional.
    """

    rule_id: str = Field(..., description="Rule identifier")
    name: str = Field(..., description="Rule name")
    description: str | None = Field(default=None, description="Rule description")
    jurisdiction: str = Field(default="UK", description="Tax jurisdiction")
    tax_years: list[str] = Field(..., description="Applicable tax years")
    formula: str | None = Field(default=None, description="Rule formula")
    conditions: dict[str, Any] | None = Field(
        default=None, description="Rule conditions"
    )

102
libs/schemas/enums.py Normal file
View File

@@ -0,0 +1,102 @@
"""Enumeration types for the tax system."""
from enum import Enum
class TaxpayerType(str, Enum):
    """Kinds of taxpayer; members are also ``str`` instances."""

    INDIVIDUAL = "Individual"
    PARTNERSHIP = "Partnership"
    COMPANY = "Company"
class DocumentKind(str, Enum):
    """Kinds of document; members are also ``str`` instances."""

    BANK_STATEMENT = "bank_statement"
    INVOICE = "invoice"
    RECEIPT = "receipt"
    P_AND_L = "p_and_l"
    BALANCE_SHEET = "balance_sheet"
    PAYSLIP = "payslip"
    DIVIDEND_VOUCHER = "dividend_voucher"
    PROPERTY_STATEMENT = "property_statement"
    PRIOR_RETURN = "prior_return"
    LETTER = "letter"
    CERTIFICATE = "certificate"
class IncomeType(str, Enum):
    """Categories of income; members are also ``str`` instances."""

    EMPLOYMENT = "employment"
    SELF_EMPLOYMENT = "self_employment"
    PROPERTY = "property"
    DIVIDEND = "dividend"
    INTEREST = "interest"
    OTHER = "other"
class ExpenseType(str, Enum):
    """Categories of expense; members are also ``str`` instances."""

    BUSINESS = "business"
    PROPERTY = "property"
    CAPITAL = "capital"
    PERSONAL = "personal"
class PartySubtype(str, Enum):
    """Subtypes of a party; values are capitalized, members are ``str``."""

    EMPLOYER = "Employer"
    PAYER = "Payer"
    BANK = "Bank"
    LANDLORD = "Landlord"
    TENANT = "Tenant"
    SUPPLIER = "Supplier"
    CLIENT = "Client"
class PropertyUsage(str, Enum):
    """Ways a property can be used; members are also ``str`` instances."""

    RESIDENTIAL = "residential"
    FURNISHED_HOLIDAY_LETTING = "furnished_holiday_letting"
    COMMERCIAL = "commercial"
    MIXED = "mixed"
class HealthStatus(str, Enum):
    """Possible health states; members are also ``str`` instances."""

    HEALTHY = "healthy"
    UNHEALTHY = "unhealthy"
    DEGRADED = "degraded"
# Coverage evaluation enums
class Role(str, Enum):
    """Role of an evidence item in coverage evaluation.

    Unlike most enums in this module the values are upper-case and equal to
    the member names.
    """

    REQUIRED = "REQUIRED"
    CONDITIONALLY_REQUIRED = "CONDITIONALLY_REQUIRED"
    OPTIONAL = "OPTIONAL"
class Status(str, Enum):
    """Classification of an evidence item's status; members are ``str``."""

    PRESENT_VERIFIED = "present_verified"
    PRESENT_UNVERIFIED = "present_unverified"
    MISSING = "missing"
    CONFLICTING = "conflicting"
class OverallStatus(str, Enum):
    """Overall coverage outcome; members are also ``str`` instances."""

    OK = "ok"
    PARTIAL = "partial"
    BLOCKING = "blocking"

30
libs/schemas/errors.py Normal file
View File

@@ -0,0 +1,30 @@
"""Error response models."""
from typing import Any
from pydantic import BaseModel, Field
class ErrorResponse(BaseModel):
    """Error payload shaped after RFC 7807 (Problem+JSON).

    All fields are required except ``trace_id``, which defaults to None.
    """

    type: str = Field(..., description="Error type URI")
    title: str = Field(..., description="Error title")
    status: int = Field(..., description="HTTP status code")
    detail: str = Field(..., description="Error detail")
    instance: str = Field(..., description="Error instance URI")
    trace_id: str | None = Field(default=None, description="Trace identifier")
class ValidationError(BaseModel):
    """Details of one failed field validation.

    This is a plain data model, not an exception; all three fields are
    required and ``value`` may be of any type.
    """

    field: str = Field(..., description="Field name")
    message: str = Field(..., description="Error message")
    value: Any = Field(..., description="Invalid value")
class ValidationErrorResponse(ErrorResponse):
    """``ErrorResponse`` extended with a required list of per-field errors."""

    errors: list[ValidationError] = Field(..., description="Validation errors")

32
libs/schemas/health.py Normal file
View File

@@ -0,0 +1,32 @@
"""Health check models."""
from datetime import datetime
from typing import Any
from pydantic import BaseModel, Field
from .enums import HealthStatus
def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stands in for ``datetime.utcnow()``, which is deprecated since Python
    3.12. The tzinfo is stripped on purpose so the default keeps the naive
    shape that ``utcnow()`` produced.
    """
    # Function-scope import: the module header only imports ``datetime``.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class HealthCheck(BaseModel):
    """Health check response.

    ``status`` and ``version`` are required; ``timestamp`` defaults to the
    current UTC time (naive) and ``checks`` to an empty mapping.
    """

    status: HealthStatus = Field(..., description="Overall health status")
    # Naive UTC timestamp, obtained without the deprecated utcnow().
    timestamp: datetime = Field(
        default_factory=_utcnow_naive, description="Check timestamp"
    )
    version: str = Field(..., description="Service version")
    # Maps a string key to a free-form dict of details.
    checks: dict[str, dict[str, Any]] = Field(
        default_factory=dict, description="Individual checks"
    )
class ServiceHealth(BaseModel):
    """Health status of an individual service.

    ``name`` and ``status`` are required; ``response_time_ms`` and ``error``
    default to None.
    """

    name: str = Field(..., description="Service name")
    status: HealthStatus = Field(..., description="Service health status")
    response_time_ms: float | None = Field(
        default=None, description="Response time in milliseconds"
    )
    error: str | None = Field(
        default=None, description="Error message if unhealthy"
    )

65
libs/schemas/requests.py Normal file
View File

@@ -0,0 +1,65 @@
"""API request models."""
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from .enums import DocumentKind
class DocumentUploadRequest(BaseModel):
    """Payload for uploading a document; all three fields are required."""

    tenant_id: str = Field(..., description="Tenant identifier")
    kind: DocumentKind = Field(..., description="Document type")
    source: str = Field(..., description="Document source")
class ExtractionRequest(BaseModel):
    """Payload for document extraction.

    The only field, ``strategy``, is a free-form string defaulting to
    ``"hybrid"``.
    """

    strategy: str = Field(default="hybrid", description="Extraction strategy")
class RAGSearchRequest(BaseModel):
    """Payload for a RAG search.

    ``query`` is required and non-empty; the two filters are optional and
    ``k`` defaults to 10, bounded to 1..100.
    """

    query: str = Field(..., min_length=1, description="Search query")
    tax_year: str | None = Field(default=None, description="Tax year filter")
    jurisdiction: str | None = Field(
        default=None, description="Jurisdiction filter"
    )
    k: int = Field(default=10, ge=1, le=100, description="Number of results")
class ScheduleComputeRequest(BaseModel):
    """Payload for computing a schedule.

    All fields are required; ``tax_year`` must match ``YYYY-YY``.
    """

    tax_year: str = Field(..., pattern=r"^\d{4}-\d{2}$", description="Tax year")
    taxpayer_id: str = Field(..., description="Taxpayer identifier")
    schedule_id: str = Field(..., description="Schedule identifier")
class HMRCSubmissionRequest(BaseModel):
    """Payload for an HMRC submission.

    ``tax_year`` (``YYYY-YY``) and ``taxpayer_id`` are required; ``dry_run``
    defaults to True.
    """

    tax_year: str = Field(..., pattern=r"^\d{4}-\d{2}$", description="Tax year")
    taxpayer_id: str = Field(..., description="Taxpayer identifier")
    dry_run: bool = Field(default=True, description="Dry run flag")
class FirmSyncRequest(BaseModel):
    """Payload asking for firm data to be synced.

    ``firm_id``, ``system`` and ``connection_config`` are required;
    ``sync_type`` defaults to ``"full"`` and ``force_refresh`` to False.
    """

    # Unknown fields are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    firm_id: str = Field(..., description="Firm identifier")
    system: str = Field(..., description="Practice management system to sync with")
    # Plain string: the allowed values are only named in the description.
    sync_type: str = Field(
        default="full", description="Type of sync: full, incremental"
    )
    force_refresh: bool = Field(
        default=False, description="Force refresh of cached data"
    )
    connection_config: dict[str, Any] = Field(
        ...,
        description="Configuration for connecting to the practice management system",
    )

69
libs/schemas/responses.py Normal file
View File

@@ -0,0 +1,69 @@
"""API response models."""
from datetime import datetime
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
class DocumentUploadResponse(BaseModel):
    """Result of a document upload; all three fields are required strings."""

    doc_id: str = Field(..., description="Document identifier")
    s3_url: str = Field(..., description="S3 URL")
    checksum: str = Field(..., description="Document checksum")
class ExtractionResponse(BaseModel):
    """Result of a document extraction.

    All fields are required; ``confidence`` is bounded to 0.0..1.0.
    """

    extraction_id: str = Field(..., description="Extraction identifier")
    confidence: float = Field(..., ge=0.0, le=1.0, description="Overall confidence")
    extracted_fields: dict[str, Any] = Field(..., description="Extracted fields")
    provenance: list[dict[str, Any]] = Field(
        ..., description="Provenance information"
    )
class RAGSearchResponse(BaseModel):
    """Result of a RAG search.

    All fields are required. The three list fields hold free-form dicts and
    ``calibrated_confidence`` is bounded to 0.0..1.0.
    """

    chunks: list[dict[str, Any]] = Field(..., description="Retrieved chunks")
    citations: list[dict[str, Any]] = Field(..., description="Source citations")
    kg_hints: list[dict[str, Any]] = Field(..., description="Knowledge graph hints")
    calibrated_confidence: float = Field(
        ..., ge=0.0, le=1.0, description="Calibrated confidence"
    )
class ScheduleComputeResponse(BaseModel):
    """Result of a schedule computation; all fields are required."""

    calculation_id: str = Field(..., description="Calculation identifier")
    schedule: str = Field(..., description="Schedule identifier")
    # Maps a string key to a free-form dict describing the box.
    form_boxes: dict[str, dict[str, Any]] = Field(
        ..., description="Computed form boxes"
    )
    evidence_trail: list[dict[str, Any]] = Field(..., description="Evidence trail")
class HMRCSubmissionResponse(BaseModel):
    """Result of an HMRC submission.

    Every field is required except ``hmrc_reference``, which defaults to
    None.
    """

    submission_id: str = Field(..., description="Submission identifier")
    status: str = Field(..., description="Submission status")
    hmrc_reference: str | None = Field(default=None, description="HMRC reference")
    submission_timestamp: datetime = Field(..., description="Submission timestamp")
    validation_results: dict[str, Any] = Field(
        ..., description="Validation results"
    )
class FirmSyncResponse(BaseModel):
    """Outcome of a firm sync operation.

    ``firm_id``, ``status`` and ``message`` are required;
    ``synced_entities`` defaults to 0 and ``errors`` to an empty list.
    """

    # Unknown fields are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    firm_id: str = Field(..., description="Firm identifier")
    # Plain string: the allowed values are only named in the description.
    status: str = Field(..., description="Sync status: success, error, partial")
    message: str = Field(..., description="Status message")
    synced_entities: int = Field(default=0, description="Number of entities synced")
    errors: list[str] = Field(
        default_factory=list, description="List of errors encountered"
    )

69
libs/schemas/utils.py Normal file
View File

@@ -0,0 +1,69 @@
"""Utility functions for schema export."""
from typing import Any
from .entities import (
Account,
Calculation,
Document,
Evidence,
ExpenseItem,
FormBox,
IncomeItem,
Party,
Payment,
PropertyAsset,
Rule,
TaxpayerProfile,
)
from .requests import (
DocumentUploadRequest,
ExtractionRequest,
FirmSyncRequest,
HMRCSubmissionRequest,
RAGSearchRequest,
ScheduleComputeRequest,
)
from .responses import (
DocumentUploadResponse,
ExtractionResponse,
FirmSyncResponse,
HMRCSubmissionResponse,
RAGSearchResponse,
ScheduleComputeResponse,
)
def get_entity_schemas() -> dict[str, dict[str, Any]]:
    """Return JSON schemas for the entity and request/response models.

    Returns:
        A dict keyed by model class name whose values are the result of that
        model's ``model_json_schema()``. Entries appear in the order listed
        below: the core entities first, then each request with its response.
    """
    exported_models = (
        # Core entities
        TaxpayerProfile,
        Document,
        Evidence,
        IncomeItem,
        ExpenseItem,
        Party,
        Account,
        PropertyAsset,
        Payment,
        Calculation,
        FormBox,
        Rule,
        # Request/response pairs
        DocumentUploadRequest,
        DocumentUploadResponse,
        ExtractionRequest,
        ExtractionResponse,
        RAGSearchRequest,
        RAGSearchResponse,
        ScheduleComputeRequest,
        ScheduleComputeResponse,
        HMRCSubmissionRequest,
        HMRCSubmissionResponse,
        FirmSyncRequest,
        FirmSyncResponse,
    )
    # Each key is the class's own name, matching the literal keys used before.
    return {model.__name__: model.model_json_schema() for model in exported_models}