Initial commit
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled

This commit is contained in:
harkon
2025-10-11 08:41:36 +01:00
commit b324ff09ef
276 changed files with 55220 additions and 0 deletions

230
libs/schemas/entities.py Normal file
View File

@@ -0,0 +1,230 @@
"""Core business entities with temporal modeling."""
from datetime import date, datetime, timezone
from decimal import Decimal
from typing import Any

from pydantic import BaseModel, ConfigDict, Field

from .enums import (
    DocumentKind,
    ExpenseType,
    IncomeType,
    PartySubtype,
    PropertyUsage,
    TaxpayerType,
)
class BaseEntity(BaseModel):
    """Base entity with temporal fields"""

    # Shared model behaviour for every entity:
    #   * str_strip_whitespace - surrounding whitespace is trimmed from strings
    #   * validate_assignment  - attribute assignment is validated, not only
    #                            construction
    #   * use_enum_values      - enum fields hold the enum's value rather than
    #                            the enum member
    model_config = ConfigDict(
        str_strip_whitespace=True,
        validate_assignment=True,
        use_enum_values=True,
    )

    # --- Temporal fields (bitemporal modeling) -----------------------------
    # Valid time: the interval during which the fact holds in reality.
    valid_from: datetime = Field(
        ..., description="When the fact became valid in reality"
    )
    valid_to: datetime | None = Field(
        default=None, description="When the fact ceased to be valid"
    )

    # Transaction time: when the system recorded / retracted the fact.
    # `datetime.utcnow` is deprecated since Python 3.12; this factory yields
    # the same naive-UTC timestamp so values stay comparable with the naive
    # datetimes the previous default produced.
    asserted_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        description="When recorded in system",
    )
    retracted_at: datetime | None = Field(
        default=None, description="When retracted from system"
    )

    # --- Provenance ---------------------------------------------------------
    source: str = Field(..., description="Source of the information")
    extractor_version: str = Field(
        ..., description="Version of extraction system"
    )
class TaxpayerProfile(BaseEntity):
    """Taxpayer profile entity"""

    # Required identity fields.
    taxpayer_id: str = Field(..., description="Unique taxpayer identifier")
    type: TaxpayerType = Field(..., description="Type of taxpayer")

    # Optional UK tax identifiers; each is format-checked when supplied.
    # UTR: exactly ten digits.
    utr: str | None = Field(
        default=None,
        pattern=r"^\d{10}$",
        description="Unique Taxpayer Reference",
    )
    # NI number: two letters from the permitted set, six digits, suffix A-D.
    ni_number: str | None = Field(
        default=None,
        pattern=r"^[A-CEGHJ-PR-TW-Z]{2}\d{6}[A-D]$",
        description="National Insurance Number",
    )

    residence: str | None = Field(default=None, description="Tax residence")
class Document(BaseEntity):
    """Document entity"""

    # Identifier must be "doc_" followed by 16 lowercase hex characters.
    doc_id: str = Field(
        ...,
        pattern=r"^doc_[a-f0-9]{16}$",
        description="Document identifier",
    )
    kind: DocumentKind = Field(..., description="Type of document")

    # Re-declares the inherited `source` field with a document-specific
    # description; it remains a required string.
    source: str = Field(..., description="Source of document")
    mime: str = Field(..., description="MIME type")

    # 64 lowercase hex characters.
    checksum: str = Field(
        ...,
        pattern=r"^[a-f0-9]{64}$",
        description="SHA-256 checksum",
    )

    # Optional physical metadata: size may be zero, page count starts at one.
    file_size: int | None = Field(
        default=None, ge=0, description="File size in bytes"
    )
    pages: int | None = Field(
        default=None, ge=1, description="Number of pages"
    )

    # Free-form mapping of label -> date; the key names are not constrained.
    date_range: dict[str, date] | None = Field(
        default=None, description="Document date range"
    )
class Evidence(BaseEntity):
    """Evidence entity linking to document snippets"""

    snippet_id: str = Field(..., description="Evidence snippet identifier")
    doc_ref: str = Field(..., description="Reference to source document")

    # Page numbers are 1-based.
    page: int = Field(..., ge=1, description="Page number")

    # Only the element type is validated; the list length is not enforced.
    bbox: list[float] | None = Field(
        default=None,
        description="Bounding box coordinates [x1, y1, x2, y2]",
    )

    # 64 lowercase hex characters.
    text_hash: str = Field(
        ...,
        pattern=r"^[a-f0-9]{64}$",
        description="SHA-256 hash of extracted text",
    )

    # Optional score constrained to the closed interval [0, 1].
    ocr_confidence: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="OCR confidence score",
    )
class IncomeItem(BaseEntity):
    """Income item entity"""

    income_id: str = Field(..., description="Income item identifier")
    type: IncomeType = Field(..., description="Type of income")

    # Monetary amounts are Decimals and may not be negative.
    gross: Decimal = Field(..., ge=0, description="Gross amount")
    net: Decimal | None = Field(
        default=None, ge=0, description="Net amount"
    )
    tax_withheld: Decimal | None = Field(
        default=None, ge=0, description="Tax withheld"
    )

    # Three uppercase letters.
    currency: str = Field(
        ..., pattern=r"^[A-Z]{3}$", description="Currency code"
    )

    # Optional period covered by the income; ordering of the two dates is
    # not validated here.
    period_start: date | None = Field(
        default=None, description="Income period start"
    )
    period_end: date | None = Field(
        default=None, description="Income period end"
    )

    description: str | None = Field(
        default=None, description="Income description"
    )
class ExpenseItem(BaseEntity):
    """Expense item entity"""

    expense_id: str = Field(..., description="Expense item identifier")
    type: ExpenseType = Field(..., description="Type of expense")

    # Non-negative Decimal amount and a three-uppercase-letter currency code.
    amount: Decimal = Field(..., ge=0, description="Expense amount")
    currency: str = Field(
        ..., pattern=r"^[A-Z]{3}$", description="Currency code"
    )

    description: str | None = Field(
        default=None, description="Expense description"
    )
    category: str | None = Field(
        default=None, description="Expense category"
    )

    # Tri-state flags: None means "not determined".
    allowable: bool | None = Field(
        default=None, description="Whether expense is allowable"
    )
    capitalizable_flag: bool | None = Field(
        default=None,
        description="Whether expense should be capitalized",
    )

    # Optional VAT breakdown; consistency with `amount` is not checked here.
    vat_amount: Decimal | None = Field(
        default=None, ge=0, description="VAT amount"
    )
    net_amount: Decimal | None = Field(
        default=None,
        ge=0,
        description="Net amount excluding VAT",
    )
class Party(BaseEntity):
    """Party entity (person or organization)"""

    party_id: str = Field(..., description="Party identifier")

    # Name must contain at least one character.
    name: str = Field(..., min_length=1, description="Party name")

    subtype: PartySubtype | None = Field(
        default=None, description="Party subtype"
    )
    address: str | None = Field(default=None, description="Party address")

    # "GB" followed by either 9 or 12 digits.
    vat_number: str | None = Field(
        default=None,
        pattern=r"^GB\d{9}$|^GB\d{12}$",
        description="UK VAT number",
    )
    # Exactly ten digits.
    utr: str | None = Field(
        default=None,
        pattern=r"^\d{10}$",
        description="Unique Taxpayer Reference",
    )

    # Unconstrained free-text identifiers.
    reg_no: str | None = Field(
        default=None, description="Registration number"
    )
    paye_reference: str | None = Field(
        default=None, description="PAYE reference"
    )
class Account(BaseEntity):
    """Bank account entity"""

    account_id: str = Field(..., description="Account identifier")

    # "GB", two digits, four uppercase letters, then fourteen digits.
    iban: str | None = Field(
        default=None,
        pattern=r"^GB\d{2}[A-Z]{4}\d{14}$",
        description="UK IBAN",
    )
    # Three hyphen-separated pairs of digits, e.g. "12-34-56".
    sort_code: str | None = Field(
        default=None,
        pattern=r"^\d{2}-\d{2}-\d{2}$",
        description="Sort code",
    )
    # Exactly eight digits.
    account_no: str | None = Field(
        default=None,
        pattern=r"^\d{8}$",
        description="Account number",
    )

    institution: str | None = Field(
        default=None, description="Financial institution"
    )
    account_type: str | None = Field(
        default=None, description="Account type"
    )

    # NOTE(review): unlike the currency fields on the income, expense and
    # payment entities, no ^[A-Z]{3}$ pattern is enforced here - confirm
    # whether that is intentional.
    currency: str = Field(default="GBP", description="Account currency")
class PropertyAsset(BaseEntity):
    """Property asset entity"""

    property_id: str = Field(..., description="Property identifier")

    # Addresses shorter than ten characters are rejected.
    address: str = Field(
        ..., min_length=10, description="Property address"
    )

    # Uppercase only; whitespace between outward and inward parts is optional.
    postcode: str | None = Field(
        default=None,
        pattern=r"^[A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2}$",
        description="UK postcode",
    )

    tenure: str | None = Field(default=None, description="Property tenure")

    # Fraction in the closed interval [0, 1].
    ownership_share: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Ownership share",
    )

    usage: PropertyUsage | None = Field(
        default=None, description="Property usage type"
    )
class Payment(BaseEntity):
    """Payment transaction entity"""

    payment_id: str = Field(..., description="Payment identifier")
    payment_date: date = Field(..., description="Payment date")

    # Signed Decimal: no lower bound is applied, so negative values are
    # accepted.
    amount: Decimal = Field(
        ...,
        description="Payment amount (positive for credit, negative for debit)",
    )
    # Three uppercase letters.
    currency: str = Field(
        ..., pattern=r"^[A-Z]{3}$", description="Currency code"
    )

    # Plain string; the credit/debit vocabulary is documented but not
    # enforced by validation.
    direction: str = Field(
        ..., description="Payment direction (credit/debit)"
    )

    description: str | None = Field(
        default=None, description="Payment description"
    )
    reference: str | None = Field(
        default=None, description="Payment reference"
    )

    # Optional running balance; may be negative.
    balance_after: Decimal | None = Field(
        default=None,
        description="Account balance after payment",
    )
class Calculation(BaseEntity):
    """Tax calculation entity"""

    calculation_id: str = Field(..., description="Calculation identifier")
    schedule: str = Field(
        ..., description="Tax schedule (SA100, SA103, etc.)"
    )

    # Four digits, a hyphen, then two digits; the pattern does not check that
    # the two years are consecutive.
    tax_year: str = Field(
        ...,
        pattern=r"^\d{4}-\d{2}$",
        description="Tax year (e.g., 2023-24)",
    )

    # Totals may not be negative; net profit is unbounded so it can carry a
    # loss.
    total_income: Decimal | None = Field(
        default=None, ge=0, description="Total income"
    )
    total_expenses: Decimal | None = Field(
        default=None, ge=0, description="Total expenses"
    )
    net_profit: Decimal | None = Field(
        default=None, description="Net profit/loss"
    )

    # `datetime.utcnow` is deprecated since Python 3.12; this factory yields
    # the same naive-UTC timestamp the previous default produced.
    calculated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        description="Calculation timestamp",
    )
class FormBox(BaseEntity):
    """Form box entity"""

    # Together, `form` and `box` locate a single cell on a tax form.
    form: str = Field(
        ..., description="Form identifier (SA100, SA103, etc.)"
    )
    box: str = Field(..., description="Box identifier")

    # A box can hold a numeric amount, free text, or a tick-box boolean.
    value: Decimal | str | bool = Field(..., description="Box value")

    description: str | None = Field(
        default=None, description="Box description"
    )

    # Optional score constrained to the closed interval [0, 1].
    confidence: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Confidence score",
    )
class Rule(BaseEntity):
    """Tax rule entity"""

    rule_id: str = Field(..., description="Rule identifier")
    name: str = Field(..., description="Rule name")
    description: str | None = Field(
        default=None, description="Rule description"
    )

    # Defaults to "UK" when not supplied.
    jurisdiction: str = Field(default="UK", description="Tax jurisdiction")

    # Required list of strings; individual entries are not pattern-checked.
    tax_years: list[str] = Field(..., description="Applicable tax years")

    # Optional rule body: a formula string and/or an arbitrary conditions
    # mapping whose structure is not validated here.
    formula: str | None = Field(default=None, description="Rule formula")
    conditions: dict[str, Any] | None = Field(
        default=None, description="Rule conditions"
    )