Files
ai-tax-agent/libs/schemas/entities.py
harkon b324ff09ef
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Initial commit
2025-10-11 08:41:36 +01:00

231 lines
8.8 KiB
Python

"""Core business entities with temporal modeling."""
from datetime import date, datetime, timezone
from decimal import Decimal
from typing import Any

from pydantic import BaseModel, ConfigDict, Field

from .enums import (
    DocumentKind,
    ExpenseType,
    IncomeType,
    PartySubtype,
    PropertyUsage,
    TaxpayerType,
)
class BaseEntity(BaseModel):
    """Base entity with temporal fields"""

    # Shared model behaviour for every entity: string inputs are stripped of
    # surrounding whitespace, attribute assignment is re-validated, and enum
    # fields are stored as their underlying values.
    model_config = ConfigDict(
        str_strip_whitespace=True, validate_assignment=True, use_enum_values=True
    )

    # Temporal fields (bitemporal modeling)
    # valid_from / valid_to describe when the fact holds in reality;
    # asserted_at / retracted_at describe when the system recorded or
    # withdrew it. No ordering between these timestamps is enforced here.
    valid_from: datetime = Field(
        ..., description="When the fact became valid in reality"
    )
    valid_to: datetime | None = Field(
        None, description="When the fact ceased to be valid"
    )
    # `datetime.utcnow()` is deprecated since Python 3.12. The factory below
    # produces the same naive-UTC value through the supported API, so existing
    # stored/serialized values and comparisons keep their shape.
    # NOTE(review): moving to timezone-aware timestamps would be preferable
    # but needs a coordinated change with every producer/consumer.
    asserted_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        description="When recorded in system",
    )
    retracted_at: datetime | None = Field(
        None, description="When retracted from system"
    )

    # Provenance: both fields are required on every entity.
    source: str = Field(..., description="Source of the information")
    extractor_version: str = Field(..., description="Version of extraction system")
class TaxpayerProfile(BaseEntity):
    """Taxpayer profile entity"""

    # Required identity fields.
    taxpayer_id: str = Field(
        default=...,
        description="Unique taxpayer identifier",
    )
    type: TaxpayerType = Field(
        default=...,
        description="Type of taxpayer",
    )

    # Optional; when supplied it must be exactly ten digits.
    utr: str | None = Field(
        default=None,
        description="Unique Taxpayer Reference",
        pattern=r"^\d{10}$",
    )
    # Optional; two prefix letters from a restricted set, six digits and a
    # suffix letter A-D. The pattern accepts no spaces and no lowercase.
    ni_number: str | None = Field(
        default=None,
        description="National Insurance Number",
        pattern=r"^[A-CEGHJ-PR-TW-Z]{2}\d{6}[A-D]$",
    )
    # Optional free-text value; no format is enforced.
    residence: str | None = Field(
        default=None,
        description="Tax residence",
    )
class Document(BaseEntity):
    """Document entity"""

    # "doc_" followed by sixteen lowercase hexadecimal characters.
    doc_id: str = Field(
        default=...,
        description="Document identifier",
        pattern=r"^doc_[a-f0-9]{16}$",
    )
    kind: DocumentKind = Field(
        default=...,
        description="Type of document",
    )
    # Re-declares the `source` field inherited from BaseEntity with a
    # document-specific description; type and required-ness are unchanged.
    source: str = Field(
        default=...,
        description="Source of document",
    )
    mime: str = Field(
        default=...,
        description="MIME type",
    )
    # Sixty-four lowercase hexadecimal characters.
    checksum: str = Field(
        default=...,
        description="SHA-256 checksum",
        pattern=r"^[a-f0-9]{64}$",
    )

    # Optional metadata. Size may be zero; page count starts at one.
    file_size: int | None = Field(
        default=None,
        description="File size in bytes",
        ge=0,
    )
    pages: int | None = Field(
        default=None,
        description="Number of pages",
        ge=1,
    )
    # Mapping of string keys to dates; the key names are not constrained here.
    date_range: dict[str, date] | None = Field(
        default=None,
        description="Document date range",
    )
class Evidence(BaseEntity):
    """Evidence entity linking to document snippets"""

    snippet_id: str = Field(
        default=...,
        description="Evidence snippet identifier",
    )
    # Plain string reference; its format is not validated in this model.
    doc_ref: str = Field(
        default=...,
        description="Reference to source document",
    )
    # Page numbers are 1-based (minimum of 1 is enforced).
    page: int = Field(
        default=...,
        description="Page number",
        ge=1,
    )
    # NOTE(review): the description implies four coordinates, but the list
    # length is not enforced by this field.
    bbox: list[float] | None = Field(
        default=None,
        description="Bounding box coordinates [x1, y1, x2, y2]",
    )
    # Sixty-four lowercase hexadecimal characters.
    text_hash: str = Field(
        default=...,
        description="SHA-256 hash of extracted text",
        pattern=r"^[a-f0-9]{64}$",
    )
    # Optional score constrained to the closed interval [0.0, 1.0].
    ocr_confidence: float | None = Field(
        default=None,
        description="OCR confidence score",
        ge=0.0,
        le=1.0,
    )
class IncomeItem(BaseEntity):
    """Income item entity"""

    income_id: str = Field(
        default=...,
        description="Income item identifier",
    )
    type: IncomeType = Field(
        default=...,
        description="Type of income",
    )

    # Monetary amounts: all must be non-negative when present. No relation
    # between gross, net and tax_withheld is checked by this model.
    gross: Decimal = Field(
        default=...,
        description="Gross amount",
        ge=0,
    )
    net: Decimal | None = Field(
        default=None,
        description="Net amount",
        ge=0,
    )
    tax_withheld: Decimal | None = Field(
        default=None,
        description="Tax withheld",
        ge=0,
    )
    # Exactly three uppercase letters.
    currency: str = Field(
        default=...,
        description="Currency code",
        pattern=r"^[A-Z]{3}$",
    )

    # Optional period bounds; their ordering is not validated here.
    period_start: date | None = Field(
        default=None,
        description="Income period start",
    )
    period_end: date | None = Field(
        default=None,
        description="Income period end",
    )
    description: str | None = Field(
        default=None,
        description="Income description",
    )
class ExpenseItem(BaseEntity):
    """Expense item entity"""

    expense_id: str = Field(
        default=...,
        description="Expense item identifier",
    )
    type: ExpenseType = Field(
        default=...,
        description="Type of expense",
    )
    # Required and non-negative.
    amount: Decimal = Field(
        default=...,
        description="Expense amount",
        ge=0,
    )
    # Exactly three uppercase letters.
    currency: str = Field(
        default=...,
        description="Currency code",
        pattern=r"^[A-Z]{3}$",
    )
    description: str | None = Field(
        default=None,
        description="Expense description",
    )
    category: str | None = Field(
        default=None,
        description="Expense category",
    )

    # Tri-state flags: None means the value has not been set.
    allowable: bool | None = Field(
        default=None,
        description="Whether expense is allowable",
    )
    capitalizable_flag: bool | None = Field(
        default=None,
        description="Whether expense should be capitalized",
    )

    # Optional VAT breakdown; both parts must be non-negative. Consistency
    # with `amount` is not checked by this model.
    vat_amount: Decimal | None = Field(
        default=None,
        description="VAT amount",
        ge=0,
    )
    net_amount: Decimal | None = Field(
        default=None,
        description="Net amount excluding VAT",
        ge=0,
    )
class Party(BaseEntity):
    """Party entity (person or organization)"""

    party_id: str = Field(
        default=...,
        description="Party identifier",
    )
    # Must contain at least one character.
    name: str = Field(
        default=...,
        description="Party name",
        min_length=1,
    )
    subtype: PartySubtype | None = Field(
        default=None,
        description="Party subtype",
    )
    address: str | None = Field(
        default=None,
        description="Party address",
    )

    # "GB" followed by either nine or twelve digits.
    vat_number: str | None = Field(
        default=None,
        description="UK VAT number",
        pattern=r"^GB\d{9}$|^GB\d{12}$",
    )
    # Exactly ten digits when supplied.
    utr: str | None = Field(
        default=None,
        description="Unique Taxpayer Reference",
        pattern=r"^\d{10}$",
    )
    # Free-form identifiers; no format is enforced for either.
    reg_no: str | None = Field(
        default=None,
        description="Registration number",
    )
    paye_reference: str | None = Field(
        default=None,
        description="PAYE reference",
    )
class Account(BaseEntity):
    """Bank account entity"""

    account_id: str = Field(
        default=...,
        description="Account identifier",
    )

    # "GB", two digits, four uppercase letters, then fourteen digits.
    iban: str | None = Field(
        default=None,
        description="UK IBAN",
        pattern=r"^GB\d{2}[A-Z]{4}\d{14}$",
    )
    # Three two-digit groups separated by hyphens, e.g. NN-NN-NN.
    sort_code: str | None = Field(
        default=None,
        description="Sort code",
        pattern=r"^\d{2}-\d{2}-\d{2}$",
    )
    # Exactly eight digits.
    account_no: str | None = Field(
        default=None,
        description="Account number",
        pattern=r"^\d{8}$",
    )

    institution: str | None = Field(
        default=None,
        description="Financial institution",
    )
    account_type: str | None = Field(
        default=None,
        description="Account type",
    )
    # Defaults to "GBP" when omitted.
    # NOTE(review): unlike the currency fields on IncomeItem, ExpenseItem and
    # Payment, no three-uppercase-letter pattern is enforced here - confirm
    # whether that is intentional.
    currency: str = Field(
        default="GBP",
        description="Account currency",
    )
class PropertyAsset(BaseEntity):
    """Property asset entity"""

    property_id: str = Field(
        default=...,
        description="Property identifier",
    )
    # Required; must be at least ten characters long.
    address: str = Field(
        default=...,
        description="Property address",
        min_length=10,
    )
    # Uppercase only; whitespace between the two parts is optional.
    postcode: str | None = Field(
        default=None,
        description="UK postcode",
        pattern=r"^[A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2}$",
    )
    # Free-form string; allowed values are not restricted here.
    tenure: str | None = Field(
        default=None,
        description="Property tenure",
    )
    # Fraction in the closed interval [0.0, 1.0].
    ownership_share: float | None = Field(
        default=None,
        description="Ownership share",
        ge=0.0,
        le=1.0,
    )
    usage: PropertyUsage | None = Field(
        default=None,
        description="Property usage type",
    )
class Payment(BaseEntity):
    """Payment transaction entity"""

    payment_id: str = Field(
        default=...,
        description="Payment identifier",
    )
    payment_date: date = Field(
        default=...,
        description="Payment date",
    )
    # Signed value: no lower bound is applied, so negatives are accepted.
    amount: Decimal = Field(
        default=...,
        description="Payment amount (positive for credit, negative for debit)",
    )
    # Exactly three uppercase letters.
    currency: str = Field(
        default=...,
        description="Currency code",
        pattern=r"^[A-Z]{3}$",
    )
    # NOTE(review): free-form string; the credit/debit vocabulary named in
    # the description is not enforced, nor is agreement with the amount sign.
    direction: str = Field(
        default=...,
        description="Payment direction (credit/debit)",
    )

    description: str | None = Field(
        default=None,
        description="Payment description",
    )
    reference: str | None = Field(
        default=None,
        description="Payment reference",
    )
    # Optional and unconstrained, so a negative balance is accepted.
    balance_after: Decimal | None = Field(
        default=None,
        description="Account balance after payment",
    )
class Calculation(BaseEntity):
    """Tax calculation entity"""

    calculation_id: str = Field(..., description="Calculation identifier")
    # Free-form string; the schedule names in the description are examples
    # and are not enforced.
    schedule: str = Field(..., description="Tax schedule (SA100, SA103, etc.)")
    # Four digits, a hyphen, then two digits. The pattern does not check that
    # the two parts describe consecutive years.
    tax_year: str = Field(
        ..., pattern=r"^\d{4}-\d{2}$", description="Tax year (e.g., 2023-24)"
    )

    # Totals must be non-negative when present; net_profit is unconstrained
    # so that a loss can be represented as a negative value.
    total_income: Decimal | None = Field(None, ge=0, description="Total income")
    total_expenses: Decimal | None = Field(None, ge=0, description="Total expenses")
    net_profit: Decimal | None = Field(None, description="Net profit/loss")

    # `datetime.utcnow()` is deprecated since Python 3.12. The factory below
    # yields the same naive-UTC timestamp through the supported API, so the
    # shape of stored/serialized values is unchanged.
    calculated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        description="Calculation timestamp",
    )
class FormBox(BaseEntity):
    """Form box entity"""

    # Free-form strings; the form names in the description are examples only.
    form: str = Field(
        default=...,
        description="Form identifier (SA100, SA103, etc.)",
    )
    box: str = Field(
        default=...,
        description="Box identifier",
    )
    # A box may hold a numeric amount, text, or a yes/no flag.
    value: Decimal | str | bool = Field(
        default=...,
        description="Box value",
    )
    description: str | None = Field(
        default=None,
        description="Box description",
    )
    # Optional score constrained to the closed interval [0.0, 1.0].
    confidence: float | None = Field(
        default=None,
        description="Confidence score",
        ge=0.0,
        le=1.0,
    )
class Rule(BaseEntity):
    """Tax rule entity"""

    rule_id: str = Field(
        default=...,
        description="Rule identifier",
    )
    name: str = Field(
        default=...,
        description="Rule name",
    )
    description: str | None = Field(
        default=None,
        description="Rule description",
    )
    # Defaults to "UK" when omitted.
    jurisdiction: str = Field(
        default="UK",
        description="Tax jurisdiction",
    )
    # Required list of strings; the individual entries are not pattern-checked.
    tax_years: list[str] = Field(
        default=...,
        description="Applicable tax years",
    )
    # Stored as an opaque string; this model neither parses nor evaluates it.
    formula: str | None = Field(
        default=None,
        description="Rule formula",
    )
    # Arbitrary string-keyed mapping; the value structure is not constrained.
    conditions: dict[str, Any] | None = Field(
        default=None,
        description="Rule conditions",
    )