Initial commit
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
This commit is contained in:
405
config/coverage.yaml
Normal file
405
config/coverage.yaml
Normal file
@@ -0,0 +1,405 @@
|
||||
# FILE: config/coverage.yaml
|
||||
# Policy version, jurisdiction and the tax year this coverage file targets.
# All three are quoted so they load as strings ("1.0" must not become a float).
version: "1.0"
jurisdiction: "UK"
tax_year: "2024-25"

# Start and end dates of the tax year named above (quoted to stay strings
# instead of being parsed as YAML dates).
tax_year_boundary: { start: "2024-04-06", end: "2025-04-05" }
|
||||
|
||||
# Default settings.
defaults:
  # Confidence scores for the OCR and extraction stages.
  confidence_thresholds: { ocr: 0.82, extract: 0.85 }
  date_tolerance_days: 30
  require_lineage_bbox: true
  # When the primary statement is missing, allow verified bank YTD figures
  # plus a reconciliation.
  allow_bank_substantiation: true
|
||||
|
||||
# Canonical kinds used by the extractor/classifier (map your classifier labels
# to these). Every evidence `id` and `acceptable_alternatives` entry under
# `schedules` should name a kind from this list.
document_kinds:
  - P60
  - P45
  - P11D
  - PayslipMonthly
  - FinalPayslipYTD
  - EmploymentContract
  - AccountsPAndL
  - AccountsBalanceSheet
  - CapitalAllowancesSchedule
  - MileageLog
  - LettingAgentStatements
  - TenancyLedger
  - MortgageInterestCertificate
  - OwnershipShareProof
  - OccupancyLog
  - BookingsCalendar
  - BankStatements
  - BuildingSocietyInterestCert
  - BankInterestAnnualStatement
  - DividendVouchers
  - ConsolidatedTaxVoucher
  - SLCAnnualStatement
  - PensionContributionStatement
  - GiftAidStatement
  - ForeignIncomeStatement
  - OverseasTaxCreditStatement
  - TrustDistributionStatement
  - EstateR185
  - CGT_BrokerAnnualReport
  - CGT_Computation
  - RemittanceBasisWorkpaper
  - ResidenceEvidence
  - HMRC_CodingNotice
  - HMRC_PaymentOnAccount
  - OtherSupportingDoc
  # Kinds referenced by `schedules` that were missing from this list; appended
  # at the end so the position of existing entries is unchanged.
  # NOTE(review): confirm the classifier can emit these labels.
  - EmployerStatement # SA102: acceptable alternative to P11D
  - EEA_FHL_OccupancyLog # SA106: evidence for EEA_FHL income
|
||||
|
||||
# Handy lookup keys used by AskClarifyingQuestion; keep them high-level and
# stable. Each key maps to the guidance document's id and kind.
guidance_refs:
  # NOTE(review): this is the only entry whose doc_id prefix (SA150) differs
  # from its key (SA100) — presumably intentional; confirm.
  SA100_Notes_2025:
    doc_id: "SA150-Notes-2025"
    kind: "Notes"
  SA102_Notes_2025:
    doc_id: "SA102-Notes-2025"
    kind: "Notes"
  SA103S_Notes_2025:
    doc_id: "SA103S-Notes-2025"
    kind: "Notes"
  SA103F_Notes_2025:
    doc_id: "SA103F-Notes-2025"
    kind: "Notes"
  SA105_Notes_2025:
    doc_id: "SA105-Notes-2025"
    kind: "Notes"
  SA106_Notes_2025:
    doc_id: "SA106-Notes-2025"
    kind: "Notes"
  SA107_Notes_2025:
    doc_id: "SA107-Notes-2025"
    kind: "Notes"
  SA108_Notes_2025:
    doc_id: "SA108-Notes-2025"
    kind: "Notes"
  SA109_Notes_2025:
    doc_id: "SA109-Notes-2025"
    kind: "Notes"
  SA110_Notes_2025:
    doc_id: "SA110-Notes-2025"
    kind: "Notes"
|
||||
|
||||
# Evaluate against KG & intake flags to decide which schedules apply.
# Each schedule lists its conditions under `any_of`. Expressions containing
# brackets or quotes are single-quoted so YAML always reads them as one
# literal string.
triggers:
  SA102:
    any_of:
      - exists: 'IncomeItem[type="Employment"]'
      - taxpayer_flag: has_employment
  SA103S:
    any_of:
      - exists: 'IncomeItem[type="SelfEmployment" AND turnover_lt_vat_threshold=true]'
      - taxpayer_flag: is_self_employed_short
  SA103F:
    any_of:
      - exists: 'IncomeItem[type="SelfEmployment" AND turnover_ge_vat_threshold=true]'
      - taxpayer_flag: is_self_employed_full
  SA105:
    any_of:
      - exists: 'IncomeItem[type="UKPropertyRent"]'
      - taxpayer_flag: has_property_income
  SA106:
    any_of:
      - exists: 'IncomeItem[type IN ["ForeignInterest","ForeignDividends","ForeignEmployment","EEA_FHL","OverseasProperty"]]'
      - taxpayer_flag: has_foreign_income
  SA107:
    any_of:
      - exists: TrustDistribution
      - exists: EstateIncome
      - taxpayer_flag: has_trust_or_estate_income
  SA108:
    any_of:
      - exists: CapitalGain
      - taxpayer_flag: has_disposals
  SA109:
    any_of:
      - taxpayer_flag: claims_remittance_basis
      - exists: NonUKResident
  SA110:
    any_of:
      - filing_mode: paper
      - taxpayer_flag: wants_manual_calculation
|
||||
|
||||
# Evidence requirements per schedule. Each evidence entry names a document
# kind (`id`), a `role` (REQUIRED / CONDITIONALLY_REQUIRED / OPTIONAL), the
# form boxes it supports, and optionally a `condition`, acceptable
# alternatives, validity constraints and a short reason.
schedules:
  # Employment
  SA102:
    guidance_hint: SA102_Notes_2025
    evidence:
      - id: P60
        role: REQUIRED
        # pay and UK tax taken off
        boxes: ["SA102_b1", "SA102_b2"]
        acceptable_alternatives: ["P45", "FinalPayslipYTD"]
        validity:
          within_tax_year: true
        reasons:
          short: "P60 (or P45/final payslip) provides year-to-date pay and PAYE tax figures for boxes 1–2."
      - id: P11D
        role: CONDITIONALLY_REQUIRED
        condition: "exists(BenefitInKind=true)"
        boxes:
          - "SA102_b9"
          - "SA102_b10"
          - "SA102_b11"
          - "SA102_b12"
          - "SA102_b13"
          - "SA102_b14"
          - "SA102_b15"
          - "SA102_b16"
          - "SA102_b17"
          - "SA102_b18"
          - "SA102_b19"
          - "SA102_b20"
        acceptable_alternatives: ["EmployerStatement"]
        validity:
          available_by: "2025-07-06"
        reasons:
          short: "P11D carries benefits/expenses that map to boxes 9–20 when not payrolled."
      - id: SLCAnnualStatement
        role: OPTIONAL
        boxes: ["SA102_b21", "SA102_b21_1"]
        reasons:
          short: "Student/Postgrad loan indicators and plan types where applicable."
      - id: PayslipMonthly
        role: OPTIONAL
        # tips/other payments not on P60
        boxes: ["SA102_b3"]
        acceptable_alternatives: []
      - id: EmploymentContract
        role: OPTIONAL
        boxes: []
        reasons:
          short: "Used only for disambiguation (OFF-PAYROLL/IR35, director)."
    cross_checks:
      - name: "PAYE Reconcile"
        logic: "Sum(payrolled_BIKs_excluded_from_SLR) handled; P60 box totals = SA102_b1; PAYE tax = SA102_b2 within ±£1."

  # Self-employment (short)
  SA103S:
    guidance_hint: SA103S_Notes_2025
    evidence:
      - id: AccountsPAndL
        role: REQUIRED
        boxes: ["SA103S_b9", "SA103S_b15", "SA103S_b28"]
        reasons:
          short: "Turnover and allowable expenses supporting net profit figures."
      - id: BankStatements
        role: REQUIRED
        boxes: ["SA103S_b9", "SA103S_b11", "SA103S_b17"]
        reasons:
          short: "Bank corroboration of takings/expenses (cash basis or traditional)."
      - id: CapitalAllowancesSchedule
        role: CONDITIONALLY_REQUIRED
        condition: "exists(ExpenseItem[category='CapitalAllowances'])"
        boxes: ["SA103S_b49"]
      - id: MileageLog
        role: OPTIONAL
        # NOTE(review): same box number as the SA103F MileageLog entry —
        # confirm against the SA103S form.
        boxes: ["SA103S_b20"]
      - id: HMRC_CodingNotice
        role: OPTIONAL
        boxes: []
        reasons:
          short: "Basis period changes or coding interactions."
    selection_rule:
      prefer_short_if: "turnover < VAT_threshold AND no_complex_adjustments"
      else_use: "SA103F"

  # Self-employment (full)
  SA103F:
    guidance_hint: SA103F_Notes_2025
    evidence:
      - id: AccountsPAndL
        role: REQUIRED
        boxes: ["SA103F_b15", "SA103F_b31", "SA103F_b73"]
      - id: AccountsBalanceSheet
        role: REQUIRED
        boxes: []
      - id: BankStatements
        role: REQUIRED
        boxes: ["SA103F_b15", "SA103F_b31"]
      - id: CapitalAllowancesSchedule
        role: CONDITIONALLY_REQUIRED
        condition: "exists(ExpenseItem[category='CapitalAllowances'])"
        boxes: ["SA103F_b50", "SA103F_b52", "SA103F_b55", "SA103F_b57"]
      - id: MileageLog
        role: OPTIONAL
        boxes: ["SA103F_b20"]
    notes:
      long_form_needed_if:
        - "turnover >= VAT_threshold"
        - "claims overlap adjustments, averaging, or multiple trades"

  # UK Property (incl. UK FHL)
  SA105:
    guidance_hint: SA105_Notes_2025
    evidence:
      - id: LettingAgentStatements
        role: REQUIRED
        # income and totals; totals vs. sum of expenses
        boxes: ["SA105_b5", "SA105_b20", "SA105_b29"]
        acceptable_alternatives: ["TenancyLedger", "BankStatements"]
        reasons:
          short: "Gross rents, fees and charges per-year by property/portfolio."
      - id: MortgageInterestCertificate
        role: CONDITIONALLY_REQUIRED
        condition: "exists(ExpenseItem[category='FinanceCosts'])"
        # feeds SA110 basic-rate credit
        boxes: ["SA105_b44"]
      - id: OwnershipShareProof
        role: CONDITIONALLY_REQUIRED
        condition: "property_joint_ownership=true"
        boxes: ["SA105_b3"]
      - id: OccupancyLog
        role: CONDITIONALLY_REQUIRED
        condition: "candidate_FHL=true"
        boxes: ["SA105_b5", "SA105_b20"]
        acceptable_alternatives: ["BookingsCalendar"]
      - id: BankStatements
        role: OPTIONAL
        boxes: ["SA105_b20", "SA105_b29"]
    cross_checks:
      - name: "Property Income Allowance Gate"
        logic: "If SA105_b20.1 claimed then no expense boxes 24–29 or FHL expense boxes 6–12 allowed."

  # Foreign
  SA106:
    guidance_hint: SA106_Notes_2025
    evidence:
      - id: ForeignIncomeStatement
        role: REQUIRED
        boxes: ["SA106_b1", "SA106_b2", "SA106_b3", "SA106_b5"]
        reasons:
          short: "Dividends/interest/overseas employment; gross and tax paid."
      - id: OverseasTaxCreditStatement
        role: CONDITIONALLY_REQUIRED
        condition: "claims_FTCR=true"
        boxes: ["SA106_b2", "SA106_b5"]
      - id: EEA_FHL_OccupancyLog
        role: CONDITIONALLY_REQUIRED
        condition: "exists(IncomeItem[type='EEA_FHL'])"
        boxes: ["SA106_b14", "SA106_b15"]
      - id: BankStatements
        role: OPTIONAL
        boxes: ["SA106_b1", "SA106_b3"]
    notes:
      remittance_interaction: "If remittance basis claimed, mirror to SA109."

  # Trusts etc
  SA107:
    guidance_hint: SA107_Notes_2025
    evidence:
      - id: TrustDistributionStatement
        role: REQUIRED
        boxes: ["SA107_b1", "SA107_b2", "SA107_b3"]
      - id: EstateR185
        role: CONDITIONALLY_REQUIRED
        condition: "received_estate_income=true"
        boxes: ["SA107_b9", "SA107_b10"]
      - id: BankStatements
        role: OPTIONAL
        boxes: []

  # Capital Gains
  SA108:
    guidance_hint: SA108_Notes_2025
    evidence:
      - id: CGT_BrokerAnnualReport
        role: REQUIRED
        boxes:
          - "SA108_b4"
          - "SA108_b5"
          - "SA108_b6"
          - "SA108_b9"
          - "SA108_b11"
          - "SA108_b14"
        reasons:
          short: "Disposals, proceeds, allowable costs, gain breakdowns (residential vs other)."
      - id: CGT_Computation
        role: REQUIRED
        boxes: ["SA108_b28", "SA108_b34"]
      - id: BankStatements
        role: OPTIONAL
        boxes: ["SA108_b4", "SA108_b5"]
    special_2024_25:
      adjustment_note: "Rate change adjustment for disposals on/after 2024-10-30 may be required."

  # Residence / Remittance
  SA109:
    guidance_hint: SA109_Notes_2025
    evidence:
      - id: ResidenceEvidence
        role: REQUIRED
        boxes: ["SA109_b1", "SA109_b7", "SA109_b8", "SA109_b9"]
      - id: RemittanceBasisWorkpaper
        role: CONDITIONALLY_REQUIRED
        condition: "claims_remittance_basis=true"
        boxes: ["SA109_b28", "SA109_b39"]
      - id: ForeignIncomeStatement
        role: OPTIONAL
        boxes: ["SA109_b28", "SA109_b39"]

  # Tax calculation summary (paper/manual)
  SA110:
    guidance_hint: SA110_Notes_2025
    evidence:
      - id: HMRC_PaymentOnAccount
        role: OPTIONAL
        boxes: ["SA110_b10", "SA110_b11"]
      - id: HMRC_CodingNotice
        role: OPTIONAL
        boxes: ["SA110_b7", "SA110_b8", "SA110_b9"]
    notes:
      online_filing: "If online, SA110 is computed automatically; still store calculation lineage for audit."

  # Core return - savings/dividends/gift aid, etc.
  SA100:
    guidance_hint: SA100_Notes_2025
    evidence:
      - id: BankInterestAnnualStatement
        role: CONDITIONALLY_REQUIRED
        condition: "exists(IncomeItem[type='SavingsInterest'])"
        boxes: ["SA100_b1"]
      - id: DividendVouchers
        role: CONDITIONALLY_REQUIRED
        condition: "exists(IncomeItem[type='Dividends'])"
        boxes: ["SA100_b2"]
        acceptable_alternatives: ["ConsolidatedTaxVoucher"]
      - id: PensionContributionStatement
        role: CONDITIONALLY_REQUIRED
        condition: "exists(PensionContribution[relief_method='RAS'])"
        boxes: ["SA100_b4"]
      - id: GiftAidStatement
        role: OPTIONAL
        boxes: ["SA100_b5"]
|
||||
|
||||
# How we classify found evidence for coverage. Four statuses:
# present_verified, present_unverified, conflicting and missing.
status_classifier:
  # Same OCR/extract minimums as defaults.confidence_thresholds in this file.
  present_verified: { min_ocr: 0.82, min_extract: 0.85, date_in_year: true }
  # Lower minimums; the date may fall in the year or within tolerance.
  present_unverified:
    { min_ocr: 0.60, min_extract: 0.70, date_in_year_or_tolerance: true }
  conflicting:
    conflict_rules:
      - "Same doc kind, different totals for same period ±£1"
      - "Totals disagree with KG aggregates by >£1"
  missing:
    default: true
|
||||
|
||||
# Which source is listed first when evidence conflicts, and how conflicts
# are escalated.
conflict_resolution:
  # Ordered list; entry order is significant.
  precedence:
    - "LettingAgentStatements"
    - "P60"
    - "P11D"
    - "ConsolidatedTaxVoucher"
    - "BankStatements"
    - "ManualEntry"
  escalation:
    to_review: true
    reason_templates:
      - "Document totals disagree with computed aggregates."
      - "Low confidence OCR; request re-upload or alternative."
|
||||
|
||||
# Templates for questions put to the taxpayer. Placeholders in {braces} are
# filled in by the consumer.
question_templates:
  default:
    # Folded scalar: the three lines below load as one line joined by single
    # spaces, with no trailing newline.
    text: >-
      To complete the {schedule} for {tax_year}, we need {evidence}.
      These documents support boxes {boxes}.
      If you don’t have this, you can provide {alternatives}.
    # Must stay quoted: the value starts with "{" and contains ": ".
    why: "{why}. See guidance: {guidance_doc}."
  # Per-document-kind explanations, keyed by document kind.
  reasons:
    P60: "P60 provides your year-end pay and PAYE tax figures for the employment page."
    P11D: "P11D lists benefits and expenses that map directly to boxes 9–20 when not payrolled."
    LettingAgentStatements: "HMRC expects evidence of gross rents and expenses to support SA105 totals."
    MortgageInterestCertificate: "Mortgage interest supports the basic-rate tax reduction computation."
    CGT_BrokerAnnualReport: "Brokers’ annual summaries and computations substantiate proceeds, costs and gains."
|
||||
|
||||
# Ensure we never index PII into vectors.
privacy:
  vector_pii_free: true
  # Named redaction patterns.
  redact_patterns:
    [NI_Number, UTR, IBAN, SortCode, AccountNumber, Email, Phone]
|
||||
281
config/heuristics.yaml
Normal file
281
config/heuristics.yaml
Normal file
@@ -0,0 +1,281 @@
|
||||
# FILE: config/heuristics.yaml
|
||||
|
||||
# Per-kind text patterns and classifier feature names. Patterns are
# single-quoted so backslashes in regex fragments are literal.
document_kinds:
  bank_statement:
    patterns:
      - 'statement of account'
      - 'current account'
      - 'savings account'
      - 'sort code: \d{2}-\d{2}-\d{2}'
    classifiers:
      [has_sort_code_pattern, has_account_number, has_transaction_table]

  invoice:
    patterns:
      - 'invoice'
      - 'tax invoice'
      - 'vat invoice'
      - 'invoice number'
    classifiers: [has_vat_number, has_invoice_number, has_line_items]

  receipt:
    patterns:
      - 'receipt'
      - 'till receipt'
      - 'card payment'
    classifiers: [has_merchant_name, has_payment_method]

  payslip:
    patterns:
      - 'payslip'
      - 'pay advice'
      - 'salary statement'
      - 'paye'
    classifiers: [has_employer_name, has_ni_contributions, has_tax_code]

  p60:
    patterns:
      - 'p60'
      - 'end of year certificate'
    classifiers: [has_tax_year_end, has_total_pay, has_total_tax]
|
||||
|
||||
# Normalisation rules for extracted field values.
field_normalization:
  currency:
    # Patterns per currency. Entries such as "pounds?" and "euros?" use regex
    # syntax, so regex metacharacters must be escaped.
    patterns:
      gbp: ["£", "GBP", "pounds?", "sterling"]
      eur: ["€", "EUR", "euros?"]
      # "\\$" is a literal dollar sign. An unescaped "$" is the regex
      # end-of-input anchor and would match any text.
      usd: ["\\$", "USD", "dollars?"]
    default: "GBP"

  # strptime-style formats.
  date_formats:
    - "%d/%m/%Y"
    - "%d-%m-%Y"
    - "%d %B %Y"
    - "%d %b %Y"
    - "%Y-%m-%d"

  employer_names:
    # Canonical name -> spellings that should collapse to it.
    canonical_mapping:
      "hmrc":
        - "hm revenue & customs"
        - "her majesty's revenue and customs"
        # Added: "his majesty's" form of the department's name.
        - "his majesty's revenue and customs"
        - "hmrc"
      "nhs": ["national health service", "nhs trust", "nhs foundation trust"]
    normalization_rules:
      - remove_legal_suffixes: ["ltd", "limited", "plc", "llp", "partnership"]
      - standardize_case: "title"
      - remove_extra_whitespace: true

  address_parsing:
    # Same expression as validation_rules.field_validations.postcode.
    postcode_pattern: "^[A-Z]{1,2}\\d[A-Z\\d]?\\s*\\d[A-Z]{2}$"
    components:
      - house_number
      - street_name
      - locality
      - town
      - county
      - postcode
|
||||
|
||||
# Maps extracted document fields onto return boxes. Each box lists its source
# fields ("<document>.<field>"), optional filters, and an aggregation.
line_item_mapping:
  sa102_employment:
    # NOTE(review): both boxes sum payslip figures together with the P60
    # total. If a P60 and the payslips for the same employment are both
    # present this looks like a double count — confirm how the consumer
    # combines `sources`.
    box_1_pay_from_employment:
      sources:
        - "payslip.gross_pay"
        - "p60.total_pay"
      aggregation: "sum"
    box_2_uk_tax_deducted:
      sources:
        - "payslip.tax_deducted"
        - "p60.total_tax"
      aggregation: "sum"

  sa103_self_employment:
    box_12_turnover:
      # NOTE(review): uses invoice.total here but invoice.amount for
      # expenses below — confirm both field names exist.
      sources:
        - "invoice.total"
        - "receipt.amount"
      filters:
        - "income_type = 'business'"
      aggregation: "sum"
    box_31_total_expenses:
      sources:
        - "receipt.amount"
        - "invoice.amount"
      filters:
        - "expense_type = 'business'"
        - "allowable = true"
      aggregation: "sum"

  sa105_property:
    box_20_property_income:
      sources:
        - "bank_statement.credit"
        - "rental_statement.rent"
      filters:
        - "description contains 'rent'"
      aggregation: "sum"
    box_29_property_expenses:
      sources:
        - "invoice.amount"
        - "receipt.amount"
      filters:
        - "category in ['repairs', 'maintenance', 'insurance', 'letting_fees']"
      aggregation: "sum"
|
||||
|
||||
# How items are assigned to a tax period.
period_inference:
  # The tax year starts on day 6 of month 4.
  uk_tax_year: { start_month: 4, start_day: 6, boundary_logic: "6_april_to_5_april" }

  basis_period_reform:
    effective_from: "2024-04-06"
    transition_rules: ["align_to_tax_year", "overlap_relief"]

  # Which date on an item decides the period, per income type.
  assignment_rules:
    employment_income: "payment_date"
    self_employment: "invoice_date_or_receipt_date"
    property_income: "due_date_or_receipt_date"
    dividends: "payment_date"
    interest: "credited_date"
|
||||
|
||||
# Duplicate detection: match keys, tolerances, and which copy to keep.
dedupe_rules:
  same_transaction:
    keys:
      - "payer_name_norm"
      - "amount"
      - "date"
    tolerance: { amount: 0.01, date_days: 2 }
    merge_strategy: "prefer_bank_statement"

  same_invoice:
    keys:
      - "invoice_number"
      - "supplier_name_norm"
    # NOTE(review): an amount tolerance is set although `amount` is not one
    # of the keys above — confirm it is used.
    tolerance: { amount: 0.01 }
    merge_strategy: "prefer_original_document"
|
||||
|
||||
# Inputs to the confidence score attached to extracted values.
confidence_model:
  # Prior per source type, highest first.
  source_priors:
    bank_statement: 0.95
    official_certificate: 0.90
    p60: 0.90
    payslip: 0.85
    invoice: 0.80
    receipt: 0.75
    prior_return: 0.70
    manual_entry: 0.60

  # OCR confidence bands.
  ocr_thresholds:
    { high_confidence: 0.95, medium_confidence: 0.85, low_confidence: 0.70, reject_threshold: 0.50 }

  # The four weights sum to 1.0.
  ensemble_weights:
    { ocr_confidence: 0.4, source_type: 0.3, field_validation: 0.2, cross_reference: 0.1 }

  calibrated_confidence:
    method: "platt_scaling"
    calibration_data: "validation_set_predictions"
    bins: 10
|
||||
|
||||
# Which source is ranked first when documents disagree, per conflict type,
# and the escalation criteria.
conflict_resolution:
  # Keys are integer ranks (unquoted, so they load as ints).
  precedence_matrix:
    amount_conflicts:
      1: "bank_statement"
      2: "official_certificate"
      3: "invoice"
      4: "receipt"
      5: "manual_entry"

    date_conflicts:
      1: "bank_statement"
      2: "invoice"
      3: "receipt"
      4: "manual_entry"

    party_name_conflicts:
      1: "official_certificate"
      2: "bank_statement"
      3: "invoice"
      4: "manual_entry"

  escalation_criteria:
    amount_difference_threshold: 10.00
    confidence_gap_threshold: 0.3
    multiple_high_confidence_sources: true
|
||||
|
||||
# Validation switches and patterns for identifiers and common fields.
validation_rules:
  utr_checksum: true
  # NI number: two prefix letters, six digits, suffix A-D. Neither prefix
  # letter may be D, F, I, Q, U or V, and the second may also not be O, so
  # the two positions need separate character classes (a single shared class
  # wrongly accepts "O" in second position).
  # NOTE(review): unallocated prefixes (e.g. GB, ZZ) are still accepted.
  ni_number_regex: "^[A-CEGHJ-PR-TW-Z][A-CEGHJ-NPR-TW-Z]\\d{6}[A-D]$"
  iban_check: true
  vat_gb_mod97: true
  rounding_policy: "HMRC" # options: bankers|away_from_zero|HMRC
  numeric_tolerance: 0.01

  # Anchored regexes; backslashes are doubled because the values are
  # double-quoted YAML strings.
  field_validations:
    sort_code: "^\\d{2}-\\d{2}-\\d{2}$"
    account_number: "^\\d{8}$"
    postcode: "^[A-Z]{1,2}\\d[A-Z\\d]?\\s*\\d[A-Z]{2}$"
    email: "^[\\w\\.-]+@[\\w\\.-]+\\.[a-zA-Z]{2,}$"
    phone: "^(\\+44|0)[1-9]\\d{8,9}$"
|
||||
|
||||
# Entity matching: blocking keys, fuzzy-match thresholds, source priority and
# the matching algorithm per field.
entity_resolution:
  blocking_keys: [payer_name_norm, sort_code_last4, postcode, vat_number]

  fuzzy_thresholds: { name: 0.88, address: 0.85, phone: 0.90, email: 0.95 }

  # Ordered list; entry order is significant.
  canonical_source_priority:
    [bank_statement, official_certificate, prior_return, manual_entry]

  matching_algorithms:
    { name: "jaro_winkler", address: "levenshtein", postcode: "exact" }
|
||||
|
||||
# PII handling: which fields are PII, how they are masked, and what is
# removed or hashed in logs.
privacy_redaction:
  pii_fields:
    [ni_number, utr, iban, sort_code, account_number, phone, email, full_address]

  masking_rules:
    mask_except_last4:
      - "ni_number"
      - "utr"
      - "iban"
      - "sort_code"
      - "phone"
    mask_except_domain:
      - "email"
    # NOTE(review): this names "address" while pii_fields lists
    # "full_address", and account_number has no masking rule — confirm both
    # are intended.
    mask_house_number:
      - "address"

  log_sanitization:
    remove_fields:
      - "extracted_text"
      - "ocr_raw_output"
    hash_fields:
      - "text_hash"
      - "doc_checksum"
|
||||
|
||||
# Allowances and thresholds per jurisdiction and tax year. The two years
# below differ only in dividend_allowance (1000 vs 500).
# NOTE(review): confirm what higher_rate_threshold (125140) is meant to
# denote.
jurisdiction_overrides:
  uk_2023_24:
    { personal_allowance: 12570, basic_rate_threshold: 37700, higher_rate_threshold: 125140, dividend_allowance: 1000, savings_allowance_basic: 1000, savings_allowance_higher: 500 }

  uk_2024_25:
    { personal_allowance: 12570, basic_rate_threshold: 37700, higher_rate_threshold: 125140, dividend_allowance: 500, savings_allowance_basic: 1000, savings_allowance_higher: 500 }
|
||||
Reference in New Issue
Block a user