Files
ai-tax-agent/db/neo4j_schema.cypher
harkon b324ff09ef
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Initial commit
2025-10-11 08:41:36 +01:00

112 lines
6.6 KiB
Plaintext

// FILE: db/neo4j_schema.cypher
// Node constraints and indexes
CREATE CONSTRAINT taxpayer_profile_id IF NOT EXISTS FOR (tp:TaxpayerProfile) REQUIRE tp.taxpayer_id IS UNIQUE;
CREATE CONSTRAINT tax_year_label IF NOT EXISTS FOR (ty:TaxYear) REQUIRE ty.label IS UNIQUE;
CREATE CONSTRAINT jurisdiction_code IF NOT EXISTS FOR (j:Jurisdiction) REQUIRE j.code IS UNIQUE;
CREATE CONSTRAINT tax_form_id IF NOT EXISTS FOR (tf:TaxForm) REQUIRE tf.form_id IS UNIQUE;
CREATE CONSTRAINT schedule_id IF NOT EXISTS FOR (s:Schedule) REQUIRE s.schedule_id IS UNIQUE;
CREATE CONSTRAINT form_box_id IF NOT EXISTS FOR (fb:FormBox) REQUIRE (fb.form_id, fb.schedule_id, fb.box_id) IS UNIQUE;
CREATE CONSTRAINT document_id IF NOT EXISTS FOR (d:Document) REQUIRE d.doc_id IS UNIQUE;
CREATE CONSTRAINT evidence_id IF NOT EXISTS FOR (e:Evidence) REQUIRE e.snippet_id IS UNIQUE;
CREATE CONSTRAINT party_id IF NOT EXISTS FOR (p:Party) REQUIRE p.party_id IS UNIQUE;
CREATE CONSTRAINT account_id IF NOT EXISTS FOR (a:Account) REQUIRE a.account_id IS UNIQUE;
CREATE CONSTRAINT calculation_id IF NOT EXISTS FOR (c:Calculation) REQUIRE c.formula_id IS UNIQUE;
CREATE CONSTRAINT rule_id IF NOT EXISTS FOR (r:Rule) REQUIRE r.rule_id IS UNIQUE;
CREATE CONSTRAINT etl_run_id IF NOT EXISTS FOR (etl:ETLRun) REQUIRE etl.run_id IS UNIQUE;
// Composite indexes for temporal queries
CREATE INDEX taxpayer_valid_time IF NOT EXISTS FOR (tp:TaxpayerProfile) ON (tp.valid_from, tp.valid_to);
CREATE INDEX income_valid_time IF NOT EXISTS FOR (ii:IncomeItem) ON (ii.valid_from, ii.valid_to);
CREATE INDEX expense_valid_time IF NOT EXISTS FOR (ei:ExpenseItem) ON (ei.valid_from, ei.valid_to);
CREATE INDEX payment_valid_time IF NOT EXISTS FOR (p:Payment) ON (p.valid_from, p.valid_to);
// System time indexes for audit trails
CREATE INDEX taxpayer_system_time IF NOT EXISTS FOR (tp:TaxpayerProfile) ON (tp.asserted_at, tp.retracted_at);
CREATE INDEX income_system_time IF NOT EXISTS FOR (ii:IncomeItem) ON (ii.asserted_at, ii.retracted_at);
CREATE INDEX expense_system_time IF NOT EXISTS FOR (ei:ExpenseItem) ON (ei.asserted_at, ei.retracted_at);
// Business logic indexes
CREATE INDEX income_type_period IF NOT EXISTS FOR (ii:IncomeItem) ON (ii.type, ii.period_start, ii.period_end);
CREATE INDEX expense_type_period IF NOT EXISTS FOR (ei:ExpenseItem) ON (ei.type, ei.period_start, ei.period_end);
CREATE INDEX document_kind_date IF NOT EXISTS FOR (d:Document) ON (d.kind, d.date_range_start, d.date_range_end);
CREATE INDEX evidence_doc_page IF NOT EXISTS FOR (e:Evidence) ON (e.doc_ref, e.page);
CREATE INDEX party_type_name IF NOT EXISTS FOR (p:Party) ON (p.subtype, p.name);
// Tax-specific indexes
CREATE INDEX taxpayer_utr IF NOT EXISTS FOR (tp:TaxpayerProfile) ON (tp.utr);
CREATE INDEX taxpayer_ni IF NOT EXISTS FOR (tp:TaxpayerProfile) ON (tp.ni_number);
CREATE INDEX party_utr IF NOT EXISTS FOR (p:Party) ON (p.utr);
CREATE INDEX party_vat IF NOT EXISTS FOR (p:Party) ON (p.vat_number);
CREATE INDEX account_sort_code IF NOT EXISTS FOR (a:Account) ON (a.sort_code, a.account_no);
// Provenance indexes
CREATE INDEX evidence_text_hash IF NOT EXISTS FOR (e:Evidence) ON (e.text_hash);
CREATE INDEX document_checksum IF NOT EXISTS FOR (d:Document) ON (d.checksum);
// Performance indexes for calculations
CREATE INDEX calculation_version IF NOT EXISTS FOR (c:Calculation) ON (c.version, c.effective_from);
CREATE INDEX rule_effective_period IF NOT EXISTS FOR (r:Rule) ON (r.effective_from, r.effective_to);
CREATE INDEX exchange_rate_date IF NOT EXISTS FOR (er:ExchangeRate) ON (er.ccy_from, er.ccy_to, er.date);
// Full-text search indexes
CREATE FULLTEXT INDEX document_content IF NOT EXISTS FOR (d:Document) ON EACH [d.title, d.description];
CREATE FULLTEXT INDEX party_search IF NOT EXISTS FOR (p:Party) ON EACH [p.name, p.trading_name];
CREATE FULLTEXT INDEX evidence_text IF NOT EXISTS FOR (e:Evidence) ON EACH [e.extracted_text];
// Node existence constraints
CREATE CONSTRAINT taxpayer_required_fields IF NOT EXISTS FOR (tp:TaxpayerProfile) REQUIRE (tp.taxpayer_id, tp.type, tp.valid_from, tp.asserted_at) IS NOT NULL;
CREATE CONSTRAINT document_required_fields IF NOT EXISTS FOR (d:Document) REQUIRE (d.doc_id, d.kind, d.checksum, d.valid_from, d.asserted_at) IS NOT NULL;
CREATE CONSTRAINT evidence_required_fields IF NOT EXISTS FOR (e:Evidence) REQUIRE (e.snippet_id, e.doc_ref, e.page, e.text_hash, e.valid_from, e.asserted_at) IS NOT NULL;
CREATE CONSTRAINT income_required_fields IF NOT EXISTS FOR (ii:IncomeItem) REQUIRE (ii.type, ii.gross, ii.currency, ii.valid_from, ii.asserted_at) IS NOT NULL;
CREATE CONSTRAINT expense_required_fields IF NOT EXISTS FOR (ei:ExpenseItem) REQUIRE (ei.type, ei.amount, ei.currency, ei.valid_from, ei.asserted_at) IS NOT NULL;
// Range constraints
CREATE CONSTRAINT ocr_confidence_range IF NOT EXISTS FOR (e:Evidence) REQUIRE e.ocr_confidence >= 0 AND e.ocr_confidence <= 1;
CREATE CONSTRAINT positive_amounts IF NOT EXISTS FOR (ii:IncomeItem) REQUIRE ii.gross >= 0;
CREATE CONSTRAINT positive_expense IF NOT EXISTS FOR (ei:ExpenseItem) REQUIRE ei.amount >= 0;
// Relationship type definitions (for documentation)
// Core tax structure relationships
// (:Schedule)-[:BELONGS_TO]->(:TaxForm)
// (:TaxForm)-[:OF_TAX_YEAR]->(:TaxYear)
// (:TaxYear)-[:IN_JURISDICTION]->(:Jurisdiction)
// (:Schedule)-[:HAS_BOX]->(:FormBox)
// Financial data relationships
// (:IncomeItem|:ExpenseItem)-[:REPORTED_IN]->(:Schedule)
// (:Calculation)-[:COMPUTES]->(:FormBox)
// (:IncomeItem|:ExpenseItem)-[:DERIVED_FROM]->(:Evidence)
// (:Evidence)-[:SUPPORTED_BY]->(:Document)
// Party and account relationships
// (:Payment)-[:PAID_BY]->(:Party)
// (:Payment)-[:PAID_TO]->(:Party)
// (:TaxpayerProfile)-[:OWNS]->(:PropertyAsset)
// (:TaxpayerProfile)-[:EMPLOYED_BY]->(:Party)
// (:Party)-[:HAS_ACCOUNT]->(:Account)
// Temporal and audit relationships
// (:IncomeItem|:ExpenseItem)-[:APPLIES_TO]->(:ExchangeRate)
// (:Rule)-[:APPLIES]->(:IncomeItem|:ExpenseItem)
// (:NormalizationEvent)-[:NORMALIZED_FROM]->(:IncomeItem|:ExpenseItem)
// (:TaxpayerProfile)-[:HAS_VALID_BASIS]->(:Consent)
// (any)-[:PRODUCED_BY]->(:ETLRun)
// Temporal query helper procedures
CALL apoc.custom.asProcedure(
'temporal.asOf',
'MATCH (n) WHERE n.valid_from <= $asOfDate AND (n.valid_to IS NULL OR n.valid_to > $asOfDate) AND n.asserted_at <= $asOfDate AND (n.retracted_at IS NULL OR n.retracted_at > $asOfDate) RETURN n',
'read',
[['asOfDate', 'datetime']],
[['node', 'node']]
);
CALL apoc.custom.asProcedure(
'temporal.validDuring',
'MATCH (n) WHERE n.valid_from <= $endDate AND (n.valid_to IS NULL OR n.valid_to > $startDate) RETURN n',
'read',
[['startDate', 'datetime'], ['endDate', 'datetime']],
[['node', 'node']]
);