// FILE: db/neo4j_schema.cypher // Node constraints and indexes CREATE CONSTRAINT taxpayer_profile_id IF NOT EXISTS FOR (tp:TaxpayerProfile) REQUIRE tp.taxpayer_id IS UNIQUE; CREATE CONSTRAINT tax_year_label IF NOT EXISTS FOR (ty:TaxYear) REQUIRE ty.label IS UNIQUE; CREATE CONSTRAINT jurisdiction_code IF NOT EXISTS FOR (j:Jurisdiction) REQUIRE j.code IS UNIQUE; CREATE CONSTRAINT tax_form_id IF NOT EXISTS FOR (tf:TaxForm) REQUIRE tf.form_id IS UNIQUE; CREATE CONSTRAINT schedule_id IF NOT EXISTS FOR (s:Schedule) REQUIRE s.schedule_id IS UNIQUE; CREATE CONSTRAINT form_box_id IF NOT EXISTS FOR (fb:FormBox) REQUIRE (fb.form_id, fb.schedule_id, fb.box_id) IS UNIQUE; CREATE CONSTRAINT document_id IF NOT EXISTS FOR (d:Document) REQUIRE d.doc_id IS UNIQUE; CREATE CONSTRAINT evidence_id IF NOT EXISTS FOR (e:Evidence) REQUIRE e.snippet_id IS UNIQUE; CREATE CONSTRAINT party_id IF NOT EXISTS FOR (p:Party) REQUIRE p.party_id IS UNIQUE; CREATE CONSTRAINT account_id IF NOT EXISTS FOR (a:Account) REQUIRE a.account_id IS UNIQUE; CREATE CONSTRAINT calculation_id IF NOT EXISTS FOR (c:Calculation) REQUIRE c.formula_id IS UNIQUE; CREATE CONSTRAINT rule_id IF NOT EXISTS FOR (r:Rule) REQUIRE r.rule_id IS UNIQUE; CREATE CONSTRAINT etl_run_id IF NOT EXISTS FOR (etl:ETLRun) REQUIRE etl.run_id IS UNIQUE; // Composite indexes for temporal queries CREATE INDEX taxpayer_valid_time IF NOT EXISTS FOR (tp:TaxpayerProfile) ON (tp.valid_from, tp.valid_to); CREATE INDEX income_valid_time IF NOT EXISTS FOR (ii:IncomeItem) ON (ii.valid_from, ii.valid_to); CREATE INDEX expense_valid_time IF NOT EXISTS FOR (ei:ExpenseItem) ON (ei.valid_from, ei.valid_to); CREATE INDEX payment_valid_time IF NOT EXISTS FOR (p:Payment) ON (p.valid_from, p.valid_to); // System time indexes for audit trails CREATE INDEX taxpayer_system_time IF NOT EXISTS FOR (tp:TaxpayerProfile) ON (tp.asserted_at, tp.retracted_at); CREATE INDEX income_system_time IF NOT EXISTS FOR (ii:IncomeItem) ON (ii.asserted_at, ii.retracted_at); CREATE INDEX expense_system_time IF NOT EXISTS FOR (ei:ExpenseItem) ON (ei.asserted_at, ei.retracted_at); // Business logic indexes CREATE INDEX income_type_period IF NOT EXISTS FOR (ii:IncomeItem) ON (ii.type, ii.period_start, ii.period_end); CREATE INDEX expense_type_period IF NOT EXISTS FOR (ei:ExpenseItem) ON (ei.type, ei.period_start, ei.period_end); CREATE INDEX document_kind_date IF NOT EXISTS FOR (d:Document) ON (d.kind, d.date_range_start, d.date_range_end); CREATE INDEX evidence_doc_page IF NOT EXISTS FOR (e:Evidence) ON (e.doc_ref, e.page); CREATE INDEX party_type_name IF NOT EXISTS FOR (p:Party) ON (p.subtype, p.name); // Tax-specific indexes CREATE INDEX taxpayer_utr IF NOT EXISTS FOR (tp:TaxpayerProfile) ON (tp.utr); CREATE INDEX taxpayer_ni IF NOT EXISTS FOR (tp:TaxpayerProfile) ON (tp.ni_number); CREATE INDEX party_utr IF NOT EXISTS FOR (p:Party) ON (p.utr); CREATE INDEX party_vat IF NOT EXISTS FOR (p:Party) ON (p.vat_number); CREATE INDEX account_sort_code IF NOT EXISTS FOR (a:Account) ON (a.sort_code, a.account_no); // Provenance indexes CREATE INDEX evidence_text_hash IF NOT EXISTS FOR (e:Evidence) ON (e.text_hash); CREATE INDEX document_checksum IF NOT EXISTS FOR (d:Document) ON (d.checksum); // Performance indexes for calculations CREATE INDEX calculation_version IF NOT EXISTS FOR (c:Calculation) ON (c.version, c.effective_from); CREATE INDEX rule_effective_period IF NOT EXISTS FOR (r:Rule) ON (r.effective_from, r.effective_to); CREATE INDEX exchange_rate_date IF NOT EXISTS FOR (er:ExchangeRate) ON (er.ccy_from, er.ccy_to, er.date); // Full-text search indexes CREATE FULLTEXT INDEX document_content IF NOT EXISTS FOR (d:Document) ON EACH [d.title, d.description]; CREATE FULLTEXT INDEX party_search IF NOT EXISTS FOR (p:Party) ON EACH [p.name, p.trading_name]; CREATE FULLTEXT INDEX evidence_text IF NOT EXISTS FOR (e:Evidence) ON EACH [e.extracted_text]; // Node existence constraints CREATE CONSTRAINT taxpayer_required_fields IF NOT EXISTS FOR (tp:TaxpayerProfile) REQUIRE (tp.taxpayer_id, tp.type, tp.valid_from, tp.asserted_at) IS NOT NULL; CREATE CONSTRAINT document_required_fields IF NOT EXISTS FOR (d:Document) REQUIRE (d.doc_id, d.kind, d.checksum, d.valid_from, d.asserted_at) IS NOT NULL; CREATE CONSTRAINT evidence_required_fields IF NOT EXISTS FOR (e:Evidence) REQUIRE (e.snippet_id, e.doc_ref, e.page, e.text_hash, e.valid_from, e.asserted_at) IS NOT NULL; CREATE CONSTRAINT income_required_fields IF NOT EXISTS FOR (ii:IncomeItem) REQUIRE (ii.type, ii.gross, ii.currency, ii.valid_from, ii.asserted_at) IS NOT NULL; CREATE CONSTRAINT expense_required_fields IF NOT EXISTS FOR (ei:ExpenseItem) REQUIRE (ei.type, ei.amount, ei.currency, ei.valid_from, ei.asserted_at) IS NOT NULL; // Range constraints CREATE CONSTRAINT ocr_confidence_range IF NOT EXISTS FOR (e:Evidence) REQUIRE e.ocr_confidence >= 0 AND e.ocr_confidence <= 1; CREATE CONSTRAINT positive_amounts IF NOT EXISTS FOR (ii:IncomeItem) REQUIRE ii.gross >= 0; CREATE CONSTRAINT positive_expense IF NOT EXISTS FOR (ei:ExpenseItem) REQUIRE ei.amount >= 0; // Relationship type definitions (for documentation) // Core tax structure relationships // (:Schedule)-[:BELONGS_TO]->(:TaxForm) // (:TaxForm)-[:OF_TAX_YEAR]->(:TaxYear) // (:TaxYear)-[:IN_JURISDICTION]->(:Jurisdiction) // (:Schedule)-[:HAS_BOX]->(:FormBox) // Financial data relationships // (:IncomeItem|:ExpenseItem)-[:REPORTED_IN]->(:Schedule) // (:Calculation)-[:COMPUTES]->(:FormBox) // (:IncomeItem|:ExpenseItem)-[:DERIVED_FROM]->(:Evidence) // (:Evidence)-[:SUPPORTED_BY]->(:Document) // Party and account relationships // (:Payment)-[:PAID_BY]->(:Party) // (:Payment)-[:PAID_TO]->(:Party) // (:TaxpayerProfile)-[:OWNS]->(:PropertyAsset) // (:TaxpayerProfile)-[:EMPLOYED_BY]->(:Party) // (:Party)-[:HAS_ACCOUNT]->(:Account) // Temporal and audit relationships // (:IncomeItem|:ExpenseItem)-[:APPLIES_TO]->(:ExchangeRate) // (:Rule)-[:APPLIES]->(:IncomeItem|:ExpenseItem) // (:NormalizationEvent)-[:NORMALIZED_FROM]->(:IncomeItem|:ExpenseItem) // (:TaxpayerProfile)-[:HAS_VALID_BASIS]->(:Consent) // (any)-[:PRODUCED_BY]->(:ETLRun) // Temporal query helper procedures CALL apoc.custom.asProcedure( 'temporal.asOf', 'MATCH (n) WHERE n.valid_from <= $asOfDate AND (n.valid_to IS NULL OR n.valid_to > $asOfDate) AND n.asserted_at <= $asOfDate AND (n.retracted_at IS NULL OR n.retracted_at > $asOfDate) RETURN n', 'read', [['asOfDate', 'datetime']], [['node', 'node']] ); CALL apoc.custom.asProcedure( 'temporal.validDuring', 'MATCH (n) WHERE n.valid_from <= $endDate AND (n.valid_to IS NULL OR n.valid_to > $startDate) RETURN n', 'read', [['startDate', 'datetime'], ['endDate', 'datetime']], [['node', 'node']] );