# FILE: schemas/shapes.ttl
# SHACL shapes for node/edge integrity.

# Namespace IRIs. The sh, xsd, time and prov IRIs are the published W3C
# namespaces for SHACL, XML Schema datatypes, OWL-Time and PROV-O.
@prefix sh:   <http://www.w3.org/ns/shacl#> .
@prefix xsd:  <http://www.w3.org/2001/XMLSchema#> .
# NOTE(review): the project namespace IRI was missing from this declaration.
# The IRI below is a PLACEHOLDER -- replace it with the real tax ontology
# namespace (and keep the sh:declare entry further down in sync) before use.
@prefix tax:  <https://example.org/tax#> .
@prefix time: <http://www.w3.org/2006/time#> .
@prefix prov: <http://www.w3.org/ns/prov#> .

# Prefix declarations for SHACL-SPARQL. The sh:sparql constraints in this file
# use `sh:prefixes tax:` and reference the tax:, time: and prov: prefixes inside
# their query strings; SHACL resolves those prefixes from sh:declare entries on
# the node named by sh:prefixes, not from the Turtle @prefix lines.
tax:
    sh:declare
        [ sh:prefix "tax"  ; sh:namespace "https://example.org/tax#"^^xsd:anyURI ] ,
        [ sh:prefix "time" ; sh:namespace "http://www.w3.org/2006/time#"^^xsd:anyURI ] ,
        [ sh:prefix "prov" ; sh:namespace "http://www.w3.org/ns/prov#"^^xsd:anyURI ] .

# Base temporal shape for all nodes.
# Applies to every instance of tax:TemporalNode and constrains two pairs of
# timestamps (valid_from/valid_to and asserted_at/retracted_at) plus the
# source and extractor version. The start of each pair is mandatory; the end
# is optional.
# NOTE(review): time:hasBeginning / time:hasEnd and prov:wasAttributedTo are
# defined by OWL-Time / PROV-O as object properties, whereas this shape
# requires literal values -- confirm that this reuse is intentional.
tax:TemporalNodeShape
    a sh:NodeShape ;
    sh:targetClass tax:TemporalNode ;

    # Start of real-world validity: exactly one xsd:dateTime.
    sh:property [
        sh:path time:hasBeginning ;
        sh:name "valid_from" ;
        sh:datatype xsd:dateTime ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:description "When the fact became valid in reality" ;
    ] ;

    # End of real-world validity: optional, at most one value.
    sh:property [
        sh:path time:hasEnd ;
        sh:name "valid_to" ;
        sh:datatype xsd:dateTime ;
        sh:maxCount 1 ;
        sh:description "When the fact ceased to be valid in reality" ;
    ] ;

    # Time the fact was recorded: exactly one xsd:dateTime.
    sh:property [
        sh:path prov:generatedAtTime ;
        sh:name "asserted_at" ;
        sh:datatype xsd:dateTime ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:description "When the fact was recorded in the system" ;
    ] ;

    # Time the fact was retracted: optional, at most one value.
    sh:property [
        sh:path prov:invalidatedAtTime ;
        sh:name "retracted_at" ;
        sh:datatype xsd:dateTime ;
        sh:maxCount 1 ;
        sh:description "When the fact was retracted from the system" ;
    ] ;

    # Source of the information: exactly one string.
    sh:property [
        sh:path prov:wasAttributedTo ;
        sh:name "source" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:description "Source of the information" ;
    ] ;

    # Version of the extractor that produced the node: exactly one string.
    sh:property [
        sh:path tax:extractorVersion ;
        sh:name "extractor_version" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:description "Version of the extraction system" ;
    ] .
# TaxpayerProfile shape.
# Applies to every instance of tax:TaxpayerProfile. The identifier and the
# taxpayer type are mandatory; UTR, NI number and residence are optional but
# may appear at most once.
tax:TaxpayerProfileShape
    a sh:NodeShape ;
    sh:targetClass tax:TaxpayerProfile ;

    # Identifier: letters, digits, underscore and hyphen only.
    sh:property [
        sh:path tax:taxpayerId ;
        sh:name "taxpayer_id" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:pattern "^[a-zA-Z0-9_-]+$" ;
    ] ;

    # Taxpayer category, restricted to the three listed values.
    sh:property [
        sh:path tax:taxpayerType ;
        sh:name "type" ;
        sh:in ( "Individual" "Partnership" "Company" ) ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
    ] ;

    # UTR: exactly ten digits.
    sh:property [
        sh:path tax:utr ;
        sh:name "utr" ;
        sh:datatype xsd:string ;
        sh:pattern "^[0-9]{10}$" ;
        sh:maxCount 1 ;
        sh:description "Unique Taxpayer Reference" ;
    ] ;

    # NI number: two prefix letters, six digits, suffix A-D.
    # Neither prefix letter may be D, F, I, Q, U or V; in addition the second
    # prefix letter may not be O, so it gets its own character class
    # (J-N, P instead of J-P). `\\d` is the Turtle escape for the regex `\d`.
    sh:property [
        sh:path tax:niNumber ;
        sh:name "ni_number" ;
        sh:datatype xsd:string ;
        sh:pattern "^[A-CEGHJ-PR-TW-Z][A-CEGHJ-NPR-TW-Z]\\d{6}[A-D]$" ;
        sh:maxCount 1 ;
        sh:description "National Insurance Number" ;
    ] ;

    # Residence: free-form string, optional.
    sh:property [
        sh:path tax:residence ;
        sh:name "residence" ;
        sh:datatype xsd:string ;
        sh:maxCount 1 ;
    ] .

# Document shape.
# Applies to every instance of tax:Document. Identifier, kind and checksum are
# mandatory; file size and page count are optional.
tax:DocumentShape
    a sh:NodeShape ;
    sh:targetClass tax:Document ;

    # Identifier: the literal prefix "doc_" followed by 16 lower-case hex digits.
    sh:property [
        sh:path tax:docId ;
        sh:name "doc_id" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:pattern "^doc_[a-f0-9]{16}$" ;
    ] ;

    # Document kind, restricted to the listed values.
    sh:property [
        sh:path tax:documentKind ;
        sh:name "kind" ;
        sh:in (
            "bank_statement" "invoice" "receipt" "p_and_l" "balance_sheet"
            "payslip" "dividend_voucher" "property_statement" "prior_return"
            "letter" "certificate"
        ) ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
    ] ;

    # Checksum: 64 lower-case hex digits.
    sh:property [
        sh:path tax:checksum ;
        sh:name "checksum" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:pattern "^[a-f0-9]{64}$" ;
        sh:description "SHA-256 checksum of document content" ;
    ] ;

    # File size: non-negative integer (unit is not stated in this file).
    sh:property [
        sh:path tax:fileSize ;
        sh:name "file_size" ;
        sh:datatype xsd:integer ;
        sh:minInclusive 0 ;
        sh:maxCount 1 ;
    ] ;

    # Page count: integer of at least 1.
    sh:property [
        sh:path tax:pageCount ;
        sh:name "pages" ;
        sh:datatype xsd:integer ;
        sh:minInclusive 1 ;
        sh:maxCount 1 ;
    ] .
# Evidence shape.
# Targets tax:Evidence. Snippet id, document reference, page and text hash are
# required exactly once; the OCR confidence is optional.
tax:EvidenceShape a sh:NodeShape ;
    sh:targetClass tax:Evidence ;
    sh:property
        [   # Snippet identifier: letters, digits, underscore and hyphen only.
            sh:path      tax:snippetId ;
            sh:name      "snippet_id" ;
            sh:minCount  1 ;
            sh:maxCount  1 ;
            sh:datatype  xsd:string ;
            sh:pattern   "^[a-zA-Z0-9_-]+$"
        ] ,
        [   # Reference to the originating document, held as a string literal.
            sh:path      tax:docRef ;
            sh:name      "doc_ref" ;
            sh:minCount  1 ;
            sh:maxCount  1 ;
            sh:datatype  xsd:string
        ] ,
        [   # Page number: integer of at least 1.
            sh:path          tax:page ;
            sh:name          "page" ;
            sh:minCount      1 ;
            sh:maxCount      1 ;
            sh:datatype      xsd:integer ;
            sh:minInclusive  1
        ] ,
        [   # Text hash: 64 lower-case hex digits.
            sh:path         tax:textHash ;
            sh:name         "text_hash" ;
            sh:description  "SHA-256 hash of extracted text" ;
            sh:minCount     1 ;
            sh:maxCount     1 ;
            sh:datatype     xsd:string ;
            sh:pattern      "^[a-f0-9]{64}$"
        ] ,
        [   # OCR confidence: decimal within the closed interval [0.0, 1.0].
            sh:path          tax:ocrConfidence ;
            sh:name          "ocr_confidence" ;
            sh:maxCount      1 ;
            sh:datatype      xsd:decimal ;
            sh:minInclusive  0.0 ;
            sh:maxInclusive  1.0
        ] .

# IncomeItem shape.
# Targets tax:IncomeItem. Income type, gross amount and currency are required
# exactly once; net amount, tax withheld and the period bounds are optional.
tax:IncomeItemShape a sh:NodeShape ;
    sh:targetClass tax:IncomeItem ;
    sh:property
        [   # Income category, restricted to the listed values.
            sh:path      tax:incomeType ;
            sh:name      "type" ;
            sh:minCount  1 ;
            sh:maxCount  1 ;
            sh:in        ( "employment" "self_employment" "property" "dividend" "interest" "other" )
        ] ,
        [   # Gross amount: non-negative decimal.
            sh:path          tax:grossAmount ;
            sh:name          "gross" ;
            sh:minCount      1 ;
            sh:maxCount      1 ;
            sh:datatype      xsd:decimal ;
            sh:minInclusive  0.0
        ] ,
        [   # Net amount: non-negative decimal.
            sh:path          tax:netAmount ;
            sh:name          "net" ;
            sh:maxCount      1 ;
            sh:datatype      xsd:decimal ;
            sh:minInclusive  0.0
        ] ,
        [   # Tax withheld: non-negative decimal.
            sh:path          tax:taxWithheld ;
            sh:name          "tax_withheld" ;
            sh:maxCount      1 ;
            sh:datatype      xsd:decimal ;
            sh:minInclusive  0.0
        ] ,
        [   # Currency code: exactly three upper-case letters.
            sh:path      tax:currency ;
            sh:name      "currency" ;
            sh:minCount  1 ;
            sh:maxCount  1 ;
            sh:datatype  xsd:string ;
            sh:pattern   "^[A-Z]{3}$"
        ] ,
        [   # Start of the income period, as an xsd:date.
            sh:path      tax:periodStart ;
            sh:name      "period_start" ;
            sh:maxCount  1 ;
            sh:datatype  xsd:date
        ] ,
        [   # End of the income period, as an xsd:date.
            sh:path      tax:periodEnd ;
            sh:name      "period_end" ;
            sh:maxCount  1 ;
            sh:datatype  xsd:date
        ] .
# ExpenseItem shape.
# Targets tax:ExpenseItem. Expense type, amount and currency are required
# exactly once; the two boolean flags are optional.
tax:ExpenseItemShape a sh:NodeShape ;
    sh:targetClass tax:ExpenseItem ;
    sh:property
        [   # Expense category, restricted to the listed values.
            sh:path      tax:expenseType ;
            sh:name      "type" ;
            sh:minCount  1 ;
            sh:maxCount  1 ;
            sh:in        ( "business" "property" "capital" "personal" )
        ] ,
        [   # Amount: non-negative decimal.
            sh:path          tax:amount ;
            sh:name          "amount" ;
            sh:minCount      1 ;
            sh:maxCount      1 ;
            sh:datatype      xsd:decimal ;
            sh:minInclusive  0.0
        ] ,
        [   # Currency code: exactly three upper-case letters.
            sh:path      tax:currency ;
            sh:name      "currency" ;
            sh:minCount  1 ;
            sh:maxCount  1 ;
            sh:datatype  xsd:string ;
            sh:pattern   "^[A-Z]{3}$"
        ] ,
        [   # Allowable flag: boolean, at most one value.
            sh:path      tax:allowable ;
            sh:name      "allowable" ;
            sh:maxCount  1 ;
            sh:datatype  xsd:boolean
        ] ,
        [   # Capitalizable flag: boolean, at most one value.
            sh:path      tax:capitalizableFlag ;
            sh:name      "capitalizable_flag" ;
            sh:maxCount  1 ;
            sh:datatype  xsd:boolean
        ] .

# Party shape.
# Targets tax:Party. Identifier and a non-empty name are required exactly once;
# subtype, VAT number and UTR are optional.
tax:PartyShape a sh:NodeShape ;
    sh:targetClass tax:Party ;
    sh:property
        [   # Party identifier: any string.
            sh:path      tax:partyId ;
            sh:name      "party_id" ;
            sh:minCount  1 ;
            sh:maxCount  1 ;
            sh:datatype  xsd:string
        ] ,
        [   # Name: string with at least one character.
            sh:path       tax:name ;
            sh:name       "name" ;
            sh:minCount   1 ;
            sh:maxCount   1 ;
            sh:datatype   xsd:string ;
            sh:minLength  1
        ] ,
        [   # Party role, restricted to the listed values.
            sh:path      tax:subtype ;
            sh:name      "subtype" ;
            sh:maxCount  1 ;
            sh:in        ( "Employer" "Payer" "Bank" "Landlord" "Tenant" "Supplier" "Client" )
        ] ,
        [   # VAT number: "GB" followed by either 9 or 12 digits.
            sh:path         tax:vatNumber ;
            sh:name         "vat_number" ;
            sh:description  "UK VAT registration number" ;
            sh:maxCount     1 ;
            sh:datatype     xsd:string ;
            sh:pattern      "^GB[0-9]{9}$|^GB[0-9]{12}$"
        ] ,
        [   # UTR: exactly ten digits.
            sh:path      tax:utr ;
            sh:name      "utr" ;
            sh:maxCount  1 ;
            sh:datatype  xsd:string ;
            sh:pattern   "^[0-9]{10}$"
        ] .
# Account shape.
# Applies to every instance of tax:Account. Only the account id is mandatory;
# IBAN, sort code and account number are optional, pattern-checked strings.
tax:AccountShape
    a sh:NodeShape ;
    sh:targetClass tax:Account ;

    sh:property [
        sh:path tax:accountId ;
        sh:name "account_id" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
    ] ;

    # IBAN: "GB", two digits, four upper-case letters, fourteen digits.
    sh:property [
        sh:path tax:iban ;
        sh:name "iban" ;
        sh:datatype xsd:string ;
        sh:pattern "^GB[0-9]{2}[A-Z]{4}[0-9]{14}$" ;
        sh:maxCount 1 ;
        sh:description "UK IBAN format" ;
    ] ;

    # Sort code: three hyphen-separated pairs of digits.
    sh:property [
        sh:path tax:sortCode ;
        sh:name "sort_code" ;
        sh:datatype xsd:string ;
        sh:pattern "^[0-9]{2}-[0-9]{2}-[0-9]{2}$" ;
        sh:maxCount 1 ;
    ] ;

    # Account number: exactly eight digits.
    sh:property [
        sh:path tax:accountNumber ;
        sh:name "account_no" ;
        sh:datatype xsd:string ;
        sh:pattern "^[0-9]{8}$" ;
        sh:maxCount 1 ;
    ] .

# PropertyAsset shape.
# Applies to every instance of tax:PropertyAsset. Property id and address are
# mandatory; postcode, usage and ownership share are optional.
tax:PropertyAssetShape
    a sh:NodeShape ;
    sh:targetClass tax:PropertyAsset ;

    sh:property [
        sh:path tax:propertyId ;
        sh:name "property_id" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
    ] ;

    # Address: string of at least ten characters.
    sh:property [
        sh:path tax:address ;
        sh:name "address" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:minLength 10 ;
    ] ;

    # Postcode: upper-case outward and inward parts with optional whitespace
    # between them. `\\s` is the Turtle escape for the regex `\s`.
    sh:property [
        sh:path tax:postcode ;
        sh:name "postcode" ;
        sh:datatype xsd:string ;
        sh:pattern "^[A-Z]{1,2}[0-9][A-Z0-9]?\\s*[0-9][A-Z]{2}$" ;
        sh:maxCount 1 ;
    ] ;

    # Usage category, restricted to the listed values.
    sh:property [
        sh:path tax:usage ;
        sh:name "usage" ;
        sh:in ( "residential" "furnished_holiday_letting" "commercial" "mixed" ) ;
        sh:maxCount 1 ;
    ] ;

    # Ownership share: decimal within the closed interval [0.0, 1.0].
    sh:property [
        sh:path tax:ownershipShare ;
        sh:name "ownership_share" ;
        sh:datatype xsd:decimal ;
        sh:minInclusive 0.0 ;
        sh:maxInclusive 1.0 ;
        sh:maxCount 1 ;
    ] .

# Cross-node constraints.
# SPARQL-based checks on tax:TemporalNode instances. Each SELECT returns the
# focus node ($this) when the constraint is violated.
# NOTE(review): `sh:prefixes tax:` only works if the node tax: carries
# sh:declare entries for the tax, time and prov prefixes used in the queries
# -- verify that those declarations exist in the shapes graph.
tax:TemporalConsistencyShape
    a sh:NodeShape ;
    sh:targetClass tax:TemporalNode ;

    # Violation when valid_to is equal to or earlier than valid_from.
    sh:sparql [
        sh:message "valid_to must be after valid_from" ;
        sh:prefixes tax: ;
        sh:select """
            SELECT $this
            WHERE {
                $this time:hasBeginning ?validFrom ;
                      time:hasEnd ?validTo .
                FILTER (?validTo <= ?validFrom)
            }
            """ ;
    ] ;

    # Violation when asserted_at is strictly earlier than valid_from; equal
    # timestamps are accepted, which is why the message says "not before".
    sh:sparql [
        sh:message "asserted_at must not be before valid_from" ;
        sh:prefixes tax: ;
        sh:select """
            SELECT $this
            WHERE {
                $this time:hasBeginning ?validFrom ;
                      prov:generatedAtTime ?assertedAt .
                FILTER (?assertedAt < ?validFrom)
            }
            """ ;
    ] .

# Income/Expense consistency.
# SPARQL-based checks on tax:IncomeItem instances; both checks only fire when
# the optional amount being compared is present.
tax:FinancialConsistencyShape
    a sh:NodeShape ;
    sh:targetClass tax:IncomeItem ;

    # Violation when the net amount is greater than the gross amount.
    sh:sparql [
        sh:message "net amount cannot exceed gross amount" ;
        sh:prefixes tax: ;
        sh:select """
            SELECT $this
            WHERE {
                $this tax:grossAmount ?gross ;
                      tax:netAmount ?net .
                FILTER (?net > ?gross)
            }
            """ ;
    ] ;

    # Violation when the tax withheld is greater than the gross amount.
    sh:sparql [
        sh:message "tax withheld cannot exceed gross amount" ;
        sh:prefixes tax: ;
        sh:select """
            SELECT $this
            WHERE {
                $this tax:grossAmount ?gross ;
                      tax:taxWithheld ?tax .
                FILTER (?tax > ?gross)
            }
            """ ;
    ] .

# Evidence provenance requirements.
# Every tax:IncomeItem, tax:ExpenseItem and tax:Payment must link to at least
# one tax:Evidence node through tax:derivedFrom.
tax:ProvenanceShape
    a sh:NodeShape ;
    sh:targetClass tax:IncomeItem , tax:ExpenseItem , tax:Payment ;

    sh:property [
        sh:path tax:derivedFrom ;
        sh:name "derived_from_evidence" ;
        sh:class tax:Evidence ;
        sh:minCount 1 ;
        sh:description "All financial facts must have evidence" ;
    ] .

# Document integrity.
# Reports a tax:Document that no evidence node refers to. The Evidence shape in
# this file constrains tax:docRef to an xsd:string literal, so matching only on
# the document IRI ($this) can never succeed for conforming data and would
# report every document. The check therefore accepts either form: a docRef
# pointing at the document node itself, or a docRef string equal to the
# document's tax:docId.
# NOTE(review): that docRef carries the docId value is an assumption -- confirm
# against the code that writes Evidence nodes.
tax:DocumentIntegrityShape
    a sh:NodeShape ;
    sh:targetClass tax:Document ;

    sh:sparql [
        sh:message "Document must have at least one evidence item" ;
        sh:prefixes tax: ;
        sh:select """
            SELECT $this
            WHERE {
                $this a tax:Document .
                FILTER NOT EXISTS {
                    { ?evidence tax:docRef $this . }
                    UNION
                    { $this tax:docId ?docId .
                      ?evidence tax:docRef ?docId . }
                }
            }
            """ ;
    ] .

# Calculation traceability.
# Every tax:Calculation must point to exactly one tax:FormBox and to at least
# one tax:Rule.
tax:CalculationTraceabilityShape
    a sh:NodeShape ;
    sh:targetClass tax:Calculation ;

    sh:property [
        sh:path tax:computesFormBox ;
        sh:name "computes_form_box" ;
        sh:class tax:FormBox ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:description "Each calculation must compute exactly one form box" ;
    ] ;

    sh:property [
        sh:path tax:basedOnRule ;
        sh:name "based_on_rule" ;
        sh:class tax:Rule ;
        sh:minCount 1 ;
        sh:description "Calculations must reference applicable rules" ;
    ] .