Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
510 lines
13 KiB
Turtle
510 lines
13 KiB
Turtle
# FILE: schemas/shapes.ttl
# SHACL shapes for node/edge integrity

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix tax: <https://tax-kg.example.com/vocab#> .
@prefix time: <http://www.w3.org/2006/time#> .
@prefix prov: <http://www.w3.org/ns/prov#> .

# Prefix declarations for SHACL-SPARQL constraints.
# The sh:sparql constraints in this file point at this resource with
# `sh:prefixes tax:`. Turtle @prefix lines are not visible to the SPARQL
# strings; a SHACL processor only prepends prefixes that are declared with
# sh:declare on the resource named by sh:prefixes. The queries use the
# tax:, time: and prov: prefixes, so all three are declared here.
tax:
    a owl:Ontology ;
    sh:declare [
        sh:prefix "tax" ;
        sh:namespace "https://tax-kg.example.com/vocab#"^^xsd:anyURI ;
    ] ;
    sh:declare [
        sh:prefix "time" ;
        sh:namespace "http://www.w3.org/2006/time#"^^xsd:anyURI ;
    ] ;
    sh:declare [
        sh:prefix "prov" ;
        sh:namespace "http://www.w3.org/ns/prov#"^^xsd:anyURI ;
    ] .

# Base temporal shape for all nodes.
# Targets every instance of tax:TemporalNode and constrains its validity
# interval, its assertion/retraction timestamps, and its source metadata.
tax:TemporalNodeShape
    a sh:NodeShape ;
    sh:targetClass tax:TemporalNode ;

    # valid_from: exactly one xsd:dateTime.
    sh:property [
        sh:path time:hasBeginning ;
        sh:name "valid_from" ;
        sh:description "When the fact became valid in reality" ;
        sh:datatype xsd:dateTime ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # valid_to: optional, at most one xsd:dateTime.
    sh:property [
        sh:path time:hasEnd ;
        sh:name "valid_to" ;
        sh:description "When the fact ceased to be valid in reality" ;
        sh:datatype xsd:dateTime ;
        sh:maxCount 1 ;
    ] ;

    # asserted_at: exactly one xsd:dateTime.
    sh:property [
        sh:path prov:generatedAtTime ;
        sh:name "asserted_at" ;
        sh:description "When the fact was recorded in the system" ;
        sh:datatype xsd:dateTime ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # retracted_at: optional, at most one xsd:dateTime.
    sh:property [
        sh:path prov:invalidatedAtTime ;
        sh:name "retracted_at" ;
        sh:description "When the fact was retracted from the system" ;
        sh:datatype xsd:dateTime ;
        sh:maxCount 1 ;
    ] ;

    # source: exactly one xsd:string.
    sh:property [
        sh:path prov:wasAttributedTo ;
        sh:name "source" ;
        sh:description "Source of the information" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # extractor_version: exactly one xsd:string.
    sh:property [
        sh:path tax:extractorVersion ;
        sh:name "extractor_version" ;
        sh:description "Version of the extraction system" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] .

# TaxpayerProfile shape.
# Targets tax:TaxpayerProfile. The identifier and the taxpayer type are
# mandatory; utr, ni_number and residence are optional single values.
tax:TaxpayerProfileShape
    a sh:NodeShape ;
    sh:targetClass tax:TaxpayerProfile ;

    # taxpayer_id: required; letters, digits, underscore and hyphen only.
    sh:property [
        sh:path tax:taxpayerId ;
        sh:name "taxpayer_id" ;
        sh:datatype xsd:string ;
        sh:pattern "^[a-zA-Z0-9_-]+$" ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # type: required; one of the enumerated literals.
    sh:property [
        sh:path tax:taxpayerType ;
        sh:name "type" ;
        sh:in ( "Individual" "Partnership" "Company" ) ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # utr: optional; exactly ten digits when present.
    sh:property [
        sh:path tax:utr ;
        sh:name "utr" ;
        sh:description "Unique Taxpayer Reference" ;
        sh:datatype xsd:string ;
        sh:pattern "^[0-9]{10}$" ;
        sh:maxCount 1 ;
    ] ;

    # ni_number: optional; two prefix letters, six digits, suffix A-D.
    sh:property [
        sh:path tax:niNumber ;
        sh:name "ni_number" ;
        sh:description "National Insurance Number" ;
        sh:datatype xsd:string ;
        sh:pattern "^[A-CEGHJ-PR-TW-Z]{2}\\d{6}[A-D]$" ;
        sh:maxCount 1 ;
    ] ;

    # residence: optional free-form string.
    sh:property [
        sh:path tax:residence ;
        sh:name "residence" ;
        sh:datatype xsd:string ;
        sh:maxCount 1 ;
    ] .

# Document shape.
# Targets tax:Document. doc_id, kind and checksum are mandatory;
# file_size and pages are optional single integers.
tax:DocumentShape
    a sh:NodeShape ;
    sh:targetClass tax:Document ;

    # doc_id: required; "doc_" followed by 16 lowercase hex characters.
    sh:property [
        sh:path tax:docId ;
        sh:name "doc_id" ;
        sh:datatype xsd:string ;
        sh:pattern "^doc_[a-f0-9]{16}$" ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # kind: required; one of the enumerated document kinds.
    sh:property [
        sh:path tax:documentKind ;
        sh:name "kind" ;
        sh:in (
            "bank_statement" "invoice" "receipt" "p_and_l" "balance_sheet"
            "payslip" "dividend_voucher" "property_statement" "prior_return"
            "letter" "certificate"
        ) ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # checksum: required; 64 lowercase hex characters.
    sh:property [
        sh:path tax:checksum ;
        sh:name "checksum" ;
        sh:description "SHA-256 checksum of document content" ;
        sh:datatype xsd:string ;
        sh:pattern "^[a-f0-9]{64}$" ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # file_size: optional; integer >= 0.
    sh:property [
        sh:path tax:fileSize ;
        sh:name "file_size" ;
        sh:datatype xsd:integer ;
        sh:minInclusive 0 ;
        sh:maxCount 1 ;
    ] ;

    # pages: optional; integer >= 1.
    sh:property [
        sh:path tax:pageCount ;
        sh:name "pages" ;
        sh:datatype xsd:integer ;
        sh:minInclusive 1 ;
        sh:maxCount 1 ;
    ] .

# Evidence shape.
# Targets tax:Evidence. snippet_id, doc_ref, page and text_hash are
# mandatory; ocr_confidence is an optional decimal in [0.0, 1.0].
tax:EvidenceShape
    a sh:NodeShape ;
    sh:targetClass tax:Evidence ;

    # snippet_id: required; letters, digits, underscore and hyphen only.
    sh:property [
        sh:path tax:snippetId ;
        sh:name "snippet_id" ;
        sh:datatype xsd:string ;
        sh:pattern "^[a-zA-Z0-9_-]+$" ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # doc_ref: required string literal (not a node reference).
    sh:property [
        sh:path tax:docRef ;
        sh:name "doc_ref" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # page: required; integer >= 1.
    sh:property [
        sh:path tax:page ;
        sh:name "page" ;
        sh:datatype xsd:integer ;
        sh:minInclusive 1 ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # text_hash: required; 64 lowercase hex characters.
    sh:property [
        sh:path tax:textHash ;
        sh:name "text_hash" ;
        sh:description "SHA-256 hash of extracted text" ;
        sh:datatype xsd:string ;
        sh:pattern "^[a-f0-9]{64}$" ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # ocr_confidence: optional; decimal between 0.0 and 1.0 inclusive.
    sh:property [
        sh:path tax:ocrConfidence ;
        sh:name "ocr_confidence" ;
        sh:datatype xsd:decimal ;
        sh:minInclusive 0.0 ;
        sh:maxInclusive 1.0 ;
        sh:maxCount 1 ;
    ] .

# IncomeItem shape.
# Targets tax:IncomeItem. type, gross and currency are mandatory; net,
# tax_withheld and the period bounds are optional single values.
tax:IncomeItemShape
    a sh:NodeShape ;
    sh:targetClass tax:IncomeItem ;

    # type: required; one of the enumerated income categories.
    sh:property [
        sh:path tax:incomeType ;
        sh:name "type" ;
        sh:in ( "employment" "self_employment" "property" "dividend" "interest" "other" ) ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # gross: required; non-negative decimal.
    sh:property [
        sh:path tax:grossAmount ;
        sh:name "gross" ;
        sh:datatype xsd:decimal ;
        sh:minInclusive 0.0 ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # net: optional; non-negative decimal.
    sh:property [
        sh:path tax:netAmount ;
        sh:name "net" ;
        sh:datatype xsd:decimal ;
        sh:minInclusive 0.0 ;
        sh:maxCount 1 ;
    ] ;

    # tax_withheld: optional; non-negative decimal.
    sh:property [
        sh:path tax:taxWithheld ;
        sh:name "tax_withheld" ;
        sh:datatype xsd:decimal ;
        sh:minInclusive 0.0 ;
        sh:maxCount 1 ;
    ] ;

    # currency: required; three uppercase letters.
    sh:property [
        sh:path tax:currency ;
        sh:name "currency" ;
        sh:datatype xsd:string ;
        sh:pattern "^[A-Z]{3}$" ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # period_start: optional xsd:date.
    sh:property [
        sh:path tax:periodStart ;
        sh:name "period_start" ;
        sh:datatype xsd:date ;
        sh:maxCount 1 ;
    ] ;

    # period_end: optional xsd:date.
    sh:property [
        sh:path tax:periodEnd ;
        sh:name "period_end" ;
        sh:datatype xsd:date ;
        sh:maxCount 1 ;
    ] .

# ExpenseItem shape.
# Targets tax:ExpenseItem. type, amount and currency are mandatory; the
# two boolean flags are optional.
tax:ExpenseItemShape
    a sh:NodeShape ;
    sh:targetClass tax:ExpenseItem ;

    # type: required; one of the enumerated expense categories.
    sh:property [
        sh:path tax:expenseType ;
        sh:name "type" ;
        sh:in ( "business" "property" "capital" "personal" ) ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # amount: required; non-negative decimal.
    sh:property [
        sh:path tax:amount ;
        sh:name "amount" ;
        sh:datatype xsd:decimal ;
        sh:minInclusive 0.0 ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # currency: required; three uppercase letters.
    sh:property [
        sh:path tax:currency ;
        sh:name "currency" ;
        sh:datatype xsd:string ;
        sh:pattern "^[A-Z]{3}$" ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # allowable: optional boolean.
    sh:property [
        sh:path tax:allowable ;
        sh:name "allowable" ;
        sh:datatype xsd:boolean ;
        sh:maxCount 1 ;
    ] ;

    # capitalizable_flag: optional boolean.
    sh:property [
        sh:path tax:capitalizableFlag ;
        sh:name "capitalizable_flag" ;
        sh:datatype xsd:boolean ;
        sh:maxCount 1 ;
    ] .

# Party shape.
# Targets tax:Party. party_id and a non-empty name are mandatory;
# subtype, vat_number and utr are optional single values.
tax:PartyShape
    a sh:NodeShape ;
    sh:targetClass tax:Party ;

    # party_id: required string.
    sh:property [
        sh:path tax:partyId ;
        sh:name "party_id" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # name: required string of at least one character.
    sh:property [
        sh:path tax:name ;
        sh:name "name" ;
        sh:datatype xsd:string ;
        sh:minLength 1 ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # subtype: optional; one of the enumerated party roles.
    sh:property [
        sh:path tax:subtype ;
        sh:name "subtype" ;
        sh:in ( "Employer" "Payer" "Bank" "Landlord" "Tenant" "Supplier" "Client" ) ;
        sh:maxCount 1 ;
    ] ;

    # vat_number: optional; "GB" followed by 9 or 12 digits.
    sh:property [
        sh:path tax:vatNumber ;
        sh:name "vat_number" ;
        sh:description "UK VAT registration number" ;
        sh:datatype xsd:string ;
        sh:pattern "^GB[0-9]{9}$|^GB[0-9]{12}$" ;
        sh:maxCount 1 ;
    ] ;

    # utr: optional; exactly ten digits when present.
    sh:property [
        sh:path tax:utr ;
        sh:name "utr" ;
        sh:datatype xsd:string ;
        sh:pattern "^[0-9]{10}$" ;
        sh:maxCount 1 ;
    ] .

# Account shape.
# Targets tax:Account. Only account_id is mandatory; iban, sort_code and
# account_no are optional but format-checked when present.
tax:AccountShape
    a sh:NodeShape ;
    sh:targetClass tax:Account ;

    # account_id: required string.
    sh:property [
        sh:path tax:accountId ;
        sh:name "account_id" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # iban: optional; "GB", 2 digits, 4 uppercase letters, 14 digits.
    sh:property [
        sh:path tax:iban ;
        sh:name "iban" ;
        sh:description "UK IBAN format" ;
        sh:datatype xsd:string ;
        sh:pattern "^GB[0-9]{2}[A-Z]{4}[0-9]{14}$" ;
        sh:maxCount 1 ;
    ] ;

    # sort_code: optional; three hyphen-separated digit pairs.
    sh:property [
        sh:path tax:sortCode ;
        sh:name "sort_code" ;
        sh:datatype xsd:string ;
        sh:pattern "^[0-9]{2}-[0-9]{2}-[0-9]{2}$" ;
        sh:maxCount 1 ;
    ] ;

    # account_no: optional; exactly eight digits.
    sh:property [
        sh:path tax:accountNumber ;
        sh:name "account_no" ;
        sh:datatype xsd:string ;
        sh:pattern "^[0-9]{8}$" ;
        sh:maxCount 1 ;
    ] .

# PropertyAsset shape.
# Targets tax:PropertyAsset. property_id and address are mandatory;
# postcode, usage and ownership_share are optional single values.
tax:PropertyAssetShape
    a sh:NodeShape ;
    sh:targetClass tax:PropertyAsset ;

    # property_id: required string.
    sh:property [
        sh:path tax:propertyId ;
        sh:name "property_id" ;
        sh:datatype xsd:string ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # address: required string of at least ten characters.
    sh:property [
        sh:path tax:address ;
        sh:name "address" ;
        sh:datatype xsd:string ;
        sh:minLength 10 ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # postcode: optional; uppercase outward and inward parts, with
    # optional whitespace between them.
    sh:property [
        sh:path tax:postcode ;
        sh:name "postcode" ;
        sh:datatype xsd:string ;
        sh:pattern "^[A-Z]{1,2}[0-9][A-Z0-9]?\\s*[0-9][A-Z]{2}$" ;
        sh:maxCount 1 ;
    ] ;

    # usage: optional; one of the enumerated usage categories.
    sh:property [
        sh:path tax:usage ;
        sh:name "usage" ;
        sh:in ( "residential" "furnished_holiday_letting" "commercial" "mixed" ) ;
        sh:maxCount 1 ;
    ] ;

    # ownership_share: optional; decimal between 0.0 and 1.0 inclusive.
    sh:property [
        sh:path tax:ownershipShare ;
        sh:name "ownership_share" ;
        sh:datatype xsd:decimal ;
        sh:minInclusive 0.0 ;
        sh:maxInclusive 1.0 ;
        sh:maxCount 1 ;
    ] .

# Cross-node constraints.
# SPARQL-based ordering checks on tax:TemporalNode instances. Each SELECT
# returns $this for a node that violates the rule. The time: and prov:
# prefixes used inside the queries are resolved via `sh:prefixes tax:`,
# i.e. from sh:declare statements attached to the tax: resource.
tax:TemporalConsistencyShape
    a sh:NodeShape ;
    sh:targetClass tax:TemporalNode ;

    # Reported when both bounds are present and valid_to is not strictly
    # later than valid_from. Nodes without time:hasEnd never match.
    sh:sparql [
        sh:prefixes tax: ;
        sh:message "valid_to must be after valid_from" ;
        sh:select """
            SELECT $this
            WHERE {
                $this time:hasBeginning ?validFrom ;
                      time:hasEnd ?validTo .
                FILTER (?validTo <= ?validFrom)
            }
            """ ;
    ] ;

    # Reported when the assertion timestamp is earlier than valid_from;
    # equal timestamps are accepted.
    sh:sparql [
        sh:prefixes tax: ;
        sh:message "asserted_at must be after valid_from" ;
        sh:select """
            SELECT $this
            WHERE {
                $this time:hasBeginning ?validFrom ;
                      prov:generatedAtTime ?assertedAt .
                FILTER (?assertedAt < ?validFrom)
            }
            """ ;
    ] .

# Income/Expense consistency.
# SPARQL-based checks on tax:IncomeItem instances (the only target class
# of this shape). Each SELECT returns $this for a violating item. A rule
# only fires when both compared values are present on the item.
tax:FinancialConsistencyShape
    a sh:NodeShape ;
    sh:targetClass tax:IncomeItem ;

    # Reported when net is strictly greater than gross.
    sh:sparql [
        sh:prefixes tax: ;
        sh:message "net amount cannot exceed gross amount" ;
        sh:select """
            SELECT $this
            WHERE {
                $this tax:grossAmount ?gross ;
                      tax:netAmount ?net .
                FILTER (?net > ?gross)
            }
            """ ;
    ] ;

    # Reported when tax withheld is strictly greater than gross.
    sh:sparql [
        sh:prefixes tax: ;
        sh:message "tax withheld cannot exceed gross amount" ;
        sh:select """
            SELECT $this
            WHERE {
                $this tax:grossAmount ?gross ;
                      tax:taxWithheld ?tax .
                FILTER (?tax > ?gross)
            }
            """ ;
    ] .

# Evidence provenance requirements.
# Targets tax:IncomeItem, tax:ExpenseItem and tax:Payment. Every such
# node needs at least one tax:derivedFrom value, and each value must be
# an instance of tax:Evidence. There is no upper bound on the count.
tax:ProvenanceShape
    a sh:NodeShape ;
    sh:targetClass
        tax:IncomeItem ,
        tax:ExpenseItem ,
        tax:Payment ;

    sh:property [
        sh:path tax:derivedFrom ;
        sh:name "derived_from_evidence" ;
        sh:description "All financial facts must have evidence" ;
        sh:class tax:Evidence ;
        sh:minCount 1 ;
    ] .

# Document integrity.
# Targets tax:Document and reports every document that no evidence item
# refers to.
#
# tax:EvidenceShape constrains tax:docRef to an xsd:string literal, so a
# pattern that only matches `?evidence tax:docRef $this` (with $this being
# the document node) can never succeed on conforming data and would flag
# every document. The query therefore accepts either form of reference:
#   1. tax:docRef pointing directly at the document node (original check),
#   2. tax:docRef carrying the same string as the document's tax:docId.
# NOTE(review): form 2 assumes doc_ref stores the document's doc_id value;
# confirm against the ingestion code that writes tax:docRef.
tax:DocumentIntegrityShape
    a sh:NodeShape ;
    sh:targetClass tax:Document ;

    sh:sparql [
        sh:prefixes tax: ;
        sh:message "Document must have at least one evidence item" ;
        sh:select """
            SELECT $this
            WHERE {
                $this a tax:Document .
                FILTER NOT EXISTS {
                    ?evidence tax:docRef $this .
                }
                FILTER NOT EXISTS {
                    $this tax:docId ?docId .
                    ?evidenceById tax:docRef ?docId .
                }
            }
            """ ;
    ] .

# Calculation traceability.
# Targets tax:Calculation. Each calculation must link to exactly one
# tax:FormBox and to at least one tax:Rule.
tax:CalculationTraceabilityShape
    a sh:NodeShape ;
    sh:targetClass tax:Calculation ;

    # computes_form_box: exactly one value, which must be a tax:FormBox.
    sh:property [
        sh:path tax:computesFormBox ;
        sh:name "computes_form_box" ;
        sh:description "Each calculation must compute exactly one form box" ;
        sh:class tax:FormBox ;
        sh:minCount 1 ; sh:maxCount 1 ;
    ] ;

    # based_on_rule: one or more values, each of which must be a tax:Rule.
    sh:property [
        sh:path tax:basedOnRule ;
        sh:name "based_on_rule" ;
        sh:description "Calculations must reference applicable rules" ;
        sh:class tax:Rule ;
        sh:minCount 1 ;
    ] .