{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Tax Knowledge Graph Schema",
  "description": "Validates a single graph node. An instance must match exactly one of the node schemas listed under oneOf; each branch pins node_type to a distinct constant and requires it.",
  "definitions": {
    "temporal_properties": {
      "description": "Timestamp, source and extractor-version fields merged into every node schema via allOf.",
      "type": "object",
      "properties": {
        "valid_from": { "type": "string", "format": "date-time" },
        "valid_to": { "type": "string", "format": "date-time" },
        "asserted_at": { "type": "string", "format": "date-time" },
        "retracted_at": {
          "description": "May be null as well as a date-time string.",
          "type": ["string", "null"],
          "format": "date-time"
        },
        "source": { "type": "string" },
        "extractor_version": { "type": "string" }
      },
      "required": ["valid_from", "asserted_at", "source", "extractor_version"]
    },
    "bbox": {
      "description": "Rectangle given as x, y, width and height, all required. Units and origin are not fixed by this schema.",
      "type": "object",
      "properties": {
        "x": { "type": "number" },
        "y": { "type": "number" },
        "width": { "type": "number" },
        "height": { "type": "number" }
      },
      "required": ["x", "y", "width", "height"]
    },
    "provenance": {
      "description": "Source-document pointer (doc_id, page, text_hash, plus optional bbox and ocr_confidence) merged into IncomeItem and ExpenseItem via allOf.",
      "type": "object",
      "properties": {
        "doc_id": { "type": "string" },
        "page": { "type": "integer", "minimum": 1 },
        "bbox": { "$ref": "#/definitions/bbox" },
        "text_hash": { "type": "string" },
        "ocr_confidence": { "type": "number", "minimum": 0, "maximum": 1 }
      },
      "required": ["doc_id", "page", "text_hash"]
    }
  },
  "oneOf": [
    {
      "title": "TaxpayerProfile",
      "description": "Node with node_type \"TaxpayerProfile\". Requires taxpayer_id and type in addition to the temporal fields.",
      "type": "object",
      "allOf": [{ "$ref": "#/definitions/temporal_properties" }],
      "properties": {
        "node_type": { "const": "TaxpayerProfile" },
        "taxpayer_id": { "type": "string" },
        "type": { "enum": ["Individual", "Partnership", "Company"] },
        "residence": { "type": "string" },
        "contact": {
          "type": "object",
          "properties": {
            "email": { "type": "string", "format": "email" },
            "phone": { "type": "string" },
            "address": { "type": "string" }
          }
        },
        "tax_years": {
          "type": "array",
          "items": { "type": "string" }
        },
        "utr": {
          "description": "Exactly ten ASCII digits.",
          "type": "string",
          "pattern": "^[0-9]{10}$"
        },
        "ni_number": {
          "description": "Two prefix letters, six ASCII digits and a suffix letter A-D. D, F, I, Q, U and V are excluded from both prefix letters; O is additionally excluded from the second.",
          "type": "string",
          "pattern": "^[A-CEGHJ-PR-TW-Z][A-CEGHJ-NPR-TW-Z][0-9]{6}[A-D]$"
        }
      },
      "required": ["node_type", "taxpayer_id", "type"]
    },
    {
      "title": "TaxYear",
      "description": "Node with node_type \"TaxYear\". Requires label, start_date, end_date and jurisdiction_ref in addition to the temporal fields.",
      "type": "object",
      "allOf": [{ "$ref": "#/definitions/temporal_properties" }],
      "properties": {
        "node_type": { "const": "TaxYear" },
        "label": { "type": "string" },
        "start_date": { "type": "string", "format": "date" },
        "end_date": { "type": "string", "format": "date" },
        "jurisdiction_ref": { "type": "string" }
      },
      "required": [
        "node_type",
        "label",
        "start_date",
        "end_date",
        "jurisdiction_ref"
      ]
    },
    {
      "title": "Document",
      "description": "Node with node_type \"Document\". Requires doc_id, kind, source and checksum in addition to the temporal fields.",
      "type": "object",
      "allOf": [{ "$ref": "#/definitions/temporal_properties" }],
      "properties": {
        "node_type": { "const": "Document" },
        "doc_id": { "type": "string" },
        "kind": {
          "enum": [
            "bank_statement",
            "invoice",
            "receipt",
            "p_and_l",
            "balance_sheet",
            "payslip",
            "dividend_voucher",
            "property_statement",
            "prior_return",
            "letter",
            "certificate"
          ]
        },
        "source": { "type": "string" },
        "mime": { "type": "string" },
        "date_range": {
          "type": "object",
          "properties": {
            "start": { "type": "string", "format": "date" },
            "end": { "type": "string", "format": "date" }
          }
        },
        "checksum": { "type": "string" },
        "file_size": {
          "description": "Non-negative integer; the unit is not fixed by this schema.",
          "type": "integer",
          "minimum": 0
        },
        "pages": { "type": "integer", "minimum": 1 }
      },
      "required": ["node_type", "doc_id", "kind", "source", "checksum"]
    },
    {
      "title": "Evidence",
      "description": "Node with node_type \"Evidence\". Requires snippet_id, doc_ref, page, bbox and text_hash in addition to the temporal fields.",
      "type": "object",
      "allOf": [{ "$ref": "#/definitions/temporal_properties" }],
      "properties": {
        "node_type": { "const": "Evidence" },
        "snippet_id": { "type": "string" },
        "doc_ref": { "type": "string" },
        "page": { "type": "integer", "minimum": 1 },
        "bbox": { "$ref": "#/definitions/bbox" },
        "text_hash": { "type": "string" },
        "ocr_confidence": { "type": "number", "minimum": 0, "maximum": 1 },
        "extracted_text": { "type": "string" }
      },
      "required": [
        "node_type",
        "snippet_id",
        "doc_ref",
        "page",
        "bbox",
        "text_hash"
      ]
    },
    {
      "title": "IncomeItem",
      "description": "Node with node_type \"IncomeItem\". Requires type, gross and currency in addition to the temporal and provenance fields.",
      "type": "object",
      "allOf": [
        { "$ref": "#/definitions/temporal_properties" },
        { "$ref": "#/definitions/provenance" }
      ],
      "properties": {
        "node_type": { "const": "IncomeItem" },
        "type": {
          "enum": [
            "employment",
            "self_employment",
            "property",
            "dividend",
            "interest",
            "other"
          ]
        },
        "gross": { "type": "number" },
        "net": { "type": "number" },
        "tax_withheld": { "type": "number" },
        "period_start": { "type": "string", "format": "date" },
        "period_end": { "type": "string", "format": "date" },
        "currency": {
          "description": "Exactly three uppercase ASCII letters.",
          "type": "string",
          "pattern": "^[A-Z]{3}$"
        },
        "description": { "type": "string" }
      },
      "required": ["node_type", "type", "gross", "currency"]
    },
    {
      "title": "ExpenseItem",
      "description": "Node with node_type \"ExpenseItem\". Requires type, amount and currency in addition to the temporal and provenance fields.",
      "type": "object",
      "allOf": [
        { "$ref": "#/definitions/temporal_properties" },
        { "$ref": "#/definitions/provenance" }
      ],
      "properties": {
        "node_type": { "const": "ExpenseItem" },
        "type": { "enum": ["business", "property", "capital", "personal"] },
        "amount": { "type": "number" },
        "category": { "type": "string" },
        "capitalizable_flag": { "type": "boolean" },
        "currency": {
          "description": "Exactly three uppercase ASCII letters.",
          "type": "string",
          "pattern": "^[A-Z]{3}$"
        },
        "description": { "type": "string" },
        "allowable": { "type": "boolean" }
      },
      "required": ["node_type", "type", "amount", "currency"]
    }
  ]
}