Files
ai-tax-agent/schemas/kg_schema.json
harkon fdba81809f
Some checks failed
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
completed local setup with compose
2025-11-26 13:17:17 +00:00

203 lines
6.4 KiB
JSON

{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Tax Knowledge Graph Schema",
"definitions": {
"temporal_properties": {
"type": "object",
"properties": {
"valid_from": { "type": "string", "format": "date-time" },
"valid_to": { "type": "string", "format": "date-time" },
"asserted_at": { "type": "string", "format": "date-time" },
"retracted_at": { "type": ["string", "null"], "format": "date-time" },
"source": { "type": "string" },
"extractor_version": { "type": "string" }
},
"required": ["valid_from", "asserted_at", "source", "extractor_version"]
},
"provenance": {
"type": "object",
"properties": {
"doc_id": { "type": "string" },
"page": { "type": "integer", "minimum": 1 },
"bbox": {
"type": "object",
"properties": {
"x": { "type": "number" },
"y": { "type": "number" },
"width": { "type": "number" },
"height": { "type": "number" }
},
"required": ["x", "y", "width", "height"]
},
"text_hash": { "type": "string" },
"ocr_confidence": { "type": "number", "minimum": 0, "maximum": 1 }
},
"required": ["doc_id", "page", "text_hash"]
}
},
"oneOf": [
{
"title": "TaxpayerProfile",
"type": "object",
"properties": {
"node_type": { "const": "TaxpayerProfile" },
"taxpayer_id": { "type": "string" },
"type": { "enum": ["Individual", "Partnership", "Company"] },
"residence": { "type": "string" },
"contact": {
"type": "object",
"properties": {
"email": { "type": "string", "format": "email" },
"phone": { "type": "string" },
"address": { "type": "string" }
}
},
"tax_years": { "type": "array", "items": { "type": "string" } },
"utr": { "type": "string", "pattern": "^[0-9]{10}$" },
"ni_number": {
"type": "string",
"pattern": "^[A-CEGHJ-PR-TW-Z]{2}\\d{6}[A-D]$"
}
},
"allOf": [{ "$ref": "#/definitions/temporal_properties" }],
"required": ["node_type", "taxpayer_id", "type"]
},
{
"title": "TaxYear",
"type": "object",
"properties": {
"node_type": { "const": "TaxYear" },
"label": { "type": "string" },
"start_date": { "type": "string", "format": "date" },
"end_date": { "type": "string", "format": "date" },
"jurisdiction_ref": { "type": "string" }
},
"allOf": [{ "$ref": "#/definitions/temporal_properties" }],
"required": [
"node_type",
"label",
"start_date",
"end_date",
"jurisdiction_ref"
]
},
{
"title": "Document",
"type": "object",
"properties": {
"node_type": { "const": "Document" },
"doc_id": { "type": "string" },
"kind": {
"enum": [
"bank_statement",
"invoice",
"receipt",
"p_and_l",
"balance_sheet",
"payslip",
"dividend_voucher",
"property_statement",
"prior_return",
"letter",
"certificate"
]
},
"source": { "type": "string" },
"mime": { "type": "string" },
"date_range": {
"type": "object",
"properties": {
"start": { "type": "string", "format": "date" },
"end": { "type": "string", "format": "date" }
}
},
"checksum": { "type": "string" },
"file_size": { "type": "integer" },
"pages": { "type": "integer", "minimum": 1 }
},
"allOf": [{ "$ref": "#/definitions/temporal_properties" }],
"required": ["node_type", "doc_id", "kind", "source", "checksum"]
},
{
"title": "Evidence",
"type": "object",
"properties": {
"node_type": { "const": "Evidence" },
"snippet_id": { "type": "string" },
"doc_ref": { "type": "string" },
"page": { "type": "integer", "minimum": 1 },
"bbox": {
"type": "object",
"properties": {
"x": { "type": "number" },
"y": { "type": "number" },
"width": { "type": "number" },
"height": { "type": "number" }
},
"required": ["x", "y", "width", "height"]
},
"text_hash": { "type": "string" },
"ocr_confidence": { "type": "number", "minimum": 0, "maximum": 1 },
"extracted_text": { "type": "string" }
},
"allOf": [{ "$ref": "#/definitions/temporal_properties" }],
"required": [
"node_type",
"snippet_id",
"doc_ref",
"page",
"bbox",
"text_hash"
]
},
{
"title": "IncomeItem",
"type": "object",
"properties": {
"node_type": { "const": "IncomeItem" },
"type": {
"enum": [
"employment",
"self_employment",
"property",
"dividend",
"interest",
"other"
]
},
"gross": { "type": "number" },
"net": { "type": "number" },
"tax_withheld": { "type": "number" },
"period_start": { "type": "string", "format": "date" },
"period_end": { "type": "string", "format": "date" },
"currency": { "type": "string", "pattern": "^[A-Z]{3}$" },
"description": { "type": "string" }
},
"allOf": [
{ "$ref": "#/definitions/temporal_properties" },
{ "$ref": "#/definitions/provenance" }
],
"required": ["node_type", "type", "gross", "currency"]
},
{
"title": "ExpenseItem",
"type": "object",
"properties": {
"node_type": { "const": "ExpenseItem" },
"type": { "enum": ["business", "property", "capital", "personal"] },
"amount": { "type": "number" },
"category": { "type": "string" },
"capitalizable_flag": { "type": "boolean" },
"currency": { "type": "string", "pattern": "^[A-Z]{3}$" },
"description": { "type": "string" },
"allowable": { "type": "boolean" }
},
"allOf": [
{ "$ref": "#/definitions/temporal_properties" },
{ "$ref": "#/definitions/provenance" }
],
"required": ["node_type", "type", "amount", "currency"]
}
]
}