full ingestion -> OCR -> extraction flow is now working correctly.
Some checks failed
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Some checks failed
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
This commit is contained in:
@@ -64,28 +64,6 @@ Return a JSON object with the extracted fields and confidence scores.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
# Create app and settings
|
|
||||||
app, settings = create_app(
|
|
||||||
service_name="svc-extract",
|
|
||||||
title="Tax Agent Extraction Service",
|
|
||||||
description="LLM-based field extraction service",
|
|
||||||
settings_class=ExtractionSettings,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Add middleware
|
|
||||||
middleware_factory = create_trusted_proxy_middleware(settings.internal_cidrs)
|
|
||||||
app.add_middleware(middleware_factory)
|
|
||||||
|
|
||||||
# Global clients
|
|
||||||
storage_client: StorageClient | None = None
|
|
||||||
document_storage: DocumentStorage | None = None
|
|
||||||
event_bus: EventBus | None = None
|
|
||||||
confidence_calibrator: ConfidenceCalibrator | None = None
|
|
||||||
tracer = get_tracer("svc-extract")
|
|
||||||
metrics = get_metrics()
|
|
||||||
|
|
||||||
|
|
||||||
@app.on_event("startup")
|
|
||||||
async def startup_event() -> None:
|
async def startup_event() -> None:
|
||||||
"""Initialize service dependencies"""
|
"""Initialize service dependencies"""
|
||||||
global storage_client, document_storage, event_bus, confidence_calibrator
|
global storage_client, document_storage, event_bus, confidence_calibrator
|
||||||
@@ -116,7 +94,6 @@ async def startup_event() -> None:
|
|||||||
logger.info("Extraction service started successfully")
|
logger.info("Extraction service started successfully")
|
||||||
|
|
||||||
|
|
||||||
@app.on_event("shutdown")
|
|
||||||
async def shutdown_event() -> None:
|
async def shutdown_event() -> None:
|
||||||
"""Cleanup service dependencies"""
|
"""Cleanup service dependencies"""
|
||||||
global event_bus
|
global event_bus
|
||||||
@@ -129,6 +106,29 @@ async def shutdown_event() -> None:
|
|||||||
logger.info("Extraction service shutdown complete")
|
logger.info("Extraction service shutdown complete")
|
||||||
|
|
||||||
|
|
||||||
|
# Create app and settings
|
||||||
|
app, settings = create_app(
|
||||||
|
service_name="svc-extract",
|
||||||
|
title="Tax Agent Extraction Service",
|
||||||
|
description="LLM-based field extraction service",
|
||||||
|
settings_class=ExtractionSettings,
|
||||||
|
startup_hooks=[startup_event],
|
||||||
|
shutdown_hooks=[shutdown_event],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add middleware
|
||||||
|
middleware_factory = create_trusted_proxy_middleware(settings.internal_cidrs)
|
||||||
|
app.add_middleware(middleware_factory)
|
||||||
|
|
||||||
|
# Global clients
|
||||||
|
storage_client: StorageClient | None = None
|
||||||
|
document_storage: DocumentStorage | None = None
|
||||||
|
event_bus: EventBus | None = None
|
||||||
|
confidence_calibrator: ConfidenceCalibrator | None = None
|
||||||
|
tracer = get_tracer("svc-extract")
|
||||||
|
metrics = get_metrics()
|
||||||
|
|
||||||
|
|
||||||
@app.post("/extract/{doc_id}", response_model=ExtractionResponse)
|
@app.post("/extract/{doc_id}", response_model=ExtractionResponse)
|
||||||
async def extract_fields(
|
async def extract_fields(
|
||||||
doc_id: str,
|
doc_id: str,
|
||||||
@@ -334,13 +334,14 @@ async def _extract_fields_async(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Update metrics
|
# Update metrics
|
||||||
metrics.counter("extractions_completed_total").labels(
|
metrics.counter(
|
||||||
tenant_id=tenant_id, strategy=strategy
|
"extract_extractions_completed_total",
|
||||||
).inc()
|
labelnames=["tenant_id", "strategy"],
|
||||||
|
).labels(tenant_id=tenant_id, strategy=strategy).inc()
|
||||||
|
|
||||||
metrics.histogram("extraction_confidence").labels(
|
metrics.histogram(
|
||||||
strategy=strategy
|
"extract_extraction_confidence", labelnames=["strategy"]
|
||||||
).observe(calibrated_confidence)
|
).labels(strategy=strategy).observe(calibrated_confidence)
|
||||||
|
|
||||||
# Publish completion event
|
# Publish completion event
|
||||||
event_payload = EventPayload(
|
event_payload = EventPayload(
|
||||||
@@ -371,7 +372,10 @@ async def _extract_fields_async(
|
|||||||
logger.error("Field extraction failed", doc_id=doc_id, error=str(e))
|
logger.error("Field extraction failed", doc_id=doc_id, error=str(e))
|
||||||
|
|
||||||
# Update error metrics
|
# Update error metrics
|
||||||
metrics.counter("extraction_errors_total").labels(
|
metrics.counter(
|
||||||
|
"extract_extraction_errors_total",
|
||||||
|
labelnames=["tenant_id", "strategy", "error_type"],
|
||||||
|
).labels(
|
||||||
tenant_id=tenant_id, strategy=strategy, error_type=type(e).__name__
|
tenant_id=tenant_id, strategy=strategy, error_type=type(e).__name__
|
||||||
).inc()
|
).inc()
|
||||||
|
|
||||||
|
|||||||
@@ -77,11 +77,20 @@ def init_dependencies(app_settings: IngestionSettings) -> None:
|
|||||||
|
|
||||||
|
|
||||||
# Create app and settings
|
# Create app and settings
|
||||||
|
async def startup_event() -> None:
|
||||||
|
"""Initialize service dependencies"""
|
||||||
|
if event_bus is None:
|
||||||
|
raise ValueError("Event bus not initialized")
|
||||||
|
|
||||||
|
await event_bus.start()
|
||||||
|
|
||||||
|
|
||||||
app, _settings = create_app(
|
app, _settings = create_app(
|
||||||
service_name="svc-ingestion",
|
service_name="svc-ingestion",
|
||||||
title="Tax Agent Ingestion Service",
|
title="Tax Agent Ingestion Service",
|
||||||
description="Document upload and storage service",
|
description="Document upload and storage service",
|
||||||
settings_class=IngestionSettings,
|
settings_class=IngestionSettings,
|
||||||
|
startup_hooks=[startup_event],
|
||||||
)
|
)
|
||||||
|
|
||||||
# Initialize dependencies immediately
|
# Initialize dependencies immediately
|
||||||
@@ -158,6 +167,7 @@ async def upload_document(
|
|||||||
event_payload = EventPayload(
|
event_payload = EventPayload(
|
||||||
data={
|
data={
|
||||||
"doc_id": doc_id,
|
"doc_id": doc_id,
|
||||||
|
"tenant_id": tenant_id,
|
||||||
"filename": file.filename or "unknown",
|
"filename": file.filename or "unknown",
|
||||||
"kind": kind.value,
|
"kind": kind.value,
|
||||||
"source": source,
|
"source": source,
|
||||||
|
|||||||
@@ -21,8 +21,10 @@ RUN apt-get update && apt-get install -y \
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Copy service-specific requirements and install
|
# Copy service-specific requirements and install
|
||||||
|
# Copy base requirements and service-specific requirements
|
||||||
|
COPY libs/requirements-base.txt /tmp/libs-requirements.txt
|
||||||
COPY apps/svc_ocr/requirements.txt /tmp/service-requirements.txt
|
COPY apps/svc_ocr/requirements.txt /tmp/service-requirements.txt
|
||||||
RUN pip install --no-cache-dir -r /tmp/service-requirements.txt
|
RUN pip install --no-cache-dir -r /tmp/libs-requirements.txt -r /tmp/service-requirements.txt
|
||||||
|
|
||||||
# Copy application code
|
# Copy application code
|
||||||
COPY libs/ ./libs/
|
COPY libs/ ./libs/
|
||||||
|
|||||||
@@ -118,7 +118,7 @@ async def init_dependencies(app_settings: OCRSettings) -> None:
|
|||||||
if attempt == max_retries:
|
if attempt == max_retries:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=500, detail="Failed to connect to NATS after retries"
|
status_code=500, detail="Failed to connect to NATS after retries"
|
||||||
)
|
) from e
|
||||||
await asyncio.sleep(delay)
|
await asyncio.sleep(delay)
|
||||||
delay *= 2 # exponential backoff
|
delay *= 2 # exponential backoff
|
||||||
|
|
||||||
@@ -280,7 +280,7 @@ async def _handle_document_ingested(topic: str, payload: EventPayload) -> None:
|
|||||||
return
|
return
|
||||||
|
|
||||||
# Auto-process PDF documents
|
# Auto-process PDF documents
|
||||||
if data.get("content_type") == "application/pdf":
|
if data.get("mime_type") == "application/pdf":
|
||||||
logger.info("Auto-processing ingested document", doc_id=doc_id)
|
logger.info("Auto-processing ingested document", doc_id=doc_id)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -347,13 +347,13 @@ async def _process_document_async(
|
|||||||
await ds.store_ocr_result(tenant_id, doc_id, ocr_results)
|
await ds.store_ocr_result(tenant_id, doc_id, ocr_results)
|
||||||
|
|
||||||
# Update metrics
|
# Update metrics
|
||||||
metrics.counter("documents_processed_total").labels(
|
metrics.counter(
|
||||||
tenant_id=tenant_id, strategy=strategy
|
"ocr_documents_processed_total", labelnames=["tenant_id", "strategy"]
|
||||||
).inc()
|
).labels(tenant_id=tenant_id, strategy=strategy).inc()
|
||||||
|
|
||||||
metrics.histogram("processing_duration_seconds").labels(
|
metrics.histogram(
|
||||||
strategy=strategy
|
"ocr_processing_duration_seconds", labelnames=["strategy"]
|
||||||
).observe(
|
).labels(strategy=strategy).observe(
|
||||||
datetime.utcnow().timestamp()
|
datetime.utcnow().timestamp()
|
||||||
- datetime.fromisoformat(
|
- datetime.fromisoformat(
|
||||||
ocr_results["processed_at"].replace("Z", "") # type: ignore
|
ocr_results["processed_at"].replace("Z", "") # type: ignore
|
||||||
@@ -386,7 +386,10 @@ async def _process_document_async(
|
|||||||
logger.error("OCR processing failed", doc_id=doc_id, error=str(e))
|
logger.error("OCR processing failed", doc_id=doc_id, error=str(e))
|
||||||
|
|
||||||
# Update error metrics
|
# Update error metrics
|
||||||
metrics.counter("processing_errors_total").labels(
|
metrics.counter(
|
||||||
|
"ocr_processing_errors_total",
|
||||||
|
labelnames=["tenant_id", "strategy", "error_type"],
|
||||||
|
).labels(
|
||||||
tenant_id=tenant_id, strategy=strategy, error_type=type(e).__name__
|
tenant_id=tenant_id, strategy=strategy, error_type=type(e).__name__
|
||||||
).inc()
|
).inc()
|
||||||
|
|
||||||
|
|||||||
@@ -50,6 +50,20 @@ entries:
|
|||||||
groups:
|
groups:
|
||||||
- !Find [authentik_core.group, [name, "Administrators"]]
|
- !Find [authentik_core.group, [name, "Administrators"]]
|
||||||
|
|
||||||
|
# --- E2E Test User ---------------------------------------------------------
|
||||||
|
- model: authentik_core.user
|
||||||
|
state: present
|
||||||
|
identifiers:
|
||||||
|
username: e2e_tester
|
||||||
|
attrs:
|
||||||
|
name: "E2E Tester"
|
||||||
|
email: e2e@example.com
|
||||||
|
is_active: true
|
||||||
|
password: "password123"
|
||||||
|
groups:
|
||||||
|
- !Find [authentik_core.group, [name, "Tax Reviewers"]]
|
||||||
|
- !Find [authentik_core.group, [name, "Administrators"]]
|
||||||
|
|
||||||
# Helper finders
|
# Helper finders
|
||||||
|
|
||||||
# ========= OIDC Providers + Applications ==================================
|
# ========= OIDC Providers + Applications ==================================
|
||||||
@@ -317,6 +331,37 @@ entries:
|
|||||||
meta_publisher: "AI Tax Agent"
|
meta_publisher: "AI Tax Agent"
|
||||||
policy_engine_mode: "any"
|
policy_engine_mode: "any"
|
||||||
|
|
||||||
|
# --- NATS Monitoring (Proxy Provider for ForwardAuth) --------------------
|
||||||
|
- model: authentik_providers_proxy.proxyprovider
|
||||||
|
state: present
|
||||||
|
identifiers:
|
||||||
|
name: "NATS Monitoring Proxy"
|
||||||
|
attrs:
|
||||||
|
external_host: "https://nats.local.lan"
|
||||||
|
internal_host: "http://apa-nats:8222"
|
||||||
|
authorization_flow:
|
||||||
|
!Find [authentik_flows.flow, [slug, "default-authentication-flow"]]
|
||||||
|
invalidation_flow:
|
||||||
|
!Find [authentik_flows.flow, [slug, "default-invalidation-flow"]]
|
||||||
|
mode: "forward_single"
|
||||||
|
cookie_domain: "local.lan"
|
||||||
|
|
||||||
|
- model: authentik_core.application
|
||||||
|
state: present
|
||||||
|
identifiers:
|
||||||
|
slug: "nats-monitoring"
|
||||||
|
attrs:
|
||||||
|
name: "NATS Monitoring"
|
||||||
|
provider:
|
||||||
|
!Find [
|
||||||
|
authentik_providers_proxy.proxyprovider,
|
||||||
|
[name, "NATS Monitoring Proxy"],
|
||||||
|
]
|
||||||
|
meta_launch_url: "https://nats.local.lan"
|
||||||
|
meta_description: "NATS Messaging System Monitoring"
|
||||||
|
meta_publisher: "AI Tax Agent"
|
||||||
|
policy_engine_mode: "any"
|
||||||
|
|
||||||
# --- AI Tax Agent API (Proxy Provider for ForwardAuth) --------------------
|
# --- AI Tax Agent API (Proxy Provider for ForwardAuth) --------------------
|
||||||
- model: authentik_providers_proxy.proxyprovider
|
- model: authentik_providers_proxy.proxyprovider
|
||||||
state: present
|
state: present
|
||||||
@@ -368,3 +413,7 @@ entries:
|
|||||||
authentik_providers_proxy.proxyprovider,
|
authentik_providers_proxy.proxyprovider,
|
||||||
[name, "AI Tax Agent API Proxy"],
|
[name, "AI Tax Agent API Proxy"],
|
||||||
]
|
]
|
||||||
|
- !Find [
|
||||||
|
authentik_providers_proxy.proxyprovider,
|
||||||
|
[name, "NATS Monitoring Proxy"],
|
||||||
|
]
|
||||||
|
|||||||
@@ -331,6 +331,8 @@ services:
|
|||||||
networks:
|
networks:
|
||||||
- backend
|
- backend
|
||||||
- frontend
|
- frontend
|
||||||
|
ports:
|
||||||
|
- "4222:4222" # Client connections (for local testing)
|
||||||
volumes:
|
volumes:
|
||||||
- nats_data:/data
|
- nats_data:/data
|
||||||
command: >
|
command: >
|
||||||
|
|||||||
@@ -49,6 +49,8 @@ services:
|
|||||||
dockerfile: apps/svc_ingestion/Dockerfile
|
dockerfile: apps/svc_ingestion/Dockerfile
|
||||||
image: ai-tax-agent/svc-ingestion:local
|
image: ai-tax-agent/svc-ingestion:local
|
||||||
pull_policy: never
|
pull_policy: never
|
||||||
|
ports:
|
||||||
|
- "8000:8000" # Expose for local E2E testing
|
||||||
|
|
||||||
apa-svc-extract:
|
apa-svc-extract:
|
||||||
build:
|
build:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
# FILE: libs/app_factory.py
|
# FILE: libs/app_factory.py
|
||||||
|
|
||||||
from collections.abc import AsyncIterator
|
from collections.abc import AsyncIterator, Awaitable, Callable
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@@ -36,6 +36,8 @@ def create_app( # pylint: disable=too-many-arguments,too-many-positional-argume
|
|||||||
version: str = "1.0.0",
|
version: str = "1.0.0",
|
||||||
settings_class: type[BaseAppSettings] = BaseAppSettings,
|
settings_class: type[BaseAppSettings] = BaseAppSettings,
|
||||||
custom_settings: dict[str, Any] | None = None,
|
custom_settings: dict[str, Any] | None = None,
|
||||||
|
startup_hooks: list[Callable[[], Awaitable[None]]] | None = None,
|
||||||
|
shutdown_hooks: list[Callable[[], Awaitable[None]]] | None = None,
|
||||||
) -> tuple[FastAPI, BaseAppSettings]:
|
) -> tuple[FastAPI, BaseAppSettings]:
|
||||||
"""Create a FastAPI application with standard configuration"""
|
"""Create a FastAPI application with standard configuration"""
|
||||||
|
|
||||||
@@ -56,8 +58,14 @@ def create_app( # pylint: disable=too-many-arguments,too-many-positional-argume
|
|||||||
) -> AsyncIterator[None]: # pylint: disable=unused-argument
|
) -> AsyncIterator[None]: # pylint: disable=unused-argument
|
||||||
# Startup
|
# Startup
|
||||||
setup_observability(settings)
|
setup_observability(settings)
|
||||||
|
if startup_hooks:
|
||||||
|
for hook in startup_hooks:
|
||||||
|
await hook()
|
||||||
yield
|
yield
|
||||||
# Shutdown
|
# Shutdown
|
||||||
|
if shutdown_hooks:
|
||||||
|
for hook in shutdown_hooks:
|
||||||
|
await hook()
|
||||||
|
|
||||||
# Create FastAPI app
|
# Create FastAPI app
|
||||||
app = FastAPI(
|
app = FastAPI(
|
||||||
|
|||||||
@@ -4,15 +4,15 @@
|
|||||||
class EventTopics: # pylint: disable=too-few-public-methods
|
class EventTopics: # pylint: disable=too-few-public-methods
|
||||||
"""Standard event topic names"""
|
"""Standard event topic names"""
|
||||||
|
|
||||||
DOC_INGESTED = "doc.ingested"
|
DOC_INGESTED = "doc_ingested"
|
||||||
DOC_OCR_READY = "doc.ocr_ready"
|
DOC_OCR_READY = "doc_ocr_ready"
|
||||||
DOC_EXTRACTED = "doc.extracted"
|
DOC_EXTRACTED = "doc_extracted"
|
||||||
KG_UPSERT_READY = "kg.upsert.ready"
|
KG_UPSERT_READY = "kg_upsert_ready"
|
||||||
KG_UPSERTED = "kg.upserted"
|
KG_UPSERTED = "kg_upserted"
|
||||||
RAG_INDEXED = "rag.indexed"
|
RAG_INDEXED = "rag_indexed"
|
||||||
CALC_SCHEDULE_READY = "calc.schedule_ready"
|
CALC_SCHEDULE_READY = "calc_schedule_ready"
|
||||||
FORM_FILLED = "form.filled"
|
FORM_FILLED = "form_filled"
|
||||||
HMRC_SUBMITTED = "hmrc.submitted"
|
HMRC_SUBMITTED = "hmrc_submitted"
|
||||||
REVIEW_REQUESTED = "review.requested"
|
REVIEW_REQUESTED = "review_requested"
|
||||||
REVIEW_COMPLETED = "review.completed"
|
REVIEW_COMPLETED = "review_completed"
|
||||||
FIRM_SYNC_COMPLETED = "firm.sync.completed"
|
FIRM_SYNC_COMPLETED = "firm_sync_completed"
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ psycopg2-binary>=2.9.11
|
|||||||
neo4j>=6.0.2
|
neo4j>=6.0.2
|
||||||
redis[hiredis]>=6.4.0
|
redis[hiredis]>=6.4.0
|
||||||
|
|
||||||
minio>=7.2.18
|
minio==7.2.18
|
||||||
boto3>=1.34.0
|
boto3>=1.34.0
|
||||||
qdrant-client>=1.15.1
|
qdrant-client>=1.15.1
|
||||||
|
|
||||||
|
|||||||
@@ -72,22 +72,23 @@ class DocumentExtractedEventData(BaseEventData):
|
|||||||
"""Event emitted when field extraction is complete."""
|
"""Event emitted when field extraction is complete."""
|
||||||
|
|
||||||
doc_id: str = Field(..., description="Document identifier")
|
doc_id: str = Field(..., description="Document identifier")
|
||||||
|
tenant_id: str = Field(..., description="Tenant identifier")
|
||||||
extraction_id: str = Field(..., description="Unique extraction run identifier")
|
extraction_id: str = Field(..., description="Unique extraction run identifier")
|
||||||
strategy: Literal["llm", "rules", "hybrid"] = Field(
|
strategy: Literal["llm", "rules", "hybrid"] = Field(
|
||||||
..., description="Extraction strategy used"
|
..., description="Extraction strategy used"
|
||||||
)
|
)
|
||||||
fields_extracted: int = Field(..., ge=0, description="Number of fields extracted")
|
field_count: int = Field(..., ge=0, description="Number of fields extracted")
|
||||||
confidence_avg: float = Field(
|
confidence: float = Field(
|
||||||
..., ge=0.0, le=1.0, description="Average extraction confidence"
|
..., ge=0.0, le=1.0, description="Extraction confidence score"
|
||||||
)
|
)
|
||||||
calibrated_confidence: float = Field(
|
extraction_results: dict[str, Any] = Field(
|
||||||
..., ge=0.0, le=1.0, description="Calibrated confidence score"
|
..., description="Full extraction results including provenance"
|
||||||
)
|
)
|
||||||
model_name: str | None = Field(None, description="LLM model used (if applicable)")
|
model_name: str | None = Field(None, description="LLM model used (if applicable)")
|
||||||
processing_time_ms: int = Field(
|
processing_time_ms: int | None = Field(
|
||||||
..., ge=0, description="Processing time in milliseconds"
|
None, ge=0, description="Processing time in milliseconds"
|
||||||
)
|
)
|
||||||
storage_path: str = Field(..., description="Path to extraction results")
|
storage_path: str | None = Field(None, description="Path to extraction results")
|
||||||
|
|
||||||
|
|
||||||
# Knowledge Graph events
|
# Knowledge Graph events
|
||||||
|
|||||||
@@ -41,6 +41,11 @@ def get_current_tenant(request: Request) -> str | None:
|
|||||||
if role.startswith("tenant:"):
|
if role.startswith("tenant:"):
|
||||||
return str(role.split(":", 1)[1])
|
return str(role.split(":", 1)[1])
|
||||||
|
|
||||||
|
# Check for explicit tenant header (useful for testing/API keys)
|
||||||
|
tenant_header = request.headers.get("X-Tenant-ID")
|
||||||
|
if tenant_header:
|
||||||
|
return tenant_header
|
||||||
|
|
||||||
# Default tenant for development
|
# Default tenant for development
|
||||||
return "default"
|
return "default"
|
||||||
|
|
||||||
|
|||||||
@@ -19,17 +19,13 @@ class StorageClient:
|
|||||||
async def ensure_bucket(self, bucket_name: str, region: str = "us-east-1") -> bool:
|
async def ensure_bucket(self, bucket_name: str, region: str = "us-east-1") -> bool:
|
||||||
"""Ensure bucket exists, create if not"""
|
"""Ensure bucket exists, create if not"""
|
||||||
try:
|
try:
|
||||||
# Check if bucket exists
|
self.client.make_bucket(bucket_name=bucket_name, location=region)
|
||||||
if self.client.bucket_exists(bucket_name):
|
|
||||||
logger.debug("Bucket already exists", bucket=bucket_name)
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Create bucket
|
|
||||||
self.client.make_bucket(bucket_name, location=region)
|
|
||||||
logger.info("Created bucket", bucket=bucket_name, region=region)
|
logger.info("Created bucket", bucket=bucket_name, region=region)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except S3Error as e:
|
except S3Error as e:
|
||||||
|
if e.code in ("BucketAlreadyOwnedByYou", "BucketAlreadyExists"):
|
||||||
|
logger.debug("Bucket already exists", bucket=bucket_name)
|
||||||
|
return True
|
||||||
logger.error("Failed to ensure bucket", bucket=bucket_name, error=str(e))
|
logger.error("Failed to ensure bucket", bucket=bucket_name, error=str(e))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
@@ -1,200 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
# Test Authentik blueprint import after manual setup
|
|
||||||
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
# Colors for output
|
|
||||||
RED='\033[0;31m'
|
|
||||||
GREEN='\033[0;32m'
|
|
||||||
YELLOW='\033[1;33m'
|
|
||||||
BLUE='\033[0;34m'
|
|
||||||
NC='\033[0m' # No Color
|
|
||||||
|
|
||||||
# Configuration
|
|
||||||
DOMAIN=${DOMAIN:-local}
|
|
||||||
AUTHENTIK_URL="https://auth.${DOMAIN}"
|
|
||||||
AUTHENTIK_API_URL="$AUTHENTIK_URL/api/v3"
|
|
||||||
ADMIN_EMAIL="admin@local.local"
|
|
||||||
ADMIN_PASSWORD="${AUTHENTIK_ADMIN_PASSWORD:-admin123}"
|
|
||||||
|
|
||||||
echo -e "${BLUE}🧪 Testing Authentik blueprint import...${NC}"
|
|
||||||
echo
|
|
||||||
|
|
||||||
# Function to check if setup is complete
|
|
||||||
check_setup_complete() {
|
|
||||||
local host
|
|
||||||
host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#')
|
|
||||||
local resolve=(--resolve "${host}:443:127.0.0.1")
|
|
||||||
local setup_code
|
|
||||||
setup_code=$(curl -ks "${resolve[@]}" -o /dev/null -w '%{http_code}' "$AUTHENTIK_URL/if/flow/initial-setup/" || true)
|
|
||||||
|
|
||||||
if [[ "$setup_code" == "404" ]]; then
|
|
||||||
return 0 # Setup is complete
|
|
||||||
else
|
|
||||||
return 1 # Setup is still needed
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Function to get API token via login
|
|
||||||
get_api_token_via_login() {
|
|
||||||
echo -e "${YELLOW}🔑 Getting API token via login...${NC}"
|
|
||||||
|
|
||||||
local host
|
|
||||||
host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#')
|
|
||||||
local resolve=(--resolve "${host}:443:127.0.0.1")
|
|
||||||
|
|
||||||
# Get login page and extract CSRF token
|
|
||||||
local login_page
|
|
||||||
login_page=$(curl -ks "${resolve[@]}" -c /tmp/auth_cookies.txt "$AUTHENTIK_URL/if/flow/default-authentication-flow/" || echo "")
|
|
||||||
|
|
||||||
if [ -z "$login_page" ]; then
|
|
||||||
echo -e "${RED}❌ Could not access login page${NC}"
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Extract CSRF token from the page
|
|
||||||
local csrf_token
|
|
||||||
csrf_token=$(echo "$login_page" | grep -o 'name="csrfmiddlewaretoken"[^>]*value="[^"]*"' | sed 's/.*value="\([^"]*\)".*/\1/' | head -1 || echo "")
|
|
||||||
|
|
||||||
if [ -z "$csrf_token" ]; then
|
|
||||||
echo -e "${RED}❌ Could not extract CSRF token${NC}"
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo -e "${GREEN}✅ CSRF token extracted${NC}"
|
|
||||||
|
|
||||||
# Login
|
|
||||||
local login_response
|
|
||||||
login_response=$(curl -ks "${resolve[@]}" -b /tmp/auth_cookies.txt -c /tmp/auth_cookies.txt \
|
|
||||||
-X POST "$AUTHENTIK_URL/if/flow/default-authentication-flow/" \
|
|
||||||
-H "Content-Type: application/x-www-form-urlencoded" \
|
|
||||||
-H "Referer: $AUTHENTIK_URL/if/flow/default-authentication-flow/" \
|
|
||||||
-d "csrfmiddlewaretoken=$csrf_token&uid_field=$ADMIN_EMAIL&password=$ADMIN_PASSWORD" \
|
|
||||||
-w '%{http_code}' -o /tmp/login_response.html || echo "")
|
|
||||||
|
|
||||||
if [[ "$login_response" =~ ^(200|302)$ ]]; then
|
|
||||||
echo -e "${GREEN}✅ Login successful${NC}"
|
|
||||||
|
|
||||||
# Get admin interface page to get new CSRF token
|
|
||||||
local admin_page
|
|
||||||
admin_page=$(curl -ks "${resolve[@]}" -b /tmp/auth_cookies.txt "$AUTHENTIK_URL/if/admin/" || echo "")
|
|
||||||
|
|
||||||
local admin_csrf
|
|
||||||
admin_csrf=$(echo "$admin_page" | grep -o 'name="csrfmiddlewaretoken"[^>]*value="[^"]*"' | sed 's/.*value="\([^"]*\)".*/\1/' | head -1 || echo "")
|
|
||||||
|
|
||||||
if [ -n "$admin_csrf" ]; then
|
|
||||||
# Create API token
|
|
||||||
local token_response
|
|
||||||
token_response=$(curl -ks "${resolve[@]}" -b /tmp/auth_cookies.txt \
|
|
||||||
-X POST "$AUTHENTIK_API_URL/core/tokens/" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-H "X-CSRFToken: $admin_csrf" \
|
|
||||||
-d "{
|
|
||||||
\"identifier\": \"blueprint-test-$(date +%s)\",
|
|
||||||
\"description\": \"Test token for blueprint import\",
|
|
||||||
\"expires\": \"2025-12-31T23:59:59Z\"
|
|
||||||
}" 2>/dev/null || echo "")
|
|
||||||
|
|
||||||
if [ -n "$token_response" ]; then
|
|
||||||
local token
|
|
||||||
token=$(echo "$token_response" | python3 -c "import sys, json; print(json.load(sys.stdin)['key'])" 2>/dev/null || echo "")
|
|
||||||
|
|
||||||
if [ -n "$token" ]; then
|
|
||||||
echo -e "${GREEN}✅ API token created${NC}"
|
|
||||||
echo "$token"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo -e "${RED}❌ Failed to get API token${NC}"
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
# Function to import blueprint
|
|
||||||
import_blueprint() {
|
|
||||||
local token="$1"
|
|
||||||
|
|
||||||
echo -e "${YELLOW}📋 Importing blueprint...${NC}"
|
|
||||||
|
|
||||||
local host
|
|
||||||
host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#')
|
|
||||||
local resolve=(--resolve "${host}:443:127.0.0.1")
|
|
||||||
|
|
||||||
# Create blueprint instance
|
|
||||||
local blueprint_response
|
|
||||||
blueprint_response=$(curl -ks "${resolve[@]}" \
|
|
||||||
-X POST "$AUTHENTIK_API_URL/managed/blueprints/" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-H "Authorization: Bearer $token" \
|
|
||||||
-d '{
|
|
||||||
"name": "AI Tax Agent Bootstrap",
|
|
||||||
"path": "/blueprints/bootstrap.yaml",
|
|
||||||
"context": {},
|
|
||||||
"enabled": true
|
|
||||||
}' 2>/dev/null || echo "")
|
|
||||||
|
|
||||||
echo -e "${BLUE}Blueprint creation response:${NC}"
|
|
||||||
echo "$blueprint_response" | python3 -c "import sys, json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "$blueprint_response"
|
|
||||||
|
|
||||||
local blueprint_pk
|
|
||||||
blueprint_pk=$(echo "$blueprint_response" | python3 -c "import sys, json; print(json.load(sys.stdin).get('pk', ''))" 2>/dev/null || echo "")
|
|
||||||
|
|
||||||
if [ -n "$blueprint_pk" ]; then
|
|
||||||
echo -e "${GREEN}✅ Blueprint created with ID: $blueprint_pk${NC}"
|
|
||||||
|
|
||||||
# Apply the blueprint
|
|
||||||
echo -e "${YELLOW}🔄 Applying blueprint...${NC}"
|
|
||||||
local apply_response
|
|
||||||
apply_response=$(curl -ks "${resolve[@]}" \
|
|
||||||
-X POST "$AUTHENTIK_API_URL/managed/blueprints/$blueprint_pk/apply/" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-H "Authorization: Bearer $token" \
|
|
||||||
-d '{}' 2>/dev/null || echo "")
|
|
||||||
|
|
||||||
echo -e "${BLUE}Blueprint apply response:${NC}"
|
|
||||||
echo "$apply_response" | python3 -c "import sys, json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "$apply_response"
|
|
||||||
|
|
||||||
return 0
|
|
||||||
else
|
|
||||||
echo -e "${RED}❌ Failed to create blueprint${NC}"
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Main function
|
|
||||||
main() {
|
|
||||||
# Check if setup is complete
|
|
||||||
if ! check_setup_complete; then
|
|
||||||
echo -e "${YELLOW}⚠️ Initial setup is still required${NC}"
|
|
||||||
echo -e "${BLUE}📋 Please complete setup at: https://auth.local.lan.lan/if/flow/initial-setup/${NC}"
|
|
||||||
echo -e "${BLUE}Use credentials: admin@local.local / admin123${NC}"
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo -e "${GREEN}✅ Initial setup is complete${NC}"
|
|
||||||
|
|
||||||
# Get API token
|
|
||||||
local api_token
|
|
||||||
if api_token=$(get_api_token_via_login); then
|
|
||||||
echo -e "${GREEN}🔑 API token obtained${NC}"
|
|
||||||
|
|
||||||
# Import blueprint
|
|
||||||
if import_blueprint "$api_token"; then
|
|
||||||
echo -e "${GREEN}🎉 Blueprint import test completed!${NC}"
|
|
||||||
else
|
|
||||||
echo -e "${RED}❌ Blueprint import failed${NC}"
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo -e "${RED}❌ Could not get API token${NC}"
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Cleanup
|
|
||||||
rm -f /tmp/auth_cookies.txt /tmp/login_response.html
|
|
||||||
}
|
|
||||||
|
|
||||||
# Run main function
|
|
||||||
main "$@"
|
|
||||||
@@ -1,155 +0,0 @@
|
|||||||
#!/bin/bash
# Finish the Authentik bootstrap: once the initial setup is done, fetch an
# API token and record it in the compose .env file.

set -euo pipefail

# ANSI color sequences (interpreted by `echo -e`)
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'

# Settings; DOMAIN and AUTHENTIK_ADMIN_PASSWORD may be supplied by the caller
DOMAIN="${DOMAIN:-local}"
ENV_FILE="infra/compose/.env"
ADMIN_PASSWORD="${AUTHENTIK_ADMIN_PASSWORD:-admin123}"
ADMIN_EMAIL="admin@local"
AUTHENTIK_URL="https://auth.${DOMAIN}"

# Banner followed by an empty line
echo -e "${BLUE}🔧 Completing Authentik initial setup...${NC}"
echo
|
|
||||||
|
|
||||||
# Function to update env file
|
|
||||||
update_env_var() {
    # Set NAME=VALUE in the file named by $ENV_FILE.
    #
    # Arguments:
    #   $1 - variable name
    #   $2 - value to store (written literally)
    #
    # An existing "NAME=..." line is rewritten in place; otherwise the
    # assignment is appended to the end of the file.
    local var_name="$1"
    local var_value="$2"

    if grep -q "^${var_name}=" "$ENV_FILE"; then
        # Escape the characters that are special inside a sed replacement
        # when "|" is the delimiter: backslash, "&" (whole match) and "|".
        # Without this, a value containing any of them either aborts sed or
        # is written back corrupted.
        local escaped_value
        escaped_value=$(printf '%s' "$var_value" | sed -e 's/[\\&|]/\\&/g')

        if [[ "$OSTYPE" == "darwin"* ]]; then
            # macOS: BSD sed needs an explicit (empty) backup suffix after -i
            sed -i '' "s|^${var_name}=.*|${var_name}=${escaped_value}|" "$ENV_FILE"
        else
            # Linux: GNU sed
            sed -i "s|^${var_name}=.*|${var_name}=${escaped_value}|" "$ENV_FILE"
        fi
        echo -e "${GREEN}✅ Updated ${var_name}${NC}"
    else
        # Variable not present yet: append it
        echo "${var_name}=${var_value}" >> "$ENV_FILE"
        echo -e "${GREEN}✅ Added ${var_name}${NC}"
    fi
}
|
|
||||||
|
|
||||||
# Function to check if setup is complete
|
|
||||||
check_setup_status() {
    # Probe the initial-setup flow endpoint.
    # Exit status 0: the endpoint answered 404 (treated as "setup complete").
    # Exit status 1: any other answer, including a failed request.
    local target_host
    local http_status
    local -a pin_local

    # Reduce the base URL to its host part
    target_host=$(sed -E 's#^https?://([^/]+).*$#\1#' <<< "$AUTHENTIK_URL")

    # Make curl connect to 127.0.0.1 for that host on port 443
    pin_local=(--resolve "${target_host}:443:127.0.0.1")

    http_status=$(curl -ks "${pin_local[@]}" -o /dev/null -w '%{http_code}' \
        "$AUTHENTIK_URL/if/flow/initial-setup/" || true)

    if [[ "$http_status" != "404" ]]; then
        return 1 # Setup is still needed
    fi
    return 0 # Setup is complete
}
|
|
||||||
|
|
||||||
# Function to get API token
|
|
||||||
get_api_token() {
    # Log in through the default authentication flow and create an API token.
    #
    # Output: on success the token key is the ONLY text written to stdout, so
    #         callers can safely capture it with $(get_api_token). Progress
    #         and error messages go to stderr.
    # Returns: 0 when a token was created, 1 otherwise.
    # Side effects: writes /tmp/authentik_cookies.txt and
    #               /tmp/login_response.html (this function does not remove them).
    echo -e "${YELLOW}🔑 Getting API token...${NC}" >&2

    local host
    host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#')
    # Make curl connect to 127.0.0.1 for the Authentik host
    local resolve=(--resolve "${host}:443:127.0.0.1")
    local flow_url="$AUTHENTIK_URL/if/flow/default-authentication-flow/"

    # Get CSRF token first; keep only the first match in case the page
    # carries more than one token field
    local csrf_token
    csrf_token=$(curl -ks "${resolve[@]}" -c /tmp/authentik_cookies.txt "$flow_url" \
        | grep -o 'csrfmiddlewaretoken[^>]*value="[^"]*"' \
        | sed 's/.*value="\([^"]*\)".*/\1/' \
        | head -1 || echo "")

    if [ -z "$csrf_token" ]; then
        echo -e "${RED}❌ Could not get CSRF token${NC}" >&2
        return 1
    fi

    # Login to get session. Each field is URL-encoded so credentials that
    # contain "&", "+", "%" or "=" reach the server unchanged.
    local login_response
    login_response=$(curl -ks "${resolve[@]}" -b /tmp/authentik_cookies.txt -c /tmp/authentik_cookies.txt \
        -X POST "$flow_url" \
        -H "Content-Type: application/x-www-form-urlencoded" \
        -H "Referer: $flow_url" \
        --data-urlencode "csrfmiddlewaretoken=$csrf_token" \
        --data-urlencode "uid_field=$ADMIN_EMAIL" \
        --data-urlencode "password=$ADMIN_PASSWORD" \
        -w '%{http_code}' -o /tmp/login_response.html || echo "")

    if [[ "$login_response" =~ ^(200|302)$ ]]; then
        echo -e "${GREEN}✅ Login successful${NC}" >&2

        # Create API token
        # NOTE(review): the expiry timestamp is hard-coded - confirm it is
        # still in the future for the environment this runs in.
        local token_response
        token_response=$(curl -ks "${resolve[@]}" -b /tmp/authentik_cookies.txt \
            -X POST "$AUTHENTIK_URL/api/v3/core/tokens/" \
            -H "Content-Type: application/json" \
            -H "X-CSRFToken: $csrf_token" \
            -d "{
                \"identifier\": \"ai-tax-agent-bootstrap\",
                \"description\": \"Bootstrap token for AI Tax Agent setup\",
                \"expires\": \"2025-12-31T23:59:59Z\"
            }" 2>/dev/null || echo "")

        if [ -n "$token_response" ]; then
            # Pull the "key" field out of the JSON reply; any parse failure
            # yields an empty string and falls through to the error path
            local token
            token=$(echo "$token_response" | python3 -c "import sys, json; print(json.load(sys.stdin)['key'])" 2>/dev/null || echo "")

            if [ -n "$token" ]; then
                echo -e "${GREEN}✅ API token created${NC}" >&2
                echo "$token"
                return 0
            fi
        fi
    fi

    echo -e "${RED}❌ Failed to get API token${NC}" >&2
    return 1
}
|
|
||||||
|
|
||||||
# Main function
|
|
||||||
main() {
    # Entry point.
    #
    # When the initial setup is already complete, try to obtain an API token
    # and store it as AUTHENTIK_BOOTSTRAP_TOKEN in the env file; otherwise
    # print the manual steps. Temporary session files are removed at the end.
    #
    # The URLs shown to the user are built from $AUTHENTIK_URL so they always
    # match the instance this script actually probes (they were previously
    # hard-coded, one of them with a doubled ".lan" suffix).
    if check_setup_status; then
        echo -e "${GREEN}✅ Authentik setup is already complete${NC}"

        # Try to get API token
        local api_token
        if api_token=$(get_api_token); then
            echo -e "${GREEN}🔑 API token obtained${NC}"

            # Update .env file with token
            update_env_var "AUTHENTIK_BOOTSTRAP_TOKEN" "$api_token"

            echo
            echo -e "${GREEN}🎉 Setup complete! You can now run:${NC}"
            echo -e " ${BLUE}make setup-authentik${NC} - to import blueprint configuration"
        else
            echo -e "${YELLOW}⚠️ Could not get API token automatically${NC}"
            echo -e "${BLUE}📋 Manual steps:${NC}"
            echo -e " 1. Open ${BLUE}${AUTHENTIK_URL}${NC} and log in"
            echo -e " 2. Go to Admin Interface > Tokens"
            echo -e " 3. Create a new token and update AUTHENTIK_BOOTSTRAP_TOKEN in .env"
        fi
    else
        echo -e "${YELLOW}📋 Initial setup still required:${NC}"
        echo -e " 1. Open ${BLUE}${AUTHENTIK_URL}/if/flow/initial-setup/${NC}"
        echo -e " 2. Complete the setup wizard with these credentials:"
        echo -e " • Email: ${BLUE}$ADMIN_EMAIL${NC}"
        echo -e " • Password: ${BLUE}$ADMIN_PASSWORD${NC}"
        echo -e " 3. Re-run this script after setup is complete"
    fi

    # Cleanup
    rm -f /tmp/authentik_cookies.txt /tmp/login_response.html
}
|
|
||||||
|
|
||||||
# Run main function
|
|
||||||
main "$@"
|
|
||||||
@@ -1,125 +0,0 @@
|
|||||||
#!/bin/bash
# Drive the Authentik initial-setup flow without user interaction.

set -euo pipefail

# ANSI color sequences (interpreted by `echo -e`)
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'

# Settings; DOMAIN and AUTHENTIK_ADMIN_PASSWORD may be supplied by the caller
DOMAIN="${DOMAIN:-local}"
ADMIN_PASSWORD="${AUTHENTIK_ADMIN_PASSWORD:-admin123}"
ADMIN_EMAIL="admin@local.lan"
AUTHENTIK_URL="https://auth.${DOMAIN}"

# Banner followed by an empty line
echo -e "${BLUE}🤖 Automatically completing Authentik initial setup...${NC}"
echo
|
|
||||||
|
|
||||||
# Function to complete initial setup
|
|
||||||
complete_initial_setup() {
    # Submit the initial-setup form with the configured admin credentials.
    #
    # Returns 0 when the form POST answers 200/302 AND a follow-up request to
    # the setup page answers 404; returns 1 in every other case.
    # Side effects: writes /tmp/authentik_setup_cookies.txt and
    #               /tmp/setup_response.html (this function does not remove them).
    local host
    host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#')
    # Make curl connect to 127.0.0.1 for the Authentik host
    local resolve=(--resolve "${host}:443:127.0.0.1")
    local setup_url="$AUTHENTIK_URL/if/flow/initial-setup/"

    echo -e "${YELLOW}📋 Completing initial setup form...${NC}"

    # Get the initial setup page (also stores the session cookies)
    local setup_page
    setup_page=$(curl -ks "${resolve[@]}" -c /tmp/authentik_setup_cookies.txt "$setup_url" || echo "")

    if [ -z "$setup_page" ]; then
        echo -e "${RED}❌ Could not access setup page${NC}"
        return 1
    fi

    # Extract CSRF token (first match only)
    local csrf_token
    csrf_token=$(echo "$setup_page" | grep -o 'csrfmiddlewaretoken[^>]*value="[^"]*"' | sed 's/.*value="\([^"]*\)".*/\1/' | head -1 || echo "")

    if [ -z "$csrf_token" ]; then
        echo -e "${RED}❌ Could not extract CSRF token${NC}"
        return 1
    fi

    echo -e "${GREEN}✅ CSRF token extracted${NC}"

    # Submit the initial setup form. Each field is URL-encoded so an email or
    # password containing "&", "+", "%" or "=" reaches the server unchanged.
    local setup_response
    setup_response=$(curl -ks "${resolve[@]}" -b /tmp/authentik_setup_cookies.txt -c /tmp/authentik_setup_cookies.txt \
        -X POST "$setup_url" \
        -H "Content-Type: application/x-www-form-urlencoded" \
        -H "Referer: $setup_url" \
        --data-urlencode "csrfmiddlewaretoken=$csrf_token" \
        --data-urlencode "email=$ADMIN_EMAIL" \
        --data-urlencode "password=$ADMIN_PASSWORD" \
        --data-urlencode "password_repeat=$ADMIN_PASSWORD" \
        -w '%{http_code}' -o /tmp/setup_response.html || echo "")

    if [[ "$setup_response" =~ ^(200|302)$ ]]; then
        echo -e "${GREEN}✅ Initial setup completed successfully${NC}"

        # Wait a moment for setup to complete
        sleep 3

        # Verify setup is complete by checking if setup page returns 404
        local verify_code
        verify_code=$(curl -ks "${resolve[@]}" -o /dev/null -w '%{http_code}' "$setup_url" || true)

        if [[ "$verify_code" == "404" ]]; then
            echo -e "${GREEN}✅ Setup verification successful${NC}"
            return 0
        else
            echo -e "${YELLOW}⚠️ Setup may not be complete (verification returned $verify_code)${NC}"
            return 1
        fi
    else
        echo -e "${RED}❌ Setup failed (HTTP $setup_response)${NC}"
        return 1
    fi
}
|
|
||||||
|
|
||||||
# Function to check if setup is needed
|
|
||||||
check_setup_needed() {
    # Probe the initial-setup flow endpoint.
    # Exit status 0: the endpoint answered 200 (treated as "setup needed").
    # Exit status 1: any other answer, including a failed request.
    local target_host
    local http_status
    local -a pin_local

    # Reduce the base URL to its host part
    target_host=$(sed -E 's#^https?://([^/]+).*$#\1#' <<< "$AUTHENTIK_URL")

    # Make curl connect to 127.0.0.1 for that host on port 443
    pin_local=(--resolve "${target_host}:443:127.0.0.1")

    http_status=$(curl -ks "${pin_local[@]}" -o /dev/null -w '%{http_code}' \
        "$AUTHENTIK_URL/if/flow/initial-setup/" || true)

    #TODO: this is not a valid check if setup is already complete, needs work. Authentik returns 200 even if setup is complete
    if [[ "$http_status" != "200" ]]; then
        return 1 # Setup is not needed
    fi
    return 0 # Setup is needed
}
|
|
||||||
|
|
||||||
# Main function
|
|
||||||
main() {
    # Entry point.
    #
    # Runs the automatic initial setup when check_setup_needed reports it is
    # required, prints the follow-up steps (or manual instructions when the
    # automatic attempt fails), and finally removes the temporary files.
    #
    # The manual-setup URL is built from $AUTHENTIK_URL so it matches the
    # instance this script talks to (it was previously hard-coded with a
    # doubled ".lan" suffix).
    if check_setup_needed; then
        echo -e "${YELLOW}📋 Initial setup is required${NC}"

        if complete_initial_setup; then
            echo -e "${GREEN}🎉 Authentik initial setup completed automatically!${NC}"
            echo
            echo -e "${BLUE}📋 Next steps:${NC}"
            echo -e " 1. Run ${BLUE}make complete-authentik-setup${NC} to get API token"
            echo -e " 2. Run ${BLUE}make setup-authentik${NC} to import blueprint configuration"
            echo -e " 3. Or run ${BLUE}make setup-sso${NC} to do both automatically"
        else
            echo -e "${RED}❌ Automatic setup failed${NC}"
            echo -e "${YELLOW}📋 Manual setup required:${NC}"
            echo -e " 1. Open ${BLUE}${AUTHENTIK_URL}/if/flow/initial-setup/${NC}"
            echo -e " 2. Use credentials: ${BLUE}$ADMIN_EMAIL${NC} / ${BLUE}$ADMIN_PASSWORD${NC}"
        fi
    else
        echo -e "${GREEN}✅ Authentik setup is already complete${NC}"
    fi

    # Cleanup
    rm -f /tmp/authentik_setup_cookies.txt /tmp/setup_response.html
}
|
|
||||||
|
|
||||||
# Run main function
|
|
||||||
main "$@"
|
|
||||||
@@ -38,14 +38,29 @@ async def test_backend_journey():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# 2. Upload a document
|
# 2. Upload a document
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient(
|
||||||
|
verify=False
|
||||||
|
) as client: # Disable SSL verification for local testing
|
||||||
# Create a dummy PDF file
|
# Create a dummy PDF file
|
||||||
files = {"file": ("test.pdf", b"%PDF-1.4 mock content", "application/pdf")}
|
# Create a valid minimal PDF file
|
||||||
|
pdf_content = (
|
||||||
|
b"%PDF-1.0\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj 2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj "
|
||||||
|
b"3 0 obj<</Type/Page/MediaBox[0 0 3 3]/Parent 2 0 R/Resources<<>>>>endobj\nxref\n0 4\n0000000000 65535 f\n"
|
||||||
|
b"0000000010 00000 n\n0000000060 00000 n\n0000000111 00000 n\ntrailer<</Size 4/Root 1 0 R>>\nstartxref\n190\n%%EOF"
|
||||||
|
)
|
||||||
|
files = {"file": ("test.pdf", pdf_content, "application/pdf")}
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
f"{INGESTION_URL}/upload",
|
f"{INGESTION_URL}/upload",
|
||||||
files=files,
|
files=files,
|
||||||
data={"kind": "invoice", "source": "e2e_test"},
|
data={"kind": "invoice", "source": "e2e_test"},
|
||||||
headers={"X-Tenant-ID": TENANT_ID, "X-User-ID": "e2e_tester"},
|
headers={
|
||||||
|
"X-Tenant-ID": TENANT_ID,
|
||||||
|
"X-User-ID": "e2e_tester",
|
||||||
|
# Required by TrustedProxyMiddleware
|
||||||
|
"X-Authenticated-User": "e2e_tester",
|
||||||
|
"X-Authenticated-Email": "e2e@example.com",
|
||||||
|
"Authorization": "Bearer mock-token",
|
||||||
|
},
|
||||||
)
|
)
|
||||||
assert response.status_code == 200, f"Upload failed: {response.text}"
|
assert response.status_code == 200, f"Upload failed: {response.text}"
|
||||||
upload_data = response.json()
|
upload_data = response.json()
|
||||||
|
|||||||
Reference in New Issue
Block a user