diff --git a/apps/svc_extract/main.py b/apps/svc_extract/main.py index 2ee102c..f19020a 100644 --- a/apps/svc_extract/main.py +++ b/apps/svc_extract/main.py @@ -64,28 +64,6 @@ Return a JSON object with the extracted fields and confidence scores. """ -# Create app and settings -app, settings = create_app( - service_name="svc-extract", - title="Tax Agent Extraction Service", - description="LLM-based field extraction service", - settings_class=ExtractionSettings, -) - -# Add middleware -middleware_factory = create_trusted_proxy_middleware(settings.internal_cidrs) -app.add_middleware(middleware_factory) - -# Global clients -storage_client: StorageClient | None = None -document_storage: DocumentStorage | None = None -event_bus: EventBus | None = None -confidence_calibrator: ConfidenceCalibrator | None = None -tracer = get_tracer("svc-extract") -metrics = get_metrics() - - -@app.on_event("startup") async def startup_event() -> None: """Initialize service dependencies""" global storage_client, document_storage, event_bus, confidence_calibrator @@ -116,7 +94,6 @@ async def startup_event() -> None: logger.info("Extraction service started successfully") -@app.on_event("shutdown") async def shutdown_event() -> None: """Cleanup service dependencies""" global event_bus @@ -129,6 +106,29 @@ async def shutdown_event() -> None: logger.info("Extraction service shutdown complete") +# Create app and settings +app, settings = create_app( + service_name="svc-extract", + title="Tax Agent Extraction Service", + description="LLM-based field extraction service", + settings_class=ExtractionSettings, + startup_hooks=[startup_event], + shutdown_hooks=[shutdown_event], +) + +# Add middleware +middleware_factory = create_trusted_proxy_middleware(settings.internal_cidrs) +app.add_middleware(middleware_factory) + +# Global clients +storage_client: StorageClient | None = None +document_storage: DocumentStorage | None = None +event_bus: EventBus | None = None +confidence_calibrator: ConfidenceCalibrator | None = None +tracer = get_tracer("svc-extract") +metrics = get_metrics() + + @app.post("/extract/{doc_id}", response_model=ExtractionResponse) async def extract_fields( doc_id: str, @@ -334,13 +334,14 @@ async def _extract_fields_async( ) # Update metrics - metrics.counter("extractions_completed_total").labels( - tenant_id=tenant_id, strategy=strategy - ).inc() + metrics.counter( + "extract_extractions_completed_total", + labelnames=["tenant_id", "strategy"], + ).labels(tenant_id=tenant_id, strategy=strategy).inc() - metrics.histogram("extraction_confidence").labels( - strategy=strategy - ).observe(calibrated_confidence) + metrics.histogram( + "extract_extraction_confidence", labelnames=["strategy"] + ).labels(strategy=strategy).observe(calibrated_confidence) # Publish completion event event_payload = EventPayload( @@ -371,7 +372,10 @@ async def _extract_fields_async( logger.error("Field extraction failed", doc_id=doc_id, error=str(e)) # Update error metrics - metrics.counter("extraction_errors_total").labels( + metrics.counter( + "extract_extraction_errors_total", + labelnames=["tenant_id", "strategy", "error_type"], + ).labels( tenant_id=tenant_id, strategy=strategy, error_type=type(e).__name__ ).inc() diff --git a/apps/svc_ingestion/main.py b/apps/svc_ingestion/main.py index 812b654..df6f94c 100644 --- a/apps/svc_ingestion/main.py +++ b/apps/svc_ingestion/main.py @@ -77,11 +77,20 @@ def init_dependencies(app_settings: IngestionSettings) -> None: # Create app and settings +async def startup_event() -> None: + """Initialize service dependencies""" + if event_bus is None: + raise ValueError("Event bus not initialized") + + await event_bus.start() + + app, _settings = create_app( service_name="svc-ingestion", title="Tax Agent Ingestion Service", description="Document upload and storage service", settings_class=IngestionSettings, + startup_hooks=[startup_event], ) # Initialize dependencies immediately @@ -158,6 +167,7 @@ async def upload_document( event_payload = EventPayload( data={ "doc_id": doc_id, + "tenant_id": tenant_id, "filename": file.filename or "unknown", "kind": kind.value, "source": source, diff --git a/apps/svc_ocr/Dockerfile b/apps/svc_ocr/Dockerfile index c21fa66..334ce7f 100644 --- a/apps/svc_ocr/Dockerfile +++ b/apps/svc_ocr/Dockerfile @@ -21,8 +21,10 @@ RUN apt-get update && apt-get install -y \ WORKDIR /app # Copy service-specific requirements and install +# Copy base requirements and service-specific requirements +COPY libs/requirements-base.txt /tmp/libs-requirements.txt COPY apps/svc_ocr/requirements.txt /tmp/service-requirements.txt -RUN pip install --no-cache-dir -r /tmp/service-requirements.txt +RUN pip install --no-cache-dir -r /tmp/libs-requirements.txt -r /tmp/service-requirements.txt # Copy application code COPY libs/ ./libs/ diff --git a/apps/svc_ocr/main.py b/apps/svc_ocr/main.py index 5c6348e..315b8bf 100644 --- a/apps/svc_ocr/main.py +++ b/apps/svc_ocr/main.py @@ -118,7 +118,7 @@ async def init_dependencies(app_settings: OCRSettings) -> None: if attempt == max_retries: raise HTTPException( status_code=500, detail="Failed to connect to NATS after retries" - ) + ) from e await asyncio.sleep(delay) delay *= 2 # exponential backoff @@ -280,7 +280,7 @@ async def _handle_document_ingested(topic: str, payload: EventPayload) -> None: return # Auto-process PDF documents - if data.get("content_type") == "application/pdf": + if data.get("mime_type") == "application/pdf": logger.info("Auto-processing ingested document", doc_id=doc_id) try: @@ -347,13 +347,13 @@ async def _process_document_async( await ds.store_ocr_result(tenant_id, doc_id, ocr_results) # Update metrics - metrics.counter("documents_processed_total").labels( - tenant_id=tenant_id, strategy=strategy - ).inc() + metrics.counter( + "ocr_documents_processed_total", labelnames=["tenant_id", "strategy"] + ).labels(tenant_id=tenant_id, strategy=strategy).inc() - metrics.histogram("processing_duration_seconds").labels( - strategy=strategy - ).observe( + metrics.histogram( + "ocr_processing_duration_seconds", labelnames=["strategy"] + ).labels(strategy=strategy).observe( datetime.utcnow().timestamp() - datetime.fromisoformat( ocr_results["processed_at"].replace("Z", "") # type: ignore @@ -386,7 +386,10 @@ async def _process_document_async( logger.error("OCR processing failed", doc_id=doc_id, error=str(e)) # Update error metrics - metrics.counter("processing_errors_total").labels( + metrics.counter( + "ocr_processing_errors_total", + labelnames=["tenant_id", "strategy", "error_type"], + ).labels( tenant_id=tenant_id, strategy=strategy, error_type=type(e).__name__ ).inc() diff --git a/infra/authentik/bootstrap.yaml b/infra/authentik/bootstrap.yaml index 68639b4..1335247 100644 --- a/infra/authentik/bootstrap.yaml +++ b/infra/authentik/bootstrap.yaml @@ -50,6 +50,20 @@ entries: groups: - !Find [authentik_core.group, [name, "Administrators"]] + # --- E2E Test User --------------------------------------------------------- + - model: authentik_core.user + state: present + identifiers: + username: e2e_tester + attrs: + name: "E2E Tester" + email: e2e@example.com + is_active: true + password: "password123" + groups: + - !Find [authentik_core.group, [name, "Tax Reviewers"]] + - !Find [authentik_core.group, [name, "Administrators"]] + # Helper finders # ========= OIDC Providers + Applications ================================== @@ -317,6 +331,37 @@ entries: meta_publisher: "AI Tax Agent" policy_engine_mode: "any" + # --- NATS Monitoring (Proxy Provider for ForwardAuth) -------------------- + - model: authentik_providers_proxy.proxyprovider + state: present + identifiers: + name: "NATS Monitoring Proxy" + attrs: + external_host: "https://nats.local.lan" + internal_host: "http://apa-nats:8222" + authorization_flow: + !Find [authentik_flows.flow, [slug, "default-authentication-flow"]] + invalidation_flow: + !Find [authentik_flows.flow, [slug, "default-invalidation-flow"]] + mode: "forward_single" + cookie_domain: "local.lan" + + - model: authentik_core.application + state: present + identifiers: + slug: "nats-monitoring" + attrs: + name: "NATS Monitoring" + provider: + !Find [ + authentik_providers_proxy.proxyprovider, + [name, "NATS Monitoring Proxy"], + ] + meta_launch_url: "https://nats.local.lan" + meta_description: "NATS Messaging System Monitoring" + meta_publisher: "AI Tax Agent" + policy_engine_mode: "any" + # --- AI Tax Agent API (Proxy Provider for ForwardAuth) -------------------- - model: authentik_providers_proxy.proxyprovider state: present @@ -368,3 +413,7 @@ entries: authentik_providers_proxy.proxyprovider, [name, "AI Tax Agent API Proxy"], ] + - !Find [ + authentik_providers_proxy.proxyprovider, + [name, "NATS Monitoring Proxy"], + ] diff --git a/infra/base/infrastructure.yaml b/infra/base/infrastructure.yaml index b61f5d9..ed8a54d 100644 --- a/infra/base/infrastructure.yaml +++ b/infra/base/infrastructure.yaml @@ -331,6 +331,8 @@ services: networks: - backend - frontend + ports: + - "4222:4222" # Client connections (for local testing) volumes: - nats_data:/data command: > diff --git a/infra/compose/compose.override.yaml b/infra/compose/compose.override.yaml index 771e3c6..9b9d45c 100644 --- a/infra/compose/compose.override.yaml +++ b/infra/compose/compose.override.yaml @@ -49,6 +49,8 @@ services: dockerfile: apps/svc_ingestion/Dockerfile image: ai-tax-agent/svc-ingestion:local pull_policy: never + ports: + - "8000:8000" # Expose for local E2E testing apa-svc-extract: build: diff --git a/libs/app_factory.py b/libs/app_factory.py index 50c04ec..cd8b1fb 100644 --- a/libs/app_factory.py +++ b/libs/app_factory.py @@ -2,7 +2,7 @@ # FILE: libs/app_factory.py -from collections.abc import AsyncIterator +from collections.abc import AsyncIterator, Awaitable, Callable from contextlib import asynccontextmanager from typing import Any @@ -36,6 +36,8 @@ def create_app( # pylint: disable=too-many-arguments,too-many-positional-argume version: str = "1.0.0", settings_class: type[BaseAppSettings] = BaseAppSettings, custom_settings: dict[str, Any] | None = None, + startup_hooks: list[Callable[[], Awaitable[None]]] | None = None, + shutdown_hooks: list[Callable[[], Awaitable[None]]] | None = None, ) -> tuple[FastAPI, BaseAppSettings]: """Create a FastAPI application with standard configuration""" @@ -56,8 +58,14 @@ def create_app( # pylint: disable=too-many-arguments,too-many-positional-argume ) -> AsyncIterator[None]: # pylint: disable=unused-argument # Startup setup_observability(settings) + if startup_hooks: + for hook in startup_hooks: + await hook() yield # Shutdown + if shutdown_hooks: + for hook in shutdown_hooks: + await hook() # Create FastAPI app app = FastAPI( diff --git a/libs/events/topics.py b/libs/events/topics.py index b3e7811..ba5088e 100644 --- a/libs/events/topics.py +++ b/libs/events/topics.py @@ -4,15 +4,15 @@ class EventTopics: # pylint: disable=too-few-public-methods """Standard event topic names""" - DOC_INGESTED = "doc.ingested" - DOC_OCR_READY = "doc.ocr_ready" - DOC_EXTRACTED = "doc.extracted" - KG_UPSERT_READY = "kg.upsert.ready" - KG_UPSERTED = "kg.upserted" - RAG_INDEXED = "rag.indexed" - CALC_SCHEDULE_READY = "calc.schedule_ready" - FORM_FILLED = "form.filled" - HMRC_SUBMITTED = "hmrc.submitted" - REVIEW_REQUESTED = "review.requested" - REVIEW_COMPLETED = "review.completed" - FIRM_SYNC_COMPLETED = "firm.sync.completed" + DOC_INGESTED = "doc_ingested" + DOC_OCR_READY = "doc_ocr_ready" + DOC_EXTRACTED = "doc_extracted" + KG_UPSERT_READY = "kg_upsert_ready" + KG_UPSERTED = "kg_upserted" + RAG_INDEXED = "rag_indexed" + CALC_SCHEDULE_READY = "calc_schedule_ready" + FORM_FILLED = "form_filled" + HMRC_SUBMITTED = "hmrc_submitted" + REVIEW_REQUESTED = "review_requested" + REVIEW_COMPLETED = "review_completed" + FIRM_SYNC_COMPLETED = "firm_sync_completed" diff --git a/libs/requirements-base.txt b/libs/requirements-base.txt index 2d30bfb..645374f 100644 --- a/libs/requirements-base.txt +++ b/libs/requirements-base.txt @@ -11,7 +11,7 @@ psycopg2-binary>=2.9.11 neo4j>=6.0.2 redis[hiredis]>=6.4.0 -minio>=7.2.18 +minio==7.2.18 boto3>=1.34.0 qdrant-client>=1.15.1 diff --git a/libs/schemas/events.py b/libs/schemas/events.py index 42414ef..a251bef 100644 --- a/libs/schemas/events.py +++ b/libs/schemas/events.py @@ -72,22 +72,23 @@ class DocumentExtractedEventData(BaseEventData): """Event emitted when field extraction is complete.""" doc_id: str = Field(..., description="Document identifier") + tenant_id: str = Field(..., description="Tenant identifier") extraction_id: str = Field(..., description="Unique extraction run identifier") strategy: Literal["llm", "rules", "hybrid"] = Field( ..., description="Extraction strategy used" ) - fields_extracted: int = Field(..., ge=0, description="Number of fields extracted") - confidence_avg: float = Field( - ..., ge=0.0, le=1.0, description="Average extraction confidence" + field_count: int = Field(..., ge=0, description="Number of fields extracted") + confidence: float = Field( + ..., ge=0.0, le=1.0, description="Extraction confidence score" ) - calibrated_confidence: float = Field( - ..., ge=0.0, le=1.0, description="Calibrated confidence score" + extraction_results: dict[str, Any] = Field( + ..., description="Full extraction results including provenance" ) model_name: str | None = Field(None, description="LLM model used (if applicable)") - processing_time_ms: int = Field( - ..., ge=0, description="Processing time in milliseconds" + processing_time_ms: int | None = Field( + None, ge=0, description="Processing time in milliseconds" ) - storage_path: str = Field(..., description="Path to extraction results") + storage_path: str | None = Field(None, description="Path to extraction results") # Knowledge Graph events diff --git a/libs/security/dependencies.py b/libs/security/dependencies.py index 69859c7..6e270b9 100644 --- a/libs/security/dependencies.py +++ b/libs/security/dependencies.py @@ -41,6 +41,11 @@ def get_current_tenant(request: Request) -> str | None: if role.startswith("tenant:"): return str(role.split(":", 1)[1]) + # Check for explicit tenant header (useful for testing/API keys) + tenant_header = request.headers.get("X-Tenant-ID") + if tenant_header: + return tenant_header + # Default tenant for development return "default" diff --git a/libs/storage/client.py b/libs/storage/client.py index a2659b9..1acfbe2 100644 --- a/libs/storage/client.py +++ b/libs/storage/client.py @@ -19,17 +19,13 @@ class StorageClient: async def ensure_bucket(self, bucket_name: str, region: str = "us-east-1") -> bool: """Ensure bucket exists, create if not""" try: - # Check if bucket exists - if self.client.bucket_exists(bucket_name): - logger.debug("Bucket already exists", bucket=bucket_name) - return True - - # Create bucket - self.client.make_bucket(bucket_name, location=region) + self.client.make_bucket(bucket_name=bucket_name, location=region) logger.info("Created bucket", bucket=bucket_name, region=region) return True - except S3Error as e: + if e.code in ("BucketAlreadyOwnedByYou", "BucketAlreadyExists"): + logger.debug("Bucket already exists", bucket=bucket_name) + return True logger.error("Failed to ensure bucket", bucket=bucket_name, error=str(e)) return False diff --git a/scripts/authentik-blueprint-import.sh b/scripts/authentik-blueprint-import.sh deleted file mode 100755 index 6450187..0000000 --- a/scripts/authentik-blueprint-import.sh +++ /dev/null @@ -1,200 +0,0 @@ -#!/bin/bash -# Test Authentik blueprint import after manual setup - -set -euo pipefail - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Configuration -DOMAIN=${DOMAIN:-local} -AUTHENTIK_URL="https://auth.${DOMAIN}" -AUTHENTIK_API_URL="$AUTHENTIK_URL/api/v3" -ADMIN_EMAIL="admin@local.local" -ADMIN_PASSWORD="${AUTHENTIK_ADMIN_PASSWORD:-admin123}" - -echo -e "${BLUE}๐Ÿงช Testing Authentik blueprint import...${NC}" -echo - -# Function to check if setup is complete -check_setup_complete() { - local host - host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#') - local resolve=(--resolve "${host}:443:127.0.0.1") - local setup_code - setup_code=$(curl -ks "${resolve[@]}" -o /dev/null -w '%{http_code}' "$AUTHENTIK_URL/if/flow/initial-setup/" || true) - - if [[ "$setup_code" == "404" ]]; then - return 0 # Setup is complete - else - return 1 # Setup is still needed - fi -} - -# Function to get API token via login -get_api_token_via_login() { - echo -e "${YELLOW}๐Ÿ”‘ Getting API token via login...${NC}" - - local host - host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#') - local resolve=(--resolve "${host}:443:127.0.0.1") - - # Get login page and extract CSRF token - local login_page - login_page=$(curl -ks "${resolve[@]}" -c /tmp/auth_cookies.txt "$AUTHENTIK_URL/if/flow/default-authentication-flow/" || echo "") - - if [ -z "$login_page" ]; then - echo -e "${RED}โŒ Could not access login page${NC}" - return 1 - fi - - # Extract CSRF token from the page - local csrf_token - csrf_token=$(echo "$login_page" | grep -o 'name="csrfmiddlewaretoken"[^>]*value="[^"]*"' | sed 's/.*value="\([^"]*\)".*/\1/' | head -1 || echo "") - - if [ -z "$csrf_token" ]; then - echo -e "${RED}โŒ Could not extract CSRF token${NC}" - return 1 - fi - - echo -e "${GREEN}โœ… CSRF token extracted${NC}" - - # Login - local login_response - login_response=$(curl -ks "${resolve[@]}" -b /tmp/auth_cookies.txt -c /tmp/auth_cookies.txt \ - -X POST "$AUTHENTIK_URL/if/flow/default-authentication-flow/" \ - -H "Content-Type: application/x-www-form-urlencoded" \ - -H "Referer: $AUTHENTIK_URL/if/flow/default-authentication-flow/" \ - -d "csrfmiddlewaretoken=$csrf_token&uid_field=$ADMIN_EMAIL&password=$ADMIN_PASSWORD" \ - -w '%{http_code}' -o /tmp/login_response.html || echo "") - - if [[ "$login_response" =~ ^(200|302)$ ]]; then - echo -e "${GREEN}โœ… Login successful${NC}" - - # Get admin interface page to get new CSRF token - local admin_page - admin_page=$(curl -ks "${resolve[@]}" -b /tmp/auth_cookies.txt "$AUTHENTIK_URL/if/admin/" || echo "") - - local admin_csrf - admin_csrf=$(echo "$admin_page" | grep -o 'name="csrfmiddlewaretoken"[^>]*value="[^"]*"' | sed 's/.*value="\([^"]*\)".*/\1/' | head -1 || echo "") - - if [ -n "$admin_csrf" ]; then - # Create API token - local token_response - token_response=$(curl -ks "${resolve[@]}" -b /tmp/auth_cookies.txt \ - -X POST "$AUTHENTIK_API_URL/core/tokens/" \ - -H "Content-Type: application/json" \ - -H "X-CSRFToken: $admin_csrf" \ - -d "{ - \"identifier\": \"blueprint-test-$(date +%s)\", - \"description\": \"Test token for blueprint import\", - \"expires\": \"2025-12-31T23:59:59Z\" - }" 2>/dev/null || echo "") - - if [ -n "$token_response" ]; then - local token - token=$(echo "$token_response" | python3 -c "import sys, json; print(json.load(sys.stdin)['key'])" 2>/dev/null || echo "") - - if [ -n "$token" ]; then - echo -e "${GREEN}โœ… API token created${NC}" - echo "$token" - return 0 - fi - fi - fi - fi - - echo -e "${RED}โŒ Failed to get API token${NC}" - return 1 -} - -# Function to import blueprint -import_blueprint() { - local token="$1" - - echo -e "${YELLOW}๐Ÿ“‹ Importing blueprint...${NC}" - - local host - host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#') - local resolve=(--resolve "${host}:443:127.0.0.1") - - # Create blueprint instance - local blueprint_response - blueprint_response=$(curl -ks "${resolve[@]}" \ - -X POST "$AUTHENTIK_API_URL/managed/blueprints/" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer $token" \ - -d '{ - "name": "AI Tax Agent Bootstrap", - "path": "/blueprints/bootstrap.yaml", - "context": {}, - "enabled": true - }' 2>/dev/null || echo "") - - echo -e "${BLUE}Blueprint creation response:${NC}" - echo "$blueprint_response" | python3 -c "import sys, json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "$blueprint_response" - - local blueprint_pk - blueprint_pk=$(echo "$blueprint_response" | python3 -c "import sys, json; print(json.load(sys.stdin).get('pk', ''))" 2>/dev/null || echo "") - - if [ -n "$blueprint_pk" ]; then - echo -e "${GREEN}โœ… Blueprint created with ID: $blueprint_pk${NC}" - - # Apply the blueprint - echo -e "${YELLOW}๐Ÿ”„ Applying blueprint...${NC}" - local apply_response - apply_response=$(curl -ks "${resolve[@]}" \ - -X POST "$AUTHENTIK_API_URL/managed/blueprints/$blueprint_pk/apply/" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer $token" \ - -d '{}' 2>/dev/null || echo "") - - echo -e "${BLUE}Blueprint apply response:${NC}" - echo "$apply_response" | python3 -c "import sys, json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "$apply_response" - - return 0 - else - echo -e "${RED}โŒ Failed to create blueprint${NC}" - return 1 - fi -} - -# Main function -main() { - # Check if setup is complete - if ! check_setup_complete; then - echo -e "${YELLOW}โš ๏ธ Initial setup is still required${NC}" - echo -e "${BLUE}๐Ÿ“‹ Please complete setup at: https://auth.local.lan.lan/if/flow/initial-setup/${NC}" - echo -e "${BLUE}Use credentials: admin@local.local / admin123${NC}" - return 1 - fi - - echo -e "${GREEN}โœ… Initial setup is complete${NC}" - - # Get API token - local api_token - if api_token=$(get_api_token_via_login); then - echo -e "${GREEN}๐Ÿ”‘ API token obtained${NC}" - - # Import blueprint - if import_blueprint "$api_token"; then - echo -e "${GREEN}๐ŸŽ‰ Blueprint import test completed!${NC}" - else - echo -e "${RED}โŒ Blueprint import failed${NC}" - return 1 - fi - else - echo -e "${RED}โŒ Could not get API token${NC}" - return 1 - fi - - # Cleanup - rm -f /tmp/auth_cookies.txt /tmp/login_response.html -} - -# Run main function -main "$@" diff --git a/scripts/authentik-setup.sh b/scripts/authentik-setup.sh deleted file mode 100755 index 310a82e..0000000 --- a/scripts/authentik-setup.sh +++ /dev/null @@ -1,155 +0,0 @@ -#!/bin/bash -# Complete Authentik initial setup and get API token - -set -euo pipefail - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Configuration -DOMAIN=${DOMAIN:-local} -AUTHENTIK_URL="https://auth.${DOMAIN}" -ADMIN_EMAIL="admin@local" -ADMIN_PASSWORD="${AUTHENTIK_ADMIN_PASSWORD:-admin123}" -ENV_FILE="infra/compose/.env" - -echo -e "${BLUE}๐Ÿ”ง Completing Authentik initial setup...${NC}" -echo - -# Function to update env file -update_env_var() { - local var_name="$1" - local var_value="$2" - - if grep -q "^${var_name}=" "$ENV_FILE"; then - # Update existing variable - if [[ "$OSTYPE" == "darwin"* ]]; then - # macOS - sed -i '' "s|^${var_name}=.*|${var_name}=${var_value}|" "$ENV_FILE" - else - # Linux - sed -i "s|^${var_name}=.*|${var_name}=${var_value}|" "$ENV_FILE" - fi - echo -e "${GREEN}โœ… Updated ${var_name}${NC}" - else - # Add new variable - echo "${var_name}=${var_value}" >> "$ENV_FILE" - echo -e "${GREEN}โœ… Added ${var_name}${NC}" - fi -} - -# Function to check if setup is complete -check_setup_status() { - local host - host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#') - local resolve=(--resolve "${host}:443:127.0.0.1") - local setup_code - setup_code=$(curl -ks "${resolve[@]}" -o /dev/null -w '%{http_code}' "$AUTHENTIK_URL/if/flow/initial-setup/" || true) - - if [[ "$setup_code" == "404" ]]; then - return 0 # Setup is complete - else - return 1 # Setup is still needed - fi -} - -# Function to get API token -get_api_token() { - echo -e "${YELLOW}๐Ÿ”‘ Getting API token...${NC}" - - local host - host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#') - local resolve=(--resolve "${host}:443:127.0.0.1") - - # Get CSRF token first - local csrf_token - csrf_token=$(curl -ks "${resolve[@]}" -c /tmp/authentik_cookies.txt "$AUTHENTIK_URL/if/flow/default-authentication-flow/" | grep -o 'csrfmiddlewaretoken[^>]*value="[^"]*"' | sed 's/.*value="\([^"]*\)".*/\1/' || echo "") - - if [ -z "$csrf_token" ]; then - echo -e "${RED}โŒ Could not get CSRF token${NC}" - return 1 - fi - - # Login to get session - local login_response - login_response=$(curl -ks "${resolve[@]}" -b /tmp/authentik_cookies.txt -c /tmp/authentik_cookies.txt \ - -X POST "$AUTHENTIK_URL/if/flow/default-authentication-flow/" \ - -H "Content-Type: application/x-www-form-urlencoded" \ - -H "Referer: $AUTHENTIK_URL/if/flow/default-authentication-flow/" \ - -d "csrfmiddlewaretoken=$csrf_token&uid_field=$ADMIN_EMAIL&password=$ADMIN_PASSWORD" \ - -w '%{http_code}' -o /tmp/login_response.html || echo "") - - if [[ "$login_response" =~ ^(200|302)$ ]]; then - echo -e "${GREEN}โœ… Login successful${NC}" - - # Create API token - local token_response - token_response=$(curl -ks "${resolve[@]}" -b /tmp/authentik_cookies.txt \ - -X POST "$AUTHENTIK_URL/api/v3/core/tokens/" \ - -H "Content-Type: application/json" \ - -H "X-CSRFToken: $csrf_token" \ - -d "{ - \"identifier\": \"ai-tax-agent-bootstrap\", - \"description\": \"Bootstrap token for AI Tax Agent setup\", - \"expires\": \"2025-12-31T23:59:59Z\" - }" 2>/dev/null || echo "") - - if [ -n "$token_response" ]; then - local token - token=$(echo "$token_response" | python3 -c "import sys, json; print(json.load(sys.stdin)['key'])" 2>/dev/null || echo "") - - if [ -n "$token" ]; then - echo -e "${GREEN}โœ… API token created${NC}" - echo "$token" - return 0 - fi - fi - fi - - echo -e "${RED}โŒ Failed to get API token${NC}" - return 1 -} - -# Main function -main() { - # Check if setup is already complete - if check_setup_status; then - echo -e "${GREEN}โœ… Authentik setup is already complete${NC}" - - # Try to get API token - local api_token - if api_token=$(get_api_token); then - echo -e "${GREEN}๐Ÿ”‘ API token obtained${NC}" - - # Update .env file with token - update_env_var "AUTHENTIK_BOOTSTRAP_TOKEN" "$api_token" - - echo - echo -e "${GREEN}๐ŸŽ‰ Setup complete! You can now run:${NC}" - echo -e " ${BLUE}make setup-authentik${NC} - to import blueprint configuration" - else - echo -e "${YELLOW}โš ๏ธ Could not get API token automatically${NC}" - echo -e "${BLUE}๐Ÿ“‹ Manual steps:${NC}" - echo -e " 1. Open ${BLUE}https://auth.local.lan${NC} and log in" - echo -e " 2. Go to Admin Interface > Tokens" - echo -e " 3. Create a new token and update AUTHENTIK_BOOTSTRAP_TOKEN in .env" - fi - else - echo -e "${YELLOW}๐Ÿ“‹ Initial setup still required:${NC}" - echo -e " 1. Open ${BLUE}https://auth.local.lan.lan/if/flow/initial-setup/${NC}" - echo -e " 2. Complete the setup wizard with these credentials:" - echo -e " โ€ข Email: ${BLUE}$ADMIN_EMAIL${NC}" - echo -e " โ€ข Password: ${BLUE}$ADMIN_PASSWORD${NC}" - echo -e " 3. Re-run this script after setup is complete" - fi - - # Cleanup - rm -f /tmp/authentik_cookies.txt /tmp/login_response.html -} - -# Run main function -main "$@" diff --git a/scripts/authentik_setup.sh b/scripts/authentik_setup.sh deleted file mode 100755 index 449abfc..0000000 --- a/scripts/authentik_setup.sh +++ /dev/null @@ -1,125 +0,0 @@ -#!/bin/bash -# Automatically complete Authentik initial setup - -set -euo pipefail - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Configuration -DOMAIN=${DOMAIN:-local} -AUTHENTIK_URL="https://auth.${DOMAIN}" -ADMIN_EMAIL="admin@local.lan" -ADMIN_PASSWORD="${AUTHENTIK_ADMIN_PASSWORD:-admin123}" - -echo -e "${BLUE}๐Ÿค– Automatically completing Authentik initial setup...${NC}" -echo - -# Function to complete initial setup -complete_initial_setup() { - local host - host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#') - local resolve=(--resolve "${host}:443:127.0.0.1") - - echo -e "${YELLOW}๐Ÿ“‹ Completing initial setup form...${NC}" - - # Get the initial setup page and extract CSRF token - local setup_page - setup_page=$(curl -ks "${resolve[@]}" -c /tmp/authentik_setup_cookies.txt "$AUTHENTIK_URL/if/flow/initial-setup/" || echo "") - - if [ -z "$setup_page" ]; then - echo -e "${RED}โŒ Could not access setup page${NC}" - return 1 - fi - - # Extract CSRF token - local csrf_token - csrf_token=$(echo "$setup_page" | grep -o 'csrfmiddlewaretoken[^>]*value="[^"]*"' | sed 's/.*value="\([^"]*\)".*/\1/' | head -1 || echo "") - - if [ -z "$csrf_token" ]; then - echo -e "${RED}โŒ Could not extract CSRF token${NC}" - return 1 - fi - - echo -e "${GREEN}โœ… CSRF token extracted${NC}" - - # Submit the initial setup form - local setup_response - setup_response=$(curl -ks "${resolve[@]}" -b /tmp/authentik_setup_cookies.txt -c /tmp/authentik_setup_cookies.txt \ - -X POST "$AUTHENTIK_URL/if/flow/initial-setup/" \ - -H "Content-Type: application/x-www-form-urlencoded" \ - -H "Referer: $AUTHENTIK_URL/if/flow/initial-setup/" \ - -d "csrfmiddlewaretoken=$csrf_token&email=$ADMIN_EMAIL&password=$ADMIN_PASSWORD&password_repeat=$ADMIN_PASSWORD" \ - -w '%{http_code}' -o /tmp/setup_response.html || echo "") - - if [[ "$setup_response" =~ ^(200|302)$ ]]; then - echo -e "${GREEN}โœ… Initial setup completed successfully${NC}" - - # Wait a moment for setup to complete - sleep 3 - - # Verify setup is complete by checking if setup page returns 404 - local verify_code - verify_code=$(curl -ks "${resolve[@]}" -o /dev/null -w '%{http_code}' "$AUTHENTIK_URL/if/flow/initial-setup/" || true) - - if [[ "$verify_code" == "404" ]]; then - echo -e "${GREEN}โœ… Setup verification successful${NC}" - return 0 - else - echo -e "${YELLOW}โš ๏ธ Setup may not be complete (verification returned $verify_code)${NC}" - return 1 - fi - else - echo -e "${RED}โŒ Setup failed (HTTP $setup_response)${NC}" - return 1 - fi -} - -# Function to check if setup is needed -check_setup_needed() { - local host - host=$(echo "$AUTHENTIK_URL" | sed -E 's#^https?://([^/]+).*$#\1#') - local resolve=(--resolve "${host}:443:127.0.0.1") - local setup_code - setup_code=$(curl -ks "${resolve[@]}" -o /dev/null -w '%{http_code}' "$AUTHENTIK_URL/if/flow/initial-setup/" || true) - - #TODO: this is not a valid check if setup is already complete, needs work. Authentik returns 200 even if setup is complete - if [[ "$setup_code" == "200" ]]; then - return 0 # Setup is needed - else - return 1 # Setup is not needed - fi -} - -# Main function -main() { - if check_setup_needed; then - echo -e "${YELLOW}๐Ÿ“‹ Initial setup is required${NC}" - - if complete_initial_setup; then - echo -e "${GREEN}๐ŸŽ‰ Authentik initial setup completed automatically!${NC}" - echo - echo -e "${BLUE}๐Ÿ“‹ Next steps:${NC}" - echo -e " 1. Run ${BLUE}make complete-authentik-setup${NC} to get API token" - echo -e " 2. Run ${BLUE}make setup-authentik${NC} to import blueprint configuration" - echo -e " 3. Or run ${BLUE}make setup-sso${NC} to do both automatically" - else - echo -e "${RED}โŒ Automatic setup failed${NC}" - echo -e "${YELLOW}๐Ÿ“‹ Manual setup required:${NC}" - echo -e " 1. Open ${BLUE}https://auth.local.lan.lan/if/flow/initial-setup/${NC}" - echo -e " 2. Use credentials: ${BLUE}$ADMIN_EMAIL${NC} / ${BLUE}$ADMIN_PASSWORD${NC}" - fi - else - echo -e "${GREEN}โœ… Authentik setup is already complete${NC}" - fi - - # Cleanup - rm -f /tmp/authentik_setup_cookies.txt /tmp/setup_response.html -} - -# Run main function -main "$@" diff --git a/tests/e2e/test_backend_journey.py b/tests/e2e/test_backend_journey.py index 6ad9c21..7d24a93 100644 --- a/tests/e2e/test_backend_journey.py +++ b/tests/e2e/test_backend_journey.py @@ -38,14 +38,29 @@ async def test_backend_journey(): try: # 2. Upload a document - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient( + verify=False + ) as client: # Disable SSL verification for local testing # Create a dummy PDF file - files = {"file": ("test.pdf", b"%PDF-1.4 mock content", "application/pdf")} + # Create a valid minimal PDF file + pdf_content = ( + b"%PDF-1.0\n1 0 obj<>endobj 2 0 obj<>endobj " + b"3 0 obj<>>>endobj\nxref\n0 4\n0000000000 65535 f\n" + b"0000000010 00000 n\n0000000060 00000 n\n0000000111 00000 n\ntrailer<>\nstartxref\n190\n%%EOF" + ) + files = {"file": ("test.pdf", pdf_content, "application/pdf")} response = await client.post( f"{INGESTION_URL}/upload", files=files, data={"kind": "invoice", "source": "e2e_test"}, - headers={"X-Tenant-ID": TENANT_ID, "X-User-ID": "e2e_tester"}, + headers={ + "X-Tenant-ID": TENANT_ID, + "X-User-ID": "e2e_tester", + # Required by TrustedProxyMiddleware + "X-Authenticated-User": "e2e_tester", + "X-Authenticated-Email": "e2e@example.com", + "Authorization": "Bearer mock-token", + }, ) assert response.status_code == 200, f"Upload failed: {response.text}" upload_data = response.json()