Initial commit
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
This commit is contained in:
53
apps/svc_rpa/Dockerfile
Normal file
53
apps/svc_rpa/Dockerfile
Normal file
@@ -0,0 +1,53 @@
|
||||
# Multi-stage build for svc_rpa
FROM python:3.12-slim AS builder

# Install build dependencies (skip recommended extras to keep the layer small)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create virtual environment
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Copy requirements and install dependencies
COPY libs/requirements-base.txt /tmp/libs-requirements.txt
COPY apps/svc_rpa/requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /tmp/libs-requirements.txt -r /tmp/requirements.txt

# Production stage
FROM python:3.12-slim

# Install runtime dependencies and create the unprivileged service account
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/* \
    && groupadd -r appuser \
    && useradd -r -g appuser appuser

# Copy virtual environment from builder
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# The pip package ships only the Playwright driver; the browser binary and its
# system libraries must be installed separately or launch() fails at startup.
# Chromium matches the RPASettings.browser_type default; add firefox/webkit
# here if that setting is changed. The shared path keeps the binaries readable
# by the non-root user below.
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
RUN playwright install --with-deps chromium \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy application code
COPY libs/ ./libs/
COPY apps/svc_rpa/ ./apps/svc_rpa/

# Hand the application tree to the non-root user and drop privileges
RUN chown -R appuser:appuser /app
USER appuser

# Health check
# NOTE(review): apps/svc_rpa/main.py registers GET /health; confirm that
# libs.app_factory.create_app also serves /healthz, otherwise this probe 404s.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/healthz || exit 1

# Expose port
EXPOSE 8000

# Run the application
CMD ["python", "-m", "uvicorn", "apps.svc_rpa.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
524
apps/svc_rpa/main.py
Normal file
524
apps/svc_rpa/main.py
Normal file
@@ -0,0 +1,524 @@
|
||||
# FILE: apps/svc_rpa/main.py
|
||||
# mypy: disable-error-code=union-attr
|
||||
# Playwright automation for portal data extraction (HMRC, banks, etc.)
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
# Import shared libraries
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
import ulid
|
||||
from fastapi import BackgroundTasks, Depends, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from playwright.async_api import Browser, Page, async_playwright
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
|
||||
from libs.app_factory import create_app
|
||||
from libs.config import BaseAppSettings, create_event_bus, create_vault_client
|
||||
from libs.events import EventBus, EventPayload
|
||||
from libs.observability import get_metrics, get_tracer, setup_observability
|
||||
from libs.schemas import ErrorResponse
|
||||
from libs.security import VaultTransitHelper, get_current_user, get_tenant_id
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class RPASettings(BaseAppSettings):
    """Configuration values specific to the RPA service."""

    service_name: str = "svc-rpa"

    # --- Browser ---
    browser_type: str = "chromium"  # one of: chromium, firefox, webkit
    headless: bool = True
    timeout: int = 30_000  # 30 seconds, given in milliseconds to Playwright calls

    # --- Portals ---
    hmrc_base_url: str = "https://www.gov.uk/log-in-hmrc-online-services"
    open_banking_enabled: bool = False

    # --- Session limits ---
    max_concurrent_sessions: int = 5
    session_timeout: int = 300  # 5 minutes, in seconds
|
||||
|
||||
|
||||
# Create app and settings
# create_app hands back the application object and the settings object; the
# settings are presumably an RPASettings instance (settings_class) -- confirm
# against libs.app_factory.
app, settings = create_app(
    service_name="svc-rpa",
    title="Tax Agent RPA Service",
    description="Robotic Process Automation for portal data extraction",
    settings_class=RPASettings,
)

# Global clients
# vault_helper, event_bus and browser stay None until startup_event() assigns them.
vault_helper: VaultTransitHelper | None = None
event_bus: EventBus | None = None
browser: Browser | None = None
# Process-local registry of live sessions keyed by session ID; each entry holds
# the browser context, page, portal, tenant/user IDs and activity timestamps.
active_sessions: dict[str, dict[str, Any]] = {}
tracer = get_tracer("svc-rpa")
metrics = get_metrics()
|
||||
|
||||
|
||||
@app.on_event("startup")
async def startup_event() -> None:
    """Bring up observability, the Vault helper, the event bus and the browser.

    Populates the module-level ``vault_helper``, ``event_bus`` and ``browser``
    globals that the request handlers rely on.
    """
    global vault_helper, event_bus, browser

    logger.info("Starting RPA service")

    # Observability goes first so the remaining steps are instrumented.
    setup_observability(settings)

    # Vault transit helper used to decrypt portal credentials.
    vault_helper = VaultTransitHelper(create_vault_client(settings), "tax-agent-transit")

    # Event bus for publishing extraction events.
    event_bus = create_event_bus(settings)
    await event_bus.start()  # fmt: skip # pyright: ignore[reportOptionalMemberAccess]

    # Shared browser instance; each session later gets its own context.
    # NOTE(review): the Playwright driver handle is only a local here, so
    # nothing can call stop() on it at shutdown.
    pw_driver = await async_playwright().start()
    browser_launcher = pw_driver[settings.browser_type]
    launch_args = (
        ["--no-sandbox", "--disable-dev-shm-usage"] if settings.headless else []
    )
    browser = await browser_launcher.launch(
        headless=settings.headless,
        args=launch_args,
    )

    logger.info("RPA service started successfully")
|
||||
|
||||
|
||||
@app.on_event("shutdown")
async def shutdown_event() -> None:
    """Release the shared browser and the event bus.

    Each resource is released independently: a failure while closing the
    browser is logged and does not prevent the event bus from being stopped.
    The globals are reset to ``None`` so a repeated shutdown is a no-op.
    """
    global event_bus, browser

    logger.info("Shutting down RPA service")

    if browser:
        try:
            await browser.close()
        except Exception as e:
            # Best effort: keep going so the event bus is still stopped.
            logger.error("Failed to close browser", error=str(e))
        finally:
            browser = None

    if event_bus:
        try:
            await event_bus.stop()
        except Exception as e:
            logger.error("Failed to stop event bus", error=str(e))
        finally:
            event_bus = None

    logger.info("RPA service shutdown complete")
|
||||
|
||||
|
||||
@app.get("/health")
async def health_check() -> dict[str, Any]:
    """Report service identity, the current UTC time and the live session count."""
    report: dict[str, Any] = {
        "status": "healthy",
        "service": settings.service_name,
        "version": settings.service_version,
    }
    report["timestamp"] = datetime.utcnow().isoformat()
    report["active_sessions"] = len(active_sessions)
    return report
|
||||
|
||||
|
||||
@app.post("/sessions")
async def create_session(
    portal: str,
    background_tasks: BackgroundTasks,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Create a new RPA session backed by its own browser context.

    Args:
        portal: Portal identifier stored on the session (login/extract branch
            on "hmrc" and "open_banking").
        background_tasks: Used to schedule the timeout-based cleanup.
        current_user: Authenticated user claims; ``sub`` is stored as user ID.
        tenant_id: Tenant that owns the session.

    Returns:
        The session ID, portal, status and ``expires_at`` (epoch seconds).

    Raises:
        HTTPException: 429 when the concurrent-session limit is reached,
            500 for any other failure.
    """
    with tracer.start_as_current_span("create_session") as span:
        span.set_attribute("portal", portal)
        span.set_attribute("tenant_id", tenant_id)

        session_id: str | None = None
        context = None
        try:
            # Check session limits
            if len(active_sessions) >= settings.max_concurrent_sessions:
                raise HTTPException(status_code=429, detail="Too many active sessions")

            # Generate session ID
            session_id = str(ulid.new())
            span.set_attribute("session_id", session_id)

            # Create browser context
            context = await browser.new_context(  # pyright: ignore[reportOptionalMemberAccess]
                viewport={"width": 1920, "height": 1080},
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            )
            page = await context.new_page()

            # Store session (naive UTC, matching what _get_session compares against)
            now = datetime.utcnow()
            active_sessions[session_id] = {
                "context": context,
                "page": page,
                "portal": portal,
                "tenant_id": tenant_id,
                "user_id": current_user.get("sub"),
                "created_at": now,
                "last_activity": now,
            }

            # Schedule session cleanup
            background_tasks.add_task(
                _cleanup_session_after_timeout, session_id, settings.session_timeout
            )

            logger.info("RPA session created", session_id=session_id, portal=portal)

            return {
                "session_id": session_id,
                "portal": portal,
                "status": "created",
                # timestamp() treats a naive datetime as local time, so the
                # epoch must come from now(), not utcnow(), to be correct on
                # hosts whose timezone is not UTC.
                "expires_at": datetime.now().timestamp() + settings.session_timeout,
            }

        except HTTPException:
            # Deliberate HTTP errors (the 429 above) keep their status code.
            raise
        except Exception as e:
            logger.error("Failed to create session", error=str(e))
            # Roll back so a half-built session neither occupies a slot nor
            # leaks its browser context.
            if session_id is not None:
                active_sessions.pop(session_id, None)
            if context is not None:
                try:
                    await context.close()
                except Exception as close_error:
                    logger.warning(
                        "Failed to close context after session creation error",
                        error=str(close_error),
                    )
            raise HTTPException(
                status_code=500, detail="Failed to create session"
            ) from e
|
||||
|
||||
|
||||
@app.post("/sessions/{session_id}/navigate")
async def navigate_to_url(
    session_id: str,
    url: str,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Navigate the session's page to ``url``.

    Args:
        session_id: ID of an existing session.
        url: Target URL, passed to the browser as given.
        current_user: Authenticated user claims (authentication gate only).
        tenant_id: Tenant making the request; must own the session.

    Returns:
        Final page URL, page title and the HTTP status of the navigation
        response (``None`` when the navigation produced no response).

    Raises:
        HTTPException: 404/403/408 from session validation, 500 when the
            navigation itself fails.
    """
    with tracer.start_as_current_span("navigate") as span:
        span.set_attribute("session_id", session_id)
        span.set_attribute("url", url)

        try:
            session = _get_session(session_id, tenant_id)
            page = session["page"]

            # NOTE(review): url is caller-supplied and not restricted to
            # http(s); consider validating the scheme/host before navigating.
            response = await page.goto(url, timeout=settings.timeout)
            # goto() yields None when there is no main-resource response
            # (e.g. about:blank or a same-document hash navigation).
            response_status = response.status if response is not None else None

            # Update last activity
            session["last_activity"] = datetime.utcnow()

            # Take screenshot for debugging
            # NOTE(review): the screenshot bytes are discarded here.
            await page.screenshot()

            logger.info(
                "Navigated to URL",
                session_id=session_id,
                url=url,
                status=response_status,
            )

            return {
                "status": "success",
                "url": page.url,
                "title": await page.title(),
                "response_status": response_status,
            }

        except HTTPException:
            # Keep the 404/403/408 raised by session validation intact.
            raise
        except Exception as e:
            logger.error(
                "Navigation failed", session_id=session_id, url=url, error=str(e)
            )
            raise HTTPException(
                status_code=500, detail=f"Navigation failed: {str(e)}"
            ) from e
|
||||
|
||||
|
||||
@app.post("/sessions/{session_id}/login")
async def login_to_portal(
    session_id: str,
    credentials: dict[str, str],
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Log the session in to its portal using encrypted credentials.

    Args:
        session_id: ID of an existing session.
        credentials: Mapping of field name to ciphertext; each value is
            decrypted through the Vault transit helper, with the field name
            passed as ``key_name``.
        current_user: Authenticated user claims (authentication gate only).
        tenant_id: Tenant making the request; must own the session.

    Returns:
        ``{"status": "success"|"failed", "authenticated": bool}``.

    Raises:
        HTTPException: 404/403/408 from session validation, 500 for
            decryption errors, unsupported portals or other failures.
    """
    with tracer.start_as_current_span("login") as span:
        span.set_attribute("session_id", session_id)

        try:
            session = _get_session(session_id, tenant_id)
            page = session["page"]
            portal = session["portal"]

            # Decrypt credentials (never logged)
            decrypted_credentials: dict[str, Any] = {
                key: vault_helper.decrypt_field(  # pyright: ignore[reportOptionalMemberAccess]
                    key_name=key, ciphertext=encrypted_value
                )
                for key, encrypted_value in credentials.items()
            }

            # Perform login based on portal type
            if portal == "hmrc":
                success = await _login_hmrc(page, decrypted_credentials)
            elif portal == "open_banking":
                success = await _login_open_banking(page, decrypted_credentials)
            else:
                raise ValueError(f"Unsupported portal: {portal}")

            # Update session
            session["last_activity"] = datetime.utcnow()
            session["authenticated"] = success

            if success:
                logger.info("Login successful", session_id=session_id, portal=portal)
                return {"status": "success", "authenticated": True}

            logger.warning("Login failed", session_id=session_id, portal=portal)
            return {"status": "failed", "authenticated": False}

        except HTTPException:
            # Keep the 404/403/408 raised by session validation intact.
            raise
        except Exception as e:
            logger.error("Login error", session_id=session_id, error=str(e))
            raise HTTPException(
                status_code=500, detail=f"Login failed: {str(e)}"
            ) from e
|
||||
|
||||
|
||||
@app.post("/sessions/{session_id}/extract")
async def extract_data(
    session_id: str,
    extraction_config: dict[str, Any],
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Extract data from the session's portal and publish an extraction event.

    Args:
        session_id: ID of an existing, authenticated session.
        extraction_config: Portal-specific options handed to the extractor.
        current_user: Authenticated user claims; ``sub`` becomes the event actor.
        tenant_id: Tenant making the request; must own the session.

    Returns:
        Status, the extracted data and the number of extracted records.

    Raises:
        HTTPException: 401 when the session has not logged in, 404/403/408
            from session validation, 500 for any other failure.
    """
    with tracer.start_as_current_span("extract_data") as span:
        span.set_attribute("session_id", session_id)

        try:
            session = _get_session(session_id, tenant_id)
            page = session["page"]
            portal = session["portal"]

            # Check authentication
            if not session.get("authenticated", False):
                raise HTTPException(status_code=401, detail="Session not authenticated")

            # Extract data based on portal and config
            if portal == "hmrc":
                extracted_data = await _extract_hmrc_data(page, extraction_config)
            elif portal == "open_banking":
                extracted_data = await _extract_banking_data(page, extraction_config)
            else:
                raise ValueError(f"Unsupported portal: {portal}")

            # Update session
            session["last_activity"] = datetime.utcnow()

            # Publish extraction event
            event_payload = EventPayload(
                data={
                    "session_id": session_id,
                    "portal": portal,
                    "extraction_config": extraction_config,
                    "extracted_data": extracted_data,
                    "tenant_id": tenant_id,
                },
                actor=current_user.get("sub", "system"),
                tenant_id=tenant_id,
                trace_id=span.get_span_context().trace_id,
            )
            await event_bus.publish("rpa.data_extracted", event_payload)  # fmt: skip # pyright: ignore[reportOptionalMemberAccess]

            records_count = len(extracted_data.get("records", []))
            logger.info(
                "Data extracted",
                session_id=session_id,
                portal=portal,
                records_count=records_count,
            )

            return {
                "status": "success",
                "extracted_data": extracted_data,
                "records_count": records_count,
            }

        except HTTPException:
            # Keep the 401 above and the 404/403/408 from session validation.
            raise
        except Exception as e:
            logger.error("Data extraction failed", session_id=session_id, error=str(e))
            raise HTTPException(
                status_code=500, detail=f"Extraction failed: {str(e)}"
            ) from e
|
||||
|
||||
|
||||
@app.delete("/sessions/{session_id}")
async def close_session(
    session_id: str,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, str]:
    """Close an RPA session and release its browser context.

    Args:
        session_id: ID of an existing session.
        current_user: Authenticated user claims (authentication gate only).
        tenant_id: Tenant making the request; must own the session.

    Returns:
        ``{"status": "closed"}``.

    Raises:
        HTTPException: 404/403/408 from session validation, 500 when the
            browser context cannot be closed.
    """
    with tracer.start_as_current_span("close_session") as span:
        span.set_attribute("session_id", session_id)

        try:
            session = _get_session(session_id, tenant_id)

            # Close browser context
            await session["context"].close()

            # Remove from active sessions. pop() rather than del: the timeout
            # cleanup task may have removed the entry while close() was awaited.
            active_sessions.pop(session_id, None)

            logger.info("Session closed", session_id=session_id)

            return {"status": "closed"}

        except HTTPException:
            # Keep the 404/403/408 raised by session validation intact.
            raise
        except Exception as e:
            logger.error("Failed to close session", session_id=session_id, error=str(e))
            raise HTTPException(
                status_code=500, detail="Failed to close session"
            ) from e
|
||||
|
||||
|
||||
def _get_session(session_id: str, tenant_id: str) -> dict[str, Any]:
    """Look up a session and validate tenant ownership and idle timeout.

    Args:
        session_id: ID of the session to fetch from ``active_sessions``.
        tenant_id: Tenant making the request.

    Returns:
        The stored session dictionary (the live object, not a copy).

    Raises:
        HTTPException: 404 if the session is unknown, 403 if it belongs to
            another tenant, 408 if it has been idle longer than
            ``settings.session_timeout`` seconds.
    """
    session = active_sessions.get(session_id)
    if session is None:
        raise HTTPException(status_code=404, detail="Session not found")

    # Check tenant access
    if session["tenant_id"] != tenant_id:
        raise HTTPException(status_code=403, detail="Access denied")

    # Check timeout. total_seconds() is required: timedelta.seconds is only
    # the seconds component and wraps every 24 hours, which would let a
    # session idle for more than a day pass as fresh.
    idle_time = datetime.utcnow() - session["last_activity"]
    if idle_time.total_seconds() > settings.session_timeout:
        raise HTTPException(status_code=408, detail="Session expired")

    return session
|
||||
|
||||
|
||||
async def _login_hmrc(page: Page, credentials: dict[str, str]) -> bool:
    """Drive the HMRC sign-in form and report whether login appears to succeed.

    Returns ``True`` when the post-submit URL no longer contains "sign-in";
    any error along the way is logged and reported as ``False``.
    """
    user_id_input = 'input[name="userId"]'
    password_input = 'input[name="password"]'

    try:
        # Open the login page and wait until the form is present.
        await page.goto(settings.hmrc_base_url)
        await page.wait_for_selector(user_id_input, timeout=settings.timeout)

        # Enter the credentials; missing keys are filled in as empty strings.
        await page.fill(user_id_input, credentials.get("user_id", ""))
        await page.fill(password_input, credentials.get("password", ""))

        # Submit and let the resulting navigation settle.
        await page.click('button[type="submit"]')
        await page.wait_for_load_state("networkidle")

        # Still being on a sign-in URL is treated as a failed login.
        landed_on = page.url.lower()
        return "sign-in" not in landed_on

    except Exception as e:
        logger.error("HMRC login failed", error=str(e))
        return False
|
||||
|
||||
|
||||
async def _login_open_banking(page: Page, credentials: dict[str, str]) -> bool:
    """Placeholder for the Open Banking login flow; always reports failure."""
    # The real flow is not implemented yet, so every path returns False.
    try:
        logger.warning("Open Banking login not implemented")
    except Exception as e:
        logger.error("Open Banking login failed", error=str(e))
    return False
|
||||
|
||||
|
||||
async def _extract_hmrc_data(page: Page, config: dict[str, Any]) -> dict[str, Any]:
    """Scrape records from the HMRC portal according to ``config``.

    Reads ``data_type`` (default "tax_returns") and ``tax_year`` (default
    "2023-24") from ``config``. Only "tax_returns" triggers scraping; other
    data types return an empty record list. On any error the failure is
    logged and ``{"error": ..., "records": []}`` is returned.
    """
    # Runs in the page for each matched element and pulls out its text fields.
    row_to_record_js = """
        element => ({
            year: element.querySelector('.tax-year')?.textContent?.trim(),
            status: element.querySelector('.status')?.textContent?.trim(),
            amount: element.querySelector('.amount')?.textContent?.trim()
        })
    """

    try:
        data_type = config.get("data_type", "tax_returns")
        result: dict[str, Any] = {
            "data_type": data_type,
            "tax_year": config.get("tax_year", "2023-24"),
            "records": [],
            "extracted_at": datetime.utcnow().isoformat(),
        }

        if data_type != "tax_returns":
            return result

        # Open the tax returns section and wait for it to settle.
        await page.click('a[href*="tax-return"]')
        await page.wait_for_load_state("networkidle")

        # One record per listed return.
        for item in await page.query_selector_all(".tax-return-item"):
            result["records"].append(await item.evaluate(row_to_record_js))

        return result

    except Exception as e:
        logger.error("HMRC data extraction failed", error=str(e))
        return {"error": str(e), "records": []}
|
||||
|
||||
|
||||
async def _extract_banking_data(page: Page, config: dict[str, Any]) -> dict[str, Any]:
    """Placeholder for Open Banking extraction; returns an error payload."""
    # Extraction is not implemented yet; callers get an empty record list.
    try:
        logger.warning("Open Banking extraction not implemented")
    except Exception as e:
        logger.error("Banking data extraction failed", error=str(e))
        return {"error": str(e), "records": []}
    else:
        return {"error": "Not implemented", "records": []}
|
||||
|
||||
|
||||
async def _cleanup_session_after_timeout(session_id: str, timeout_seconds: int) -> None:
    """Close and forget a session once ``timeout_seconds`` have elapsed.

    Args:
        session_id: ID of the session to clean up.
        timeout_seconds: Delay before the cleanup runs.

    The registry entry is removed before the browser context is closed, so a
    failing ``close()`` cannot leave a dead session occupying a slot, and a
    concurrent explicit close cannot trigger a ``KeyError`` here.
    """
    await asyncio.sleep(timeout_seconds)

    session = active_sessions.pop(session_id, None)
    if session is None:
        # Already closed explicitly; nothing left to do.
        return

    try:
        await session["context"].close()
        logger.info("Session cleaned up due to timeout", session_id=session_id)
    except Exception as e:
        logger.error(
            "Failed to cleanup session", session_id=session_id, error=str(e)
        )
|
||||
|
||||
|
||||
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
    """Render an HTTPException as an RFC7807-style JSON error body."""
    status = exc.status_code
    # The exception detail serves as both title and detail; trace_id is left empty.
    problem = ErrorResponse(
        type=f"https://httpstatuses.com/{status}",
        title=exc.detail,
        status=status,
        detail=exc.detail,
        instance=str(request.url),
        trace_id="",
    )
    return JSONResponse(status_code=status, content=problem.model_dump())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point for running this module directly: serves on port 8001 with
    # auto-reload enabled, whereas the Dockerfile CMD in this commit starts
    # uvicorn on port 8000 without reload.
    import uvicorn

    # log_config=None: presumably leaves logging setup to the application
    # rather than uvicorn's default config -- confirm against uvicorn docs.
    uvicorn.run("main:app", host="0.0.0.0", port=8001, reload=True, log_config=None)
|
||||
17
apps/svc_rpa/requirements.txt
Normal file
17
apps/svc_rpa/requirements.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
# FastAPI and server
|
||||
fastapi>=0.104.1
|
||||
uvicorn[standard]>=0.24.0
|
||||
pydantic>=2.5.0
|
||||
|
||||
# Service-specific dependencies
|
||||
# Browser automation
|
||||
playwright>=1.40.0
|
||||
|
||||
# Additional async utilities
|
||||
# asyncio-timeout>=4.0.3 # Deprecated, use asyncio.timeout from Python 3.11+ standard library
|
||||
|
||||
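# Session management
# NOTE(review): aioredis is no longer maintained; its client was merged into
# redis-py as redis.asyncio (redis>=4.2). Verify that aioredis still imports on
# the Python 3.12 base image before relying on it.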
|
||||
aioredis>=2.0.1
|
||||
|
||||
# Browser management
|
||||
psutil>=5.9.0
|
||||
Reference in New Issue
Block a user