# ai-tax-agent/tests/conftest.py

"""Pytest configuration and shared fixtures for coverage tests."""

# FILE: tests/conftest.py

import asyncio
import os
import tempfile
from datetime import datetime
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock

import pytest
import yaml

from libs.schemas import (
    CompiledCoveragePolicy,
    ConflictRules,
    CoveragePolicy,
    Defaults,
    EvidenceItem,
    Privacy,
    QuestionTemplates,
    Role,
    SchedulePolicy,
    StatusClassifier,
    StatusClassifierConfig,
    TaxYearBoundary,
    Trigger,
)

# pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement
# pylint: disable=raise-missing-from,unused-argument,too-many-arguments,too-many-positional-arguments
# pylint: disable=too-many-locals,import-outside-toplevel
# mypy: disable-error-code=union-attr
# mypy: disable-error-code=no-untyped-def


@pytest.fixture(scope="session")
def event_loop():
    """Provide a single event loop for the whole test session.

    A fresh loop is obtained from the current asyncio event loop policy,
    yielded to the tests, and closed during fixture teardown.
    """
    loop_policy = asyncio.get_event_loop_policy()
    session_loop = loop_policy.new_event_loop()
    yield session_loop
    session_loop.close()


@pytest.fixture
def temp_config_dir():
    """Yield a temporary directory containing a baseline ``coverage.yaml``.

    The policy is serialised with ``yaml.dump`` into the directory, whose
    path is yielded as a ``Path``. The directory is removed when the
    ``TemporaryDirectory`` context exits after the test.
    """
    # Evidence requirements for the employment schedule (SA102).
    employment_evidence = [
        {
            "id": "P60",
            "role": "REQUIRED",
            "boxes": ["SA102_b1", "SA102_b2"],
            "acceptable_alternatives": ["P45", "FinalPayslipYTD"],
        },
        {
            "id": "P11D",
            "role": "CONDITIONALLY_REQUIRED",
            "condition": "exists(BenefitInKind=true)",
            "boxes": ["SA102_b9"],
        },
    ]

    # Evidence requirements for the UK property schedule (SA105).
    property_evidence = [
        {
            "id": "LettingAgentStatements",
            "role": "REQUIRED",
            "boxes": ["SA105_b5"],
        }
    ]

    status_classifier = {
        "present_verified": {
            "min_ocr": 0.82,
            "min_extract": 0.85,
            "date_in_year": True,
        },
        "present_unverified": {
            "min_ocr": 0.60,
            "min_extract": 0.70,
            "date_in_year_or_tolerance": True,
        },
        "conflicting": {"conflict_rules": ["Same doc kind, different totals"]},
        "missing": {"default": True},
    }

    default_question = {
        "text": "To complete the {schedule} for {tax_year}, we need {evidence}.",
        "why": "{why}. See guidance: {guidance_doc}.",
    }

    baseline_policy = {
        "version": "1.0",
        "jurisdiction": "UK",
        "tax_year": "2024-25",
        "tax_year_boundary": {"start": "2024-04-06", "end": "2025-04-05"},
        "defaults": {
            "confidence_thresholds": {"ocr": 0.82, "extract": 0.85},
            "date_tolerance_days": 30,
        },
        "document_kinds": ["P60", "P11D", "P45"],
        "triggers": {
            "SA102": {"any_of": ["exists(IncomeItem[type='Employment'])"]},
            "SA105": {"any_of": ["exists(IncomeItem[type='UKPropertyRent'])"]},
        },
        "schedules": {
            "SA102": {"evidence": employment_evidence},
            "SA105": {"evidence": property_evidence},
        },
        "status_classifier": status_classifier,
        "conflict_resolution": {"precedence": ["P60", "P11D"]},
        "question_templates": {"default": default_question},
        "privacy": {"vector_pii_free": True, "redact_patterns": []},
    }

    with tempfile.TemporaryDirectory() as temp_dir:
        config_dir = Path(temp_dir)
        policy_file = config_dir / "coverage.yaml"
        policy_file.write_text(yaml.dump(baseline_policy))

        yield config_dir


@pytest.fixture
def sample_policy():
    """Build a small compiled coverage policy for tests.

    The policy covers the UK 2024-25 tax year with one SA102 schedule that
    requires a P60. The compiled wrapper maps the SA102 trigger expression
    to a stub predicate that always returns ``True``.
    """
    employment_trigger = "exists(IncomeItem[type='Employment'])"

    p60_requirement = EvidenceItem(
        id="P60",
        role=Role.REQUIRED,
        boxes=["SA102_b1", "SA102_b2"],
        acceptable_alternatives=["P45", "FinalPayslipYTD"],
    )

    classifier_config = StatusClassifierConfig(
        present_verified=StatusClassifier(min_ocr=0.82, min_extract=0.85),
        present_unverified=StatusClassifier(min_ocr=0.60, min_extract=0.70),
        conflicting=StatusClassifier(conflict_rules=[]),
        missing=StatusClassifier(conflict_rules=[]),
    )

    base_policy = CoveragePolicy(
        version="1.0",
        jurisdiction="UK",
        tax_year="2024-25",
        tax_year_boundary=TaxYearBoundary(start="2024-04-06", end="2025-04-05"),
        defaults=Defaults(
            confidence_thresholds={"ocr": 0.82, "extract": 0.85},
            date_tolerance_days=30,
        ),
        document_kinds=["P60", "P11D"],
        triggers={"SA102": Trigger(any_of=[employment_trigger])},
        schedules={"SA102": SchedulePolicy(evidence=[p60_requirement])},
        status_classifier=classifier_config,
        conflict_resolution=ConflictRules(precedence=["P60"]),
        question_templates=QuestionTemplates(default={"text": "test", "why": "test"}),
        privacy=Privacy(vector_pii_free=True, redact_patterns=[]),
    )

    # Wrap the policy together with a stub predicate for its only trigger.
    return CompiledCoveragePolicy(
        policy=base_policy,
        compiled_predicates={employment_trigger: lambda tid, ty: True},
        compiled_at=datetime.utcnow(),
        hash="test-hash",
        source_files=["test.yaml"],
    )


@pytest.fixture
def mock_kg_client():
    """Return an ``AsyncMock`` KG client.

    Awaiting ``run_query`` yields a list with a single P60 evidence row.
    """
    p60_row = {
        "doc_id": "DOC-P60-001",
        "kind": "P60",
        "page": 1,
        "bbox": {"x": 100, "y": 200, "width": 300, "height": 50},
        "ocr_confidence": 0.95,
        "extract_confidence": 0.92,
        "date": "2024-05-15",
    }

    kg = AsyncMock()
    # By default every query finds the P60 evidence above.
    kg.run_query = AsyncMock(return_value=[p60_row])
    return kg


@pytest.fixture
def mock_rag_client():
    """Return an ``AsyncMock`` RAG client.

    Awaiting ``search`` yields a list with a single SA102 notes citation.
    """
    sa102_citation = {
        "doc_id": "SA102-Notes-2025",
        "locator": "p.3 §1.1",
        "url": "https://docs.local/SA102-Notes-2025#p3s1.1",
    }

    rag = AsyncMock()
    # By default every search returns the citation above.
    rag.search = AsyncMock(return_value=[sa102_citation])
    return rag


@pytest.fixture
def mock_db_session():
    """Return an ``AsyncMock`` database session.

    ``add`` is a synchronous ``MagicMock``; ``commit``, ``rollback`` and
    ``close`` are ``AsyncMock`` instances.
    """
    db_session = AsyncMock()
    db_session.configure_mock(
        add=MagicMock(),
        commit=AsyncMock(),
        rollback=AsyncMock(),
        close=AsyncMock(),
    )
    return db_session


@pytest.fixture
def mock_policy_loader():
    """Return a ``MagicMock`` policy loader.

    ``load_policy``, ``compile_predicates`` and ``validate_policy`` are each
    set to their own ``MagicMock``.
    """
    policy_loader = MagicMock()
    for method_name in ("load_policy", "compile_predicates", "validate_policy"):
        setattr(policy_loader, method_name, MagicMock())
    return policy_loader


@pytest.fixture(autouse=True)
def setup_test_environment():
    """Install test environment variables around every test.

    A snapshot of ``os.environ`` is taken first. After the test the
    environment is cleared and the snapshot restored, so changes made
    during the test are undone as well.
    """
    saved_environ = dict(os.environ)

    test_settings = {
        "ENVIRONMENT": "test",
        "CONFIG_DIR": "/tmp/test-config",
        "NEO4J_URI": "bolt://localhost:7687",
        "NEO4J_USER": "neo4j",
        "NEO4J_PASSWORD": "testpass",
        "POSTGRES_URL": "postgresql://postgres:postgres@localhost:5432/test_db",
        "QDRANT_URL": "http://localhost:6333",
        "VAULT_URL": "http://localhost:8200",
        "VAULT_TOKEN": "test-token",
    }
    for variable, value in test_settings.items():
        os.environ[variable] = value

    yield

    # Put the environment back exactly as it was before the test.
    os.environ.clear()
    os.environ.update(saved_environ)


@pytest.fixture
def sample_evidence_data():
    """Return two sample evidence records: a P60 followed by a P11D."""
    p60_record = {
        "doc_id": "DOC-P60-001",
        "kind": "P60",
        "page": 1,
        "bbox": {"x": 100, "y": 200, "width": 300, "height": 50},
        "ocr_confidence": 0.95,
        "extract_confidence": 0.92,
        "date": "2024-05-15T10:00:00Z",
    }
    p11d_record = {
        "doc_id": "DOC-P11D-001",
        "kind": "P11D",
        "page": 1,
        "bbox": {"x": 50, "y": 100, "width": 400, "height": 60},
        "ocr_confidence": 0.88,
        "extract_confidence": 0.90,
        "date": "2024-07-06T14:30:00Z",
    }
    return [p60_record, p11d_record]


@pytest.fixture
def sample_citation_data():
    """Return two sample citations, both pointing into SA102-Notes-2025."""
    p60_citation = {
        "rule_id": "UK.SA102.P60.Required",
        "doc_id": "SA102-Notes-2025",
        "locator": "p.3 §1.1",
        "url": "https://docs.local/SA102-Notes-2025#p3s1.1",
    }
    p11d_citation = {
        "rule_id": "UK.SA102.P11D.Conditional",
        "doc_id": "SA102-Notes-2025",
        "locator": "p.5 §2.3",
        "url": "https://docs.local/SA102-Notes-2025#p5s2.3",
    }
    return [p60_citation, p11d_citation]


# Pytest plugin and marker configuration for test categorization.
# The plugin list is empty: this conftest declares no additional plugins.
pytest_plugins: list[str] = []


def pytest_configure(config):
    """Register the custom test-category markers.

    Each marker description is added to the ``markers`` ini setting through
    ``config.addinivalue_line``.
    """
    marker_descriptions = (
        "unit: mark test as a unit test",
        "integration: mark test as an integration test",
        "e2e: mark test as an end-to-end test",
        "slow: mark test as slow running",
    )
    for description in marker_descriptions:
        config.addinivalue_line("markers", description)


def pytest_collection_modifyitems(config, items):
    """Tag collected tests with category markers based on their location.

    A test receives the ``unit``, ``integration`` or ``e2e`` marker when one
    of the directories containing its file has exactly that name. The
    categories are checked in that order and only the first match is applied.
    Items whose test function is a coroutine function additionally receive
    the ``asyncio`` marker.

    Args:
        config: The pytest config object (unused).
        items: The collected test items; markers are added to them in place.
    """
    for item in items:
        # Compare whole directory names instead of searching the raw path
        # string, so unrelated fragments such as "community" or
        # "test_unit_conversion.py" do not trigger the "unit" marker.
        directories = Path(str(item.fspath)).parent.parts
        for category in ("unit", "integration", "e2e"):
            if category in directories:
                item.add_marker(getattr(pytest.mark, category))
                break

        # Not every collected item wraps a Python function (doctest items,
        # for example, have no ``function`` attribute), so look it up
        # defensively instead of failing the whole collection.
        test_function = getattr(item, "function", None)
        if test_function is not None and asyncio.iscoroutinefunction(test_function):
            item.add_marker(pytest.mark.asyncio)