# ai-tax-agent/tests/unit/coverage/test_question_templates.py

"""Unit tests for question template generation."""

# FILE: tests/unit/coverage/test_question_templates.py

import pytest

from libs.schemas import Citation, ClarifyContext, CoverageGap, Role, UploadOption

# pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement
# pylint: disable=raise-missing-from,unused-argument,too-many-arguments,too-many-positional-arguments
# pylint: disable=too-many-locals,import-outside-toplevel
# mypy: disable-error-code=union-attr
# mypy: disable-error-code=no-untyped-def


class TestQuestionTemplates:
    """Tests for the wording and upload options of clarifying questions.

    NOTE(review): every test here rebuilds the expected text/options inline
    from the schema objects; no production question generator is imported in
    this module, so these tests pin the schema models and the intended output
    format rather than the generator itself. Confirm the templates below stay
    in sync with the real implementation.
    """

    @pytest.fixture
    def sample_gap(self):
        """Return a REQUIRED P60 gap on SA102 with two boxes, one citation
        and two acceptable alternatives."""
        return CoverageGap(
            schedule_id="SA102",
            evidence_id="P60",
            role=Role.REQUIRED,
            reason="P60 provides year-end pay and PAYE tax figures",
            boxes=["SA102_b1", "SA102_b2"],
            citations=[
                Citation(
                    rule_id="UK.SA102.P60.Required",
                    doc_id="SA102-Notes-2025",
                    locator="p.3 §1.1",
                )
            ],
            acceptable_alternatives=["P45", "FinalPayslipYTD"],
        )

    @pytest.fixture
    def sample_context(self):
        """Return a clarify context for UK taxpayer T-001, tax year 2024-25."""
        return ClarifyContext(
            tax_year="2024-25",
            taxpayer_id="T-001",
            jurisdiction="UK",
        )

    def test_question_text_formatting(self, sample_gap, sample_context):
        """The question template renders schedule, year, evidence, boxes and
        alternatives into a single sentence block."""
        # Presumably mirrors what `_generate_clarifying_question` produces;
        # that function is not called here -- TODO confirm against it.
        evidence_name = sample_gap.evidence_id
        schedule_name = sample_gap.schedule_id
        boxes_text = ", ".join(sample_gap.boxes)
        alternatives_text = ", ".join(sample_gap.acceptable_alternatives)

        # Template format
        template_text = "To complete the {schedule} for {tax_year}, we need {evidence}. These documents support boxes {boxes}. If you don't have this, you can provide {alternatives}."

        question_text = template_text.format(
            schedule=schedule_name,
            tax_year=sample_context.tax_year,
            evidence=evidence_name,
            boxes=boxes_text,
            alternatives=alternatives_text,
        )

        expected = "To complete the SA102 for 2024-25, we need P60. These documents support boxes SA102_b1, SA102_b2. If you don't have this, you can provide P45, FinalPayslipYTD."
        assert question_text == expected

    def test_why_text_formatting(self, sample_gap):
        """The "why" template appends a guidance pointer to the gap's reason."""
        template_why = "{why}. See guidance: {guidance_doc}."

        why_text = template_why.format(
            why=sample_gap.reason,
            guidance_doc="policy guidance",
        )

        expected = "P60 provides year-end pay and PAYE tax figures. See guidance: policy guidance."
        assert why_text == expected

    def test_upload_options_generation(self, sample_gap):
        """One upload option is built per acceptable alternative, in order."""
        options = [
            UploadOption(
                label=f"Upload {alt} (PDF/CSV)",
                accepted_formats=["pdf", "csv"],
                upload_endpoint=f"/v1/ingest/upload?tag={alt}",
            )
            for alt in sample_gap.acceptable_alternatives
        ]

        assert len(options) == 2
        assert options[0].label == "Upload P45 (PDF/CSV)"
        assert options[0].accepted_formats == ["pdf", "csv"]
        assert options[0].upload_endpoint == "/v1/ingest/upload?tag=P45"
        assert options[1].label == "Upload FinalPayslipYTD (PDF/CSV)"
        # Both options are built the same way, so check formats on both.
        assert options[1].accepted_formats == ["pdf", "csv"]
        assert options[1].upload_endpoint == "/v1/ingest/upload?tag=FinalPayslipYTD"

    def test_upload_options_no_alternatives(self):
        """With no alternatives, a single option for the evidence itself is built."""
        gap_no_alternatives = CoverageGap(
            schedule_id="SA102",
            evidence_id="P60",
            role=Role.REQUIRED,
            reason="Required document",
            boxes=["SA102_b1"],
            acceptable_alternatives=[],
        )

        options = []

        # When no alternatives, create option for main evidence
        if not gap_no_alternatives.acceptable_alternatives:
            options.append(
                UploadOption(
                    label=f"Upload {gap_no_alternatives.evidence_id} (PDF/CSV)",
                    accepted_formats=["pdf", "csv"],
                    upload_endpoint=f"/v1/ingest/upload?tag={gap_no_alternatives.evidence_id}",
                )
            )

        assert len(options) == 1
        assert options[0].label == "Upload P60 (PDF/CSV)"
        assert options[0].upload_endpoint == "/v1/ingest/upload?tag=P60"

    def test_blocking_determination(self, sample_gap):
        """A gap is blocking exactly when its role value is "REQUIRED"."""
        # Required evidence should be blocking
        assert sample_gap.role == Role.REQUIRED
        blocking = sample_gap.role.value == "REQUIRED"
        assert blocking is True

        # Optional evidence should not be blocking
        optional_gap = CoverageGap(
            schedule_id="SA102",
            evidence_id="PayslipMonthly",
            role=Role.OPTIONAL,
            reason="Optional supporting document",
            boxes=["SA102_b3"],
        )

        blocking_optional = optional_gap.role.value == "REQUIRED"
        assert blocking_optional is False

    def test_boxes_affected_formatting(self, sample_gap):
        """The gap exposes its boxes unchanged, including an empty list."""
        boxes_affected = sample_gap.boxes
        assert boxes_affected == ["SA102_b1", "SA102_b2"]

        # Test empty boxes
        gap_no_boxes = CoverageGap(
            schedule_id="SA102",
            evidence_id="EmploymentContract",
            role=Role.OPTIONAL,
            reason="Used for disambiguation",
            boxes=[],
        )

        assert gap_no_boxes.boxes == []

    def test_citations_preservation(self, sample_gap):
        """Citation fields passed to the gap are readable back unchanged."""
        citations = sample_gap.citations
        assert len(citations) == 1
        assert citations[0].rule_id == "UK.SA102.P60.Required"
        assert citations[0].doc_id == "SA102-Notes-2025"
        assert citations[0].locator == "p.3 §1.1"

    def test_multiple_alternatives_formatting(self):
        """Three alternatives are joined with ", " in their original order."""
        gap_many_alternatives = CoverageGap(
            schedule_id="SA105",
            evidence_id="LettingAgentStatements",
            role=Role.REQUIRED,
            reason="Evidence of rental income",
            boxes=["SA105_b5", "SA105_b20"],
            acceptable_alternatives=[
                "TenancyLedger",
                "BankStatements",
                "RentalAgreements",
            ],
        )

        alternatives_text = ", ".join(gap_many_alternatives.acceptable_alternatives)
        expected = "TenancyLedger, BankStatements, RentalAgreements"
        assert alternatives_text == expected

    def test_empty_boxes_formatting(self):
        """An empty box list falls back to the placeholder "relevant boxes"."""
        gap_no_boxes = CoverageGap(
            schedule_id="SA102",
            evidence_id="EmploymentContract",
            role=Role.OPTIONAL,
            reason="Used for disambiguation",
            boxes=[],
        )

        boxes_text = (
            ", ".join(gap_no_boxes.boxes) if gap_no_boxes.boxes else "relevant boxes"
        )
        assert boxes_text == "relevant boxes"

    def test_special_characters_in_evidence_names(self):
        """An evidence id containing an underscore survives into the endpoint."""
        gap_special_chars = CoverageGap(
            schedule_id="SA106",
            evidence_id="EEA_FHL",
            role=Role.CONDITIONALLY_REQUIRED,
            reason="European Economic Area Furnished Holiday Lettings",
            boxes=["SA106_b14"],
        )

        # The id must be stored verbatim, underscore included.
        assert gap_special_chars.evidence_id == "EEA_FHL"

        # The endpoint is plain string interpolation; no escaping is applied.
        upload_endpoint = f"/v1/ingest/upload?tag={gap_special_chars.evidence_id}"
        assert upload_endpoint == "/v1/ingest/upload?tag=EEA_FHL"

    def test_long_reason_text(self):
        """A reason longer than 100 characters is stored without truncation."""
        long_reason = "This is a very long reason that explains in great detail why this particular piece of evidence is absolutely essential for completing the tax return accurately and in compliance with HMRC requirements and regulations."

        gap_long_reason = CoverageGap(
            schedule_id="SA108",
            evidence_id="CGT_BrokerAnnualReport",
            role=Role.REQUIRED,
            reason=long_reason,
            boxes=["SA108_b4", "SA108_b5"],
        )

        # Should preserve full reason text
        assert gap_long_reason.reason == long_reason
        assert len(gap_long_reason.reason) > 100

    def test_multiple_upload_formats(self):
        """Upload options keep whatever format list they were created with."""
        # Different evidence types might accept different formats
        formats_map = {
            "AccountsPAndL": ["pdf", "xlsx", "csv"],
            "BankStatements": ["pdf", "csv", "ofx"],
            "P60": ["pdf", "jpg", "png"],
        }

        for evidence, formats in formats_map.items():
            option = UploadOption(
                label=f"Upload {evidence}",
                accepted_formats=formats,
                upload_endpoint=f"/v1/ingest/upload?tag={evidence}",
            )

            assert option.accepted_formats == formats
            assert evidence in option.upload_endpoint

    def test_context_variations(self):
        """Contexts for several years, taxpayers and jurisdictions are accepted."""
        contexts = [
            ClarifyContext(tax_year="2024-25", taxpayer_id="T-001", jurisdiction="UK"),
            ClarifyContext(tax_year="2023-24", taxpayer_id="T-002", jurisdiction="UK"),
            ClarifyContext(tax_year="2024-25", taxpayer_id="T-003", jurisdiction="US"),
        ]
        # Built once, outside the loop; only used for membership checks.
        known_jurisdictions = {"UK", "US", "CA", "AU"}

        for context in contexts:
            # Each context should be valid
            assert context.tax_year.startswith("20")
            assert context.taxpayer_id.startswith("T-")
            assert context.jurisdiction in known_jurisdictions