"""Property-based tests for the Code Generator.

**Validates: Requirements 2.2, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6**

Properties tested:
- Property 10: References in generated output use Terraform syntax
- Property 11: Generated HCL syntactic validity
- Property 12: File organization by resource type
- Property 13: Variable extraction for shared values
- Property 14: Identifier sanitization validity
- Property 15: Traceability comments in generated code
"""

import re

from hypothesis import given, settings, assume, HealthCheck
from hypothesis import strategies as st

from iac_reverse.generator import CodeGenerator, VariableExtractor, sanitize_identifier
from iac_reverse.models import (
    CpuArchitecture,
    DependencyGraph,
    DiscoveredResource,
    PlatformCategory,
    ProviderType,
    ResourceRelationship,
    ScanProfile,
)


# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------

def _has_visible_text(candidate: str) -> bool:
    """Return True when ``candidate`` is non-empty after stripping whitespace."""
    return bool(candidate.strip())


def _is_attribute_key(candidate: str) -> bool:
    """Return True for non-blank text whose first character is alphabetic."""
    return _has_visible_text(candidate) and candidate[0].isalpha()


# Enum-backed strategies, each sampling from the full member list of its enum.
provider_type_strategy = st.sampled_from(list(ProviderType))
platform_category_strategy = st.sampled_from(list(PlatformCategory))
cpu_architecture_strategy = st.sampled_from(list(CpuArchitecture))

# Resource names: 1-20 characters from Unicode letters/numbers plus "_" and "-".
_resource_name_alphabet = st.characters(
    whitelist_categories=("L", "N"),
    whitelist_characters="_-",
)
resource_name_strategy = st.text(
    alphabet=_resource_name_alphabet,
    min_size=1,
    max_size=20,
).filter(_has_visible_text)

# Resource types: a fixed pool of terraform-style "<provider>_<kind>" names.
_RESOURCE_TYPES = [
    "kubernetes_deployment",
    "kubernetes_service",
    "kubernetes_namespace",
    "docker_service",
    "docker_network",
    "docker_volume",
    "synology_shared_folder",
    "synology_volume",
    "harvester_virtualmachine",
    "harvester_volume",
    "bare_metal_hardware",
    "windows_service",
    "windows_iis_site",
]
resource_type_strategy = st.sampled_from(_RESOURCE_TYPES)

# Simple attribute values: non-blank text, a bounded non-negative int, or a bool.
_attr_text_alphabet = st.characters(
    whitelist_categories=("L", "N"),
    whitelist_characters="_-./: ",
)
_attr_text_strategy = st.text(
    alphabet=_attr_text_alphabet,
    min_size=1,
    max_size=30,
).filter(_has_visible_text)
simple_attr_value_strategy = st.one_of(
    _attr_text_strategy,
    st.integers(min_value=0, max_value=10000),
    st.booleans(),
)

# Attribute dictionaries: 1-5 entries whose keys are letters/underscores and
# start with a letter; values come from simple_attr_value_strategy.
_attr_key_strategy = st.text(
    alphabet=st.characters(whitelist_categories=("L",), whitelist_characters="_"),
    min_size=1,
    max_size=15,
).filter(_is_attribute_key)
attributes_strategy = st.dictionaries(
    keys=_attr_key_strategy,
    values=simple_attr_value_strategy,
    min_size=1,
    max_size=5,
)


def make_resource(
    unique_id: str,
    resource_type: str = "kubernetes_deployment",
    name: str = "my_resource",
    provider: ProviderType = ProviderType.KUBERNETES,
    platform_category: PlatformCategory = PlatformCategory.CONTAINER_ORCHESTRATION,
    architecture: CpuArchitecture = CpuArchitecture.AMD64,
    attributes: dict | None = None,
    raw_references: list[str] | None = None,
) -> DiscoveredResource:
    """Build a DiscoveredResource for tests, filling in defaults for omitted fields.

    Args:
        unique_id: Identifier stored on the resource; the only required argument.
        resource_type: Terraform-style type name.
        name: Resource name.
        provider: Provider enum member.
        platform_category: Platform category enum member.
        architecture: CPU architecture enum member.
        attributes: Attribute mapping. ``None`` selects the placeholder
            ``{"key": "value"}``; an explicitly passed empty dict is kept.
        raw_references: Raw reference strings. ``None`` selects an empty list.

    Returns:
        A DiscoveredResource whose endpoint is always the fixed test URL
        ``https://api.internal.lab:6443``.
    """
    # Compare against None rather than truthiness so that an explicitly passed
    # empty dict/list is honoured instead of being swapped for the default.
    if attributes is None:
        attributes = {"key": "value"}
    if raw_references is None:
        raw_references = []

    return DiscoveredResource(
        resource_type=resource_type,
        unique_id=unique_id,
        name=name,
        provider=provider,
        platform_category=platform_category,
        architecture=architecture,
        endpoint="https://api.internal.lab:6443",
        attributes=attributes,
        raw_references=raw_references,
    )


def make_dependency_graph(
    resources: list[DiscoveredResource],
    relationships: list[ResourceRelationship] | None = None,
) -> DependencyGraph:
    """Wrap ``resources`` in a DependencyGraph for tests.

    The topological order is the input order of ``resources``; no cycles or
    unresolved references are recorded. A missing or empty ``relationships``
    argument yields an empty relationship list.
    """
    edges = relationships if relationships else []
    ordered_ids = [resource.unique_id for resource in resources]
    return DependencyGraph(
        resources=resources,
        relationships=edges,
        topological_order=ordered_ids,
        cycles=[],
        unresolved_references=[],
    )


@st.composite
def resource_with_dependency_strategy(draw):
    """Draw two resources linked by a single "reference" relationship.

    Returns ``(resources, relationships)``. ``resources[0]`` is the dependent
    resource: its ``target_id`` attribute and its raw references both hold the
    unique_id of ``resources[1]``, the dependency target.
    """
    # Draw order: both types, then both names, then the shared architecture.
    source_type = draw(resource_type_strategy)
    target_type = draw(resource_type_strategy)
    source_name = draw(resource_name_strategy)
    target_name = draw(resource_name_strategy)
    shared_arch = draw(cpu_architecture_strategy)

    source_uid = f"ns/{source_type}/{source_name}"
    target_uid = f"ns/{target_type}/{target_name}"
    # Discard examples where both resources would end up with the same ID.
    # NOTE(review): names that differ but sanitize to the same identifier are
    # not excluded here - confirm the generator tolerates that.
    assume(source_uid != target_uid)

    # The dependency target carries no references of its own.
    dependency = make_resource(
        unique_id=target_uid,
        resource_type=target_type,
        name=target_name,
        architecture=shared_arch,
        attributes={"port": 8080},
    )

    # The dependent resource points at the target through "target_id".
    dependent = make_resource(
        unique_id=source_uid,
        resource_type=source_type,
        name=source_name,
        architecture=shared_arch,
        attributes={"target_id": target_uid, "replicas": 3},
        raw_references=[target_uid],
    )

    link = ResourceRelationship(
        source_id=source_uid,
        target_id=target_uid,
        relationship_type="reference",
        source_attribute="target_id",
    )

    return [dependent, dependency], [link]


@st.composite
def multiple_resources_strategy(draw):
    """Draw 1-5 distinct resource types, each with 1-3 resources.

    Unique IDs and names embed the type index and the per-type index, so every
    generated resource is distinct. Attributes come from ``attributes_strategy``.
    """
    type_count = draw(st.integers(min_value=1, max_value=5))
    distinct_types = draw(
        st.lists(
            resource_type_strategy,
            min_size=type_count,
            max_size=type_count,
            unique=True,
        )
    )

    generated = []
    for type_index, rtype in enumerate(distinct_types):
        # How many resources of this type to create (1-3).
        per_type = draw(st.integers(min_value=1, max_value=3))
        generated.extend(
            make_resource(
                unique_id=f"{rtype}/instance_{type_index}_{item_index}",
                resource_type=rtype,
                name=f"res_{type_index}_{item_index}",
                attributes=draw(attributes_strategy),
            )
            for item_index in range(per_type)
        )

    return generated


@st.composite
def resources_with_shared_values_strategy(draw):
    """Draw 2-5 resources that all carry the same key/value attribute pair.

    Returns ``(resources, shared_key, shared_value)``. Besides the shared pair,
    each resource has a ``name`` attribute that is unique per resource.
    """
    shared_key = draw(st.sampled_from(["region", "environment", "zone", "cluster"]))
    value_alphabet = st.characters(
        whitelist_categories=("L", "N"),
        whitelist_characters="_-",
    )
    shared_value = draw(
        st.text(
            alphabet=value_alphabet,
            min_size=1,
            max_size=15,
        ).filter(lambda text: bool(text.strip()))
    )

    resource_count = draw(st.integers(min_value=2, max_value=5))

    # Every resource repeats the shared pair; only the index-based fields vary.
    resources = [
        make_resource(
            unique_id=f"resource_{index}",
            resource_type="kubernetes_deployment",
            name=f"res_{index}",
            attributes={shared_key: shared_value, "name": f"instance_{index}"},
        )
        for index in range(resource_count)
    ]

    return resources, shared_key, shared_value


# Unconstrained text (empty string included, at most 50 characters) used to
# exercise sanitize_identifier.
arbitrary_string_strategy = st.text(max_size=50)


# ---------------------------------------------------------------------------
# Property 10: References in generated output use Terraform syntax
# ---------------------------------------------------------------------------


class TestReferencesUseTerraformSyntax:
    """Property 10: References in generated output use Terraform syntax.

    **Validates: Requirements 2.2, 3.5**

    When one resource depends on another, the generated HCL must point at the
    dependency with a Terraform reference (type.name.id) rather than embedding
    the dependency's original ID as a literal.
    """

    @given(data=resource_with_dependency_strategy())
    @settings(max_examples=100)
    def test_references_use_terraform_resource_syntax(
        self, data: tuple[list[DiscoveredResource], list[ResourceRelationship]]
    ):
        """The dependent resource's file contains a type.name.id reference to its target."""
        resources, relationships = data
        graph = make_dependency_graph(resources, relationships)
        result = CodeGenerator().generate(graph, [])

        # resources[0] is the dependent resource, resources[1] its target.
        target = resources[1]
        expected_ref = f"{target.resource_type}.{sanitize_identifier(target.name)}.id"

        # Locate the per-type file that should hold the dependent resource.
        source = resources[0]
        wanted_filename = f"{source.resource_type}.tf"
        source_file = next(
            (
                candidate
                for candidate in result.resource_files
                if candidate.filename == wanted_filename
            ),
            None,
        )

        assert source_file is not None, (
            f"Expected file {source.resource_type}.tf not found"
        )

        assert expected_ref in source_file.content, (
            f"Expected Terraform reference '{expected_ref}' not found in output. "
            f"Content: {source_file.content[:500]}"
        )

    @given(data=resource_with_dependency_strategy())
    @settings(max_examples=100)
    def test_hardcoded_ids_not_present_for_resolved_references(
        self, data: tuple[list[DiscoveredResource], list[ResourceRelationship]]
    ):
        """The target's unique_id never shows up as a quoted literal in the source's file."""
        resources, relationships = data
        graph = make_dependency_graph(resources, relationships)
        result = CodeGenerator().generate(graph, [])

        target = resources[1]
        source = resources[0]

        # Locate the per-type file that should hold the dependent resource.
        wanted_filename = f"{source.resource_type}.tf"
        source_file = next(
            (
                candidate
                for candidate in result.resource_files
                if candidate.filename == wanted_filename
            ),
            None,
        )

        assert source_file is not None

        # Only the double-quoted form counts as a hardcoded ID; the bare ID may
        # still appear elsewhere in the file without tripping this check.
        hardcoded_pattern = f'"{target.unique_id}"'
        assert hardcoded_pattern not in source_file.content, (
            f"Hardcoded ID '{hardcoded_pattern}' should not appear in generated HCL. "
            f"Should use Terraform reference instead."
        )


# ---------------------------------------------------------------------------
# Property 11: Generated HCL syntactic validity
# ---------------------------------------------------------------------------


class TestGeneratedHclSyntacticValidity:
    """Property 11: Generated HCL syntactic validity.

    **Validates: Requirements 3.1**

    For any set of resources, the generated HCL contains valid resource blocks
    with proper structure (resource keyword, type, name, braces).
    """

    # Header of a resource block: resource "type" "name" {
    # Compiled once here instead of once per generated file inside the test loop.
    _RESOURCE_BLOCK_PATTERN = re.compile(r'resource\s+"[^"]+"\s+"[^"]+"\s*\{')
    # Captures only the quoted type that follows the resource keyword.
    _RESOURCE_TYPE_PATTERN = re.compile(r'resource\s+"([^"]+)"')

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100, suppress_health_check=[HealthCheck.too_slow])
    def test_generated_hcl_has_valid_resource_blocks(
        self, resources: list[DiscoveredResource]
    ):
        """Each file has as many resource block headers as its resource_count."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []

        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for gen_file in result.resource_files:
            blocks_found = self._RESOURCE_BLOCK_PATTERN.findall(gen_file.content)
            assert len(blocks_found) == gen_file.resource_count, (
                f"Expected {gen_file.resource_count} resource blocks in "
                f"{gen_file.filename}, found {len(blocks_found)}"
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_generated_hcl_has_balanced_braces(
        self, resources: list[DiscoveredResource]
    ):
        """Generated HCL has balanced opening and closing braces.

        This is a raw character count over the file content, not a parse, so
        braces inside string values or comments would be counted as well.
        """
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []

        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for gen_file in result.resource_files:
            content = gen_file.content
            open_braces = content.count("{")
            close_braces = content.count("}")
            assert open_braces == close_braces, (
                f"Unbalanced braces in {gen_file.filename}: "
                f"{open_braces} opening vs {close_braces} closing"
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_generated_hcl_resource_type_matches_filename(
        self, resources: list[DiscoveredResource]
    ):
        """Each resource block's type matches the file it's in (filename = type.tf)."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []

        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for gen_file in result.resource_files:
            # Strip only the trailing extension; str.replace would also remove
            # any ".tf" occurring inside the type name itself.
            expected_type = gen_file.filename.removesuffix(".tf")
            # All resource blocks in this file should be of the expected type
            resource_types_in_file = self._RESOURCE_TYPE_PATTERN.findall(
                gen_file.content
            )
            for rtype in resource_types_in_file:
                assert rtype == expected_type, (
                    f"Resource type '{rtype}' found in {gen_file.filename} "
                    f"but expected only '{expected_type}'"
                )


# ---------------------------------------------------------------------------
# Property 12: File organization by resource type
# ---------------------------------------------------------------------------


class TestFileOrganizationByResourceType:
    """Property 12: File organization by resource type.

    **Validates: Requirements 3.2**

    For any set of resources, each resource type gets its own .tf file.
    """

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_one_file_per_resource_type(
        self, resources: list[DiscoveredResource]
    ):
        """The number of resource files equals the number of distinct resource types."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []

        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        distinct_types = {r.resource_type for r in resources}
        assert len(result.resource_files) == len(distinct_types), (
            f"Expected {len(distinct_types)} files for {len(distinct_types)} "
            f"distinct types, got {len(result.resource_files)}"
        )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_each_file_named_after_resource_type(
        self, resources: list[DiscoveredResource]
    ):
        """Each generated file is named <resource_type>.tf."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []

        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        distinct_types = {r.resource_type for r in resources}
        expected_filenames = {f"{rt}.tf" for rt in distinct_types}
        actual_filenames = {f.filename for f in result.resource_files}

        assert actual_filenames == expected_filenames, (
            f"Expected filenames {expected_filenames}, got {actual_filenames}"
        )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_every_resource_appears_in_exactly_one_file(
        self, resources: list[DiscoveredResource]
    ):
        """Every resource's unique_id appears in exactly one generated file.

        Presence is a substring check on the file content, so it counts any
        occurrence of the ID text, whether in a comment or elsewhere.
        """
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []

        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for resource in resources:
            files_containing = [
                f.filename
                for f in result.resource_files
                if resource.unique_id in f.content
            ]
            assert len(files_containing) == 1, (
                f"Resource '{resource.unique_id}' found in {len(files_containing)} "
                f"files: {files_containing}. Expected exactly 1."
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_resource_count_per_file_matches(
        self, resources: list[DiscoveredResource]
    ):
        """Each file's resource_count matches the actual number of resources of that type."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []

        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        # Count resources per type; a Counter yields 0 for a type that was
        # never generated, so an unexpected file fails the comparison below.
        from collections import Counter
        type_counts = Counter(r.resource_type for r in resources)

        for gen_file in result.resource_files:
            # Strip only the trailing extension; str.replace would also remove
            # any ".tf" occurring inside the type name itself.
            expected_type = gen_file.filename.removesuffix(".tf")
            assert gen_file.resource_count == type_counts[expected_type], (
                f"File {gen_file.filename} reports {gen_file.resource_count} resources "
                f"but expected {type_counts[expected_type]}"
            )


# ---------------------------------------------------------------------------
# Property 13: Variable extraction for shared values
# ---------------------------------------------------------------------------


class TestVariableExtractionForSharedValues:
    """Property 13: Variable extraction for shared values.

    **Validates: Requirements 3.3**

    Whenever the same value is present on two or more resources, the extractor
    must surface it as a variable.
    """

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_shared_value_produces_extracted_variable(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """At least one extracted variable is named after the shared key."""
        resources, shared_key, _shared_value = data

        variables = VariableExtractor().extract_variables(resources)

        # A variable "belongs" to the shared key when its name contains the key.
        var_names = [candidate.name for candidate in variables]
        matching_vars = [
            candidate for candidate in variables if shared_key in candidate.name
        ]
        assert len(matching_vars) >= 1, (
            f"Expected at least one variable for shared key '{shared_key}', "
            f"got variables: {var_names}"
        )

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_extracted_variable_has_correct_default(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """The first matching variable's default contains the shared value."""
        resources, shared_key, shared_value = data

        variables = VariableExtractor().extract_variables(resources)

        matching_vars = [
            candidate for candidate in variables if shared_key in candidate.name
        ]
        assert len(matching_vars) >= 1

        # Containment rather than equality: the default may wrap the raw value
        # (for example as a quoted string literal).
        var = matching_vars[0]
        assert shared_value in var.default_value, (
            f"Expected default to contain '{shared_value}', got '{var.default_value}'"
        )

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_extracted_variable_tracks_usage(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """The first matching variable lists at least two entries in used_by."""
        resources, shared_key, _shared_value = data

        variables = VariableExtractor().extract_variables(resources)

        matching_vars = [
            candidate for candidate in variables if shared_key in candidate.name
        ]
        assert len(matching_vars) >= 1

        var = matching_vars[0]
        assert len(var.used_by) >= 2, (
            f"Expected variable to be used by 2+ resources, "
            f"got {len(var.used_by)}: {var.used_by}"
        )

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_extracted_variable_has_type_and_description(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """No extracted variable has an empty type expression or description."""
        resources, _shared_key, _shared_value = data

        variables = VariableExtractor().extract_variables(resources)

        for var in variables:
            assert var.type_expr != "", f"Variable '{var.name}' has empty type_expr"
            assert var.description != "", f"Variable '{var.name}' has empty description"


# ---------------------------------------------------------------------------
# Property 14: Identifier sanitization validity
# ---------------------------------------------------------------------------


class TestIdentifierSanitizationValidity:
    """Property 14: Identifier sanitization validity.

    **Validates: Requirements 3.4**

    For any input string, sanitize_identifier produces a valid Terraform identifier.
    """

    # Checked with fullmatch() throughout: with match(), the trailing "$" also
    # matches just before a final newline, so "abc\n" would wrongly be accepted.
    TERRAFORM_IDENTIFIER_REGEX = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")

    @given(name=arbitrary_string_strategy)
    @settings(max_examples=200)
    def test_sanitized_identifier_matches_terraform_pattern(self, name: str):
        """The whole output always matches ^[a-zA-Z_][a-zA-Z0-9_]*$."""
        result = sanitize_identifier(name)
        assert self.TERRAFORM_IDENTIFIER_REGEX.fullmatch(result), (
            f"sanitize_identifier({name!r}) = {result!r} does not match "
            f"Terraform identifier pattern"
        )

    @given(name=arbitrary_string_strategy)
    @settings(max_examples=200)
    def test_sanitized_identifier_is_non_empty(self, name: str):
        """The output is always a non-empty string."""
        result = sanitize_identifier(name)
        assert len(result) > 0, (
            f"sanitize_identifier({name!r}) produced empty string"
        )

    @given(name=st.text(min_size=1, max_size=30, alphabet="0123456789"))
    @settings(max_examples=100)
    def test_digit_only_input_produces_valid_identifier(self, name: str):
        """Input consisting only of digits still produces a valid identifier."""
        result = sanitize_identifier(name)
        assert self.TERRAFORM_IDENTIFIER_REGEX.fullmatch(result), (
            f"sanitize_identifier({name!r}) = {result!r} is not valid for digit-only input"
        )
        # Must not start with a digit; result is non-empty here because the
        # pattern above requires at least one character.
        assert not result[0].isdigit(), (
            f"sanitize_identifier({name!r}) = {result!r} starts with a digit"
        )

    @given(name=st.text(
        min_size=1,
        max_size=30,
        alphabet=st.characters(whitelist_categories=("L", "N"), whitelist_characters="_"),
    ).filter(lambda s: s[0].isalpha() or s[0] == "_"))
    @settings(max_examples=100)
    def test_already_valid_identifiers_are_preserved_or_simplified(self, name: str):
        """Identifier-shaped input produces a valid result.

        The input is built from letters, numbers and underscores and starts
        with a letter or underscore. Only validity of the output is asserted,
        not that the input is returned unchanged.
        """
        result = sanitize_identifier(name)
        assert self.TERRAFORM_IDENTIFIER_REGEX.fullmatch(result), (
            f"sanitize_identifier({name!r}) = {result!r} is not valid"
        )


# ---------------------------------------------------------------------------
# Property 15: Traceability comments in generated code
# ---------------------------------------------------------------------------


class TestTraceabilityCommentsInGeneratedCode:
    """Property 15: Traceability comments in generated code.

    **Validates: Requirements 3.6**

    For any resource, the generated HCL includes a comment with the original unique_id.
    """

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_each_resource_has_traceability_comment(
        self, resources: list[DiscoveredResource]
    ):
        """Every resource's unique_id appears in a comment in the generated output."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []

        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        # Collect all generated content
        all_content = "\n".join(f.content for f in result.resource_files)

        for resource in resources:
            # The unique_id should appear in a comment line
            comment_pattern = f"# Source: {resource.unique_id}"
            assert comment_pattern in all_content, (
                f"Traceability comment for resource '{resource.unique_id}' "
                f"not found in generated output"
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_traceability_comment_precedes_resource_block(
        self, resources: list[DiscoveredResource]
    ):
        """The traceability comment appears before its corresponding resource block."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []

        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for resource in resources:
            # Use the first generated file that mentions this resource's ID.
            target_file = next(
                (f for f in result.resource_files if resource.unique_id in f.content),
                None,
            )
            assert target_file is not None, (
                f"No generated file mentions resource '{resource.unique_id}'"
            )

            content = target_file.content

            # str.find returns -1 on a miss; check it explicitly so a missing
            # comment is reported as such instead of being passed on as a
            # start index for the block search.
            comment_pos = content.find(f"# Source: {resource.unique_id}")
            assert comment_pos != -1, (
                f"Traceability comment for '{resource.unique_id}' "
                f"not found in {target_file.filename}"
            )

            # Search for the block header only from the comment onwards, so a
            # block that appears solely before the comment counts as missing.
            tf_name = sanitize_identifier(resource.name)
            block_pattern = f'resource "{resource.resource_type}" "{tf_name}"'
            block_pos = content.find(block_pattern, comment_pos)
            assert block_pos != -1, (
                f"Resource block {block_pattern!r} not found after the comment "
                f"for '{resource.unique_id}' in {target_file.filename}"
            )

            assert comment_pos < block_pos, (
                f"Comment for '{resource.unique_id}' (pos {comment_pos}) "
                f"should precede resource block (pos {block_pos})"
            )