# SnarfCode/tests/property/test_state_builder_prop.py

"""Property-based tests for the State Builder.

**Validates: Requirements 4.1, 4.2, 4.4, 4.5**

Properties tested:
- Property 16: State file structural validity
- Property 17: State entry completeness and schema correctness
"""

import json
import re
import uuid

from hypothesis import given, settings, assume
from hypothesis import strategies as st

from iac_reverse.generator.sanitize import sanitize_identifier
from iac_reverse.models import (
    CodeGenerationResult,
    CpuArchitecture,
    DependencyGraph,
    DiscoveredResource,
    GeneratedFile,
    PlatformCategory,
    PROVIDER_SUPPORTED_RESOURCE_TYPES,
    ProviderType,
    ResourceRelationship,
)
from iac_reverse.state_builder import StateBuilder


# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------

# Enum-backed strategies: each one samples from the members of a model enum.
provider_type_strategy = st.sampled_from(list(ProviderType))
platform_category_strategy = st.sampled_from(list(PlatformCategory))
cpu_architecture_strategy = st.sampled_from(list(CpuArchitecture))

# Every supported resource type across all providers, flattened into one list.
# Built with a comprehension so no loop variable is left behind at module level.
ALL_SUPPORTED_RESOURCE_TYPES = [
    supported_type
    for provider_types in PROVIDER_SUPPORTED_RESOURCE_TYPES.values()
    for supported_type in provider_types
]

resource_type_strategy = st.sampled_from(ALL_SUPPORTED_RESOURCE_TYPES)

# Resource names: 1-20 letters/digits plus "_" and "-", never blank.
resource_name_strategy = st.text(
    alphabet=st.characters(
        whitelist_categories=("L", "N"),
        whitelist_characters="_-",
    ),
    min_size=1,
    max_size=20,
).filter(lambda name: bool(name.strip()))

# Unique IDs: 1-40 letters/digits plus a few path-like separators, never blank.
unique_id_strategy = st.text(
    alphabet=st.characters(
        whitelist_categories=("L", "N"),
        whitelist_characters="_-/:.",
    ),
    min_size=1,
    max_size=40,
).filter(lambda identifier: bool(identifier.strip()))

# Simple attribute values: a non-blank string, a small non-negative integer,
# or a boolean.
simple_attr_value_strategy = st.one_of(
    st.text(
        alphabet=st.characters(
            whitelist_categories=("L", "N"),
            whitelist_characters="_-./: ",
        ),
        min_size=1,
        max_size=30,
    ).filter(lambda text: bool(text.strip())),
    st.integers(min_value=0, max_value=10000),
    st.booleans(),
)

# Attribute dictionaries with 1-5 entries. Keys are made of letters and "_"
# and must start with an alphabetic character.
attributes_strategy = st.dictionaries(
    keys=st.text(
        alphabet=st.characters(
            whitelist_categories=("L",),
            whitelist_characters="_",
        ),
        min_size=1,
        max_size=15,
    ).filter(lambda key: bool(key.strip()) and key[0].isalpha()),
    values=simple_attr_value_strategy,
    min_size=1,
    max_size=5,
)

# Provider version strings (semver-like): a major version without a leading
# zero, followed by minor and patch components of one or two digits each.
provider_version_strategy = st.from_regex(
    r"[1-9][0-9]{0,1}\.[0-9]{1,2}\.[0-9]{1,2}",
    fullmatch=True,
)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def make_resource(
    unique_id: str,
    resource_type: str = "kubernetes_deployment",
    name: str = "my_resource",
    provider: ProviderType = ProviderType.KUBERNETES,
    platform_category: PlatformCategory = PlatformCategory.CONTAINER_ORCHESTRATION,
    architecture: CpuArchitecture = CpuArchitecture.AMD64,
    attributes: dict | None = None,
    raw_references: list[str] | None = None,
) -> DiscoveredResource:
    """Build a DiscoveredResource, filling unspecified fields with defaults.

    The endpoint is always the same fixed URL. A falsy ``attributes`` value
    (``None`` or an empty dict) is replaced by ``{"key": "value"}``, and a
    falsy ``raw_references`` value by an empty list.
    """
    effective_attributes = attributes if attributes else {"key": "value"}
    effective_references = raw_references if raw_references else []

    return DiscoveredResource(
        unique_id=unique_id,
        resource_type=resource_type,
        name=name,
        provider=provider,
        platform_category=platform_category,
        architecture=architecture,
        endpoint="https://api.internal.lab:6443",
        attributes=effective_attributes,
        raw_references=effective_references,
    )


def make_dependency_graph(
    resources: list[DiscoveredResource],
    relationships: list[ResourceRelationship] | None = None,
) -> DependencyGraph:
    """Wrap ``resources`` in a DependencyGraph with no cycles or unresolved refs.

    The topological order is simply the resources' unique IDs in the order
    given. A falsy ``relationships`` value becomes an empty list.
    """
    edges = relationships if relationships else []
    ordered_ids = [resource.unique_id for resource in resources]

    return DependencyGraph(
        resources=resources,
        relationships=edges,
        topological_order=ordered_ids,
        cycles=[],
        unresolved_references=[],
    )


def make_code_generation_result() -> CodeGenerationResult:
    """Build a minimal CodeGenerationResult made of empty generated files.

    The result holds one resource file ("main.tf") plus a variables file and
    a provider file, each with empty content and a resource count of zero.
    """

    def empty_file(filename: str) -> GeneratedFile:
        # Every file in the minimal result has no content and no resources.
        return GeneratedFile(filename=filename, content="", resource_count=0)

    return CodeGenerationResult(
        resource_files=[empty_file("main.tf")],
        variables_file=empty_file("variables.tf"),
        provider_file=empty_file("provider.tf"),
    )


# ---------------------------------------------------------------------------
# Composite strategies
# ---------------------------------------------------------------------------


@st.composite
def mappable_resource_strategy(draw):
    """Generate one DiscoveredResource intended to be mappable to state.

    The resource gets a non-empty unique_id and a resource type sampled from
    the supported resource types. Provider, platform category, architecture
    and attributes are drawn independently of the resource type.
    """
    # Keyword arguments are evaluated left to right, so the draws happen in
    # the order written here: type, name, id, provider, category,
    # architecture, attributes.
    return make_resource(
        resource_type=draw(resource_type_strategy),
        name=draw(resource_name_strategy),
        unique_id=draw(unique_id_strategy),
        provider=draw(provider_type_strategy),
        platform_category=draw(platform_category_strategy),
        architecture=draw(cpu_architecture_strategy),
        attributes=draw(attributes_strategy),
    )


def multiple_mappable_resources_strategy():
    """Return a strategy for lists of 1-5 mappable resources with unique IDs.

    Uniqueness is enforced on ``unique_id`` by Hypothesis itself through
    ``unique_by``, instead of drawing resources one at a time and silently
    discarding duplicates. The list always contains at least one resource,
    so no ``assume`` guard is required.
    """
    return st.lists(
        mappable_resource_strategy(),
        min_size=1,
        max_size=5,
        unique_by=lambda resource: resource.unique_id,
    )


@st.composite
def resource_with_sensitive_attrs_strategy(draw):
    """Generate a resource whose attributes include one sensitive-looking key.

    One key is sampled from a fixed list of sensitive-looking names and is
    added on top of an ordinary drawn attribute dictionary, so the resource
    always carries at least that one sensitive-looking attribute.
    """
    drawn_type = draw(resource_type_strategy)
    drawn_name = draw(resource_name_strategy)
    drawn_id = draw(unique_id_strategy)

    # The key that makes the resource look sensitive, and its value.
    secret_key = draw(
        st.sampled_from(
            [
                "password",
                "api_secret",
                "auth_token",
                "private_key",
                "tls_certificate",
            ]
        )
    )
    secret_value = draw(
        st.text(min_size=1, max_size=20, alphabet="abcdefghijklmnop")
    )

    # Ordinary attributes, with the sensitive entry layered on top.
    combined_attrs = {**draw(attributes_strategy), secret_key: secret_value}

    return make_resource(
        unique_id=drawn_id,
        resource_type=drawn_type,
        name=drawn_name,
        attributes=combined_attrs,
    )


# ---------------------------------------------------------------------------
# Property 16: State file structural validity
# ---------------------------------------------------------------------------


class TestStateFileStructuralValidity:
    """Property 16: State file structural validity.

    **Validates: Requirements 4.1**

    For any set of resources, the generated state file has version=4,
    valid UUID lineage, serial=1, and valid JSON structure.
    """

    @staticmethod
    def _build_state(resources: list[DiscoveredResource]):
        """Build a state file for ``resources`` with the shared test setup.

        Every property in this class uses the same inputs: a fresh
        StateBuilder, a dependency graph without relationships, a minimal
        code generation result, and the provider version "1.0.0".
        """
        builder = StateBuilder()
        graph = make_dependency_graph(resources)
        code_result = make_code_generation_result()
        return builder.build(code_result, graph, "1.0.0")

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_file_version_is_4(
        self, resources: list[DiscoveredResource]
    ):
        """The generated state file always has version=4."""
        state_file = self._build_state(resources)

        assert state_file.version == 4, (
            f"Expected version=4, got version={state_file.version}"
        )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_file_has_valid_uuid_lineage(
        self, resources: list[DiscoveredResource]
    ):
        """The generated state file has a valid version-4 UUID lineage."""
        state_file = self._build_state(resources)

        # uuid.UUID raises ValueError for malformed strings and TypeError or
        # AttributeError for non-string input. All of them mean "not a valid
        # UUID", so report them uniformly and keep the original cause chained.
        try:
            parsed_uuid = uuid.UUID(state_file.lineage)
        except (ValueError, TypeError, AttributeError) as exc:
            raise AssertionError(
                f"Lineage '{state_file.lineage}' is not a valid UUID"
            ) from exc

        assert parsed_uuid.version == 4, (
            f"Expected UUID version 4, got version {parsed_uuid.version}"
        )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_file_serial_is_1(
        self, resources: list[DiscoveredResource]
    ):
        """The generated state file always has serial=1."""
        state_file = self._build_state(resources)

        assert state_file.serial == 1, (
            f"Expected serial=1, got serial={state_file.serial}"
        )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_file_produces_valid_json(
        self, resources: list[DiscoveredResource]
    ):
        """The state file serializes to valid JSON via to_json()."""
        state_file = self._build_state(resources)
        json_str = state_file.to_json()

        # Must parse as valid JSON; chain the decode error for diagnosis.
        try:
            parsed = json.loads(json_str)
        except json.JSONDecodeError as exc:
            raise AssertionError(
                f"State file to_json() produced invalid JSON: {exc}"
            ) from exc

        assert isinstance(parsed, dict), "State JSON root must be a dict"

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_json_has_required_top_level_fields(
        self, resources: list[DiscoveredResource]
    ):
        """The serialized state JSON has version, terraform_version, serial, lineage, resources."""
        state_file = self._build_state(resources)
        parsed = json.loads(state_file.to_json())

        required_fields = {"version", "terraform_version", "serial", "lineage", "resources"}
        missing = required_fields - set(parsed.keys())
        assert not missing, (
            f"State JSON missing required top-level fields: {missing}"
        )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_json_resource_entries_have_required_fields(
        self, resources: list[DiscoveredResource]
    ):
        """Each resource entry in the JSON has mode, type, name, provider, and instances."""
        state_file = self._build_state(resources)
        parsed = json.loads(state_file.to_json())

        required_resource_fields = {"mode", "type", "name", "provider", "instances"}

        for i, entry in enumerate(parsed["resources"]):
            missing = required_resource_fields - set(entry.keys())
            assert not missing, (
                f"Resource entry {i} missing required fields: {missing}. "
                f"Entry keys: {list(entry.keys())}"
            )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_json_instances_have_schema_and_attributes(
        self, resources: list[DiscoveredResource]
    ):
        """Each instance in the state JSON has schema_version, attributes, sensitive_attributes, dependencies."""
        state_file = self._build_state(resources)
        parsed = json.loads(state_file.to_json())

        required_instance_fields = {
            "schema_version", "attributes", "sensitive_attributes", "dependencies"
        }

        for i, entry in enumerate(parsed["resources"]):
            for j, instance in enumerate(entry["instances"]):
                missing = required_instance_fields - set(instance.keys())
                assert not missing, (
                    f"Resource {i}, instance {j} missing fields: {missing}. "
                    f"Instance keys: {list(instance.keys())}"
                )


# ---------------------------------------------------------------------------
# Property 17: State entry completeness and schema correctness
# ---------------------------------------------------------------------------


class TestStateEntryCompletenessAndSchemaCorrectness:
    """Property 17: State entry completeness and schema correctness.

    **Validates: Requirements 4.4, 4.5**

    For any resource, the state entry has non-empty resource_type,
    resource_name, provider_id, and attributes matching the discovery data.
    """

    @staticmethod
    def _build_state(
        resources: list[DiscoveredResource],
        provider_version: str = "1.0.0",
    ):
        """Build a state file for ``resources`` with the shared test setup.

        Uses a fresh StateBuilder, a dependency graph without relationships,
        and a minimal code generation result.
        """
        builder = StateBuilder()
        graph = make_dependency_graph(resources)
        code_result = make_code_generation_result()
        return builder.build(code_result, graph, provider_version)

    @classmethod
    def _build_single_entry(
        cls,
        resource: DiscoveredResource,
        provider_version: str = "1.0.0",
    ):
        """Build state for one resource and return its only state entry.

        Asserts that exactly one entry was produced, so each property fails
        early if the resource was dropped or duplicated.
        """
        state_file = cls._build_state([resource], provider_version)
        assert len(state_file.resources) == 1
        return state_file.resources[0]

    @given(resource=mappable_resource_strategy())
    @settings(max_examples=100)
    def test_state_entry_has_non_empty_resource_type(
        self, resource: DiscoveredResource
    ):
        """Each state entry has a non-empty resource_type."""
        entry = self._build_single_entry(resource)

        assert entry.resource_type != "", (
            "State entry resource_type must not be empty"
        )
        assert entry.resource_type == resource.resource_type, (
            f"Expected resource_type '{resource.resource_type}', "
            f"got '{entry.resource_type}'"
        )

    @given(resource=mappable_resource_strategy())
    @settings(max_examples=100)
    def test_state_entry_has_non_empty_resource_name(
        self, resource: DiscoveredResource
    ):
        """Each state entry has a non-empty resource_name (sanitized)."""
        entry = self._build_single_entry(resource)

        assert entry.resource_name != "", (
            "State entry resource_name must not be empty"
        )
        # The name should be a sanitized version of the original
        expected_name = sanitize_identifier(resource.name)
        assert entry.resource_name == expected_name, (
            f"Expected resource_name '{expected_name}', "
            f"got '{entry.resource_name}'"
        )

    @given(resource=mappable_resource_strategy())
    @settings(max_examples=100)
    def test_state_entry_has_non_empty_provider_id(
        self, resource: DiscoveredResource
    ):
        """Each state entry has a non-empty provider_id matching the resource's unique_id."""
        entry = self._build_single_entry(resource)

        assert entry.provider_id != "", (
            "State entry provider_id must not be empty"
        )
        assert entry.provider_id == resource.unique_id, (
            f"Expected provider_id '{resource.unique_id}', "
            f"got '{entry.provider_id}'"
        )

    @given(resource=mappable_resource_strategy())
    @settings(max_examples=100)
    def test_state_entry_attributes_match_discovery_data(
        self, resource: DiscoveredResource
    ):
        """State entry attributes contain all attributes from the discovered resource."""
        entry = self._build_single_entry(resource)

        # All discovery attributes should be present in the state entry
        for key, value in resource.attributes.items():
            assert key in entry.attributes, (
                f"Discovery attribute '{key}' missing from state entry attributes. "
                f"State attrs: {list(entry.attributes.keys())}"
            )
            assert entry.attributes[key] == value, (
                f"Attribute '{key}' mismatch: discovery={value}, "
                f"state={entry.attributes[key]}"
            )

    @given(
        resource=mappable_resource_strategy(),
        provider_version=provider_version_strategy,
    )
    @settings(max_examples=100)
    def test_state_entry_schema_version_matches_provider_version(
        self, resource: DiscoveredResource, provider_version: str
    ):
        """State entry schema_version matches the major version from provider_version."""
        entry = self._build_single_entry(resource, provider_version)

        # Schema version should be the major version number
        expected_schema_version = int(provider_version.split(".")[0])
        assert entry.schema_version == expected_schema_version, (
            f"Expected schema_version={expected_schema_version} "
            f"(from provider_version='{provider_version}'), "
            f"got schema_version={entry.schema_version}"
        )

    @given(resource=resource_with_sensitive_attrs_strategy())
    @settings(max_examples=100)
    def test_state_entry_marks_sensitive_attributes(
        self, resource: DiscoveredResource
    ):
        """State entry reports sensitive attributes when the resource has sensitive-looking keys."""
        entry = self._build_single_entry(resource)

        # Sensitive attributes list should not be empty when resource has
        # attributes with sensitive patterns (password, secret, token, key, certificate)
        sensitive_patterns = ["password", "secret", "token", "key", "certificate"]
        has_sensitive = any(
            any(pattern in attr_key.lower() for pattern in sensitive_patterns)
            for attr_key in resource.attributes
        )

        if has_sensitive:
            assert len(entry.sensitive_attributes) > 0, (
                f"Resource has sensitive-looking attributes "
                f"{list(resource.attributes.keys())} but sensitive_attributes "
                f"is empty"
            )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_json_id_field_matches_provider_id(
        self, resources: list[DiscoveredResource]
    ):
        """In the serialized JSON, each instance's attributes.id matches a state entry's provider_id."""
        state_file = self._build_state(resources)
        parsed = json.loads(state_file.to_json())

        # The ids that may legitimately appear in the JSON: the provider_id
        # of every entry in the state file that was just built.
        known_provider_ids = {
            state_entry.provider_id for state_entry in state_file.resources
        }

        for i, entry in enumerate(parsed["resources"]):
            for instance in entry["instances"]:
                assert "id" in instance["attributes"], (
                    f"Resource entry {i} instance missing 'id' in attributes"
                )
                instance_id = instance["attributes"]["id"]
                # The id should be non-empty
                assert instance_id != "", (
                    f"Resource entry {i} has empty 'id' attribute"
                )
                # ...and it must be the provider_id of one of the entries,
                # which is what this test's name and docstring promise.
                assert instance_id in known_provider_ids, (
                    f"Resource entry {i} has 'id' attribute {instance_id!r} "
                    f"that matches no state entry provider_id: "
                    f"{known_provider_ids}"
                )