# SnarfCode/tests/property/test_dependency_resolver_prop.py

"""Property-based tests for the Dependency Resolver.

**Validates: Requirements 2.1, 2.3, 2.4, 2.5**

Properties tested:
- Property 6: Dependency relationship identification
- Property 7: Cycle detection correctness
- Property 8: Topological order validity
- Property 9: Unresolved references become data sources or variables
"""

from hypothesis import given, settings, assume
from hypothesis import strategies as st

from iac_reverse.models import (
    CpuArchitecture,
    DependencyGraph,
    DiscoveredResource,
    PlatformCategory,
    ProviderType,
    ResourceRelationship,
    ScanResult,
    UnresolvedReference,
)
from iac_reverse.resolver import DependencyResolver


# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------

# Enum-backed strategies: each one draws a single member of its enum.
provider_type_strategy = st.sampled_from(list(ProviderType))
platform_category_strategy = st.sampled_from(list(PlatformCategory))
cpu_architecture_strategy = st.sampled_from(list(CpuArchitecture))

# Resource IDs: 3-50 characters taken from Unicode letter/number categories
# plus "_", "-" and "/"; blank-after-strip or too-short strings are rejected.
resource_id_strategy = st.text(
    alphabet=st.characters(whitelist_characters="_-/", whitelist_categories=("L", "N")),
    min_size=3,
    max_size=50,
).filter(lambda text: len(text) >= 3 and text.strip() != "")

# Resource names: 1-30 characters taken from letters/numbers plus "_" and "-".
resource_name_strategy = st.text(
    alphabet=st.characters(whitelist_characters="_-", whitelist_categories=("L", "N")),
    min_size=1,
    max_size=30,
).filter(lambda text: text.strip() != "")

# Resource types: identifier-like strings of 3-40 letters/numbers/underscores.
resource_type_strategy = st.text(
    alphabet=st.characters(whitelist_characters="_", whitelist_categories=("L", "N")),
    min_size=3,
    max_size=40,
).filter(lambda text: len(text) >= 3 and text.strip() != "")

# Endpoints: 5-50 characters taken from letters/numbers plus ".", "-", ":" and "/".
endpoint_strategy = st.text(
    alphabet=st.characters(whitelist_characters=".-:/", whitelist_categories=("L", "N")),
    min_size=5,
    max_size=50,
).filter(lambda text: text.strip() != "")


def make_resource(
    unique_id: str,
    resource_type: str = "generic_resource",
    name: str = "resource",
    raw_references: list[str] | None = None,
    attributes: dict | None = None,
) -> DiscoveredResource:
    """Build a ``DiscoveredResource`` for tests, filling in fixed defaults.

    The provider, platform category, architecture and endpoint are always
    the same hard-coded Kubernetes/AMD64 values; only the arguments below
    vary between resources.

    Args:
        unique_id: Identifier assigned to the resource.
        resource_type: Value for the resource's ``resource_type`` field.
        name: Value for the resource's ``name`` field.
        raw_references: IDs this resource refers to. ``None`` means "no
            references" and yields a fresh empty list; any list that is
            passed explicitly (including an empty one) is used as given.
        attributes: Attribute mapping for the resource. ``None`` yields
            ``{"key": "value"}``; an explicitly passed mapping (including
            an empty one) is used as given.

    Returns:
        The constructed ``DiscoveredResource``.
    """
    # Compare against None instead of relying on truthiness so that an
    # explicitly supplied empty container is honoured rather than being
    # silently swapped for the default.
    if attributes is None:
        attributes = {"key": "value"}
    if raw_references is None:
        raw_references = []

    return DiscoveredResource(
        resource_type=resource_type,
        unique_id=unique_id,
        name=name,
        provider=ProviderType.KUBERNETES,
        platform_category=PlatformCategory.CONTAINER_ORCHESTRATION,
        architecture=CpuArchitecture.AMD64,
        endpoint="https://api.internal.lab:6443",
        attributes=attributes,
        raw_references=raw_references,
    )


def make_scan_result(resources: list[DiscoveredResource]) -> ScanResult:
    """Wrap *resources* in a ``ScanResult`` with fixed metadata.

    The result carries no warnings or errors, is marked as not partial, and
    uses a constant timestamp and profile hash.
    """
    fixed_metadata = {
        "warnings": [],
        "errors": [],
        "scan_timestamp": "2024-01-15T10:30:00Z",
        "profile_hash": "test_hash",
        "is_partial": False,
    }
    return ScanResult(resources=resources, **fixed_metadata)


# Strategy to generate a list of resources with unique IDs and controlled references
@st.composite
def acyclic_resource_graph_strategy(draw):
    """Draw a list of 2-8 resources whose references form an acyclic graph.

    Resources are named ``resource_0``, ``resource_1``, ... and built in that
    order. A resource may reference at most three resources, all of which
    were created earlier, so a reference can never point "forward" and no
    cycle can arise.
    """
    total = draw(st.integers(min_value=2, max_value=8))

    known_ids = []
    built = []
    for index in range(total):
        current_id = f"resource_{index}"
        known_ids.append(current_id)

        references = []
        if index > 0:
            # Decide how many earlier resources to point at, then pick that
            # many distinct IDs from the ones created before this resource.
            ref_count = draw(st.integers(min_value=0, max_value=min(index, 3)))
            earlier_ids = st.sampled_from(known_ids[:index])
            references = draw(
                st.lists(
                    earlier_ids,
                    min_size=ref_count,
                    max_size=ref_count,
                    unique=True,
                )
            )

        built.append(
            make_resource(
                unique_id=current_id,
                name=f"res_{index}",
                raw_references=references,
            )
        )

    return built


@st.composite
def cyclic_resource_graph_strategy(draw):
    """Draw a list of 2-6 resources guaranteed to contain a reference cycle.

    All resources start without references. A subset of at least two distinct
    resources is then chosen and wired into a ring, each member referencing
    the next and the last referencing the first.
    """
    total = draw(st.integers(min_value=2, max_value=6))

    ids = [f"resource_{index}" for index in range(total)]
    resources = [
        make_resource(unique_id=uid, name=f"res_{index}", raw_references=[])
        for index, uid in enumerate(ids)
    ]

    # Choose which resources take part in the ring (distinct, at least two).
    ring_length = draw(st.integers(min_value=2, max_value=total))
    ring = draw(
        st.lists(
            st.sampled_from(list(range(total))),
            min_size=ring_length,
            max_size=ring_length,
            unique=True,
        )
    )

    # Pair every ring member with its successor; rotating the list by one
    # makes the final member wrap around to the first.
    successors = ring[1:] + ring[:1]
    for source_index, target_index in zip(ring, successors):
        references = resources[source_index].raw_references
        target_id = ids[target_index]
        if target_id not in references:
            references.append(target_id)

    return resources


@st.composite
def resources_with_unresolved_refs_strategy(draw):
    """Draw resources whose raw_references only name IDs outside the inventory.

    Returns a ``(resources, unresolved_ids)`` tuple: 1-5 resources, and the
    pool of 1-4 external IDs from which each resource's references (zero to
    two per resource) were picked.
    """
    resource_count = draw(st.integers(min_value=1, max_value=5))
    ids = [f"resource_{index}" for index in range(resource_count)]

    # External IDs come in two shapes: one containing "/" (expected to be
    # suggested as a data_source) and one without (expected to be suggested
    # as a variable). A boolean draw picks the shape for each ID.
    external_count = draw(st.integers(min_value=1, max_value=4))
    unresolved_ids = [
        f"external/resource/{index}" if draw(st.booleans()) else f"external_var_{index}"
        for index in range(external_count)
    ]

    resources = []
    for index, uid in enumerate(ids):
        # Attach zero, one or two distinct external IDs to this resource.
        pick_count = draw(st.integers(min_value=0, max_value=min(external_count, 2)))
        external_refs = draw(
            st.lists(
                st.sampled_from(unresolved_ids),
                min_size=pick_count,
                max_size=pick_count,
                unique=True,
            )
        )
        resources.append(
            make_resource(
                unique_id=uid,
                name=f"res_{index}",
                raw_references=external_refs,
            )
        )

    return resources, unresolved_ids


# ---------------------------------------------------------------------------
# Property 6: Dependency relationship identification
# ---------------------------------------------------------------------------


class TestDependencyRelationshipIdentification:
    """Property 6: Dependency relationship identification.

    **Validates: Requirements 2.1**

    Every raw_reference that names another resource in the inventory must be
    turned into a ResourceRelationship by the resolver.
    """

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_created_for_each_resolved_reference(
        self, resources: list[DiscoveredResource]
    ):
        """The relationship count equals the number of in-inventory references."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        # One relationship is expected per raw_reference that hits the inventory.
        known_ids = {res.unique_id for res in resources}
        expected_count = sum(
            1
            for res in resources
            for ref in res.raw_references
            if ref in known_ids
        )

        assert len(graph.relationships) == expected_count

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_source_and_target_are_correct(
        self, resources: list[DiscoveredResource]
    ):
        """Both endpoints of every relationship are IDs of inventory resources."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        known_ids = {res.unique_id for res in resources}

        for relationship in graph.relationships:
            # The referencing side must be a known resource ...
            assert relationship.source_id in known_ids
            # ... and so must the referenced side.
            assert relationship.target_id in known_ids

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_type_is_valid(
        self, resources: list[DiscoveredResource]
    ):
        """Every relationship_type is one of the three recognised kinds."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        allowed_types = {"parent-child", "reference", "dependency"}
        for relationship in graph.relationships:
            assert relationship.relationship_type in allowed_types

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_source_attribute_is_non_empty(
        self, resources: list[DiscoveredResource]
    ):
        """Every relationship carries a source_attribute that is a non-empty string."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        for relationship in graph.relationships:
            attribute = relationship.source_attribute
            assert isinstance(attribute, str)
            assert len(attribute) > 0


# ---------------------------------------------------------------------------
# Property 7: Cycle detection correctness
# ---------------------------------------------------------------------------


class TestCycleDetectionCorrectness:
    """Property 7: Cycle detection correctness.

    **Validates: Requirements 2.3**

    A graph that contains a cycle must have it reported in the resolver's
    cycles list, while an acyclic graph must produce no cycles at all.
    """

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_acyclic_graph_reports_zero_cycles(
        self, resources: list[DiscoveredResource]
    ):
        """Resolving an acyclic graph yields an empty cycles list."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        assert len(graph.cycles) == 0

    @given(resources=cyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_cyclic_graph_reports_at_least_one_cycle(
        self, resources: list[DiscoveredResource]
    ):
        """Resolving a graph with a ring of references yields one or more cycles."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        assert len(graph.cycles) >= 1

    @given(resources=cyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_cycle_contains_valid_resource_ids(
        self, resources: list[DiscoveredResource]
    ):
        """Reported cycles have at least two members, all of them inventory IDs."""
        scan = make_scan_result(resources)
        known_ids = {res.unique_id for res in resources}
        graph = DependencyResolver(scan).resolve()

        for cycle in graph.cycles:
            assert len(cycle) >= 2, "A cycle must involve at least 2 resources"
            for member_id in cycle:
                assert member_id in known_ids

    @given(resources=cyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_cycle_reports_have_resolution_suggestions(
        self, resources: list[DiscoveredResource]
    ):
        """Every cycle report names a two-element break edge, its type and a strategy."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        allowed_types = {"parent-child", "reference", "dependency"}
        for report in graph.cycle_reports:
            # The suggested edge to break must be present and have two elements.
            assert report.suggested_break is not None
            assert len(report.suggested_break) == 2
            assert report.break_relationship_type in allowed_types
            # The strategy is free text, but it must not be empty.
            assert isinstance(report.resolution_strategy, str)
            assert len(report.resolution_strategy) > 0


# ---------------------------------------------------------------------------
# Property 8: Topological order validity
# ---------------------------------------------------------------------------


class TestTopologicalOrderValidity:
    """Property 8: Topological order validity.

    **Validates: Requirements 2.4**

    In the topological order of an acyclic dependency graph, a resource must
    never be listed ahead of a resource it depends on.
    """

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_topological_order_contains_all_resources(
        self, resources: list[DiscoveredResource]
    ):
        """The set of IDs in the topological order equals the set of inventory IDs."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        known_ids = {res.unique_id for res in resources}
        assert set(graph.topological_order) == known_ids

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_dependencies_appear_before_dependents(
        self, resources: list[DiscoveredResource]
    ):
        """Each in-inventory reference is ordered ahead of the resource holding it.

        A resource that lists B in its raw_references is treated as depending
        on B, so B's index in the topological order must be smaller.
        """
        scan = make_scan_result(resources)
        known_ids = {res.unique_id for res in resources}
        graph = DependencyResolver(scan).resolve()

        # Map each resource ID to its index in the reported order.
        rank = {}
        for index, ordered_id in enumerate(graph.topological_order):
            rank[ordered_id] = index

        for dependent in resources:
            for dependency_id in dependent.raw_references:
                # References that leave the inventory impose no ordering.
                if dependency_id not in known_ids:
                    continue
                assert rank[dependency_id] < rank[dependent.unique_id], (
                    f"Resource '{dependency_id}' (dependency) should appear before "
                    f"'{dependent.unique_id}' (dependent) in topological order"
                )

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_topological_order_has_no_duplicates(
        self, resources: list[DiscoveredResource]
    ):
        """No resource ID occurs more than once in the topological order."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        ordering = graph.topological_order
        assert len(ordering) == len(set(ordering))


# ---------------------------------------------------------------------------
# Property 9: Unresolved references become data sources or variables
# ---------------------------------------------------------------------------


class TestUnresolvedReferences:
    """Property 9: Unresolved references become data sources or variables.

    **Validates: Requirements 2.5**

    A raw_reference naming an ID that is absent from the inventory must be
    recorded as an UnresolvedReference whose suggested_resolution is either
    "data_source" or "variable".
    """

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_are_tracked(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """The unresolved count equals the number of references outside the inventory."""
        resources, _ = data
        scan = make_scan_result(resources)
        known_ids = {res.unique_id for res in resources}
        graph = DependencyResolver(scan).resolve()

        # One UnresolvedReference is expected per raw_reference missing from the inventory.
        expected_count = sum(
            1
            for res in resources
            for ref in res.raw_references
            if ref not in known_ids
        )

        assert len(graph.unresolved_references) == expected_count

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_suggest_data_source_or_variable(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """The suggested_resolution is always 'data_source' or 'variable'."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        allowed_resolutions = {"data_source", "variable"}
        for entry in graph.unresolved_references:
            assert entry.suggested_resolution in allowed_resolutions, (
                f"Expected 'data_source' or 'variable', got '{entry.suggested_resolution}'"
            )

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_have_valid_source_resource(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """The source_resource_id of every UnresolvedReference is an inventory ID."""
        resources, _ = data
        scan = make_scan_result(resources)
        known_ids = {res.unique_id for res in resources}
        graph = DependencyResolver(scan).resolve()

        for entry in graph.unresolved_references:
            assert entry.source_resource_id in known_ids

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_have_non_empty_fields(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """Both source_attribute and referenced_id are non-empty strings."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        for entry in graph.unresolved_references:
            assert isinstance(entry.source_attribute, str)
            assert len(entry.source_attribute) > 0
            assert isinstance(entry.referenced_id, str)
            assert len(entry.referenced_id) > 0

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_ids_with_slash_or_colon_suggest_data_source(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """A referenced_id containing '/' or ':' must be suggested as 'data_source'."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        for entry in graph.unresolved_references:
            # Only path-like or namespaced IDs are in scope for this property.
            if "/" not in entry.referenced_id and ":" not in entry.referenced_id:
                continue
            assert entry.suggested_resolution == "data_source", (
                f"Reference '{entry.referenced_id}' contains '/' or ':' "
                f"and should suggest 'data_source', got '{entry.suggested_resolution}'"
            )

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_ids_without_slash_or_colon_suggest_variable(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """A referenced_id with neither '/' nor ':' must be suggested as 'variable'."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        for entry in graph.unresolved_references:
            # Path-like or namespaced IDs are covered by the data_source property.
            if "/" in entry.referenced_id or ":" in entry.referenced_id:
                continue
            assert entry.suggested_resolution == "variable", (
                f"Reference '{entry.referenced_id}' has no '/' or ':' "
                f"and should suggest 'variable', got '{entry.suggested_resolution}'"
            )