Files
SnarfCode/tests/property/test_dependency_resolver_prop.py
2026-05-22 00:19:30 -04:00

566 lines
21 KiB
Python

"""Property-based tests for the Dependency Resolver.
**Validates: Requirements 2.1, 2.3, 2.4, 2.5**
Properties tested:
- Property 6: Dependency relationship identification
- Property 7: Cycle detection correctness
- Property 8: Topological order validity
- Property 9: Unresolved references become data sources or variables
"""
from hypothesis import given, settings, assume
from hypothesis import strategies as st
from iac_reverse.models import (
CpuArchitecture,
DependencyGraph,
DiscoveredResource,
PlatformCategory,
ProviderType,
ResourceRelationship,
ScanResult,
UnresolvedReference,
)
from iac_reverse.resolver import DependencyResolver
# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------
def _letters_digits_and(extra_characters: str):
    """Return a character strategy: Unicode categories L and N plus *extra_characters*."""
    return st.characters(
        whitelist_categories=("L", "N"),
        whitelist_characters=extra_characters,
    )


def _not_blank(text: str) -> bool:
    """Return True when *text* is more than just whitespace."""
    return text.strip() != ""


def _not_blank_and_at_least_three(text: str) -> bool:
    """Return True when *text* is non-blank and at least three characters long."""
    return _not_blank(text) and len(text) >= 3


# Enum-backed strategies: each samples from the members of its enum.
provider_type_strategy = st.sampled_from(list(ProviderType))
platform_category_strategy = st.sampled_from(list(PlatformCategory))
cpu_architecture_strategy = st.sampled_from(list(CpuArchitecture))

# Resource IDs: 3-50 characters; '_', '-' and '/' are allowed as separators.
resource_id_strategy = st.text(
    alphabet=_letters_digits_and("_-/"),
    min_size=3,
    max_size=50,
).filter(_not_blank_and_at_least_three)

# Resource names: 1-30 characters; '_' and '-' are allowed as separators.
resource_name_strategy = st.text(
    alphabet=_letters_digits_and("_-"),
    min_size=1,
    max_size=30,
).filter(_not_blank)

# Resource types: 3-40 character simple identifiers ('_' is the only separator).
resource_type_strategy = st.text(
    alphabet=_letters_digits_and("_"),
    min_size=3,
    max_size=40,
).filter(_not_blank_and_at_least_three)

# Endpoint strings: 5-50 characters, with host/URL punctuation allowed.
endpoint_strategy = st.text(
    alphabet=_letters_digits_and(".-:/"),
    min_size=5,
    max_size=50,
).filter(_not_blank)
def make_resource(
    unique_id: str,
    resource_type: str = "generic_resource",
    name: str = "resource",
    raw_references: list[str] | None = None,
    attributes: dict | None = None,
) -> DiscoveredResource:
    """Create a ``DiscoveredResource`` test fixture with fixed placeholder values.

    Provider, platform category, architecture and endpoint are hard-coded;
    only the identifying fields, references and attributes vary per call.

    Args:
        unique_id: Identifier other resources use to reference this one.
        resource_type: Resource type label.
        name: Resource name.
        raw_references: IDs this resource refers to. ``None`` means no
            references. The list is copied, so the returned resource never
            shares a list with the caller.
        attributes: Attribute mapping. ``None`` selects the placeholder
            ``{"key": "value"}``; an explicitly supplied dict (including an
            empty one) is passed through unchanged.

    Returns:
        The constructed ``DiscoveredResource``.
    """
    # Test against None instead of truthiness: ``attributes or {...}`` would
    # silently swap a deliberately empty dict for the placeholder.
    if attributes is None:
        attributes = {"key": "value"}

    # Always hand the resource its own list. Strategies in this module append
    # to ``raw_references`` in place after construction, and that must not
    # leak back into a list owned by the caller.
    own_references = [] if raw_references is None else list(raw_references)

    return DiscoveredResource(
        resource_type=resource_type,
        unique_id=unique_id,
        name=name,
        provider=ProviderType.KUBERNETES,
        platform_category=PlatformCategory.CONTAINER_ORCHESTRATION,
        architecture=CpuArchitecture.AMD64,
        endpoint="https://api.internal.lab:6443",
        attributes=attributes,
        raw_references=own_references,
    )
def make_scan_result(resources: list[DiscoveredResource]) -> ScanResult:
    """Wrap *resources* in a ``ScanResult`` whose other fields are fixed.

    The result carries no warnings or errors, a constant timestamp and
    profile hash, and is flagged as a complete (non-partial) scan.
    """
    fixed_fields = {
        "warnings": [],
        "errors": [],
        "scan_timestamp": "2024-01-15T10:30:00Z",
        "profile_hash": "test_hash",
        "is_partial": False,
    }
    return ScanResult(resources=resources, **fixed_fields)
# Strategy to generate a list of resources with unique IDs and controlled references
@st.composite
def acyclic_resource_graph_strategy(draw):
    """Draw 2-8 resources whose references form an acyclic graph.

    Resources are named ``resource_0``, ``resource_1``, ... and built in that
    order. Each one may reference up to three distinct resources, chosen only
    from those built before it, so a reference can never point "forward" and
    no cycle can arise.
    """
    resource_count = draw(st.integers(min_value=2, max_value=8))
    generated = []
    earlier_ids = []

    for index in range(resource_count):
        current_id = f"resource_{index}"

        if index == 0:
            # The first resource has nothing earlier to point at.
            references = []
        else:
            # Fix the number of references first, then draw exactly that many
            # distinct IDs from the resources created so far.
            reference_count = draw(
                st.integers(min_value=0, max_value=min(index, 3))
            )
            references = draw(
                st.lists(
                    st.sampled_from(list(earlier_ids)),
                    min_size=reference_count,
                    max_size=reference_count,
                    unique=True,
                )
            )

        generated.append(
            make_resource(
                unique_id=current_id,
                name=f"res_{index}",
                raw_references=references,
            )
        )
        earlier_ids.append(current_id)

    return generated
@st.composite
def cyclic_resource_graph_strategy(draw):
    """Draw 2-6 resources whose references contain at least one cycle.

    All resources start without references. A subset of at least two distinct
    resources is then drawn and wired into a ring: each member references the
    next one, and the last references the first.
    """
    resource_count = draw(st.integers(min_value=2, max_value=6))
    ids = [f"resource_{index}" for index in range(resource_count)]
    resources = [
        make_resource(unique_id=uid, name=f"res_{index}", raw_references=[])
        for index, uid in enumerate(ids)
    ]

    # Choose how many resources take part in the ring, then which ones.
    # The drawn order is also the order of the ring.
    ring_length = draw(st.integers(min_value=2, max_value=resource_count))
    ring = draw(
        st.lists(
            st.sampled_from(list(range(resource_count))),
            min_size=ring_length,
            max_size=ring_length,
            unique=True,
        )
    )

    # Pair every ring member with its successor; rotating the list by one
    # makes the last member wrap around to the first.
    successors = ring[1:] + ring[:1]
    for source_index, target_index in zip(ring, successors):
        target_id = ids[target_index]
        references = resources[source_index].raw_references
        if target_id not in references:
            references.append(target_id)

    return resources
@st.composite
def resources_with_unresolved_refs_strategy(draw):
    """Draw resources whose raw_references only name IDs outside the inventory.

    Returns a ``(resources, unresolved_ids)`` tuple: 1-5 resources named
    ``resource_<n>`` and the pool of 1-4 external IDs they may reference.
    Each resource references between zero and two distinct IDs from the pool.
    """
    resource_count = draw(st.integers(min_value=1, max_value=5))
    inventory_ids = [f"resource_{index}" for index in range(resource_count)]

    # Build the pool of external IDs. Each is either path-like (contains "/",
    # expected to suggest a data source) or a plain name (expected to suggest
    # a variable). The index is embedded, so the pool never repeats an ID.
    # NOTE(review): no ID containing ":" is ever generated here.
    external_count = draw(st.integers(min_value=1, max_value=4))
    external_ids = []
    for index in range(external_count):
        path_like = draw(st.booleans())
        external_ids.append(
            f"external/resource/{index}" if path_like else f"external_var_{index}"
        )

    # Attach a small selection of external IDs to every resource.
    per_resource_cap = min(external_count, 2)
    resources = []
    for index, uid in enumerate(inventory_ids):
        pick_count = draw(st.integers(min_value=0, max_value=per_resource_cap))
        picked = draw(
            st.lists(
                st.sampled_from(external_ids),
                min_size=pick_count,
                max_size=pick_count,
                unique=True,
            )
        )
        resources.append(
            make_resource(
                unique_id=uid,
                name=f"res_{index}",
                raw_references=picked,
            )
        )

    return resources, external_ids
# ---------------------------------------------------------------------------
# Property 6: Dependency relationship identification
# ---------------------------------------------------------------------------
class TestDependencyRelationshipIdentification:
    """Property 6: Dependency relationship identification.

    **Validates: Requirements 2.1**

    For any resource with raw_references pointing to other resources in the
    inventory, the resolver SHALL create a ResourceRelationship for each
    resolved reference.
    """

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_created_for_each_resolved_reference(
        self, resources: list[DiscoveredResource]
    ):
        """For each raw_reference pointing to a known resource, a relationship is created."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        # Every raw_reference that names a resource in the inventory should
        # yield exactly one relationship.
        resource_ids = {r.unique_id for r in resources}
        expected_relationships = sum(
            1
            for resource in resources
            for ref in resource.raw_references
            if ref in resource_ids
        )

        assert len(graph.relationships) == expected_relationships

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_source_and_target_are_correct(
        self, resources: list[DiscoveredResource]
    ):
        """Each relationship has source_id as the referencing resource and target_id as the referenced."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        resource_ids = {r.unique_id for r in resources}
        # The (referencing, referenced) pairs actually present in the input.
        # Membership of both ids in the inventory is not enough: a resolver
        # that swapped the two ends, or linked unrelated resources, would
        # still pass that check.
        expected_edges = {
            (resource.unique_id, ref)
            for resource in resources
            for ref in resource.raw_references
            if ref in resource_ids
        }

        for rel in graph.relationships:
            # source_id is the resource that holds the reference
            assert rel.source_id in resource_ids
            # target_id is the resource being referenced
            assert rel.target_id in resource_ids
            # ... and the pair must match a real reference, in that direction
            assert (rel.source_id, rel.target_id) in expected_edges, (
                f"Relationship '{rel.source_id}' -> '{rel.target_id}' does not "
                f"correspond to any raw_reference in the input"
            )

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_type_is_valid(
        self, resources: list[DiscoveredResource]
    ):
        """Each relationship has a valid relationship_type."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        valid_types = {"parent-child", "reference", "dependency"}
        for rel in graph.relationships:
            assert rel.relationship_type in valid_types

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_source_attribute_is_non_empty(
        self, resources: list[DiscoveredResource]
    ):
        """Each relationship has a non-empty source_attribute."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        for rel in graph.relationships:
            assert isinstance(rel.source_attribute, str)
            assert len(rel.source_attribute) > 0
# ---------------------------------------------------------------------------
# Property 7: Cycle detection correctness
# ---------------------------------------------------------------------------
class TestCycleDetectionCorrectness:
    """Property 7: Cycle detection correctness.

    **Validates: Requirements 2.3**

    For any graph containing a cycle, the resolver SHALL detect and report it
    in the cycles list. For any acyclic dependency graph, the resolver SHALL
    report zero cycles.
    """

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_acyclic_graph_reports_zero_cycles(
        self, resources: list[DiscoveredResource]
    ):
        """Resolving an acyclic graph yields an empty cycles list."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        cycle_count = len(graph.cycles)
        assert cycle_count == 0

    @given(resources=cyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_cyclic_graph_reports_at_least_one_cycle(
        self, resources: list[DiscoveredResource]
    ):
        """Resolving a graph that contains a ring yields one or more cycles."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        cycle_count = len(graph.cycles)
        assert cycle_count >= 1

    @given(resources=cyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_cycle_contains_valid_resource_ids(
        self, resources: list[DiscoveredResource]
    ):
        """Every reported cycle has at least two members, all from the inventory."""
        known_ids = {r.unique_id for r in resources}
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        for cycle in graph.cycles:
            assert len(cycle) >= 2, "A cycle must involve at least 2 resources"
            for member_id in cycle:
                assert member_id in known_ids

    @given(resources=cyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_cycle_reports_have_resolution_suggestions(
        self, resources: list[DiscoveredResource]
    ):
        """Every cycle report carries a two-element break edge, a valid type and a strategy."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()
        allowed_break_types = {"parent-child", "reference", "dependency"}

        # NOTE(review): this loop passes vacuously if cycle_reports is empty;
        # nothing here requires a report per detected cycle.
        for report in graph.cycle_reports:
            suggested = report.suggested_break
            assert suggested is not None
            assert len(suggested) == 2
            assert report.break_relationship_type in allowed_break_types

            strategy = report.resolution_strategy
            assert isinstance(strategy, str)
            assert len(strategy) > 0
# ---------------------------------------------------------------------------
# Property 8: Topological order validity
# ---------------------------------------------------------------------------
class TestTopologicalOrderValidity:
    """Property 8: Topological order validity.

    **Validates: Requirements 2.4**

    For any acyclic dependency graph, no resource SHALL appear before any
    resource it depends on in the topological order.
    """

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_topological_order_contains_all_resources(
        self, resources: list[DiscoveredResource]
    ):
        """The set of IDs in the topological order equals the inventory's IDs."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        inventory_ids = {r.unique_id for r in resources}
        ordered_ids = set(graph.topological_order)
        assert ordered_ids == inventory_ids

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_dependencies_appear_before_dependents(
        self, resources: list[DiscoveredResource]
    ):
        """Every referenced resource is ordered ahead of the resource referencing it.

        If resource A lists B in raw_references, A depends on B, so B's index
        in the topological order must be smaller than A's.
        """
        resource_ids = {r.unique_id for r in resources}
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        # Index of each resource ID within the reported order.
        position = {}
        for idx, rid in enumerate(graph.topological_order):
            position[rid] = idx

        for resource in resources:
            for ref_id in resource.raw_references:
                if ref_id not in resource_ids:
                    # References to IDs outside the inventory impose no ordering.
                    continue
                assert position[ref_id] < position[resource.unique_id], (
                    f"Resource '{ref_id}' (dependency) should appear before "
                    f"'{resource.unique_id}' (dependent) in topological order"
                )

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_topological_order_has_no_duplicates(
        self, resources: list[DiscoveredResource]
    ):
        """No resource ID occurs more than once in the topological order."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        order = graph.topological_order
        distinct_count = len(set(order))
        assert len(order) == distinct_count
# ---------------------------------------------------------------------------
# Property 9: Unresolved references become data sources or variables
# ---------------------------------------------------------------------------
class TestUnresolvedReferences:
    """Property 9: Unresolved references become data sources or variables.

    **Validates: Requirements 2.5**

    For any raw_reference pointing to an ID not in the inventory, the resolver
    SHALL create an UnresolvedReference with suggested_resolution of either
    "data_source" or "variable".
    """

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_are_tracked(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """One UnresolvedReference is recorded per reference to an ID outside the inventory."""
        resources, _ = data
        inventory_ids = {r.unique_id for r in resources}
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        # Count every raw_reference that names an ID outside the inventory.
        expected_unresolved = sum(
            1
            for resource in resources
            for ref in resource.raw_references
            if ref not in inventory_ids
        )

        assert len(graph.unresolved_references) == expected_unresolved

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_suggest_data_source_or_variable(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """suggested_resolution is always either 'data_source' or 'variable'."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()
        allowed_resolutions = {"data_source", "variable"}

        for unresolved in graph.unresolved_references:
            assert unresolved.suggested_resolution in allowed_resolutions, (
                f"Expected 'data_source' or 'variable', got '{unresolved.suggested_resolution}'"
            )

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_have_valid_source_resource(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """source_resource_id of every UnresolvedReference names an inventory resource."""
        resources, _ = data
        inventory_ids = {r.unique_id for r in resources}
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        for unresolved in graph.unresolved_references:
            assert unresolved.source_resource_id in inventory_ids

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_have_non_empty_fields(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """source_attribute and referenced_id are both non-empty strings."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        for unresolved in graph.unresolved_references:
            attribute = unresolved.source_attribute
            assert isinstance(attribute, str)
            assert len(attribute) > 0

            referenced = unresolved.referenced_id
            assert isinstance(referenced, str)
            assert len(referenced) > 0

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_ids_with_slash_or_colon_suggest_data_source(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """A referenced ID containing '/' or ':' must suggest 'data_source'."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        for unresolved in graph.unresolved_references:
            has_separator = any(
                separator in unresolved.referenced_id for separator in ("/", ":")
            )
            if not has_separator:
                continue
            assert unresolved.suggested_resolution == "data_source", (
                f"Reference '{unresolved.referenced_id}' contains '/' or ':' "
                f"and should suggest 'data_source', got '{unresolved.suggested_resolution}'"
            )

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_ids_without_slash_or_colon_suggest_variable(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """A referenced ID containing neither '/' nor ':' must suggest 'variable'."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        for unresolved in graph.unresolved_references:
            has_separator = any(
                separator in unresolved.referenced_id for separator in ("/", ":")
            )
            if has_separator:
                continue
            assert unresolved.suggested_resolution == "variable", (
                f"Reference '{unresolved.referenced_id}' has no '/' or ':' "
                f"and should suggest 'variable', got '{unresolved.suggested_resolution}'"
            )