Created IaC reverse generator

This commit is contained in:
p2913020
2026-05-22 00:19:30 -04:00
parent d04c2c6e4b
commit 1a11244fff
161 changed files with 26806 additions and 51 deletions

View File

@@ -0,0 +1 @@
"""Property-based tests for IaC Reverse Engineering Tool."""

Binary file not shown.

View File

@@ -0,0 +1,719 @@
"""Property-based tests for the Code Generator.
**Validates: Requirements 2.2, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6**
Properties tested:
- Property 10: References in generated output use Terraform syntax
- Property 11: Generated HCL syntactic validity
- Property 12: File organization by resource type
- Property 13: Variable extraction for shared values
- Property 14: Identifier sanitization validity
- Property 15: Traceability comments in generated code
"""
import re
from hypothesis import given, settings, assume, HealthCheck
from hypothesis import strategies as st
from iac_reverse.generator import CodeGenerator, VariableExtractor, sanitize_identifier
from iac_reverse.models import (
CpuArchitecture,
DependencyGraph,
DiscoveredResource,
PlatformCategory,
ProviderType,
ResourceRelationship,
ScanProfile,
)
# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------
# Enum-backed strategies: each draws one member of the corresponding model enum.
provider_type_strategy = st.sampled_from(list(ProviderType))
platform_category_strategy = st.sampled_from(list(PlatformCategory))
cpu_architecture_strategy = st.sampled_from(list(CpuArchitecture))

# Resource names: 1-20 characters drawn from letters, numbers, "_" and "-",
# rejecting anything that is empty once surrounding whitespace is stripped.
_resource_name_alphabet = st.characters(
    whitelist_categories=("L", "N"),
    whitelist_characters="_-",
)
resource_name_strategy = st.text(
    alphabet=_resource_name_alphabet,
    min_size=1,
    max_size=20,
).filter(lambda candidate: candidate.strip() != "")

# Resource types: a fixed pool of terraform-style "<provider>_<kind>" names.
_RESOURCE_TYPES = [
    "kubernetes_deployment",
    "kubernetes_service",
    "kubernetes_namespace",
    "docker_service",
    "docker_network",
    "docker_volume",
    "synology_shared_folder",
    "synology_volume",
    "harvester_virtualmachine",
    "harvester_volume",
    "bare_metal_hardware",
    "windows_service",
    "windows_iis_site",
]
resource_type_strategy = st.sampled_from(_RESOURCE_TYPES)

# Simple attribute values: a non-blank short string, a small non-negative
# integer, or a boolean.
_attr_text_alphabet = st.characters(
    whitelist_categories=("L", "N"),
    whitelist_characters="_-./: ",
)
_attr_text_strategy = st.text(
    alphabet=_attr_text_alphabet,
    min_size=1,
    max_size=30,
).filter(lambda candidate: candidate.strip() != "")
simple_attr_value_strategy = st.one_of(
    _attr_text_strategy,
    st.integers(min_value=0, max_value=10000),
    st.booleans(),
)

# Attribute dictionaries: 1-5 entries whose keys are letters/underscores and
# begin with an alphabetic character.
_attr_key_strategy = st.text(
    alphabet=st.characters(whitelist_categories=("L",), whitelist_characters="_"),
    min_size=1,
    max_size=15,
).filter(lambda key: key.strip() != "" and key[0].isalpha())
attributes_strategy = st.dictionaries(
    keys=_attr_key_strategy,
    values=simple_attr_value_strategy,
    min_size=1,
    max_size=5,
)
def make_resource(
    unique_id: str,
    resource_type: str = "kubernetes_deployment",
    name: str = "my_resource",
    provider: ProviderType = ProviderType.KUBERNETES,
    platform_category: PlatformCategory = PlatformCategory.CONTAINER_ORCHESTRATION,
    architecture: CpuArchitecture = CpuArchitecture.AMD64,
    attributes: dict | None = None,
    raw_references: list[str] | None = None,
) -> DiscoveredResource:
    """Build a ``DiscoveredResource`` for tests, defaulting every optional field.

    Args:
        unique_id: Identifier stored on the resource.
        resource_type: Terraform-style type name.
        name: Resource name.
        provider: Provider enum member.
        platform_category: Platform category enum member.
        architecture: CPU architecture enum member.
        attributes: Attribute mapping; a falsy value (``None`` or ``{}``)
            is replaced with ``{"key": "value"}``.
        raw_references: Referenced IDs; a falsy value (``None`` or ``[]``)
            is replaced with a new empty list.

    Returns:
        The resource, always using the fixed test endpoint URL.
    """
    effective_attributes = attributes if attributes else {"key": "value"}
    effective_references = raw_references if raw_references else []
    return DiscoveredResource(
        resource_type=resource_type,
        unique_id=unique_id,
        name=name,
        provider=provider,
        platform_category=platform_category,
        architecture=architecture,
        endpoint="https://api.internal.lab:6443",
        attributes=effective_attributes,
        raw_references=effective_references,
    )
def make_dependency_graph(
    resources: list[DiscoveredResource],
    relationships: list[ResourceRelationship] | None = None,
) -> DependencyGraph:
    """Wrap *resources* in a ``DependencyGraph`` with no cycles or unresolved refs.

    The topological order is simply the resources' ``unique_id`` values in the
    order given; a falsy *relationships* argument becomes an empty list.
    """
    ordered_ids = [resource.unique_id for resource in resources]
    edges = relationships if relationships else []
    return DependencyGraph(
        resources=resources,
        relationships=edges,
        topological_order=ordered_ids,
        cycles=[],
        unresolved_references=[],
    )
@st.composite
def resource_with_dependency_strategy(draw):
    """Draw two distinct resources where the first references the second.

    Returns:
        A ``(resources, relationships)`` pair: ``resources`` is
        ``[source, target]`` and ``relationships`` holds the single
        ``"reference"`` edge from source to target.
    """
    # Draw order is kept fixed: both types, both names, then the architecture.
    source_type = draw(resource_type_strategy)
    target_type = draw(resource_type_strategy)
    source_name = draw(resource_name_strategy)
    target_name = draw(resource_name_strategy)
    arch = draw(cpu_architecture_strategy)

    source_uid = f"ns/{source_type}/{source_name}"
    target_uid = f"ns/{target_type}/{target_name}"
    # Discard examples where both resources would share one unique ID.
    # NOTE(review): distinct names may still sanitize to the same Terraform
    # identifier — confirm how the generator handles that case.
    assume(source_uid != target_uid)

    # The dependency target carries no references of its own.
    target = make_resource(
        unique_id=target_uid,
        resource_type=target_type,
        name=target_name,
        architecture=arch,
        attributes={"port": 8080},
    )
    # The source embeds the target's unique_id both as an attribute value
    # and as a raw reference.
    source = make_resource(
        unique_id=source_uid,
        resource_type=source_type,
        name=source_name,
        architecture=arch,
        attributes={"target_id": target_uid, "replicas": 3},
        raw_references=[target_uid],
    )
    edge = ResourceRelationship(
        source_id=source_uid,
        target_id=target_uid,
        relationship_type="reference",
        source_attribute="target_id",
    )
    return [source, target], [edge]
@st.composite
def multiple_resources_strategy(draw):
    """Draw resources spanning 1-5 distinct types, with 1-3 resources per type.

    Unique IDs have the form ``<type>/instance_<i>_<j>`` and names the form
    ``res_<i>_<j>``, where ``i`` indexes the type and ``j`` the instance.
    """
    type_count = draw(st.integers(min_value=1, max_value=5))
    chosen_types = draw(
        st.lists(
            resource_type_strategy,
            min_size=type_count,
            max_size=type_count,
            unique=True,
        )
    )

    generated = []
    for type_index, resource_type in enumerate(chosen_types):
        instance_count = draw(st.integers(min_value=1, max_value=3))
        for instance_index in range(instance_count):
            suffix = f"{type_index}_{instance_index}"
            generated.append(
                make_resource(
                    unique_id=f"{resource_type}/instance_{suffix}",
                    resource_type=resource_type,
                    name=f"res_{suffix}",
                    attributes=draw(attributes_strategy),
                )
            )
    return generated
@st.composite
def resources_with_shared_values_strategy(draw):
    """Draw 2-5 resources that all carry the same key/value attribute pair.

    Returns:
        ``(resources, shared_key, shared_value)``; every resource also has a
        per-instance ``"name"`` attribute.
    """
    shared_key = draw(st.sampled_from(["region", "environment", "zone", "cluster"]))
    value_alphabet = st.characters(
        whitelist_categories=("L", "N"),
        whitelist_characters="_-",
    )
    shared_value = draw(
        st.text(alphabet=value_alphabet, min_size=1, max_size=15).filter(
            lambda candidate: candidate.strip() != ""
        )
    )
    count = draw(st.integers(min_value=2, max_value=5))

    resources = [
        make_resource(
            unique_id=f"resource_{index}",
            resource_type="kubernetes_deployment",
            name=f"res_{index}",
            # The shared pair is identical on every resource.
            attributes={shared_key: shared_value, "name": f"instance_{index}"},
        )
        for index in range(count)
    ]
    return resources, shared_key, shared_value
# Unconstrained text (0-50 characters, any alphabet) used to exercise
# sanitize_identifier.
arbitrary_string_strategy = st.text(
    min_size=0,
    max_size=50,
)
# ---------------------------------------------------------------------------
# Property 10: References in generated output use Terraform syntax
# ---------------------------------------------------------------------------
class TestReferencesUseTerraformSyntax:
    """Property 10: References in generated output use Terraform syntax.

    **Validates: Requirements 2.2, 3.5**

    When one resource depends on another, the generated HCL must refer to the
    dependency as ``type.name.id`` rather than embedding its hardcoded ID.
    """

    @given(data=resource_with_dependency_strategy())
    @settings(max_examples=100)
    def test_references_use_terraform_resource_syntax(
        self, data: tuple[list[DiscoveredResource], list[ResourceRelationship]]
    ):
        """The source resource's file contains ``<type>.<name>.id`` for the target."""
        resources, relationships = data
        source, target = resources[0], resources[1]

        profiles: list[ScanProfile] = []
        result = CodeGenerator().generate(
            make_dependency_graph(resources, relationships), profiles
        )

        expected_ref = (
            f"{target.resource_type}.{sanitize_identifier(target.name)}.id"
        )

        # Locate the per-type file holding the source resource.
        source_file = next(
            (
                candidate
                for candidate in result.resource_files
                if candidate.filename == f"{source.resource_type}.tf"
            ),
            None,
        )
        assert source_file is not None, (
            f"Expected file {source.resource_type}.tf not found"
        )
        assert expected_ref in source_file.content, (
            f"Expected Terraform reference '{expected_ref}' not found in output. "
            f"Content: {source_file.content[:500]}"
        )

    @given(data=resource_with_dependency_strategy())
    @settings(max_examples=100)
    def test_hardcoded_ids_not_present_for_resolved_references(
        self, data: tuple[list[DiscoveredResource], list[ResourceRelationship]]
    ):
        """The target's unique_id never appears as a quoted literal in the source's file."""
        resources, relationships = data
        source, target = resources[0], resources[1]

        profiles: list[ScanProfile] = []
        result = CodeGenerator().generate(
            make_dependency_graph(resources, relationships), profiles
        )

        source_file = next(
            (
                candidate
                for candidate in result.resource_files
                if candidate.filename == f"{source.resource_type}.tf"
            ),
            None,
        )
        assert source_file is not None

        # Only the double-quoted form counts as a hardcoded ID.
        hardcoded_pattern = f'"{target.unique_id}"'
        assert hardcoded_pattern not in source_file.content, (
            f"Hardcoded ID '{hardcoded_pattern}' should not appear in generated HCL. "
            f"Should use Terraform reference instead."
        )
# ---------------------------------------------------------------------------
# Property 11: Generated HCL syntactic validity
# ---------------------------------------------------------------------------
class TestGeneratedHclSyntacticValidity:
    """Property 11: Generated HCL syntactic validity.

    **Validates: Requirements 3.1**

    For any set of resources, the generated HCL contains valid resource blocks
    with proper structure (resource keyword, type, name, braces).
    """

    # Opening of a resource block: resource "type" "name" {
    # Compiled once at class level instead of once per generated file.
    RESOURCE_BLOCK_PATTERN = re.compile(r'resource\s+"[^"]+"\s+"[^"]+"\s*\{')

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100, suppress_health_check=[HealthCheck.too_slow])
    def test_generated_hcl_has_valid_resource_blocks(
        self, resources: list[DiscoveredResource]
    ):
        """Each generated file contains as many resource blocks as it reports."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for gen_file in result.resource_files:
            blocks_found = self.RESOURCE_BLOCK_PATTERN.findall(gen_file.content)
            assert len(blocks_found) == gen_file.resource_count, (
                f"Expected {gen_file.resource_count} resource blocks in "
                f"{gen_file.filename}, found {len(blocks_found)}"
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_generated_hcl_has_balanced_braces(
        self, resources: list[DiscoveredResource]
    ):
        """Generated HCL has balanced opening and closing braces."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for gen_file in result.resource_files:
            content = gen_file.content
            open_braces = content.count("{")
            close_braces = content.count("}")
            assert open_braces == close_braces, (
                f"Unbalanced braces in {gen_file.filename}: "
                f"{open_braces} opening vs {close_braces} closing"
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_generated_hcl_resource_type_matches_filename(
        self, resources: list[DiscoveredResource]
    ):
        """Each resource block's type matches the file it's in (filename = type.tf)."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for gen_file in result.resource_files:
            # Strip only the trailing extension; str.replace would also remove
            # any ".tf" occurring elsewhere in the filename.
            expected_type = gen_file.filename.removesuffix(".tf")
            resource_types_in_file = re.findall(
                r'resource\s+"([^"]+)"', gen_file.content
            )
            for rtype in resource_types_in_file:
                assert rtype == expected_type, (
                    f"Resource type '{rtype}' found in {gen_file.filename} "
                    f"but expected only '{expected_type}'"
                )
# ---------------------------------------------------------------------------
# Property 12: File organization by resource type
# ---------------------------------------------------------------------------
class TestFileOrganizationByResourceType:
    """Property 12: File organization by resource type.

    **Validates: Requirements 3.2**

    For any set of resources, each resource type gets its own .tf file.
    """

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_one_file_per_resource_type(
        self, resources: list[DiscoveredResource]
    ):
        """The number of resource files equals the number of distinct resource types."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        distinct_types = {r.resource_type for r in resources}
        assert len(result.resource_files) == len(distinct_types), (
            f"Expected {len(distinct_types)} files for {len(distinct_types)} "
            f"distinct types, got {len(result.resource_files)}"
        )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_each_file_named_after_resource_type(
        self, resources: list[DiscoveredResource]
    ):
        """Each generated file is named <resource_type>.tf."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        distinct_types = {r.resource_type for r in resources}
        expected_filenames = {f"{rt}.tf" for rt in distinct_types}
        actual_filenames = {f.filename for f in result.resource_files}
        assert actual_filenames == expected_filenames, (
            f"Expected filenames {expected_filenames}, got {actual_filenames}"
        )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_every_resource_appears_in_exactly_one_file(
        self, resources: list[DiscoveredResource]
    ):
        """Every resource's unique_id appears in exactly one generated file."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for resource in resources:
            # Substring match against each file's full content.
            files_containing = [
                f.filename
                for f in result.resource_files
                if resource.unique_id in f.content
            ]
            assert len(files_containing) == 1, (
                f"Resource '{resource.unique_id}' found in {len(files_containing)} "
                f"files: {files_containing}. Expected exactly 1."
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_resource_count_per_file_matches(
        self, resources: list[DiscoveredResource]
    ):
        """Each file's resource_count matches the actual number of resources of that type."""
        from collections import Counter

        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        type_counts = Counter(r.resource_type for r in resources)
        for gen_file in result.resource_files:
            # Strip only the trailing extension; str.replace would also remove
            # any ".tf" occurring elsewhere in the filename.
            expected_type = gen_file.filename.removesuffix(".tf")
            assert gen_file.resource_count == type_counts[expected_type], (
                f"File {gen_file.filename} reports {gen_file.resource_count} resources "
                f"but expected {type_counts[expected_type]}"
            )
# ---------------------------------------------------------------------------
# Property 13: Variable extraction for shared values
# ---------------------------------------------------------------------------
class TestVariableExtractionForSharedValues:
    """Property 13: Variable extraction for shared values.

    **Validates: Requirements 3.3**

    Whenever the same value is present on two or more resources, the
    extractor must emit a variable for it.
    """

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_shared_value_produces_extracted_variable(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """At least one extracted variable has the shared key in its name."""
        resources, shared_key, _shared_value = data
        variables = VariableExtractor().extract_variables(resources)

        matching_vars = [var for var in variables if shared_key in var.name]
        var_names = [var.name for var in variables]
        assert len(matching_vars) >= 1, (
            f"Expected at least one variable for shared key '{shared_key}', "
            f"got variables: {var_names}"
        )

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_extracted_variable_has_correct_default(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """The first matching variable's default contains the shared value."""
        resources, shared_key, shared_value = data
        variables = VariableExtractor().extract_variables(resources)

        matching_vars = [var for var in variables if shared_key in var.name]
        assert len(matching_vars) >= 1

        # Containment rather than equality: the default may be wrapped,
        # e.g. as a quoted string literal.
        first_match = matching_vars[0]
        assert shared_value in first_match.default_value, (
            f"Expected default to contain '{shared_value}', got '{first_match.default_value}'"
        )

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_extracted_variable_tracks_usage(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """The first matching variable lists at least two users in ``used_by``."""
        resources, shared_key, _shared_value = data
        variables = VariableExtractor().extract_variables(resources)

        matching_vars = [var for var in variables if shared_key in var.name]
        assert len(matching_vars) >= 1

        first_match = matching_vars[0]
        assert len(first_match.used_by) >= 2, (
            f"Expected variable to be used by 2+ resources, "
            f"got {len(first_match.used_by)}: {first_match.used_by}"
        )

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_extracted_variable_has_type_and_description(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """Every extracted variable has a non-empty type expression and description."""
        resources, _shared_key, _shared_value = data
        extracted = VariableExtractor().extract_variables(resources)

        for var in extracted:
            assert var.type_expr != "", f"Variable '{var.name}' has empty type_expr"
            assert var.description != "", f"Variable '{var.name}' has empty description"
# ---------------------------------------------------------------------------
# Property 14: Identifier sanitization validity
# ---------------------------------------------------------------------------
class TestIdentifierSanitizationValidity:
    """Property 14: Identifier sanitization validity.

    **Validates: Requirements 3.4**

    For any input string, sanitize_identifier produces a valid Terraform identifier.
    """

    TERRAFORM_IDENTIFIER_REGEX = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")

    # All checks use fullmatch(): with match(), the trailing "$" also matches
    # just before a final newline, so a result such as "abc\n" would be
    # accepted as a valid identifier.

    @given(name=arbitrary_string_strategy)
    @settings(max_examples=200)
    def test_sanitized_identifier_matches_terraform_pattern(self, name: str):
        """The output always matches ^[a-zA-Z_][a-zA-Z0-9_]*$."""
        result = sanitize_identifier(name)
        assert self.TERRAFORM_IDENTIFIER_REGEX.fullmatch(result), (
            f"sanitize_identifier({name!r}) = {result!r} does not match "
            f"Terraform identifier pattern"
        )

    @given(name=arbitrary_string_strategy)
    @settings(max_examples=200)
    def test_sanitized_identifier_is_non_empty(self, name: str):
        """The output is always a non-empty string."""
        result = sanitize_identifier(name)
        assert len(result) > 0, (
            f"sanitize_identifier({name!r}) produced empty string"
        )

    @given(name=st.text(min_size=1, max_size=30, alphabet="0123456789"))
    @settings(max_examples=100)
    def test_digit_only_input_produces_valid_identifier(self, name: str):
        """Input consisting only of digits still produces a valid identifier."""
        result = sanitize_identifier(name)
        assert self.TERRAFORM_IDENTIFIER_REGEX.fullmatch(result), (
            f"sanitize_identifier({name!r}) = {result!r} is not valid for digit-only input"
        )
        # Must not start with a digit
        assert not result[0].isdigit(), (
            f"sanitize_identifier({name!r}) = {result!r} starts with a digit"
        )

    @given(name=st.text(
        min_size=1,
        max_size=30,
        alphabet=st.characters(whitelist_categories=("L", "N"), whitelist_characters="_"),
    ).filter(lambda s: s[0].isalpha() or s[0] == "_"))
    @settings(max_examples=100)
    def test_already_valid_identifiers_are_preserved_or_simplified(self, name: str):
        """Identifier-shaped input produces a valid result.

        Inputs are letters, numbers and underscores starting with a letter or
        underscore. Only validity of the output is asserted, not equality
        with the input.
        """
        result = sanitize_identifier(name)
        assert self.TERRAFORM_IDENTIFIER_REGEX.fullmatch(result), (
            f"sanitize_identifier({name!r}) = {result!r} is not valid"
        )
# ---------------------------------------------------------------------------
# Property 15: Traceability comments in generated code
# ---------------------------------------------------------------------------
class TestTraceabilityCommentsInGeneratedCode:
    """Property 15: Traceability comments in generated code.

    **Validates: Requirements 3.6**

    The HCL emitted for each resource must carry a ``# Source: <unique_id>``
    comment naming the original resource.
    """

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_each_resource_has_traceability_comment(
        self, resources: list[DiscoveredResource]
    ):
        """A ``# Source:`` comment exists somewhere in the output for every resource."""
        profiles: list[ScanProfile] = []
        result = CodeGenerator().generate(make_dependency_graph(resources), profiles)

        # Search the concatenation of all files; which file holds the comment
        # is not checked here.
        all_content = "\n".join(gen_file.content for gen_file in result.resource_files)

        for resource in resources:
            comment_pattern = f"# Source: {resource.unique_id}"
            assert comment_pattern in all_content, (
                f"Traceability comment for resource '{resource.unique_id}' "
                f"not found in generated output"
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_traceability_comment_precedes_resource_block(
        self, resources: list[DiscoveredResource]
    ):
        """Each ``# Source:`` comment sits before its resource block in the file."""
        profiles: list[ScanProfile] = []
        result = CodeGenerator().generate(make_dependency_graph(resources), profiles)

        for resource in resources:
            # First file whose content mentions this resource's unique_id.
            target_file = next(
                (
                    gen_file
                    for gen_file in result.resource_files
                    if resource.unique_id in gen_file.content
                ),
                None,
            )
            assert target_file is not None

            content = target_file.content
            comment_pos = content.find(f"# Source: {resource.unique_id}")

            tf_name = sanitize_identifier(resource.name)
            block_header = f'resource "{resource.resource_type}" "{tf_name}"'
            # The block is searched for starting at the comment's position;
            # str.find yields -1 when either string is missing, which also
            # makes the assertion below fail.
            block_pos = content.find(block_header, comment_pos)

            assert comment_pos < block_pos, (
                f"Comment for '{resource.unique_id}' (pos {comment_pos}) "
                f"should precede resource block (pos {block_pos})"
            )

View File

@@ -0,0 +1,565 @@
"""Property-based tests for the Dependency Resolver.
**Validates: Requirements 2.1, 2.3, 2.4, 2.5**
Properties tested:
- Property 6: Dependency relationship identification
- Property 7: Cycle detection correctness
- Property 8: Topological order validity
- Property 9: Unresolved references become data sources or variables
"""
from hypothesis import given, settings, assume
from hypothesis import strategies as st
from iac_reverse.models import (
CpuArchitecture,
DependencyGraph,
DiscoveredResource,
PlatformCategory,
ProviderType,
ResourceRelationship,
ScanResult,
UnresolvedReference,
)
from iac_reverse.resolver import DependencyResolver
# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------
# Enum-backed strategies: each draws one member of the corresponding model enum.
provider_type_strategy = st.sampled_from(list(ProviderType))
platform_category_strategy = st.sampled_from(list(PlatformCategory))
cpu_architecture_strategy = st.sampled_from(list(CpuArchitecture))

# Resource IDs: 3-50 characters of letters, numbers, "_", "-" and "/".
_resource_id_alphabet = st.characters(
    whitelist_categories=("L", "N"),
    whitelist_characters="_-/",
)
resource_id_strategy = st.text(
    alphabet=_resource_id_alphabet,
    min_size=3,
    max_size=50,
).filter(lambda candidate: candidate.strip() != "" and len(candidate) >= 3)

# Resource names: 1-30 characters of letters, numbers, "_" and "-".
_resource_name_alphabet = st.characters(
    whitelist_categories=("L", "N"),
    whitelist_characters="_-",
)
resource_name_strategy = st.text(
    alphabet=_resource_name_alphabet,
    min_size=1,
    max_size=30,
).filter(lambda candidate: candidate.strip() != "")

# Resource types: 3-40 characters of letters, numbers and "_".
_resource_type_alphabet = st.characters(
    whitelist_categories=("L", "N"),
    whitelist_characters="_",
)
resource_type_strategy = st.text(
    alphabet=_resource_type_alphabet,
    min_size=3,
    max_size=40,
).filter(lambda candidate: candidate.strip() != "" and len(candidate) >= 3)

# Endpoints: 5-50 characters of letters, numbers and ".", "-", ":", "/".
_endpoint_alphabet = st.characters(
    whitelist_categories=("L", "N"),
    whitelist_characters=".-:/",
)
endpoint_strategy = st.text(
    alphabet=_endpoint_alphabet,
    min_size=5,
    max_size=50,
).filter(lambda candidate: candidate.strip() != "")
def make_resource(
    unique_id: str,
    resource_type: str = "generic_resource",
    name: str = "resource",
    raw_references: list[str] | None = None,
    attributes: dict | None = None,
) -> DiscoveredResource:
    """Build a Kubernetes/AMD64 ``DiscoveredResource`` for resolver tests.

    Args:
        unique_id: Identifier stored on the resource.
        resource_type: Resource type name.
        name: Resource name.
        raw_references: Referenced IDs; a falsy value (``None`` or ``[]``)
            is replaced with a new empty list.
        attributes: Attribute mapping; a falsy value (``None`` or ``{}``)
            is replaced with ``{"key": "value"}``.

    Returns:
        The resource, with provider, platform category, architecture and
        endpoint fixed to the test defaults.
    """
    effective_attributes = attributes if attributes else {"key": "value"}
    effective_references = raw_references if raw_references else []
    return DiscoveredResource(
        resource_type=resource_type,
        unique_id=unique_id,
        name=name,
        provider=ProviderType.KUBERNETES,
        platform_category=PlatformCategory.CONTAINER_ORCHESTRATION,
        architecture=CpuArchitecture.AMD64,
        endpoint="https://api.internal.lab:6443",
        attributes=effective_attributes,
        raw_references=effective_references,
    )
def make_scan_result(resources: list[DiscoveredResource]) -> ScanResult:
    """Wrap *resources* in a complete (non-partial) ``ScanResult``.

    Warnings and errors are empty; the timestamp and profile hash are fixed
    placeholder values.
    """
    fixed_fields = {
        "warnings": [],
        "errors": [],
        "scan_timestamp": "2024-01-15T10:30:00Z",
        "profile_hash": "test_hash",
        "is_partial": False,
    }
    return ScanResult(resources=resources, **fixed_fields)
# Strategy to generate a list of resources with unique IDs and controlled references
@st.composite
def acyclic_resource_graph_strategy(draw):
    """Draw 2-8 resources whose references can never form a cycle.

    Resource ``i`` may reference at most three resources, all with a smaller
    index, so every edge points backwards in creation order.
    """
    count = draw(st.integers(min_value=2, max_value=8))
    known_ids = []
    graph_resources = []

    for index in range(count):
        # IDs created so far, i.e. the only legal reference targets.
        earlier_ids = list(known_ids)
        current_id = f"resource_{index}"
        known_ids.append(current_id)

        if index == 0:
            # The first resource has nothing to point at.
            references = []
        else:
            ref_count = draw(st.integers(min_value=0, max_value=min(index, 3)))
            references = draw(
                st.lists(
                    st.sampled_from(earlier_ids),
                    min_size=ref_count,
                    max_size=ref_count,
                    unique=True,
                )
            )

        graph_resources.append(
            make_resource(
                unique_id=current_id,
                name=f"res_{index}",
                raw_references=references,
            )
        )
    return graph_resources
@st.composite
def cyclic_resource_graph_strategy(draw):
    """Draw 2-6 resources and wire a subset of them into a reference ring.

    All resources start without references; then at least two distinct
    resources are chosen and each is made to reference the next one, with the
    last wrapping around to the first.
    """
    count = draw(st.integers(min_value=2, max_value=6))
    all_ids = [f"resource_{index}" for index in range(count)]
    graph_resources = [
        make_resource(unique_id=uid, name=f"res_{index}", raw_references=[])
        for index, uid in enumerate(all_ids)
    ]

    ring_size = draw(st.integers(min_value=2, max_value=count))
    ring = draw(
        st.lists(
            st.sampled_from(list(range(count))),
            min_size=ring_size,
            max_size=ring_size,
            unique=True,
        )
    )

    for position, source_index in enumerate(ring):
        # Modulo wraps the final member back to the first, closing the ring.
        successor_index = ring[(position + 1) % len(ring)]
        successor_id = all_ids[successor_index]
        source_refs = graph_resources[source_index].raw_references
        if successor_id not in source_refs:
            source_refs.append(successor_id)
    return graph_resources
@st.composite
def resources_with_unresolved_refs_strategy(draw):
    """Draw resources whose references all point outside the inventory.

    Returns:
        ``(resources, unresolved_ids)``: 1-5 resources, each referencing up to
        two of the 1-4 generated external IDs, plus the full list of those
        external IDs (whether or not any resource references them).
    """
    resource_count = draw(st.integers(min_value=1, max_value=5))
    inventory_ids = [f"resource_{index}" for index in range(resource_count)]

    unresolved_count = draw(st.integers(min_value=1, max_value=4))
    # Each external ID is either path-like (contains "/") or a plain name;
    # the original comments say these should map to a data source and a
    # variable suggestion respectively.
    unresolved_ids = [
        f"external/resource/{index}"
        if draw(st.booleans())
        else f"external_var_{index}"
        for index in range(unresolved_count)
    ]

    generated = []
    for index, uid in enumerate(inventory_ids):
        ext_ref_count = draw(
            st.integers(min_value=0, max_value=min(unresolved_count, 2))
        )
        external_refs = draw(
            st.lists(
                st.sampled_from(unresolved_ids),
                min_size=ext_ref_count,
                max_size=ext_ref_count,
                unique=True,
            )
        )
        generated.append(
            make_resource(
                unique_id=uid,
                name=f"res_{index}",
                raw_references=external_refs,
            )
        )
    return generated, unresolved_ids
# ---------------------------------------------------------------------------
# Property 6: Dependency relationship identification
# ---------------------------------------------------------------------------
class TestDependencyRelationshipIdentification:
    """Property 6: Dependency relationship identification.

    **Validates: Requirements 2.1**

    Every raw reference that names another resource in the inventory SHALL
    yield one ``ResourceRelationship`` in the resolved graph.
    """

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_created_for_each_resolved_reference(
        self, resources: list[DiscoveredResource]
    ):
        """The relationship count equals the number of in-inventory references."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        inventory_ids = {resource.unique_id for resource in resources}
        expected_relationships = sum(
            1
            for resource in resources
            for reference in resource.raw_references
            if reference in inventory_ids
        )
        assert len(graph.relationships) == expected_relationships

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_source_and_target_are_correct(
        self, resources: list[DiscoveredResource]
    ):
        """Both endpoints of every relationship are IDs from the inventory."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        inventory_ids = {resource.unique_id for resource in resources}
        for relationship in graph.relationships:
            # The referencing side ...
            assert relationship.source_id in inventory_ids
            # ... and the referenced side must both be known resources.
            assert relationship.target_id in inventory_ids

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_type_is_valid(
        self, resources: list[DiscoveredResource]
    ):
        """Every relationship_type is one of the three accepted values."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        allowed_types = {"parent-child", "reference", "dependency"}
        for relationship in graph.relationships:
            assert relationship.relationship_type in allowed_types

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_relationship_source_attribute_is_non_empty(
        self, resources: list[DiscoveredResource]
    ):
        """Every source_attribute is a string with at least one character."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        for relationship in graph.relationships:
            assert isinstance(relationship.source_attribute, str)
            assert len(relationship.source_attribute) > 0
# ---------------------------------------------------------------------------
# Property 7: Cycle detection correctness
# ---------------------------------------------------------------------------
class TestCycleDetectionCorrectness:
    """Property 7: Cycle detection correctness.

    **Validates: Requirements 2.3**

    A graph that contains a cycle SHALL have it reported in the cycles list;
    an acyclic graph SHALL report zero cycles.
    """

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_acyclic_graph_reports_zero_cycles(
        self, resources: list[DiscoveredResource]
    ):
        """No cycles are reported for an acyclic input."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()
        assert len(graph.cycles) == 0

    @given(resources=cyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_cyclic_graph_reports_at_least_one_cycle(
        self, resources: list[DiscoveredResource]
    ):
        """At least one cycle is reported when the input contains a ring."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()
        assert len(graph.cycles) >= 1

    @given(resources=cyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_cycle_contains_valid_resource_ids(
        self, resources: list[DiscoveredResource]
    ):
        """Reported cycles have two or more members, all drawn from the inventory."""
        scan_result = make_scan_result(resources)
        inventory_ids = {resource.unique_id for resource in resources}
        graph = DependencyResolver(scan_result).resolve()

        for reported_cycle in graph.cycles:
            assert len(reported_cycle) >= 2, "A cycle must involve at least 2 resources"
            for member_id in reported_cycle:
                assert member_id in inventory_ids

    @given(resources=cyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_cycle_reports_have_resolution_suggestions(
        self, resources: list[DiscoveredResource]
    ):
        """Each cycle report names a two-element break edge, its type and a strategy."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()

        allowed_break_types = {"parent-child", "reference", "dependency"}
        # NOTE(review): an empty cycle_reports makes this loop check nothing;
        # confirm whether cyclic input should also require a non-empty list.
        for cycle_report in graph.cycle_reports:
            assert cycle_report.suggested_break is not None
            assert len(cycle_report.suggested_break) == 2
            assert cycle_report.break_relationship_type in allowed_break_types
            assert isinstance(cycle_report.resolution_strategy, str)
            assert len(cycle_report.resolution_strategy) > 0
# ---------------------------------------------------------------------------
# Property 8: Topological order validity
# ---------------------------------------------------------------------------
class TestTopologicalOrderValidity:
    """Property 8: Topological order validity.

    **Validates: Requirements 2.4**

    In the topological order of an acyclic dependency graph, a resource
    never precedes any resource it depends on.
    """

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_topological_order_contains_all_resources(
        self, resources: list[DiscoveredResource]
    ):
        """The set of IDs in the topological order equals the inventory's ID set."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()
        known_ids = {res.unique_id for res in resources}
        assert set(graph.topological_order) == known_ids

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_dependencies_appear_before_dependents(
        self, resources: list[DiscoveredResource]
    ):
        """A referenced resource is ordered ahead of the resource referencing it.

        A resource that lists B in ``raw_references`` depends on B, so B must
        sit at a lower index of the topological order. References to IDs
        outside the inventory are ignored here.
        """
        scan = make_scan_result(resources)
        known_ids = {res.unique_id for res in resources}
        graph = DependencyResolver(scan).resolve()

        # Index of every resource ID within the computed order.
        rank = {res_id: index for index, res_id in enumerate(graph.topological_order)}

        for dependent in resources:
            for dependency_id in dependent.raw_references:
                if dependency_id not in known_ids:
                    continue
                assert rank[dependency_id] < rank[dependent.unique_id], (
                    f"Resource '{dependency_id}' (dependency) should appear before "
                    f"'{dependent.unique_id}' (dependent) in topological order"
                )

    @given(resources=acyclic_resource_graph_strategy())
    @settings(max_examples=100)
    def test_topological_order_has_no_duplicates(
        self, resources: list[DiscoveredResource]
    ):
        """No resource ID occurs twice in the topological order."""
        graph = DependencyResolver(make_scan_result(resources)).resolve()
        order = graph.topological_order
        assert len(order) == len(set(order))
# ---------------------------------------------------------------------------
# Property 9: Unresolved references become data sources or variables
# ---------------------------------------------------------------------------
class TestUnresolvedReferences:
    """Property 9: Unresolved references become data sources or variables.

    **Validates: Requirements 2.5**

    A raw reference whose target ID is absent from the inventory must be
    reported as an UnresolvedReference whose ``suggested_resolution`` is
    either "data_source" or "variable".
    """

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_are_tracked(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """One UnresolvedReference is reported per reference to an unknown ID."""
        resources, _ = data
        scan = make_scan_result(resources)
        known_ids = {res.unique_id for res in resources}
        graph = DependencyResolver(scan).resolve()

        # Every raw reference that points outside the inventory counts once.
        dangling_count = sum(
            1
            for res in resources
            for ref in res.raw_references
            if ref not in known_ids
        )
        assert len(graph.unresolved_references) == dangling_count

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_suggest_data_source_or_variable(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """The suggested resolution is always 'data_source' or 'variable'."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()
        for entry in graph.unresolved_references:
            assert entry.suggested_resolution in {"data_source", "variable"}, (
                f"Expected 'data_source' or 'variable', got '{entry.suggested_resolution}'"
            )

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_have_valid_source_resource(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """The ``source_resource_id`` of each UnresolvedReference is an inventory ID."""
        resources, _ = data
        scan = make_scan_result(resources)
        known_ids = {res.unique_id for res in resources}
        graph = DependencyResolver(scan).resolve()
        for entry in graph.unresolved_references:
            assert entry.source_resource_id in known_ids

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_unresolved_references_have_non_empty_fields(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """``source_attribute`` and ``referenced_id`` are both non-empty strings."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()
        for entry in graph.unresolved_references:
            assert isinstance(entry.source_attribute, str)
            assert len(entry.source_attribute) > 0
            assert isinstance(entry.referenced_id, str)
            assert len(entry.referenced_id) > 0

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_ids_with_slash_or_colon_suggest_data_source(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """A referenced ID containing '/' or ':' yields a 'data_source' suggestion."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()
        for entry in graph.unresolved_references:
            has_separator = "/" in entry.referenced_id or ":" in entry.referenced_id
            if not has_separator:
                continue
            assert entry.suggested_resolution == "data_source", (
                f"Reference '{entry.referenced_id}' contains '/' or ':' "
                f"and should suggest 'data_source', got '{entry.suggested_resolution}'"
            )

    @given(data=resources_with_unresolved_refs_strategy())
    @settings(max_examples=100)
    def test_ids_without_slash_or_colon_suggest_variable(
        self, data: tuple[list[DiscoveredResource], list[str]]
    ):
        """A referenced ID with neither '/' nor ':' yields a 'variable' suggestion."""
        resources, _ = data
        graph = DependencyResolver(make_scan_result(resources)).resolve()
        for entry in graph.unresolved_references:
            has_separator = "/" in entry.referenced_id or ":" in entry.referenced_id
            if has_separator:
                continue
            assert entry.suggested_resolution == "variable", (
                f"Reference '{entry.referenced_id}' has no '/' or ':' "
                f"and should suggest 'variable', got '{entry.suggested_resolution}'"
            )

View File

@@ -0,0 +1,308 @@
"""Property-based tests for drift report correctness.
**Validates: Requirements 7.3**
Properties tested:
- Property 22: Drift report correctness — For any terraform plan output
containing planned changes, the Validator SHALL report each change with
the correct resource address and change type (add, modify, destroy).
"""
import json
import tempfile
from unittest.mock import MagicMock, patch
from hypothesis import given, settings, assume
from hypothesis import strategies as st
from iac_reverse.models import PlannedChange, ValidationResult
from iac_reverse.validator import Validator
# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------
# Change type the drift report is expected to carry for each terraform
# plan action.
ACTION_TO_CHANGE_TYPE = {
    "create": "add",
    "update": "modify",
    "delete": "destroy",
}
# The terraform actions exercised by these tests: the keys of the mapping
# above, in insertion order.
TERRAFORM_ACTIONS = list(ACTION_TO_CHANGE_TYPE)
# Building blocks for terraform resource addresses, which take the form
# <resource_type>.<resource_name>, optionally prefixed by module.<module_name>.

# Resource-type part of an address.
resource_type_prefix_strategy = st.sampled_from(
    [
        "aws_instance",
        "kubernetes_deployment",
        "docker_service",
        "harvester_virtualmachine",
        "synology_shared_folder",
        "windows_service",
        "bare_metal_hardware",
        "null_resource",
        "local_file",
        "random_id",
    ]
)

# Resource-name part of an address: 1-20 characters taken from the Unicode
# "Ll" category plus underscore, starting with a letter or an underscore.
resource_name_suffix_strategy = st.text(
    alphabet=st.characters(whitelist_characters="_", whitelist_categories=("Ll",)),
    min_size=1,
    max_size=20,
).filter(lambda name: name[0] == "_" or name[0].isalpha())
@st.composite
def resource_address_strategy(draw):
    """Generate a terraform resource address such as 'aws_instance.my_server'.

    Returns:
        Either ``<type>.<name>`` or, when the drawn boolean is true,
        ``module.<module_name>.<type>.<name>``.
    """
    type_part = draw(resource_type_prefix_strategy)
    name_part = draw(resource_name_suffix_strategy)
    bare_address = f"{type_part}.{name_part}"

    # A coin flip decides whether the address is nested inside a module.
    if not draw(st.booleans()):
        return bare_address

    module_name_strategy = st.text(
        alphabet=st.characters(whitelist_characters="_", whitelist_categories=("Ll",)),
        min_size=1,
        max_size=10,
    ).filter(lambda name: name[0].isalpha())
    return f"module.{draw(module_name_strategy)}.{bare_address}"
# Draws one of the terraform plan actions listed in TERRAFORM_ACTIONS.
terraform_action_strategy = st.sampled_from(TERRAFORM_ACTIONS)
@st.composite
def planned_change_entry_strategy(draw):
    """Generate one planned change as an ``(addr, action)`` tuple.

    ``addr`` is a resource address and ``action`` is one of the terraform
    plan actions; the address is drawn first.
    """
    return draw(resource_address_strategy()), draw(terraform_action_strategy)
@st.composite
def planned_changes_list_strategy(draw):
    """Generate a non-empty list of planned changes with unique resource addresses.

    Returns:
        A list of 1 to 10 ``(addr, action)`` tuples in which no resource
        address occurs more than once.
    """
    # ``unique_by`` lets Hypothesis enforce address uniqueness itself, which
    # replaces the manual "seen" bookkeeping and the always-true
    # ``assume(len(changes) >= 1)`` guard (``min_size=1`` covers it).
    return draw(
        st.lists(
            planned_change_entry_strategy(),
            min_size=1,
            max_size=10,
            unique_by=lambda entry: entry[0],
        )
    )
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# JSON text used as the stdout of a successful validate step: valid, with
# no errors and no diagnostics.
VALIDATE_SUCCESS_JSON = json.dumps(
    dict(valid=True, error_count=0, diagnostics=[])
)
def _make_completed_process(returncode=0, stdout="", stderr=""):
    """Return a MagicMock exposing the given returncode, stdout and stderr.

    The mock stands in for the result object of a subprocess call; only
    these three attributes are set explicitly.
    """
    # Keyword arguments to MagicMock are applied as attributes on the mock.
    return MagicMock(returncode=returncode, stdout=stdout, stderr=stderr)
def build_plan_output(changes: list[tuple[str, str]]) -> str:
    """Build terraform plan JSON streaming output from (addr, action) tuples.

    The output holds one JSON document per line: a ``version`` record, one
    ``planned_change`` record per input tuple (in input order), and a final
    ``change_summary`` record.

    Args:
        changes: ``(resource address, terraform action)`` pairs.

    Returns:
        The newline-joined JSON lines, without a trailing newline.
    """
    from collections import Counter

    # Count every action in one pass; actions that never occur read as 0.
    action_counts = Counter(action for _, action in changes)

    records = [{"type": "version", "terraform": "1.7.0"}]
    records.extend(
        {
            "type": "planned_change",
            "change": {
                "resource": {"addr": addr},
                "action": action,
            },
        }
        for addr, action in changes
    )
    # The summary counts only create/update/delete.
    records.append(
        {
            "type": "change_summary",
            "changes": {
                "add": action_counts["create"],
                "change": action_counts["update"],
                "remove": action_counts["delete"],
            },
        }
    )
    return "\n".join(json.dumps(record) for record in records)
def run_validator_with_plan(plan_output: str) -> ValidationResult:
    """Run ``Validator.validate`` against canned subprocess results.

    ``subprocess.run`` is patched to return, in call order: a result with
    return code 0, a result with return code 0 whose stdout is
    VALIDATE_SUCCESS_JSON, and a result with return code 2 whose stdout is
    ``plan_output``. ``shutil.which`` is patched to report a terraform
    binary at /usr/bin/terraform.

    Args:
        plan_output: Text served as stdout of the third subprocess call.

    Returns:
        Whatever ``Validator().validate`` returns for a fresh temporary directory.
    """
    canned_results = [
        _make_completed_process(returncode=0),
        _make_completed_process(returncode=0, stdout=VALIDATE_SUCCESS_JSON),
        _make_completed_process(returncode=2, stdout=plan_output),
    ]
    with tempfile.TemporaryDirectory() as work_dir, patch(
        "shutil.which", return_value="/usr/bin/terraform"
    ), patch("subprocess.run", side_effect=canned_results):
        return Validator().validate(work_dir)
# ---------------------------------------------------------------------------
# Property 22: Drift report correctness
# ---------------------------------------------------------------------------
class TestDriftReportCorrectness:
    """Property 22: Drift report correctness.

    **Validates: Requirements 7.3**

    Given terraform plan output with N planned changes, the drift report
    must hold exactly N entries, each carrying the right resource address
    and change type (add, modify, or destroy).
    """

    @given(changes=planned_changes_list_strategy())
    @settings(max_examples=100)
    def test_drift_report_count_matches_planned_changes(
        self, changes: list[tuple[str, str]]
    ):
        """The report has as many entries as the plan output has planned changes."""
        result = run_validator_with_plan(build_plan_output(changes))

        assert len(result.planned_changes) == len(changes), (
            f"Expected {len(changes)} planned changes, "
            f"got {len(result.planned_changes)}. "
            f"Input changes: {changes}"
        )

    @given(changes=planned_changes_list_strategy())
    @settings(max_examples=100)
    def test_drift_report_resource_addresses_match(
        self, changes: list[tuple[str, str]]
    ):
        """The set of reported addresses equals the set of addresses in the plan."""
        result = run_validator_with_plan(build_plan_output(changes))

        expected_addrs = {address for address, _ in changes}
        actual_addrs = {entry.resource_address for entry in result.planned_changes}

        assert actual_addrs == expected_addrs, (
            f"Resource address mismatch.\n"
            f"Expected: {sorted(expected_addrs)}\n"
            f"Actual: {sorted(actual_addrs)}"
        )

    @given(changes=planned_changes_list_strategy())
    @settings(max_examples=100)
    def test_drift_report_change_types_correct(
        self, changes: list[tuple[str, str]]
    ):
        """Each reported change type is the mapped form of its terraform action."""
        result = run_validator_with_plan(build_plan_output(changes))

        # Address -> change type the report should show for it.
        expected_by_addr = {
            address: ACTION_TO_CHANGE_TYPE[action] for address, action in changes
        }

        for entry in result.planned_changes:
            addr = entry.resource_address
            assert addr in expected_by_addr, (
                f"Unexpected resource address '{addr}' in planned changes"
            )
            expected_type = expected_by_addr[addr]
            assert entry.change_type == expected_type, (
                f"For resource '{addr}': expected change_type='{expected_type}', "
                f"got '{entry.change_type}'"
            )

    @given(changes=planned_changes_list_strategy())
    @settings(max_examples=100)
    def test_drift_report_plan_success_is_false(
        self, changes: list[tuple[str, str]]
    ):
        """``plan_success`` is False whenever the plan contains changes."""
        result = run_validator_with_plan(build_plan_output(changes))

        assert result.plan_success is False, (
            f"plan_success should be False when there are {len(changes)} "
            f"planned changes, but got True"
        )

    @given(changes=planned_changes_list_strategy())
    @settings(max_examples=100)
    def test_drift_report_each_change_is_planned_change_instance(
        self, changes: list[tuple[str, str]]
    ):
        """Every entry of the report is a PlannedChange object."""
        result = run_validator_with_plan(build_plan_output(changes))

        for i, change in enumerate(result.planned_changes):
            assert isinstance(change, PlannedChange), (
                f"Entry {i} is {type(change).__name__}, expected PlannedChange"
            )

    @given(changes=planned_changes_list_strategy())
    @settings(max_examples=100)
    def test_drift_report_change_type_in_valid_set(
        self, changes: list[tuple[str, str]]
    ):
        """Reported change types are limited to 'add', 'modify' and 'destroy'."""
        result = run_validator_with_plan(build_plan_output(changes))

        valid_types = {"add", "modify", "destroy"}
        for change in result.planned_changes:
            assert change.change_type in valid_types, (
                f"Invalid change_type '{change.change_type}' for resource "
                f"'{change.resource_address}'. Must be one of {valid_types}"
            )

    @given(changes=planned_changes_list_strategy())
    @settings(max_examples=100)
    def test_drift_report_no_duplicate_addresses(
        self, changes: list[tuple[str, str]]
    ):
        """The report lists each resource address at most once."""
        result = run_validator_with_plan(build_plan_output(changes))

        addresses = [entry.resource_address for entry in result.planned_changes]
        assert len(addresses) == len(set(addresses)), (
            f"Duplicate resource addresses found in drift report: "
            f"{[a for a in addresses if addresses.count(a) > 1]}"
        )

View File

@@ -0,0 +1,790 @@
"""Property-based tests for Incremental Scan Engine.
**Validates: Requirements 8.1, 8.2, 8.3, 8.5, 8.6**
Properties tested:
- Property 23: Change classification correctness
- Property 24: Incremental update scope
- Property 25: Removed resource exclusion
- Property 26: Snapshot retention
"""
import json
import tempfile
from pathlib import Path
from hypothesis import given, settings, assume
from hypothesis import strategies as st
from iac_reverse.incremental import ChangeDetector, IncrementalUpdater, SnapshotStore
from iac_reverse.models import (
ChangeSummary,
ChangeType,
CpuArchitecture,
DiscoveredResource,
PlatformCategory,
ProviderType,
ResourceChange,
ScanResult,
)
# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------
# Each enum-backed strategy samples from every member of its enum.
provider_strategy = st.sampled_from(list(ProviderType))
platform_strategy = st.sampled_from(list(PlatformCategory))
architecture_strategy = st.sampled_from(list(CpuArchitecture))

# A single attribute value: short lowercase-alphanumeric text, an integer
# from 0 to 1000, or a boolean.
attribute_value_strategy = st.one_of(
    st.text(alphabet="abcdefghijklmnopqrstuvwxyz0123456789", min_size=1, max_size=20),
    st.integers(min_value=0, max_value=1000),
    st.booleans(),
)

# An attribute dictionary with 1 to 5 entries keyed by short
# lowercase/underscore names.
attributes_strategy = st.dictionaries(
    keys=st.text(alphabet="abcdefghijklmnopqrstuvwxyz_", min_size=1, max_size=10),
    values=attribute_value_strategy,
    min_size=1,
    max_size=5,
)
# Resource names: 1-15 characters of lowercase letters and underscores,
# always starting with a letter.
resource_name_strategy = st.text(
    alphabet="abcdefghijklmnopqrstuvwxyz_",
    min_size=1,
    max_size=15,
).filter(lambda name: name[0].isalpha())

# Resource types the generated resources may take.
resource_type_strategy = st.sampled_from(
    [
        "docker_service",
        "kubernetes_deployment",
        "synology_shared_folder",
        "harvester_virtualmachine",
        "bare_metal_hardware",
        "windows_service",
    ]
)
@st.composite
def discovered_resource_strategy(draw, uid=None):
    """Generate a DiscoveredResource with every field populated.

    Args:
        uid: ID to give the resource. When falsy, a 5-30 character ID
            starting with a letter is drawn instead.

    Returns:
        A DiscoveredResource with an empty ``raw_references`` list.
    """
    kind = draw(resource_type_strategy)
    if uid:
        identifier = uid
    else:
        identifier = draw(
            st.text(
                alphabet="abcdefghijklmnopqrstuvwxyz0123456789_-/",
                min_size=5,
                max_size=30,
            ).filter(lambda text: text[0].isalpha())
        )
    # Keyword arguments are evaluated left to right, so the remaining fields
    # are drawn in the order they are listed here.
    return DiscoveredResource(
        resource_type=kind,
        unique_id=identifier,
        name=draw(resource_name_strategy),
        provider=draw(provider_strategy),
        platform_category=draw(platform_strategy),
        architecture=draw(architecture_strategy),
        endpoint=draw(
            st.text(alphabet="abcdefghijklmnopqrstuvwxyz.", min_size=3, max_size=20)
        ),
        attributes=draw(attributes_strategy),
        raw_references=[],
    )
@st.composite
def scan_result_strategy(draw, min_resources=0, max_resources=8):
    """Generate a ScanResult whose resources all have distinct IDs.

    Args:
        min_resources: Smallest number of resources to generate.
        max_resources: Largest number of resources to generate.

    Returns:
        A non-partial ScanResult with no warnings or errors, a fixed
        timestamp and a fixed profile hash.
    """
    num_resources = draw(st.integers(min_value=min_resources, max_value=max_resources))
    suffix_strategy = st.text(
        min_size=3, max_size=8, alphabet="abcdefghijklmnopqrstuvwxyz"
    )

    resources = []
    for i in range(num_resources):
        # The loop index is part of every ID and the suffix is letters only,
        # so two iterations can never produce the same ID; no duplicate
        # bookkeeping or fallback ID is needed.
        uid = f"resource_{i}_{draw(suffix_strategy)}"
        resources.append(draw(discovered_resource_strategy(uid=uid)))

    return ScanResult(
        resources=resources,
        warnings=[],
        errors=[],
        scan_timestamp="2024-01-15T10:30:00Z",
        profile_hash="test_profile_hash",
        is_partial=False,
    )
@st.composite
def scan_result_pair_strategy(draw):
    """Generate a ``(previous, current)`` pair of overlapping scan results.

    The pair is built from three groups of resources:
    - shared: present in both scans, with attributes that may or may not differ
    - removed: present only in the previous scan
    - added: present only in the current scan

    At least one resource exists across the three groups.
    """
    shared_count = draw(st.integers(min_value=0, max_value=4))
    removed_count = draw(st.integers(min_value=0, max_value=3))
    added_count = draw(st.integers(min_value=0, max_value=3))
    assume(shared_count + removed_count + added_count >= 1)

    before = []
    after = []

    for index in range(shared_count):
        # Fields that stay the same across both scans.
        identity = {
            "unique_id": f"shared_{index}",
            "resource_type": draw(resource_type_strategy),
            "name": draw(resource_name_strategy),
            "provider": draw(provider_strategy),
            "platform_category": draw(platform_strategy),
            "architecture": draw(architecture_strategy),
            "endpoint": draw(
                st.text(min_size=3, max_size=10, alphabet="abcdefghijklmnopqrstuvwxyz.")
            ),
        }
        old_attrs = draw(attributes_strategy)
        before.append(
            DiscoveredResource(**identity, attributes=old_attrs, raw_references=[])
        )
        # Either draw fresh attributes or carry over a copy of the old ones.
        new_attrs = draw(attributes_strategy) if draw(st.booleans()) else dict(old_attrs)
        after.append(
            DiscoveredResource(**identity, attributes=new_attrs, raw_references=[])
        )

    # Resources that exist only in the previous scan.
    before.extend(
        draw(discovered_resource_strategy(uid=f"removed_{index}"))
        for index in range(removed_count)
    )
    # Resources that exist only in the current scan.
    after.extend(
        draw(discovered_resource_strategy(uid=f"added_{index}"))
        for index in range(added_count)
    )

    def _scan(resources, timestamp):
        # Both scans share everything except their resources and timestamp.
        return ScanResult(
            resources=resources,
            warnings=[],
            errors=[],
            scan_timestamp=timestamp,
            profile_hash="test_profile",
            is_partial=False,
        )

    return _scan(before, "2024-01-14T09:00:00Z"), _scan(after, "2024-01-15T10:30:00Z")
# ---------------------------------------------------------------------------
# Property 23: Change classification correctness
# ---------------------------------------------------------------------------
class TestChangeClassificationCorrectness:
    """Property 23: Change classification correctness.

    **Validates: Requirements 8.1, 8.5**

    Comparing a previous and a current scan must classify each changed
    resource exactly once as added, removed, or modified, and the summary
    counts must equal the number of changes in each category.
    """

    @given(data=scan_result_pair_strategy())
    @settings(max_examples=100)
    def test_every_resource_classified_exactly_once(self, data):
        """No resource is listed twice, and every listed resource comes from a scan."""
        previous, current = data
        summary = ChangeDetector().compare(current, previous)

        known_ids = {
            res.unique_id for res in (*previous.resources, *current.resources)
        }

        change_ids = [change.resource_id for change in summary.changes]

        # A resource may appear in the change list only once.
        assert len(change_ids) == len(set(change_ids)), (
            f"Duplicate resource IDs in changes: "
            f"{[rid for rid in change_ids if change_ids.count(rid) > 1]}"
        )

        # Every change must point at a resource from one of the two scans.
        for change in summary.changes:
            assert change.resource_id in known_ids, (
                f"Change references unknown resource: {change.resource_id}"
            )

    @given(data=scan_result_pair_strategy())
    @settings(max_examples=100)
    def test_added_resources_in_current_not_previous(self, data):
        """An ADDED resource exists in the current scan and not in the previous one."""
        previous, current = data
        summary = ChangeDetector().compare(current, previous)

        old_ids = {res.unique_id for res in previous.resources}
        new_ids = {res.unique_id for res in current.resources}

        for change in summary.changes:
            if change.change_type != ChangeType.ADDED:
                continue
            assert change.resource_id in new_ids, (
                f"ADDED resource {change.resource_id} not in current scan"
            )
            assert change.resource_id not in old_ids, (
                f"ADDED resource {change.resource_id} exists in previous scan"
            )

    @given(data=scan_result_pair_strategy())
    @settings(max_examples=100)
    def test_removed_resources_in_previous_not_current(self, data):
        """A REMOVED resource exists in the previous scan and not in the current one."""
        previous, current = data
        summary = ChangeDetector().compare(current, previous)

        old_ids = {res.unique_id for res in previous.resources}
        new_ids = {res.unique_id for res in current.resources}

        for change in summary.changes:
            if change.change_type != ChangeType.REMOVED:
                continue
            assert change.resource_id in old_ids, (
                f"REMOVED resource {change.resource_id} not in previous scan"
            )
            assert change.resource_id not in new_ids, (
                f"REMOVED resource {change.resource_id} exists in current scan"
            )

    @given(data=scan_result_pair_strategy())
    @settings(max_examples=100)
    def test_modified_resources_in_both_with_differing_attributes(self, data):
        """A MODIFIED resource exists in both scans and its attributes differ."""
        previous, current = data
        summary = ChangeDetector().compare(current, previous)

        old_by_id = {res.unique_id: res for res in previous.resources}
        new_by_id = {res.unique_id: res for res in current.resources}

        for change in summary.changes:
            if change.change_type != ChangeType.MODIFIED:
                continue
            assert change.resource_id in old_by_id, (
                f"MODIFIED resource {change.resource_id} not in previous scan"
            )
            assert change.resource_id in new_by_id, (
                f"MODIFIED resource {change.resource_id} not in current scan"
            )
            # The two versions must not carry equal attribute dictionaries.
            assert old_by_id[change.resource_id].attributes != new_by_id[change.resource_id].attributes, (
                f"MODIFIED resource {change.resource_id} has identical attributes"
            )

    @given(data=scan_result_pair_strategy())
    @settings(max_examples=100)
    def test_summary_counts_match_actual_changes(self, data):
        """The summary's three counters equal the per-type totals of its change list."""
        previous, current = data
        summary = ChangeDetector().compare(current, previous)

        def tally(kind):
            # Number of listed changes of the given type.
            return sum(1 for change in summary.changes if change.change_type == kind)

        actual_added = tally(ChangeType.ADDED)
        actual_removed = tally(ChangeType.REMOVED)
        actual_modified = tally(ChangeType.MODIFIED)

        assert summary.added_count == actual_added, (
            f"added_count={summary.added_count} != actual={actual_added}"
        )
        assert summary.removed_count == actual_removed, (
            f"removed_count={summary.removed_count} != actual={actual_removed}"
        )
        assert summary.modified_count == actual_modified, (
            f"modified_count={summary.modified_count} != actual={actual_modified}"
        )

    @given(data=scan_result_pair_strategy())
    @settings(max_examples=100)
    def test_change_types_are_valid(self, data):
        """Each change's type is ADDED, REMOVED or MODIFIED."""
        previous, current = data
        summary = ChangeDetector().compare(current, previous)

        accepted = {ChangeType.ADDED, ChangeType.REMOVED, ChangeType.MODIFIED}
        for change in summary.changes:
            assert change.change_type in accepted, (
                f"Invalid change_type: {change.change_type}"
            )
# ---------------------------------------------------------------------------
# Property 24: Incremental update scope
# ---------------------------------------------------------------------------
class TestIncrementalUpdateScope:
    """Property 24: Incremental update scope.

    **Validates: Requirements 8.2**

    Applying a change set to existing IaC files may only touch files that
    hold added, modified, or removed resources; files holding only
    unchanged resources must stay identical.
    """

    @given(data=scan_result_pair_strategy())
    @settings(max_examples=100, deadline=None)
    def test_only_changed_resource_files_are_modified(self, data):
        """Only .tf files of resource types with changes are reported as modified."""
        previous, current = data
        summary = ChangeDetector().compare(current, previous)

        # Nothing to check when the two scans are equivalent.
        assume(len(summary.changes) > 0)

        with tempfile.TemporaryDirectory() as tmp_dir:
            out_dir = Path(tmp_dir)

            # Seed one placeholder file per resource type of the previous
            # scan, plus one for a type that no resource uses.
            untouched_type = "unchanged_resource_type"
            seeded_types = {res.resource_type for res in previous.resources}
            seeded_types.add(untouched_type)
            for rt in seeded_types:
                (out_dir / f"{rt}.tf").write_text(
                    f'# Placeholder for {rt}\n', encoding="utf-8"
                )

            # Remember what the untouched file looked like before the update.
            untouched_path = out_dir / f"{untouched_type}.tf"
            baseline_content = untouched_path.read_text(encoding="utf-8")

            # Attributes of each added resource, taken from the first
            # resource in the current scan with a matching ID.
            resource_attributes = {}
            for change in summary.changes:
                if change.change_type == ChangeType.ADDED:
                    match = next(
                        (
                            res
                            for res in current.resources
                            if res.unique_id == change.resource_id
                        ),
                        None,
                    )
                    if match is not None:
                        resource_attributes[change.resource_id] = match.attributes

            updater = IncrementalUpdater(
                change_summary=summary,
                output_dir=tmp_dir,
                resource_attributes=resource_attributes,
            )
            updater.apply()

            # The file of the unused type must be byte-for-byte unchanged.
            assert untouched_path.read_text(encoding="utf-8") == baseline_content, (
                "File for unchanged resource type was modified"
            )

            # Apart from the state file, every modified file must be the .tf
            # file of a resource type that has at least one change.
            changed_resource_types = {change.resource_type for change in summary.changes}
            for modified_file in updater.modified_files:
                file_name = Path(modified_file).name
                if file_name == "terraform.tfstate":
                    continue
                assert file_name.endswith(".tf"), (
                    f"Unexpected modified file: {file_name}"
                )
                rt = file_name.removesuffix(".tf")
                assert rt in changed_resource_types, (
                    f"File {file_name} was modified but resource type "
                    f"'{rt}' has no changes"
                )
# ---------------------------------------------------------------------------
# Property 25: Removed resource exclusion
# ---------------------------------------------------------------------------
class TestRemovedResourceExclusion:
    """Property 25: Removed resource exclusion.

    **Validates: Requirements 8.3**

    For any resource classified as removed, the updated IaC output SHALL
    not contain a resource block for that resource, AND the updated state
    file SHALL not contain a state entry for that resource.
    """

    @staticmethod
    def _write_tf_files(output_dir, resources):
        """Write one ``<resource_type>.tf`` file per type with a block per resource."""
        from iac_reverse.generator.sanitize import sanitize_identifier

        resources_by_type: dict[str, list] = {}
        for r in resources:
            resources_by_type.setdefault(r.resource_type, []).append(r)
        for rt, group in resources_by_type.items():
            lines = []
            for r in group:
                tf_name = sanitize_identifier(r.name)
                lines.append(f'# Source: {r.unique_id}')
                lines.append(f'resource "{rt}" "{tf_name}" {{')
                for k, v in r.attributes.items():
                    lines.append(f' {k} = "{v}"')
                lines.append("}")
                lines.append("")
            tf_path = Path(output_dir) / f"{rt}.tf"
            tf_path.write_text("\n".join(lines), encoding="utf-8")

    @staticmethod
    def _write_state_file(output_dir, resources):
        """Write a ``terraform.tfstate`` with one entry per resource; return its path."""
        from iac_reverse.generator.sanitize import sanitize_identifier

        state = {
            "version": 4,
            "terraform_version": "1.7.0",
            "serial": 1,
            "lineage": "test-lineage",
            "outputs": {},
            "resources": [],
        }
        for r in resources:
            tf_name = sanitize_identifier(r.name)
            state["resources"].append({
                "mode": "managed",
                "type": r.resource_type,
                "name": tf_name,
                "provider": f'provider["registry.terraform.io/hashicorp/{r.resource_type.split("_")[0]}"]',
                "instances": [{
                    "schema_version": 0,
                    "attributes": {"id": r.unique_id, **r.attributes},
                    "sensitive_attributes": [],
                    "dependencies": [],
                }],
            })
        state_path = Path(output_dir) / "terraform.tfstate"
        state_path.write_text(json.dumps(state, indent=2), encoding="utf-8")
        return state_path

    @staticmethod
    def _added_resource_attributes(summary, current):
        """Map each ADDED change's resource id to that resource's attributes.

        Ids with no matching resource in *current* are omitted. When several
        resources share a unique_id the first one wins, matching a linear
        search that stops at the first hit.
        """
        attributes_by_id = {}
        for r in current.resources:
            attributes_by_id.setdefault(r.unique_id, r.attributes)
        return {
            change.resource_id: attributes_by_id[change.resource_id]
            for change in summary.changes
            if change.change_type == ChangeType.ADDED
            and change.resource_id in attributes_by_id
        }

    @classmethod
    def _run_update(cls, summary, current, output_dir):
        """Apply an incremental update for *summary* inside *output_dir*."""
        updater = IncrementalUpdater(
            change_summary=summary,
            output_dir=output_dir,
            resource_attributes=cls._added_resource_attributes(summary, current),
        )
        updater.apply()

    @given(data=scan_result_pair_strategy())
    @settings(max_examples=100, deadline=None)
    def test_removed_resources_not_in_tf_files(self, data):
        """Removed resources do not appear in .tf files after update."""
        from iac_reverse.generator.sanitize import sanitize_identifier

        previous, current = data
        detector = ChangeDetector()
        summary = detector.compare(current, previous)
        removed_changes = [
            c for c in summary.changes if c.change_type == ChangeType.REMOVED
        ]
        # The property is only meaningful when something was actually removed.
        assume(len(removed_changes) > 0)

        with tempfile.TemporaryDirectory() as tmp_dir:
            # Seed the output directory with blocks for every previous resource.
            self._write_tf_files(tmp_dir, previous.resources)
            self._run_update(summary, current, tmp_dir)

            # Verify removed resources are not in any .tf file
            for change in removed_changes:
                tf_path = Path(tmp_dir) / f"{change.resource_type}.tf"
                if not tf_path.exists():
                    continue
                content = tf_path.read_text(encoding="utf-8")
                tf_name = sanitize_identifier(change.resource_name)
                block_header = f'resource "{change.resource_type}" "{tf_name}"'
                assert block_header not in content, (
                    f"Removed resource {change.resource_id} still has a "
                    f"resource block in {tf_path.name}"
                )

    @given(data=scan_result_pair_strategy())
    @settings(max_examples=100, deadline=None)
    def test_removed_resources_not_in_state_file(self, data):
        """Removed resources do not appear in the state file after update."""
        from iac_reverse.generator.sanitize import sanitize_identifier

        previous, current = data
        detector = ChangeDetector()
        summary = detector.compare(current, previous)
        removed_changes = [
            c for c in summary.changes if c.change_type == ChangeType.REMOVED
        ]
        assume(len(removed_changes) > 0)

        with tempfile.TemporaryDirectory() as tmp_dir:
            # Initial state file plus .tf files so the updater can process removals.
            state_path = self._write_state_file(tmp_dir, previous.resources)
            self._write_tf_files(tmp_dir, previous.resources)
            self._run_update(summary, current, tmp_dir)

            # Verify removed resources are not in state file
            updated_state = json.loads(state_path.read_text(encoding="utf-8"))
            state_entries = updated_state.get("resources", [])
            for change in removed_changes:
                tf_name = sanitize_identifier(change.resource_name)
                matching = [
                    e for e in state_entries
                    if e.get("type") == change.resource_type
                    and e.get("name") == tf_name
                ]
                assert len(matching) == 0, (
                    f"Removed resource {change.resource_id} still has a "
                    f"state entry (type={change.resource_type}, name={tf_name})"
                )
# ---------------------------------------------------------------------------
# Property 26: Snapshot retention
# ---------------------------------------------------------------------------
class TestSnapshotRetention:
    """Property 26: Snapshot retention.

    **Validates: Requirements 8.6**

    For any sequence of N scans (N >= 2) for the same Scan_Profile, at
    least the two most recent scan results SHALL be retained in storage
    after each scan completes.
    """

    @staticmethod
    def _scan_result(resource, index, profile_hash):
        """Wrap *resource* in a complete ScanResult dated ``2024-01-(15 + index)``."""
        return ScanResult(
            resources=[resource],
            warnings=[],
            errors=[],
            scan_timestamp=f"2024-01-{15 + index:02d}T10:00:00Z",
            profile_hash=profile_hash,
            is_partial=False,
        )

    @staticmethod
    def _store_at(store, result, profile_hash, mock_time):
        """Store *result* while the snapshot store module's clock returns *mock_time*."""
        from datetime import datetime
        from unittest.mock import patch

        with patch(
            "iac_reverse.incremental.snapshot_store.datetime"
        ) as mock_dt:
            mock_dt.now.return_value = mock_time
            # Calling the patched name as a constructor still builds real datetimes.
            mock_dt.side_effect = lambda *a, **kw: datetime(*a, **kw)
            store.store_snapshot(result, profile_hash)

    # deadline=None: every example writes files to a temp directory, so the
    # default per-example deadline would make these tests timing-sensitive.
    @given(num_scans=st.integers(min_value=2, max_value=8))
    @settings(max_examples=100, deadline=None)
    def test_at_least_two_snapshots_retained(self, num_scans):
        """After N scans, at least 2 most recent snapshots are retained."""
        from datetime import datetime, timezone

        with tempfile.TemporaryDirectory() as tmp_dir:
            store = SnapshotStore(base_dir=tmp_dir)
            profile_hash = "retention_test_profile"
            # Each store call gets a distinct mocked timestamp (one day apart).
            for i in range(num_scans):
                resource = DiscoveredResource(
                    resource_type="docker_service",
                    unique_id=f"svc_{i}",
                    name=f"service_{i}",
                    provider=ProviderType.DOCKER_SWARM,
                    platform_category=PlatformCategory.CONTAINER_ORCHESTRATION,
                    architecture=CpuArchitecture.AMD64,
                    endpoint="localhost",
                    attributes={"version": str(i)},
                    raw_references=[],
                )
                self._store_at(
                    store,
                    self._scan_result(resource, i, profile_hash),
                    profile_hash,
                    datetime(2024, 1, 15 + i, 10, 0, 0, tzinfo=timezone.utc),
                )

            # Count remaining snapshots
            snapshot_files = list(store.snapshot_dir.glob(f"{profile_hash}_*.json"))
            assert len(snapshot_files) >= 2, (
                f"After {num_scans} scans, only {len(snapshot_files)} "
                f"snapshots retained (expected >= 2)"
            )

    @given(num_scans=st.integers(min_value=2, max_value=8))
    @settings(max_examples=100, deadline=None)
    def test_most_recent_snapshot_is_loadable(self, num_scans):
        """The most recent snapshot can be loaded after multiple stores."""
        from datetime import datetime, timezone

        with tempfile.TemporaryDirectory() as tmp_dir:
            store = SnapshotStore(base_dir=tmp_dir)
            profile_hash = "loadable_test_profile"
            last_resource_id = None
            for i in range(num_scans):
                last_resource_id = f"svc_{i}"
                resource = DiscoveredResource(
                    resource_type="kubernetes_deployment",
                    unique_id=last_resource_id,
                    name=f"deploy_{i}",
                    provider=ProviderType.KUBERNETES,
                    platform_category=PlatformCategory.CONTAINER_ORCHESTRATION,
                    architecture=CpuArchitecture.AARCH64,
                    endpoint="k8s-api.local",
                    attributes={"replicas": i + 1},
                    raw_references=[],
                )
                self._store_at(
                    store,
                    self._scan_result(resource, i, profile_hash),
                    profile_hash,
                    datetime(2024, 1, 15 + i, 10, 0, 0, tzinfo=timezone.utc),
                )

            # Load the most recent snapshot
            loaded = store.load_previous(profile_hash)
            assert loaded is not None, "Could not load most recent snapshot"
            assert len(loaded.resources) == 1
            assert loaded.resources[0].unique_id == last_resource_id, (
                f"Expected most recent resource '{last_resource_id}', "
                f"got '{loaded.resources[0].unique_id}'"
            )

    @given(num_scans=st.integers(min_value=3, max_value=10))
    @settings(max_examples=100, deadline=None)
    def test_different_profiles_retain_independently(self, num_scans):
        """Snapshots for different profiles are retained independently."""
        from datetime import datetime, timezone

        with tempfile.TemporaryDirectory() as tmp_dir:
            store = SnapshotStore(base_dir=tmp_dir)
            profile_a = "profile_alpha"
            profile_b = "profile_beta"
            profiles = [profile_a, profile_b]
            for i in range(num_scans):
                for offset, profile_hash in enumerate(profiles):
                    resource = DiscoveredResource(
                        resource_type="docker_service",
                        unique_id=f"{profile_hash}_svc_{i}",
                        name=f"svc_{i}",
                        provider=ProviderType.DOCKER_SWARM,
                        platform_category=PlatformCategory.CONTAINER_ORCHESTRATION,
                        architecture=CpuArchitecture.AMD64,
                        endpoint="localhost",
                        attributes={"idx": i},
                        raw_references=[],
                    )
                    # Unique timestamp per store call: the minute field is a
                    # running counter across both profiles (max 19 here).
                    scan_idx = i * len(profiles) + offset
                    self._store_at(
                        store,
                        self._scan_result(resource, i, profile_hash),
                        profile_hash,
                        datetime(2024, 1, 15, 10, scan_idx, 0, tzinfo=timezone.utc),
                    )

            # Both profiles should have at least 2 snapshots
            snapshots_a = list(store.snapshot_dir.glob(f"{profile_a}_*.json"))
            snapshots_b = list(store.snapshot_dir.glob(f"{profile_b}_*.json"))
            assert len(snapshots_a) >= 2, (
                f"Profile A has {len(snapshots_a)} snapshots (expected >= 2)"
            )
            assert len(snapshots_b) >= 2, (
                f"Profile B has {len(snapshots_b)} snapshots (expected >= 2)"
            )

View File

@@ -0,0 +1,803 @@
"""Property-based tests for multi-provider merging and filtering.
**Validates: Requirements 5.3, 5.4, 6.1, 6.2, 6.4, 6.6, 6.7**
Property 18: Multi-provider merge with naming conflict resolution
For any two or more resource inventories from different on-premises providers
where resource names collide, the merged inventory SHALL contain all resources
from all providers, with conflicting names prefixed by the provider identifier,
and no resources lost.
Property 19: Provider block generation
For any resource set spanning N distinct on-premises providers, the generated
provider configuration SHALL contain exactly N provider blocks, one per distinct
provider.
Property 20: Scan profile validation completeness (additional multi-provider scenarios)
Already covered in test_scan_profile_validation_prop.py; this adds multi-provider
scenarios.
Property 21: Filtering correctness
For any scan profile with resource type filters, the discovered resources SHALL
be a subset where every resource's type is in the filter list. No resource outside
the filter criteria shall appear.
"""
from typing import Callable
from hypothesis import given, assume, settings
from hypothesis import strategies as st
from iac_reverse.models import (
CpuArchitecture,
DiscoveredResource,
GeneratedFile,
PlatformCategory,
PROVIDER_PLATFORM_MAP,
PROVIDER_SUPPORTED_RESOURCE_TYPES,
ProviderType,
ScanProfile,
ScanProgress,
ScanResult,
)
from iac_reverse.generator import ProviderBlockGenerator, ResourceMerger
from iac_reverse.plugin_base import ProviderPlugin
from iac_reverse.scanner.scanner import Scanner
# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------
# Draw any member of the ProviderType / CpuArchitecture enums.
provider_type_strategy = st.sampled_from(list(ProviderType))
architecture_strategy = st.sampled_from(list(CpuArchitecture))
# Credential dicts with 1-5 entries. Keys are 1-20 characters from the Unicode
# letter ("L") and number ("N") categories; values are any non-empty text of at
# most 50 characters.
non_empty_credentials_strategy = st.dictionaries(
    keys=st.text(
        min_size=1, max_size=20,
        alphabet=st.characters(whitelist_categories=("L", "N")),
    ),
    values=st.text(min_size=1, max_size=50),
    min_size=1,
    max_size=5,
)
# Strategy for resource names that could collide across providers:
# 1-30 characters of letters, numbers and dash punctuation ("Pd"); names that
# are empty after stripping whitespace are rejected.
resource_name_strategy = st.text(
    min_size=1,
    max_size=30,
    alphabet=st.characters(whitelist_categories=("L", "N", "Pd")),
).filter(lambda s: s.strip())
def discovered_resource_strategy(
    provider: ProviderType,
    name: str | None = None,
) -> st.SearchStrategy[DiscoveredResource]:
    """Generate a DiscoveredResource for a given provider with optional fixed name.

    Args:
        provider: Provider whose supported resource types and platform
            category are used for every generated resource.
        name: When not ``None``, every generated resource carries exactly this
            name (including an explicitly passed empty string); otherwise
            names are drawn from ``resource_name_strategy``.

    Returns:
        A strategy producing resources with a random supported resource type,
        a UUID string as ``unique_id``, a random architecture, and fixed
        endpoint/attributes/raw_references values.
    """
    supported_types = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
    platform_category = PROVIDER_PLATFORM_MAP[provider]
    # Compare against None rather than truthiness so that a caller-supplied
    # name is never silently replaced by a random one.
    name_strategy = st.just(name) if name is not None else resource_name_strategy
    return st.builds(
        DiscoveredResource,
        resource_type=st.sampled_from(supported_types),
        unique_id=st.uuids().map(str),
        name=name_strategy,
        provider=st.just(provider),
        platform_category=st.just(platform_category),
        architecture=architecture_strategy,
        endpoint=st.just("http://localhost:8080"),
        attributes=st.just({"key": "value"}),
        raw_references=st.just([]),
    )
def scan_result_strategy(
    provider: ProviderType,
    resources: list[DiscoveredResource] | None = None,
) -> st.SearchStrategy[ScanResult]:
    """Build a strategy yielding ScanResult objects for *provider*.

    With an explicit *resources* list the strategy always yields one
    pre-built result wrapping that list; otherwise each result holds one
    to five freshly generated resources for the provider.
    """
    fixed_timestamp = "2024-01-01T00:00:00Z"
    fixed_hash = "abc123"

    if resources is None:
        generated_resources = st.lists(
            discovered_resource_strategy(provider),
            min_size=1,
            max_size=5,
        )
        return st.builds(
            ScanResult,
            resources=generated_resources,
            warnings=st.just([]),
            errors=st.just([]),
            scan_timestamp=st.just(fixed_timestamp),
            profile_hash=st.just(fixed_hash),
        )

    prebuilt = ScanResult(
        resources=resources,
        warnings=[],
        errors=[],
        scan_timestamp=fixed_timestamp,
        profile_hash=fixed_hash,
    )
    return st.just(prebuilt)
# ---------------------------------------------------------------------------
# Mock Plugin for Filtering Tests
# ---------------------------------------------------------------------------
class FilteringPlugin(ProviderPlugin):
    """Test double that fabricates one resource for each requested resource type."""

    def __init__(self, provider: ProviderType):
        self._provider = provider
        self._supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]

    def authenticate(self, credentials: dict[str, str]) -> None:
        """Accept any credentials without checking them."""
        return None

    def get_platform_category(self) -> PlatformCategory:
        """Return the platform category mapped to this plugin's provider."""
        return PROVIDER_PLATFORM_MAP[self._provider]

    def list_endpoints(self) -> list[str]:
        """Return the single fixed endpoint used by this test double."""
        return ["http://localhost:8080"]

    def list_supported_resource_types(self) -> list[str]:
        """Return the resource types registered for this plugin's provider."""
        return self._supported

    def detect_architecture(self, endpoint: str) -> CpuArchitecture:
        """Report AMD64 for every endpoint."""
        return CpuArchitecture.AMD64

    def discover_resources(
        self,
        endpoints: list[str],
        resource_types: list[str],
        progress_callback: Callable[[ScanProgress], None],
    ) -> ScanResult:
        """Discover exactly one resource per requested resource type.

        ``progress_callback`` is invoked once after each resource is created,
        with running counts of resources and resource types processed.
        """
        discovered = []
        for index, resource_type in enumerate(resource_types):
            done_so_far = index + 1
            fabricated = DiscoveredResource(
                resource_type=resource_type,
                unique_id=f"id-{resource_type}-{index}",
                name=f"resource-{resource_type}-{index}",
                provider=self._provider,
                platform_category=PROVIDER_PLATFORM_MAP[self._provider],
                architecture=CpuArchitecture.AMD64,
                endpoint="http://localhost:8080",
                attributes={"key": "value"},
            )
            discovered.append(fabricated)
            progress = ScanProgress(
                current_resource_type=resource_type,
                resources_discovered=done_so_far,
                resource_types_completed=done_so_far,
                total_resource_types=len(resource_types),
            )
            progress_callback(progress)
        return ScanResult(
            resources=discovered,
            warnings=[],
            errors=[],
            scan_timestamp="2024-01-01T00:00:00Z",
            profile_hash="abc123",
        )
# ---------------------------------------------------------------------------
# Property 18: Multi-provider merge with naming conflict resolution
# ---------------------------------------------------------------------------
class TestMultiProviderMergeConflictResolution:
    """Property 18: Multi-provider merge with naming conflict resolution.

    When resources from different providers share the same name, the merger
    prefixes with provider identifier.

    **Validates: Requirements 5.3**
    """

    @staticmethod
    def _resource(provider, name, unique_id, endpoint, attributes):
        """Build an AMD64 resource of the provider's first supported resource type."""
        return DiscoveredResource(
            resource_type=PROVIDER_SUPPORTED_RESOURCE_TYPES[provider][0],
            unique_id=unique_id,
            name=name,
            provider=provider,
            platform_category=PROVIDER_PLATFORM_MAP[provider],
            architecture=CpuArchitecture.AMD64,
            endpoint=endpoint,
            attributes=attributes,
        )

    @staticmethod
    def _result(*resources):
        """Wrap *resources* in a ScanResult with empty warnings, errors and metadata."""
        return ScanResult(
            resources=list(resources),
            warnings=[], errors=[],
            scan_timestamp="", profile_hash="",
        )

    @given(
        provider_a=provider_type_strategy,
        provider_b=provider_type_strategy,
        shared_name=resource_name_strategy,
    )
    @settings(max_examples=100)
    def test_conflicting_names_are_prefixed(self, provider_a, provider_b, shared_name):
        """Resources with the same name from different providers get prefixed."""
        assume(provider_a != provider_b)
        resource_a = self._resource(
            provider_a, shared_name, "id-a", "http://host-a:8080", {"source": "a"}
        )
        resource_b = self._resource(
            provider_b, shared_name, "id-b", "http://host-b:8080", {"source": "b"}
        )

        merger = ResourceMerger()
        merged = merger.merge([self._result(resource_a), self._result(resource_b)])

        # Both resources must be present (no loss)
        assert len(merged) == 2
        # Conflicting names must be prefixed with provider identifier
        merged_names = {r.name for r in merged}
        expected_name_a = f"{provider_a.value}_{shared_name}"
        expected_name_b = f"{provider_b.value}_{shared_name}"
        assert expected_name_a in merged_names, (
            f"Expected '{expected_name_a}' in merged names, got: {merged_names}"
        )
        assert expected_name_b in merged_names, (
            f"Expected '{expected_name_b}' in merged names, got: {merged_names}"
        )

    @given(
        provider_a=provider_type_strategy,
        provider_b=provider_type_strategy,
        name_a=resource_name_strategy,
        name_b=resource_name_strategy,
    )
    @settings(max_examples=100)
    def test_non_conflicting_names_unchanged(self, provider_a, provider_b, name_a, name_b):
        """Resources with unique names across providers are not prefixed."""
        assume(provider_a != provider_b)
        assume(name_a != name_b)
        resource_a = self._resource(
            provider_a, name_a, "id-a", "http://host-a:8080", {}
        )
        resource_b = self._resource(
            provider_b, name_b, "id-b", "http://host-b:8080", {}
        )

        merger = ResourceMerger()
        merged = merger.merge([self._result(resource_a), self._result(resource_b)])

        # No resources lost
        assert len(merged) == 2
        # Names should remain unchanged (no prefix)
        merged_names = {r.name for r in merged}
        assert name_a in merged_names, (
            f"Expected original name '{name_a}' preserved, got: {merged_names}"
        )
        assert name_b in merged_names, (
            f"Expected original name '{name_b}' preserved, got: {merged_names}"
        )

    @given(
        provider_a=provider_type_strategy,
        provider_b=provider_type_strategy,
        provider_c=provider_type_strategy,
        shared_name=resource_name_strategy,
    )
    @settings(max_examples=100)
    def test_three_provider_conflict_all_prefixed(
        self, provider_a, provider_b, provider_c, shared_name
    ):
        """When 3 providers share a name, all get prefixed."""
        providers = [provider_a, provider_b, provider_c]
        assume(len(set(providers)) == 3)
        # One single-resource scan result per provider, all using the same name.
        scan_results = [
            self._result(
                self._resource(
                    provider,
                    shared_name,
                    f"id-{provider.value}",
                    f"http://{provider.value}:8080",
                    {},
                )
            )
            for provider in providers
        ]

        merger = ResourceMerger()
        merged = merger.merge(scan_results)

        # All 3 resources preserved
        assert len(merged) == 3
        # All must be prefixed
        for provider in providers:
            expected = f"{provider.value}_{shared_name}"
            assert any(r.name == expected for r in merged), (
                f"Expected prefixed name '{expected}' in merged results"
            )

    @given(
        provider_a=provider_type_strategy,
        provider_b=provider_type_strategy,
        shared_name=resource_name_strategy,
    )
    @settings(max_examples=100)
    def test_merge_preserves_all_resources_no_loss(
        self, provider_a, provider_b, shared_name
    ):
        """Merging never loses resources regardless of naming conflicts."""
        assume(provider_a != provider_b)
        resource_a = self._resource(
            provider_a, shared_name, "id-a", "http://host-a:8080", {"source": "a"}
        )
        resource_b = self._resource(
            provider_b, shared_name, "id-b", "http://host-b:8080", {"source": "b"}
        )
        # Also add a non-conflicting resource
        resource_c = self._resource(
            provider_a,
            "unique_resource_name",
            "id-c",
            "http://host-a:8080",
            {"source": "c"},
        )

        merger = ResourceMerger()
        merged = merger.merge(
            [self._result(resource_a, resource_c), self._result(resource_b)]
        )

        # Total resources = 3 (no loss)
        assert len(merged) == 3
        # Every input resource is still present, identified by its unique_id
        unique_ids = {r.unique_id for r in merged}
        assert "id-a" in unique_ids
        assert "id-b" in unique_ids
        assert "id-c" in unique_ids
# ---------------------------------------------------------------------------
# Property 19: Provider block generation
# ---------------------------------------------------------------------------
class TestProviderBlockGeneration:
    """Property 19: Provider block generation.

    For any set of providers used, a provider block is generated for each.

    **Validates: Requirements 5.4**
    """

    @staticmethod
    def _generate(providers):
        """Run ProviderBlockGenerator for one token-credential profile per provider."""
        profiles = [
            ScanProfile(
                provider=p,
                credentials={"token": "test-token"},
            )
            for p in providers
        ]
        generator = ProviderBlockGenerator()
        return generator.generate(profiles=profiles, provider_types=set(providers))

    # unique=True makes Hypothesis generate distinct providers directly, so the
    # min_size bound holds for the list the test actually receives.
    @given(
        providers=st.lists(
            provider_type_strategy,
            min_size=1,
            max_size=6,
            unique=True,
        ),
    )
    @settings(max_examples=100)
    def test_one_provider_block_per_distinct_provider(self, providers):
        """Generated output contains exactly one provider block per distinct provider."""
        from iac_reverse.generator.provider_block import _PROVIDER_METADATA

        result = self._generate(providers)

        # Result should be a GeneratedFile
        assert isinstance(result, GeneratedFile)
        assert result.filename == "providers.tf"
        content = result.content

        # Each provider type should have exactly one `provider "name"` block
        for provider_type in set(providers):
            # Index 0 of the metadata entry is the Terraform provider name.
            tf_name = _PROVIDER_METADATA[provider_type][0]
            provider_block_marker = f'provider "{tf_name}"'
            count = content.count(provider_block_marker)
            assert count == 1, (
                f"Expected exactly 1 provider block for '{tf_name}', "
                f"found {count} in:\n{content}"
            )

    @given(
        providers=st.lists(
            provider_type_strategy,
            min_size=2,
            max_size=6,
            unique=True,
        ),
    )
    @settings(max_examples=100)
    def test_required_providers_block_lists_all(self, providers):
        """The terraform required_providers block lists all providers used."""
        from iac_reverse.generator.provider_block import _PROVIDER_METADATA

        result = self._generate(providers)
        content = result.content

        # The required_providers block must exist
        assert "required_providers" in content
        # Each provider's name and source must appear in the generated content
        for provider_type in set(providers):
            tf_name, source, _ = _PROVIDER_METADATA[provider_type]
            assert tf_name in content, (
                f"Expected provider name '{tf_name}' in required_providers block"
            )
            assert source in content, (
                f"Expected source '{source}' in required_providers block"
            )

    @given(provider=provider_type_strategy)
    @settings(max_examples=100)
    def test_single_provider_generates_one_block(self, provider):
        """A single provider generates exactly one provider block."""
        from iac_reverse.generator.provider_block import _PROVIDER_METADATA

        result = self._generate([provider])
        content = result.content
        tf_name = _PROVIDER_METADATA[provider][0]

        # Exactly one provider block
        provider_block_marker = f'provider "{tf_name}"'
        assert content.count(provider_block_marker) == 1
        # terraform block with required_providers
        assert "terraform {" in content
        assert "required_providers" in content
# ---------------------------------------------------------------------------
# Property 20: Scan profile validation completeness (multi-provider scenarios)
# ---------------------------------------------------------------------------
class TestScanProfileValidationMultiProvider:
    """Property 20: Scan profile validation completeness (multi-provider scenarios).

    Additional multi-provider scenarios beyond what's in test_scan_profile_validation_prop.py.

    **Validates: Requirements 6.1, 6.6, 6.7**
    """

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
    )
    @settings(max_examples=100)
    def test_valid_multi_provider_profiles_all_pass_validation(
        self, provider, credentials
    ):
        """Each valid profile in a multi-provider set passes validation independently."""
        errors = ScanProfile(
            provider=provider,
            credentials=credentials,
            resource_type_filters=None,
        ).validate()
        assert errors == [], f"Expected no errors for valid profile, got: {errors}"

    @given(
        provider_a=provider_type_strategy,
        provider_b=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
    )
    @settings(max_examples=100)
    def test_mixed_valid_invalid_profiles_detected_independently(
        self, provider_a, provider_b, credentials
    ):
        """Invalid profiles are detected independently in a multi-provider set."""
        # A profile with credentials next to one whose credentials are empty.
        good_profile = ScanProfile(
            provider=provider_a,
            credentials=credentials,
            resource_type_filters=None,
        )
        bad_profile = ScanProfile(
            provider=provider_b,
            credentials={},
            resource_type_filters=None,
        )

        good_errors = good_profile.validate()
        bad_errors = bad_profile.validate()

        assert good_errors == []
        assert len(bad_errors) >= 1
        assert any("credentials" in message for message in bad_errors)

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
    )
    @settings(max_examples=100)
    def test_cross_provider_resource_types_detected_as_unsupported(
        self, provider, credentials
    ):
        """Resource types from a different provider are flagged as unsupported."""
        # Borrow up to two resource types from the first provider that differs.
        candidates = [p for p in ProviderType if p != provider]
        assume(len(candidates) > 0)
        foreign_types = PROVIDER_SUPPORTED_RESOURCE_TYPES[candidates[0]]
        borrowed = foreign_types[:2]

        profile = ScanProfile(
            provider=provider,
            credentials=credentials,
            resource_type_filters=foreign_types[:2],
        )
        errors = profile.validate()

        # Only expect an error when a borrowed type is genuinely unsupported;
        # providers may share resource type names.
        own_types = set(PROVIDER_SUPPORTED_RESOURCE_TYPES[provider])
        unsupported = [t for t in borrowed if t not in own_types]
        if unsupported:
            assert any("unsupported" in e.lower() for e in errors), (
                f"Expected unsupported error for cross-provider types, got: {errors}"
            )
# ---------------------------------------------------------------------------
# Property 21: Filtering correctness
# ---------------------------------------------------------------------------
class TestFilteringCorrectness:
"""Property 21: Filtering correctness.
When resource type filters are specified, only those types appear in the
scan result.
**Validates: Requirements 6.2, 6.4**
"""
@given(
provider=provider_type_strategy,
credentials=non_empty_credentials_strategy,
)
@settings(max_examples=100)
def test_filtered_scan_returns_only_filtered_types(self, provider, credentials):
"""When filters are specified, only filtered resource types appear in results."""
supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
assume(len(supported) >= 2)
# Pick a subset of supported types as filter
filter_types = supported[:2]
plugin = FilteringPlugin(provider=provider)
profile = ScanProfile(
provider=provider,
credentials=credentials,
resource_type_filters=filter_types,
)
scanner = Scanner(profile=profile, plugin=plugin)
result = scanner.scan()
# All discovered resources must have types in the filter list
for resource in result.resources:
assert resource.resource_type in filter_types, (
f"Resource type '{resource.resource_type}' not in filter list "
f"{filter_types}"
)
@given(
provider=provider_type_strategy,
credentials=non_empty_credentials_strategy,
)
@settings(max_examples=100)
def test_no_filter_returns_all_supported_types(self, provider, credentials):
"""When no filters are specified, all supported types are discovered."""
supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
plugin = FilteringPlugin(provider=provider)
profile = ScanProfile(
provider=provider,
credentials=credentials,
resource_type_filters=None,
)
scanner = Scanner(profile=profile, plugin=plugin)
result = scanner.scan()
# All supported types should be discovered
discovered_types = {r.resource_type for r in result.resources}
for rt in supported:
assert rt in discovered_types, (
f"Expected type '{rt}' to be discovered when no filter, "
f"got: {discovered_types}"
)
@given(
provider=provider_type_strategy,
credentials=non_empty_credentials_strategy,
)
@settings(max_examples=100)
def test_single_type_filter_returns_only_that_type(self, provider, credentials):
"""A single-type filter returns only resources of that type."""
supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
assume(len(supported) >= 1)
single_filter = [supported[0]]
plugin = FilteringPlugin(provider=provider)
profile = ScanProfile(
provider=provider,
credentials=credentials,
resource_type_filters=single_filter,
)
scanner = Scanner(profile=profile, plugin=plugin)
result = scanner.scan()
# Only the single filtered type should appear
for resource in result.resources:
assert resource.resource_type == single_filter[0], (
f"Expected only '{single_filter[0]}', got '{resource.resource_type}'"
)
@given(
    provider=provider_type_strategy,
    credentials=non_empty_credentials_strategy,
)
@settings(max_examples=100)
def test_empty_filter_returns_no_resources(self, provider, credentials):
    """An explicit empty filter list (``[]``, not ``None``) discovers nothing."""
    filtering_plugin = FilteringPlugin(provider=provider)
    empty_filter_profile = ScanProfile(
        provider=provider,
        credentials=credentials,
        resource_type_filters=[],
    )
    scanner = Scanner(profile=empty_filter_profile, plugin=filtering_plugin)
    result = scanner.scan()

    # With no type requested the inventory must be empty.
    assert len(result.resources) == 0, (
        f"Expected 0 resources with empty filter, got {len(result.resources)}"
    )
@given(
    provider=provider_type_strategy,
    credentials=non_empty_credentials_strategy,
)
@settings(max_examples=100)
def test_filter_subset_excludes_non_filtered_types(self, provider, credentials):
    """Supported types left out of the filter never show up in the result.

    The filter keeps the provider's first two supported types; every
    remaining supported type must be absent from the discovered inventory.
    """
    supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
    # At least three types are needed so that something is actually excluded.
    assume(len(supported) >= 3)
    filter_types, excluded_types = supported[:2], supported[2:]

    filtering_plugin = FilteringPlugin(provider=provider)
    subset_profile = ScanProfile(
        provider=provider,
        credentials=credentials,
        resource_type_filters=filter_types,
    )
    scan_result = Scanner(profile=subset_profile, plugin=filtering_plugin).scan()

    discovered_types = set()
    for found in scan_result.resources:
        discovered_types.add(found.resource_type)

    for excluded in excluded_types:
        assert excluded not in discovered_types, (
            f"Excluded type '{excluded}' should not appear in filtered results"
        )

View File

@@ -0,0 +1,222 @@
"""Property-based tests for resource inventory completeness.
**Validates: Requirements 1.2**
Property 1: Resource inventory completeness
For any discovered resource from any on-premises provider (Docker Swarm, Kubernetes,
Synology, Harvester, Bare Metal, Windows), the resulting inventory entry SHALL contain
non-empty values for resource_type, unique_id, name, provider, platform_category,
architecture, and attributes fields.
"""
from hypothesis import given, settings
from hypothesis import strategies as st
from iac_reverse.models import (
CpuArchitecture,
DiscoveredResource,
PlatformCategory,
ProviderType,
ScanResult,
)
# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------
def _is_not_blank(candidate: str) -> bool:
    """Return True unless *candidate* is empty or whitespace-only."""
    return candidate.strip() != ""


# Character pools shared by the text strategies below.
_letters_numbers_punct_symbols = st.characters(
    whitelist_categories=("L", "N", "P", "S"),
)
_letters_numbers_underscore = st.characters(
    whitelist_categories=("L", "N"),
    whitelist_characters="_",
)

# Enum-valued fields are sampled from the members of their enum.
provider_type_strategy = st.sampled_from(list(ProviderType))
platform_category_strategy = st.sampled_from(list(PlatformCategory))
cpu_architecture_strategy = st.sampled_from(list(CpuArchitecture))

# Generic non-blank text, up to 100 characters.
non_empty_text_strategy = st.text(
    alphabet=_letters_numbers_punct_symbols,
    min_size=1,
    max_size=100,
).filter(_is_not_blank)

# Identifier-like resource type names: letters, numbers and underscores.
resource_type_strategy = st.text(
    alphabet=_letters_numbers_underscore,
    min_size=1,
    max_size=50,
).filter(_is_not_blank)

# Unique ids and endpoints allow up to 200 characters; names up to 100.
unique_id_strategy = st.text(
    alphabet=_letters_numbers_punct_symbols,
    min_size=1,
    max_size=200,
).filter(_is_not_blank)

name_strategy = st.text(
    alphabet=_letters_numbers_punct_symbols,
    min_size=1,
    max_size=100,
).filter(_is_not_blank)

endpoint_strategy = st.text(
    alphabet=_letters_numbers_punct_symbols,
    min_size=1,
    max_size=200,
).filter(_is_not_blank)

# Attribute dictionaries always carry between 1 and 10 entries.
_attribute_key_strategy = st.text(
    alphabet=_letters_numbers_underscore,
    min_size=1,
    max_size=30,
)
_attribute_value_strategy = st.one_of(
    st.text(min_size=1, max_size=50),
    st.integers(min_value=0, max_value=10000),
    st.booleans(),
)
non_empty_attributes_strategy = st.dictionaries(
    keys=_attribute_key_strategy,
    values=_attribute_value_strategy,
    min_size=1,
    max_size=10,
)

# Raw references may be empty lists and may contain empty strings.
raw_references_strategy = st.lists(
    st.text(min_size=0, max_size=100),
    min_size=0,
    max_size=5,
)

# Complete DiscoveredResource instances assembled from the field strategies.
discovered_resource_strategy = st.builds(
    DiscoveredResource,
    resource_type=resource_type_strategy,
    unique_id=unique_id_strategy,
    name=name_strategy,
    provider=provider_type_strategy,
    platform_category=platform_category_strategy,
    architecture=cpu_architecture_strategy,
    endpoint=endpoint_strategy,
    attributes=non_empty_attributes_strategy,
    raw_references=raw_references_strategy,
)
# ---------------------------------------------------------------------------
# Property Tests
# ---------------------------------------------------------------------------
class TestResourceInventoryCompleteness:
    """Property 1: Resource inventory completeness.

    **Validates: Requirements 1.2**

    For any discovered resource from any on-premises provider, the resulting
    inventory entry SHALL contain non-empty values for resource_type, unique_id,
    name, provider, platform_category, architecture, and attributes fields.

    Every test funnels through the two helper assertions below so that the
    per-field tests and the "all fields at once" tests enforce exactly the
    same rules (including the whitespace-only check on string fields).
    """

    @staticmethod
    def _assert_non_blank_str(value) -> None:
        """Assert *value* is a ``str`` that is neither empty nor whitespace-only."""
        assert isinstance(value, str)
        assert len(value) > 0
        assert value.strip() != ""

    @classmethod
    def _assert_mandatory_fields(cls, resource: DiscoveredResource) -> None:
        """Assert every mandatory inventory field of *resource* is populated."""
        # String fields: must hold visible content.
        cls._assert_non_blank_str(resource.resource_type)
        cls._assert_non_blank_str(resource.unique_id)
        cls._assert_non_blank_str(resource.name)
        # Enum fields: isinstance already rules out None.
        assert isinstance(resource.provider, ProviderType)
        assert isinstance(resource.platform_category, PlatformCategory)
        assert isinstance(resource.architecture, CpuArchitecture)
        # Attributes: a dict with at least one entry.
        assert isinstance(resource.attributes, dict)
        assert len(resource.attributes) > 0

    @given(resource=discovered_resource_strategy)
    def test_resource_type_is_non_empty(self, resource: DiscoveredResource):
        """resource_type field must be a non-empty string."""
        self._assert_non_blank_str(resource.resource_type)

    @given(resource=discovered_resource_strategy)
    def test_unique_id_is_non_empty(self, resource: DiscoveredResource):
        """unique_id field must be a non-empty string."""
        self._assert_non_blank_str(resource.unique_id)

    @given(resource=discovered_resource_strategy)
    def test_name_is_non_empty(self, resource: DiscoveredResource):
        """name field must be a non-empty string."""
        self._assert_non_blank_str(resource.name)

    @given(resource=discovered_resource_strategy)
    def test_provider_is_valid_enum(self, resource: DiscoveredResource):
        """provider field must be a valid ProviderType enum value."""
        assert isinstance(resource.provider, ProviderType)

    @given(resource=discovered_resource_strategy)
    def test_platform_category_is_valid_enum(self, resource: DiscoveredResource):
        """platform_category field must be a valid PlatformCategory enum value."""
        assert isinstance(resource.platform_category, PlatformCategory)

    @given(resource=discovered_resource_strategy)
    def test_architecture_is_valid_enum(self, resource: DiscoveredResource):
        """architecture field must be a valid CpuArchitecture enum value."""
        assert isinstance(resource.architecture, CpuArchitecture)

    @given(resource=discovered_resource_strategy)
    def test_attributes_is_non_empty_dict(self, resource: DiscoveredResource):
        """attributes field must be a non-empty dictionary."""
        assert isinstance(resource.attributes, dict)
        assert len(resource.attributes) > 0

    @given(resource=discovered_resource_strategy)
    def test_all_mandatory_fields_populated(self, resource: DiscoveredResource):
        """All mandatory fields must be non-empty/non-None simultaneously."""
        self._assert_mandatory_fields(resource)

    @given(
        resources=st.lists(discovered_resource_strategy, min_size=1, max_size=10),
        warnings=st.lists(st.text(min_size=0, max_size=50), max_size=3),
        errors=st.lists(st.text(min_size=0, max_size=50), max_size=3),
    )
    @settings(max_examples=50)
    def test_scan_result_resources_all_have_mandatory_fields(
        self, resources, warnings, errors
    ):
        """Every resource carried by a ScanResult has all required fields."""
        scan_result = ScanResult(
            resources=resources,
            warnings=warnings,
            errors=errors,
            scan_timestamp="2024-01-15T10:30:00Z",
            profile_hash="test_hash_abc123",
            is_partial=False,
        )
        for resource in scan_result.resources:
            self._assert_mandatory_fields(resource)

View File

@@ -0,0 +1,257 @@
"""Property-based tests for ScanProfile validation completeness.
**Validates: Requirements 6.1, 6.6, 6.7**
Property 20: Scan profile validation completeness
For any scan profile with K invalid fields (missing provider, empty credentials,
unreachable endpoints, filters exceeding 200 entries, or unsupported resource types),
the validation error SHALL list all K invalid fields in a single response.
"""
from hypothesis import given, assume, settings
from hypothesis import strategies as st
from iac_reverse.models import (
MAX_RESOURCE_TYPE_FILTERS,
PROVIDER_SUPPORTED_RESOURCE_TYPES,
ProviderType,
ScanProfile,
)
# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------
# Any member of the ProviderType enum.
provider_type_strategy = st.sampled_from(list(ProviderType))

# Credential dictionaries with 1-5 entries; keys are built from letters,
# numbers and punctuation, values are arbitrary non-empty text.
_credential_key_strategy = st.text(
    alphabet=st.characters(whitelist_categories=("L", "N", "P")),
    min_size=1,
    max_size=20,
)
_credential_value_strategy = st.text(min_size=1, max_size=50)
non_empty_credentials_strategy = st.dictionaries(
    keys=_credential_key_strategy,
    values=_credential_value_strategy,
    min_size=1,
    max_size=5,
)

# NOTE(review): st.just does not copy its value, so every draw presumably
# receives this same dict object - consumers should not mutate it.
empty_credentials_strategy = st.just({})
def valid_resource_types_strategy(provider: ProviderType) -> st.SearchStrategy:
    """Build a strategy for lists of resource types that *provider* supports.

    Elements are sampled from ``PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]``.
    The list may be empty and is capped at 10 entries, or at the number of
    supported types when that is smaller. Elements are sampled
    independently, so duplicates may occur.
    """
    provider_types = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
    longest_list = min(len(provider_types), 10)
    element_strategy = st.sampled_from(provider_types)
    return st.lists(element_strategy, min_size=0, max_size=longest_list)
# Letter-only strings (5-30 chars) that no provider lists as a supported
# resource type; candidates found in any provider's list are rejected.
invalid_resource_type_strategy = st.text(
    alphabet=st.characters(whitelist_categories=("L",)),
    min_size=5,
    max_size=30,
).filter(
    lambda t: not any(
        t in types for types in PROVIDER_SUPPORTED_RESOURCE_TYPES.values()
    )
)
# ---------------------------------------------------------------------------
# Property Tests
# ---------------------------------------------------------------------------
class TestScanProfileValidationCompleteness:
    """Property 20: Scan profile validation completeness.

    **Validates: Requirements 6.1, 6.6, 6.7**

    The tests build ``ScanProfile`` objects with zero, one or several invalid
    fields and inspect the list returned by ``validate()``. Error categories
    are recognised by substrings of the messages: ``"credentials"``,
    ``"unsupported"`` (case-insensitive) and ``"at most"`` / the numeric limit.
    """

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _validate(provider, credentials, filters):
        """Build a ScanProfile from the arguments and return ``validate()``'s result."""
        candidate = ScanProfile(
            provider=provider,
            credentials=credentials,
            resource_type_filters=filters,
        )
        return candidate.validate()

    @staticmethod
    def _cycle_to_length(types, length):
        """Repeat *types* end to end and cut the result to exactly *length* items."""
        repetitions = length // len(types) + 1
        return (types * repetitions)[:length]

    @staticmethod
    def _mentions_credentials(errors):
        """Return True when some error message contains ``"credentials"``."""
        return any("credentials" in e for e in errors)

    @staticmethod
    def _mentions_unsupported(errors):
        """Return True when some error message contains ``"unsupported"`` (any case)."""
        return any("unsupported" in e.lower() for e in errors)

    @staticmethod
    def _mentions_count_limit(errors):
        """Return True when some error message contains ``"at most"`` or the limit value."""
        return any(
            "at most" in e or str(MAX_RESOURCE_TYPE_FILTERS) in e
            for e in errors
        )

    # ------------------------------------------------------------------
    # Valid profiles
    # ------------------------------------------------------------------

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
    )
    def test_valid_profile_returns_no_errors(self, provider, credentials):
        """Non-empty credentials with ``resource_type_filters=None`` validate cleanly."""
        errors = self._validate(provider, credentials, None)
        assert errors == [], f"Expected no errors for valid profile, got: {errors}"

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
    )
    def test_valid_profile_with_valid_filters_returns_no_errors(self, provider, credentials):
        """Filtering on the provider's full supported-type list validates cleanly."""
        supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
        errors = self._validate(provider, credentials, supported)
        assert errors == [], f"Expected no errors for valid profile with valid filters, got: {errors}"

    # ------------------------------------------------------------------
    # Single invalid field
    # ------------------------------------------------------------------

    @given(provider=provider_type_strategy)
    def test_empty_credentials_always_produces_credentials_error(self, provider):
        """An empty credentials dict yields at least one error naming 'credentials'."""
        errors = self._validate(provider, {}, None)
        assert len(errors) >= 1
        assert self._mentions_credentials(errors), (
            f"Expected error mentioning 'credentials', got: {errors}"
        )

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
        extra_count=st.integers(min_value=1, max_value=50),
    )
    def test_oversized_filters_produces_count_error(self, provider, credentials, extra_count):
        """More than MAX_RESOURCE_TYPE_FILTERS filters yields a count-limit error."""
        supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
        # Only valid types are used, repeated until the limit is exceeded.
        oversized_count = MAX_RESOURCE_TYPE_FILTERS + extra_count
        filters = self._cycle_to_length(supported, oversized_count)
        errors = self._validate(provider, credentials, filters)
        assert self._mentions_count_limit(errors), (
            f"Expected error mentioning count limit, got: {errors}"
        )

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
        invalid_types=st.lists(invalid_resource_type_strategy, min_size=1, max_size=5),
    )
    def test_unsupported_types_produces_unsupported_error(self, provider, credentials, invalid_types):
        """Filters naming unknown resource types yield an 'unsupported' error."""
        errors = self._validate(provider, credentials, invalid_types)
        assert self._mentions_unsupported(errors), (
            f"Expected error mentioning unsupported types, got: {errors}"
        )

    # ------------------------------------------------------------------
    # Several invalid fields at once (no short-circuiting)
    # ------------------------------------------------------------------

    @given(
        provider=provider_type_strategy,
        invalid_types=st.lists(invalid_resource_type_strategy, min_size=1, max_size=3),
    )
    def test_no_short_circuit_credentials_and_unsupported(self, provider, invalid_types):
        """Empty credentials plus unsupported types: both problems are reported."""
        errors = self._validate(provider, {}, invalid_types)
        assert len(errors) >= 2, f"Expected at least 2 errors, got {len(errors)}: {errors}"
        assert self._mentions_credentials(errors), (
            f"Expected credentials error, got: {errors}"
        )
        assert self._mentions_unsupported(errors), (
            f"Expected unsupported types error, got: {errors}"
        )

    @given(
        provider=provider_type_strategy,
        extra_count=st.integers(min_value=1, max_value=20),
    )
    def test_no_short_circuit_credentials_and_oversized(self, provider, extra_count):
        """Empty credentials plus too many filters: both problems are reported."""
        supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
        oversized_count = MAX_RESOURCE_TYPE_FILTERS + extra_count
        filters = self._cycle_to_length(supported, oversized_count)
        errors = self._validate(provider, {}, filters)
        assert len(errors) >= 2, f"Expected at least 2 errors, got {len(errors)}: {errors}"
        assert self._mentions_credentials(errors), (
            f"Expected credentials error, got: {errors}"
        )
        assert self._mentions_count_limit(errors), (
            f"Expected count limit error, got: {errors}"
        )

    @given(
        provider=provider_type_strategy,
        extra_count=st.integers(min_value=1, max_value=10),
        invalid_types=st.lists(invalid_resource_type_strategy, min_size=1, max_size=3),
    )
    def test_no_short_circuit_all_three_issues(self, provider, extra_count, invalid_types):
        """Empty credentials, oversized filters and unsupported types: all reported."""
        supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
        oversized_count = MAX_RESOURCE_TYPE_FILTERS + extra_count
        # Valid padding alone already exceeds the limit; the invalid types
        # are appended on top to trigger the "unsupported" error as well.
        valid_padding = self._cycle_to_length(supported, oversized_count)
        filters = valid_padding + invalid_types
        errors = self._validate(provider, {}, filters)
        assert len(errors) >= 3, f"Expected at least 3 errors, got {len(errors)}: {errors}"
        assert self._mentions_credentials(errors), (
            f"Expected credentials error, got: {errors}"
        )
        assert self._mentions_count_limit(errors), (
            f"Expected count limit error, got: {errors}"
        )
        assert self._mentions_unsupported(errors), (
            f"Expected unsupported types error, got: {errors}"
        )

    # ------------------------------------------------------------------
    # Boundary cases
    # ------------------------------------------------------------------

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
    )
    def test_empty_list_filters_is_valid(self, provider, credentials):
        """An empty ``resource_type_filters`` list (not ``None``) is valid."""
        errors = self._validate(provider, credentials, [])
        assert errors == [], f"Expected no errors for empty filter list, got: {errors}"

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
        count=st.integers(min_value=1, max_value=MAX_RESOURCE_TYPE_FILTERS),
    )
    def test_filters_at_or_below_limit_with_valid_types_is_valid(self, provider, credentials, count):
        """Between 1 and the limit valid filters never produce a count-limit error."""
        supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
        filters = self._cycle_to_length(supported, count)
        errors = self._validate(provider, credentials, filters)
        # Stricter matcher than the positive tests: the bare limit number only
        # counts as a count-limit error when "entries" is mentioned too.
        assert all(
            "at most" not in e
            and (str(MAX_RESOURCE_TYPE_FILTERS) not in e or "entries" not in e)
            for e in errors
        ), f"Unexpected count limit error for {count} filters: {errors}"

View File

@@ -0,0 +1,608 @@
"""Property-based tests for Scanner behavior.
**Validates: Requirements 1.3, 1.4, 1.5, 1.7**
Property 2: Authentication error descriptiveness
For any provider type and any authentication failure reason, the error returned
by the Scanner SHALL contain both the provider name string and the failure reason string.
Property 3: Graceful degradation on unsupported resource types
For any scan request containing a mix of supported and unsupported resource types,
the Scanner SHALL produce warnings for each unsupported type AND return a complete
inventory for all supported types.
Property 4: Progress reporting frequency
The Scanner SHALL report progress at least once per resource type completion.
Property 5: Partial inventory preservation on failure
If the Provider API connection is lost during an active scan, the Scanner SHALL
return a partial resource inventory.
"""
from typing import Callable
import pytest
from hypothesis import given, settings
from hypothesis import strategies as st
from iac_reverse.models import (
CpuArchitecture,
DiscoveredResource,
PlatformCategory,
PROVIDER_SUPPORTED_RESOURCE_TYPES,
ProviderType,
ScanProfile,
ScanProgress,
ScanResult,
)
from iac_reverse.plugin_base import ProviderPlugin
from iac_reverse.scanner.scanner import (
AuthenticationError,
ConnectionLostError,
Scanner,
)
# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------
# Any member of the ProviderType enum.
provider_type_strategy = st.sampled_from(list(ProviderType))

# Text of 1-100 letters, numbers, punctuation or symbols that still has
# content after stripping whitespace.
non_empty_string_strategy = st.text(
    alphabet=st.characters(whitelist_categories=("L", "N", "P", "S")),
    min_size=1,
    max_size=100,
).filter(lambda s: s.strip() != "")

# Credential dictionaries with 1-5 entries and alphanumeric keys.
_credential_key_strategy = st.text(
    alphabet=st.characters(whitelist_categories=("L", "N")),
    min_size=1,
    max_size=20,
)
non_empty_credentials_strategy = st.dictionaries(
    keys=_credential_key_strategy,
    values=st.text(min_size=1, max_size=50),
    min_size=1,
    max_size=5,
)

# Letter-only names (5-30 chars) that do not occur in any provider's
# supported resource type list.
unsupported_resource_type_strategy = st.text(
    alphabet=st.characters(whitelist_categories=("L",)),
    min_size=5,
    max_size=30,
).filter(
    lambda t: not any(
        t in types for types in PROVIDER_SUPPORTED_RESOURCE_TYPES.values()
    )
)
# ---------------------------------------------------------------------------
# Mock Plugin Implementations
# ---------------------------------------------------------------------------
class FailingAuthPlugin(ProviderPlugin):
    """Test double whose ``authenticate`` always raises.

    ``authenticate`` raises ``RuntimeError`` carrying the configured
    ``failure_reason``; every other method returns a fixed placeholder value.
    """

    def __init__(self, failure_reason: str):
        # Text used as the RuntimeError message raised by authenticate().
        self.failure_reason = failure_reason

    def authenticate(self, credentials: dict[str, str]) -> None:
        """Reject any credentials by raising ``RuntimeError(failure_reason)``."""
        raise RuntimeError(self.failure_reason)

    def get_platform_category(self) -> PlatformCategory:
        """Return the fixed category ``CONTAINER_ORCHESTRATION``."""
        return PlatformCategory.CONTAINER_ORCHESTRATION

    def list_endpoints(self) -> list[str]:
        """Return a single placeholder endpoint."""
        return ["http://localhost:8080"]

    def list_supported_resource_types(self) -> list[str]:
        """Return a single placeholder resource type."""
        return ["mock_resource"]

    def detect_architecture(self, endpoint: str) -> CpuArchitecture:
        """Return ``AMD64`` regardless of *endpoint*."""
        return CpuArchitecture.AMD64

    def discover_resources(
        self,
        endpoints: list[str],
        resource_types: list[str],
        progress_callback: Callable[[ScanProgress], None],
    ) -> ScanResult:
        """Return an empty result; the arguments are ignored."""
        empty_result = ScanResult(
            resources=[],
            warnings=[],
            errors=[],
            scan_timestamp="",
            profile_hash="",
        )
        return empty_result
class GracefulDegradationPlugin(ProviderPlugin):
    """Test double that discovers one resource per requested resource type.

    The list of supported types is injected at construction time and
    authentication always succeeds.
    """

    def __init__(self, supported_types: list[str]):
        # Returned as-is by list_supported_resource_types().
        self._supported_types = supported_types

    def authenticate(self, credentials: dict[str, str]) -> None:
        """Accept any credentials."""
        return None

    def get_platform_category(self) -> PlatformCategory:
        """Return the fixed category ``CONTAINER_ORCHESTRATION``."""
        return PlatformCategory.CONTAINER_ORCHESTRATION

    def list_endpoints(self) -> list[str]:
        """Return a single placeholder endpoint."""
        return ["http://localhost:8080"]

    def list_supported_resource_types(self) -> list[str]:
        """Return the supported types given to the constructor."""
        return self._supported_types

    def detect_architecture(self, endpoint: str) -> CpuArchitecture:
        """Return ``AMD64`` regardless of *endpoint*."""
        return CpuArchitecture.AMD64

    def discover_resources(
        self,
        endpoints: list[str],
        resource_types: list[str],
        progress_callback: Callable[[ScanProgress], None],
    ) -> ScanResult:
        """Fabricate one resource for each entry of *resource_types*.

        After each resource is created, *progress_callback* receives a
        ``ScanProgress`` whose counters equal the 1-based position of the
        type just handled. *endpoints* is ignored.
        """
        discovered = []
        for i, rt in enumerate(resource_types):
            handled_so_far = i + 1
            fabricated = DiscoveredResource(
                resource_type=rt,
                unique_id=f"id-{rt}-{i}",
                name=f"resource-{rt}-{i}",
                provider=ProviderType.KUBERNETES,
                platform_category=PlatformCategory.CONTAINER_ORCHESTRATION,
                architecture=CpuArchitecture.AMD64,
                endpoint="http://localhost:8080",
                attributes={"key": "value"},
            )
            discovered.append(fabricated)
            progress_callback(
                ScanProgress(
                    current_resource_type=rt,
                    resources_discovered=handled_so_far,
                    resource_types_completed=handled_so_far,
                    total_resource_types=len(resource_types),
                )
            )
        return ScanResult(
            resources=discovered,
            warnings=[],
            errors=[],
            scan_timestamp="",
            profile_hash="",
        )
class ProgressTrackingPlugin(ProviderPlugin):
    """Test double that emits one progress report per requested resource type.

    The supported types are injected at construction time and
    authentication always succeeds.
    """

    def __init__(self, supported_types: list[str]):
        # Returned as-is by list_supported_resource_types().
        self._supported_types = supported_types

    def authenticate(self, credentials: dict[str, str]) -> None:
        """Accept any credentials."""
        return None

    def get_platform_category(self) -> PlatformCategory:
        """Return the fixed category ``CONTAINER_ORCHESTRATION``."""
        return PlatformCategory.CONTAINER_ORCHESTRATION

    def list_endpoints(self) -> list[str]:
        """Return a single placeholder endpoint."""
        return ["http://localhost:8080"]

    def list_supported_resource_types(self) -> list[str]:
        """Return the supported types given to the constructor."""
        return self._supported_types

    def detect_architecture(self, endpoint: str) -> CpuArchitecture:
        """Return ``AMD64`` regardless of *endpoint*."""
        return CpuArchitecture.AMD64

    def discover_resources(
        self,
        endpoints: list[str],
        resource_types: list[str],
        progress_callback: Callable[[ScanProgress], None],
    ) -> ScanResult:
        """Create one attribute-less resource per type and report progress.

        *progress_callback* is invoked once per entry of *resource_types*,
        right after that entry's resource has been recorded. *endpoints* is
        ignored.
        """
        collected = []
        for i, rt in enumerate(resource_types):
            done = i + 1
            collected.append(
                DiscoveredResource(
                    resource_type=rt,
                    unique_id=f"id-{rt}-{i}",
                    name=f"resource-{rt}-{i}",
                    provider=ProviderType.KUBERNETES,
                    platform_category=PlatformCategory.CONTAINER_ORCHESTRATION,
                    architecture=CpuArchitecture.AMD64,
                    endpoint="http://localhost:8080",
                    attributes={},
                )
            )
            report = ScanProgress(
                current_resource_type=rt,
                resources_discovered=done,
                resource_types_completed=done,
                total_resource_types=len(resource_types),
            )
            progress_callback(report)
        return ScanResult(
            resources=collected,
            warnings=[],
            errors=[],
            scan_timestamp="",
            profile_hash="",
        )
class ConnectionLossPlugin(ProviderPlugin):
    """Test double whose ``discover_resources`` fails with ``ConnectionError``.

    The failure is raised immediately: no resource is produced and the
    progress callback is never invoked. ``fail_after`` is only interpolated
    into the error message.
    """

    def __init__(self, supported_types: list[str], fail_after: int):
        # Returned as-is by list_supported_resource_types().
        self._supported_types = supported_types
        # Number quoted in the ConnectionError message.
        self._fail_after = fail_after

    def authenticate(self, credentials: dict[str, str]) -> None:
        """Accept any credentials."""
        return None

    def get_platform_category(self) -> PlatformCategory:
        """Return the fixed category ``CONTAINER_ORCHESTRATION``."""
        return PlatformCategory.CONTAINER_ORCHESTRATION

    def list_endpoints(self) -> list[str]:
        """Return a single placeholder endpoint."""
        return ["http://localhost:8080"]

    def list_supported_resource_types(self) -> list[str]:
        """Return the supported types given to the constructor."""
        return self._supported_types

    def detect_architecture(self, endpoint: str) -> CpuArchitecture:
        """Return ``AMD64`` regardless of *endpoint*."""
        return CpuArchitecture.AMD64

    def discover_resources(
        self,
        endpoints: list[str],
        resource_types: list[str],
        progress_callback: Callable[[ScanProgress], None],
    ) -> ScanResult:
        """Always raise ``ConnectionError``; all arguments are ignored.

        Raises:
            ConnectionError: unconditionally, with a message quoting
                ``fail_after``.
        """
        message = f"Connection lost after discovering {self._fail_after} resources"
        raise ConnectionError(message)
# ---------------------------------------------------------------------------
# Property Tests
# ---------------------------------------------------------------------------
class TestAuthenticationErrorDescriptiveness:
    """Property 2: Authentication error descriptiveness.

    For any provider type and any authentication failure reason, the error
    returned by the Scanner SHALL contain both the provider name string and
    the failure reason string.

    **Validates: Requirements 1.3**
    """

    @staticmethod
    def _scan_and_capture_auth_error(provider, credentials, failure_reason):
        """Run a scan with a plugin that fails authentication.

        Returns the ``AuthenticationError`` raised by ``Scanner.scan``; the
        enclosing ``pytest.raises`` fails the test when no such error occurs.
        """
        plugin = FailingAuthPlugin(failure_reason=failure_reason)
        profile = ScanProfile(
            provider=provider,
            credentials=credentials,
        )
        scanner = Scanner(profile=profile, plugin=plugin)
        with pytest.raises(AuthenticationError) as exc_info:
            scanner.scan()
        return exc_info.value

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
        failure_reason=non_empty_string_strategy,
    )
    @settings(max_examples=100)
    def test_auth_error_contains_provider_name_and_reason(
        self, provider, credentials, failure_reason
    ):
        """The error text includes the provider's value and the failure reason."""
        error = self._scan_and_capture_auth_error(
            provider, credentials, failure_reason
        )
        # Provider name must be part of the rendered message.
        assert provider.value in str(error), (
            f"Expected provider name '{provider.value}' in error message, "
            f"got: '{str(error)}'"
        )
        # So must the reason the plugin failed with.
        assert failure_reason in str(error), (
            f"Expected failure reason '{failure_reason}' in error message, "
            f"got: '{str(error)}'"
        )

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
        failure_reason=non_empty_string_strategy,
    )
    @settings(max_examples=100)
    def test_auth_error_attributes_match(self, provider, credentials, failure_reason):
        """``provider_name`` and ``reason`` attributes mirror the scan inputs."""
        error = self._scan_and_capture_auth_error(
            provider, credentials, failure_reason
        )
        assert error.provider_name == provider.value
        assert error.reason == failure_reason
class TestGracefulDegradationOnUnsupportedTypes:
    """Property 3: Graceful degradation on unsupported resource types.

    For any scan request containing a mix of supported and unsupported resource
    types, the Scanner SHALL produce warnings for each unsupported type AND
    return a complete inventory for all supported types.

    **Validates: Requirements 1.4**
    """

    @staticmethod
    def _scan_with_filters(provider, credentials, filters):
        """Scan *provider* with a GracefulDegradationPlugin and the given filters.

        The plugin is configured with the provider's full supported-type
        list. Returns the ``ScanResult`` produced by ``Scanner.scan``.
        """
        supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
        plugin = GracefulDegradationPlugin(supported_types=supported)
        profile = ScanProfile(
            provider=provider,
            credentials=credentials,
            resource_type_filters=filters,
        )
        return Scanner(profile=profile, plugin=plugin).scan()

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
        unsupported_types=st.lists(unsupported_resource_type_strategy, min_size=1, max_size=5),
    )
    @settings(max_examples=100)
    def test_unsupported_types_produce_warnings(
        self, provider, credentials, unsupported_types
    ):
        """Each unsupported resource type must produce a warning."""
        supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
        # Mix up to two supported types with the unsupported ones.
        mixed_filters = list(supported[:2]) + unsupported_types
        result = self._scan_with_filters(provider, credentials, mixed_filters)

        for unsupported in unsupported_types:
            assert any(unsupported in w for w in result.warnings), (
                f"Expected warning for unsupported type '{unsupported}', "
                f"got warnings: {result.warnings}"
            )

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
        unsupported_types=st.lists(unsupported_resource_type_strategy, min_size=1, max_size=5),
    )
    @settings(max_examples=100)
    def test_supported_types_still_discovered(
        self, provider, credentials, unsupported_types
    ):
        """Supported types must still be fully discovered despite unsupported types."""
        supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
        # list(...) mirrors test_unsupported_types_produce_warnings: the
        # concatenation below works whether the supported types are stored
        # as a list or as a tuple.
        supported_subset = list(supported[:2])
        mixed_filters = supported_subset + unsupported_types
        result = self._scan_with_filters(provider, credentials, mixed_filters)

        discovered_types = {r.resource_type for r in result.resources}
        for supported_type in supported_subset:
            assert supported_type in discovered_types, (
                f"Expected supported type '{supported_type}' to be discovered, "
                f"but only found: {discovered_types}"
            )

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
        unsupported_types=st.lists(unsupported_resource_type_strategy, min_size=1, max_size=5),
    )
    @settings(max_examples=100)
    def test_warning_count_matches_unsupported_count(
        self, provider, credentials, unsupported_types
    ):
        """Number of warnings must be at least the number of unsupported types."""
        # The filter holds unsupported types only.
        result = self._scan_with_filters(provider, credentials, unsupported_types)

        # The generated list may repeat a name; compare against distinct names.
        unique_unsupported = set(unsupported_types)
        assert len(result.warnings) >= len(unique_unsupported), (
            f"Expected at least {len(unique_unsupported)} warnings, "
            f"got {len(result.warnings)}: {result.warnings}"
        )
class TestProgressReportingFrequency:
    """Property 4: Progress reporting frequency.

    For any scan across N resource types, the progress callback SHALL be
    invoked at least N times, once per resource type completion.

    **Validates: Requirements 1.5**
    """

    @staticmethod
    def _collect_progress(provider, credentials, supported):
        """Scan all supported types and return every ScanProgress reported."""
        plugin = ProgressTrackingPlugin(supported_types=supported)
        profile = ScanProfile(
            provider=provider,
            credentials=credentials,
            resource_type_filters=None,  # Scan all supported types
        )
        progress_reports: list[ScanProgress] = []
        scanner = Scanner(profile=profile, plugin=plugin)
        # The callback just records each report in arrival order.
        scanner.scan(
            progress_callback=lambda progress: progress_reports.append(progress)
        )
        return progress_reports

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
    )
    @settings(max_examples=100)
    def test_progress_reported_at_least_once_per_resource_type(
        self, provider, credentials
    ):
        """The callback fires at least as many times as there are supported types."""
        supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
        progress_reports = self._collect_progress(provider, credentials, supported)

        assert len(progress_reports) >= len(supported), (
            f"Expected at least {len(supported)} progress reports, "
            f"got {len(progress_reports)}"
        )

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
    )
    @settings(max_examples=100)
    def test_progress_reports_cover_all_resource_types(
        self, provider, credentials
    ):
        """Every supported type is named by at least one progress report."""
        supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
        progress_reports = self._collect_progress(provider, credentials, supported)

        reported_types = set()
        for report in progress_reports:
            reported_types.add(report.current_resource_type)

        for rt in supported:
            assert rt in reported_types, (
                f"Expected resource type '{rt}' in progress reports, "
                f"but only found: {reported_types}"
            )
class TestPartialInventoryPreservationOnFailure:
    """Property 5: Partial inventory preservation on failure.

    If the Provider API connection is lost during an active scan, the Scanner
    SHALL return a partial resource inventory.

    **Validates: Requirements 1.7**
    """

    @staticmethod
    def _scan_until_connection_lost(provider, credentials, fail_after):
        """Run a scan that is expected to fail and return the raised error.

        The scan uses ``ConnectionLossPlugin`` configured with ``fail_after``
        and must raise ``ConnectionLostError``; otherwise ``pytest.raises``
        fails the calling test.
        """
        # NOTE(review): the tests draw fail_after from 0..10; this presumes the
        # plugin still raises when fail_after exceeds the number of supported
        # resource types — confirm against ConnectionLossPlugin.
        supported = PROVIDER_SUPPORTED_RESOURCE_TYPES[provider]
        plugin = ConnectionLossPlugin(
            supported_types=supported,
            fail_after=fail_after,
        )
        profile = ScanProfile(
            provider=provider,
            credentials=credentials,
            resource_type_filters=None,
        )
        scanner = Scanner(profile=profile, plugin=plugin)
        with pytest.raises(ConnectionLostError) as exc_info:
            scanner.scan()
        return exc_info.value

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
        fail_after=st.integers(min_value=0, max_value=10),
    )
    @settings(max_examples=100)
    def test_connection_loss_raises_with_partial_result(
        self, provider, credentials, fail_after
    ):
        """The raised ConnectionLostError carries a ScanResult as partial_result."""
        error = self._scan_until_connection_lost(provider, credentials, fail_after)

        # The error must expose the partial inventory gathered so far.
        assert hasattr(error, "partial_result")
        assert isinstance(error.partial_result, ScanResult)

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
        fail_after=st.integers(min_value=0, max_value=10),
    )
    @settings(max_examples=100)
    def test_partial_result_is_marked_as_partial(
        self, provider, credentials, fail_after
    ):
        """The partial result attached to the error has is_partial=True."""
        error = self._scan_until_connection_lost(provider, credentials, fail_after)
        partial = error.partial_result

        assert partial.is_partial is True, (
            "Partial result from connection loss must have is_partial=True"
        )

    @given(
        provider=provider_type_strategy,
        credentials=non_empty_credentials_strategy,
        fail_after=st.integers(min_value=0, max_value=10),
    )
    @settings(max_examples=100)
    def test_partial_result_contains_error_info(
        self, provider, credentials, fail_after
    ):
        """The partial result records the failure as an error or a warning."""
        error = self._scan_until_connection_lost(provider, credentials, fail_after)
        partial = error.partial_result

        # At least one error or warning must describe the failure.
        assert len(partial.errors) > 0 or len(partial.warnings) > 0, (
            "Partial result must contain error/warning info about the connection loss"
        )

View File

@@ -0,0 +1,567 @@
"""Property-based tests for the State Builder.
**Validates: Requirements 4.1, 4.2, 4.4, 4.5**
Properties tested:
- Property 16: State file structural validity
- Property 17: State entry completeness and schema correctness
"""
import json
import re
import uuid
from hypothesis import given, settings, assume
from hypothesis import strategies as st
from iac_reverse.generator.sanitize import sanitize_identifier
from iac_reverse.models import (
CodeGenerationResult,
CpuArchitecture,
DependencyGraph,
DiscoveredResource,
GeneratedFile,
PlatformCategory,
PROVIDER_SUPPORTED_RESOURCE_TYPES,
ProviderType,
ResourceRelationship,
)
from iac_reverse.state_builder import StateBuilder
# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------
# Enum-backed strategies: one sample per enum member.
provider_type_strategy = st.sampled_from(list(ProviderType))
platform_category_strategy = st.sampled_from(list(PlatformCategory))
cpu_architecture_strategy = st.sampled_from(list(CpuArchitecture))

# Every supported resource type across all providers, flattened into one list.
ALL_SUPPORTED_RESOURCE_TYPES = [
    supported_type
    for provider_types in PROVIDER_SUPPORTED_RESOURCE_TYPES.values()
    for supported_type in provider_types
]
resource_type_strategy = st.sampled_from(ALL_SUPPORTED_RESOURCE_TYPES)


def _is_not_blank(text):
    """Return True when *text* still has content after stripping whitespace."""
    return text.strip() != ""


def _is_attribute_key(text):
    """Return True for non-blank text whose first character is alphabetic."""
    return text.strip() != "" and text[0].isalpha()


# Resource names: 1-20 characters from categories L and N plus "_" and "-".
_resource_name_alphabet = st.characters(
    whitelist_categories=("L", "N"), whitelist_characters="_-"
)
resource_name_strategy = st.text(
    min_size=1,
    max_size=20,
    alphabet=_resource_name_alphabet,
).filter(_is_not_blank)

# Unique IDs: non-empty strings that may also contain "/", ":" and ".".
_unique_id_alphabet = st.characters(
    whitelist_categories=("L", "N"), whitelist_characters="_-/:."
)
unique_id_strategy = st.text(
    min_size=1,
    max_size=40,
    alphabet=_unique_id_alphabet,
).filter(_is_not_blank)

# Simple attribute values: short non-blank text, small integers, or booleans.
_attr_text_alphabet = st.characters(
    whitelist_categories=("L", "N"), whitelist_characters="_-./: "
)
simple_attr_value_strategy = st.one_of(
    st.text(min_size=1, max_size=30, alphabet=_attr_text_alphabet).filter(
        _is_not_blank
    ),
    st.integers(min_value=0, max_value=10000),
    st.booleans(),
)

# Attribute dictionaries: 1-5 entries keyed by letter/underscore names.
_attr_key_alphabet = st.characters(
    whitelist_categories=("L",), whitelist_characters="_"
)
attributes_strategy = st.dictionaries(
    keys=st.text(min_size=1, max_size=15, alphabet=_attr_key_alphabet).filter(
        _is_attribute_key
    ),
    values=simple_attr_value_strategy,
    min_size=1,
    max_size=5,
)

# Provider version strings (semver-like, major version never starts with 0).
provider_version_strategy = st.from_regex(
    r"[1-9][0-9]{0,1}\.[0-9]{1,2}\.[0-9]{1,2}",
    fullmatch=True,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def make_resource(
    unique_id: str,
    resource_type: str = "kubernetes_deployment",
    name: str = "my_resource",
    provider: ProviderType = ProviderType.KUBERNETES,
    platform_category: PlatformCategory = PlatformCategory.CONTAINER_ORCHESTRATION,
    architecture: CpuArchitecture = CpuArchitecture.AMD64,
    attributes: dict | None = None,
    raw_references: list[str] | None = None,
) -> DiscoveredResource:
    """Build a ``DiscoveredResource`` for tests, filling in defaults.

    Args:
        unique_id: Identifier stored on the resource.
        resource_type: Resource type string stored on the resource.
        name: Resource name stored on the resource.
        provider: Provider enum member stored on the resource.
        platform_category: Platform category enum member.
        architecture: CPU architecture enum member.
        attributes: Attribute mapping. ``None`` selects the placeholder
            ``{"key": "value"}``; a mapping passed explicitly — including an
            empty one — is used as given.
        raw_references: Reference list. ``None`` selects an empty list.

    Returns:
        The constructed resource; its endpoint is always
        ``"https://api.internal.lab:6443"``.
    """
    # Compare against None rather than relying on truthiness so that an
    # explicitly supplied empty dict/list is not silently replaced.
    if attributes is None:
        attributes = {"key": "value"}
    if raw_references is None:
        raw_references = []

    return DiscoveredResource(
        resource_type=resource_type,
        unique_id=unique_id,
        name=name,
        provider=provider,
        platform_category=platform_category,
        architecture=architecture,
        endpoint="https://api.internal.lab:6443",
        attributes=attributes,
        raw_references=raw_references,
    )
def make_dependency_graph(
    resources: list[DiscoveredResource],
    relationships: list[ResourceRelationship] | None = None,
) -> DependencyGraph:
    """Wrap *resources* in a ``DependencyGraph`` with no cycles or unresolved refs.

    The topological order is simply the resources' ``unique_id`` values in the
    order given; a missing or empty *relationships* becomes an empty list.
    """
    ordered_ids = [resource.unique_id for resource in resources]
    edges = relationships if relationships else []
    return DependencyGraph(
        resources=resources,
        relationships=edges,
        topological_order=ordered_ids,
        cycles=[],
        unresolved_references=[],
    )
def make_code_generation_result() -> CodeGenerationResult:
    """Return a minimal ``CodeGenerationResult`` made of three empty files.

    The result holds one resource file (``main.tf``), a variables file
    (``variables.tf``) and a provider file (``provider.tf``), each with empty
    content and a resource count of zero.
    """

    def empty_file(filename: str) -> GeneratedFile:
        # Every placeholder file differs only by its name.
        return GeneratedFile(filename=filename, content="", resource_count=0)

    return CodeGenerationResult(
        resource_files=[empty_file("main.tf")],
        variables_file=empty_file("variables.tf"),
        provider_file=empty_file("provider.tf"),
    )
# ---------------------------------------------------------------------------
# Composite strategies
# ---------------------------------------------------------------------------
@st.composite
def mappable_resource_strategy(draw):
    """Generate one ``DiscoveredResource`` built from the module's strategies.

    The resource type comes from ``resource_type_strategy`` and the unique ID
    from ``unique_id_strategy`` (always non-empty); the remaining fields are
    drawn independently from their own strategies.
    """
    # The dict literal is evaluated top to bottom, which keeps the draw order:
    # type, name, id, provider, platform category, architecture, attributes.
    drawn_fields = {
        "resource_type": draw(resource_type_strategy),
        "name": draw(resource_name_strategy),
        "unique_id": draw(unique_id_strategy),
        "provider": draw(provider_type_strategy),
        "platform_category": draw(platform_category_strategy),
        "architecture": draw(cpu_architecture_strategy),
        "attributes": draw(attributes_strategy),
    }
    return make_resource(**drawn_fields)
@st.composite
def multiple_mappable_resources_strategy(draw):
    """Generate a list of 1-5 mappable resources with distinct unique IDs.

    Uniqueness is delegated to ``st.lists(..., unique_by=...)`` instead of a
    hand-written skip-on-duplicate loop, so Hypothesis enforces and shrinks
    the constraint itself. The previous trailing ``assume(len(...) >= 1)``
    could never be false (the first drawn resource was always kept) and is
    replaced by ``min_size=1``.
    """
    return draw(
        st.lists(
            mappable_resource_strategy(),
            min_size=1,
            max_size=5,
            unique_by=lambda resource: resource.unique_id,
        )
    )
@st.composite
def resource_with_sensitive_attrs_strategy(draw):
    """Generate a resource whose attributes include one sensitive-looking key.

    One key is sampled from a fixed list of sensitive names and given a short
    lowercase value; it is merged over an ordinary attribute dictionary, so it
    wins if the ordinary attributes happen to contain the same key.
    """
    resource_type = draw(resource_type_strategy)
    name = draw(resource_name_strategy)
    unique_id = draw(unique_id_strategy)

    # Guarantee at least one sensitive key is present.
    sensitive_key = draw(
        st.sampled_from(
            [
                "password",
                "api_secret",
                "auth_token",
                "private_key",
                "tls_certificate",
            ]
        )
    )
    sensitive_value = draw(
        st.text(min_size=1, max_size=20, alphabet="abcdefghijklmnop")
    )

    # Ordinary attributes accompany the sensitive one.
    ordinary_attrs = draw(attributes_strategy)
    combined_attrs = {**ordinary_attrs, sensitive_key: sensitive_value}

    return make_resource(
        unique_id=unique_id,
        resource_type=resource_type,
        name=name,
        attributes=combined_attrs,
    )
# ---------------------------------------------------------------------------
# Property 16: State file structural validity
# ---------------------------------------------------------------------------
class TestStateFileStructuralValidity:
    """Property 16: State file structural validity.

    **Validates: Requirements 4.1**

    For any set of resources, the generated state file has version=4,
    valid UUID lineage, serial=1, and valid JSON structure.
    """

    @staticmethod
    def _build_state(resources):
        """Build a state file for *resources* with provider version "1.0.0".

        Shared setup for every test in this class: a fresh ``StateBuilder``,
        a dependency graph over *resources*, and a minimal code generation
        result.
        """
        builder = StateBuilder()
        graph = make_dependency_graph(resources)
        code_result = make_code_generation_result()
        return builder.build(code_result, graph, "1.0.0")

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_file_version_is_4(
        self, resources: list[DiscoveredResource]
    ):
        """The generated state file always has version=4."""
        state_file = self._build_state(resources)

        assert state_file.version == 4, (
            f"Expected version=4, got version={state_file.version}"
        )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_file_has_valid_uuid_lineage(
        self, resources: list[DiscoveredResource]
    ):
        """The generated state file has a valid version-4 UUID lineage."""
        state_file = self._build_state(resources)

        # Lineage must parse as a UUID; chain the parse error so the original
        # ValueError stays attached to the assertion failure as its cause.
        try:
            parsed_uuid = uuid.UUID(state_file.lineage)
        except ValueError as exc:
            raise AssertionError(
                f"Lineage '{state_file.lineage}' is not a valid UUID"
            ) from exc

        assert parsed_uuid.version == 4, (
            f"Expected UUID version 4, got version {parsed_uuid.version}"
        )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_file_serial_is_1(
        self, resources: list[DiscoveredResource]
    ):
        """The generated state file always has serial=1."""
        state_file = self._build_state(resources)

        assert state_file.serial == 1, (
            f"Expected serial=1, got serial={state_file.serial}"
        )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_file_produces_valid_json(
        self, resources: list[DiscoveredResource]
    ):
        """The state file serializes to valid JSON via to_json()."""
        json_str = self._build_state(resources).to_json()

        # Must parse as JSON; chain the decode error as the explicit cause.
        try:
            parsed = json.loads(json_str)
        except json.JSONDecodeError as exc:
            raise AssertionError(
                f"State file to_json() produced invalid JSON: {exc}"
            ) from exc

        assert isinstance(parsed, dict), "State JSON root must be a dict"

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_json_has_required_top_level_fields(
        self, resources: list[DiscoveredResource]
    ):
        """The serialized state JSON has version, terraform_version, serial, lineage, resources."""
        parsed = json.loads(self._build_state(resources).to_json())

        required_fields = {
            "version", "terraform_version", "serial", "lineage", "resources"
        }
        missing = required_fields - set(parsed.keys())
        assert not missing, (
            f"State JSON missing required top-level fields: {missing}"
        )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_json_resource_entries_have_required_fields(
        self, resources: list[DiscoveredResource]
    ):
        """Each resource entry in the JSON has mode, type, name, provider, and instances."""
        parsed = json.loads(self._build_state(resources).to_json())

        required_resource_fields = {"mode", "type", "name", "provider", "instances"}
        for i, entry in enumerate(parsed["resources"]):
            missing = required_resource_fields - set(entry.keys())
            assert not missing, (
                f"Resource entry {i} missing required fields: {missing}. "
                f"Entry keys: {list(entry.keys())}"
            )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_json_instances_have_schema_and_attributes(
        self, resources: list[DiscoveredResource]
    ):
        """Each instance in the state JSON has schema_version, attributes, sensitive_attributes, dependencies."""
        parsed = json.loads(self._build_state(resources).to_json())

        required_instance_fields = {
            "schema_version", "attributes", "sensitive_attributes", "dependencies"
        }
        for i, entry in enumerate(parsed["resources"]):
            for j, instance in enumerate(entry["instances"]):
                missing = required_instance_fields - set(instance.keys())
                assert not missing, (
                    f"Resource {i}, instance {j} missing fields: {missing}. "
                    f"Instance keys: {list(instance.keys())}"
                )
# ---------------------------------------------------------------------------
# Property 17: State entry completeness and schema correctness
# ---------------------------------------------------------------------------
class TestStateEntryCompletenessAndSchemaCorrectness:
    """Property 17: State entry completeness and schema correctness.

    **Validates: Requirements 4.4, 4.5**

    For any resource, the state entry has non-empty resource_type,
    resource_name, provider_id, and attributes matching the discovery data.
    """

    @staticmethod
    def _build_single_entry(resource, provider_version="1.0.0"):
        """Build state for one resource and return its only state entry.

        Asserts that the resulting state file contains exactly one resource
        entry before returning it.
        """
        builder = StateBuilder()
        graph = make_dependency_graph([resource])
        code_result = make_code_generation_result()
        state_file = builder.build(code_result, graph, provider_version)
        assert len(state_file.resources) == 1
        return state_file.resources[0]

    @given(resource=mappable_resource_strategy())
    @settings(max_examples=100)
    def test_state_entry_has_non_empty_resource_type(
        self, resource: DiscoveredResource
    ):
        """The entry's resource_type is non-empty and equals the resource's type."""
        entry = self._build_single_entry(resource)

        assert entry.resource_type != "", (
            "State entry resource_type must not be empty"
        )
        assert entry.resource_type == resource.resource_type, (
            f"Expected resource_type '{resource.resource_type}', "
            f"got '{entry.resource_type}'"
        )

    @given(resource=mappable_resource_strategy())
    @settings(max_examples=100)
    def test_state_entry_has_non_empty_resource_name(
        self, resource: DiscoveredResource
    ):
        """The entry's resource_name is non-empty and equals the sanitized name."""
        entry = self._build_single_entry(resource)

        assert entry.resource_name != "", (
            "State entry resource_name must not be empty"
        )
        # The stored name is compared with sanitize_identifier(original name).
        sanitized_name = sanitize_identifier(resource.name)
        assert entry.resource_name == sanitized_name, (
            f"Expected resource_name '{sanitized_name}', "
            f"got '{entry.resource_name}'"
        )

    @given(resource=mappable_resource_strategy())
    @settings(max_examples=100)
    def test_state_entry_has_non_empty_provider_id(
        self, resource: DiscoveredResource
    ):
        """The entry's provider_id is non-empty and equals the resource's unique_id."""
        entry = self._build_single_entry(resource)

        assert entry.provider_id != "", (
            "State entry provider_id must not be empty"
        )
        assert entry.provider_id == resource.unique_id, (
            f"Expected provider_id '{resource.unique_id}', "
            f"got '{entry.provider_id}'"
        )

    @given(resource=mappable_resource_strategy())
    @settings(max_examples=100)
    def test_state_entry_attributes_match_discovery_data(
        self, resource: DiscoveredResource
    ):
        """Every discovered attribute appears in the entry with the same value."""
        entry = self._build_single_entry(resource)

        for attr_name, discovered_value in resource.attributes.items():
            assert attr_name in entry.attributes, (
                f"Discovery attribute '{attr_name}' missing from state entry attributes. "
                f"State attrs: {list(entry.attributes.keys())}"
            )
            assert entry.attributes[attr_name] == discovered_value, (
                f"Attribute '{attr_name}' mismatch: discovery={discovered_value}, "
                f"state={entry.attributes[attr_name]}"
            )

    @given(
        resource=mappable_resource_strategy(),
        provider_version=provider_version_strategy,
    )
    @settings(max_examples=100)
    def test_state_entry_schema_version_matches_provider_version(
        self, resource: DiscoveredResource, provider_version: str
    ):
        """The entry's schema_version equals the major number of provider_version."""
        entry = self._build_single_entry(resource, provider_version)

        # The major version is the integer before the first dot.
        major_version = int(provider_version.split(".")[0])
        assert entry.schema_version == major_version, (
            f"Expected schema_version={major_version} "
            f"(from provider_version='{provider_version}'), "
            f"got schema_version={entry.schema_version}"
        )

    @given(resource=resource_with_sensitive_attrs_strategy())
    @settings(max_examples=100)
    def test_state_entry_marks_sensitive_attributes(
        self, resource: DiscoveredResource
    ):
        """A resource with sensitive-looking keys yields non-empty sensitive_attributes."""
        entry = self._build_single_entry(resource)

        # A key counts as sensitive when it contains any of these substrings
        # (case-insensitive).
        sensitive_patterns = ["password", "secret", "token", "key", "certificate"]
        has_sensitive = any(
            pattern in attr_key.lower()
            for attr_key in resource.attributes.keys()
            for pattern in sensitive_patterns
        )

        if has_sensitive:
            assert len(entry.sensitive_attributes) > 0, (
                f"Resource has sensitive-looking attributes "
                f"{list(resource.attributes.keys())} but sensitive_attributes "
                f"is empty"
            )

    @given(resources=multiple_mappable_resources_strategy())
    @settings(max_examples=100)
    def test_state_json_id_field_matches_provider_id(
        self, resources: list[DiscoveredResource]
    ):
        """In the serialized JSON, each instance's attributes has a non-empty 'id'."""
        # NOTE(review): despite the test name, equality between attributes.id
        # and the provider_id is not asserted here — only presence and
        # non-emptiness. Confirm whether a stricter check is intended.
        builder = StateBuilder()
        graph = make_dependency_graph(resources)
        code_result = make_code_generation_result()
        state_file = builder.build(code_result, graph, "1.0.0")
        parsed = json.loads(state_file.to_json())

        for i, entry in enumerate(parsed["resources"]):
            for instance in entry["instances"]:
                instance_attrs = instance["attributes"]
                assert "id" in instance_attrs, (
                    f"Resource entry {i} instance missing 'id' in attributes"
                )
                # The id should be non-empty.
                assert instance_attrs["id"] != "", (
                    f"Resource entry {i} has empty 'id' attribute"
                )