Created IAC reverse generator

This commit is contained in:
p2913020
2026-05-22 00:19:30 -04:00
parent d04c2c6e4b
commit 1a11244fff
161 changed files with 26806 additions and 51 deletions

View File

@@ -0,0 +1,719 @@
"""Property-based tests for the Code Generator.
**Validates: Requirements 2.2, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6**
Properties tested:
- Property 10: References in generated output use Terraform syntax
- Property 11: Generated HCL syntactic validity
- Property 12: File organization by resource type
- Property 13: Variable extraction for shared values
- Property 14: Identifier sanitization validity
- Property 15: Traceability comments in generated code
"""
import re
from hypothesis import given, settings, assume, HealthCheck
from hypothesis import strategies as st
from iac_reverse.generator import CodeGenerator, VariableExtractor, sanitize_identifier
from iac_reverse.models import (
CpuArchitecture,
DependencyGraph,
DiscoveredResource,
PlatformCategory,
ProviderType,
ResourceRelationship,
ScanProfile,
)
# ---------------------------------------------------------------------------
# Hypothesis Strategies
# ---------------------------------------------------------------------------
def _is_not_blank(text: str) -> bool:
    """Return True unless *text* is empty or consists only of whitespace."""
    return text.strip() != ""


def _is_usable_attribute_key(text: str) -> bool:
    """Return True for non-blank text whose first character is alphabetic."""
    return text.strip() != "" and text[0].isalpha()


# One strategy per model enum; each samples from the enum's full member list.
provider_type_strategy = st.sampled_from(list(ProviderType))
platform_category_strategy = st.sampled_from(list(PlatformCategory))
cpu_architecture_strategy = st.sampled_from(list(CpuArchitecture))

# Resource names: 1-20 characters drawn from Unicode letters ("L") and
# numbers ("N") plus "_" and "-", rejecting blank results.
_resource_name_alphabet = st.characters(
    whitelist_categories=("L", "N"),
    whitelist_characters="_-",
)
resource_name_strategy = st.text(
    alphabet=_resource_name_alphabet,
    min_size=1,
    max_size=20,
).filter(_is_not_blank)

# Resource types: a fixed catalogue of terraform-style "<provider>_<kind>" names.
_KNOWN_RESOURCE_TYPES = [
    "kubernetes_deployment",
    "kubernetes_service",
    "kubernetes_namespace",
    "docker_service",
    "docker_network",
    "docker_volume",
    "synology_shared_folder",
    "synology_volume",
    "harvester_virtualmachine",
    "harvester_volume",
    "bare_metal_hardware",
    "windows_service",
    "windows_iis_site",
]
resource_type_strategy = st.sampled_from(_KNOWN_RESOURCE_TYPES)

# Simple attribute values: a short non-blank string, a small non-negative
# integer, or a boolean.
_attr_text_alphabet = st.characters(
    whitelist_categories=("L", "N"),
    whitelist_characters="_-./: ",
)
_attr_text_strategy = st.text(
    alphabet=_attr_text_alphabet,
    min_size=1,
    max_size=30,
).filter(_is_not_blank)
simple_attr_value_strategy = st.one_of(
    _attr_text_strategy,
    st.integers(min_value=0, max_value=10000),
    st.booleans(),
)

# Attribute dictionaries: 1-5 entries whose keys are letters/underscores and
# begin with an alphabetic character.
_attr_key_alphabet = st.characters(
    whitelist_categories=("L",),
    whitelist_characters="_",
)
_attr_key_strategy = st.text(
    alphabet=_attr_key_alphabet,
    min_size=1,
    max_size=15,
).filter(_is_usable_attribute_key)
attributes_strategy = st.dictionaries(
    keys=_attr_key_strategy,
    values=simple_attr_value_strategy,
    min_size=1,
    max_size=5,
)
def make_resource(
    unique_id: str,
    resource_type: str = "kubernetes_deployment",
    name: str = "my_resource",
    provider: ProviderType = ProviderType.KUBERNETES,
    platform_category: PlatformCategory = PlatformCategory.CONTAINER_ORCHESTRATION,
    architecture: CpuArchitecture = CpuArchitecture.AMD64,
    attributes: dict | None = None,
    raw_references: list[str] | None = None,
) -> DiscoveredResource:
    """Build a ``DiscoveredResource`` for tests, filling in defaults.

    Only ``unique_id`` is required. A falsy ``attributes`` argument (``None``
    or an empty dict) is replaced by ``{"key": "value"}`` and a falsy
    ``raw_references`` by an empty list. The endpoint is always the same
    fixed URL.
    """
    effective_attributes = attributes if attributes else {"key": "value"}
    effective_references = raw_references if raw_references else []
    return DiscoveredResource(
        unique_id=unique_id,
        name=name,
        resource_type=resource_type,
        provider=provider,
        platform_category=platform_category,
        architecture=architecture,
        endpoint="https://api.internal.lab:6443",
        attributes=effective_attributes,
        raw_references=effective_references,
    )
def make_dependency_graph(
    resources: list[DiscoveredResource],
    relationships: list[ResourceRelationship] | None = None,
) -> DependencyGraph:
    """Wrap *resources* in a ``DependencyGraph`` with no cycles.

    The topological order is simply the resources' ``unique_id`` values in
    the order given; a falsy ``relationships`` becomes an empty list, and
    both ``cycles`` and ``unresolved_references`` are always empty.
    """
    ordered_ids = [resource.unique_id for resource in resources]
    edges = relationships if relationships else []
    return DependencyGraph(
        resources=resources,
        relationships=edges,
        topological_order=ordered_ids,
        cycles=[],
        unresolved_references=[],
    )
@st.composite
def resource_with_dependency_strategy(draw):
    """Draw two resources where the first one references the second.

    Returns a ``(resources, relationships)`` pair: ``resources`` is
    ``[dependent, dependency]`` and ``relationships`` holds the single
    "reference" edge from the dependent's ``target_id`` attribute to the
    dependency.

    NOTE(review): only the two unique IDs are required to differ; two names
    of the same type that collapse to the same sanitized identifier are not
    excluded here - confirm the generator tolerates that.
    """
    dependent_type = draw(resource_type_strategy)
    dependency_type = draw(resource_type_strategy)
    dependent_name = draw(resource_name_strategy)
    dependency_name = draw(resource_name_strategy)
    shared_arch = draw(cpu_architecture_strategy)

    dependent_uid = f"ns/{dependent_type}/{dependent_name}"
    dependency_uid = f"ns/{dependency_type}/{dependency_name}"
    # Discard examples where both resources would share one unique ID.
    assume(dependent_uid != dependency_uid)

    # The dependency is the target of the reference.
    dependency = make_resource(
        unique_id=dependency_uid,
        resource_type=dependency_type,
        name=dependency_name,
        architecture=shared_arch,
        attributes={"port": 8080},
    )
    # The dependent carries the dependency's unique ID both as an attribute
    # value and as a raw reference.
    dependent = make_resource(
        unique_id=dependent_uid,
        resource_type=dependent_type,
        name=dependent_name,
        architecture=shared_arch,
        attributes={"target_id": dependency_uid, "replicas": 3},
        raw_references=[dependency_uid],
    )
    edge = ResourceRelationship(
        source_id=dependent_uid,
        target_id=dependency_uid,
        relationship_type="reference",
        source_attribute="target_id",
    )
    return [dependent, dependency], [edge]
@st.composite
def multiple_resources_strategy(draw):
    """Draw resources spanning 1-5 distinct types, 1-3 resources per type.

    Unique IDs and names are derived from the type and instance indices, and
    each resource gets its own drawn attribute dictionary.
    """
    type_count = draw(st.integers(min_value=1, max_value=5))
    chosen_types = draw(
        st.lists(
            resource_type_strategy,
            min_size=type_count,
            max_size=type_count,
            unique=True,
        )
    )

    generated = []
    for type_index, rtype in enumerate(chosen_types):
        instance_count = draw(st.integers(min_value=1, max_value=3))
        # Attributes are drawn lazily, once per instance, in instance order.
        generated.extend(
            make_resource(
                unique_id=f"{rtype}/instance_{type_index}_{instance_index}",
                resource_type=rtype,
                name=f"res_{type_index}_{instance_index}",
                attributes=draw(attributes_strategy),
            )
            for instance_index in range(instance_count)
        )
    return generated
@st.composite
def resources_with_shared_values_strategy(draw):
    """Draw 2-5 resources that all carry one identical key/value attribute.

    Returns ``(resources, shared_key, shared_value)``. Every resource is a
    ``kubernetes_deployment`` whose attributes contain the shared pair plus a
    per-resource ``"name"`` entry.
    """
    shared_key = draw(st.sampled_from(["region", "environment", "zone", "cluster"]))
    value_alphabet = st.characters(
        whitelist_categories=("L", "N"),
        whitelist_characters="_-",
    )
    value_strategy = st.text(
        alphabet=value_alphabet,
        min_size=1,
        max_size=15,
    ).filter(lambda s: s.strip() != "")
    shared_value = draw(value_strategy)
    resource_count = draw(st.integers(min_value=2, max_value=5))

    resources = [
        make_resource(
            unique_id=f"resource_{index}",
            resource_type="kubernetes_deployment",
            name=f"res_{index}",
            # Same shared pair everywhere; only "name" varies per resource.
            attributes={shared_key: shared_value, "name": f"instance_{index}"},
        )
        for index in range(resource_count)
    ]
    return resources, shared_key, shared_value
# Unconstrained text (possibly empty, at most 50 characters) used to exercise
# sanitize_identifier.
arbitrary_string_strategy = st.text(max_size=50)
# ---------------------------------------------------------------------------
# Property 10: References in generated output use Terraform syntax
# ---------------------------------------------------------------------------
class TestReferencesUseTerraformSyntax:
    """Property 10: References in generated output use Terraform syntax.

    **Validates: Requirements 2.2, 3.5**

    When one resource depends on another, the generated HCL must point at the
    dependency with a ``type.name.id`` expression rather than embedding the
    dependency's original ID as a string.
    """

    @staticmethod
    def _file_for(result, resource):
        """Return the first generated file named ``<resource_type>.tf``, or None."""
        wanted = f"{resource.resource_type}.tf"
        return next(
            (candidate for candidate in result.resource_files if candidate.filename == wanted),
            None,
        )

    @given(data=resource_with_dependency_strategy())
    @settings(max_examples=100)
    def test_references_use_terraform_resource_syntax(
        self, data: tuple[list[DiscoveredResource], list[ResourceRelationship]]
    ):
        """The dependent's file contains a ``type.name.id`` reference to the target."""
        resources, relationships = data
        # By construction resources[0] references resources[1].
        source, target = resources[0], resources[1]

        empty_profiles: list[ScanProfile] = []
        result = CodeGenerator().generate(
            make_dependency_graph(resources, relationships), empty_profiles
        )

        expected_ref = f"{target.resource_type}.{sanitize_identifier(target.name)}.id"
        source_file = self._file_for(result, source)

        assert source_file is not None, (
            f"Expected file {source.resource_type}.tf not found"
        )
        assert expected_ref in source_file.content, (
            f"Expected Terraform reference '{expected_ref}' not found in output. "
            f"Content: {source_file.content[:500]}"
        )

    @given(data=resource_with_dependency_strategy())
    @settings(max_examples=100)
    def test_hardcoded_ids_not_present_for_resolved_references(
        self, data: tuple[list[DiscoveredResource], list[ResourceRelationship]]
    ):
        """The target's unique_id never appears as a quoted string in the source's file."""
        resources, relationships = data
        source, target = resources[0], resources[1]

        empty_profiles: list[ScanProfile] = []
        result = CodeGenerator().generate(
            make_dependency_graph(resources, relationships), empty_profiles
        )

        source_file = self._file_for(result, source)
        assert source_file is not None

        # Only the double-quoted form counts as a hardcoded ID.
        hardcoded_pattern = f'"{target.unique_id}"'
        assert hardcoded_pattern not in source_file.content, (
            f"Hardcoded ID '{hardcoded_pattern}' should not appear in generated HCL. "
            f"Should use Terraform reference instead."
        )
# ---------------------------------------------------------------------------
# Property 11: Generated HCL syntactic validity
# ---------------------------------------------------------------------------
class TestGeneratedHclSyntacticValidity:
    """Property 11: Generated HCL syntactic validity.

    **Validates: Requirements 3.1**

    For any set of resources, the generated HCL contains valid resource blocks
    with proper structure (resource keyword, type, name, braces).
    """

    # Both patterns are compiled once and anchored to the start of a line so
    # that the word "resource" inside a quoted attribute value can never be
    # mistaken for a block header.
    # Matches a full header:  resource "type" "name" {
    RESOURCE_BLOCK_REGEX = re.compile(
        r'^[ \t]*resource\s+"[^"]+"\s+"[^"]+"\s*\{', re.MULTILINE
    )
    # Captures only the type label of a header:  resource "type"
    RESOURCE_TYPE_REGEX = re.compile(r'^[ \t]*resource\s+"([^"]+)"', re.MULTILINE)

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100, suppress_health_check=[HealthCheck.too_slow])
    def test_generated_hcl_has_valid_resource_blocks(
        self, resources: list[DiscoveredResource]
    ):
        """Each file holds exactly ``resource_count`` well-formed block headers."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for gen_file in result.resource_files:
            blocks_found = self.RESOURCE_BLOCK_REGEX.findall(gen_file.content)
            assert len(blocks_found) == gen_file.resource_count, (
                f"Expected {gen_file.resource_count} resource blocks in "
                f"{gen_file.filename}, found {len(blocks_found)}"
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_generated_hcl_has_balanced_braces(
        self, resources: list[DiscoveredResource]
    ):
        """Generated HCL has balanced opening and closing braces."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for gen_file in result.resource_files:
            content = gen_file.content
            open_braces = content.count("{")
            close_braces = content.count("}")
            assert open_braces == close_braces, (
                f"Unbalanced braces in {gen_file.filename}: "
                f"{open_braces} opening vs {close_braces} closing"
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_generated_hcl_resource_type_matches_filename(
        self, resources: list[DiscoveredResource]
    ):
        """Each resource block's type matches the file it's in (filename = type.tf)."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for gen_file in result.resource_files:
            # Strip only the trailing extension; str.replace would also eat a
            # ".tf" occurring in the middle of the name.
            expected_type = gen_file.filename.removesuffix(".tf")
            for rtype in self.RESOURCE_TYPE_REGEX.findall(gen_file.content):
                assert rtype == expected_type, (
                    f"Resource type '{rtype}' found in {gen_file.filename} "
                    f"but expected only '{expected_type}'"
                )
# ---------------------------------------------------------------------------
# Property 12: File organization by resource type
# ---------------------------------------------------------------------------
class TestFileOrganizationByResourceType:
    """Property 12: File organization by resource type.

    **Validates: Requirements 3.2**

    For any set of resources, each resource type gets its own .tf file.
    """

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_one_file_per_resource_type(
        self, resources: list[DiscoveredResource]
    ):
        """The number of resource files equals the number of distinct resource types."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        distinct_types = {r.resource_type for r in resources}
        assert len(result.resource_files) == len(distinct_types), (
            f"Expected {len(distinct_types)} files for {len(distinct_types)} "
            f"distinct types, got {len(result.resource_files)}"
        )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_each_file_named_after_resource_type(
        self, resources: list[DiscoveredResource]
    ):
        """Each generated file is named <resource_type>.tf."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        expected_filenames = {f"{r.resource_type}.tf" for r in resources}
        actual_filenames = {f.filename for f in result.resource_files}
        assert actual_filenames == expected_filenames, (
            f"Expected filenames {expected_filenames}, got {actual_filenames}"
        )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_every_resource_appears_in_exactly_one_file(
        self, resources: list[DiscoveredResource]
    ):
        """Every resource's unique_id appears in exactly one generated file."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for resource in resources:
            # Substring containment is used as the presence check.
            files_containing = [
                f.filename
                for f in result.resource_files
                if resource.unique_id in f.content
            ]
            assert len(files_containing) == 1, (
                f"Resource '{resource.unique_id}' found in {len(files_containing)} "
                f"files: {files_containing}. Expected exactly 1."
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_resource_count_per_file_matches(
        self, resources: list[DiscoveredResource]
    ):
        """Each file's resource_count matches the actual number of resources of that type."""
        # Imported locally, as in the original test, so the block does not
        # depend on a module-level import being present.
        from collections import Counter

        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        type_counts = Counter(r.resource_type for r in resources)
        for gen_file in result.resource_files:
            # Strip only the trailing extension; str.replace would also eat a
            # ".tf" occurring in the middle of the name.
            expected_type = gen_file.filename.removesuffix(".tf")
            assert gen_file.resource_count == type_counts[expected_type], (
                f"File {gen_file.filename} reports {gen_file.resource_count} resources "
                f"but expected {type_counts[expected_type]}"
            )
# ---------------------------------------------------------------------------
# Property 13: Variable extraction for shared values
# ---------------------------------------------------------------------------
class TestVariableExtractionForSharedValues:
    """Property 13: Variable extraction for shared values.

    **Validates: Requirements 3.3**

    Whenever the same value shows up in two or more resources, the extractor
    is expected to lift it into a variable.
    """

    @staticmethod
    def _variables_for_key(variables, shared_key):
        """Return the variables whose name contains *shared_key*."""
        return [candidate for candidate in variables if shared_key in candidate.name]

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_shared_value_produces_extracted_variable(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """At least one extracted variable is named after the shared key."""
        resources, shared_key, _ = data
        variables = VariableExtractor().extract_variables(resources)

        var_names = [v.name for v in variables]
        matching_vars = self._variables_for_key(variables, shared_key)
        assert len(matching_vars) >= 1, (
            f"Expected at least one variable for shared key '{shared_key}', "
            f"got variables: {var_names}"
        )

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_extracted_variable_has_correct_default(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """The first matching variable's default contains the shared value."""
        resources, shared_key, shared_value = data
        variables = VariableExtractor().extract_variables(resources)

        matching_vars = self._variables_for_key(variables, shared_key)
        assert len(matching_vars) >= 1

        # Containment rather than equality: the default may be a formatted literal.
        var = matching_vars[0]
        assert shared_value in var.default_value, (
            f"Expected default to contain '{shared_value}', got '{var.default_value}'"
        )

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_extracted_variable_tracks_usage(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """The first matching variable lists at least two users in ``used_by``."""
        resources, shared_key, _ = data
        variables = VariableExtractor().extract_variables(resources)

        matching_vars = self._variables_for_key(variables, shared_key)
        assert len(matching_vars) >= 1

        var = matching_vars[0]
        assert len(var.used_by) >= 2, (
            f"Expected variable to be used by 2+ resources, "
            f"got {len(var.used_by)}: {var.used_by}"
        )

    @given(data=resources_with_shared_values_strategy())
    @settings(max_examples=100)
    def test_extracted_variable_has_type_and_description(
        self, data: tuple[list[DiscoveredResource], str, str]
    ):
        """Every extracted variable carries a non-empty type_expr and description."""
        resources, _, _ = data
        variables = VariableExtractor().extract_variables(resources)

        for var in variables:
            assert var.type_expr != "", f"Variable '{var.name}' has empty type_expr"
            assert var.description != "", f"Variable '{var.name}' has empty description"
# ---------------------------------------------------------------------------
# Property 14: Identifier sanitization validity
# ---------------------------------------------------------------------------
class TestIdentifierSanitizationValidity:
    """Property 14: Identifier sanitization validity.

    **Validates: Requirements 3.4**

    For any input string, sanitize_identifier produces a valid Terraform identifier.
    """

    # Always applied with ``fullmatch``: with ``match`` the trailing ``$``
    # would also accept an identifier followed by a newline (e.g. "abc\n").
    TERRAFORM_IDENTIFIER_REGEX = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")

    # Characters permitted by TERRAFORM_IDENTIFIER_REGEX, used to generate
    # inputs that are genuinely valid identifiers already.
    _ASCII_IDENTIFIER_ALPHABET = (
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
    )

    @given(name=arbitrary_string_strategy)
    @settings(max_examples=200)
    def test_sanitized_identifier_matches_terraform_pattern(self, name: str):
        """The output always matches ^[a-zA-Z_][a-zA-Z0-9_]*$ in its entirety."""
        result = sanitize_identifier(name)
        assert self.TERRAFORM_IDENTIFIER_REGEX.fullmatch(result), (
            f"sanitize_identifier({name!r}) = {result!r} does not match "
            f"Terraform identifier pattern"
        )

    @given(name=arbitrary_string_strategy)
    @settings(max_examples=200)
    def test_sanitized_identifier_is_non_empty(self, name: str):
        """The output is always a non-empty string."""
        result = sanitize_identifier(name)
        assert len(result) > 0, (
            f"sanitize_identifier({name!r}) produced empty string"
        )

    @given(name=st.text(min_size=1, max_size=30, alphabet="0123456789"))
    @settings(max_examples=100)
    def test_digit_only_input_produces_valid_identifier(self, name: str):
        """Input consisting only of digits still produces a valid identifier."""
        result = sanitize_identifier(name)
        assert self.TERRAFORM_IDENTIFIER_REGEX.fullmatch(result), (
            f"sanitize_identifier({name!r}) = {result!r} is not valid for digit-only input"
        )
        # Redundant with the regex, but yields a more specific failure message.
        assert not result[0].isdigit(), (
            f"sanitize_identifier({name!r}) = {result!r} starts with a digit"
        )

    @given(name=st.text(
        min_size=1,
        max_size=30,
        alphabet=_ASCII_IDENTIFIER_ALPHABET,
    ).filter(lambda s: not s[0].isdigit()))
    @settings(max_examples=100)
    def test_already_valid_identifiers_are_preserved_or_simplified(self, name: str):
        """Input that is already a valid identifier produces a valid result.

        Inputs are restricted to ASCII letters, digits and underscores with a
        non-digit first character, so every example satisfies
        TERRAFORM_IDENTIFIER_REGEX before sanitization. Only validity of the
        output is asserted, not equality with the input.
        """
        result = sanitize_identifier(name)
        assert self.TERRAFORM_IDENTIFIER_REGEX.fullmatch(result), (
            f"sanitize_identifier({name!r}) = {result!r} is not valid"
        )
# ---------------------------------------------------------------------------
# Property 15: Traceability comments in generated code
# ---------------------------------------------------------------------------
class TestTraceabilityCommentsInGeneratedCode:
    """Property 15: Traceability comments in generated code.

    **Validates: Requirements 3.6**

    For any resource, the generated HCL includes a comment with the original unique_id.
    """

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_each_resource_has_traceability_comment(
        self, resources: list[DiscoveredResource]
    ):
        """Every resource's unique_id appears in a comment in the generated output."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        # Search across all files at once; which file holds the comment is
        # not part of this property.
        all_content = "\n".join(f.content for f in result.resource_files)
        for resource in resources:
            comment_pattern = f"# Source: {resource.unique_id}"
            assert comment_pattern in all_content, (
                f"Traceability comment for resource '{resource.unique_id}' "
                f"not found in generated output"
            )

    @given(resources=multiple_resources_strategy())
    @settings(max_examples=100)
    def test_traceability_comment_precedes_resource_block(
        self, resources: list[DiscoveredResource]
    ):
        """The traceability comment appears before its corresponding resource block."""
        graph = make_dependency_graph(resources)
        profiles: list[ScanProfile] = []
        generator = CodeGenerator()
        result = generator.generate(graph, profiles)

        for resource in resources:
            # First file whose content mentions this resource's unique_id.
            target_file = next(
                (f for f in result.resource_files if resource.unique_id in f.content),
                None,
            )
            assert target_file is not None, (
                f"No generated file mentions resource '{resource.unique_id}'"
            )
            content = target_file.content

            # str.find returns -1 when the text is absent; check that
            # explicitly instead of letting -1 leak into a position comparison.
            comment_pos = content.find(f"# Source: {resource.unique_id}")
            assert comment_pos != -1, (
                f"Traceability comment for '{resource.unique_id}' not found "
                f"in {target_file.filename}"
            )

            # Search for the block header only from the comment onwards, so a
            # hit is by construction located after the comment.
            tf_name = sanitize_identifier(resource.name)
            block_pattern = f'resource "{resource.resource_type}" "{tf_name}"'
            block_pos = content.find(block_pattern, comment_pos)
            assert block_pos != -1, (
                f"Resource block '{block_pattern}' not found after the comment "
                f"for '{resource.unique_id}' (pos {comment_pos}) in "
                f"{target_file.filename}"
            )
            assert comment_pos < block_pos, (
                f"Comment for '{resource.unique_id}' (pos {comment_pos}) "
                f"should precede resource block (pos {block_pos})"
            )