"""Property-based tests for the Code Generator. **Validates: Requirements 2.2, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6** Properties tested: - Property 10: References in generated output use Terraform syntax - Property 11: Generated HCL syntactic validity - Property 12: File organization by resource type - Property 13: Variable extraction for shared values - Property 14: Identifier sanitization validity - Property 15: Traceability comments in generated code """ import re from hypothesis import given, settings, assume, HealthCheck from hypothesis import strategies as st from iac_reverse.generator import CodeGenerator, VariableExtractor, sanitize_identifier from iac_reverse.models import ( CpuArchitecture, DependencyGraph, DiscoveredResource, PlatformCategory, ProviderType, ResourceRelationship, ScanProfile, ) # --------------------------------------------------------------------------- # Hypothesis Strategies # --------------------------------------------------------------------------- provider_type_strategy = st.sampled_from(list(ProviderType)) platform_category_strategy = st.sampled_from(list(PlatformCategory)) cpu_architecture_strategy = st.sampled_from(list(CpuArchitecture)) # Strategy for resource names (valid identifiers with some variety) resource_name_strategy = st.text( min_size=1, max_size=20, alphabet=st.characters(whitelist_categories=("L", "N"), whitelist_characters="_-"), ).filter(lambda s: s.strip() != "") # Strategy for resource types (terraform-style: provider_type) resource_type_strategy = st.sampled_from([ "kubernetes_deployment", "kubernetes_service", "kubernetes_namespace", "docker_service", "docker_network", "docker_volume", "synology_shared_folder", "synology_volume", "harvester_virtualmachine", "harvester_volume", "bare_metal_hardware", "windows_service", "windows_iis_site", ]) # Strategy for simple attribute values (strings, ints, bools) simple_attr_value_strategy = st.one_of( st.text(min_size=1, max_size=30, alphabet=st.characters( whitelist_categories=("L", "N"), whitelist_characters="_-./: " )).filter(lambda s: s.strip() != ""), st.integers(min_value=0, max_value=10000), st.booleans(), ) # Strategy for attribute dictionaries attributes_strategy = st.dictionaries( keys=st.text( min_size=1, max_size=15, alphabet=st.characters(whitelist_categories=("L",), whitelist_characters="_"), ).filter(lambda s: s.strip() != "" and s[0].isalpha()), values=simple_attr_value_strategy, min_size=1, max_size=5, ) def make_resource( unique_id: str, resource_type: str = "kubernetes_deployment", name: str = "my_resource", provider: ProviderType = ProviderType.KUBERNETES, platform_category: PlatformCategory = PlatformCategory.CONTAINER_ORCHESTRATION, architecture: CpuArchitecture = CpuArchitecture.AMD64, attributes: dict | None = None, raw_references: list[str] | None = None, ) -> DiscoveredResource: """Helper to create a DiscoveredResource with sensible defaults.""" return DiscoveredResource( resource_type=resource_type, unique_id=unique_id, name=name, provider=provider, platform_category=platform_category, architecture=architecture, endpoint="https://api.internal.lab:6443", attributes=attributes or {"key": "value"}, raw_references=raw_references or [], ) def make_dependency_graph( resources: list[DiscoveredResource], relationships: list[ResourceRelationship] | None = None, ) -> DependencyGraph: """Helper to create a DependencyGraph from resources.""" return DependencyGraph( resources=resources, relationships=relationships or [], topological_order=[r.unique_id for r in resources], cycles=[], unresolved_references=[], ) @st.composite def resource_with_dependency_strategy(draw): """Generate a pair of resources where one depends on the other. Returns (resources, relationships) where the first resource references the second. """ resource_type_a = draw(resource_type_strategy) resource_type_b = draw(resource_type_strategy) name_a = draw(resource_name_strategy) name_b = draw(resource_name_strategy) arch = draw(cpu_architecture_strategy) # Ensure unique IDs are different uid_a = f"ns/{resource_type_a}/{name_a}" uid_b = f"ns/{resource_type_b}/{name_b}" assume(uid_a != uid_b) # Resource B is the dependency target resource_b = make_resource( unique_id=uid_b, resource_type=resource_type_b, name=name_b, architecture=arch, attributes={"port": 8080}, ) # Resource A references resource B's unique_id in its attributes resource_a = make_resource( unique_id=uid_a, resource_type=resource_type_a, name=name_a, architecture=arch, attributes={"target_id": uid_b, "replicas": 3}, raw_references=[uid_b], ) relationship = ResourceRelationship( source_id=uid_a, target_id=uid_b, relationship_type="reference", source_attribute="target_id", ) return [resource_a, resource_b], [relationship] @st.composite def multiple_resources_strategy(draw): """Generate a list of resources with distinct types for file organization testing.""" num_types = draw(st.integers(min_value=1, max_value=5)) types = draw( st.lists( resource_type_strategy, min_size=num_types, max_size=num_types, unique=True, ) ) resources = [] for i, rtype in enumerate(types): # Each type gets 1-3 resources num_resources_of_type = draw(st.integers(min_value=1, max_value=3)) for j in range(num_resources_of_type): uid = f"{rtype}/instance_{i}_{j}" name = f"res_{i}_{j}" attrs = draw(attributes_strategy) resource = make_resource( unique_id=uid, resource_type=rtype, name=name, attributes=attrs, ) resources.append(resource) return resources @st.composite def resources_with_shared_values_strategy(draw): """Generate resources where at least one attribute value appears in 2+ resources.""" shared_key = draw(st.sampled_from(["region", "environment", "zone", "cluster"])) shared_value = draw(st.text( min_size=1, max_size=15, alphabet=st.characters(whitelist_categories=("L", "N"), whitelist_characters="_-"), ).filter(lambda s: s.strip() != "")) num_resources = draw(st.integers(min_value=2, max_value=5)) resources = [] for i in range(num_resources): uid = f"resource_{i}" name = f"res_{i}" # All resources share the same key-value pair attrs = {shared_key: shared_value, "name": f"instance_{i}"} resource = make_resource( unique_id=uid, resource_type="kubernetes_deployment", name=name, attributes=attrs, ) resources.append(resource) return resources, shared_key, shared_value # Strategy for arbitrary strings to test sanitize_identifier arbitrary_string_strategy = st.text(min_size=0, max_size=50) # --------------------------------------------------------------------------- # Property 10: References in generated output use Terraform syntax # --------------------------------------------------------------------------- class TestReferencesUseTerraformSyntax: """Property 10: References in generated output use Terraform syntax. **Validates: Requirements 2.2, 3.5** For any resource with dependencies, the generated HCL uses Terraform resource references (type.name.id) not hardcoded IDs. """ @given(data=resource_with_dependency_strategy()) @settings(max_examples=100) def test_references_use_terraform_resource_syntax( self, data: tuple[list[DiscoveredResource], list[ResourceRelationship]] ): """Generated HCL uses type.name.id references instead of hardcoded IDs.""" resources, relationships = data graph = make_dependency_graph(resources, relationships) profiles: list[ScanProfile] = [] generator = CodeGenerator() result = generator.generate(graph, profiles) # The source resource (resources[0]) references resources[1] target = resources[1] target_tf_name = sanitize_identifier(target.name) expected_ref = f"{target.resource_type}.{target_tf_name}.id" # Find the file containing the source resource source = resources[0] source_file = None for f in result.resource_files: if f.filename == f"{source.resource_type}.tf": source_file = f break assert source_file is not None, ( f"Expected file {source.resource_type}.tf not found" ) # The generated content should contain the Terraform reference assert expected_ref in source_file.content, ( f"Expected Terraform reference '{expected_ref}' not found in output. " f"Content: {source_file.content[:500]}" ) @given(data=resource_with_dependency_strategy()) @settings(max_examples=100) def test_hardcoded_ids_not_present_for_resolved_references( self, data: tuple[list[DiscoveredResource], list[ResourceRelationship]] ): """The target resource's unique_id should not appear as a hardcoded string in the source resource's block.""" resources, relationships = data graph = make_dependency_graph(resources, relationships) profiles: list[ScanProfile] = [] generator = CodeGenerator() result = generator.generate(graph, profiles) target = resources[1] source = resources[0] # Find the file containing the source resource source_file = None for f in result.resource_files: if f.filename == f"{source.resource_type}.tf": source_file = f break assert source_file is not None # The hardcoded unique_id of the target should NOT appear as a quoted string hardcoded_pattern = f'"{target.unique_id}"' assert hardcoded_pattern not in source_file.content, ( f"Hardcoded ID '{hardcoded_pattern}' should not appear in generated HCL. " f"Should use Terraform reference instead." ) # --------------------------------------------------------------------------- # Property 11: Generated HCL syntactic validity # --------------------------------------------------------------------------- class TestGeneratedHclSyntacticValidity: """Property 11: Generated HCL syntactic validity. **Validates: Requirements 3.1** For any set of resources, the generated HCL contains valid resource blocks with proper structure (resource keyword, type, name, braces). """ @given(resources=multiple_resources_strategy()) @settings(max_examples=100, suppress_health_check=[HealthCheck.too_slow]) def test_generated_hcl_has_valid_resource_blocks( self, resources: list[DiscoveredResource] ): """Each generated file contains properly structured resource blocks.""" graph = make_dependency_graph(resources) profiles: list[ScanProfile] = [] generator = CodeGenerator() result = generator.generate(graph, profiles) for gen_file in result.resource_files: content = gen_file.content # Each resource block should have the pattern: # resource "type" "name" { resource_block_pattern = re.compile( r'resource\s+"[^"]+"\s+"[^"]+"\s*\{' ) blocks_found = resource_block_pattern.findall(content) assert len(blocks_found) == gen_file.resource_count, ( f"Expected {gen_file.resource_count} resource blocks in " f"{gen_file.filename}, found {len(blocks_found)}" ) @given(resources=multiple_resources_strategy()) @settings(max_examples=100) def test_generated_hcl_has_balanced_braces( self, resources: list[DiscoveredResource] ): """Generated HCL has balanced opening and closing braces.""" graph = make_dependency_graph(resources) profiles: list[ScanProfile] = [] generator = CodeGenerator() result = generator.generate(graph, profiles) for gen_file in result.resource_files: content = gen_file.content open_braces = content.count("{") close_braces = content.count("}") assert open_braces == close_braces, ( f"Unbalanced braces in {gen_file.filename}: " f"{open_braces} opening vs {close_braces} closing" ) @given(resources=multiple_resources_strategy()) @settings(max_examples=100) def test_generated_hcl_resource_type_matches_filename( self, resources: list[DiscoveredResource] ): """Each resource block's type matches the file it's in (filename = type.tf).""" graph = make_dependency_graph(resources) profiles: list[ScanProfile] = [] generator = CodeGenerator() result = generator.generate(graph, profiles) for gen_file in result.resource_files: expected_type = gen_file.filename.replace(".tf", "") # All resource blocks in this file should be of the expected type resource_types_in_file = re.findall( r'resource\s+"([^"]+)"', gen_file.content ) for rtype in resource_types_in_file: assert rtype == expected_type, ( f"Resource type '{rtype}' found in {gen_file.filename} " f"but expected only '{expected_type}'" ) # --------------------------------------------------------------------------- # Property 12: File organization by resource type # --------------------------------------------------------------------------- class TestFileOrganizationByResourceType: """Property 12: File organization by resource type. **Validates: Requirements 3.2** For any set of resources, each resource type gets its own .tf file. """ @given(resources=multiple_resources_strategy()) @settings(max_examples=100) def test_one_file_per_resource_type( self, resources: list[DiscoveredResource] ): """The number of resource files equals the number of distinct resource types.""" graph = make_dependency_graph(resources) profiles: list[ScanProfile] = [] generator = CodeGenerator() result = generator.generate(graph, profiles) distinct_types = {r.resource_type for r in resources} assert len(result.resource_files) == len(distinct_types), ( f"Expected {len(distinct_types)} files for {len(distinct_types)} " f"distinct types, got {len(result.resource_files)}" ) @given(resources=multiple_resources_strategy()) @settings(max_examples=100) def test_each_file_named_after_resource_type( self, resources: list[DiscoveredResource] ): """Each generated file is named .tf.""" graph = make_dependency_graph(resources) profiles: list[ScanProfile] = [] generator = CodeGenerator() result = generator.generate(graph, profiles) distinct_types = {r.resource_type for r in resources} expected_filenames = {f"{rt}.tf" for rt in distinct_types} actual_filenames = {f.filename for f in result.resource_files} assert actual_filenames == expected_filenames, ( f"Expected filenames {expected_filenames}, got {actual_filenames}" ) @given(resources=multiple_resources_strategy()) @settings(max_examples=100) def test_every_resource_appears_in_exactly_one_file( self, resources: list[DiscoveredResource] ): """Every resource's unique_id appears in exactly one generated file.""" graph = make_dependency_graph(resources) profiles: list[ScanProfile] = [] generator = CodeGenerator() result = generator.generate(graph, profiles) for resource in resources: files_containing = [ f.filename for f in result.resource_files if resource.unique_id in f.content ] assert len(files_containing) == 1, ( f"Resource '{resource.unique_id}' found in {len(files_containing)} " f"files: {files_containing}. Expected exactly 1." ) @given(resources=multiple_resources_strategy()) @settings(max_examples=100) def test_resource_count_per_file_matches( self, resources: list[DiscoveredResource] ): """Each file's resource_count matches the actual number of resources of that type.""" graph = make_dependency_graph(resources) profiles: list[ScanProfile] = [] generator = CodeGenerator() result = generator.generate(graph, profiles) # Count resources per type from collections import Counter type_counts = Counter(r.resource_type for r in resources) for gen_file in result.resource_files: expected_type = gen_file.filename.replace(".tf", "") assert gen_file.resource_count == type_counts[expected_type], ( f"File {gen_file.filename} reports {gen_file.resource_count} resources " f"but expected {type_counts[expected_type]}" ) # --------------------------------------------------------------------------- # Property 13: Variable extraction for shared values # --------------------------------------------------------------------------- class TestVariableExtractionForSharedValues: """Property 13: Variable extraction for shared values. **Validates: Requirements 3.3** For any set of resources where a value appears in 2+ resources, a variable is extracted. """ @given(data=resources_with_shared_values_strategy()) @settings(max_examples=100) def test_shared_value_produces_extracted_variable( self, data: tuple[list[DiscoveredResource], str, str] ): """A value appearing in 2+ resources results in an extracted variable.""" resources, shared_key, shared_value = data extractor = VariableExtractor() variables = extractor.extract_variables(resources) # There should be at least one variable extracted for the shared key var_names = [v.name for v in variables] # The variable name should contain the shared key matching_vars = [v for v in variables if shared_key in v.name] assert len(matching_vars) >= 1, ( f"Expected at least one variable for shared key '{shared_key}', " f"got variables: {var_names}" ) @given(data=resources_with_shared_values_strategy()) @settings(max_examples=100) def test_extracted_variable_has_correct_default( self, data: tuple[list[DiscoveredResource], str, str] ): """The extracted variable's default value matches the shared value.""" resources, shared_key, shared_value = data extractor = VariableExtractor() variables = extractor.extract_variables(resources) matching_vars = [v for v in variables if shared_key in v.name] assert len(matching_vars) >= 1 # The default should be the shared value (formatted as a string literal) var = matching_vars[0] assert shared_value in var.default_value, ( f"Expected default to contain '{shared_value}', got '{var.default_value}'" ) @given(data=resources_with_shared_values_strategy()) @settings(max_examples=100) def test_extracted_variable_tracks_usage( self, data: tuple[list[DiscoveredResource], str, str] ): """The extracted variable's used_by list contains at least 2 resource IDs.""" resources, shared_key, shared_value = data extractor = VariableExtractor() variables = extractor.extract_variables(resources) matching_vars = [v for v in variables if shared_key in v.name] assert len(matching_vars) >= 1 var = matching_vars[0] assert len(var.used_by) >= 2, ( f"Expected variable to be used by 2+ resources, " f"got {len(var.used_by)}: {var.used_by}" ) @given(data=resources_with_shared_values_strategy()) @settings(max_examples=100) def test_extracted_variable_has_type_and_description( self, data: tuple[list[DiscoveredResource], str, str] ): """Each extracted variable has a non-empty type expression and description.""" resources, shared_key, shared_value = data extractor = VariableExtractor() variables = extractor.extract_variables(resources) for var in variables: assert var.type_expr != "", f"Variable '{var.name}' has empty type_expr" assert var.description != "", f"Variable '{var.name}' has empty description" # --------------------------------------------------------------------------- # Property 14: Identifier sanitization validity # --------------------------------------------------------------------------- class TestIdentifierSanitizationValidity: """Property 14: Identifier sanitization validity. **Validates: Requirements 3.4** For any input string, sanitize_identifier produces a valid Terraform identifier. """ TERRAFORM_IDENTIFIER_REGEX = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$") @given(name=arbitrary_string_strategy) @settings(max_examples=200) def test_sanitized_identifier_matches_terraform_pattern(self, name: str): """The output always matches ^[a-zA-Z_][a-zA-Z0-9_]*$.""" result = sanitize_identifier(name) assert self.TERRAFORM_IDENTIFIER_REGEX.match(result), ( f"sanitize_identifier({name!r}) = {result!r} does not match " f"Terraform identifier pattern" ) @given(name=arbitrary_string_strategy) @settings(max_examples=200) def test_sanitized_identifier_is_non_empty(self, name: str): """The output is always a non-empty string.""" result = sanitize_identifier(name) assert len(result) > 0, ( f"sanitize_identifier({name!r}) produced empty string" ) @given(name=st.text(min_size=1, max_size=30, alphabet="0123456789")) @settings(max_examples=100) def test_digit_only_input_produces_valid_identifier(self, name: str): """Input consisting only of digits still produces a valid identifier.""" result = sanitize_identifier(name) assert self.TERRAFORM_IDENTIFIER_REGEX.match(result), ( f"sanitize_identifier({name!r}) = {result!r} is not valid for digit-only input" ) # Must not start with a digit assert not result[0].isdigit(), ( f"sanitize_identifier({name!r}) = {result!r} starts with a digit" ) @given(name=st.text( min_size=1, max_size=30, alphabet=st.characters(whitelist_categories=("L", "N"), whitelist_characters="_"), ).filter(lambda s: s[0].isalpha() or s[0] == "_")) @settings(max_examples=100) def test_already_valid_identifiers_are_preserved_or_simplified(self, name: str): """Input that is already a valid identifier produces a valid result.""" result = sanitize_identifier(name) assert self.TERRAFORM_IDENTIFIER_REGEX.match(result), ( f"sanitize_identifier({name!r}) = {result!r} is not valid" ) # --------------------------------------------------------------------------- # Property 15: Traceability comments in generated code # --------------------------------------------------------------------------- class TestTraceabilityCommentsInGeneratedCode: """Property 15: Traceability comments in generated code. **Validates: Requirements 3.6** For any resource, the generated HCL includes a comment with the original unique_id. """ @given(resources=multiple_resources_strategy()) @settings(max_examples=100) def test_each_resource_has_traceability_comment( self, resources: list[DiscoveredResource] ): """Every resource's unique_id appears in a comment in the generated output.""" graph = make_dependency_graph(resources) profiles: list[ScanProfile] = [] generator = CodeGenerator() result = generator.generate(graph, profiles) # Collect all generated content all_content = "\n".join(f.content for f in result.resource_files) for resource in resources: # The unique_id should appear in a comment line comment_pattern = f"# Source: {resource.unique_id}" assert comment_pattern in all_content, ( f"Traceability comment for resource '{resource.unique_id}' " f"not found in generated output" ) @given(resources=multiple_resources_strategy()) @settings(max_examples=100) def test_traceability_comment_precedes_resource_block( self, resources: list[DiscoveredResource] ): """The traceability comment appears before its corresponding resource block.""" graph = make_dependency_graph(resources) profiles: list[ScanProfile] = [] generator = CodeGenerator() result = generator.generate(graph, profiles) for resource in resources: # Find the file containing this resource target_file = None for f in result.resource_files: if resource.unique_id in f.content: target_file = f break assert target_file is not None content = target_file.content comment_pos = content.find(f"# Source: {resource.unique_id}") tf_name = sanitize_identifier(resource.name) block_pattern = f'resource "{resource.resource_type}" "{tf_name}"' block_pos = content.find(block_pattern, comment_pos) assert comment_pos < block_pos, ( f"Comment for '{resource.unique_id}' (pos {comment_pos}) " f"should precede resource block (pos {block_pos})" )