"""Variable extraction logic for Terraform code generation. Identifies attribute values that appear in 2+ resources and extracts them into Terraform variables with appropriate type expressions and defaults. """ import logging from collections import defaultdict from iac_reverse.models import DiscoveredResource, ExtractedVariable logger = logging.getLogger(__name__) def _infer_type_expr(value: object) -> str: """Infer a Terraform type expression from a Python value. Args: value: The Python value to infer a type for. Returns: A Terraform type expression string (e.g., "string", "number", "bool"). """ if isinstance(value, bool): return "bool" elif isinstance(value, int) or isinstance(value, float): return "number" elif isinstance(value, str): return "string" elif isinstance(value, list): return "list(string)" elif isinstance(value, dict): return "map(string)" else: return "string" def _format_default_value(value: object) -> str: """Format a Python value as a Terraform default value literal. Args: value: The Python value to format. Returns: A string representation suitable for a Terraform variable default. """ if isinstance(value, bool): return "true" if value else "false" elif isinstance(value, int) or isinstance(value, float): return str(value) elif isinstance(value, str): return f'"{value}"' elif isinstance(value, list): items = ", ".join(f'"{item}"' if isinstance(item, str) else str(item) for item in value) return f"[{items}]" elif isinstance(value, dict): entries = ", ".join(f'"{k}" = "{v}"' for k, v in value.items()) return "{" + entries + "}" else: return f'"{value}"' def _make_hashable(value: object) -> object: """Convert a value to a hashable representation for counting. Args: value: Any Python value from resource attributes. Returns: A hashable version of the value. """ if isinstance(value, dict): return tuple(sorted(value.items())) elif isinstance(value, list): return tuple(value) else: return value class VariableExtractor: """Extracts shared attribute values into Terraform variables. Scans a list of DiscoveredResource objects, identifies attribute values that appear in 2 or more resources, and creates ExtractedVariable instances for each shared value. """ def extract_variables( self, resources: list[DiscoveredResource] ) -> list[ExtractedVariable]: """Identify shared attribute values and extract them as variables. For each attribute key, collects all values across all resources. If a value appears in 2+ resources for the same attribute key, it becomes a variable with the most common value as the default. Args: resources: List of discovered resources to analyze. Returns: List of ExtractedVariable instances for shared values. """ if len(resources) < 2: return [] # Collect attribute values grouped by attribute key # key -> {hashable_value -> [list of (resource_unique_id, original_value)]} attr_values: dict[str, dict[object, list[tuple[str, object]]]] = defaultdict( lambda: defaultdict(list) ) for resource in resources: for attr_key, attr_value in resource.attributes.items(): # Skip complex nested structures (dicts/lists) for variable extraction # as they are less likely to be meaningfully shared if isinstance(attr_value, (dict, list)): continue hashable = _make_hashable(attr_value) attr_values[attr_key][hashable].append( (resource.unique_id, attr_value) ) # Build extracted variables for values appearing in 2+ resources variables: list[ExtractedVariable] = [] for attr_key, value_groups in sorted(attr_values.items()): # Find all values that appear in 2+ resources for this key shared_values = [ (hv, entries) for hv, entries in value_groups.items() if len(entries) >= 2 ] if not shared_values: continue # If only one shared value exists for this key, use the key as the var name # If multiple shared values exist, disambiguate with a suffix for idx, (hashable_value, resource_entries) in enumerate(shared_values): original_value = resource_entries[0][1] used_by = [entry[0] for entry in resource_entries] # Determine the most common value among the shared values for this key # The default is set to the most common value overall most_common_entries = max(shared_values, key=lambda x: len(x[1])) most_common_value = most_common_entries[1][0][1] # Use the most common value as default for the primary variable, # but each variable's default is its own value default_value = _format_default_value(original_value) if len(shared_values) == 1: var_name = f"var_{attr_key}" else: # Disambiguate when multiple shared values exist for same key var_name = f"var_{attr_key}_{idx}" type_expr = _infer_type_expr(original_value) description = ( f"Shared {attr_key} value extracted from " f"{len(resource_entries)} resources" ) variables.append( ExtractedVariable( name=var_name, type_expr=type_expr, default_value=default_value, description=description, used_by=used_by, ) ) return variables def generate_variables_tf( self, variables: list[ExtractedVariable] ) -> str: """Generate Terraform variables.tf file content. Produces variable blocks with type, description, and default values. Args: variables: List of extracted variables to render. Returns: String content for a variables.tf file. """ if not variables: return "" blocks: list[str] = [] for var in variables: block = ( f'variable "{var.name}" {{\n' f' type = {var.type_expr}\n' f' description = "{var.description}"\n' f' default = {var.default_value}\n' f'}}' ) blocks.append(block) return "\n\n".join(blocks) + "\n"