Files
SnarfCode/src/iac_reverse/generator/variable_extractor.py
2026-05-22 00:19:30 -04:00

204 lines
6.9 KiB
Python

"""Variable extraction logic for Terraform code generation.
Identifies attribute values that appear in 2+ resources and extracts them
into Terraform variables with appropriate type expressions and defaults.
"""
import logging
from collections import defaultdict
from iac_reverse.models import DiscoveredResource, ExtractedVariable
logger = logging.getLogger(__name__)
def _infer_type_expr(value: object) -> str:
"""Infer a Terraform type expression from a Python value.
Args:
value: The Python value to infer a type for.
Returns:
A Terraform type expression string (e.g., "string", "number", "bool").
"""
if isinstance(value, bool):
return "bool"
elif isinstance(value, int) or isinstance(value, float):
return "number"
elif isinstance(value, str):
return "string"
elif isinstance(value, list):
return "list(string)"
elif isinstance(value, dict):
return "map(string)"
else:
return "string"
def _format_default_value(value: object) -> str:
"""Format a Python value as a Terraform default value literal.
Args:
value: The Python value to format.
Returns:
A string representation suitable for a Terraform variable default.
"""
if isinstance(value, bool):
return "true" if value else "false"
elif isinstance(value, int) or isinstance(value, float):
return str(value)
elif isinstance(value, str):
return f'"{value}"'
elif isinstance(value, list):
items = ", ".join(f'"{item}"' if isinstance(item, str) else str(item) for item in value)
return f"[{items}]"
elif isinstance(value, dict):
entries = ", ".join(f'"{k}" = "{v}"' for k, v in value.items())
return "{" + entries + "}"
else:
return f'"{value}"'
def _make_hashable(value: object) -> object:
"""Convert a value to a hashable representation for counting.
Args:
value: Any Python value from resource attributes.
Returns:
A hashable version of the value.
"""
if isinstance(value, dict):
return tuple(sorted(value.items()))
elif isinstance(value, list):
return tuple(value)
else:
return value
class VariableExtractor:
    """Extracts shared attribute values into Terraform variables.

    Scans a list of DiscoveredResource objects, identifies attribute values
    that appear in 2 or more resources, and creates ExtractedVariable instances
    for each shared value.
    """

    @staticmethod
    def _escape_hcl(text: str) -> str:
        """Escape *text* for use inside a double-quoted HCL string literal."""
        # Backslashes go first so the escapes added below are not doubled again.
        escaped = text.replace("\\", "\\\\").replace('"', '\\"')
        escaped = escaped.replace("\n", "\\n").replace("\r", "\\r").replace("\t", "\\t")
        # "${" / "%{" would otherwise be read as template sequences.
        return escaped.replace("${", "$${").replace("%{", "%%{")

    def extract_variables(
        self, resources: list[DiscoveredResource]
    ) -> list[ExtractedVariable]:
        """Identify shared attribute values and extract them as variables.

        For each attribute key, the scalar values of that attribute are
        grouped across all resources. Every distinct value that occurs in 2+
        resources becomes its own variable whose default is that value.
        Dict and list attribute values are skipped.

        Variables are named ``var_<key>`` when a key has a single shared
        value, or ``var_<key>_<index>`` (index in first-seen order) when a
        key has several shared values. Keys are processed in sorted order.

        Args:
            resources: List of discovered resources to analyze.

        Returns:
            List of ExtractedVariable instances for shared values; empty when
            fewer than two resources are given.
        """
        if len(resources) < 2:
            return []

        # attribute key -> grouping key -> [(resource unique_id, original value)]
        attr_values: dict[str, dict[object, list[tuple[str, object]]]] = defaultdict(
            lambda: defaultdict(list)
        )
        for resource in resources:
            for attr_key, attr_value in resource.attributes.items():
                # Skip complex nested structures (dicts/lists) for variable
                # extraction as they are less likely to be meaningfully shared.
                if isinstance(attr_value, (dict, list)):
                    continue
                # True == 1 and hash(True) == hash(1) in Python, so a bool and
                # a number would otherwise fall into the same group even
                # though they are different Terraform types.
                group_key = (isinstance(attr_value, bool), _make_hashable(attr_value))
                attr_values[attr_key][group_key].append(
                    (resource.unique_id, attr_value)
                )

        # Build extracted variables for values appearing in 2+ resources.
        variables: list[ExtractedVariable] = []
        for attr_key in sorted(attr_values):
            shared_groups = [
                entries
                for entries in attr_values[attr_key].values()
                if len(entries) >= 2
            ]
            if not shared_groups:
                continue

            # A single shared value uses the bare key as the variable name;
            # several shared values are disambiguated with an index suffix.
            needs_suffix = len(shared_groups) > 1
            for idx, resource_entries in enumerate(shared_groups):
                original_value = resource_entries[0][1]
                var_name = (
                    f"var_{attr_key}_{idx}" if needs_suffix else f"var_{attr_key}"
                )
                description = (
                    f"Shared {attr_key} value extracted from "
                    f"{len(resource_entries)} resources"
                )
                variables.append(
                    ExtractedVariable(
                        name=var_name,
                        type_expr=_infer_type_expr(original_value),
                        default_value=_format_default_value(original_value),
                        description=description,
                        used_by=[resource_id for resource_id, _ in resource_entries],
                    )
                )
        return variables

    def generate_variables_tf(
        self, variables: list[ExtractedVariable]
    ) -> str:
        """Generate Terraform variables.tf file content.

        Produces one ``variable`` block per entry with type, description, and
        default value. The description is escaped so that quotes, backslashes,
        newlines or template markers in it cannot break the generated file.
        ``type_expr`` and ``default_value`` are written verbatim, as they are
        expected to already be valid Terraform expressions.

        Args:
            variables: List of extracted variables to render.

        Returns:
            String content for a variables.tf file: blocks separated by a
            blank line and terminated by a newline, or "" when there are no
            variables.
        """
        if not variables:
            return ""
        blocks: list[str] = []
        for var in variables:
            description = self._escape_hcl(var.description)
            blocks.append(
                f'variable "{var.name}" {{\n'
                f' type = {var.type_expr}\n'
                f' description = "{description}"\n'
                f' default = {var.default_value}\n'
                f'}}'
            )
        return "\n\n".join(blocks) + "\n"