"""Unit tests for identifier sanitization.""" import re import pytest from iac_reverse.generator.sanitize import sanitize_identifier TERRAFORM_IDENTIFIER_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$") class TestSanitizeIdentifierNormalNames: def test_simple_name_passes_through(self): assert sanitize_identifier("nginx") == "nginx" def test_name_with_underscores_passes_through(self): assert sanitize_identifier("my_app") == "my_app" def test_alphanumeric_name_passes_through(self): assert sanitize_identifier("app123") == "app123" class TestSanitizeIdentifierHyphens: def test_hyphens_replaced_with_underscore(self): assert sanitize_identifier("my-app") == "my_app" def test_multiple_hyphens(self): assert sanitize_identifier("my-cool-app") == "my_cool_app" class TestSanitizeIdentifierLeadingDigits: def test_leading_digit_gets_underscore_prefix(self): assert sanitize_identifier("123abc") == "_123abc" def test_all_digits(self): result = sanitize_identifier("12345") assert result == "_12345" assert TERRAFORM_IDENTIFIER_RE.match(result) class TestSanitizeIdentifierSpaces: def test_spaces_replaced_with_underscore(self): assert sanitize_identifier("my app") == "my_app" def test_multiple_spaces_collapse(self): assert sanitize_identifier("my app") == "my_app" class TestSanitizeIdentifierUnicode: def test_unicode_replaced(self): # é is replaced with underscore, trailing underscore preserved assert sanitize_identifier("café") == "caf_" def test_all_unicode(self): result = sanitize_identifier("日本語") assert result == "_resource" def test_emoji_replaced(self): result = sanitize_identifier("app🚀name") assert result == "app_name" class TestSanitizeIdentifierEmptyAndSpecial: def test_empty_string_returns_resource(self): assert sanitize_identifier("") == "_resource" def test_all_special_chars_returns_resource(self): assert sanitize_identifier("@#$%^&*") == "_resource" def test_single_special_char(self): assert sanitize_identifier("!") == "_resource" class TestSanitizeIdentifierConsecutiveSpecialChars: def test_multiple_consecutive_special_chars_collapse(self): assert sanitize_identifier("a---b") == "a_b" def test_mixed_special_chars_collapse(self): assert sanitize_identifier("a-.-b") == "a_b" def test_leading_special_chars_collapse(self): # Leading hyphens become underscore (valid identifier start) result = sanitize_identifier("---abc") assert result == "_abc" class TestSanitizeIdentifierAlwaysValid: """Verify the result always matches the Terraform identifier regex.""" @pytest.mark.parametrize("name", [ "nginx", "my-app", "123abc", "my app", "café", "", "@#$%^&*", "a---b", "___", "hello_world_123", "日本語テスト", " leading spaces", "trailing ", "MixedCase-Name_123", "a", "_", "0", ]) def test_result_matches_terraform_regex(self, name): result = sanitize_identifier(name) assert TERRAFORM_IDENTIFIER_RE.match(result), ( f"sanitize_identifier({name!r}) = {result!r} does not match " f"Terraform identifier pattern" )