Skip to content

Commit

Permalink
Quote strings that contain :
Browse files Browse the repository at this point in the history
  • Loading branch information
lalten committed Aug 5, 2024
1 parent 47039c9 commit d618467
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ variable, use `fix_code`:
- item
- item
```
- Quote unquoted, single-line strings that contain `:` to prevent some parsers from misinterpreting them as mappings.

# Configuration

Expand Down
86 changes: 85 additions & 1 deletion src/yamlfix/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import re
from functools import partial
from io import StringIO
from typing import Any, Callable, List, Match, Optional, Tuple
from typing import Any, Callable, List, Match, NamedTuple, Optional, Tuple

from ruyaml.comments import CommentedSeq
from ruyaml.main import YAML
from ruyaml.nodes import MappingNode, Node, ScalarNode, SequenceNode
from ruyaml.representer import RoundTripRepresenter
Expand Down Expand Up @@ -349,6 +350,7 @@ def fix(self, source_code: str) -> str:
self._fix_truthy_strings,
self._fix_jinja_variables,
self._ruamel_yaml_fixer,
self._quote_strings_with_colons,
self._restore_truthy_strings,
self._restore_jinja_variables,
self._restore_double_exclamations,
Expand Down Expand Up @@ -795,3 +797,85 @@ def _restore_jinja_variables(source_code: str) -> str:
fixed_source_lines.append(line)

return "\n".join(fixed_source_lines)

def _quote_strings_with_colons(self, source_code: str) -> str:
    """Quote plain single-line strings that contain a colon.

    Example (with the default single-quote representation):
        volumes: [/root:/mapped]
    becomes
        volumes: ['/root:/mapped']

    How it works:
        1. Load every document in source_code with self.yaml.
        2. Recursively walk sequences and mappings looking for string values
           that
            * contain a colon,
            * are not already quoted, and
            * appear verbatim on a single source line (see note).
        3. Wrap each of them in the configured quote character, editing from
           the end of the file backwards so earlier offsets stay valid.

    Note: Multi-line strings are not supported because ruyaml only provides
    the start location of a scalar, but not the end location. For single-line
    strings the end is start + len(string); for strings broken over several
    lines this is not straightforward. For the same reason a candidate is only
    quoted when the source text at the computed span equals the loaded string;
    this skips scalars whose reported start is not the first character of the
    text (presumably anchored or tagged scalars -- TODO confirm against
    ruyaml) instead of corrupting them.

    Args:
        source_code: YAML text to fix.

    Returns:
        The lines of source_code joined by newlines, with quotes inserted.
    """
    log.debug("Fixing unquoted strings with colons...")

    class ToFix(NamedTuple):
        """Where to insert quotes."""

        line: int
        start: int
        end: int

    positions_to_quote = set()  # type: set[ToFix]
    lines = source_code.splitlines()

    def add(item: str, line: int, col: int) -> None:
        """Record item's span if it can safely be wrapped in quotes."""
        # Cheap checks first; this also avoids indexing past the source.
        if ":" not in item or line >= len(lines):
            return
        text = lines[line]
        # Slicing (not indexing) so an out-of-range column cannot raise.
        if text[col : col + 1] in ('"', "'"):  # noqa: E203
            return
        end = col + len(item)
        if end > len(text):
            log.debug("Skipping %r which is multi-line", item)
            return
        if text[col:end] != item:
            # The span is not the scalar itself, so quoting it would
            # rewrite unrelated characters.
            log.debug("Skipping %r which does not match the source text", item)
            return
        positions_to_quote.add(ToFix(line=line, start=col, end=end))

    def check(value: Any) -> None:
        """Walk sequences and mappings, registering string values."""
        if isinstance(value, CommentedSeq):
            for i, item in enumerate(value):
                if isinstance(item, str):
                    line, col = value.lc.item(i)
                    add(item, line, col)
                else:
                    check(item)
        elif isinstance(value, dict):
            for key, item in value.items():
                if not isinstance(item, str):
                    check(item)
                    continue
                try:
                    line, col = value.lc.value(key)
                except (KeyError, TypeError):
                    # May not be available if merged from an anchor
                    continue
                add(item, line, col)

    for source_dict in self.yaml.load_all(source_code):
        check(source_dict)

    if not positions_to_quote:
        return "\n".join(lines)

    quote = self.config.quote_representation or "'"

    def escape(text: str) -> str:
        """Escape text so it keeps its value inside the chosen quotes."""
        if quote == "'":
            # Single-quoted YAML escapes a quote by doubling it.
            return text.replace("'", "''")
        if quote == '"':
            # Double-quoted YAML treats backslash as the escape character.
            return text.replace("\\", "\\\\").replace('"', '\\"')
        return text

    # Reverse order: edits later in a line never shift pending earlier spans.
    for to_fix in sorted(positions_to_quote, reverse=True):
        text = lines[to_fix.line]
        lines[to_fix.line] = "".join(
            [
                text[: to_fix.start],  # noqa: E203: black disagrees
                quote,
                escape(text[to_fix.start : to_fix.end]),  # noqa: E203
                quote,
                text[to_fix.end :],  # noqa: E203
            ]
        )

    return "\n".join(lines)
104 changes: 104 additions & 0 deletions tests/unit/test_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,7 @@ def test_fix_code_functions_emit_debug_logs(
"Fixing comments...",
"Fixing top level lists...",
"Fixing flow-style lists...",
"Fixing unquoted strings with colons...",
}
assert set(caplog.messages) == expected_logs
for record in caplog.records:
Expand Down Expand Up @@ -1034,3 +1035,106 @@ def test_fix_code_fix_whitelines(
result = fix_code(source_code=source, config=config)

assert result == desired_source

@pytest.mark.parametrize(
("source", "config", "desired_source"),
[
# Flow-style sequence: unquoted items containing ':' get quoted, the
# already quoted item is left as it is.
(
"volumes: [/root:/mapped, a:b, 'c:d']",
YamlfixConfig(sequence_style=YamlNodeStyle.FLOW_STYLE),
dedent(
"""\
---
volumes: ['/root:/mapped', 'a:b', 'c:d']
"""
),
),
# Block-style sequence: same expectation as the flow-style case.
(
dedent(
"""\
volumes:
- /root:/mapped
- a:b
- 'c:d'
"""
),
YamlfixConfig(sequence_style=YamlNodeStyle.BLOCK_STYLE),
dedent(
"""\
---
volumes:
- '/root:/mapped'
- 'a:b'
- 'c:d'
"""
),
),
# Mixed scalars: the plain single-line string gets quoted, while the
# `|` and `>-` block scalars are expected to come out unchanged.
(
dedent(
"""\
test:
- "this one:\
is ok"
- fix this:one
- |
multiline strings:
are not supported yet
- >-
multiline strings:
are not supported yet
"""
),
YamlfixConfig(sequence_style=YamlNodeStyle.BLOCK_STYLE),
dedent(
"""\
---
test:
- 'this one:\
is ok'
- 'fix this:one'
- |
multiline strings:
are not supported yet
- >-
multiline strings:
are not supported yet
"""
),
),
# Anchors and merge keys with no explicit config: the anchored value is
# quoted once where it is defined; the `<<: *anchor` lines are unchanged.
(
dedent(
"""\
merge0: &anchor
host: host.docker.internal:host-gateway
merge1:
<<: *anchor
merge2:
<<: *anchor
"""
),
None,
dedent(
"""\
---
merge0: &anchor
host: 'host.docker.internal:host-gateway'
merge1:
<<: *anchor
merge2:
<<: *anchor
"""
),
),
],
)
def test_strings_with_colons_are_quoted(
self, source: str, config: Optional[YamlfixConfig], desired_source: str
) -> None:
"""
Given: Code with a string containing `:`
When: fix_code is run
Then: The string is quoted and not turned into a mapping

The parametrized cases cover flow-style and block-style sequences, block
scalars (expected to be left unquoted) and values shared through anchors
and merge keys.
"""
result = fix_code(source, config=config)

assert result == desired_source

0 comments on commit d618467

Please sign in to comment.