Skip to content

Commit

Permalink
Only parse format strings when being used with locals()
Browse files Browse the repository at this point in the history
The previous behavior has confusing results when strings aren't format strings, e.g. docstrings that happen to have a fragment resembling a format string. The new behavior is more precise.

The downside is that this won't detect usages when the format string or locals() is assigned to an intermediate variable before formatting. That seems fine to me, since it surprises me that the code `x = "{y}"` counts as usage of `y`. One could also argue against handling this at all, given that locals() is somewhat magical and that f-strings are the future.
  • Loading branch information
Jing Wang authored and jendrikseipp committed Aug 27, 2020
1 parent 168c0eb commit 59bba3e
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 48 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# unreleased

* Only parse format strings when being used with `locals()` (jingw, #225).

# 2.1 (2020-08-19)

* Treat `getattr/hasattr(obj, "constant_string", ...)` as a reference to
Expand Down
4 changes: 1 addition & 3 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,7 @@ def test_invalid_config_options_output():
_check_input_config({"unknown_key_1": 1})


@pytest.mark.parametrize(
"key, value", list(DEFAULTS.items()),
)
@pytest.mark.parametrize("key, value", list(DEFAULTS.items()))
def test_incompatible_option_type(key, value):
"""
If a config value has a different type from the default value we abort.
Expand Down
27 changes: 25 additions & 2 deletions tests/test_format_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,34 @@ def test_new_format_string_access(v):


def test_new_format_string_numbers(v):
v.scan("'{0.b}, {0.d.e} {0[1]} {0[1][1].k}'.format('foo')")
check(v.used_names, ["b", "d", "e", "k", "format"])
v.scan("'{0.b}, {0.d.e} {0[1]} {0[1][1].k}'.format(**locals())")
check(v.used_names, ["b", "d", "e", "k", "format", "locals"])


def test_incorrect_format_string(v):
v.scan('"{"')
v.scan('"{!-a:}"')
check(v.used_names, [])


def test_format_string_not_using_locals(v):
"""Strings that are not formatted with locals() should not be parsed."""
v.scan(
"""\
"{variable}"
def foobar():
'''
Return data of the form
{this_looks_like_a_format_string: 1}
'''
pass
"%(thing)s" % {"thing": 1}
"%(apple)s" * locals()
"{} {a} {b}".format(1, a=used_var, b=locals())
"""
)
check(v.used_names, ["used_var", "locals", "format"])
100 changes: 57 additions & 43 deletions vulture/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,37 +418,6 @@ def _handle_conditional_node(self, node, name):
confidence=100,
)

def _handle_string(self, s):
"""
Parse variable names in format strings:
'%(my_var)s' % locals()
'{my_var}'.format(**locals())
f'{my_var}'
"""
# Old format strings.
self.used_names |= set(re.findall(r"\%\((\w+)\)", s))

def is_identifier(name):
return bool(re.match(r"[a-zA-Z_][a-zA-Z0-9_]*", name))

# New format strings.
parser = string.Formatter()
try:
names = [name for _, name, _, _ in parser.parse(s) if name]
except ValueError:
# Invalid format string.
names = []

for field_name in names:
# Remove brackets and their contents: "a[0][b].c[d].e" -> "a.c.e",
# then split the resulting string: "a.b.c" -> ["a", "b", "c"]
vars = re.sub(r"\[\w*\]", "", field_name).split(".")
for var in vars:
if is_identifier(var):
self.used_names.add(var)

def _define(
self,
collection,
Expand Down Expand Up @@ -508,8 +477,21 @@ def visit_Attribute(self, node):
elif isinstance(node.ctx, ast.Load):
self.used_names.add(node.attr)

def visit_BinOp(self, node):
"""
Parse variable names in old format strings:
"%(my_var)s" % locals()
"""
if (
isinstance(node.left, ast.Str)
and isinstance(node.op, ast.Mod)
and self._is_locals_call(node.right)
):
self.used_names |= set(re.findall(r"%\((\w+)\)", node.left.s))

def visit_Call(self, node):
"""Count getattr/hasattr(x, "some_attr", ...) as usage of some_attr."""
# Count getattr/hasattr(x, "some_attr", ...) as usage of some_attr.
if isinstance(node.func, ast.Name) and (
(node.func.id == "getattr" and 2 <= len(node.args) <= 3)
or (node.func.id == "hasattr" and len(node.args) == 2)
Expand All @@ -518,6 +500,49 @@ def visit_Call(self, node):
if isinstance(attr_name_arg, ast.Str):
self.used_names.add(attr_name_arg.s)

# Parse variable names in new format strings:
# "{my_var}".format(**locals())
if (
isinstance(node.func, ast.Attribute)
and isinstance(node.func.value, ast.Str)
and node.func.attr == "format"
and any(
kw.arg is None and self._is_locals_call(kw.value)
for kw in node.keywords
)
):
self._handle_new_format_string(node.func.value.s)

def _handle_new_format_string(self, s):
def is_identifier(name):
return bool(re.match(r"[a-zA-Z_][a-zA-Z0-9_]*", name))

parser = string.Formatter()
try:
names = [name for _, name, _, _ in parser.parse(s) if name]
except ValueError:
# Invalid format string.
names = []

for field_name in names:
# Remove brackets and their contents: "a[0][b].c[d].e" -> "a.c.e",
# then split the resulting string: "a.b.c" -> ["a", "b", "c"]
vars = re.sub(r"\[\w*\]", "", field_name).split(".")
for var in vars:
if is_identifier(var):
self.used_names.add(var)

@staticmethod
def _is_locals_call(node):
"""Return True if the node is `locals()`."""
return (
isinstance(node, ast.Call)
and isinstance(node.func, ast.Name)
and node.func.id == "locals"
and not node.args
and not node.keywords
)

def visit_ClassDef(self, node):
for decorator in node.decorator_list:
if _match(
Expand Down Expand Up @@ -588,17 +613,6 @@ def visit_Name(self, node):
elif isinstance(node.ctx, (ast.Param, ast.Store)):
self._define_variable(node.id, node)

if sys.version_info < (3, 8):

def visit_Str(self, node):
self._handle_string(node.s)

else:

def visit_Constant(self, node):
if isinstance(node.value, str):
self._handle_string(node.value)

def visit_While(self, node):
self._handle_conditional_node(node, "while")

Expand Down

0 comments on commit 59bba3e

Please sign in to comment.