Skip to content

Commit

Permalink
Use end_lineno AST attribute to obtain more accurate line counts.
Browse files Browse the repository at this point in the history
  • Loading branch information
jendrikseipp committed Aug 31, 2023
1 parent 7d8580a commit 2586ee6
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 88 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# 2.10 (unreleased)

* Drop support for Python 3.7 (Jendrik Seipp).
* Drop support for Python 3.7 (Jendrik Seipp, #323).
* Use `end_lineno` AST attribute to obtain more accurate line counts (Jendrik Seipp).

# 2.9.1 (2023-08-21)

Expand Down
2 changes: 0 additions & 2 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
* https://github.com/jendrikseipp/vulture/issues/249
* https://github.com/jendrikseipp/vulture/issues/315
Use these as test cases.
* Use end_lineno and end_col_offset attributes once we require Python
3.8+.
* Honor (speaking) pylint error codes (e.g., # pylint:
disable=unused-import): unused-import, unused-variable, unused-argument,
possibly-unused-variable and unreachable-code. See
Expand Down
21 changes: 2 additions & 19 deletions tests/test_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,9 @@
from vulture import lines


def get_last_line_number_slow(node):
"""
A slower, but also simpler and slightly more accurate version of
get_last_line_number().
It traverses all descendants of node and records the highest line
number seen.
This code is 1.6 times slower than count_lines() on the Python
subset of tensorflow. It reports the same sizes for all test cases
and the functions and classes in tensorflow.
"""
return max(getattr(node, "lineno", -1) for node in ast.walk(node))


def count_lines(node):
"""Estimate the number of lines of the given AST node."""
last_lineno = lines.get_last_line_number(node)
assert get_last_line_number_slow(node) == last_lineno
return last_lineno - lines.get_first_line_number(node) + 1


Expand Down Expand Up @@ -209,7 +192,7 @@ def foo():
'long'
'string')
"""
check_size(example, 4)
check_size(example, 6)


# We currently cannot handle code ending with comment lines.
Expand Down Expand Up @@ -302,7 +285,7 @@ class Foo:
[a, b
]
"""
check_size(example, 2)
check_size(example, 3)


def test_size_ellipsis():
Expand Down
3 changes: 1 addition & 2 deletions vulture/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,14 +477,13 @@ def ignored(lineno):
if ignored(first_lineno):
self._log(f'Ignoring {typ} "{name}"')
else:
last_lineno = lines.get_last_line_number(last_node)
collection.append(
Item(
name,
typ,
self.filename,
first_lineno,
last_lineno,
lines.get_last_line_number(last_node),
message=message,
confidence=confidence,
)
Expand Down
65 changes: 1 addition & 64 deletions vulture/lines.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,5 @@
import ast


def _get_last_child_with_lineno(node):
"""
Return the last direct child of `node` that has a lineno attribute,
or None if `node` has no such children.
Almost all node._field lists are sorted by the order in which they
appear in source code. For some nodes however, we have to skip some
fields that either don't have line numbers (e.g., "ctx" and "names")
or that are in the wrong position (e.g., "decorator_list" and
"returns"). Then we choose the first field (i.e., the field with the
highest line number) that actually contains a node. If it contains a
list of nodes, we return the last one.
"""
ignored_fields = {"ctx", "decorator_list", "names", "returns"}
fields = node._fields
# The fields of ast.Call are in the wrong order.
if isinstance(node, ast.Call):
fields = ("func", "args", "starargs", "keywords", "kwargs")
for name in reversed(fields):
if name in ignored_fields:
continue

try:
last_field = getattr(node, name)
except AttributeError:
continue

# Ignore non-AST objects like "is_async", "level" and "nl".
if isinstance(last_field, ast.AST):
return last_field
elif isinstance(last_field, list) and last_field:
return last_field[-1]
return None


def get_last_line_number(node):
"""Estimate last line number of the given AST node.
The estimate is based on the line number of the last descendant of
`node` that has a lineno attribute. Therefore, it underestimates the
size of code ending with, e.g., multiline strings and comments.
When traversing the tree, we may see a mix of nodes with line
numbers and nodes without line numbers. We therefore, store the
maximum line number seen so far and report it at the end. A more
accurate (but also slower to compute) estimate would traverse all
children, instead of just the last one, since choosing the last one
may lead to a path that ends with a node without line number.
"""
max_lineno = node.lineno
while True:
last_child = _get_last_child_with_lineno(node)
if last_child is None:
return max_lineno
else:
try:
max_lineno = max(max_lineno, last_child.lineno)
except AttributeError:
pass
node = last_child
return node.end_lineno


def get_first_line_number(node):
Expand Down

0 comments on commit 2586ee6

Please sign in to comment.