Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-116022: Improve repr() of AST nodes #117046

Merged
merged 39 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
7461655
Improve repr() of AST nodes
tomasr8 Mar 11, 2024
447261b
Add news entry
tomasr8 Mar 19, 2024
74759bc
Improve news entry
tomasr8 Mar 20, 2024
4ffc1fb
Follow PEP7
tomasr8 Mar 20, 2024
6094100
Fix test
tomasr8 Mar 20, 2024
5cde4fc
Regenerate files
tomasr8 Mar 20, 2024
00133b8
Merge branch 'main' into ast-repr
JelleZijlstra Apr 24, 2024
e1b7643
Fix missing whitespace
tomasr8 May 2, 2024
0308982
Check return values
tomasr8 May 2, 2024
3f2bf3f
Check for retval < 0 rather than == -1
tomasr8 May 2, 2024
4386a5a
Fix missing whitespace
tomasr8 May 2, 2024
af3b2a3
Simplify code
tomasr8 May 2, 2024
1aaa1c0
Update Parser/asdl_c.py
JelleZijlstra Jun 3, 2024
06e4657
Merge branch 'main' into ast-repr
AlexWaygood Jun 3, 2024
df70943
Fix leaked references
tomasr8 Jul 10, 2024
ed89975
Add an assertion
tomasr8 Jul 10, 2024
d8b3bff
Simplify code
tomasr8 Jul 10, 2024
cfd6cdf
Add tests
tomasr8 Jul 10, 2024
f022378
Use PyUnicodeWriter instead of string concatenation
tomasr8 Jul 11, 2024
0f916cc
Improve news entry
tomasr8 Jul 11, 2024
e2b0415
PEP7 fixes
tomasr8 Jul 11, 2024
9f405ac
Merge branch 'main' into ast-repr
tomasr8 Jul 11, 2024
b06c8a4
Use the public Unicode Writer API where possible
tomasr8 Jul 11, 2024
32a5169
Increase max depth to 3
tomasr8 Jul 11, 2024
7603910
Add comments
tomasr8 Jul 11, 2024
74c57a6
Add versionchanged note to ast module docs
tomasr8 Jul 11, 2024
ddd7e71
Use `PyUnicodeWriter_WriteUTF8`
tomasr8 Jul 14, 2024
132932a
Check for errors
tomasr8 Jul 14, 2024
a341c57
Use snapshots for testing
tomasr8 Jul 14, 2024
d03c3d1
Fix typo
tomasr8 Jul 14, 2024
44b3303
Add test folder to Makefile
tomasr8 Jul 14, 2024
0b52a65
Switch back to the private API
tomasr8 Jul 16, 2024
0b19d2e
Merge branch 'main' into ast-repr
tomasr8 Aug 3, 2024
2ed3f99
Move test data inside test_ast
tomasr8 Aug 12, 2024
c94c197
Merge branch 'main' into ast-repr
tomasr8 Aug 12, 2024
6ca6b5b
Fix tests
tomasr8 Aug 14, 2024
df34a04
Apply review suggestions
tomasr8 Aug 14, 2024
ab823cc
Merge branch 'main' into ast-repr
tomasr8 Sep 17, 2024
abd35b0
Merge branch 'main' into ast-repr
JelleZijlstra Sep 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Doc/library/ast.rst
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,11 @@ Node classes
Simple indices are represented by their value, extended slices are
represented as tuples.

.. versionchanged:: 3.14

The :meth:`~object.__repr__` output of :class:`~ast.AST` nodes includes
the values of the node fields.

.. deprecated:: 3.8

Old classes :class:`!ast.Num`, :class:`!ast.Str`, :class:`!ast.Bytes`,
Expand Down
209 changes: 209 additions & 0 deletions Lib/test/test_ast/data/ast_repr.txt

Large diffs are not rendered by default.

25 changes: 24 additions & 1 deletion Lib/test/test_ast/test_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import types
import unittest
import weakref
from pathlib import Path
from textwrap import dedent
try:
import _testinternalcapi
Expand All @@ -29,6 +30,16 @@
STDLIB_FILES = [fn for fn in os.listdir(STDLIB) if fn.endswith(".py")]
STDLIB_FILES.extend(["test/test_grammar.py", "test/test_unpack_ex.py"])

AST_REPR_DATA_FILE = Path(__file__).parent / "data" / "ast_repr.txt"

def ast_repr_get_test_cases() -> list[str]:
return exec_tests + eval_tests


def ast_repr_update_snapshots() -> None:
data = [repr(ast.parse(test)) for test in ast_repr_get_test_cases()]
AST_REPR_DATA_FILE.write_text("\n".join(data))


class AST_Tests(unittest.TestCase):
maxDiff = None
Expand Down Expand Up @@ -408,7 +419,7 @@ def test_invalid_sum(self):
m = ast.Module([ast.Expr(ast.expr(**pos), **pos)], [])
with self.assertRaises(TypeError) as cm:
compile(m, "<test>", "exec")
self.assertIn("but got <ast.expr", str(cm.exception))
self.assertIn("but got expr()", str(cm.exception))

def test_invalid_identifier(self):
m = ast.Module([ast.Expr(ast.Name(42, ast.Load()))], [])
Expand Down Expand Up @@ -772,6 +783,12 @@ def test_none_checks(self) -> None:
for node, attr, source in tests:
self.assert_none_check(node, attr, source)

def test_repr(self) -> None:
snapshots = AST_REPR_DATA_FILE.read_text().split("\n")
for test, snapshot in zip(ast_repr_get_test_cases(), snapshots, strict=True):
with self.subTest(test_input=test):
self.assertEqual(repr(ast.parse(test)), snapshot)


class CopyTests(unittest.TestCase):
"""Test copying and pickling AST nodes."""
Expand Down Expand Up @@ -3035,3 +3052,9 @@ def test_cli_file_input(self):
self.assertEqual(expected.splitlines(),
res.out.decode("utf8").splitlines())
self.assertEqual(res.rc, 0)


if __name__ == '__main__':
if len(sys.argv) > 1 and sys.argv[1] == '--snapshot-update':
ast_repr_update_snapshots()
sys.exit(0)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve the :meth:`~object.__repr__` output of :class:`~ast.AST` nodes.
225 changes: 225 additions & 0 deletions Parser/asdl_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -1435,8 +1435,233 @@ def visitModule(self, mod):
{NULL}
};

static PyObject *
ast_repr_max_depth(AST_object *self, int depth);

/* Format list and tuple properties of AST nodes.
Note that, only the first and last elements are shown.
Anything in between is represented with an ellipsis ('...').
For example, the list [1, 2, 3] is formatted as
'List(elts=[Constant(1), ..., Constant(3)])'. */
static PyObject *
ast_repr_list(PyObject *list, int depth)
picnixz marked this conversation as resolved.
Show resolved Hide resolved
{
assert(PyList_Check(list) || PyTuple_Check(list));

struct ast_state *state = get_ast_state();
if (state == NULL) {
return NULL;
}

Py_ssize_t length = PySequence_Size(list);
if (length < 0) {
return NULL;
}
else if (length == 0) {
return PyObject_Repr(list);
}

_PyUnicodeWriter writer;
picnixz marked this conversation as resolved.
Show resolved Hide resolved
_PyUnicodeWriter_Init(&writer);
writer.overallocate = 1;
PyObject *items[2] = {NULL, NULL};

items[0] = PySequence_GetItem(list, 0);
tomasr8 marked this conversation as resolved.
Show resolved Hide resolved
if (!items[0]) {
goto error;
}
if (length > 1) {
items[1] = PySequence_GetItem(list, length - 1);
if (!items[1]) {
goto error;
}
}

bool is_list = PyList_Check(list);
if (_PyUnicodeWriter_WriteChar(&writer, is_list ? '[' : '(') < 0) {
goto error;
}

for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) {
PyObject *item = items[i];
PyObject *item_repr;

if (PyType_IsSubtype(Py_TYPE(item), (PyTypeObject *)state->AST_type)) {
item_repr = ast_repr_max_depth((AST_object*)item, depth - 1);
} else {
item_repr = PyObject_Repr(item);
}
if (!item_repr) {
goto error;
}
if (i > 0) {
if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) {
goto error;
}
}
if (_PyUnicodeWriter_WriteStr(&writer, item_repr) < 0) {
Py_DECREF(item_repr);
goto error;
}
if (i == 0 && length > 2) {
if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ...", 5) < 0) {
Py_DECREF(item_repr);
goto error;
}
}
Py_DECREF(item_repr);
}

if (_PyUnicodeWriter_WriteChar(&writer, is_list ? ']' : ')') < 0) {
goto error;
}

Py_XDECREF(items[0]);
Py_XDECREF(items[1]);
return _PyUnicodeWriter_Finish(&writer);

error:
Py_XDECREF(items[0]);
Py_XDECREF(items[1]);
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}

static PyObject *
ast_repr_max_depth(AST_object *self, int depth)
{
struct ast_state *state = get_ast_state();
if (state == NULL) {
return NULL;
}

if (depth <= 0) {
return PyUnicode_FromFormat("%s(...)", Py_TYPE(self)->tp_name);
}

int status = Py_ReprEnter((PyObject *)self);
if (status != 0) {
if (status < 0) {
return NULL;
}
return PyUnicode_FromFormat("%s(...)", Py_TYPE(self)->tp_name);
}

PyObject *fields;
if (PyObject_GetOptionalAttr((PyObject *)Py_TYPE(self), state->_fields, &fields) < 0) {
Py_ReprLeave((PyObject *)self);
return NULL;
}

Py_ssize_t numfields = PySequence_Size(fields);
tomasr8 marked this conversation as resolved.
Show resolved Hide resolved
if (numfields < 0) {
Py_ReprLeave((PyObject *)self);
Py_DECREF(fields);
return NULL;
}

if (numfields == 0) {
Py_ReprLeave((PyObject *)self);
Py_DECREF(fields);
return PyUnicode_FromFormat("%s()", Py_TYPE(self)->tp_name);
}

const char* tp_name = Py_TYPE(self)->tp_name;
_PyUnicodeWriter writer;
tomasr8 marked this conversation as resolved.
Show resolved Hide resolved
_PyUnicodeWriter_Init(&writer);
writer.overallocate = 1;

if (_PyUnicodeWriter_WriteASCIIString(&writer, tp_name, strlen(tp_name)) < 0) {
goto error;
}
if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) {
goto error;
}

for (Py_ssize_t i = 0; i < numfields; i++) {
PyObject *name = PySequence_GetItem(fields, i);
if (!name) {
goto error;
}

PyObject *value = PyObject_GetAttr((PyObject *)self, name);
if (!value) {
Py_DECREF(name);
goto error;
}

PyObject *value_repr;
if (PyList_Check(value) || PyTuple_Check(value)) {
value_repr = ast_repr_list(value, depth);
}
else if (PyType_IsSubtype(Py_TYPE(value), (PyTypeObject *)state->AST_type)) {
value_repr = ast_repr_max_depth((AST_object*)value, depth - 1);
}
else {
value_repr = PyObject_Repr(value);
}

if (!value_repr) {
Py_DECREF(name);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can DECREF name and value earlier so you don't have to repeat this. Looks like value is no longer needed after line 1602, so you could DECREF it there. name could be DECREFed after line 1623.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, I move those DECREFs up

Py_DECREF(value);
goto error;
}

if (i > 0) {
if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) {
Py_DECREF(name);
Py_DECREF(value);
Py_DECREF(value_repr);
goto error;
}
}
if (_PyUnicodeWriter_WriteStr(&writer, name) < 0) {
Py_DECREF(name);
Py_DECREF(value);
Py_DECREF(value_repr);
goto error;
}
if (_PyUnicodeWriter_WriteChar(&writer, '=') < 0) {
Py_DECREF(name);
Py_DECREF(value);
Py_DECREF(value_repr);
goto error;
}
if (_PyUnicodeWriter_WriteStr(&writer, value_repr) < 0) {
Py_DECREF(name);
Py_DECREF(value);
Py_DECREF(value_repr);
goto error;
}

Py_DECREF(name);
Py_DECREF(value);
Py_DECREF(value_repr);
}

if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) {
goto error;
}
Py_ReprLeave((PyObject *)self);
Py_DECREF(fields);
return _PyUnicodeWriter_Finish(&writer);

error:
Py_ReprLeave((PyObject *)self);
Py_DECREF(fields);
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}

static PyObject *
ast_repr(AST_object *self)
{
return ast_repr_max_depth(self, 3);
}

static PyType_Slot AST_type_slots[] = {
{Py_tp_dealloc, ast_dealloc},
{Py_tp_repr, ast_repr},
{Py_tp_getattro, PyObject_GenericGetAttr},
{Py_tp_setattro, PyObject_GenericSetAttr},
{Py_tp_traverse, ast_traverse},
Expand Down
Loading
Loading