From 4393f17fc79f5a53a48ea7d18ebfd2da0f170b83 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 31 Oct 2022 13:05:00 +0200 Subject: [PATCH] Use `ast` instead of `eval` for string extraction This is safer (as we don't actually execute anything), and allows us to parse f-strings too. Closes #769 (supersedes it) Refs #715 (doesn't add an error yet, but doesn't crash on f-strings) --- babel/messages/extract.py | 26 +++++++++++++++++--------- tests/messages/test_extract.py | 11 +++++++++++ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/babel/messages/extract.py b/babel/messages/extract.py index c95f1cbc9..49338c26c 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -15,7 +15,7 @@ :copyright: (c) 2013-2022 by the Babel Team. :license: BSD, see LICENSE for more details. """ - +import ast import os from os.path import relpath import sys @@ -487,14 +487,22 @@ def extract_python(fileobj, keywords, comment_tags, options): if nested: funcname = value elif tok == STRING: - # Unwrap quotes in a safe manner, maintaining the string's - # encoding - # https://sourceforge.net/tracker/?func=detail&atid=355470& - # aid=617979&group_id=5470 - code = compile('# coding=%s\n%s' % (str(encoding), value), - '', 'eval', future_flags) - value = eval(code, {'__builtins__': {}}, {}) - buf.append(value) + # Unwrap quotes in a safe manner, maintaining the string's encoding + # https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470 + code = compile( + f'# coding={str(encoding)}\n{value}', + '', + 'eval', + ast.PyCF_ONLY_AST | future_flags, + ) + if isinstance(code, ast.Expression): + body = code.body + if isinstance(body, ast.Str): + buf.append(body.s) + elif isinstance(body, ast.JoinedStr): # f-string + if all(isinstance(node, ast.Constant) for node in body.values): + buf.append(''.join(str(node.value) for node in body.values)) + # TODO: could raise an error or warning when not all nodes are constants elif tok == OP and value == ',': if buf: messages.append(''.join(buf)) diff --git a/tests/messages/test_extract.py b/tests/messages/test_extract.py index 47fe30603..a12b50101 100644 --- a/tests/messages/test_extract.py +++ b/tests/messages/test_extract.py @@ -528,3 +528,14 @@ def test_future(self): messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {})) assert messages[0][1] == u'\xa0' + + def test_f_strings(self): + buf = BytesIO(br""" +t1 = _('foobar') +t2 = _(f'spameggs' f'feast') # should be extracted; constant parts only +t3 = _(f'spameggs {t1}') # should not be extracted +""") + messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {})) + assert len(messages) == 2 + assert messages[0][1] == u'foobar' + assert messages[1][1] == u'spameggsfeast'