diff --git a/babel/messages/extract.py b/babel/messages/extract.py index c95f1cbc9..74e57a181 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -15,7 +15,7 @@ :copyright: (c) 2013-2022 by the Babel Team. :license: BSD, see LICENSE for more details. """ - +import ast import os from os.path import relpath import sys @@ -487,14 +487,9 @@ def extract_python(fileobj, keywords, comment_tags, options): if nested: funcname = value elif tok == STRING: - # Unwrap quotes in a safe manner, maintaining the string's - # encoding - # https://sourceforge.net/tracker/?func=detail&atid=355470& - # aid=617979&group_id=5470 - code = compile('# coding=%s\n%s' % (str(encoding), value), - '', 'eval', future_flags) - value = eval(code, {'__builtins__': {}}, {}) - buf.append(value) + val = _parse_python_string(value, encoding, future_flags) + if val is not None: + buf.append(val) elif tok == OP and value == ',': if buf: messages.append(''.join(buf)) @@ -516,6 +511,28 @@ def extract_python(fileobj, keywords, comment_tags, options): funcname = value +def _parse_python_string(value, encoding, future_flags): + # Unwrap quotes in a safe manner, maintaining the string's encoding + # https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470 + code = compile( + f'# coding={str(encoding)}\n{value}', + '', + 'eval', + ast.PyCF_ONLY_AST | future_flags, + ) + if isinstance(code, ast.Expression): + body = code.body + if isinstance(body, ast.Str): + return body.s + if isinstance(body, ast.JoinedStr): # f-string + if all(isinstance(node, ast.Str) for node in body.values): + return ''.join(node.s for node in body.values) + if all(isinstance(node, ast.Constant) for node in body.values): + return ''.join(str(node.value) for node in body.values) + # TODO: we could raise an error or warning when not all nodes are constants + return None + + def extract_javascript(fileobj, keywords, comment_tags, options): """Extract messages from JavaScript source code. 
def test_f_strings(self):
    """f-strings made only of constant parts are extracted like plain strings."""
    source = BytesIO(br"""
t1 = _('foobar')
t2 = _(f'spameggs' f'feast') # should be extracted; constant parts only
t2 = _(f'spameggs' 'kerroshampurilainen') # should be extracted (mixing f with no f)
t3 = _(f'''whoa! a ''' # should be extracted (continues on following lines)
f'flying shark'
    '... hello'
)
t4 = _(f'spameggs {t1}') # should not be extracted
""")
    extracted = list(
        extract.extract('python', source, extract.DEFAULT_KEYWORDS, [], {})
    )
    # The t4 call interpolates a variable, so only four messages are expected.
    assert len(extracted) == 4
    texts = [entry[1] for entry in extracted]
    assert texts == [
        'foobar',
        'spameggsfeast',
        'spameggskerroshampurilainen',
        'whoa! a flying shark... hello',
    ]


def test_f_strings_non_utf8(self):
    """f-string contents are decoded using the file's declared encoding."""
    source = BytesIO(b"""
# -- coding: latin-1 --
t2 = _(f'\xe5\xe4\xf6' f'\xc5\xc4\xd6')
""")
    extracted = list(
        extract.extract('python', source, extract.DEFAULT_KEYWORDS, [], {})
    )
    assert len(extracted) == 1
    only_message = extracted[0]
    assert only_message[1] == 'åäöÅÄÖ'