Skip to content

Commit

Permalink
gh-109425: regrtest decodes worker stdout with backslashreplace (#109428)
Browse files Browse the repository at this point in the history

libregrtest now decodes stdout of test worker processes with the
"backslashreplace" error handler to log corrupted stdout, instead of
failing with an error and not logging the stdout.
  • Loading branch information
vstinner authored Sep 14, 2023
1 parent 68a6f21 commit 74c72a2
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 16 deletions.
7 changes: 6 additions & 1 deletion Lib/test/libregrtest/run_workers.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,12 @@ def create_stdout(self, stack: contextlib.ExitStack) -> TextIO:

# gh-94026: Write stdout+stderr to a tempfile as workaround for
# non-blocking pipes on Emscripten with NodeJS.
stdout_file = tempfile.TemporaryFile('w+', encoding=encoding)
# gh-109425: Use "backslashreplace" error handler: log corrupted
# stdout+stderr, instead of failing with a UnicodeDecodeError and not
# logging stdout+stderr at all.
stdout_file = tempfile.TemporaryFile('w+',
encoding=encoding,
errors='backslashreplace')
stack.enter_context(stdout_file)
return stdout_file

Expand Down
43 changes: 28 additions & 15 deletions Lib/test/test_regrtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,10 +421,12 @@ def regex_search(self, regex, output):
self.fail("%r not found in %r" % (regex, output))
return match

def check_line(self, output, regex, full=False):
def check_line(self, output, pattern, full=False, regex=True):
if not regex:
pattern = re.escape(pattern)
if full:
regex += '\n'
regex = re.compile(r'^' + regex, re.MULTILINE)
pattern += '\n'
regex = re.compile(r'^' + pattern, re.MULTILINE)
self.assertRegex(output, regex)

def parse_executed_tests(self, output):
Expand Down Expand Up @@ -1755,9 +1757,8 @@ def test_leak_tmp_file(self):
f"files (1): mytmpfile",
output)

def test_mp_decode_error(self):
# gh-101634: If a worker stdout cannot be decoded, report a failed test
# and a non-zero exit code.
def test_worker_decode_error(self):
# gh-109425: Use "backslashreplace" error handler to decode stdout.
if sys.platform == 'win32':
encoding = locale.getencoding()
else:
Expand All @@ -1767,29 +1768,41 @@ def test_mp_decode_error(self):
if encoding is None:
self.skipTest("cannot get regrtest worker encoding")

nonascii = b"byte:\xa0\xa9\xff\n"
nonascii = bytes(ch for ch in range(128, 256))
corrupted_output = b"nonascii:%s\n" % (nonascii,)
# gh-108989: On Windows, assertion errors are written in UTF-16: when
# decoded, each letter is followed by a NUL character.
assertion_failed = 'Assertion failed: tstate_is_alive(tstate)\n'
corrupted_output += assertion_failed.encode('utf-16-le')
try:
nonascii.decode(encoding)
corrupted_output.decode(encoding)
except UnicodeDecodeError:
pass
else:
self.skipTest(f"{encoding} can decode non-ASCII bytes {nonascii!a}")
self.skipTest(f"{encoding} can decode non-ASCII bytes")

expected_line = corrupted_output.decode(encoding, 'backslashreplace')

code = textwrap.dedent(fr"""
import sys
import unittest
class Tests(unittest.TestCase):
def test_pass(self):
pass
# bytes which cannot be decoded from UTF-8
nonascii = {nonascii!a}
sys.stdout.buffer.write(nonascii)
corrupted_output = {corrupted_output!a}
sys.stdout.buffer.write(corrupted_output)
sys.stdout.buffer.flush()
""")
testname = self.create_test(code=code)

output = self.run_tests("--fail-env-changed", "-v", "-j1", testname,
exitcode=EXITCODE_BAD_TEST)
output = self.run_tests("--fail-env-changed", "-v", "-j1", testname)
self.check_executed_tests(output, [testname],
failed=[testname],
parallel=True,
stats=0)
stats=1)
self.check_line(output, expected_line, regex=False)

def test_doctest(self):
code = textwrap.dedent(r'''
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
libregrtest now decodes stdout of test worker processes with the
"backslashreplace" error handler to log corrupted stdout, instead of failing
with an error and not logging the stdout. Patch by Victor Stinner.

0 comments on commit 74c72a2

Please sign in to comment.