Skip to content

Commit

Permalink
gh-109276: libregrtest: use separated file for JSON (#109277)
Browse files Browse the repository at this point in the history
libregrtest now uses a separated file descriptor to write test result
as JSON.  Previously, if a test wrote debug messages late around the
JSON, the main test process failed to parse JSON.

Rename TestResult.write_json() to TestResult.write_json_into().

worker_process() no longer writes an empty line at the end. There is
no need to separate test process output from the JSON output anymore,
since JSON is now written into a separated file descriptor.

create_worker_process() now always spawn the process with
close_fds=True.
  • Loading branch information
vstinner authored Sep 11, 2023
1 parent baa6dc8 commit de5f8f7
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 36 deletions.
2 changes: 1 addition & 1 deletion Lib/test/libregrtest/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def get_rerun_match_tests(self) -> FilterTuple | None:
return None
return tuple(match_tests)

def write_json(self, file) -> None:
def write_json_into(self, file) -> None:
json.dump(self, file, cls=_EncodeTestResult)

@staticmethod
Expand Down
66 changes: 43 additions & 23 deletions Lib/test/libregrtest/run_workers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@
from .runtests import RunTests
from .single import PROGRESS_MIN_TIME
from .utils import (
StrPath, TestName,
StrPath, StrJSON, TestName, MS_WINDOWS,
format_duration, print_warning)
from .worker import create_worker_process, USE_PROCESS_GROUP

if sys.platform == 'win32':
if MS_WINDOWS:
import locale
import msvcrt



# Display the running tests if nothing happened last N seconds
Expand Down Expand Up @@ -153,10 +155,11 @@ def mp_result_error(
) -> MultiprocessResult:
return MultiprocessResult(test_result, stdout, err_msg)

def _run_process(self, runtests: RunTests, output_file: TextIO,
def _run_process(self, runtests: RunTests, output_fd: int, json_fd: int,
tmp_dir: StrPath | None = None) -> int:
try:
popen = create_worker_process(runtests, output_file, tmp_dir)
popen = create_worker_process(runtests, output_fd, json_fd,
tmp_dir)

self._killed = False
self._popen = popen
Expand Down Expand Up @@ -208,7 +211,7 @@ def _run_process(self, runtests: RunTests, output_file: TextIO,
def _runtest(self, test_name: TestName) -> MultiprocessResult:
self.current_test_name = test_name

if sys.platform == 'win32':
if MS_WINDOWS:
# gh-95027: When stdout is not a TTY, Python uses the ANSI code
# page for the sys.stdout encoding. If the main process runs in a
# terminal, sys.stdout uses WindowsConsoleIO with UTF-8 encoding.
Expand All @@ -221,14 +224,25 @@ def _runtest(self, test_name: TestName) -> MultiprocessResult:
match_tests = self.runtests.get_match_tests(test_name)
else:
match_tests = None
kwargs = {}
if match_tests:
kwargs['match_tests'] = match_tests
worker_runtests = self.runtests.copy(tests=tests, **kwargs)
err_msg = None

# gh-94026: Write stdout+stderr to a tempfile as workaround for
# non-blocking pipes on Emscripten with NodeJS.
with tempfile.TemporaryFile('w+', encoding=encoding) as stdout_file:
with (tempfile.TemporaryFile('w+', encoding=encoding) as stdout_file,
tempfile.TemporaryFile('w+', encoding='utf8') as json_file):
stdout_fd = stdout_file.fileno()
json_fd = json_file.fileno()
if MS_WINDOWS:
json_fd = msvcrt.get_osfhandle(json_fd)

kwargs = {}
if match_tests:
kwargs['match_tests'] = match_tests
worker_runtests = self.runtests.copy(
tests=tests,
json_fd=json_fd,
**kwargs)

# gh-93353: Check for leaked temporary files in the parent process,
# since the deletion of temporary files can happen late during
# Python finalization: too late for libregrtest.
Expand All @@ -239,12 +253,14 @@ def _runtest(self, test_name: TestName) -> MultiprocessResult:
tmp_dir = tempfile.mkdtemp(prefix="test_python_")
tmp_dir = os.path.abspath(tmp_dir)
try:
retcode = self._run_process(worker_runtests, stdout_file, tmp_dir)
retcode = self._run_process(worker_runtests,
stdout_fd, json_fd, tmp_dir)
finally:
tmp_files = os.listdir(tmp_dir)
os_helper.rmtree(tmp_dir)
else:
retcode = self._run_process(worker_runtests, stdout_file)
retcode = self._run_process(worker_runtests,
stdout_fd, json_fd)
tmp_files = ()
stdout_file.seek(0)

Expand All @@ -257,23 +273,27 @@ def _runtest(self, test_name: TestName) -> MultiprocessResult:
result = TestResult(test_name, state=State.MULTIPROCESSING_ERROR)
return self.mp_result_error(result, err_msg=err_msg)

try:
# deserialize run_tests_worker() output
json_file.seek(0)
worker_json: StrJSON = json_file.read()
if worker_json:
result = TestResult.from_json(worker_json)
else:
err_msg = f"empty JSON"
except Exception as exc:
# gh-101634: Catch UnicodeDecodeError if stdout cannot be
# decoded from encoding
err_msg = f"Fail to read or parser worker process JSON: {exc}"
result = TestResult(test_name, state=State.MULTIPROCESSING_ERROR)
return self.mp_result_error(result, stdout, err_msg=err_msg)

if retcode is None:
result = TestResult(test_name, state=State.TIMEOUT)
return self.mp_result_error(result, stdout)

err_msg = None
if retcode != 0:
err_msg = "Exit code %s" % retcode
else:
stdout, _, worker_json = stdout.rpartition("\n")
stdout = stdout.rstrip()
if not worker_json:
err_msg = "Failed to parse worker stdout"
else:
try:
result = TestResult.from_json(worker_json)
except Exception as exc:
err_msg = "Failed to parse worker JSON: %s" % exc

if err_msg:
result = TestResult(test_name, state=State.MULTIPROCESSING_ERROR)
Expand Down
3 changes: 3 additions & 0 deletions Lib/test/libregrtest/runtests.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ class RunTests:
gc_threshold: int | None = None
use_resources: list[str] = dataclasses.field(default_factory=list)
python_cmd: list[str] | None = None
# On Unix, it's a file descriptor.
# On Windows, it's a handle.
json_fd: int | None = None

def copy(self, **override):
state = dataclasses.asdict(self)
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/libregrtest/single.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,5 +271,9 @@ def run_single_test(test_name: TestName, runtests: RunTests) -> TestResult:
print(f"test {test_name} crashed -- {msg}",
file=sys.stderr, flush=True)
result.state = State.UNCAUGHT_EXC

sys.stdout.flush()
sys.stderr.flush()

result.duration = time.perf_counter() - start_time
return result
42 changes: 30 additions & 12 deletions Lib/test/libregrtest/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@
from .runtests import RunTests
from .single import run_single_test
from .utils import (
StrPath, StrJSON, FilterTuple,
StrPath, StrJSON, FilterTuple, MS_WINDOWS,
get_work_dir, exit_timeout)


USE_PROCESS_GROUP = (hasattr(os, "setsid") and hasattr(os, "killpg"))


def create_worker_process(runtests: RunTests,
output_file: TextIO,
output_fd: int, json_fd: int,
tmp_dir: StrPath | None = None) -> subprocess.Popen:
python_cmd = runtests.python_cmd
worker_json = runtests.as_json()
Expand All @@ -41,23 +41,42 @@ def create_worker_process(runtests: RunTests,
# Running the child from the same working directory as regrtest's original
# invocation ensures that TEMPDIR for the child is the same when
# sysconfig.is_python_build() is true. See issue 15300.
kw = dict(
kwargs = dict(
env=env,
stdout=output_file,
stdout=output_fd,
# bpo-45410: Write stderr into stdout to keep messages order
stderr=output_file,
stderr=output_fd,
text=True,
close_fds=(os.name != 'nt'),
close_fds=True,
)
if not MS_WINDOWS:
kwargs['pass_fds'] = [json_fd]
else:
startupinfo = subprocess.STARTUPINFO()
startupinfo.lpAttributeList = {"handle_list": [json_fd]}
kwargs['startupinfo'] = startupinfo
if USE_PROCESS_GROUP:
kw['start_new_session'] = True
return subprocess.Popen(cmd, **kw)
kwargs['start_new_session'] = True

if MS_WINDOWS:
os.set_handle_inheritable(json_fd, True)
try:
return subprocess.Popen(cmd, **kwargs)
finally:
if MS_WINDOWS:
os.set_handle_inheritable(json_fd, False)


def worker_process(worker_json: StrJSON) -> NoReturn:
runtests = RunTests.from_json(worker_json)
test_name = runtests.tests[0]
match_tests: FilterTuple | None = runtests.match_tests
json_fd: int = runtests.json_fd

if MS_WINDOWS:
import msvcrt
json_fd = msvcrt.open_osfhandle(json_fd, os.O_WRONLY)


setup_test_dir(runtests.test_dir)
setup_process()
Expand All @@ -70,11 +89,10 @@ def worker_process(worker_json: StrJSON) -> NoReturn:
print(f"Re-running {test_name} in verbose mode", flush=True)

result = run_single_test(test_name, runtests)
print() # Force a newline (just in case)

# Serialize TestResult as dict in JSON
result.write_json(sys.stdout)
sys.stdout.flush()
with open(json_fd, 'w', encoding='utf-8') as json_file:
result.write_json_into(json_file)

sys.exit(0)


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
libregrtest now uses a separated file descriptor to write test result as JSON.
Previously, if a test wrote debug messages late around the JSON, the main test
process failed to parse JSON. Patch by Victor Stinner.

0 comments on commit de5f8f7

Please sign in to comment.