gh-109276: libregrtest: use separated file for JSON (#109277)

libregrtest now uses a separated file descriptor to write test result as JSON. Previously, if a test wrote debug messages late around the JSON, the main test process failed to parse JSON. Rename TestResult.write_json() to TestResult.write_json_into(). worker_process() no longer writes an empty line at the end. There is no need to separate test process output from the JSON output anymore, since JSON is now written into a separated file descriptor. create_worker_process() now always spawn the process with close_fds=True.
python · Sep 11, 2023 · de5f8f7 · de5f8f7
1 parent baa6dc8
commit de5f8f7
Show file tree

Hide file tree

Showing 6 changed files with 84 additions and 36 deletions.
diff --git a/Lib/test/libregrtest/result.py b/Lib/test/libregrtest/result.py
@@ -156,7 +156,7 @@ def get_rerun_match_tests(self) -> FilterTuple | None:
             return None
         return tuple(match_tests)
 
-    def write_json(self, file) -> None:
+    def write_json_into(self, file) -> None:
         json.dump(self, file, cls=_EncodeTestResult)
 
     @staticmethod

diff --git a/Lib/test/libregrtest/run_workers.py b/Lib/test/libregrtest/run_workers.py
@@ -20,12 +20,14 @@
 from .runtests import RunTests
 from .single import PROGRESS_MIN_TIME
 from .utils import (
-    StrPath, TestName,
+    StrPath, StrJSON, TestName, MS_WINDOWS,
     format_duration, print_warning)
 from .worker import create_worker_process, USE_PROCESS_GROUP
 
-if sys.platform == 'win32':
+if MS_WINDOWS:
     import locale
+    import msvcrt
+
 
 
 # Display the running tests if nothing happened last N seconds
@@ -153,10 +155,11 @@ def mp_result_error(
     ) -> MultiprocessResult:
         return MultiprocessResult(test_result, stdout, err_msg)
 
-    def _run_process(self, runtests: RunTests, output_file: TextIO,
+    def _run_process(self, runtests: RunTests, output_fd: int, json_fd: int,
                      tmp_dir: StrPath | None = None) -> int:
         try:
-            popen = create_worker_process(runtests, output_file, tmp_dir)
+            popen = create_worker_process(runtests, output_fd, json_fd,
+                                          tmp_dir)
 
             self._killed = False
             self._popen = popen
@@ -208,7 +211,7 @@ def _run_process(self, runtests: RunTests, output_file: TextIO,
     def _runtest(self, test_name: TestName) -> MultiprocessResult:
         self.current_test_name = test_name
 
-        if sys.platform == 'win32':
+        if MS_WINDOWS:
             # gh-95027: When stdout is not a TTY, Python uses the ANSI code
             # page for the sys.stdout encoding. If the main process runs in a
             # terminal, sys.stdout uses WindowsConsoleIO with UTF-8 encoding.
@@ -221,14 +224,25 @@ def _runtest(self, test_name: TestName) -> MultiprocessResult:
             match_tests = self.runtests.get_match_tests(test_name)
         else:
             match_tests = None
-        kwargs = {}
-        if match_tests:
-            kwargs['match_tests'] = match_tests
-        worker_runtests = self.runtests.copy(tests=tests, **kwargs)
+        err_msg = None
 
         # gh-94026: Write stdout+stderr to a tempfile as workaround for
         # non-blocking pipes on Emscripten with NodeJS.
-        with tempfile.TemporaryFile('w+', encoding=encoding) as stdout_file:
+        with (tempfile.TemporaryFile('w+', encoding=encoding) as stdout_file,
+              tempfile.TemporaryFile('w+', encoding='utf8') as json_file):
+            stdout_fd = stdout_file.fileno()
+            json_fd = json_file.fileno()
+            if MS_WINDOWS:
+                json_fd = msvcrt.get_osfhandle(json_fd)
+
+            kwargs = {}
+            if match_tests:
+                kwargs['match_tests'] = match_tests
+            worker_runtests = self.runtests.copy(
+                tests=tests,
+                json_fd=json_fd,
+                **kwargs)
+
             # gh-93353: Check for leaked temporary files in the parent process,
             # since the deletion of temporary files can happen late during
             # Python finalization: too late for libregrtest.
@@ -239,12 +253,14 @@ def _runtest(self, test_name: TestName) -> MultiprocessResult:
                 tmp_dir = tempfile.mkdtemp(prefix="test_python_")
                 tmp_dir = os.path.abspath(tmp_dir)
                 try:
-                    retcode = self._run_process(worker_runtests, stdout_file, tmp_dir)
+                    retcode = self._run_process(worker_runtests,
+                                                stdout_fd, json_fd, tmp_dir)
                 finally:
                     tmp_files = os.listdir(tmp_dir)
                     os_helper.rmtree(tmp_dir)
             else:
-                retcode = self._run_process(worker_runtests, stdout_file)
+                retcode = self._run_process(worker_runtests,
+                                            stdout_fd, json_fd)
                 tmp_files = ()
             stdout_file.seek(0)
 
@@ -257,23 +273,27 @@ def _runtest(self, test_name: TestName) -> MultiprocessResult:
                 result = TestResult(test_name, state=State.MULTIPROCESSING_ERROR)
                 return self.mp_result_error(result, err_msg=err_msg)
 
+            try:
+                # deserialize run_tests_worker() output
+                json_file.seek(0)
+                worker_json: StrJSON = json_file.read()
+                if worker_json:
+                    result = TestResult.from_json(worker_json)
+                else:
+                    err_msg = f"empty JSON"
+            except Exception as exc:
+                # gh-101634: Catch UnicodeDecodeError if stdout cannot be
+                # decoded from encoding
+                err_msg = f"Fail to read or parser worker process JSON: {exc}"
+                result = TestResult(test_name, state=State.MULTIPROCESSING_ERROR)
+                return self.mp_result_error(result, stdout, err_msg=err_msg)
+
         if retcode is None:
             result = TestResult(test_name, state=State.TIMEOUT)
             return self.mp_result_error(result, stdout)
 
-        err_msg = None
         if retcode != 0:
             err_msg = "Exit code %s" % retcode
-        else:
-            stdout, _, worker_json = stdout.rpartition("\n")
-            stdout = stdout.rstrip()
-            if not worker_json:
-                err_msg = "Failed to parse worker stdout"
-            else:
-                try:
-                    result = TestResult.from_json(worker_json)
-                except Exception as exc:
-                    err_msg = "Failed to parse worker JSON: %s" % exc
 
         if err_msg:
             result = TestResult(test_name, state=State.MULTIPROCESSING_ERROR)

diff --git a/Lib/test/libregrtest/runtests.py b/Lib/test/libregrtest/runtests.py
@@ -36,6 +36,9 @@ class RunTests:
     gc_threshold: int | None = None
     use_resources: list[str] = dataclasses.field(default_factory=list)
     python_cmd: list[str] | None = None
+    # On Unix, it's a file descriptor.
+    # On Windows, it's a handle.
+    json_fd: int | None = None
 
     def copy(self, **override):
         state = dataclasses.asdict(self)

diff --git a/Lib/test/libregrtest/single.py b/Lib/test/libregrtest/single.py
@@ -271,5 +271,9 @@ def run_single_test(test_name: TestName, runtests: RunTests) -> TestResult:
             print(f"test {test_name} crashed -- {msg}",
                   file=sys.stderr, flush=True)
         result.state = State.UNCAUGHT_EXC
+
+    sys.stdout.flush()
+    sys.stderr.flush()
+
     result.duration = time.perf_counter() - start_time
     return result
diff --git a/Lib/test/libregrtest/worker.py b/Lib/test/libregrtest/worker.py
@@ -10,15 +10,15 @@
 from .runtests import RunTests
 from .single import run_single_test
 from .utils import (
-    StrPath, StrJSON, FilterTuple,
+    StrPath, StrJSON, FilterTuple, MS_WINDOWS,
     get_work_dir, exit_timeout)
 
 
 USE_PROCESS_GROUP = (hasattr(os, "setsid") and hasattr(os, "killpg"))
 
 
 def create_worker_process(runtests: RunTests,
-                          output_file: TextIO,
+                          output_fd: int, json_fd: int,
                           tmp_dir: StrPath | None = None) -> subprocess.Popen:
     python_cmd = runtests.python_cmd
     worker_json = runtests.as_json()
@@ -41,23 +41,42 @@ def create_worker_process(runtests: RunTests,
     # Running the child from the same working directory as regrtest's original
     # invocation ensures that TEMPDIR for the child is the same when
     # sysconfig.is_python_build() is true. See issue 15300.
-    kw = dict(
+    kwargs = dict(
         env=env,
-        stdout=output_file,
+        stdout=output_fd,
         # bpo-45410: Write stderr into stdout to keep messages order
-        stderr=output_file,
+        stderr=output_fd,
         text=True,
-        close_fds=(os.name != 'nt'),
+        close_fds=True,
     )
+    if not MS_WINDOWS:
+        kwargs['pass_fds'] = [json_fd]
+    else:
+        startupinfo = subprocess.STARTUPINFO()
+        startupinfo.lpAttributeList = {"handle_list": [json_fd]}
+        kwargs['startupinfo'] = startupinfo
     if USE_PROCESS_GROUP:
-        kw['start_new_session'] = True
-    return subprocess.Popen(cmd, **kw)
+        kwargs['start_new_session'] = True
+
+    if MS_WINDOWS:
+        os.set_handle_inheritable(json_fd, True)
+    try:
+        return subprocess.Popen(cmd, **kwargs)
+    finally:
+        if MS_WINDOWS:
+            os.set_handle_inheritable(json_fd, False)
 
 
 def worker_process(worker_json: StrJSON) -> NoReturn:
     runtests = RunTests.from_json(worker_json)
     test_name = runtests.tests[0]
     match_tests: FilterTuple | None = runtests.match_tests
+    json_fd: int = runtests.json_fd
+
+    if MS_WINDOWS:
+        import msvcrt
+        json_fd = msvcrt.open_osfhandle(json_fd, os.O_WRONLY)
+
 
     setup_test_dir(runtests.test_dir)
     setup_process()
@@ -70,11 +89,10 @@ def worker_process(worker_json: StrJSON) -> NoReturn:
             print(f"Re-running {test_name} in verbose mode", flush=True)
 
     result = run_single_test(test_name, runtests)
-    print()   # Force a newline (just in case)
 
-    # Serialize TestResult as dict in JSON
-    result.write_json(sys.stdout)
-    sys.stdout.flush()
+    with open(json_fd, 'w', encoding='utf-8') as json_file:
+        result.write_json_into(json_file)
+
     sys.exit(0)
 
 

diff --git a/Misc/NEWS.d/next/Tests/2023-09-11-18-19-52.gh-issue-109276.btfFtT.rst b/Misc/NEWS.d/next/Tests/2023-09-11-18-19-52.gh-issue-109276.btfFtT.rst
@@ -0,0 +1,3 @@
+libregrtest now uses a separated file descriptor to write test result as JSON.
+Previously, if a test wrote debug messages late around the JSON, the main test
+process failed to parse JSON. Patch by Victor Stinner.