From 953335f0c359e3871762bcbe46d2ed5c0162867b Mon Sep 17 00:00:00 2001 From: Lukas Puehringer Date: Thu, 9 Feb 2023 15:10:52 +0100 Subject: [PATCH] Stop using securesystemslib.process * Instead of `securesystemslib.process.run`, use `subprocess.run` directly. The former is only a thin wrapper over the latter, which provides no advantage in Py3-only times. * Copy `securesystemslib.process.run_duplicate_streams` to a helper in runlib. This function is really only needed internally by in-toto-run tooling, for which it was developped originally (#160). It does not have to be pulic API in securesystemslib. Moreover, the function is not trivial and there have been multiple related bugs, one of them still unresolved (see secure-systems-lab/securesystemslib#337). This patch allows us to deprecate `securesystemslib.process`, and fix or workaround related issue wrt the specific runlib use case. **Note to git historians:** run_duplicate_streams was originally removed from in-toto in in-toto/in-toto@29f063a, and now copied back from https://github.com/secure-systems-lab/securesystemslib/blob/v0.26.0/securesystemslib/process.py Signed-off-by: Lukas Puehringer --- in_toto/runlib.py | 104 ++++++++++++++++++++++++++++++++++++++----- tests/test_runlib.py | 60 ++++++++++++++++++++++++- 2 files changed, 151 insertions(+), 13 deletions(-) diff --git a/in_toto/runlib.py b/in_toto/runlib.py index be60e213f..b00546cd5 100644 --- a/in_toto/runlib.py +++ b/in_toto/runlib.py @@ -31,6 +31,11 @@ import logging import os import itertools +import io +import subprocess +import sys +import tempfile +import time from pathspec import PathSpec @@ -43,7 +48,6 @@ import securesystemslib.formats import securesystemslib.hash import securesystemslib.exceptions -import securesystemslib.process import securesystemslib.gpg from in_toto.models.metadata import Metablock @@ -322,6 +326,81 @@ def record_artifacts_as_dict(artifacts, exclude_patterns=None, return artifacts_dict +def _subprocess_run_duplicate_streams(cmd, timeout): + """Helper to run subprocess and both print and capture standards streams. + + Caveat: + * Might behave unexpectedly with interactive commands. + * Might not duplicate output in real time, if the command buffers it (see + e.g. `print("foo")` vs. `print("foo", flush=True)`). + * Possible race condition on Windows when removing temporary files. + + """ + # Use temporary files as targets for child process standard stream redirects + # They seem to work better (i.e. do not hang) than pipes, when using + # interactive commands like `vi`. + stdout_fd, stdout_name = tempfile.mkstemp() + stderr_fd, stderr_name = tempfile.mkstemp() + try: + with open(stdout_name, "r") as stdout_reader, \ + open(stderr_name, "r") as stderr_reader, \ + os.fdopen(stdout_fd, "w") as stdout_writer, \ + os.fdopen(stderr_fd, "w") as stderr_writer: + + # Store stream results in mutable dict to update it inside nested helper + _std = {"out": "", "err": ""} + + def _duplicate_streams(): + """Helper to read from child process standard streams, write their + contents to parent process standard streams, and build up return values + for outer function. + """ + # Read until EOF but at most `io.DEFAULT_BUFFER_SIZE` bytes per call. + # Reading and writing in reasonably sized chunks prevents us from + # subverting a timeout, due to being busy for too long or indefinitely. + stdout_part = stdout_reader.read(io.DEFAULT_BUFFER_SIZE) + stderr_part = stderr_reader.read(io.DEFAULT_BUFFER_SIZE) + sys.stdout.write(stdout_part) + sys.stderr.write(stderr_part) + sys.stdout.flush() + sys.stderr.flush() + _std["out"] += stdout_part + _std["err"] += stderr_part + + # Start child process, writing its standard streams to temporary files + proc = subprocess.Popen( + cmd, + stdout=stdout_writer, + stderr=stderr_writer, + universal_newlines=True, + ) + proc_start_time = time.time() + + # Duplicate streams until the process exits (or times out) + while proc.poll() is None: + # Time out as Python's `subprocess.run` would do it + if ( + timeout is not None + and time.time() > proc_start_time + timeout + ): + proc.kill() + proc.wait() + raise subprocess.TimeoutExpired(cmd, timeout) + + _duplicate_streams() + + # Read/write once more to grab everything that the process wrote between + # our last read in the loop and exiting, i.e. breaking the loop. + _duplicate_streams() + + finally: + # The work is done or was interrupted, the temp files can be removed + os.remove(stdout_name) + os.remove(stderr_name) + + # Return process exit code and captured streams + return proc.poll(), _std["out"], _std["err"] + def execute_link(link_cmd_args, record_streams): """ @@ -343,10 +422,9 @@ def execute_link(link_cmd_args, record_streams): OSError: The given command is not present or non-executable - securesystemslib.process.subprocess.TimeoutExpired: - The execution of the given command times out. The default timeout - is securesystemslib.settings.SUBPROCESS_TIMEOUT. - + subprocess.TimeoutExpired: + The execution of the given command times (see + in_toto.settings.LINK_CMD_EXEC_TIMEOUT). Executes passed command in a subprocess and redirects stdout and stderr @@ -360,14 +438,15 @@ def execute_link(link_cmd_args, record_streams): """ if record_streams: return_code, stdout_str, stderr_str = \ - securesystemslib.process.run_duplicate_streams(link_cmd_args, + _subprocess_run_duplicate_streams( + link_cmd_args, timeout=float(in_toto.settings.LINK_CMD_EXEC_TIMEOUT)) else: - process = securesystemslib.process.run(link_cmd_args, check=False, + process = subprocess.run(link_cmd_args, check=False, timeout=float(in_toto.settings.LINK_CMD_EXEC_TIMEOUT), - stdout=securesystemslib.process.DEVNULL, - stderr=securesystemslib.process.DEVNULL) + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) stdout_str = stderr_str = "" return_code = process.returncode @@ -499,7 +578,7 @@ def in_toto_run(name, material_list, product_list, link_cmd_args, PrefixError: Left-stripping artifact paths results in non-unique dict keys. - securesystemslib.process.subprocess.TimeoutExpired: Link command times out. + subprocess.TimeoutExpired: Link command times out. IOError, FileNotFoundError, NotADirectoryError, PermissionError: Cannot write link metadata. @@ -549,6 +628,7 @@ def in_toto_run(name, material_list, product_list, link_cmd_args, if link_cmd_args: LOG.info("Running command '{}'...".format(" ".join(link_cmd_args))) + securesystemslib.formats.LIST_OF_ANY_STRING_SCHEMA.check_match(link_cmd_args) byproducts = execute_link(link_cmd_args, record_streams) else: byproducts = {} @@ -661,7 +741,7 @@ def in_toto_record_start(step_name, material_list, signing_key=None, PrefixError: Left-stripping artifact paths results in non-unique dict keys. - securesystemslib.process.subprocess.TimeoutExpired: Link command times out. + subprocess.TimeoutExpired: Link command times out. IOError, PermissionError: Cannot write link metadata. @@ -806,7 +886,7 @@ def in_toto_record_stop(step_name, product_list, signing_key=None, PrefixError: Left-stripping artifact paths results in non-unique dict keys. - securesystemslib.process.subprocess.TimeoutExpired: Link command times out. + subprocess.TimeoutExpired: Link command times out. IOError, FileNotFoundError, NotADirectoryError, PermissionError: Cannot write link metadata. diff --git a/tests/test_runlib.py b/tests/test_runlib.py index b85554663..e8fa7563a 100755 --- a/tests/test_runlib.py +++ b/tests/test_runlib.py @@ -27,6 +27,7 @@ import tempfile import sys import stat +import subprocess import in_toto.settings import in_toto.exceptions @@ -34,7 +35,7 @@ from in_toto.exceptions import SignatureVerificationError from in_toto.runlib import (in_toto_run, in_toto_record_start, in_toto_record_stop, record_artifacts_as_dict, _apply_exclude_patterns, - _hash_artifact) + _hash_artifact, _subprocess_run_duplicate_streams) from securesystemslib.interface import ( generate_and_write_unencrypted_rsa_keypair, import_rsa_privatekey_from_file, @@ -521,6 +522,63 @@ def test_timeout_setting(self): # Restore original timeout in_toto.settings.LINK_CMD_EXEC_TIMEOUT = timeout_old +class TestSubprocess(unittest.TestCase): + + def test_run_duplicate_streams(self): + """Test output indeed duplicated.""" + # Command that prints 'foo' to stdout and 'bar' to stderr. + cmd = [ + sys.executable, + "-c", + "import sys; sys.stdout.write('foo'); sys.stderr.write('bar');" + ] + + # Create and open fake targets for standard streams + stdout_fd, stdout_fn = tempfile.mkstemp() + stderr_fd, stderr_fn = tempfile.mkstemp() + with open(stdout_fn, "r" ) as fake_stdout_reader, \ + os.fdopen(stdout_fd, "w") as fake_stdout_writer, \ + open(stderr_fn, "r") as fake_stderr_reader, \ + os.fdopen(stderr_fd, "w") as fake_stderr_writer: + + # Backup original standard streams and redirect to fake targets + real_stdout = sys.stdout + real_stderr = sys.stderr + sys.stdout = fake_stdout_writer + sys.stderr = fake_stderr_writer + + # Run command + ret_code, ret_out, ret_err = _subprocess_run_duplicate_streams(cmd, 10) + + # Rewind fake standard streams + fake_stdout_reader.seek(0) + fake_stderr_reader.seek(0) + + # Assert that what was printed and what was returned is correct + self.assertTrue(ret_out == fake_stdout_reader.read() == "foo") + self.assertTrue(ret_err == fake_stderr_reader.read() == "bar") + # Also assert the return value + self.assertEqual(ret_code, 0) + + # Restore original streams + sys.stdout = real_stdout + sys.stderr = real_stderr + + # Remove fake standard streams + os.remove(stdout_fn) + os.remove(stderr_fn) + + def test_run_duplicate_streams_return_value(self): + """Test return code.""" + cmd = [sys.executable, "-c", "import sys; sys.exit(100)"] + ret_code, _, _ = _subprocess_run_duplicate_streams(cmd, 10) + self.assertEqual(ret_code, 100) + + def test_run_duplicate_streams_timeout(self): + """Test timeout.""" + cmd = [sys.executable, "-c", "while True: pass"] + with self.assertRaises(subprocess.TimeoutExpired ): + _subprocess_run_duplicate_streams(cmd, timeout=-1) class TestInTotoRun(unittest.TestCase, TmpDirMixin): """"