Skip to content

Commit

Permalink
Stop using securesystemslib.process
Browse files Browse the repository at this point in the history
* Instead of `securesystemslib.process.run`, use `subprocess.run`
  directly. The former is only a thin wrapper over the latter,
  which provides no advantage in Py3-only times.

* Copy `securesystemslib.process.run_duplicate_streams` to a helper
  in runlib. This function is really only needed internally by
  in-toto-run tooling, for which it was developped originally
  (in-toto#160). It does not have to be pulic API in securesystemslib.
  Moreover, the function is not trivial and there have been
  multiple related bugs, one of them still unresolved
  (see secure-systems-lab/securesystemslib#337).

This patch allows us to deprecate `securesystemslib.process`, and
fix or workaround related issue wrt the specific runlib use case.

**Note to git historians:**
run_duplicate_streams was originally removed from in-toto in
in-toto/in-toto@29f063a, and now copied back from
https://github.com/secure-systems-lab/securesystemslib/blob/v0.26.0/securesystemslib/process.py

Signed-off-by: Lukas Puehringer <[email protected]>
  • Loading branch information
lukpueh committed Feb 9, 2023
1 parent d36cff6 commit 953335f
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 13 deletions.
104 changes: 92 additions & 12 deletions in_toto/runlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
import logging
import os
import itertools
import io
import subprocess
import sys
import tempfile
import time

from pathspec import PathSpec

Expand All @@ -43,7 +48,6 @@
import securesystemslib.formats
import securesystemslib.hash
import securesystemslib.exceptions
import securesystemslib.process
import securesystemslib.gpg

from in_toto.models.metadata import Metablock
Expand Down Expand Up @@ -322,6 +326,81 @@ def record_artifacts_as_dict(artifacts, exclude_patterns=None,

return artifacts_dict

def _subprocess_run_duplicate_streams(cmd, timeout):
"""Helper to run subprocess and both print and capture standards streams.
Caveat:
* Might behave unexpectedly with interactive commands.
* Might not duplicate output in real time, if the command buffers it (see
e.g. `print("foo")` vs. `print("foo", flush=True)`).
* Possible race condition on Windows when removing temporary files.
"""
# Use temporary files as targets for child process standard stream redirects
# They seem to work better (i.e. do not hang) than pipes, when using
# interactive commands like `vi`.
stdout_fd, stdout_name = tempfile.mkstemp()
stderr_fd, stderr_name = tempfile.mkstemp()
try:
with open(stdout_name, "r") as stdout_reader, \
open(stderr_name, "r") as stderr_reader, \
os.fdopen(stdout_fd, "w") as stdout_writer, \
os.fdopen(stderr_fd, "w") as stderr_writer:

# Store stream results in mutable dict to update it inside nested helper
_std = {"out": "", "err": ""}

def _duplicate_streams():
"""Helper to read from child process standard streams, write their
contents to parent process standard streams, and build up return values
for outer function.
"""
# Read until EOF but at most `io.DEFAULT_BUFFER_SIZE` bytes per call.
# Reading and writing in reasonably sized chunks prevents us from
# subverting a timeout, due to being busy for too long or indefinitely.
stdout_part = stdout_reader.read(io.DEFAULT_BUFFER_SIZE)
stderr_part = stderr_reader.read(io.DEFAULT_BUFFER_SIZE)
sys.stdout.write(stdout_part)
sys.stderr.write(stderr_part)
sys.stdout.flush()
sys.stderr.flush()
_std["out"] += stdout_part
_std["err"] += stderr_part

# Start child process, writing its standard streams to temporary files
proc = subprocess.Popen(
cmd,
stdout=stdout_writer,
stderr=stderr_writer,
universal_newlines=True,
)
proc_start_time = time.time()

# Duplicate streams until the process exits (or times out)
while proc.poll() is None:
# Time out as Python's `subprocess.run` would do it
if (
timeout is not None
and time.time() > proc_start_time + timeout
):
proc.kill()
proc.wait()
raise subprocess.TimeoutExpired(cmd, timeout)

_duplicate_streams()

# Read/write once more to grab everything that the process wrote between
# our last read in the loop and exiting, i.e. breaking the loop.
_duplicate_streams()

finally:
# The work is done or was interrupted, the temp files can be removed
os.remove(stdout_name)
os.remove(stderr_name)

# Return process exit code and captured streams
return proc.poll(), _std["out"], _std["err"]

def execute_link(link_cmd_args, record_streams):
"""
<Purpose>
Expand All @@ -343,10 +422,9 @@ def execute_link(link_cmd_args, record_streams):
OSError:
The given command is not present or non-executable
securesystemslib.process.subprocess.TimeoutExpired:
The execution of the given command times out. The default timeout
is securesystemslib.settings.SUBPROCESS_TIMEOUT.
subprocess.TimeoutExpired:
The execution of the given command times (see
in_toto.settings.LINK_CMD_EXEC_TIMEOUT).
<Side Effects>
Executes passed command in a subprocess and redirects stdout and stderr
Expand All @@ -360,14 +438,15 @@ def execute_link(link_cmd_args, record_streams):
"""
if record_streams:
return_code, stdout_str, stderr_str = \
securesystemslib.process.run_duplicate_streams(link_cmd_args,
_subprocess_run_duplicate_streams(
link_cmd_args,
timeout=float(in_toto.settings.LINK_CMD_EXEC_TIMEOUT))

else:
process = securesystemslib.process.run(link_cmd_args, check=False,
process = subprocess.run(link_cmd_args, check=False,
timeout=float(in_toto.settings.LINK_CMD_EXEC_TIMEOUT),
stdout=securesystemslib.process.DEVNULL,
stderr=securesystemslib.process.DEVNULL)
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
stdout_str = stderr_str = ""
return_code = process.returncode

Expand Down Expand Up @@ -499,7 +578,7 @@ def in_toto_run(name, material_list, product_list, link_cmd_args,
PrefixError: Left-stripping artifact paths results in non-unique dict keys.
securesystemslib.process.subprocess.TimeoutExpired: Link command times out.
subprocess.TimeoutExpired: Link command times out.
IOError, FileNotFoundError, NotADirectoryError, PermissionError:
Cannot write link metadata.
Expand Down Expand Up @@ -549,6 +628,7 @@ def in_toto_run(name, material_list, product_list, link_cmd_args,

if link_cmd_args:
LOG.info("Running command '{}'...".format(" ".join(link_cmd_args)))
securesystemslib.formats.LIST_OF_ANY_STRING_SCHEMA.check_match(link_cmd_args)
byproducts = execute_link(link_cmd_args, record_streams)
else:
byproducts = {}
Expand Down Expand Up @@ -661,7 +741,7 @@ def in_toto_record_start(step_name, material_list, signing_key=None,
PrefixError: Left-stripping artifact paths results in non-unique dict keys.
securesystemslib.process.subprocess.TimeoutExpired: Link command times out.
subprocess.TimeoutExpired: Link command times out.
IOError, PermissionError:
Cannot write link metadata.
Expand Down Expand Up @@ -806,7 +886,7 @@ def in_toto_record_stop(step_name, product_list, signing_key=None,
PrefixError: Left-stripping artifact paths results in non-unique dict keys.
securesystemslib.process.subprocess.TimeoutExpired: Link command times out.
subprocess.TimeoutExpired: Link command times out.
IOError, FileNotFoundError, NotADirectoryError, PermissionError:
Cannot write link metadata.
Expand Down
60 changes: 59 additions & 1 deletion tests/test_runlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,15 @@
import tempfile
import sys
import stat
import subprocess

import in_toto.settings
import in_toto.exceptions
from in_toto.models.metadata import Metablock
from in_toto.exceptions import SignatureVerificationError
from in_toto.runlib import (in_toto_run, in_toto_record_start,
in_toto_record_stop, record_artifacts_as_dict, _apply_exclude_patterns,
_hash_artifact)
_hash_artifact, _subprocess_run_duplicate_streams)
from securesystemslib.interface import (
generate_and_write_unencrypted_rsa_keypair,
import_rsa_privatekey_from_file,
Expand Down Expand Up @@ -521,6 +522,63 @@ def test_timeout_setting(self):
# Restore original timeout
in_toto.settings.LINK_CMD_EXEC_TIMEOUT = timeout_old

class TestSubprocess(unittest.TestCase):

def test_run_duplicate_streams(self):
"""Test output indeed duplicated."""
# Command that prints 'foo' to stdout and 'bar' to stderr.
cmd = [
sys.executable,
"-c",
"import sys; sys.stdout.write('foo'); sys.stderr.write('bar');"
]

# Create and open fake targets for standard streams
stdout_fd, stdout_fn = tempfile.mkstemp()
stderr_fd, stderr_fn = tempfile.mkstemp()
with open(stdout_fn, "r" ) as fake_stdout_reader, \
os.fdopen(stdout_fd, "w") as fake_stdout_writer, \
open(stderr_fn, "r") as fake_stderr_reader, \
os.fdopen(stderr_fd, "w") as fake_stderr_writer:

# Backup original standard streams and redirect to fake targets
real_stdout = sys.stdout
real_stderr = sys.stderr
sys.stdout = fake_stdout_writer
sys.stderr = fake_stderr_writer

# Run command
ret_code, ret_out, ret_err = _subprocess_run_duplicate_streams(cmd, 10)

# Rewind fake standard streams
fake_stdout_reader.seek(0)
fake_stderr_reader.seek(0)

# Assert that what was printed and what was returned is correct
self.assertTrue(ret_out == fake_stdout_reader.read() == "foo")
self.assertTrue(ret_err == fake_stderr_reader.read() == "bar")
# Also assert the return value
self.assertEqual(ret_code, 0)

# Restore original streams
sys.stdout = real_stdout
sys.stderr = real_stderr

# Remove fake standard streams
os.remove(stdout_fn)
os.remove(stderr_fn)

def test_run_duplicate_streams_return_value(self):
"""Test return code."""
cmd = [sys.executable, "-c", "import sys; sys.exit(100)"]
ret_code, _, _ = _subprocess_run_duplicate_streams(cmd, 10)
self.assertEqual(ret_code, 100)

def test_run_duplicate_streams_timeout(self):
"""Test timeout."""
cmd = [sys.executable, "-c", "while True: pass"]
with self.assertRaises(subprocess.TimeoutExpired ):
_subprocess_run_duplicate_streams(cmd, timeout=-1)

class TestInTotoRun(unittest.TestCase, TmpDirMixin):
""""
Expand Down

0 comments on commit 953335f

Please sign in to comment.