From e0505bb152508cc9a3de5f5a82f9a65d13bf4890 Mon Sep 17 00:00:00 2001
From: Chris Jerdonek <chris.jerdonek@gmail.com>
Date: Sun, 24 Mar 2019 22:34:42 -0700
Subject: [PATCH] Prevent subprocess stdout and stderr from cluttering pip's
 stdout.

---
 news/1219.bugfix                   |  2 +
 src/pip/_internal/utils/logging.py | 45 ++++++++++++--
 src/pip/_internal/utils/misc.py    | 99 +++++++++++++++---------------
 tests/functional/test_install.py   | 13 ++--
 tests/functional/test_wheel.py     |  2 +-
 tests/unit/test_utils.py           | 73 ++++++++++++----------
 6 files changed, 139 insertions(+), 95 deletions(-)
 create mode 100644 news/1219.bugfix

diff --git a/news/1219.bugfix b/news/1219.bugfix
new file mode 100644
index 00000000000..a697ffa2295
--- /dev/null
+++ b/news/1219.bugfix
@@ -0,0 +1,2 @@
+The stdout and stderr from VCS commands run by pip as subprocesses (e.g.
+``git``, ``hg``, etc.) no longer pollute pip's stdout.
diff --git a/src/pip/_internal/utils/logging.py b/src/pip/_internal/utils/logging.py
index 9ff6fcb141a..a28e88cac35 100644
--- a/src/pip/_internal/utils/logging.py
+++ b/src/pip/_internal/utils/logging.py
@@ -6,12 +6,13 @@
 import logging.handlers
 import os
 import sys
+from logging import Filter
 
 from pip._vendor.six import PY2
 
 from pip._internal.utils.compat import WINDOWS
 from pip._internal.utils.deprecation import DEPRECATION_MSG_PREFIX
-from pip._internal.utils.misc import ensure_dir
+from pip._internal.utils.misc import ensure_dir, subprocess_logger
 
 try:
     import threading
@@ -225,7 +226,7 @@ def _open(self):
         return logging.handlers.RotatingFileHandler._open(self)
 
 
-class MaxLevelFilter(logging.Filter):
+class MaxLevelFilter(Filter):
 
     def __init__(self, level):
         self.level = level
@@ -234,6 +235,18 @@ def filter(self, record):
         return record.levelno < self.level
 
 
+class ExcludeLoggerFilter(Filter):
+
+    """
+    A logging Filter that excludes records from a logger (or its children).
+    """
+
+    def filter(self, record):
+        # The base Filter class allows only records from a logger (or its
+        # children).
+        return not super(ExcludeLoggerFilter, self).filter(record)
+
+
 def setup_logging(verbosity, no_color, user_log_file):
     """Configures and sets up all of the logging
 
@@ -277,6 +290,9 @@ def setup_logging(verbosity, no_color, user_log_file):
         "stream": "pip._internal.utils.logging.ColorizedStreamHandler",
         "file": "pip._internal.utils.logging.BetterRotatingFileHandler",
     }
+    handlers = ["console", "console_errors", "console_subprocess"] + (
+        ["user_log"] if include_user_log else []
+    )
 
     logging.config.dictConfig({
         "version": 1,
@@ -286,6 +302,14 @@ def setup_logging(verbosity, no_color, user_log_file):
                 "()": "pip._internal.utils.logging.MaxLevelFilter",
                 "level": logging.WARNING,
             },
+            "restrict_to_subprocess": {
+                "()": "logging.Filter",
+                "name": subprocess_logger.name,
+            },
+            "exclude_subprocess": {
+                "()": "pip._internal.utils.logging.ExcludeLoggerFilter",
+                "name": subprocess_logger.name,
+            },
         },
         "formatters": {
             "indent": {
@@ -304,7 +328,7 @@ def setup_logging(verbosity, no_color, user_log_file):
                 "class": handler_classes["stream"],
                 "no_color": no_color,
                 "stream": log_streams["stdout"],
-                "filters": ["exclude_warnings"],
+                "filters": ["exclude_subprocess", "exclude_warnings"],
                 "formatter": "indent",
             },
             "console_errors": {
@@ -312,6 +336,17 @@ def setup_logging(verbosity, no_color, user_log_file):
                 "class": handler_classes["stream"],
                 "no_color": no_color,
                 "stream": log_streams["stderr"],
+                "filters": ["exclude_subprocess"],
+                "formatter": "indent",
+            },
+            # A handler responsible for logging to the console messages
+            # from the "subprocessor" logger.
+            "console_subprocess": {
+                "level": level,
+                "class": handler_classes["stream"],
+                "no_color": no_color,
+                "stream": log_streams["stderr"],
+                "filters": ["restrict_to_subprocess"],
                 "formatter": "indent",
             },
             "user_log": {
@@ -324,9 +359,7 @@ def setup_logging(verbosity, no_color, user_log_file):
         },
         "root": {
             "level": root_level,
-            "handlers": ["console", "console_errors"] + (
-                ["user_log"] if include_user_log else []
-            ),
+            "handlers": handlers,
         },
         "loggers": {
             "pip._vendor": {
diff --git a/src/pip/_internal/utils/misc.py b/src/pip/_internal/utils/misc.py
index 42f324bc412..ca7a529387c 100644
--- a/src/pip/_internal/utils/misc.py
+++ b/src/pip/_internal/utils/misc.py
@@ -65,6 +65,7 @@
 
 
 logger = std_logging.getLogger(__name__)
+subprocess_logger = std_logging.getLogger('pip.subprocessor')
 
 LOG_DIVIDER = '----------------------------------------'
 
@@ -671,6 +672,8 @@ def call_subprocess(
     # type: (...) -> Optional[Text]
     """
     Args:
+      show_stdout: if true, use INFO to log the subprocess's stderr and
+        stdout streams.  Otherwise, use DEBUG.  Defaults to False.
       extra_ok_returncodes: an iterable of integer return codes that are
         acceptable, in addition to 0. Defaults to None, which means [].
       unset_environ: an iterable of environment variable names to unset
@@ -680,39 +683,42 @@ def call_subprocess(
         extra_ok_returncodes = []
     if unset_environ is None:
         unset_environ = []
-    # This function's handling of subprocess output is confusing and I
-    # previously broke it terribly, so as penance I will write a long comment
-    # explaining things.
+    # Most places in pip use show_stdout=False. What this means is--
     #
-    # The obvious thing that affects output is the show_stdout=
-    # kwarg. show_stdout=True means, let the subprocess write directly to our
-    # stdout. It is almost never used
-    # inside pip (and should not be used in new code without a very good
-    # reason); as of 2016-02-22 it is only used in a few places inside the VCS
-    # wrapper code. Ideally we should get rid of it entirely, because it
-    # creates a lot of complexity here for a rarely used feature.
+    # - We connect the child's output (combined stderr and stdout) to a
+    #   single pipe, which we read.
+    # - We log this output to stderr at DEBUG level as it is received.
+    # - If DEBUG logging isn't enabled (e.g. if --verbose logging wasn't
+    #   requested), then we show a spinner so the user can still see the
+    #   subprocess is in progress.
+    # - If the subprocess exits with an error, we log the output to stderr
+    #   at ERROR level if it hasn't already been displayed to the console
+    #   (e.g. if --verbose logging wasn't enabled).  This way we don't log
+    #   the output to the console twice.
     #
-    # Most places in pip use show_stdout=False. What this means is:
-    # - We connect the child stdout to a pipe, which we read.
-    # - By default, we hide the output but show a spinner -- unless the
-    #   subprocess exits with an error, in which case we show the output.
-    # - If the --verbose option was passed (= loglevel is DEBUG), then we show
-    #   the output unconditionally. (But in this case we don't want to show
-    #   the output a second time if it turns out that there was an error.)
-    #
-    # stderr is always merged with stdout (even if show_stdout=True).
+    # If show_stdout=True, then the above is still done, but with DEBUG
+    # replaced by INFO.
     if show_stdout:
-        stdout = None
+        # Then log the subprocess output at INFO level.
+        log_subprocess = subprocess_logger.info
+        used_level = std_logging.INFO
     else:
-        stdout = subprocess.PIPE
+        # Then log the subprocess output using DEBUG.  This also ensures
+        # it will be logged to the log file (aka user_log), if enabled.
+        log_subprocess = subprocess_logger.debug
+        used_level = std_logging.DEBUG
+
+    # Whether the subprocess will be visible in the console.
+    showing_subprocess = subprocess_logger.getEffectiveLevel() <= used_level
 
-    # Only use the spinner when we're capturing stdout and we have one.
-    use_spinner = not show_stdout and spinner is not None
+    # Only use the spinner if we're not showing the subprocess output
+    # and we have a spinner.
+    use_spinner = not showing_subprocess and spinner is not None
 
     if command_desc is None:
         command_desc = format_command_args(cmd)
 
-    logger.debug("Running command %s", command_desc)
+    log_subprocess("Running command %s", command_desc)
     env = os.environ.copy()
     if extra_environ:
         env.update(extra_environ)
@@ -721,29 +727,27 @@ def call_subprocess(
     try:
         proc = subprocess.Popen(
             cmd, stderr=subprocess.STDOUT, stdin=subprocess.PIPE,
-            stdout=stdout, cwd=cwd, env=env,
+            stdout=subprocess.PIPE, cwd=cwd, env=env,
         )
         proc.stdin.close()
     except Exception as exc:
-        logger.critical(
+        subprocess_logger.critical(
             "Error %s while executing command %s", exc, command_desc,
         )
         raise
     all_output = []
-    if stdout is not None:
-        while True:
-            line = console_to_str(proc.stdout.readline())
-            if not line:
-                break
-            line = line.rstrip()
-            all_output.append(line + '\n')
-            if logger.getEffectiveLevel() <= std_logging.DEBUG:
-                # Show the line immediately
-                logger.debug(line)
-            else:
-                # Update the spinner
-                if use_spinner:
-                    spinner.spin()
+    while True:
+        line = console_to_str(proc.stdout.readline())
+        if not line:
+            break
+        line = line.rstrip()
+        all_output.append(line + '\n')
+
+        # Show the line immediately.
+        log_subprocess(line)
+        # Update the spinner.
+        if use_spinner:
+            spinner.spin()
     try:
         proc.wait()
     finally:
@@ -759,18 +763,19 @@ def call_subprocess(
             spinner.finish("done")
     if proc_had_error:
         if on_returncode == 'raise':
-            if (logger.getEffectiveLevel() > std_logging.DEBUG and
-                    not show_stdout):
-                logger.info(
+            if not showing_subprocess:
+                # Then the subprocess streams haven't been logged to the
+                # console yet.
+                subprocess_logger.error(
                     'Complete output from command %s:', command_desc,
                 )
                 # The all_output value already ends in a newline.
-                logger.info(''.join(all_output) + LOG_DIVIDER)
+                subprocess_logger.error(''.join(all_output) + LOG_DIVIDER)
             raise InstallationError(
                 'Command "%s" failed with error code %s in %s'
                 % (command_desc, proc.returncode, cwd))
         elif on_returncode == 'warn':
-            logger.warning(
+            subprocess_logger.warning(
                 'Command "%s" had error code %s in %s',
                 command_desc, proc.returncode, cwd,
             )
@@ -779,9 +784,7 @@ def call_subprocess(
         else:
             raise ValueError('Invalid value: on_returncode=%s' %
                              repr(on_returncode))
-    if not show_stdout:
-        return ''.join(all_output)
-    return None
+    return ''.join(all_output)
 
 
 def _make_build_dir(build_dir):
diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py
index ed0d37b5b91..51f844be25c 100644
--- a/tests/functional/test_install.py
+++ b/tests/functional/test_install.py
@@ -160,7 +160,7 @@ def test_pep518_forkbombs(script, data, common_wheels, command, package):
     )
     assert '{1} is already being built: {0} from {1}'.format(
         package, path_to_url(package_source),
-    ) in result.stdout, str(result)
+    ) in result.stderr, str(result)
 
 
 @pytest.mark.network
@@ -937,8 +937,9 @@ def test_install_package_that_emits_unicode(script, data):
         'install', to_install, expect_error=True, expect_temp=True, quiet=True,
     )
     assert (
-        'FakeError: this package designed to fail on install' in result.stdout
+        'FakeError: this package designed to fail on install' in result.stderr
     )
+    assert 'UnicodeDecodeError' not in result.stderr
     assert 'UnicodeDecodeError' not in result.stdout
 
 
@@ -1119,19 +1120,19 @@ def test_install_subprocess_output_handling(script, data):
     # With --verbose we should show the output.
     # Only count examples with sys.argv[1] == egg_info, because we call
     # setup.py multiple times, which should not count as duplicate output.
-    result = script.pip(*(args + ["--verbose"]))
-    assert 1 == result.stdout.count("HELLO FROM CHATTYMODULE egg_info")
+    result = script.pip(*(args + ["--verbose"]), expect_stderr=True)
+    assert 1 == result.stderr.count("HELLO FROM CHATTYMODULE egg_info")
     script.pip("uninstall", "-y", "chattymodule")
 
     # If the install fails, then we *should* show the output... but only once,
     # even if --verbose is given.
     result = script.pip(*(args + ["--global-option=--fail"]),
                         expect_error=True)
-    assert 1 == result.stdout.count("I DIE, I DIE")
+    assert 1 == result.stderr.count("I DIE, I DIE")
 
     result = script.pip(*(args + ["--global-option=--fail", "--verbose"]),
                         expect_error=True)
-    assert 1 == result.stdout.count("I DIE, I DIE")
+    assert 1 == result.stderr.count("I DIE, I DIE")
 
 
 def test_install_log(script, data, tmpdir):
diff --git a/tests/functional/test_wheel.py b/tests/functional/test_wheel.py
index 5de2a9b29ec..f67720f165a 100644
--- a/tests/functional/test_wheel.py
+++ b/tests/functional/test_wheel.py
@@ -118,7 +118,7 @@ def test_pip_wheel_fail(script, data):
         wheel_file_path,
         result.files_created,
     )
-    assert "FakeError" in result.stdout, result.stdout
+    assert "FakeError" in result.stderr, result.stderr
     assert "Failed to build wheelbroken" in result.stdout, result.stdout
     assert result.returncode != 0
 
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index f1abddd988e..dc508b614a0 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -6,7 +6,6 @@
 """
 import codecs
 import itertools
-import logging
 import os
 import shutil
 import stat
@@ -15,6 +14,7 @@
 import time
 import warnings
 from io import BytesIO
+from logging import DEBUG, ERROR, INFO, WARNING
 
 import pytest
 from mock import Mock, patch
@@ -781,30 +781,25 @@ def check_result(
         :param spinner: the FakeSpinner object passed to call_subprocess()
             to be checked.
         :param result: the call_subprocess() return value to be checked.
-        :param expected: a 3-tuple (expected_proc, expected_out,
-            expected_records), where
+        :param expected: a pair (expected_proc, expected_records), where
             1) `expected_proc` is the expected return value of
               call_subprocess() as a list of lines, or None if the return
               value is expected to be None;
-            2) `expected_out` is the expected stdout captured from the
-              subprocess call, as a list of lines; and
-            3) `expected_records` is the expected value of
+            2) `expected_records` is the expected value of
               caplog.record_tuples.
         :param expected_spinner: a 2-tuple of the spinner's expected
             (spin_count, final_status).
         """
-        expected_proc, expected_out, expected_records = expected
+        expected_proc, expected_records = expected
 
         if expected_proc is None:
-            assert result is expected_proc
+            assert result is None
         else:
             assert result.splitlines() == expected_proc
 
+        # Confirm that stdout and stderr haven't been written to.
         captured = capfd.readouterr()
-        stdout, stderr = captured.out, captured.err
-
-        assert stdout.splitlines() == expected_out
-        assert stderr == ''
+        assert (captured.out, captured.err) == ('', '')
 
         records = caplog.record_tuples
         if len(records) != len(expected_records):
@@ -837,31 +832,31 @@ def test_debug_logging(self, capfd, caplog):
         """
         Test DEBUG logging (and without passing show_stdout=True).
         """
-        log_level = logging.DEBUG
+        log_level = DEBUG
         args, spinner = self.prepare_call(caplog, log_level)
         result = call_subprocess(args, spinner=spinner)
 
-        expected = (['Hello', 'world'], [], [
-            ('pip._internal.utils.misc', 10, 'Running command '),
-            ('pip._internal.utils.misc', 10, 'Hello'),
-            ('pip._internal.utils.misc', 10, 'world'),
+        expected = (['Hello', 'world'], [
+            ('pip.subprocessor', DEBUG, 'Running command '),
+            ('pip.subprocessor', DEBUG, 'Hello'),
+            ('pip.subprocessor', DEBUG, 'world'),
         ])
         # The spinner shouldn't spin in this case since the subprocess
         # output is already being logged to the console.
         self.check_result(
             capfd, caplog, log_level, spinner, result, expected,
-            expected_spinner=(0, 'done'),
+            expected_spinner=(0, None),
         )
 
     def test_info_logging(self, capfd, caplog):
         """
         Test INFO logging (and without passing show_stdout=True).
         """
-        log_level = logging.INFO
+        log_level = INFO
         args, spinner = self.prepare_call(caplog, log_level)
         result = call_subprocess(args, spinner=spinner)
 
-        expected = (['Hello', 'world'], [], [])
+        expected = (['Hello', 'world'], [])
         # The spinner should spin twice in this case since the subprocess
         # output isn't being written to the console.
         self.check_result(
@@ -874,7 +869,7 @@ def test_info_logging__subprocess_error(self, capfd, caplog):
         Test INFO logging of a subprocess with an error (and without passing
         show_stdout=True).
         """
-        log_level = logging.INFO
+        log_level = INFO
         command = 'print("Hello"); print("world"); exit("fail")'
         args, spinner = self.prepare_call(caplog, log_level, command=command)
 
@@ -882,10 +877,10 @@ def test_info_logging__subprocess_error(self, capfd, caplog):
             call_subprocess(args, spinner=spinner)
         result = None
 
-        expected = (None, [], [
-            ('pip._internal.utils.misc', 20, 'Complete output from command '),
+        expected = (None, [
+            ('pip.subprocessor', ERROR, 'Complete output from command '),
             # The "failed" portion is later on in this "Hello" string.
-            ('pip._internal.utils.misc', 20, 'Hello'),
+            ('pip.subprocessor', ERROR, 'Hello'),
         ])
         # The spinner should spin three times in this case since the
         # subprocess output isn't being written to the console.
@@ -915,11 +910,15 @@ def test_info_logging_with_show_stdout_true(self, capfd, caplog):
         """
         Test INFO logging with show_stdout=True.
         """
-        log_level = logging.INFO
+        log_level = INFO
         args, spinner = self.prepare_call(caplog, log_level)
         result = call_subprocess(args, spinner=spinner, show_stdout=True)
 
-        expected = (None, ['Hello', 'world'], [])
+        expected = (['Hello', 'world'], [
+            ('pip.subprocessor', INFO, 'Running command '),
+            ('pip.subprocessor', INFO, 'Hello'),
+            ('pip.subprocessor', INFO, 'world'),
+        ])
         # The spinner shouldn't spin in this case since the subprocess
         # output is already being written to the console.
         self.check_result(
@@ -931,17 +930,23 @@ def test_info_logging_with_show_stdout_true(self, capfd, caplog):
         'exit_status', 'show_stdout', 'extra_ok_returncodes', 'log_level',
         'expected'),
         [
-            (0, False, None, logging.INFO, (None, 'done', 2)),
-            # Test some cases that should result in show_spinner false.
-            (0, False, None, logging.DEBUG, (None, 'done', 0)),
+            # The spinner should show here because show_stdout=False means
+            # the subprocess should get logged at DEBUG level, but the passed
+            # log level is only INFO.
+            (0, False, None, INFO, (None, 'done', 2)),
+            # Test some cases where the spinner should not be shown.
+            (0, False, None, DEBUG, (None, None, 0)),
             # Test show_stdout=True.
-            (0, True, None, logging.DEBUG, (None, None, 0)),
-            (0, True, None, logging.INFO, (None, None, 0)),
-            (0, True, None, logging.WARNING, (None, None, 0)),
+            (0, True, None, DEBUG, (None, None, 0)),
+            (0, True, None, INFO, (None, None, 0)),
+            # The spinner should show here because show_stdout=True means
+            # the subprocess should get logged at INFO level, but the passed
+            # log level is only WARNING.
+            (0, True, None, WARNING, (None, 'done', 2)),
             # Test a non-zero exit status.
-            (3, False, None, logging.INFO, (InstallationError, 'error', 2)),
+            (3, False, None, INFO, (InstallationError, 'error', 2)),
             # Test a non-zero exit status also in extra_ok_returncodes.
-            (3, False, (3, ), logging.INFO, (None, 'done', 2)),
+            (3, False, (3, ), INFO, (None, 'done', 2)),
     ])
     def test_spinner_finish(
         self, exit_status, show_stdout, extra_ok_returncodes, log_level,