diff --git a/tests/mypy_test.py b/tests/mypy_test.py
index 55f846b3d8f1..f82b6739bda0 100644
--- a/tests/mypy_test.py
+++ b/tests/mypy_test.py
@@ -4,15 +4,18 @@
 from __future__ import annotations
 
 import argparse
+import concurrent.futures
 import os
 import re
+import subprocess
 import sys
 import tempfile
-from contextlib import redirect_stderr, redirect_stdout
+import time
+from collections import defaultdict
 from dataclasses import dataclass
-from io import StringIO
 from itertools import product
 from pathlib import Path
+from threading import Lock
 from typing import TYPE_CHECKING, Any, NamedTuple
 
 if TYPE_CHECKING:
@@ -23,17 +26,22 @@
 import tomli
 from utils import (
     VERSIONS_RE as VERSION_LINE_RE,
+    PackageDependencies,
+    VenvInfo,
     colored,
     get_gitignore_spec,
+    get_mypy_req,
     get_recursive_requirements,
+    make_venv,
     print_error,
     print_success_msg,
     spec_matches_path,
     strip_comments,
 )
 
+# Fail early if mypy isn't installed
 try:
-    from mypy.api import run as mypy_run
+    import mypy  # noqa: F401
 except ImportError:
     print_error("Cannot import mypy. Did you install it?")
     sys.exit(1)
@@ -108,7 +116,7 @@ class TestConfig:
 
 def log(args: TestConfig, *varargs: object) -> None:
     if args.verbose >= 2:
-        print(*varargs)
+        print(colored(" ".join(map(str, varargs)), "blue"))
 
 
 def match(path: Path, args: TestConfig) -> bool:
@@ -204,7 +212,19 @@ def add_configuration(configurations: list[MypyDistConf], distribution: str) ->
         configurations.append(MypyDistConf(module_name, values.copy()))
 
 
-def run_mypy(args: TestConfig, configurations: list[MypyDistConf], files: list[Path], *, testing_stdlib: bool) -> ReturnCode:
+def run_mypy(
+    args: TestConfig,
+    configurations: list[MypyDistConf],
+    files: list[Path],
+    *,
+    testing_stdlib: bool,
+    non_types_dependencies: bool,
+    venv_info: VenvInfo,
+    mypypath: str | None = None,
+) -> ReturnCode:
+    env_vars = dict(os.environ)
+    if mypypath is not None:
+        env_vars["MYPYPATH"] = mypypath
     with tempfile.NamedTemporaryFile("w+") as temp:
         temp.write("[mypy]\n")
         for dist_conf in configurations:
@@ -213,57 +233,49 @@ def run_mypy(args: TestConfig, configurations: list[MypyDistConf], files: list[P
                 temp.write(f"{k} = {v}\n")
         temp.flush()
 
-        flags = get_mypy_flags(args, temp.name, testing_stdlib=testing_stdlib)
+        flags = [
+            "--python-version",
+            args.version,
+            "--show-traceback",
+            "--warn-incomplete-stub",
+            "--no-error-summary",
+            "--platform",
+            args.platform,
+            "--custom-typeshed-dir",
+            str(Path(__file__).parent.parent),
+            "--strict",
+            # Stub completion is checked by pyright (--allow-*-defs)
+            "--allow-untyped-defs",
+            "--allow-incomplete-defs",
+            "--allow-subclassing-any",  # TODO: Do we still need this now that non-types dependencies are allowed? (#5768)
+            "--enable-error-code",
+            "ignore-without-code",
+            "--config-file",
+            temp.name,
+        ]
+        if not testing_stdlib:
+            flags.append("--explicit-package-bases")
+        if not non_types_dependencies:
+            flags.append("--no-site-packages")
+
         mypy_args = [*flags, *map(str, files)]
+        mypy_command = [venv_info.python_exe, "-m", "mypy"] + mypy_args
         if args.verbose:
-            print("running mypy", " ".join(mypy_args))
-        stdout_redirect, stderr_redirect = StringIO(), StringIO()
-        with redirect_stdout(stdout_redirect), redirect_stderr(stderr_redirect):
-            returned_stdout, returned_stderr, exit_code = mypy_run(mypy_args)
-
-        if exit_code:
+            print(colored(f"running {' '.join(mypy_command)}", "blue"))
+        result = subprocess.run(mypy_command, capture_output=True, text=True, env=env_vars)
+        if result.returncode:
             print_error("failure\n")
-            captured_stdout = stdout_redirect.getvalue()
-            captured_stderr = stderr_redirect.getvalue()
-            if returned_stderr:
-                print_error(returned_stderr)
-            if captured_stderr:
-                print_error(captured_stderr)
-            if returned_stdout:
-                print_error(returned_stdout)
-            if captured_stdout:
-                print_error(captured_stdout, end="")
+            if result.stdout:
+                print_error(result.stdout)
+            if result.stderr:
+                print_error(result.stderr)
+            if non_types_dependencies and args.verbose:
+                print("Ran with the following environment:")
+                subprocess.run([venv_info.pip_exe, "freeze", "--all"])
+                print()
         else:
             print_success_msg()
-        return exit_code
-
-
-def get_mypy_flags(args: TestConfig, temp_name: str, *, testing_stdlib: bool) -> list[str]:
-    flags = [
-        "--python-version",
-        args.version,
-        "--show-traceback",
-        "--warn-incomplete-stub",
-        "--show-error-codes",
-        "--no-error-summary",
-        "--platform",
-        args.platform,
-        "--no-site-packages",
-        "--custom-typeshed-dir",
-        str(Path(__file__).parent.parent),
-        "--strict",
-        # Stub completion is checked by pyright (--allow-*-defs)
-        "--allow-untyped-defs",
-        "--allow-incomplete-defs",
-        "--allow-subclassing-any",  # Needed until we can use non-types dependencies #5768
-        "--enable-error-code",
-        "ignore-without-code",
-        "--config-file",
-        temp_name,
-    ]
-    if not testing_stdlib:
-        flags.append("--explicit-package-bases")
-    return flags
+        return result.returncode
 
 
 def add_third_party_files(
@@ -298,7 +310,9 @@ class TestResults(NamedTuple):
     files_checked: int
 
 
-def test_third_party_distribution(distribution: str, args: TestConfig) -> TestResults:
+def test_third_party_distribution(
+    distribution: str, args: TestConfig, venv_info: VenvInfo, *, non_types_dependencies: bool
+) -> TestResults:
     """Test the stubs of a third-party distribution.
 
     Return a tuple, where the first element indicates mypy's return code
@@ -313,20 +327,24 @@ def test_third_party_distribution(distribution: str, args: TestConfig) -> TestRe
     if not files and args.filter:
         return TestResults(0, 0)
 
-    print(f"testing {distribution} ({len(files)} files)... ", end="")
+    print(f"testing {distribution} ({len(files)} files)... ", end="", flush=True)
 
     if not files:
         print_error("no files found")
         sys.exit(1)
 
-    prev_mypypath = os.getenv("MYPYPATH")
-    os.environ["MYPYPATH"] = os.pathsep.join(str(Path("stubs", dist)) for dist in seen_dists)
-    code = run_mypy(args, configurations, files, testing_stdlib=False)
-    if prev_mypypath is None:
-        del os.environ["MYPYPATH"]
-    else:
-        os.environ["MYPYPATH"] = prev_mypypath
-
+    mypypath = os.pathsep.join(str(Path("stubs", dist)) for dist in seen_dists)
+    if args.verbose:
+        print(colored(f"\n{mypypath=}", "blue"))
+    code = run_mypy(
+        args,
+        configurations,
+        files,
+        venv_info=venv_info,
+        mypypath=mypypath,
+        testing_stdlib=False,
+        non_types_dependencies=non_types_dependencies,
+    )
     return TestResults(code, len(files))
 
 
@@ -343,19 +361,105 @@ def test_stdlib(code: int, args: TestConfig) -> TestResults:
             add_files(files, (stdlib / name), args)
 
     if files:
-        print(f"Testing stdlib ({len(files)} files)...")
-        print("Running mypy " + " ".join(get_mypy_flags(args, "/tmp/...", testing_stdlib=True)))
-        this_code = run_mypy(args, [], files, testing_stdlib=True)
+        print(f"Testing stdlib ({len(files)} files)...", end="", flush=True)
+        # We don't actually need pip for the stdlib testing
+        venv_info = VenvInfo(pip_exe="", python_exe=sys.executable)
+        this_code = run_mypy(args, [], files, venv_info=venv_info, testing_stdlib=True, non_types_dependencies=False)
         code = max(code, this_code)
 
     return TestResults(code, len(files))
 
 
-def test_third_party_stubs(code: int, args: TestConfig) -> TestResults:
+_PRINT_LOCK = Lock()
+_DISTRIBUTION_TO_VENV_MAPPING: dict[str, VenvInfo] = {}
+
+
+def setup_venv_for_external_requirements_set(requirements_set: frozenset[str], tempdir: Path) -> tuple[frozenset[str], VenvInfo]:
+    venv_dir = tempdir / f".venv-{hash(requirements_set)}"
+    return requirements_set, make_venv(venv_dir)
+
+
+def install_requirements_for_venv(venv_info: VenvInfo, args: TestConfig, external_requirements: frozenset[str]) -> None:
+    # Use --no-cache-dir to avoid issues with concurrent read/writes to the cache
+    pip_command = [venv_info.pip_exe, "install", get_mypy_req(), *sorted(external_requirements), "--no-cache-dir"]
+    if args.verbose:
+        with _PRINT_LOCK:
+            print(colored(f"Running {pip_command}", "blue"))
+    try:
+        subprocess.run(pip_command, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as e:
+        print(e.stderr)
+        raise
+
+
+def setup_virtual_environments(distributions: dict[str, PackageDependencies], args: TestConfig, tempdir: Path) -> None:
+    # We don't actually need pip if there aren't any external dependencies
+    no_external_dependencies_venv = VenvInfo(pip_exe="", python_exe=sys.executable)
+    external_requirements_to_distributions: defaultdict[frozenset[str], list[str]] = defaultdict(list)
+    num_pkgs_with_external_reqs = 0
+
+    for distribution_name, requirements in distributions.items():
+        if requirements.external_pkgs:
+            num_pkgs_with_external_reqs += 1
+            external_requirements = frozenset(requirements.external_pkgs)
+            external_requirements_to_distributions[external_requirements].append(distribution_name)
+        else:
+            _DISTRIBUTION_TO_VENV_MAPPING[distribution_name] = no_external_dependencies_venv
+
+    if num_pkgs_with_external_reqs == 0:
+        if args.verbose:
+            print(colored("No additional venvs are required to be set up", "blue"))
+        return
+
+    requirements_sets_to_venvs: dict[frozenset[str], VenvInfo] = {}
+
+    if args.verbose:
+        num_venvs = len(external_requirements_to_distributions)
+        msg = (
+            f"Setting up {num_venvs} venv{'s' if num_venvs != 1 else ''} "
+            f"for {num_pkgs_with_external_reqs} "
+            f"distribution{'s' if num_pkgs_with_external_reqs != 1 else ''}... "
+        )
+        print(colored(msg, "blue"), end="", flush=True)
+        venv_start_time = time.perf_counter()
+
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        venv_info_futures = [
+            executor.submit(setup_venv_for_external_requirements_set, requirements_set, tempdir)
+            for requirements_set in external_requirements_to_distributions
+        ]
+        for venv_info_future in concurrent.futures.as_completed(venv_info_futures):
+            requirements_set, venv_info = venv_info_future.result()
+            requirements_sets_to_venvs[requirements_set] = venv_info
+
+    if args.verbose:
+        venv_elapsed_time = time.perf_counter() - venv_start_time
+        print(colored(f"took {venv_elapsed_time:.2f} seconds", "blue"))
+        pip_start_time = time.perf_counter()
+
+    # Limit workers to 10 at a time, since this makes network requests
+    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+        pip_install_futures = [
+            executor.submit(install_requirements_for_venv, venv_info, args, requirements_set)
+            for requirements_set, venv_info in requirements_sets_to_venvs.items()
+        ]
+        concurrent.futures.wait(pip_install_futures)
+
+    if args.verbose:
+        pip_elapsed_time = time.perf_counter() - pip_start_time
+        msg = f"Combined time for installing requirements across all venvs: {pip_elapsed_time:.2f} seconds"
+        print(colored(msg, "blue"))
+
+    for requirements_set, distribution_list in external_requirements_to_distributions.items():
+        venv_to_use = requirements_sets_to_venvs[requirements_set]
+        _DISTRIBUTION_TO_VENV_MAPPING.update(dict.fromkeys(distribution_list, venv_to_use))
+
+
+def test_third_party_stubs(code: int, args: TestConfig, tempdir: Path) -> TestResults:
     print("Testing third-party packages...")
-    print("Running mypy " + " ".join(get_mypy_flags(args, "/tmp/...", testing_stdlib=False)))
     files_checked = 0
     gitignore_spec = get_gitignore_spec()
+    distributions_to_check: dict[str, PackageDependencies] = {}
 
     for distribution in sorted(os.listdir("stubs")):
         distribution_path = Path("stubs", distribution)
@@ -368,14 +472,25 @@ def test_third_party_stubs(code: int, args: TestConfig) -> TestResults:
             or Path("stubs") in args.filter
             or any(distribution_path in path.parents for path in args.filter)
         ):
-            this_code, checked = test_third_party_distribution(distribution, args)
-            code = max(code, this_code)
-            files_checked += checked
+            distributions_to_check[distribution] = get_recursive_requirements(distribution)
+
+    if not _DISTRIBUTION_TO_VENV_MAPPING:
+        setup_virtual_environments(distributions_to_check, args, tempdir)
+
+    assert _DISTRIBUTION_TO_VENV_MAPPING.keys() == distributions_to_check.keys()
+
+    for distribution, venv_info in _DISTRIBUTION_TO_VENV_MAPPING.items():
+        non_types_dependencies = venv_info.python_exe != sys.executable
+        this_code, checked = test_third_party_distribution(
+            distribution, args, venv_info=venv_info, non_types_dependencies=non_types_dependencies
+        )
+        code = max(code, this_code)
+        files_checked += checked
 
     return TestResults(code, files_checked)
 
 
-def test_typeshed(code: int, args: TestConfig) -> TestResults:
+def test_typeshed(code: int, args: TestConfig, tempdir: Path) -> TestResults:
     print(f"*** Testing Python {args.version} on {args.platform}")
     files_checked_this_version = 0
     stdlib_dir, stubs_dir = Path("stdlib"), Path("stubs")
@@ -385,7 +500,7 @@ def test_typeshed(code: int, args: TestConfig) -> TestResults:
         print()
 
     if stubs_dir in args.filter or any(stubs_dir in path.parents for path in args.filter):
-        code, third_party_files_checked = test_third_party_stubs(code, args)
+        code, third_party_files_checked = test_third_party_stubs(code, args, tempdir)
         files_checked_this_version += third_party_files_checked
         print()
 
@@ -400,10 +515,12 @@ def main() -> None:
     exclude = args.exclude or []
     code = 0
     total_files_checked = 0
-    for version, platform in product(versions, platforms):
-        config = TestConfig(args.verbose, filter, exclude, version, platform)
-        code, files_checked_this_version = test_typeshed(code, args=config)
-        total_files_checked += files_checked_this_version
+    with tempfile.TemporaryDirectory() as td:
+        td_path = Path(td)
+        for version, platform in product(versions, platforms):
+            config = TestConfig(args.verbose, filter, exclude, version, platform)
+            code, files_checked_this_version = test_typeshed(code, args=config, tempdir=td_path)
+            total_files_checked += files_checked_this_version
     if code:
         print_error(f"--- exit status {code}, {total_files_checked} files checked ---")
         sys.exit(code)
@@ -417,5 +534,5 @@ def main() -> None:
     try:
         main()
     except KeyboardInterrupt:
-        print_error("\n\n!!!\nTest aborted due to KeyboardInterrupt\n!!!")
+        print_error("\n\nTest aborted due to KeyboardInterrupt!")
         sys.exit(1)
diff --git a/tests/utils.py b/tests/utils.py
index a0f0d56ef734..a8d81f93e122 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -91,7 +91,10 @@ def read_dependencies(distribution: str) -> PackageDependencies:
         if maybe_typeshed_dependency in pypi_name_to_typeshed_name_mapping:
             typeshed.append(pypi_name_to_typeshed_name_mapping[maybe_typeshed_dependency])
         else:
-            external.append(dependency)
+            # convert to Requirement and then back to str
+            # to make sure that the requirements all have a normalised string representation
+            # (This will also catch any malformed requirements early)
+            external.append(str(Requirement(dependency)))
     return PackageDependencies(tuple(typeshed), tuple(external))
 
 
@@ -142,7 +145,7 @@ def make_venv(venv_dir: Path) -> VenvInfo:
     try:
         venv.create(venv_dir, with_pip=True, clear=True)
     except subprocess.CalledProcessError as e:
-        if "ensurepip" in e.cmd:
+        if "ensurepip" in e.cmd and b"KeyboardInterrupt" not in e.stdout.splitlines():
             print_error(
                 "stubtest requires a Python installation with ensurepip. "
                 "If on Linux, you may need to install the python3-venv package."