From 6ed60ac162297361f531255ad339ae2e43a6b95e Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Sun, 1 May 2022 10:56:33 -0600 Subject: [PATCH 1/3] MultipleFailures -> ExceptionGroup Using the new features from PEP-654 and PEP-678, exception groups and enriching exceptions with notes --- hypothesis-python/RELEASE.rst | 10 ++ hypothesis-python/src/hypothesis/core.py | 143 ++++++++---------- hypothesis-python/src/hypothesis/errors.py | 15 +- .../src/hypothesis/internal/escalation.py | 2 +- .../tests/cover/test_arbitrary_data.py | 37 ++--- .../tests/cover/test_deadline.py | 11 +- .../tests/cover/test_error_in_draw.py | 9 +- .../tests/cover/test_escalation.py | 8 + .../tests/cover/test_explicit_examples.py | 24 +-- .../cover/test_falsifying_example_output.py | 34 ++--- .../tests/cover/test_random_module.py | 19 +-- hypothesis-python/tests/cover/test_randoms.py | 19 +-- .../tests/cover/test_reporting.py | 8 +- .../tests/cover/test_reproduce_failure.py | 10 +- .../tests/cover/test_slippage.py | 69 ++++++--- .../tests/cover/test_stateful.py | 119 +++++++-------- .../tests/cover/test_testdecorators.py | 18 +-- .../tests/ghostwriter/test_ghostwriter.py | 5 +- .../tests/nocover/test_interesting_origin.py | 4 +- .../tests/nocover/test_scrutineer.py | 9 +- .../tests/nocover/test_stateful.py | 17 +-- .../tests/nocover/test_targeting.py | 1 - .../tests/pytest/test_skipping.py | 2 +- 23 files changed, 280 insertions(+), 313 deletions(-) create mode 100644 hypothesis-python/RELEASE.rst diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..1ff6d3638b --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,10 @@ +RELEASE_TYPE: minor + +Reporting of :obj:`multiple failing examples ` +now uses the :pep:`654` `ExceptionGroup `__ type, which is provided by the +:pypi:`exceptiongroup` backport on Python 3.10 and earlier (:issue:`3175`). +``hypothesis.errors.MultipleFailures`` is therefore deprecated. + +Failing examples and other reports are now stored as :pep:`678` exception notes, which +ensures that they will always appear together with the traceback and other information +about their respective error. diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index c931f2d193..3234155769 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -60,7 +60,6 @@ HypothesisDeprecationWarning, HypothesisWarning, InvalidArgument, - MultipleFailures, NoSuchExample, StopTest, Unsatisfiable, @@ -69,6 +68,7 @@ from hypothesis.executors import default_new_style_executor, new_style_executor from hypothesis.internal.compat import ( PYPY, + BaseExceptionGroup, bad_django_TestCase, get_type_hints, int_from_bytes, @@ -575,7 +575,6 @@ def __init__( self.settings = settings self.last_exception = None self.falsifying_examples = () - self.__was_flaky = False self.random = random self.__test_runtime = None self.ever_executed = False @@ -710,11 +709,10 @@ def run(data): ) else: report("Failed to reproduce exception. 
Expected: \n" + traceback) - self.__flaky( - f"Hypothesis {text_repr} produces unreliable results: Falsified" - " on the first call but did not on a subsequent one", - cause=exception, - ) + raise Flaky( + f"Hypothesis {text_repr} produces unreliable results: " + "Falsified on the first call but did not on a subsequent one" + ) from exception return result def _execute_once_for_engine(self, data): @@ -849,57 +847,50 @@ def run_engine(self): # The engine found one or more failures, so we need to reproduce and # report them. - flaky = 0 + errors_to_report = [] - if runner.best_observed_targets: - for line in describe_targets(runner.best_observed_targets): - report(line) - report("") + report_lines = describe_targets(runner.best_observed_targets) + if report_lines: + report_lines.append("") explanations = explanatory_lines(self.explain_traces, self.settings) for falsifying_example in self.falsifying_examples: info = falsifying_example.extra_information + fragments = [] ran_example = ConjectureData.for_buffer(falsifying_example.buffer) - self.__was_flaky = False assert info.__expected_exception is not None try: - self.execute_once( - ran_example, - print_example=not self.is_find, - is_final=True, - expected_failure=( - info.__expected_exception, - info.__expected_traceback, - ), - ) + with with_reporter(fragments.append): + self.execute_once( + ran_example, + print_example=not self.is_find, + is_final=True, + expected_failure=( + info.__expected_exception, + info.__expected_traceback, + ), + ) except (UnsatisfiedAssumption, StopTest) as e: - report(format_exception(e, e.__traceback__)) - self.__flaky( + err = Flaky( "Unreliable assumption: An example which satisfied " "assumptions on the first run now fails it.", - cause=e, ) + err.__cause__ = err.__context__ = e + errors_to_report.append((fragments, err)) except BaseException as e: # If we have anything for explain-mode, this is the time to report. for line in explanations[falsifying_example.interesting_origin]: - report(line) - - if len(self.falsifying_examples) <= 1: - # There is only one failure, so we can report it by raising - # it directly. - raise - - # We are reporting multiple failures, so we need to manually - # print each exception's stack trace and information. - tb = get_trimmed_traceback() - report(format_exception(e, tb)) + fragments.append(line) + errors_to_report.append( + (fragments, e.with_traceback(get_trimmed_traceback())) + ) finally: # Whether or not replay actually raised the exception again, we want # to print the reproduce_failure decorator for the failing example. if self.settings.print_blob: - report( + fragments.append( "\nYou can reproduce this example by temporarily adding " "@reproduce_failure(%r, %r) as a decorator on your test case" % (__version__, encode_failure(falsifying_example.buffer)) @@ -908,30 +899,38 @@ def run_engine(self): # hold on to a reference to ``data`` know that it's now been # finished and they can't draw more data from it. ran_example.freeze() + _raise_to_user(errors_to_report, self.settings, report_lines) - if self.__was_flaky: - flaky += 1 - - # If we only have one example then we should have raised an error or - # flaky prior to this point. - assert len(self.falsifying_examples) > 1 - if flaky > 0: - raise Flaky( - f"Hypothesis found {len(self.falsifying_examples)} distinct failures, " - f"but {flaky} of them exhibited some sort of flaky behaviour." - ) - else: - raise MultipleFailures( - f"Hypothesis found {len(self.falsifying_examples)} distinct failures." 
- ) +def add_note(exc, note): + try: + exc.add_note(note) + except AttributeError: + if not hasattr(exc, "__notes__"): + exc.__notes__ = [] + exc.__notes__.append(note) + + +def _raise_to_user(errors_to_report, settings, target_lines, trailer=""): + """Helper function for attaching notes and grouping multiple errors.""" + if settings.verbosity >= Verbosity.normal: + for fragments, err in errors_to_report: + for note in fragments: + add_note(err, note) + + if len(errors_to_report) == 1: + _, the_error_hypothesis_found = errors_to_report[0] + else: + assert errors_to_report + the_error_hypothesis_found = BaseExceptionGroup( + f"Hypothesis found {len(errors_to_report)} distinct failures{trailer}.", + [e for _, e in errors_to_report], + ) - def __flaky(self, message, *, cause): - if len(self.falsifying_examples) <= 1: - raise Flaky(message) from cause - else: - self.__was_flaky = True - report("Flaky example! " + message) + if settings.verbosity >= Verbosity.normal: + for line in target_lines: + add_note(the_error_hypothesis_found, line) + raise the_error_hypothesis_found @contextlib.contextmanager @@ -1189,23 +1188,11 @@ def wrapped_test(*arguments, **kwargs): state, wrapped_test, arguments, kwargs, original_sig ) ) - with local_settings(state.settings): - if len(errors) > 1: - # If we're not going to report multiple bugs, we would have - # stopped running explicit examples at the first failure. - assert state.settings.report_multiple_bugs - for fragments, err in errors: - for f in fragments: - report(f) - report(format_exception(err, err.__traceback__)) - raise MultipleFailures( - f"Hypothesis found {len(errors)} failures in explicit examples." - ) - elif errors: - fragments, the_error_hypothesis_found = errors[0] - for f in fragments: - report(f) - raise the_error_hypothesis_found + if errors: + # If we're not going to report multiple bugs, we would have + # stopped running explicit examples at the first failure. + assert len(errors) == 1 or state.settings.report_multiple_bugs + _raise_to_user(errors, state.settings, [], " in explicit examples") # If there were any explicit examples, they all ran successfully. # The next step is to use the Conjecture engine to run the test on @@ -1236,7 +1223,7 @@ def wrapped_test(*arguments, **kwargs): state.run_engine() except BaseException as e: # The exception caught here should either be an actual test - # failure (or MultipleFailures), or some kind of fatal error + # failure (or BaseExceptionGroup), or some kind of fatal error # that caused the engine to stop. generated_seed = wrapped_test._hypothesis_internal_use_generated_seed @@ -1262,7 +1249,9 @@ def wrapped_test(*arguments, **kwargs): # which will actually appear in tracebacks is as clear as # possible - "raise the_error_hypothesis_found". 
the_error_hypothesis_found = e.with_traceback( - get_trimmed_traceback() + None + if isinstance(e, BaseExceptionGroup) + else get_trimmed_traceback() ) raise the_error_hypothesis_found diff --git a/hypothesis-python/src/hypothesis/errors.py b/hypothesis-python/src/hypothesis/errors.py index 75fe7ee0f9..7f4ad9d697 100644 --- a/hypothesis-python/src/hypothesis/errors.py +++ b/hypothesis-python/src/hypothesis/errors.py @@ -124,9 +124,18 @@ class Frozen(HypothesisException): after freeze() has been called.""" -class MultipleFailures(_Trimmable): - """Indicates that Hypothesis found more than one distinct bug when testing - your code.""" +def __getattr__(name): + if name == "MultipleFailures": + from hypothesis._settings import note_deprecation + from hypothesis.internal.compat import BaseExceptionGroup + + note_deprecation( + "MultipleFailures is deprecated; use the builtin `BaseExceptionGroup` type " + "instead, or `exceptiongroup.BaseExceptionGroup` before Python 3.11", + since="RELEASEDAY", + has_codemod=False, # This would be a great PR though! + ) + return BaseExceptionGroup class DeadlineExceeded(_Trimmable): diff --git a/hypothesis-python/src/hypothesis/internal/escalation.py b/hypothesis-python/src/hypothesis/internal/escalation.py index 5fcd356e31..4eb1fd421b 100644 --- a/hypothesis-python/src/hypothesis/internal/escalation.py +++ b/hypothesis-python/src/hypothesis/internal/escalation.py @@ -86,7 +86,7 @@ def get_trimmed_traceback(exception=None): else: tb = exception.__traceback__ # Avoid trimming the traceback if we're in verbose mode, or the error - # was raised inside Hypothesis (and is not a MultipleFailures) + # was raised inside Hypothesis if hypothesis.settings.default.verbosity >= hypothesis.Verbosity.debug or ( is_hypothesis_file(traceback.extract_tb(tb)[-1][0]) and not isinstance(exception, _Trimmable) diff --git a/hypothesis-python/tests/cover/test_arbitrary_data.py b/hypothesis-python/tests/cover/test_arbitrary_data.py index 57258e7a89..25ea056bf2 100644 --- a/hypothesis-python/tests/cover/test_arbitrary_data.py +++ b/hypothesis-python/tests/cover/test_arbitrary_data.py @@ -11,11 +11,9 @@ import pytest from pytest import raises -from hypothesis import find, given, reporting, strategies as st +from hypothesis import find, given, strategies as st from hypothesis.errors import InvalidArgument -from tests.common.utils import capture_out - @given(st.integers(), st.data()) def test_conditional_draw(x, data): @@ -32,13 +30,10 @@ def test(data): if y in x: raise ValueError() - with raises(ValueError): - with capture_out() as out: - with reporting.with_reporter(reporting.default): - test() - result = out.getvalue() - assert "Draw 1: [0, 0]" in result - assert "Draw 2: 0" in result + with raises(ValueError) as err: + test() + assert "Draw 1: [0, 0]" in err.value.__notes__ + assert "Draw 2: 0" in err.value.__notes__ def test_prints_labels_if_given_on_failure(): @@ -50,13 +45,10 @@ def test(data): x.remove(y) assert y not in x - with raises(AssertionError): - with capture_out() as out: - with reporting.with_reporter(reporting.default): - test() - result = out.getvalue() - assert "Draw 1 (Some numbers): [0, 0]" in result - assert "Draw 2 (A number): 0" in result + with raises(AssertionError) as err: + test() + assert "Draw 1 (Some numbers): [0, 0]" in err.value.__notes__ + assert "Draw 2 (A number): 0" in err.value.__notes__ def test_given_twice_is_same(): @@ -66,13 +58,10 @@ def test(data1, data2): data2.draw(st.integers()) raise ValueError() - with raises(ValueError): - with 
capture_out() as out: - with reporting.with_reporter(reporting.default): - test() - result = out.getvalue() - assert "Draw 1: 0" in result - assert "Draw 2: 0" in result + with raises(ValueError) as err: + test() + assert "Draw 1: 0" in err.value.__notes__ + assert "Draw 2: 0" in err.value.__notes__ def test_errors_when_used_in_find(): diff --git a/hypothesis-python/tests/cover/test_deadline.py b/hypothesis-python/tests/cover/test_deadline.py index d7d6b8a9fd..d7927d9560 100644 --- a/hypothesis-python/tests/cover/test_deadline.py +++ b/hypothesis-python/tests/cover/test_deadline.py @@ -15,7 +15,7 @@ from hypothesis import given, settings, strategies as st from hypothesis.errors import DeadlineExceeded, Flaky, InvalidArgument -from tests.common.utils import assert_falsifying_output, capture_out, fails_with +from tests.common.utils import assert_falsifying_output, fails_with def test_raises_deadline_on_slow_test(): @@ -109,11 +109,10 @@ def slow_once(i): once[0] = False time.sleep(0.2) - with capture_out() as o: - with pytest.raises(Flaky): - slow_once() - assert "Unreliable test timing" in o.getvalue() - assert "took 2" in o.getvalue() + with pytest.raises(Flaky) as err: + slow_once() + assert "Unreliable test timing" in "\n".join(err.value.__notes__) + assert "took 2" in "\n".join(err.value.__notes__) @pytest.mark.parametrize("slow_strategy", [False, True]) diff --git a/hypothesis-python/tests/cover/test_error_in_draw.py b/hypothesis-python/tests/cover/test_error_in_draw.py index 3117a6ecd6..e00d0c7897 100644 --- a/hypothesis-python/tests/cover/test_error_in_draw.py +++ b/hypothesis-python/tests/cover/test_error_in_draw.py @@ -12,8 +12,6 @@ from hypothesis import given, strategies as st -from tests.common.utils import capture_out - def test_error_is_in_finally(): @given(st.data()) @@ -23,8 +21,7 @@ def test(d): finally: raise ValueError() - with capture_out() as o: - with pytest.raises(ValueError): - test() + with pytest.raises(ValueError) as err: + test() - assert "[0, 1, -1]" in o.getvalue() + assert "[0, 1, -1]" in "\n".join(err.value.__notes__) diff --git a/hypothesis-python/tests/cover/test_escalation.py b/hypothesis-python/tests/cover/test_escalation.py index 11a4bc7f4b..5755089829 100644 --- a/hypothesis-python/tests/cover/test_escalation.py +++ b/hypothesis-python/tests/cover/test_escalation.py @@ -13,7 +13,9 @@ import pytest import hypothesis +from hypothesis import errors from hypothesis.internal import escalation as esc +from hypothesis.internal.compat import BaseExceptionGroup def test_does_not_escalate_errors_in_non_hypothesis_file(): @@ -62,3 +64,9 @@ def test_is_hypothesis_file_not_confused_by_prefix(monkeypatch): @pytest.mark.parametrize("fname", ["", ""]) def test_is_hypothesis_file_does_not_error_on_invalid_paths_issue_2319(fname): assert not esc.is_hypothesis_file(fname) + + +def test_multiplefailures_deprecation(): + with pytest.warns(errors.HypothesisDeprecationWarning): + exc = errors.MultipleFailures + assert exc is BaseExceptionGroup diff --git a/hypothesis-python/tests/cover/test_explicit_examples.py b/hypothesis-python/tests/cover/test_explicit_examples.py index fa8afd863a..d47e9ae41e 100644 --- a/hypothesis-python/tests/cover/test_explicit_examples.py +++ b/hypothesis-python/tests/cover/test_explicit_examples.py @@ -23,12 +23,8 @@ reporting, settings, ) -from hypothesis.errors import ( - DeadlineExceeded, - HypothesisWarning, - InvalidArgument, - MultipleFailures, -) +from hypothesis.errors import DeadlineExceeded, HypothesisWarning, InvalidArgument +from 
hypothesis.internal.compat import ExceptionGroup from hypothesis.strategies import floats, integers, text from tests.common.utils import assert_falsifying_output, capture_out @@ -210,14 +206,10 @@ def test(x): note(f"x -> {x}") assert x == 42 - with capture_out() as out: - with reporting.with_reporter(reporting.default): - with pytest.raises(AssertionError): - test() - v = out.getvalue() - print(v) - assert "x -> 43" in v - assert "x -> 42" not in v + with pytest.raises(AssertionError) as err: + test() + assert "x -> 43" in err.value.__notes__ + assert "x -> 42" not in err.value.__notes__ def test_must_agree_with_number_of_arguments(): @@ -250,11 +242,11 @@ def test_unsatisfied_assumption_during_explicit_example(threshold, value): assume(value < threshold) -@pytest.mark.parametrize("exc", [MultipleFailures, AssertionError]) +@pytest.mark.parametrize("exc", [ExceptionGroup, AssertionError]) def test_multiple_example_reporting(exc): @example(1) @example(2) - @settings(report_multiple_bugs=exc is MultipleFailures, phases=[Phase.explicit]) + @settings(report_multiple_bugs=exc is ExceptionGroup, phases=[Phase.explicit]) @given(integers()) def inner_test_multiple_failing_examples(x): assert x < 2 diff --git a/hypothesis-python/tests/cover/test_falsifying_example_output.py b/hypothesis-python/tests/cover/test_falsifying_example_output.py index ca55a8e58c..6d942aa1b2 100644 --- a/hypothesis-python/tests/cover/test_falsifying_example_output.py +++ b/hypothesis-python/tests/cover/test_falsifying_example_output.py @@ -12,19 +12,16 @@ from hypothesis import Phase, example, given, settings, strategies as st -from tests.common.utils import capture_out - -OUTPUT_NO_LINE_BREAK = """ +OUTPUT_NO_BREAK = """ Falsifying explicit example: test( - x=%(input)s, y=%(input)s, + x={0!r}, y={0!r}, ) """ - -OUTPUT_WITH_LINE_BREAK = """ +OUTPUT_WITH_BREAK = """ Falsifying explicit example: test( - x=%(input)s, - y=%(input)s, + x={0!r}, + y={0!r}, ) """ @@ -36,17 +33,11 @@ def test_inserts_line_breaks_only_at_appropriate_lengths(line_break, input): def test(x, y): assert x < y - with capture_out() as cap: - with pytest.raises(AssertionError): - test() - - template = OUTPUT_WITH_LINE_BREAK if line_break else OUTPUT_NO_LINE_BREAK - - desired_output = template % {"input": repr(input)} - - actual_output = cap.getvalue() + with pytest.raises(AssertionError) as err: + test() - assert desired_output.strip() == actual_output.strip() + expected = (OUTPUT_WITH_BREAK if line_break else OUTPUT_NO_BREAK).format(input) + assert expected.strip() == "\n".join(err.value.__notes__) @given(kw=st.none()) @@ -67,8 +58,7 @@ def explicit_phase(*args, kw): ids=lambda fn: fn.__name__, ) def test_vararg_output(fn): - with capture_out() as cap: - with pytest.raises(AssertionError): - fn(1, 2, 3) + with pytest.raises(AssertionError) as err: + fn(1, 2, 3) - assert "1, 2, 3" in cap.getvalue() + assert "1, 2, 3" in "\n".join(err.value.__notes__) diff --git a/hypothesis-python/tests/cover/test_random_module.py b/hypothesis-python/tests/cover/test_random_module.py index 7eb030f5d4..83ed253a3e 100644 --- a/hypothesis-python/tests/cover/test_random_module.py +++ b/hypothesis-python/tests/cover/test_random_module.py @@ -13,14 +13,12 @@ import pytest -from hypothesis import core, find, given, register_random, reporting, strategies as st +from hypothesis import core, find, given, register_random, strategies as st from hypothesis.errors import InvalidArgument from hypothesis.internal import entropy from hypothesis.internal.compat import PYPY from 
hypothesis.internal.entropy import deterministic_PRNG -from tests.common.utils import capture_out - def gc_on_pypy(): # CPython uses reference counting, so objects (without circular refs) @@ -32,16 +30,13 @@ def gc_on_pypy(): def test_can_seed_random(): - with capture_out() as out: - with reporting.with_reporter(reporting.default): - with pytest.raises(AssertionError): - - @given(st.random_module()) - def test(r): - raise AssertionError + @given(st.random_module()) + def test(r): + raise AssertionError - test() - assert "RandomSeeder(0)" in out.getvalue() + with pytest.raises(AssertionError) as err: + test() + assert "RandomSeeder(0)" in "\n".join(err.value.__notes__) @given(st.random_module(), st.random_module()) diff --git a/hypothesis-python/tests/cover/test_randoms.py b/hypothesis-python/tests/cover/test_randoms.py index 143ecf0489..4b4db455e3 100644 --- a/hypothesis-python/tests/cover/test_randoms.py +++ b/hypothesis-python/tests/cover/test_randoms.py @@ -16,7 +16,7 @@ import pytest from hypothesis import assume, given, strategies as st -from hypothesis.errors import MultipleFailures +from hypothesis.internal.compat import ExceptionGroup from hypothesis.strategies._internal.random import ( RANDOM_METHODS, HypothesisRandom, @@ -26,7 +26,6 @@ ) from tests.common.debug import find_any -from tests.common.utils import capture_out def test_implements_all_random_methods(): @@ -242,10 +241,9 @@ def test(rnd): rnd.uniform(0.1, 0.5) raise AssertionError - with capture_out() as out: - with pytest.raises(AssertionError): - test() - assert ".uniform(0.1, 0.5)" in out.getvalue() + with pytest.raises(AssertionError) as err: + test() + assert ".uniform(0.1, 0.5)" in "\n".join(err.value.__notes__) @pytest.mark.skipif( @@ -259,10 +257,9 @@ def test(rnd): rnd.choices([1, 2, 3, 4], k=2) raise AssertionError - with capture_out() as out: - with pytest.raises(AssertionError): - test() - assert ".choices([1, 2, 3, 4], k=2)" in out.getvalue() + with pytest.raises(AssertionError) as err: + test() + assert ".choices([1, 2, 3, 4], k=2)" in "\n".join(err.value.__notes__) @given(st.randoms(use_true_random=False)) @@ -298,7 +295,7 @@ def test(rnd): assert x < 0.5 assert x > 0.5 - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() diff --git a/hypothesis-python/tests/cover/test_reporting.py b/hypothesis-python/tests/cover/test_reporting.py index dce5ac3f9c..cf7bd3053d 100644 --- a/hypothesis-python/tests/cover/test_reporting.py +++ b/hypothesis-python/tests/cover/test_reporting.py @@ -45,11 +45,9 @@ def test_prints_output_by_default(): def test_int(x): raise AssertionError - with capture_out() as o: - with reporting.with_reporter(reporting.default): - with pytest.raises(AssertionError): - test_int() - assert "Falsifying example" in o.getvalue() + with pytest.raises(AssertionError) as err: + test_int() + assert "Falsifying example" in "\n".join(err.value.__notes__) def test_does_not_print_debug_in_verbose(): diff --git a/hypothesis-python/tests/cover/test_reproduce_failure.py b/hypothesis-python/tests/cover/test_reproduce_failure.py index 0f8878c25f..cb954ace4c 100644 --- a/hypothesis-python/tests/cover/test_reproduce_failure.py +++ b/hypothesis-python/tests/cover/test_reproduce_failure.py @@ -128,13 +128,13 @@ def test(i): failing_example[0] = i assert i not in failing_example - with capture_out() as o: - with pytest.raises(AssertionError): - test() - assert "@reproduce_failure" in o.getvalue() + with pytest.raises(AssertionError) as err: + test() + notes = 
"\n".join(err.value.__notes__) + assert "@reproduce_failure" in notes exp = re.compile(r"reproduce_failure\(([^)]+)\)", re.MULTILINE) - extract = exp.search(o.getvalue()) + extract = exp.search(notes) reproduction = eval(extract.group(0)) test = reproduction(test) diff --git a/hypothesis-python/tests/cover/test_slippage.py b/hypothesis-python/tests/cover/test_slippage.py index 6fe4bc1261..4561000fcf 100644 --- a/hypothesis-python/tests/cover/test_slippage.py +++ b/hypothesis-python/tests/cover/test_slippage.py @@ -10,9 +10,10 @@ import pytest -from hypothesis import Phase, assume, given, settings, strategies as st +from hypothesis import Phase, assume, given, settings, strategies as st, target from hypothesis.database import InMemoryExampleDatabase -from hypothesis.errors import Flaky, MultipleFailures +from hypothesis.errors import Flaky +from hypothesis.internal.compat import ExceptionGroup from hypothesis.internal.conjecture.engine import MIN_TEST_CALLS from tests.common.utils import ( @@ -22,6 +23,16 @@ ) +def capture_reports(test): + with capture_out() as o, pytest.raises(ExceptionGroup) as err: + test() + + return o.getvalue() + "\n\n".join( + f"{e!r}\n" + "\n".join(getattr(e, "__notes__", [])) + for e in (err.value,) + err.value.exceptions + ) + + def test_raises_multiple_failures_with_varying_type(): target = [None] @@ -38,12 +49,20 @@ def test(i): exc_class = TypeError if target[0] == i else ValueError raise exc_class() - with capture_out() as o: - with pytest.raises(MultipleFailures): - test() + output = capture_reports(test) + assert "TypeError" in output + assert "ValueError" in output + - assert "TypeError" in o.getvalue() - assert "ValueError" in o.getvalue() +def test_shows_target_scores_with_multiple_failures(): + @settings(database=None, max_examples=100) + @given(st.integers()) + def test(i): + target(i) + assert i > 0 + assert i < 0 + + assert "Highest target score:" in capture_reports(test) def test_raises_multiple_failures_when_position_varies(): @@ -61,11 +80,9 @@ def test(i): else: raise ValueError("loc 2") - with capture_out() as o: - with pytest.raises(MultipleFailures): - test() - assert "loc 1" in o.getvalue() - assert "loc 2" in o.getvalue() + output = capture_reports(test) + assert "loc 1" in output + assert "loc 2" in output def test_replays_both_failing_values(): @@ -81,10 +98,10 @@ def test(i): exc_class = TypeError if target[0] == i else ValueError raise exc_class() - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() @@ -111,7 +128,7 @@ def test(i): if i == target[1]: raise ValueError() - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() bug_fixed = True @@ -142,7 +159,7 @@ def test(i): if i == target[1]: raise ValueError() - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() bug_fixed = True @@ -180,11 +197,7 @@ def test(i): else: duds.add(i) - with capture_out() as o: - with pytest.raises(MultipleFailures): - test() - - output = o.getvalue() + output = capture_reports(test) assert_output_contains_failure(output, test, i=10000) assert_output_contains_failure(output, test, i=second_target[0]) @@ -212,13 +225,19 @@ def test(i): flaky_failed_once[0] = True raise ValueError() - with pytest.raises(Flaky): + try: test() + raise AssertionError("Expected test() to raise an error") + except ExceptionGroup as err: + assert any(isinstance(e, Flaky) for e in err.exceptions) flaky_fixed = 
True - with pytest.raises(MultipleFailures): + try: test() + raise AssertionError("Expected test() to raise an error") + except ExceptionGroup as err: + assert not any(isinstance(e, Flaky) for e in err.exceptions) @pytest.mark.parametrize("allow_multi", [True, False]) @@ -237,7 +256,7 @@ def test(i): seen.add(ValueError) raise ValueError - with pytest.raises(MultipleFailures if allow_multi else TypeError): + with pytest.raises(ExceptionGroup if allow_multi else TypeError): test() assert seen == {TypeError, ValueError} @@ -263,7 +282,7 @@ def test(x): assert x in seen or (x <= special[0]) assert x not in special - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() diff --git a/hypothesis-python/tests/cover/test_stateful.py b/hypothesis-python/tests/cover/test_stateful.py index 1b1ac20842..dbbb911f9e 100644 --- a/hypothesis-python/tests/cover/test_stateful.py +++ b/hypothesis-python/tests/cover/test_stateful.py @@ -206,16 +206,13 @@ def populate_bundle(self): def fail_fast(self): raise AssertionError - with capture_out() as o: - # The state machine must raise an exception for the - # falsifying example to be printed. - with raises(AssertionError): - run_state_machine_as_test(ProducesMultiple) + with raises(AssertionError) as err: + run_state_machine_as_test(ProducesMultiple) # This is tightly coupled to the output format of the step printing. # The first line is "Falsifying Example:..." the second is creating # the state machine, the third is calling the "initialize" method. - assignment_line = o.getvalue().split("\n")[2] + assignment_line = err.value.__notes__[2] # 'populate_bundle()' returns 2 values, so should be # expanded to 2 variables. assert assignment_line == "v1, v2 = state.populate_bundle()" @@ -241,10 +238,10 @@ def populate_bundle(self): def fail_fast(self, b): assert b != 1 - with capture_out() as o, raises(AssertionError): + with raises(AssertionError) as err: run_state_machine_as_test(ProducesMultiple) - assignment_line = o.getvalue().split("\n")[2] + assignment_line = err.value.__notes__[2] assert assignment_line == "(v1,) = state.populate_bundle()" state = ProducesMultiple() @@ -266,16 +263,13 @@ def populate_bundle(self): def fail_fast(self): raise AssertionError - with capture_out() as o: - # The state machine must raise an exception for the - # falsifying example to be printed. - with raises(AssertionError): - run_state_machine_as_test(ProducesNoVariables) + with raises(AssertionError) as err: + run_state_machine_as_test(ProducesNoVariables) # This is tightly coupled to the output format of the step printing. # The first line is "Falsifying Example:..." the second is creating # the state machine, the third is calling the "initialize" method. - assignment_line = o.getvalue().split("\n")[2] + assignment_line = err.value.__notes__[2] # 'populate_bundle()' returns 0 values, so there should be no # variable assignment. 
assert assignment_line == "state.populate_bundle()" @@ -633,20 +627,19 @@ def invariant_1(self): def rule_1(self): pass - with capture_out() as o: - with pytest.raises(ValueError): - run_state_machine_as_test(BadInvariant) + with pytest.raises(ValueError) as err: + run_state_machine_as_test(BadInvariant) - result = o.getvalue() + result = "\n".join(err.value.__notes__) assert ( result - == """\ + == """ Falsifying example: state = BadInvariant() state.initialize_1() state.invariant_1() state.teardown() -""" +""".strip() ) @@ -680,14 +673,13 @@ def rule_1(self): if self.num == 2: raise ValueError() - with capture_out() as o: - with pytest.raises(ValueError): - run_state_machine_as_test(BadRuleWithGoodInvariants) + with pytest.raises(ValueError) as err: + run_state_machine_as_test(BadRuleWithGoodInvariants) - result = o.getvalue() + result = "\n".join(err.value.__notes__) assert ( result - == """\ + == """ Falsifying example: state = BadRuleWithGoodInvariants() state.invariant_1() @@ -700,7 +692,7 @@ def rule_1(self): state.invariant_3() state.rule_1() state.teardown() -""" +""".strip() ) @@ -763,12 +755,12 @@ def delete(self, k, v): def values_agree(self, k): assert not self.__deleted[k] - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(IncorrectDeletion) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(IncorrectDeletion) - assert o.getvalue().count(" = state.k(") == 1 - assert o.getvalue().count(" = state.v(") == 1 + result = "\n".join(err.value.__notes__) + assert result.count(" = state.k(") == 1 + assert result.count(" = state.v(") == 1 def test_prints_equal_values_with_correct_variable_name(): @@ -789,11 +781,10 @@ def transfer(self, source): def fail(self, source): raise AssertionError - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(MovesBetweenBundles) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(MovesBetweenBundles) - result = o.getvalue() + result = "\n".join(err.value.__notes__) for m in ["create", "transfer", "fail"]: assert result.count("state." 
+ m) == 1 assert "v1 = state.create()" in result @@ -822,12 +813,11 @@ def initialize_c(self): def fail_fast(self): raise AssertionError - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(WithInitializeRules) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(WithInitializeRules) assert set(WithInitializeRules.initialized[-3:]) == {"a", "b", "c"} - result = o.getvalue().splitlines()[1:] + result = err.value.__notes__[1:] assert result[0] == "state = WithInitializeRules()" # Initialize rules call order is shuffled assert {result[1], result[2], result[3]} == { @@ -852,20 +842,19 @@ def fail_fast(self, param): raise AssertionError WithInitializeBundleRules.TestCase.settings = NO_BLOB_SETTINGS - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(WithInitializeBundleRules) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(WithInitializeBundleRules) - result = o.getvalue() + result = "\n".join(err.value.__notes__) assert ( result - == """\ + == """ Falsifying example: state = WithInitializeBundleRules() v1 = state.initialize_a(dep='dep') state.fail_fast(param=v1) state.teardown() -""" +""".strip() ) @@ -934,12 +923,11 @@ def initialize_b(self): def fail_fast(self): raise AssertionError - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(ChildStateMachine) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(ChildStateMachine) assert set(ChildStateMachine.initialized[-2:]) == {"a", "b"} - result = o.getvalue().splitlines()[1:] + result = err.value.__notes__[1:] assert result[0] == "state = ChildStateMachine()" # Initialize rules call order is shuffled assert {result[1], result[2]} == {"state.initialize_a()", "state.initialize_b()"} @@ -961,25 +949,24 @@ def fail_eventually(self): assert self.initialize_called_counter <= 2 StateMachine.TestCase.settings = NO_BLOB_SETTINGS - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(StateMachine) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(StateMachine) - result = o.getvalue() + result = "\n".join(err.value.__notes__) assert ( result - == """\ + == """ Falsifying example: state = StateMachine() state.initialize() state.fail_eventually() state.fail_eventually() state.teardown() -""" +""".strip() ) -def test_steps_printed_despite_pytest_fail(capsys): +def test_steps_printed_despite_pytest_fail(): # Test for https://github.com/HypothesisWorks/hypothesis/issues/1372 @Settings(print_blob=False) class RaisesProblem(RuleBasedStateMachine): @@ -987,17 +974,15 @@ class RaisesProblem(RuleBasedStateMachine): def oops(self): pytest.fail() - with pytest.raises(Failed): + with pytest.raises(Failed) as err: run_state_machine_as_test(RaisesProblem) - out, _ = capsys.readouterr() assert ( - """\ + "\n".join(err.value.__notes__).strip() + == """ Falsifying example: state = RaisesProblem() state.oops() -state.teardown() -""" - in out +state.teardown()""".strip() ) @@ -1090,12 +1075,10 @@ def init_data(self, value): def mostly_fails(self, d): assert d == 42 - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(TrickyPrintingMachine) - output = o.getvalue() - assert "v1 = state.init_data(value=0)" in output - assert "v1 = state.init_data(value=v1)" not in output + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(TrickyPrintingMachine) + assert "v1 = 
state.init_data(value=0)" in err.value.__notes__ + assert "v1 = state.init_data(value=v1)" not in err.value.__notes__ def test_multiple_precondition_bug(): diff --git a/hypothesis-python/tests/cover/test_testdecorators.py b/hypothesis-python/tests/cover/test_testdecorators.py index 233a593e82..2bd583e658 100644 --- a/hypothesis-python/tests/cover/test_testdecorators.py +++ b/hypothesis-python/tests/cover/test_testdecorators.py @@ -406,13 +406,12 @@ def foo(x): failing.append(x) raise AssertionError - with raises(AssertionError): - with capture_out() as out: - foo() + with raises(AssertionError) as err: + foo() assert len(failing) == 2 assert len(set(failing)) == 1 - assert "Falsifying example" in out.getvalue() - assert "Lo" in out.getvalue() + assert "Falsifying example" in "\n".join(err.value.__notes__) + assert "Lo" in err.value.__notes__ @given(integers().filter(lambda x: x % 4 == 0)) @@ -466,12 +465,9 @@ def test(xs): if sum(xs) <= 100: raise ValueError() - with capture_out() as out: - with reporting.with_reporter(reporting.default): - with raises(ValueError): - test() - lines = out.getvalue().strip().splitlines() - assert lines.count("Hi there") == 1 + with raises(ValueError) as err: + test() + assert err.value.__notes__.count("Hi there") == 1 @given(lists(integers(), max_size=0)) diff --git a/hypothesis-python/tests/ghostwriter/test_ghostwriter.py b/hypothesis-python/tests/ghostwriter/test_ghostwriter.py index dc0d8cd1a5..9886b704aa 100644 --- a/hypothesis-python/tests/ghostwriter/test_ghostwriter.py +++ b/hypothesis-python/tests/ghostwriter/test_ghostwriter.py @@ -37,8 +37,9 @@ import click import pytest -from hypothesis.errors import InvalidArgument, MultipleFailures, Unsatisfiable +from hypothesis.errors import InvalidArgument, Unsatisfiable from hypothesis.extra import cli, ghostwriter +from hypothesis.internal.compat import BaseExceptionGroup from hypothesis.strategies import builds, from_type, just, lists from hypothesis.strategies._internal.lazy import LazyStrategy @@ -336,7 +337,7 @@ def test_run_ghostwriter_roundtrip(): ) try: get_test_function(source_code)() - except (AssertionError, ValueError, MultipleFailures): + except (AssertionError, ValueError, BaseExceptionGroup): pass # Finally, restricting ourselves to finite floats makes the test pass! 
diff --git a/hypothesis-python/tests/nocover/test_interesting_origin.py b/hypothesis-python/tests/nocover/test_interesting_origin.py index d0d1194055..caa50ae91e 100644 --- a/hypothesis-python/tests/nocover/test_interesting_origin.py +++ b/hypothesis-python/tests/nocover/test_interesting_origin.py @@ -11,7 +11,7 @@ import pytest from hypothesis import given, settings, strategies as st -from hypothesis.errors import MultipleFailures +from hypothesis.internal.compat import ExceptionGroup from tests.common.utils import flaky @@ -58,5 +58,5 @@ def test_fn(x, y): # Indirection to fix https://github.com/HypothesisWorks/hypothesis/issues/2888 return function(x, y) - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test_fn() diff --git a/hypothesis-python/tests/nocover/test_scrutineer.py b/hypothesis-python/tests/nocover/test_scrutineer.py index b2512fec3d..b24033a170 100644 --- a/hypothesis-python/tests/nocover/test_scrutineer.py +++ b/hypothesis-python/tests/nocover/test_scrutineer.py @@ -59,7 +59,10 @@ def get_reports(file_contents, *, testdir): for i, line in enumerate(file_contents.splitlines()) if line.endswith(BUG_MARKER) } - expected = ["\n".join(r) for k, r in make_report(explanations).items()] + expected = [ + ("\n".join(r), "\n | ".join(r)) # single, ExceptionGroup + for r in make_report(explanations).values() + ] return pytest_stdout, expected @@ -67,8 +70,8 @@ def get_reports(file_contents, *, testdir): def test_explanations(code, testdir): pytest_stdout, expected = get_reports(PRELUDE + code, testdir=testdir) assert len(expected) == code.count(BUG_MARKER) - for report in expected: - assert report in pytest_stdout + for single, group in expected: + assert single in pytest_stdout or group in pytest_stdout @pytest.mark.parametrize("code", FRAGMENTS) diff --git a/hypothesis-python/tests/nocover/test_stateful.py b/hypothesis-python/tests/nocover/test_stateful.py index b1c35c5848..b9a2e07429 100644 --- a/hypothesis-python/tests/nocover/test_stateful.py +++ b/hypothesis-python/tests/nocover/test_stateful.py @@ -11,14 +11,11 @@ from collections import namedtuple import pytest -from pytest import raises from hypothesis import settings as Settings from hypothesis.stateful import Bundle, RuleBasedStateMachine, precondition, rule from hypothesis.strategies import booleans, integers, lists -from tests.common.utils import capture_out - Leaf = namedtuple("Leaf", ("label",)) Split = namedtuple("Split", ("left", "right")) @@ -183,13 +180,9 @@ def snake(self): def test_bad_machines_fail(machine): test_class = machine.TestCase try: - with capture_out() as o: - with raises(AssertionError): - test_class().runTest() - except Exception: - print(o.getvalue()) - raise - v = o.getvalue() - print(v) - steps = [l for l in v.splitlines() if "Step " in l or "state." in l] + test_class().runTest() + raise RuntimeError("Expected an assertion error") + except AssertionError as err: + notes = err.__notes__ + steps = [l for l in notes if "Step " in l or "state." 
in l] assert 1 <= len(steps) <= 50 diff --git a/hypothesis-python/tests/nocover/test_targeting.py b/hypothesis-python/tests/nocover/test_targeting.py index 58fb6be6f8..0fad74eb5a 100644 --- a/hypothesis-python/tests/nocover/test_targeting.py +++ b/hypothesis-python/tests/nocover/test_targeting.py @@ -34,7 +34,6 @@ def test_reports_target_results(testdir, multiple): assert "Falsifying example" in out assert "x=101" in out assert out.count("Highest target score") == 1 - assert out.index("Highest target score") < out.index("Falsifying example") assert result.ret != 0 diff --git a/hypothesis-python/tests/pytest/test_skipping.py b/hypothesis-python/tests/pytest/test_skipping.py index ff560ef42c..0433fe9706 100644 --- a/hypothesis-python/tests/pytest/test_skipping.py +++ b/hypothesis-python/tests/pytest/test_skipping.py @@ -23,7 +23,7 @@ def test_to_be_skipped(xs): if xs == 0: pytest.skip() # But the pytest 3.0 internals don't have such an exception, so we keep - # going and raise a MultipleFailures error. Ah well. + # going and raise a BaseExceptionGroup error. Ah well. else: assert xs == 0 """ From 04b4d0ef2520b8d56e55930eafd9880813df80e6 Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Tue, 3 May 2022 19:01:52 -0600 Subject: [PATCH 2/3] Report only first bug on Pytest --- hypothesis-python/src/_hypothesis_pytestplugin.py | 7 +++++++ hypothesis-python/src/hypothesis/core.py | 5 +++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hypothesis-python/src/_hypothesis_pytestplugin.py b/hypothesis-python/src/_hypothesis_pytestplugin.py index 0155c9f232..042f9e62c7 100644 --- a/hypothesis-python/src/_hypothesis_pytestplugin.py +++ b/hypothesis-python/src/_hypothesis_pytestplugin.py @@ -178,6 +178,13 @@ def pytest_configure(config): pass core.global_force_seed = seed + core.pytest_shows_exceptiongroups = ( + sys.version_info[:2] >= (3, 11) + ## See https://github.com/pytest-dev/pytest/issues/9159 + # or pytest_version >= (7, 2) # TODO: fill in correct version here + or config.getoption("tbstyle", "auto") == "native" + ) + @pytest.hookimpl(hookwrapper=True) def pytest_runtest_call(item): __tracebackhide__ = True diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index 3234155769..14ade3fbb1 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -126,6 +126,7 @@ running_under_pytest = False +pytest_shows_exceptiongroups = True global_force_seed = None _hypothesis_global_random = None @@ -436,7 +437,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s err = new yield (fragments_reported, err) - if state.settings.report_multiple_bugs: + if state.settings.report_multiple_bugs and pytest_shows_exceptiongroups: continue break finally: @@ -840,7 +841,7 @@ def run_engine(self): if not self.falsifying_examples: return - elif not self.settings.report_multiple_bugs: + elif not (self.settings.report_multiple_bugs and pytest_shows_exceptiongroups): # Pretend that we only found one failure, by discarding the others. 
del self.falsifying_examples[:-1] From 5ed345121f8a1bd6aa761b4bddc90e270a274483 Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Tue, 26 Jul 2022 23:35:10 -0700 Subject: [PATCH 3/3] Remove old+undocumented hook --- hypothesis-python/src/hypothesis/reporting.py | 4 ---- hypothesis-python/tests/cover/test_reporting.py | 12 ------------ 2 files changed, 16 deletions(-) diff --git a/hypothesis-python/src/hypothesis/reporting.py b/hypothesis-python/src/hypothesis/reporting.py index d62beb57f6..a0f300b2bc 100644 --- a/hypothesis-python/src/hypothesis/reporting.py +++ b/hypothesis-python/src/hypothesis/reporting.py @@ -15,10 +15,6 @@ from hypothesis.utils.dynamicvariables import DynamicVariable -def silent(value): - pass - - def default(value): try: print(value) diff --git a/hypothesis-python/tests/cover/test_reporting.py b/hypothesis-python/tests/cover/test_reporting.py index cf7bd3053d..7faaf0c2f4 100644 --- a/hypothesis-python/tests/cover/test_reporting.py +++ b/hypothesis-python/tests/cover/test_reporting.py @@ -21,18 +21,6 @@ from tests.common.utils import capture_out -def test_can_suppress_output(): - @given(integers()) - def test_int(x): - raise AssertionError - - with capture_out() as o: - with reporting.with_reporter(reporting.silent): - with pytest.raises(AssertionError): - test_int() - assert "Falsifying example" not in o.getvalue() - - def test_can_print_bytes(): with capture_out() as o: with reporting.with_reporter(reporting.default):
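Taken together, the series means a test with several distinct bugs now raises one exception group rather than printing each failure and raising MultipleFailures. A rough end-to-end sketch of the new behaviour when the test function is called directly (outside pytest, so the pytest_shows_exceptiongroups check from the second patch does not limit reporting to a single bug); the test and its assertions are illustrative rather than taken from this series, and on Python 3.10 and earlier the exceptiongroup backport named in RELEASE.rst supplies BaseExceptionGroup:

import sys

from hypothesis import given, settings, strategies as st

if sys.version_info < (3, 11):
    from exceptiongroup import BaseExceptionGroup  # PyPI backport


@settings(report_multiple_bugs=True, database=None)
@given(st.integers())
def test_has_two_bugs(x):
    # Two different assertion lines give two distinct "interesting origins",
    # so Hypothesis keeps searching and reports both failures.
    assert x < 1
    assert x > -1


try:
    test_has_two_bugs()
except BaseExceptionGroup as group:
    # One leaf exception per distinct failure; each carries its own
    # "Falsifying example: ..." report as a PEP 678 note.
    for leaf in group.exceptions:
        print(type(leaf).__name__, getattr(leaf, "__notes__", []))
except AssertionError as single:
    # If only one of the bugs is found, it is raised directly
    # instead of being wrapped in a group.
    print(getattr(single, "__notes__", []))

On Python 3.11+ the same handling could also be written with an except* AssertionError clause.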