From 6ed60ac162297361f531255ad339ae2e43a6b95e Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Sun, 1 May 2022 10:56:33 -0600 Subject: [PATCH 1/3] MultipleFailures -> ExceptionGroup Using the new features from PEP-654 and PEP-678, exception groups and enriching exceptions with notes --- hypothesis-python/RELEASE.rst | 10 ++ hypothesis-python/src/hypothesis/core.py | 143 ++++++++---------- hypothesis-python/src/hypothesis/errors.py | 15 +- .../src/hypothesis/internal/escalation.py | 2 +- .../tests/cover/test_arbitrary_data.py | 37 ++--- .../tests/cover/test_deadline.py | 11 +- .../tests/cover/test_error_in_draw.py | 9 +- .../tests/cover/test_escalation.py | 8 + .../tests/cover/test_explicit_examples.py | 24 +-- .../cover/test_falsifying_example_output.py | 34 ++--- .../tests/cover/test_random_module.py | 19 +-- hypothesis-python/tests/cover/test_randoms.py | 19 +-- .../tests/cover/test_reporting.py | 8 +- .../tests/cover/test_reproduce_failure.py | 10 +- .../tests/cover/test_slippage.py | 69 ++++++--- .../tests/cover/test_stateful.py | 119 +++++++-------- .../tests/cover/test_testdecorators.py | 18 +-- .../tests/ghostwriter/test_ghostwriter.py | 5 +- .../tests/nocover/test_interesting_origin.py | 4 +- .../tests/nocover/test_scrutineer.py | 9 +- .../tests/nocover/test_stateful.py | 17 +-- .../tests/nocover/test_targeting.py | 1 - .../tests/pytest/test_skipping.py | 2 +- 23 files changed, 280 insertions(+), 313 deletions(-) create mode 100644 hypothesis-python/RELEASE.rst diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..1ff6d3638b --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,10 @@ +RELEASE_TYPE: minor + +Reporting of :obj:`multiple failing examples ` +now uses the :pep:`654` `ExceptionGroup `__ type, which is provided by the +:pypi:`exceptiongroup` backport on Python 3.10 and earlier (:issue:`3175`). +``hypothesis.errors.MultipleFailures`` is therefore deprecated. + +Failing examples and other reports are now stored as :pep:`678` exception notes, which +ensures that they will always appear together with the traceback and other information +about their respective error. diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index c931f2d193..3234155769 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -60,7 +60,6 @@ HypothesisDeprecationWarning, HypothesisWarning, InvalidArgument, - MultipleFailures, NoSuchExample, StopTest, Unsatisfiable, @@ -69,6 +68,7 @@ from hypothesis.executors import default_new_style_executor, new_style_executor from hypothesis.internal.compat import ( PYPY, + BaseExceptionGroup, bad_django_TestCase, get_type_hints, int_from_bytes, @@ -575,7 +575,6 @@ def __init__( self.settings = settings self.last_exception = None self.falsifying_examples = () - self.__was_flaky = False self.random = random self.__test_runtime = None self.ever_executed = False @@ -710,11 +709,10 @@ def run(data): ) else: report("Failed to reproduce exception. 
Expected: \n" + traceback) - self.__flaky( - f"Hypothesis {text_repr} produces unreliable results: Falsified" - " on the first call but did not on a subsequent one", - cause=exception, - ) + raise Flaky( + f"Hypothesis {text_repr} produces unreliable results: " + "Falsified on the first call but did not on a subsequent one" + ) from exception return result def _execute_once_for_engine(self, data): @@ -849,57 +847,50 @@ def run_engine(self): # The engine found one or more failures, so we need to reproduce and # report them. - flaky = 0 + errors_to_report = [] - if runner.best_observed_targets: - for line in describe_targets(runner.best_observed_targets): - report(line) - report("") + report_lines = describe_targets(runner.best_observed_targets) + if report_lines: + report_lines.append("") explanations = explanatory_lines(self.explain_traces, self.settings) for falsifying_example in self.falsifying_examples: info = falsifying_example.extra_information + fragments = [] ran_example = ConjectureData.for_buffer(falsifying_example.buffer) - self.__was_flaky = False assert info.__expected_exception is not None try: - self.execute_once( - ran_example, - print_example=not self.is_find, - is_final=True, - expected_failure=( - info.__expected_exception, - info.__expected_traceback, - ), - ) + with with_reporter(fragments.append): + self.execute_once( + ran_example, + print_example=not self.is_find, + is_final=True, + expected_failure=( + info.__expected_exception, + info.__expected_traceback, + ), + ) except (UnsatisfiedAssumption, StopTest) as e: - report(format_exception(e, e.__traceback__)) - self.__flaky( + err = Flaky( "Unreliable assumption: An example which satisfied " "assumptions on the first run now fails it.", - cause=e, ) + err.__cause__ = err.__context__ = e + errors_to_report.append((fragments, err)) except BaseException as e: # If we have anything for explain-mode, this is the time to report. for line in explanations[falsifying_example.interesting_origin]: - report(line) - - if len(self.falsifying_examples) <= 1: - # There is only one failure, so we can report it by raising - # it directly. - raise - - # We are reporting multiple failures, so we need to manually - # print each exception's stack trace and information. - tb = get_trimmed_traceback() - report(format_exception(e, tb)) + fragments.append(line) + errors_to_report.append( + (fragments, e.with_traceback(get_trimmed_traceback())) + ) finally: # Whether or not replay actually raised the exception again, we want # to print the reproduce_failure decorator for the failing example. if self.settings.print_blob: - report( + fragments.append( "\nYou can reproduce this example by temporarily adding " "@reproduce_failure(%r, %r) as a decorator on your test case" % (__version__, encode_failure(falsifying_example.buffer)) @@ -908,30 +899,38 @@ def run_engine(self): # hold on to a reference to ``data`` know that it's now been # finished and they can't draw more data from it. ran_example.freeze() + _raise_to_user(errors_to_report, self.settings, report_lines) - if self.__was_flaky: - flaky += 1 - - # If we only have one example then we should have raised an error or - # flaky prior to this point. - assert len(self.falsifying_examples) > 1 - if flaky > 0: - raise Flaky( - f"Hypothesis found {len(self.falsifying_examples)} distinct failures, " - f"but {flaky} of them exhibited some sort of flaky behaviour." - ) - else: - raise MultipleFailures( - f"Hypothesis found {len(self.falsifying_examples)} distinct failures." 
- ) +def add_note(exc, note): + try: + exc.add_note(note) + except AttributeError: + if not hasattr(exc, "__notes__"): + exc.__notes__ = [] + exc.__notes__.append(note) + + +def _raise_to_user(errors_to_report, settings, target_lines, trailer=""): + """Helper function for attaching notes and grouping multiple errors.""" + if settings.verbosity >= Verbosity.normal: + for fragments, err in errors_to_report: + for note in fragments: + add_note(err, note) + + if len(errors_to_report) == 1: + _, the_error_hypothesis_found = errors_to_report[0] + else: + assert errors_to_report + the_error_hypothesis_found = BaseExceptionGroup( + f"Hypothesis found {len(errors_to_report)} distinct failures{trailer}.", + [e for _, e in errors_to_report], + ) - def __flaky(self, message, *, cause): - if len(self.falsifying_examples) <= 1: - raise Flaky(message) from cause - else: - self.__was_flaky = True - report("Flaky example! " + message) + if settings.verbosity >= Verbosity.normal: + for line in target_lines: + add_note(the_error_hypothesis_found, line) + raise the_error_hypothesis_found @contextlib.contextmanager @@ -1189,23 +1188,11 @@ def wrapped_test(*arguments, **kwargs): state, wrapped_test, arguments, kwargs, original_sig ) ) - with local_settings(state.settings): - if len(errors) > 1: - # If we're not going to report multiple bugs, we would have - # stopped running explicit examples at the first failure. - assert state.settings.report_multiple_bugs - for fragments, err in errors: - for f in fragments: - report(f) - report(format_exception(err, err.__traceback__)) - raise MultipleFailures( - f"Hypothesis found {len(errors)} failures in explicit examples." - ) - elif errors: - fragments, the_error_hypothesis_found = errors[0] - for f in fragments: - report(f) - raise the_error_hypothesis_found + if errors: + # If we're not going to report multiple bugs, we would have + # stopped running explicit examples at the first failure. + assert len(errors) == 1 or state.settings.report_multiple_bugs + _raise_to_user(errors, state.settings, [], " in explicit examples") # If there were any explicit examples, they all ran successfully. # The next step is to use the Conjecture engine to run the test on @@ -1236,7 +1223,7 @@ def wrapped_test(*arguments, **kwargs): state.run_engine() except BaseException as e: # The exception caught here should either be an actual test - # failure (or MultipleFailures), or some kind of fatal error + # failure (or BaseExceptionGroup), or some kind of fatal error # that caused the engine to stop. generated_seed = wrapped_test._hypothesis_internal_use_generated_seed @@ -1262,7 +1249,9 @@ def wrapped_test(*arguments, **kwargs): # which will actually appear in tracebacks is as clear as # possible - "raise the_error_hypothesis_found". 
the_error_hypothesis_found = e.with_traceback( - get_trimmed_traceback() + None + if isinstance(e, BaseExceptionGroup) + else get_trimmed_traceback() ) raise the_error_hypothesis_found diff --git a/hypothesis-python/src/hypothesis/errors.py b/hypothesis-python/src/hypothesis/errors.py index 75fe7ee0f9..7f4ad9d697 100644 --- a/hypothesis-python/src/hypothesis/errors.py +++ b/hypothesis-python/src/hypothesis/errors.py @@ -124,9 +124,18 @@ class Frozen(HypothesisException): after freeze() has been called.""" -class MultipleFailures(_Trimmable): - """Indicates that Hypothesis found more than one distinct bug when testing - your code.""" +def __getattr__(name): + if name == "MultipleFailures": + from hypothesis._settings import note_deprecation + from hypothesis.internal.compat import BaseExceptionGroup + + note_deprecation( + "MultipleFailures is deprecated; use the builtin `BaseExceptionGroup` type " + "instead, or `exceptiongroup.BaseExceptionGroup` before Python 3.11", + since="RELEASEDAY", + has_codemod=False, # This would be a great PR though! + ) + return BaseExceptionGroup class DeadlineExceeded(_Trimmable): diff --git a/hypothesis-python/src/hypothesis/internal/escalation.py b/hypothesis-python/src/hypothesis/internal/escalation.py index 5fcd356e31..4eb1fd421b 100644 --- a/hypothesis-python/src/hypothesis/internal/escalation.py +++ b/hypothesis-python/src/hypothesis/internal/escalation.py @@ -86,7 +86,7 @@ def get_trimmed_traceback(exception=None): else: tb = exception.__traceback__ # Avoid trimming the traceback if we're in verbose mode, or the error - # was raised inside Hypothesis (and is not a MultipleFailures) + # was raised inside Hypothesis if hypothesis.settings.default.verbosity >= hypothesis.Verbosity.debug or ( is_hypothesis_file(traceback.extract_tb(tb)[-1][0]) and not isinstance(exception, _Trimmable) diff --git a/hypothesis-python/tests/cover/test_arbitrary_data.py b/hypothesis-python/tests/cover/test_arbitrary_data.py index 57258e7a89..25ea056bf2 100644 --- a/hypothesis-python/tests/cover/test_arbitrary_data.py +++ b/hypothesis-python/tests/cover/test_arbitrary_data.py @@ -11,11 +11,9 @@ import pytest from pytest import raises -from hypothesis import find, given, reporting, strategies as st +from hypothesis import find, given, strategies as st from hypothesis.errors import InvalidArgument -from tests.common.utils import capture_out - @given(st.integers(), st.data()) def test_conditional_draw(x, data): @@ -32,13 +30,10 @@ def test(data): if y in x: raise ValueError() - with raises(ValueError): - with capture_out() as out: - with reporting.with_reporter(reporting.default): - test() - result = out.getvalue() - assert "Draw 1: [0, 0]" in result - assert "Draw 2: 0" in result + with raises(ValueError) as err: + test() + assert "Draw 1: [0, 0]" in err.value.__notes__ + assert "Draw 2: 0" in err.value.__notes__ def test_prints_labels_if_given_on_failure(): @@ -50,13 +45,10 @@ def test(data): x.remove(y) assert y not in x - with raises(AssertionError): - with capture_out() as out: - with reporting.with_reporter(reporting.default): - test() - result = out.getvalue() - assert "Draw 1 (Some numbers): [0, 0]" in result - assert "Draw 2 (A number): 0" in result + with raises(AssertionError) as err: + test() + assert "Draw 1 (Some numbers): [0, 0]" in err.value.__notes__ + assert "Draw 2 (A number): 0" in err.value.__notes__ def test_given_twice_is_same(): @@ -66,13 +58,10 @@ def test(data1, data2): data2.draw(st.integers()) raise ValueError() - with raises(ValueError): - with 
capture_out() as out: - with reporting.with_reporter(reporting.default): - test() - result = out.getvalue() - assert "Draw 1: 0" in result - assert "Draw 2: 0" in result + with raises(ValueError) as err: + test() + assert "Draw 1: 0" in err.value.__notes__ + assert "Draw 2: 0" in err.value.__notes__ def test_errors_when_used_in_find(): diff --git a/hypothesis-python/tests/cover/test_deadline.py b/hypothesis-python/tests/cover/test_deadline.py index d7d6b8a9fd..d7927d9560 100644 --- a/hypothesis-python/tests/cover/test_deadline.py +++ b/hypothesis-python/tests/cover/test_deadline.py @@ -15,7 +15,7 @@ from hypothesis import given, settings, strategies as st from hypothesis.errors import DeadlineExceeded, Flaky, InvalidArgument -from tests.common.utils import assert_falsifying_output, capture_out, fails_with +from tests.common.utils import assert_falsifying_output, fails_with def test_raises_deadline_on_slow_test(): @@ -109,11 +109,10 @@ def slow_once(i): once[0] = False time.sleep(0.2) - with capture_out() as o: - with pytest.raises(Flaky): - slow_once() - assert "Unreliable test timing" in o.getvalue() - assert "took 2" in o.getvalue() + with pytest.raises(Flaky) as err: + slow_once() + assert "Unreliable test timing" in "\n".join(err.value.__notes__) + assert "took 2" in "\n".join(err.value.__notes__) @pytest.mark.parametrize("slow_strategy", [False, True]) diff --git a/hypothesis-python/tests/cover/test_error_in_draw.py b/hypothesis-python/tests/cover/test_error_in_draw.py index 3117a6ecd6..e00d0c7897 100644 --- a/hypothesis-python/tests/cover/test_error_in_draw.py +++ b/hypothesis-python/tests/cover/test_error_in_draw.py @@ -12,8 +12,6 @@ from hypothesis import given, strategies as st -from tests.common.utils import capture_out - def test_error_is_in_finally(): @given(st.data()) @@ -23,8 +21,7 @@ def test(d): finally: raise ValueError() - with capture_out() as o: - with pytest.raises(ValueError): - test() + with pytest.raises(ValueError) as err: + test() - assert "[0, 1, -1]" in o.getvalue() + assert "[0, 1, -1]" in "\n".join(err.value.__notes__) diff --git a/hypothesis-python/tests/cover/test_escalation.py b/hypothesis-python/tests/cover/test_escalation.py index 11a4bc7f4b..5755089829 100644 --- a/hypothesis-python/tests/cover/test_escalation.py +++ b/hypothesis-python/tests/cover/test_escalation.py @@ -13,7 +13,9 @@ import pytest import hypothesis +from hypothesis import errors from hypothesis.internal import escalation as esc +from hypothesis.internal.compat import BaseExceptionGroup def test_does_not_escalate_errors_in_non_hypothesis_file(): @@ -62,3 +64,9 @@ def test_is_hypothesis_file_not_confused_by_prefix(monkeypatch): @pytest.mark.parametrize("fname", ["", ""]) def test_is_hypothesis_file_does_not_error_on_invalid_paths_issue_2319(fname): assert not esc.is_hypothesis_file(fname) + + +def test_multiplefailures_deprecation(): + with pytest.warns(errors.HypothesisDeprecationWarning): + exc = errors.MultipleFailures + assert exc is BaseExceptionGroup diff --git a/hypothesis-python/tests/cover/test_explicit_examples.py b/hypothesis-python/tests/cover/test_explicit_examples.py index fa8afd863a..d47e9ae41e 100644 --- a/hypothesis-python/tests/cover/test_explicit_examples.py +++ b/hypothesis-python/tests/cover/test_explicit_examples.py @@ -23,12 +23,8 @@ reporting, settings, ) -from hypothesis.errors import ( - DeadlineExceeded, - HypothesisWarning, - InvalidArgument, - MultipleFailures, -) +from hypothesis.errors import DeadlineExceeded, HypothesisWarning, InvalidArgument +from 
hypothesis.internal.compat import ExceptionGroup from hypothesis.strategies import floats, integers, text from tests.common.utils import assert_falsifying_output, capture_out @@ -210,14 +206,10 @@ def test(x): note(f"x -> {x}") assert x == 42 - with capture_out() as out: - with reporting.with_reporter(reporting.default): - with pytest.raises(AssertionError): - test() - v = out.getvalue() - print(v) - assert "x -> 43" in v - assert "x -> 42" not in v + with pytest.raises(AssertionError) as err: + test() + assert "x -> 43" in err.value.__notes__ + assert "x -> 42" not in err.value.__notes__ def test_must_agree_with_number_of_arguments(): @@ -250,11 +242,11 @@ def test_unsatisfied_assumption_during_explicit_example(threshold, value): assume(value < threshold) -@pytest.mark.parametrize("exc", [MultipleFailures, AssertionError]) +@pytest.mark.parametrize("exc", [ExceptionGroup, AssertionError]) def test_multiple_example_reporting(exc): @example(1) @example(2) - @settings(report_multiple_bugs=exc is MultipleFailures, phases=[Phase.explicit]) + @settings(report_multiple_bugs=exc is ExceptionGroup, phases=[Phase.explicit]) @given(integers()) def inner_test_multiple_failing_examples(x): assert x < 2 diff --git a/hypothesis-python/tests/cover/test_falsifying_example_output.py b/hypothesis-python/tests/cover/test_falsifying_example_output.py index ca55a8e58c..6d942aa1b2 100644 --- a/hypothesis-python/tests/cover/test_falsifying_example_output.py +++ b/hypothesis-python/tests/cover/test_falsifying_example_output.py @@ -12,19 +12,16 @@ from hypothesis import Phase, example, given, settings, strategies as st -from tests.common.utils import capture_out - -OUTPUT_NO_LINE_BREAK = """ +OUTPUT_NO_BREAK = """ Falsifying explicit example: test( - x=%(input)s, y=%(input)s, + x={0!r}, y={0!r}, ) """ - -OUTPUT_WITH_LINE_BREAK = """ +OUTPUT_WITH_BREAK = """ Falsifying explicit example: test( - x=%(input)s, - y=%(input)s, + x={0!r}, + y={0!r}, ) """ @@ -36,17 +33,11 @@ def test_inserts_line_breaks_only_at_appropriate_lengths(line_break, input): def test(x, y): assert x < y - with capture_out() as cap: - with pytest.raises(AssertionError): - test() - - template = OUTPUT_WITH_LINE_BREAK if line_break else OUTPUT_NO_LINE_BREAK - - desired_output = template % {"input": repr(input)} - - actual_output = cap.getvalue() + with pytest.raises(AssertionError) as err: + test() - assert desired_output.strip() == actual_output.strip() + expected = (OUTPUT_WITH_BREAK if line_break else OUTPUT_NO_BREAK).format(input) + assert expected.strip() == "\n".join(err.value.__notes__) @given(kw=st.none()) @@ -67,8 +58,7 @@ def explicit_phase(*args, kw): ids=lambda fn: fn.__name__, ) def test_vararg_output(fn): - with capture_out() as cap: - with pytest.raises(AssertionError): - fn(1, 2, 3) + with pytest.raises(AssertionError) as err: + fn(1, 2, 3) - assert "1, 2, 3" in cap.getvalue() + assert "1, 2, 3" in "\n".join(err.value.__notes__) diff --git a/hypothesis-python/tests/cover/test_random_module.py b/hypothesis-python/tests/cover/test_random_module.py index 7eb030f5d4..83ed253a3e 100644 --- a/hypothesis-python/tests/cover/test_random_module.py +++ b/hypothesis-python/tests/cover/test_random_module.py @@ -13,14 +13,12 @@ import pytest -from hypothesis import core, find, given, register_random, reporting, strategies as st +from hypothesis import core, find, given, register_random, strategies as st from hypothesis.errors import InvalidArgument from hypothesis.internal import entropy from hypothesis.internal.compat import PYPY from 
hypothesis.internal.entropy import deterministic_PRNG -from tests.common.utils import capture_out - def gc_on_pypy(): # CPython uses reference counting, so objects (without circular refs) @@ -32,16 +30,13 @@ def gc_on_pypy(): def test_can_seed_random(): - with capture_out() as out: - with reporting.with_reporter(reporting.default): - with pytest.raises(AssertionError): - - @given(st.random_module()) - def test(r): - raise AssertionError + @given(st.random_module()) + def test(r): + raise AssertionError - test() - assert "RandomSeeder(0)" in out.getvalue() + with pytest.raises(AssertionError) as err: + test() + assert "RandomSeeder(0)" in "\n".join(err.value.__notes__) @given(st.random_module(), st.random_module()) diff --git a/hypothesis-python/tests/cover/test_randoms.py b/hypothesis-python/tests/cover/test_randoms.py index 143ecf0489..4b4db455e3 100644 --- a/hypothesis-python/tests/cover/test_randoms.py +++ b/hypothesis-python/tests/cover/test_randoms.py @@ -16,7 +16,7 @@ import pytest from hypothesis import assume, given, strategies as st -from hypothesis.errors import MultipleFailures +from hypothesis.internal.compat import ExceptionGroup from hypothesis.strategies._internal.random import ( RANDOM_METHODS, HypothesisRandom, @@ -26,7 +26,6 @@ ) from tests.common.debug import find_any -from tests.common.utils import capture_out def test_implements_all_random_methods(): @@ -242,10 +241,9 @@ def test(rnd): rnd.uniform(0.1, 0.5) raise AssertionError - with capture_out() as out: - with pytest.raises(AssertionError): - test() - assert ".uniform(0.1, 0.5)" in out.getvalue() + with pytest.raises(AssertionError) as err: + test() + assert ".uniform(0.1, 0.5)" in "\n".join(err.value.__notes__) @pytest.mark.skipif( @@ -259,10 +257,9 @@ def test(rnd): rnd.choices([1, 2, 3, 4], k=2) raise AssertionError - with capture_out() as out: - with pytest.raises(AssertionError): - test() - assert ".choices([1, 2, 3, 4], k=2)" in out.getvalue() + with pytest.raises(AssertionError) as err: + test() + assert ".choices([1, 2, 3, 4], k=2)" in "\n".join(err.value.__notes__) @given(st.randoms(use_true_random=False)) @@ -298,7 +295,7 @@ def test(rnd): assert x < 0.5 assert x > 0.5 - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() diff --git a/hypothesis-python/tests/cover/test_reporting.py b/hypothesis-python/tests/cover/test_reporting.py index dce5ac3f9c..cf7bd3053d 100644 --- a/hypothesis-python/tests/cover/test_reporting.py +++ b/hypothesis-python/tests/cover/test_reporting.py @@ -45,11 +45,9 @@ def test_prints_output_by_default(): def test_int(x): raise AssertionError - with capture_out() as o: - with reporting.with_reporter(reporting.default): - with pytest.raises(AssertionError): - test_int() - assert "Falsifying example" in o.getvalue() + with pytest.raises(AssertionError) as err: + test_int() + assert "Falsifying example" in "\n".join(err.value.__notes__) def test_does_not_print_debug_in_verbose(): diff --git a/hypothesis-python/tests/cover/test_reproduce_failure.py b/hypothesis-python/tests/cover/test_reproduce_failure.py index 0f8878c25f..cb954ace4c 100644 --- a/hypothesis-python/tests/cover/test_reproduce_failure.py +++ b/hypothesis-python/tests/cover/test_reproduce_failure.py @@ -128,13 +128,13 @@ def test(i): failing_example[0] = i assert i not in failing_example - with capture_out() as o: - with pytest.raises(AssertionError): - test() - assert "@reproduce_failure" in o.getvalue() + with pytest.raises(AssertionError) as err: + test() + notes = 
"\n".join(err.value.__notes__) + assert "@reproduce_failure" in notes exp = re.compile(r"reproduce_failure\(([^)]+)\)", re.MULTILINE) - extract = exp.search(o.getvalue()) + extract = exp.search(notes) reproduction = eval(extract.group(0)) test = reproduction(test) diff --git a/hypothesis-python/tests/cover/test_slippage.py b/hypothesis-python/tests/cover/test_slippage.py index 6fe4bc1261..4561000fcf 100644 --- a/hypothesis-python/tests/cover/test_slippage.py +++ b/hypothesis-python/tests/cover/test_slippage.py @@ -10,9 +10,10 @@ import pytest -from hypothesis import Phase, assume, given, settings, strategies as st +from hypothesis import Phase, assume, given, settings, strategies as st, target from hypothesis.database import InMemoryExampleDatabase -from hypothesis.errors import Flaky, MultipleFailures +from hypothesis.errors import Flaky +from hypothesis.internal.compat import ExceptionGroup from hypothesis.internal.conjecture.engine import MIN_TEST_CALLS from tests.common.utils import ( @@ -22,6 +23,16 @@ ) +def capture_reports(test): + with capture_out() as o, pytest.raises(ExceptionGroup) as err: + test() + + return o.getvalue() + "\n\n".join( + f"{e!r}\n" + "\n".join(getattr(e, "__notes__", [])) + for e in (err.value,) + err.value.exceptions + ) + + def test_raises_multiple_failures_with_varying_type(): target = [None] @@ -38,12 +49,20 @@ def test(i): exc_class = TypeError if target[0] == i else ValueError raise exc_class() - with capture_out() as o: - with pytest.raises(MultipleFailures): - test() + output = capture_reports(test) + assert "TypeError" in output + assert "ValueError" in output + - assert "TypeError" in o.getvalue() - assert "ValueError" in o.getvalue() +def test_shows_target_scores_with_multiple_failures(): + @settings(database=None, max_examples=100) + @given(st.integers()) + def test(i): + target(i) + assert i > 0 + assert i < 0 + + assert "Highest target score:" in capture_reports(test) def test_raises_multiple_failures_when_position_varies(): @@ -61,11 +80,9 @@ def test(i): else: raise ValueError("loc 2") - with capture_out() as o: - with pytest.raises(MultipleFailures): - test() - assert "loc 1" in o.getvalue() - assert "loc 2" in o.getvalue() + output = capture_reports(test) + assert "loc 1" in output + assert "loc 2" in output def test_replays_both_failing_values(): @@ -81,10 +98,10 @@ def test(i): exc_class = TypeError if target[0] == i else ValueError raise exc_class() - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() @@ -111,7 +128,7 @@ def test(i): if i == target[1]: raise ValueError() - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() bug_fixed = True @@ -142,7 +159,7 @@ def test(i): if i == target[1]: raise ValueError() - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() bug_fixed = True @@ -180,11 +197,7 @@ def test(i): else: duds.add(i) - with capture_out() as o: - with pytest.raises(MultipleFailures): - test() - - output = o.getvalue() + output = capture_reports(test) assert_output_contains_failure(output, test, i=10000) assert_output_contains_failure(output, test, i=second_target[0]) @@ -212,13 +225,19 @@ def test(i): flaky_failed_once[0] = True raise ValueError() - with pytest.raises(Flaky): + try: test() + raise AssertionError("Expected test() to raise an error") + except ExceptionGroup as err: + assert any(isinstance(e, Flaky) for e in err.exceptions) flaky_fixed = 
True - with pytest.raises(MultipleFailures): + try: test() + raise AssertionError("Expected test() to raise an error") + except ExceptionGroup as err: + assert not any(isinstance(e, Flaky) for e in err.exceptions) @pytest.mark.parametrize("allow_multi", [True, False]) @@ -237,7 +256,7 @@ def test(i): seen.add(ValueError) raise ValueError - with pytest.raises(MultipleFailures if allow_multi else TypeError): + with pytest.raises(ExceptionGroup if allow_multi else TypeError): test() assert seen == {TypeError, ValueError} @@ -263,7 +282,7 @@ def test(x): assert x in seen or (x <= special[0]) assert x not in special - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test() diff --git a/hypothesis-python/tests/cover/test_stateful.py b/hypothesis-python/tests/cover/test_stateful.py index 1b1ac20842..dbbb911f9e 100644 --- a/hypothesis-python/tests/cover/test_stateful.py +++ b/hypothesis-python/tests/cover/test_stateful.py @@ -206,16 +206,13 @@ def populate_bundle(self): def fail_fast(self): raise AssertionError - with capture_out() as o: - # The state machine must raise an exception for the - # falsifying example to be printed. - with raises(AssertionError): - run_state_machine_as_test(ProducesMultiple) + with raises(AssertionError) as err: + run_state_machine_as_test(ProducesMultiple) # This is tightly coupled to the output format of the step printing. # The first line is "Falsifying Example:..." the second is creating # the state machine, the third is calling the "initialize" method. - assignment_line = o.getvalue().split("\n")[2] + assignment_line = err.value.__notes__[2] # 'populate_bundle()' returns 2 values, so should be # expanded to 2 variables. assert assignment_line == "v1, v2 = state.populate_bundle()" @@ -241,10 +238,10 @@ def populate_bundle(self): def fail_fast(self, b): assert b != 1 - with capture_out() as o, raises(AssertionError): + with raises(AssertionError) as err: run_state_machine_as_test(ProducesMultiple) - assignment_line = o.getvalue().split("\n")[2] + assignment_line = err.value.__notes__[2] assert assignment_line == "(v1,) = state.populate_bundle()" state = ProducesMultiple() @@ -266,16 +263,13 @@ def populate_bundle(self): def fail_fast(self): raise AssertionError - with capture_out() as o: - # The state machine must raise an exception for the - # falsifying example to be printed. - with raises(AssertionError): - run_state_machine_as_test(ProducesNoVariables) + with raises(AssertionError) as err: + run_state_machine_as_test(ProducesNoVariables) # This is tightly coupled to the output format of the step printing. # The first line is "Falsifying Example:..." the second is creating # the state machine, the third is calling the "initialize" method. - assignment_line = o.getvalue().split("\n")[2] + assignment_line = err.value.__notes__[2] # 'populate_bundle()' returns 0 values, so there should be no # variable assignment. 
assert assignment_line == "state.populate_bundle()" @@ -633,20 +627,19 @@ def invariant_1(self): def rule_1(self): pass - with capture_out() as o: - with pytest.raises(ValueError): - run_state_machine_as_test(BadInvariant) + with pytest.raises(ValueError) as err: + run_state_machine_as_test(BadInvariant) - result = o.getvalue() + result = "\n".join(err.value.__notes__) assert ( result - == """\ + == """ Falsifying example: state = BadInvariant() state.initialize_1() state.invariant_1() state.teardown() -""" +""".strip() ) @@ -680,14 +673,13 @@ def rule_1(self): if self.num == 2: raise ValueError() - with capture_out() as o: - with pytest.raises(ValueError): - run_state_machine_as_test(BadRuleWithGoodInvariants) + with pytest.raises(ValueError) as err: + run_state_machine_as_test(BadRuleWithGoodInvariants) - result = o.getvalue() + result = "\n".join(err.value.__notes__) assert ( result - == """\ + == """ Falsifying example: state = BadRuleWithGoodInvariants() state.invariant_1() @@ -700,7 +692,7 @@ def rule_1(self): state.invariant_3() state.rule_1() state.teardown() -""" +""".strip() ) @@ -763,12 +755,12 @@ def delete(self, k, v): def values_agree(self, k): assert not self.__deleted[k] - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(IncorrectDeletion) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(IncorrectDeletion) - assert o.getvalue().count(" = state.k(") == 1 - assert o.getvalue().count(" = state.v(") == 1 + result = "\n".join(err.value.__notes__) + assert result.count(" = state.k(") == 1 + assert result.count(" = state.v(") == 1 def test_prints_equal_values_with_correct_variable_name(): @@ -789,11 +781,10 @@ def transfer(self, source): def fail(self, source): raise AssertionError - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(MovesBetweenBundles) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(MovesBetweenBundles) - result = o.getvalue() + result = "\n".join(err.value.__notes__) for m in ["create", "transfer", "fail"]: assert result.count("state." 
+ m) == 1 assert "v1 = state.create()" in result @@ -822,12 +813,11 @@ def initialize_c(self): def fail_fast(self): raise AssertionError - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(WithInitializeRules) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(WithInitializeRules) assert set(WithInitializeRules.initialized[-3:]) == {"a", "b", "c"} - result = o.getvalue().splitlines()[1:] + result = err.value.__notes__[1:] assert result[0] == "state = WithInitializeRules()" # Initialize rules call order is shuffled assert {result[1], result[2], result[3]} == { @@ -852,20 +842,19 @@ def fail_fast(self, param): raise AssertionError WithInitializeBundleRules.TestCase.settings = NO_BLOB_SETTINGS - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(WithInitializeBundleRules) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(WithInitializeBundleRules) - result = o.getvalue() + result = "\n".join(err.value.__notes__) assert ( result - == """\ + == """ Falsifying example: state = WithInitializeBundleRules() v1 = state.initialize_a(dep='dep') state.fail_fast(param=v1) state.teardown() -""" +""".strip() ) @@ -934,12 +923,11 @@ def initialize_b(self): def fail_fast(self): raise AssertionError - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(ChildStateMachine) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(ChildStateMachine) assert set(ChildStateMachine.initialized[-2:]) == {"a", "b"} - result = o.getvalue().splitlines()[1:] + result = err.value.__notes__[1:] assert result[0] == "state = ChildStateMachine()" # Initialize rules call order is shuffled assert {result[1], result[2]} == {"state.initialize_a()", "state.initialize_b()"} @@ -961,25 +949,24 @@ def fail_eventually(self): assert self.initialize_called_counter <= 2 StateMachine.TestCase.settings = NO_BLOB_SETTINGS - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(StateMachine) + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(StateMachine) - result = o.getvalue() + result = "\n".join(err.value.__notes__) assert ( result - == """\ + == """ Falsifying example: state = StateMachine() state.initialize() state.fail_eventually() state.fail_eventually() state.teardown() -""" +""".strip() ) -def test_steps_printed_despite_pytest_fail(capsys): +def test_steps_printed_despite_pytest_fail(): # Test for https://github.com/HypothesisWorks/hypothesis/issues/1372 @Settings(print_blob=False) class RaisesProblem(RuleBasedStateMachine): @@ -987,17 +974,15 @@ class RaisesProblem(RuleBasedStateMachine): def oops(self): pytest.fail() - with pytest.raises(Failed): + with pytest.raises(Failed) as err: run_state_machine_as_test(RaisesProblem) - out, _ = capsys.readouterr() assert ( - """\ + "\n".join(err.value.__notes__).strip() + == """ Falsifying example: state = RaisesProblem() state.oops() -state.teardown() -""" - in out +state.teardown()""".strip() ) @@ -1090,12 +1075,10 @@ def init_data(self, value): def mostly_fails(self, d): assert d == 42 - with capture_out() as o: - with pytest.raises(AssertionError): - run_state_machine_as_test(TrickyPrintingMachine) - output = o.getvalue() - assert "v1 = state.init_data(value=0)" in output - assert "v1 = state.init_data(value=v1)" not in output + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(TrickyPrintingMachine) + assert "v1 = 
state.init_data(value=0)" in err.value.__notes__ + assert "v1 = state.init_data(value=v1)" not in err.value.__notes__ def test_multiple_precondition_bug(): diff --git a/hypothesis-python/tests/cover/test_testdecorators.py b/hypothesis-python/tests/cover/test_testdecorators.py index 233a593e82..2bd583e658 100644 --- a/hypothesis-python/tests/cover/test_testdecorators.py +++ b/hypothesis-python/tests/cover/test_testdecorators.py @@ -406,13 +406,12 @@ def foo(x): failing.append(x) raise AssertionError - with raises(AssertionError): - with capture_out() as out: - foo() + with raises(AssertionError) as err: + foo() assert len(failing) == 2 assert len(set(failing)) == 1 - assert "Falsifying example" in out.getvalue() - assert "Lo" in out.getvalue() + assert "Falsifying example" in "\n".join(err.value.__notes__) + assert "Lo" in err.value.__notes__ @given(integers().filter(lambda x: x % 4 == 0)) @@ -466,12 +465,9 @@ def test(xs): if sum(xs) <= 100: raise ValueError() - with capture_out() as out: - with reporting.with_reporter(reporting.default): - with raises(ValueError): - test() - lines = out.getvalue().strip().splitlines() - assert lines.count("Hi there") == 1 + with raises(ValueError) as err: + test() + assert err.value.__notes__.count("Hi there") == 1 @given(lists(integers(), max_size=0)) diff --git a/hypothesis-python/tests/ghostwriter/test_ghostwriter.py b/hypothesis-python/tests/ghostwriter/test_ghostwriter.py index dc0d8cd1a5..9886b704aa 100644 --- a/hypothesis-python/tests/ghostwriter/test_ghostwriter.py +++ b/hypothesis-python/tests/ghostwriter/test_ghostwriter.py @@ -37,8 +37,9 @@ import click import pytest -from hypothesis.errors import InvalidArgument, MultipleFailures, Unsatisfiable +from hypothesis.errors import InvalidArgument, Unsatisfiable from hypothesis.extra import cli, ghostwriter +from hypothesis.internal.compat import BaseExceptionGroup from hypothesis.strategies import builds, from_type, just, lists from hypothesis.strategies._internal.lazy import LazyStrategy @@ -336,7 +337,7 @@ def test_run_ghostwriter_roundtrip(): ) try: get_test_function(source_code)() - except (AssertionError, ValueError, MultipleFailures): + except (AssertionError, ValueError, BaseExceptionGroup): pass # Finally, restricting ourselves to finite floats makes the test pass! 
diff --git a/hypothesis-python/tests/nocover/test_interesting_origin.py b/hypothesis-python/tests/nocover/test_interesting_origin.py index d0d1194055..caa50ae91e 100644 --- a/hypothesis-python/tests/nocover/test_interesting_origin.py +++ b/hypothesis-python/tests/nocover/test_interesting_origin.py @@ -11,7 +11,7 @@ import pytest from hypothesis import given, settings, strategies as st -from hypothesis.errors import MultipleFailures +from hypothesis.internal.compat import ExceptionGroup from tests.common.utils import flaky @@ -58,5 +58,5 @@ def test_fn(x, y): # Indirection to fix https://github.com/HypothesisWorks/hypothesis/issues/2888 return function(x, y) - with pytest.raises(MultipleFailures): + with pytest.raises(ExceptionGroup): test_fn() diff --git a/hypothesis-python/tests/nocover/test_scrutineer.py b/hypothesis-python/tests/nocover/test_scrutineer.py index b2512fec3d..b24033a170 100644 --- a/hypothesis-python/tests/nocover/test_scrutineer.py +++ b/hypothesis-python/tests/nocover/test_scrutineer.py @@ -59,7 +59,10 @@ def get_reports(file_contents, *, testdir): for i, line in enumerate(file_contents.splitlines()) if line.endswith(BUG_MARKER) } - expected = ["\n".join(r) for k, r in make_report(explanations).items()] + expected = [ + ("\n".join(r), "\n | ".join(r)) # single, ExceptionGroup + for r in make_report(explanations).values() + ] return pytest_stdout, expected @@ -67,8 +70,8 @@ def get_reports(file_contents, *, testdir): def test_explanations(code, testdir): pytest_stdout, expected = get_reports(PRELUDE + code, testdir=testdir) assert len(expected) == code.count(BUG_MARKER) - for report in expected: - assert report in pytest_stdout + for single, group in expected: + assert single in pytest_stdout or group in pytest_stdout @pytest.mark.parametrize("code", FRAGMENTS) diff --git a/hypothesis-python/tests/nocover/test_stateful.py b/hypothesis-python/tests/nocover/test_stateful.py index b1c35c5848..b9a2e07429 100644 --- a/hypothesis-python/tests/nocover/test_stateful.py +++ b/hypothesis-python/tests/nocover/test_stateful.py @@ -11,14 +11,11 @@ from collections import namedtuple import pytest -from pytest import raises from hypothesis import settings as Settings from hypothesis.stateful import Bundle, RuleBasedStateMachine, precondition, rule from hypothesis.strategies import booleans, integers, lists -from tests.common.utils import capture_out - Leaf = namedtuple("Leaf", ("label",)) Split = namedtuple("Split", ("left", "right")) @@ -183,13 +180,9 @@ def snake(self): def test_bad_machines_fail(machine): test_class = machine.TestCase try: - with capture_out() as o: - with raises(AssertionError): - test_class().runTest() - except Exception: - print(o.getvalue()) - raise - v = o.getvalue() - print(v) - steps = [l for l in v.splitlines() if "Step " in l or "state." in l] + test_class().runTest() + raise RuntimeError("Expected an assertion error") + except AssertionError as err: + notes = err.__notes__ + steps = [l for l in notes if "Step " in l or "state." 
in l] assert 1 <= len(steps) <= 50 diff --git a/hypothesis-python/tests/nocover/test_targeting.py b/hypothesis-python/tests/nocover/test_targeting.py index 58fb6be6f8..0fad74eb5a 100644 --- a/hypothesis-python/tests/nocover/test_targeting.py +++ b/hypothesis-python/tests/nocover/test_targeting.py @@ -34,7 +34,6 @@ def test_reports_target_results(testdir, multiple): assert "Falsifying example" in out assert "x=101" in out assert out.count("Highest target score") == 1 - assert out.index("Highest target score") < out.index("Falsifying example") assert result.ret != 0 diff --git a/hypothesis-python/tests/pytest/test_skipping.py b/hypothesis-python/tests/pytest/test_skipping.py index ff560ef42c..0433fe9706 100644 --- a/hypothesis-python/tests/pytest/test_skipping.py +++ b/hypothesis-python/tests/pytest/test_skipping.py @@ -23,7 +23,7 @@ def test_to_be_skipped(xs): if xs == 0: pytest.skip() # But the pytest 3.0 internals don't have such an exception, so we keep - # going and raise a MultipleFailures error. Ah well. + # going and raise a BaseExceptionGroup error. Ah well. else: assert xs == 0 """ From 04b4d0ef2520b8d56e55930eafd9880813df80e6 Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Tue, 3 May 2022 19:01:52 -0600 Subject: [PATCH 2/3] Report only first bug on Pytest --- hypothesis-python/src/_hypothesis_pytestplugin.py | 7 +++++++ hypothesis-python/src/hypothesis/core.py | 5 +++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hypothesis-python/src/_hypothesis_pytestplugin.py b/hypothesis-python/src/_hypothesis_pytestplugin.py index 0155c9f232..042f9e62c7 100644 --- a/hypothesis-python/src/_hypothesis_pytestplugin.py +++ b/hypothesis-python/src/_hypothesis_pytestplugin.py @@ -178,6 +178,13 @@ def pytest_configure(config): pass core.global_force_seed = seed + core.pytest_shows_exceptiongroups = ( + sys.version_info[:2] >= (3, 11) + ## See https://github.com/pytest-dev/pytest/issues/9159 + # or pytest_version >= (7, 2) # TODO: fill in correct version here + or config.getoption("tbstyle", "auto") == "native" + ) + @pytest.hookimpl(hookwrapper=True) def pytest_runtest_call(item): __tracebackhide__ = True diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index 3234155769..14ade3fbb1 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -126,6 +126,7 @@ running_under_pytest = False +pytest_shows_exceptiongroups = True global_force_seed = None _hypothesis_global_random = None @@ -436,7 +437,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s err = new yield (fragments_reported, err) - if state.settings.report_multiple_bugs: + if state.settings.report_multiple_bugs and pytest_shows_exceptiongroups: continue break finally: @@ -840,7 +841,7 @@ def run_engine(self): if not self.falsifying_examples: return - elif not self.settings.report_multiple_bugs: + elif not (self.settings.report_multiple_bugs and pytest_shows_exceptiongroups): # Pretend that we only found one failure, by discarding the others. 
del self.falsifying_examples[:-1] From 5ed345121f8a1bd6aa761b4bddc90e270a274483 Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Tue, 26 Jul 2022 23:35:10 -0700 Subject: [PATCH 3/3] Remove old+undocumented hook --- hypothesis-python/src/hypothesis/reporting.py | 4 ---- hypothesis-python/tests/cover/test_reporting.py | 12 ------------ 2 files changed, 16 deletions(-) diff --git a/hypothesis-python/src/hypothesis/reporting.py b/hypothesis-python/src/hypothesis/reporting.py index d62beb57f6..a0f300b2bc 100644 --- a/hypothesis-python/src/hypothesis/reporting.py +++ b/hypothesis-python/src/hypothesis/reporting.py @@ -15,10 +15,6 @@ from hypothesis.utils.dynamicvariables import DynamicVariable -def silent(value): - pass - - def default(value): try: print(value) diff --git a/hypothesis-python/tests/cover/test_reporting.py b/hypothesis-python/tests/cover/test_reporting.py index cf7bd3053d..7faaf0c2f4 100644 --- a/hypothesis-python/tests/cover/test_reporting.py +++ b/hypothesis-python/tests/cover/test_reporting.py @@ -21,18 +21,6 @@ from tests.common.utils import capture_out -def test_can_suppress_output(): - @given(integers()) - def test_int(x): - raise AssertionError - - with capture_out() as o: - with reporting.with_reporter(reporting.silent): - with pytest.raises(AssertionError): - test_int() - assert "Falsifying example" not in o.getvalue() - - def test_can_print_bytes(): with capture_out() as o: with reporting.with_reporter(reporting.default):
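Taken together, the series means a test with several distinct bugs now raises one exception group rather than printing each failure and raising MultipleFailures. A rough end-to-end sketch of the new behaviour when the test function is called directly (outside pytest, so the pytest_shows_exceptiongroups check from the second patch does not limit reporting to a single bug); the test and its assertions are illustrative rather than taken from this series, and on Python 3.10 and earlier the exceptiongroup backport named in RELEASE.rst supplies BaseExceptionGroup:

import sys

from hypothesis import given, settings, strategies as st

if sys.version_info < (3, 11):
    from exceptiongroup import BaseExceptionGroup  # PyPI backport


@settings(report_multiple_bugs=True, database=None)
@given(st.integers())
def test_has_two_bugs(x):
    # Two different assertion lines give two distinct "interesting origins",
    # so Hypothesis keeps searching and reports both failures.
    assert x < 1
    assert x > -1


try:
    test_has_two_bugs()
except BaseExceptionGroup as group:
    # One leaf exception per distinct failure; each carries its own
    # "Falsifying example: ..." report as a PEP 678 note.
    for leaf in group.exceptions:
        print(type(leaf).__name__, getattr(leaf, "__notes__", []))
except AssertionError as single:
    # If only one of the bugs is found, it is raised directly
    # instead of being wrapped in a group.
    print(getattr(single, "__notes__", []))

On Python 3.11+ the same handling could also be written with an except* AssertionError clause.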