From 1ea1d2c3554fdfb54e5137cb3a68875a5ee9c64c Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 21 Dec 2024 13:36:25 -0500 Subject: [PATCH 1/6] use convenience functions for ir kwargs --- hypothesis-python/tests/conjecture/common.py | 12 +- .../tests/conjecture/test_engine.py | 13 +- hypothesis-python/tests/conjecture/test_ir.py | 480 +++--------------- .../tests/conjecture/test_optimiser.py | 15 +- .../tests/cover/test_float_utils.py | 36 +- 5 files changed, 97 insertions(+), 459 deletions(-) diff --git a/hypothesis-python/tests/conjecture/common.py b/hypothesis-python/tests/conjecture/common.py index 5bb91b386e..d566a4e3d1 100644 --- a/hypothesis-python/tests/conjecture/common.py +++ b/hypothesis-python/tests/conjecture/common.py @@ -430,9 +430,9 @@ def ir(*values: list[IRType]) -> list[IRNode]: return tuple(nodes) -def make_float_kw( - min_value, - max_value, +def float_kw( + min_value=-math.inf, + max_value=math.inf, *, allow_nan=True, smallest_nonzero_magnitude=SMALLEST_SUBNORMAL, @@ -445,7 +445,7 @@ def make_float_kw( } -def make_integer_kw(min_value, max_value, *, weights=None, shrink_towards=0): +def integer_kw(min_value=None, max_value=None, *, weights=None, shrink_towards=0): return { "min_value": min_value, "max_value": max_value, @@ -454,9 +454,9 @@ def make_integer_kw(min_value, max_value, *, weights=None, shrink_towards=0): } -def make_string_kw(intervals, *, min_size=0, max_size=COLLECTION_DEFAULT_MAX_SIZE): +def string_kw(intervals, *, min_size=0, max_size=COLLECTION_DEFAULT_MAX_SIZE): return {"intervals": intervals, "min_size": min_size, "max_size": max_size} -# we could in theory define make_bytes_kw and make_boolean_kw, but without any +# we could in theory define bytes_kw and boolean_kw, but without any # default kw values they aren't really a time save. 
diff --git a/hypothesis-python/tests/conjecture/test_engine.py b/hypothesis-python/tests/conjecture/test_engine.py index 1f76355140..12e45a6982 100644 --- a/hypothesis-python/tests/conjecture/test_engine.py +++ b/hypothesis-python/tests/conjecture/test_engine.py @@ -57,6 +57,7 @@ SOME_LABEL, TEST_SETTINGS, buffer_size_limit, + integer_kw, ir, ir_nodes, run_to_nodes, @@ -1627,17 +1628,7 @@ def test_simulate_to_evicted_data(monkeypatch): # cache evictions (but also potentially other trickery). monkeypatch.setattr(engine_module, "CACHE_SIZE", 1) - node_0 = IRNode( - ir_type="integer", - value=0, - kwargs={ - "min_value": None, - "max_value": None, - "weights": None, - "shrink_towards": 0, - }, - was_forced=False, - ) + node_0 = IRNode(ir_type="integer", value=0, kwargs=integer_kw(), was_forced=False) node_1 = node_0.copy(with_value=1) def test(data): diff --git a/hypothesis-python/tests/conjecture/test_ir.py b/hypothesis-python/tests/conjecture/test_ir.py index 64e5d9bce3..74e5c1c104 100644 --- a/hypothesis-python/tests/conjecture/test_ir.py +++ b/hypothesis-python/tests/conjecture/test_ir.py @@ -38,7 +38,9 @@ from tests.common.debug import minimal from tests.conjecture.common import ( draw_value, + float_kw, fresh_data, + integer_kw, integer_kwargs, ir, ir_nodes, @@ -49,9 +51,9 @@ # we max out at 128 bit integers in the *unbounded* case, but someone may # specify a bound with a larger magnitude. Ensure we calculate max children for # those cases correctly. 
-@example(("integer", {"min_value": None, "max_value": -(2**200), "weights": None})) -@example(("integer", {"min_value": 2**200, "max_value": None, "weights": None})) -@example(("integer", {"min_value": -(2**200), "max_value": 2**200, "weights": None})) +@example(("integer", integer_kw(max_value=-(2**200)))) +@example(("integer", integer_kw(min_value=2**200))) +@example(("integer", integer_kw(-(2**200), 2**200))) @given(ir_types_and_kwargs()) def test_compute_max_children_is_positive(ir_type_and_kwargs): (ir_type, kwargs) = ir_type_and_kwargs @@ -135,78 +137,22 @@ def test_compute_max_children_is_positive(ir_type_and_kwargs): ("boolean", {"p": 0.5}, 2), ("boolean", {"p": 0.001}, 2), ("boolean", {"p": 0.999}, 2), + ("float", float_kw(0.0, 0.0), 1), + ("float", float_kw(-0.0, -0.0), 1), + ("float", float_kw(-0.0, 0.0), 2), + ("float", float_kw(next_down(-0.0), next_up(0.0)), 4), ( "float", - { - "min_value": 0.0, - "max_value": 0.0, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - 1, - ), - ( - "float", - { - "min_value": -0.0, - "max_value": -0.0, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - 1, - ), - ( - "float", - { - "min_value": -0.0, - "max_value": 0.0, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - 2, - ), - ( - "float", - { - "min_value": next_down(-0.0), - "max_value": next_up(0.0), - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, + float_kw( + next_down(next_down(-0.0)), + next_up(next_up(0.0)), + smallest_nonzero_magnitude=next_up(SMALLEST_SUBNORMAL), + ), 4, ), - ( - "float", - { - "min_value": next_down(next_down(-0.0)), - "max_value": next_up(next_up(0.0)), - "smallest_nonzero_magnitude": next_up(SMALLEST_SUBNORMAL), - }, - 4, - ), - ( - "float", - { - "min_value": -math.inf, - "max_value": math.inf, - "smallest_nonzero_magnitude": next_down(math.inf), - }, - 6, - ), - ( - "float", - { - "min_value": 1, - "max_value": 10, - "smallest_nonzero_magnitude": 11.0, - }, - 0, - ), - ( - "float", - { - "min_value": 
-3, - "max_value": -2, - "smallest_nonzero_magnitude": 4.0, - }, - 0, - ), + ("float", float_kw(smallest_nonzero_magnitude=next_down(math.inf)), 6), + ("float", float_kw(1, 10, smallest_nonzero_magnitude=11.0), 0), + ("float", float_kw(-3, -2, smallest_nonzero_magnitude=4.0), 0), ], ) def test_compute_max_children(ir_type, kwargs, count_children): @@ -238,51 +184,12 @@ def test_draw_string_single_interval_with_equal_bounds(s, n): ) ) # all combinations of float signs -@example( - ( - "float", - { - "min_value": next_down(-0.0), - "max_value": -0.0, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - ) -) -@example( - ( - "float", - { - "min_value": next_down(-0.0), - "max_value": next_up(0.0), - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - ) -) -@example( - ( - "float", - { - "min_value": 0.0, - "max_value": next_up(0.0), - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - ) -) +@example(("float", float_kw(next_down(-0.0), -0.0))) +@example(("float", float_kw(next_down(-0.0), next_up(0.0)))) +@example(("float", float_kw(0.0, next_up(0.0)))) # using a smallest_nonzero_magnitude which happens to filter out everything -@example( - ("float", {"min_value": 1.0, "max_value": 2.0, "smallest_nonzero_magnitude": 3.0}) -) -@example( - ( - "integer", - { - "min_value": 1, - "max_value": 2, - "weights": {1: 0.2, 2: 0.4}, - "shrink_towards": 0, - }, - ) -) +@example(("float", float_kw(1.0, 2.0, smallest_nonzero_magnitude=3.0))) +@example(("integer", integer_kw(1, 2, weights={1: 0.2, 2: 0.4}))) @given(ir_types_and_kwargs()) @settings(suppress_health_check=[HealthCheck.filter_too_much]) def test_compute_max_children_and_all_children_agree(ir_type_and_kwargs): @@ -339,15 +246,7 @@ def test_ir_nodes(random): data.freeze() expected_tree_nodes = ( IRNode( - ir_type="float", - value=5.0, - kwargs={ - "min_value": -10.0, - "max_value": 10.0, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - was_forced=True, + ir_type="float", 
value=5.0, kwargs=float_kw(-10.0, 10.0), was_forced=True ), IRNode( ir_type="boolean", @@ -371,17 +270,7 @@ def test_ir_nodes(random): kwargs={"min_size": 8, "max_size": 8}, was_forced=True, ), - IRNode( - ir_type="integer", - value=50, - kwargs={ - "min_value": 0, - "max_value": 100, - "weights": None, - "shrink_towards": 0, - }, - was_forced=True, - ), + IRNode(ir_type="integer", value=50, kwargs=integer_kw(0, 100), was_forced=True), ) assert data.ir_nodes == expected_tree_nodes @@ -439,19 +328,7 @@ def test_data_with_changed_forced_value(node): # ensure we hit bare-minimum coverage for all ir types. -@example( - IRNode( - ir_type="float", - value=0.0, - kwargs={ - "min_value": -math.inf, - "max_value": math.inf, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - was_forced=True, - ) -) +@example(IRNode(ir_type="float", value=0.0, kwargs=float_kw(), was_forced=True)) @example( IRNode( ir_type="boolean", @@ -461,17 +338,7 @@ def test_data_with_changed_forced_value(node): ) ) @example( - IRNode( - ir_type="integer", - value=50, - kwargs={ - "min_value": 50, - "max_value": 100, - "weights": None, - "shrink_towards": 0, - }, - was_forced=True, - ) + IRNode(ir_type="integer", value=50, kwargs=integer_kw(50, 100), was_forced=True) ) @example( IRNode( @@ -524,66 +391,21 @@ def test_all_children_are_permitted_values(ir_type_and_kwargs): @pytest.mark.parametrize( "value, ir_type, kwargs, permitted", [ - (0, "integer", {"min_value": 1, "max_value": 2, "shrink_towards": 0}, False), - (2, "integer", {"min_value": 0, "max_value": 1, "shrink_towards": 0}, False), - (10, "integer", {"min_value": 0, "max_value": 20, "shrink_towards": 0}, True), - ( - int(2**128 / 2) - 1, - "integer", - {"min_value": None, "max_value": None, "shrink_towards": 0}, - True, - ), - ( - int(2**128 / 2), - "integer", - {"min_value": None, "max_value": None, "shrink_towards": 0}, - False, - ), - ( - math.nan, - "float", - {"min_value": 0.0, "max_value": 0.0, "allow_nan": 
True}, - True, - ), - ( - math.nan, - "float", - {"min_value": 0.0, "max_value": 0.0, "allow_nan": False}, - False, - ), - ( - 2.0, - "float", - { - "min_value": 1.0, - "max_value": 3.0, - "allow_nan": True, - "smallest_nonzero_magnitude": 2.5, - }, - False, - ), + (0, "integer", integer_kw(1, 2), False), + (2, "integer", integer_kw(0, 1), False), + (10, "integer", integer_kw(0, 20), True), + (int(2**128 / 2) - 1, "integer", integer_kw(), True), + (int(2**128 / 2), "integer", integer_kw(), False), + (math.nan, "float", float_kw(0.0, 0.0), True), + (math.nan, "float", float_kw(0.0, 0.0, allow_nan=False), False), + (2.0, "float", float_kw(1.0, 3.0, smallest_nonzero_magnitude=2.5), False), ( -2.0, "float", - { - "min_value": -3.0, - "max_value": -1.0, - "allow_nan": True, - "smallest_nonzero_magnitude": 2.5, - }, + float_kw(-3.0, -1.0, smallest_nonzero_magnitude=2.5), False, ), - ( - 1.0, - "float", - { - "min_value": 1.0, - "max_value": 1.0, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - True, - ), + (1.0, "float", float_kw(1.0, 1.0), True), ( "abcd", "string", @@ -641,56 +463,15 @@ def test_forced_nodes_are_trivial(node): "node", [ IRNode( - ir_type="float", - value=5.0, - kwargs={ - "min_value": 5.0, - "max_value": 10.0, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - was_forced=False, - ), - IRNode( - ir_type="float", - value=0.0, - kwargs={ - "min_value": -5.0, - "max_value": 5.0, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - was_forced=False, - ), - IRNode( - ir_type="float", - value=0.0, - kwargs={ - "min_value": -math.inf, - "max_value": math.inf, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - was_forced=False, - ), - IRNode( - ir_type="boolean", - value=False, - kwargs={"p": 0.5}, - was_forced=False, - ), - IRNode( - ir_type="boolean", - value=True, - kwargs={"p": 1.0}, - was_forced=False, + ir_type="float", value=5.0, 
kwargs=float_kw(5.0, 10.0), was_forced=False ), IRNode( - ir_type="boolean", - value=False, - kwargs={"p": 0.0}, - was_forced=False, + ir_type="float", value=0.0, kwargs=float_kw(-5.0, 5.0), was_forced=False ), + IRNode(ir_type="float", value=0.0, kwargs=float_kw(), was_forced=False), + IRNode(ir_type="boolean", value=False, kwargs={"p": 0.5}, was_forced=False), + IRNode(ir_type="boolean", value=True, kwargs={"p": 1.0}, was_forced=False), + IRNode(ir_type="boolean", value=False, kwargs={"p": 0.0}, was_forced=False), IRNode( ir_type="string", value="", @@ -724,91 +505,40 @@ def test_forced_nodes_are_trivial(node): was_forced=False, ), IRNode( - ir_type="integer", - value=50, - kwargs={ - "min_value": 50, - "max_value": 100, - "weights": None, - "shrink_towards": 0, - }, - was_forced=False, + ir_type="integer", value=50, kwargs=integer_kw(50, 100), was_forced=False ), IRNode( - ir_type="integer", - value=0, - kwargs={ - "min_value": -10, - "max_value": 10, - "weights": None, - "shrink_towards": 0, - }, - was_forced=False, + ir_type="integer", value=0, kwargs=integer_kw(-10, 10), was_forced=False ), IRNode( ir_type="integer", value=2, - kwargs={ - "min_value": -10, - "max_value": 10, - "weights": None, - "shrink_towards": 2, - }, + kwargs=integer_kw(-10, 10, shrink_towards=2), was_forced=False, ), IRNode( ir_type="integer", value=-10, - kwargs={ - "min_value": -10, - "max_value": 10, - "weights": None, - "shrink_towards": -12, - }, + kwargs=integer_kw(-10, 10, shrink_towards=-12), was_forced=False, ), IRNode( ir_type="integer", value=10, - kwargs={ - "min_value": -10, - "max_value": 10, - "weights": None, - "shrink_towards": 12, - }, - was_forced=False, - ), - IRNode( - ir_type="integer", - value=0, - kwargs={ - "min_value": None, - "max_value": None, - "weights": None, - "shrink_towards": 0, - }, + kwargs=integer_kw(-10, 10, shrink_towards=12), was_forced=False, ), + IRNode(ir_type="integer", value=0, kwargs=integer_kw(), was_forced=False), IRNode( ir_type="integer", 
value=1, - kwargs={ - "min_value": -10, - "max_value": None, - "weights": None, - "shrink_towards": 1, - }, + kwargs=integer_kw(min_value=-10, shrink_towards=1), was_forced=False, ), IRNode( ir_type="integer", value=1, - kwargs={ - "min_value": None, - "max_value": 10, - "weights": None, - "shrink_towards": 1, - }, + kwargs=integer_kw(max_value=10, shrink_towards=1), was_forced=False, ), # we don't consider shrink_towards for unbounded integers. @@ -816,12 +546,7 @@ def test_forced_nodes_are_trivial(node): IRNode( ir_type="integer", value=0, - kwargs={ - "min_value": None, - "max_value": None, - "weights": None, - "shrink_towards": 1, - }, + kwargs=integer_kw(shrink_towards=1), was_forced=False, ), ], @@ -842,50 +567,14 @@ def values(draw): "node", [ IRNode( - ir_type="float", - value=6.0, - kwargs={ - "min_value": 5.0, - "max_value": 10.0, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - was_forced=False, + ir_type="float", value=6.0, kwargs=float_kw(5.0, 10.0), was_forced=False ), IRNode( - ir_type="float", - value=-5.0, - kwargs={ - "min_value": -5.0, - "max_value": 5.0, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - was_forced=False, - ), - IRNode( - ir_type="float", - value=1.0, - kwargs={ - "min_value": -math.inf, - "max_value": math.inf, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, - was_forced=False, - ), - IRNode( - ir_type="boolean", - value=True, - kwargs={"p": 0.5}, - was_forced=False, - ), - IRNode( - ir_type="boolean", - value=True, - kwargs={"p": 0.99}, - was_forced=False, + ir_type="float", value=-5.0, kwargs=float_kw(-5.0, 5.0), was_forced=False ), + IRNode(ir_type="float", value=1.0, kwargs=float_kw(), was_forced=False), + IRNode(ir_type="boolean", value=True, kwargs={"p": 0.5}, was_forced=False), + IRNode(ir_type="boolean", value=True, kwargs={"p": 0.99}, was_forced=False), IRNode( ir_type="string", value="d", @@ -915,27 +604,9 @@ def values(draw): 
was_forced=False, ), IRNode( - ir_type="integer", - value=-10, - kwargs={ - "min_value": -10, - "max_value": 10, - "weights": None, - "shrink_towards": 0, - }, - was_forced=False, - ), - IRNode( - ir_type="integer", - value=42, - kwargs={ - "min_value": None, - "max_value": None, - "weights": None, - "shrink_towards": 0, - }, - was_forced=False, + ir_type="integer", value=-10, kwargs=integer_kw(-10, 10), was_forced=False ), + IRNode(ir_type="integer", value=42, kwargs=integer_kw(), was_forced=False), ], ) def test_nontrivial_nodes(node): @@ -956,56 +627,31 @@ def values(draw): IRNode( ir_type="float", value=1.5, - kwargs={ - "min_value": 1.1, - "max_value": 1.6, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, + kwargs=float_kw(1.1, 1.6), was_forced=False, ), IRNode( ir_type="float", value=math.floor(sys.float_info.max), - kwargs={ - "min_value": sys.float_info.max - 1, - "max_value": math.inf, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, + kwargs=float_kw(sys.float_info.max - 1, math.inf), was_forced=False, ), IRNode( ir_type="float", value=math.ceil(-sys.float_info.max), - kwargs={ - "min_value": -math.inf, - "max_value": -sys.float_info.max + 1, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, + kwargs=float_kw(-math.inf, -sys.float_info.max + 1), was_forced=False, ), IRNode( ir_type="float", value=math.inf, - kwargs={ - "min_value": math.inf, - "max_value": math.inf, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, + kwargs=float_kw(math.inf, math.inf), was_forced=False, ), IRNode( ir_type="float", value=-math.inf, - kwargs={ - "min_value": -math.inf, - "max_value": -math.inf, - "allow_nan": True, - "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, - }, + kwargs=float_kw(-math.inf, -math.inf), was_forced=False, ), ], diff --git a/hypothesis-python/tests/conjecture/test_optimiser.py b/hypothesis-python/tests/conjecture/test_optimiser.py index 
b1963549b2..8e561ae6e5 100644 --- a/hypothesis-python/tests/conjecture/test_optimiser.py +++ b/hypothesis-python/tests/conjecture/test_optimiser.py @@ -19,7 +19,13 @@ from hypothesis.internal.entropy import deterministic_PRNG from hypothesis.internal.intervalsets import IntervalSet -from tests.conjecture.common import TEST_SETTINGS, buffer_size_limit, ir, ir_nodes +from tests.conjecture.common import ( + TEST_SETTINGS, + buffer_size_limit, + integer_kw, + ir, + ir_nodes, +) def test_optimises_to_maximum(): @@ -246,12 +252,7 @@ def test(data): ) ) @example( - IRNode( - ir_type="integer", - value=1, - kwargs={"min_value": 0, "max_value": 200, "weights": None, "shrink_towards": 0}, - was_forced=False, - ) + IRNode(ir_type="integer", value=1, kwargs=integer_kw(0, 200), was_forced=False) ) def test_optimising_all_nodes(node): assume(compute_max_children(node.ir_type, node.kwargs) > 50) diff --git a/hypothesis-python/tests/cover/test_float_utils.py b/hypothesis-python/tests/cover/test_float_utils.py index d079f31c3d..54888f56e3 100644 --- a/hypothesis-python/tests/cover/test_float_utils.py +++ b/hypothesis-python/tests/cover/test_float_utils.py @@ -24,7 +24,7 @@ sign_aware_lte, ) -from tests.conjecture.common import float_kwargs, make_float_kw +from tests.conjecture.common import float_kw, float_kwargs def test_can_handle_straddling_zero(): @@ -50,26 +50,26 @@ def test_next_float_equal(func, val): # exponent comparisons: -@example(make_float_kw(1, float_info.max), 0) -@example(make_float_kw(1, float_info.max), 1) -@example(make_float_kw(1, float_info.max), 10) -@example(make_float_kw(1, float_info.max), float_info.max) -@example(make_float_kw(1, float_info.max), math.inf) +@example(float_kw(1, float_info.max), 0) +@example(float_kw(1, float_info.max), 1) +@example(float_kw(1, float_info.max), 10) +@example(float_kw(1, float_info.max), float_info.max) +@example(float_kw(1, float_info.max), math.inf) # mantissa comparisons: -@example(make_float_kw(100.0001, 100.0003), 
100.0001) -@example(make_float_kw(100.0001, 100.0003), 100.0002) -@example(make_float_kw(100.0001, 100.0003), 100.0003) -@example(make_float_kw(100.0001, 100.0003, allow_nan=False), math.nan) -@example(make_float_kw(0, 10, allow_nan=False), math.nan) -@example(make_float_kw(0, 10, allow_nan=True), math.nan) +@example(float_kw(100.0001, 100.0003), 100.0001) +@example(float_kw(100.0001, 100.0003), 100.0002) +@example(float_kw(100.0001, 100.0003), 100.0003) +@example(float_kw(100.0001, 100.0003, allow_nan=False), math.nan) +@example(float_kw(0, 10, allow_nan=False), math.nan) +@example(float_kw(0, 10, allow_nan=True), math.nan) # the branch coverage of resampling in the "out of range of smallest magnitude" case # relies on randomness from the mantissa. try a few different values. -@example(make_float_kw(-4, -1, smallest_nonzero_magnitude=4), 4) -@example(make_float_kw(-4, -1, smallest_nonzero_magnitude=4), 5) -@example(make_float_kw(-4, -1, smallest_nonzero_magnitude=4), 6) -@example(make_float_kw(1, 4, smallest_nonzero_magnitude=4), -4) -@example(make_float_kw(1, 4, smallest_nonzero_magnitude=4), -5) -@example(make_float_kw(1, 4, smallest_nonzero_magnitude=4), -6) +@example(float_kw(-4, -1, smallest_nonzero_magnitude=4), 4) +@example(float_kw(-4, -1, smallest_nonzero_magnitude=4), 5) +@example(float_kw(-4, -1, smallest_nonzero_magnitude=4), 6) +@example(float_kw(1, 4, smallest_nonzero_magnitude=4), -4) +@example(float_kw(1, 4, smallest_nonzero_magnitude=4), -5) +@example(float_kw(1, 4, smallest_nonzero_magnitude=4), -6) @given(float_kwargs(), st.floats()) def test_float_clamper(kwargs, input_value): min_value = kwargs["min_value"] From 0401677c872f4eeabd021a95e54d46d603c65939 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 21 Dec 2024 18:48:53 -0500 Subject: [PATCH 2/6] move choicetree.py to shrinker/ --- .../src/hypothesis/internal/conjecture/shrinker.py | 10 +++++----- .../internal/conjecture/{ => shrinking}/choicetree.py | 0 
hypothesis-python/tests/conjecture/test_choice_tree.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) rename hypothesis-python/src/hypothesis/internal/conjecture/{ => shrinking}/choicetree.py (100%) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py index 0b74b2ff8e..b355401a7d 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py @@ -15,11 +15,6 @@ import attr from hypothesis.internal.compat import int_from_bytes, int_to_bytes -from hypothesis.internal.conjecture.choicetree import ( - ChoiceTree, - prefix_selection_order, - random_selection_order, -) from hypothesis.internal.conjecture.data import ( ConjectureData, ConjectureResult, @@ -44,6 +39,11 @@ Ordering, String, ) +from hypothesis.internal.conjecture.shrinking.choicetree import ( + ChoiceTree, + prefix_selection_order, + random_selection_order, +) if TYPE_CHECKING: from random import Random diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/choicetree.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/choicetree.py similarity index 100% rename from hypothesis-python/src/hypothesis/internal/conjecture/choicetree.py rename to hypothesis-python/src/hypothesis/internal/conjecture/shrinking/choicetree.py diff --git a/hypothesis-python/tests/conjecture/test_choice_tree.py b/hypothesis-python/tests/conjecture/test_choice_tree.py index 2e6c9c4b6b..0c2b39b080 100644 --- a/hypothesis-python/tests/conjecture/test_choice_tree.py +++ b/hypothesis-python/tests/conjecture/test_choice_tree.py @@ -11,7 +11,7 @@ from random import Random from hypothesis import given, strategies as st -from hypothesis.internal.conjecture.choicetree import ( +from hypothesis.internal.conjecture.shrinking.choicetree import ( ChoiceTree, prefix_selection_order, random_selection_order, From 
f2bf810b262231579c7772665afa362729daab0e Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 21 Dec 2024 19:28:00 -0500 Subject: [PATCH 3/6] implement choice_to_index and choice_from_index --- hypothesis-python/src/hypothesis/errors.py | 4 + .../hypothesis/internal/conjecture/choice.py | 320 ++++++++++++++++++ .../hypothesis/internal/conjecture/data.py | 42 +-- .../internal/conjecture/datatree.py | 62 +--- .../internal/conjecture/shrinker.py | 16 +- hypothesis-python/tests/conjecture/test_ir.py | 143 ++++++-- 6 files changed, 468 insertions(+), 119 deletions(-) create mode 100644 hypothesis-python/src/hypothesis/internal/conjecture/choice.py diff --git a/hypothesis-python/src/hypothesis/errors.py b/hypothesis-python/src/hypothesis/errors.py index 3adae78b6b..69facb9eb1 100644 --- a/hypothesis-python/src/hypothesis/errors.py +++ b/hypothesis-python/src/hypothesis/errors.py @@ -55,6 +55,10 @@ class Unsatisfiable(_Trimmable): """ +class ChoiceTooLarge(HypothesisException): + """An internal error raised by choice_from_index.""" + + class Flaky(_Trimmable): """Base class for indeterministic failures. Usually one of the more specific subclasses (FlakyFailure or FlakyStrategyDefinition) is raised.""" diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/choice.py b/hypothesis-python/src/hypothesis/internal/conjecture/choice.py new file mode 100644 index 0000000000..b536fa4ba5 --- /dev/null +++ b/hypothesis-python/src/hypothesis/internal/conjecture/choice.py @@ -0,0 +1,320 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import math + +from hypothesis.errors import ChoiceTooLarge +from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float +from hypothesis.internal.conjecture.utils import identity +from hypothesis.internal.floats import make_float_clamper, sign_aware_lte + + +def _size_to_index(size, *, alphabet_size): + # this is the closed form of this geometric series: + # for i in range(size): + # index += alphabet_size**i + if alphabet_size <= 0: + assert size == 0 + return 0 + if alphabet_size == 1: + return size + return (alphabet_size**size - 1) // (alphabet_size - 1) + + +def _index_to_size(index, alphabet_size): + if alphabet_size == 0: + return 0 + elif alphabet_size == 1: + # there is only one string of each size, so the size is equal to its + # ordering. + return index + + # the closed-form inverse of _size_to_index is + # size = math.floor(math.log(index * (alphabet_size - 1) + 1, alphabet_size)) + # which is fast, but suffers from float precision errors. As performance is + # relatively critical here, we'll use this formula by default, but fall back to + # a much slower integer-only logarithm when the calculation is too close for + # comfort. + total = index * (alphabet_size - 1) + 1 + size = math.log(total, alphabet_size) + + # if this computation is close enough that it could have been affected by + # floating point errors, use a much slower integer-only logarithm instead, + # which is guaranteed to be precise. + if 0 < math.ceil(size) - size < 1e-7: + size = 0 + while total >= alphabet_size: + total //= alphabet_size + size += 1 + return size + return math.floor(size) + + +def collection_index(choice, *, min_size, alphabet_size, to_order=identity): + # Collections are ordered by counting the number of values of each size, + # starting with min_size. alphabet_size indicates how many options there + # are for a single element. to_order orders an element by returning an n ≥ 0. + + # we start by adding the size to the index, relative to min_size. 
+ index = _size_to_index(len(choice), alphabet_size=alphabet_size) - _size_to_index( + min_size, alphabet_size=alphabet_size + ) + # We then add each element c to the index, starting from the end (so "ab" is + # simpler than "ba"). Each loop takes c at position i in the sequence and + # computes the number of sequences of size i which come before it in the ordering. + for i, c in enumerate(reversed(choice)): + index += (alphabet_size**i) * to_order(c) + return index + + +def collection_value(index, *, min_size, alphabet_size, from_order=identity): + from hypothesis.internal.conjecture.engine import BUFFER_SIZE_IR + + # this function is probably easiest to make sense of as an inverse of + # collection_index, tracking ~corresponding lines of code between the two. + + index += _size_to_index(min_size, alphabet_size=alphabet_size) + size = _index_to_size(index, alphabet_size=alphabet_size) + # index -> value computation can be arbitrarily expensive for arbitrarily + # large min_size collections. short-circuit if the resulting size would be + # obviously-too-large. callers will generally turn this into a .mark_overrun(). + if size >= BUFFER_SIZE_IR: + raise ChoiceTooLarge + + # subtract out the amount responsible for the size + index -= _size_to_index(size, alphabet_size=alphabet_size) + vals = [] + for i in reversed(range(size)): + # optimization for common case when we hit index 0. Exponentiation + # on large integers is expensive! + if index == 0: + n = 0 + else: + n = index // (alphabet_size**i) + # subtract out the nearest multiple of alphabet_size**i + index -= n * (alphabet_size**i) + vals.append(from_order(n)) + return vals + + +def zigzag_index(value, *, shrink_towards): + # value | 0 1 -1 2 -2 3 -3 4 + # index | 0 1 2 3 4 5 6 7 + index = 2 * abs(shrink_towards - value) + if value > shrink_towards: + index -= 1 + return index + + +def zigzag_value(index, *, shrink_towards): + assert index >= 0 + # count how many "steps" away from shrink_towards we are. 
+ n = (index + 1) // 2 + # now check if we're stepping up or down from shrink_towards. + if (index % 2) == 0: + n *= -1 + return shrink_towards + n + + +def choice_to_index(choice, kwargs): + # This function takes a choice in the choice sequence and returns the + # complexity index of that choice from among its possible values, where 0 + # is the simplest. + # + # Note that the index of a choice depends on its kwargs. The simplest value + # (at index 0) for {"min_value": None, "max_value": None} is 0, while for + # {"min_value": 1, "max_value": None} the simplest value is 1. + # + # choice_from_index inverts this function. An invariant on both functions is + # that they must be injective. Unfortunately, floats do not currently respect + # this. That's not *good*, but nothing has blown up - yet. And ordering + # floats in a sane manner is quite hard, so I've left it for another day. + + if isinstance(choice, int) and not isinstance(choice, bool): + # Let a = shrink_towards. + # * Unbounded: Ordered by (|a - x|, sgn(a - x)). Think of a zigzag. + # [a, a + 1, a - 1, a + 2, a - 2, ...] + # * Semi-bounded: Same as unbounded except stop on one side when you hit + # {min, max}_value. so min_value=-1 a=0 has order + # [0, 1, -1, 2, 3, 4, ...] + # * Bounded: Ordered by (sgn(a - x), |a - x|). Count upwards until max_value, + # then count downwards. + # [a, a + 1, a + 2, ..., max_value, a - 1, a - 2, ..., min_value] + # + # To simplify and gain intuition about this ordering, you can think about + # the most common case where 0 is first (a = 0). We deviate from this only + # rarely, e.g. for datetimes, where we generally want year 2000 to be + # simpler than year 0. 
+ + shrink_towards = kwargs["shrink_towards"] + min_value = kwargs["min_value"] + max_value = kwargs["max_value"] + + if min_value is not None: + shrink_towards = max(min_value, shrink_towards) + if max_value is not None: + shrink_towards = min(max_value, shrink_towards) + + if min_value is None and max_value is None: + # case: unbounded + return zigzag_index(choice, shrink_towards=shrink_towards) + elif min_value is not None and max_value is None: + # case: semibounded below + + # min_value = -2 + # index | 0 1 2 3 4 5 6 7 + # v | 0 1 -1 2 -2 3 4 5 + if abs(choice - shrink_towards) <= (shrink_towards - min_value): + return zigzag_index(choice, shrink_towards=shrink_towards) + return choice - min_value + elif max_value is not None and min_value is None: + # case: semibounded above + if abs(choice - shrink_towards) <= (max_value - shrink_towards): + return zigzag_index(choice, shrink_towards=shrink_towards) + return max_value - choice + else: + # case: bounded + + # range = [-2, 5] + # shrink_towards = 2 + # index | 0 1 2 3 4 5 6 7 + # v | 2 3 4 5 1 0 -1 -2 + # + # ^ with zero weights at index = [0, 2, 6] + # index | 0 1 2 3 4 + # v | 3 5 1 0 -2 + assert kwargs["weights"] is None or all( + w > 0 for w in kwargs["weights"].values() + ), "technically possible but really annoying to support zero weights" + if choice >= shrink_towards: + return choice - shrink_towards + return max_value - shrink_towards + abs(choice - shrink_towards) + elif isinstance(choice, bool): + # Ordered by [False, True]. + p = kwargs["p"] + if not (2 ** (-64) < p < (1 - 2 ** (-64))): + # only one option is possible, so whatever it is is first. 
+ return 0 + return int(choice) + elif isinstance(choice, bytes): + index = collection_index( + list(choice), + min_size=kwargs["min_size"], + alphabet_size=2**8, + ) + return index + elif isinstance(choice, str): + intervals = kwargs["intervals"] + index = collection_index( + choice, + min_size=kwargs["min_size"], + alphabet_size=len(intervals), + to_order=intervals.index_from_char_in_shrink_order, + ) + return index + elif isinstance(choice, float): + sign = int(sign_aware_lte(choice, -0.0)) + return (sign << 64) | float_to_lex(abs(choice)) + else: + raise NotImplementedError + + +def choice_from_index(index, ir_type, kwargs): + assert index >= 0 + if ir_type == "integer": + shrink_towards = kwargs["shrink_towards"] + min_value = kwargs["min_value"] + max_value = kwargs["max_value"] + + if min_value is not None: + shrink_towards = max(min_value, shrink_towards) + if max_value is not None: + shrink_towards = min(max_value, shrink_towards) + + if min_value is None and max_value is None: + # case: unbounded + return zigzag_value(index, shrink_towards=shrink_towards) + elif min_value is not None and max_value is None: + # case: semibounded below + + # min_value = -2 + # index | 0 1 2 3 4 5 6 7 + # v | 0 1 -1 2 -2 3 4 5 + if index <= zigzag_index(min_value, shrink_towards=shrink_towards): + return zigzag_value(index, shrink_towards=shrink_towards) + return index + min_value + + elif max_value is not None and min_value is None: + # case: semibounded above + if index <= zigzag_index(max_value, shrink_towards=shrink_towards): + return zigzag_value(index, shrink_towards=shrink_towards) + return max_value - index + else: + # case: bounded + + # range = [-2, 5] + # shrink_towards = 2 + # index | 0 1 2 3 4 5 6 7 + # v | 2 3 4 5 1 0 -1 -2 + # + # ^ with zero weights at index = [0, 2, 6] + # index | 0 1 2 3 4 + # v | 3 5 1 0 -2 + if kwargs["weights"] is not None: + assert all( + w > 0 for w in kwargs["weights"].values() + ), "possible but really annoying to support zero 
weightss" + if index <= max_value - shrink_towards: + return shrink_towards + index + return shrink_towards - (index - (max_value - shrink_towards)) + elif ir_type == "boolean": + # Ordered by [False, True]. + p = kwargs["p"] + only = None + if p <= 2 ** (-64): + only = False + elif p >= (1 - 2 ** (-64)): + only = True + + assert index in {0, 1} + if only is not None: + # only one choice + assert index == 0 + return only + return bool(index) + elif ir_type == "bytes": + value = collection_value( + index, + min_size=kwargs["min_size"], + alphabet_size=2**8, + ) + return bytes(value) + elif ir_type == "string": + intervals = kwargs["intervals"] + value = collection_value( + index, + min_size=kwargs["min_size"], + alphabet_size=len(intervals), + from_order=intervals.char_in_shrink_order, + ) + return "".join(value) + elif ir_type == "float": + sign = -1 if index >> 64 else 1 + result = sign * lex_to_float(index & ((1 << 64) - 1)) + + clamper = make_float_clamper( + min_value=kwargs["min_value"], + max_value=kwargs["max_value"], + smallest_nonzero_magnitude=kwargs["smallest_nonzero_magnitude"], + allow_nan=kwargs["allow_nan"], + ) + return clamper(result) + else: + raise NotImplementedError diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index eada22958a..0520d830dc 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -31,9 +31,10 @@ import attr -from hypothesis.errors import Frozen, InvalidArgument, StopTest +from hypothesis.errors import ChoiceTooLarge, Frozen, InvalidArgument, StopTest from hypothesis.internal.cache import LRUCache from hypothesis.internal.compat import add_note, floor, int_from_bytes, int_to_bytes +from hypothesis.internal.conjecture.choice import choice_from_index from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float from 
hypothesis.internal.conjecture.junkdrawer import ( IntList, @@ -965,23 +966,10 @@ def trivial(self): if self.was_forced: return True - if self.ir_type == "integer": - shrink_towards = self.kwargs["shrink_towards"] - min_value = self.kwargs["min_value"] - max_value = self.kwargs["max_value"] - - # shrink_towards is not respected for unbounded integers. (though - # probably it should be?) - if min_value is None and max_value is None: - return self.value == 0 - - if min_value is not None: - shrink_towards = max(min_value, shrink_towards) - if max_value is not None: - shrink_towards = min(max_value, shrink_towards) - - return self.value == shrink_towards - if self.ir_type == "float": + if self.ir_type != "float": + zero_value = choice_from_index(0, self.ir_type, self.kwargs) + return ir_value_equal(self.ir_type, self.value, zero_value) + else: min_value = self.kwargs["min_value"] max_value = self.kwargs["max_value"] shrink_towards = 0 @@ -1005,20 +993,6 @@ def trivial(self): # It would be good to compute this correctly in the future, but it's # also not incorrect to be conservative here. return False - if self.ir_type == "boolean": - p = self.kwargs["p"] - if p == 1.0: - return True - return self.value is False - if self.ir_type == "string": - # smallest size and contains only the smallest-in-shrink-order character. - minimal_char = self.kwargs["intervals"].char_in_shrink_order(0) - return self.value == (minimal_char * self.kwargs["min_size"]) - if self.ir_type == "bytes": - # smallest size and all-zero value. 
- return len(self.value) == self.kwargs["min_size"] and not any(self.value) - - raise NotImplementedError(f"unhandled ir_type {self.ir_type}") def __eq__(self, other): if not isinstance(other, IRNode): @@ -2390,8 +2364,8 @@ def _pop_ir_tree_node( assert self.index_ir == len(self.ir_prefix) - 1 if node.type == "simplest": try: - value = buffer_to_ir(ir_type, kwargs, buffer=bytes(BUFFER_SIZE)) - except StopTest: + value = choice_from_index(0, ir_type, kwargs) + except ChoiceTooLarge: self.mark_overrun() else: raise NotImplementedError diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py b/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py index 98eafedb25..64dbe2d414 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py @@ -8,7 +8,6 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -import itertools import math from typing import Optional, Union @@ -21,7 +20,7 @@ StopTest, ) from hypothesis.internal import floats as flt -from hypothesis.internal.compat import int_to_bytes +from hypothesis.internal.conjecture.choice import choice_from_index from hypothesis.internal.conjecture.data import ( BooleanKWargs, BytesKWargs, @@ -259,54 +258,17 @@ def compute_max_children(ir_type, kwargs): # compute_max_children than to reify the list of children (only to immediately # throw it away). def all_children(ir_type, kwargs): - if ir_type == "integer": - min_value = kwargs["min_value"] - max_value = kwargs["max_value"] - - if min_value is None and max_value is None: - # full 128 bit range. - yield from range(-(2**127) + 1, 2**127 - 1) - - elif min_value is not None and max_value is not None: - yield from range(min_value, max_value + 1) - else: - assert (min_value is None) ^ (max_value is None) - # hard case: only one bound was specified. 
Here we probe in 128 bits - # around shrink_towards, and discard those above max_value or below - # min_value respectively. - shrink_towards = kwargs["shrink_towards"] - if min_value is None: - shrink_towards = min(max_value, shrink_towards) - yield from range(shrink_towards - (2**127) + 1, max_value) - else: - assert max_value is None - shrink_towards = max(min_value, shrink_towards) - yield from range(min_value, shrink_towards + (2**127) - 1) - - if ir_type == "boolean": - p = kwargs["p"] - if p <= 2 ** (-64): - yield False - elif p >= (1 - 2 ** (-64)): - yield True - else: - yield from [False, True] - if ir_type == "bytes": - for size in range(kwargs["min_size"], kwargs["max_size"] + 1): - yield from (int_to_bytes(i, size) for i in range(2 ** (8 * size))) - if ir_type == "string": - min_size = kwargs["min_size"] - max_size = kwargs["max_size"] - intervals = kwargs["intervals"] - - # written unidiomatically in order to handle the case of max_size=inf. - size = min_size - while size <= max_size: - for ords in itertools.product(intervals, repeat=size): - yield "".join(chr(n) for n in ords) - size += 1 - if ir_type == "float": - + if ir_type != "float": + for index in range(compute_max_children(ir_type, kwargs)): + yield choice_from_index(index, ir_type, kwargs) + else: + # the float ordering is not injective (because of resampling + # out-of-bounds values), so using choice_from_index would result in + # duplicates. This violates invariants in datatree about being able + # to draw unique new children using all_children. + # + # We instead maintain a separate implementation for floats. + # TODO_IR write a better (bijective) ordering for floats and remove this! 
def floats_between(a, b): for n in range(float_to_int(a), float_to_int(b) + 1): yield int_to_float(n) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py index b355401a7d..4d7dd6fc4b 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py @@ -15,6 +15,7 @@ import attr from hypothesis.internal.compat import int_from_bytes, int_to_bytes +from hypothesis.internal.conjecture.choice import choice_from_index from hypothesis.internal.conjecture.data import ( ConjectureData, ConjectureResult, @@ -1390,17 +1391,6 @@ def minimize_nodes(self, nodes): def try_trivial_examples(self, chooser): i = chooser.choose(range(len(self.examples))) - def trivial_value(ir_type): - # TODO: In an ideal world this would take into account the - # node kwargs, but this is an adequate first approximation. - return { - "integer": 0, - "string": "", - "boolean": False, - "float": 0.0, - "bytes": b"", - }[ir_type] - prev = self.shrink_target nodes = self.shrink_target.ir_nodes ex = self.examples[i] @@ -1410,7 +1400,9 @@ def trivial_value(ir_type): ( node if node.was_forced - else node.copy(with_value=trivial_value(node.ir_type)) + else node.copy( + with_value=choice_from_index(0, node.ir_type, node.kwargs) + ) ) for node in nodes[ex.ir_start : ex.ir_end] ] diff --git a/hypothesis-python/tests/conjecture/test_ir.py b/hypothesis-python/tests/conjecture/test_ir.py index 74e5c1c104..52cdfde931 100644 --- a/hypothesis-python/tests/conjecture/test_ir.py +++ b/hypothesis-python/tests/conjecture/test_ir.py @@ -14,8 +14,17 @@ import pytest -from hypothesis import HealthCheck, assume, example, given, settings, strategies as st +from hypothesis import ( + HealthCheck, + assume, + example, + given, + note, + settings, + strategies as st, +) from hypothesis.errors import StopTest +from hypothesis.internal.conjecture.choice 
import choice_from_index, choice_to_index from hypothesis.internal.conjecture.data import ( COLLECTION_DEFAULT_MAX_SIZE, ConjectureData, @@ -37,6 +46,7 @@ from tests.common.debug import minimal from tests.conjecture.common import ( + clamped_shrink_towards, draw_value, float_kw, fresh_data, @@ -212,20 +222,8 @@ def test_compute_max_children_and_all_children_agree(ir_type_and_kwargs): @given(integer_kwargs()) -def test_compute_max_children_integer_ranges(kwargs): - if kwargs["weights"] is not None: - # this case is in principle testable. would need to takewhile from all_children - # while weight is not zero. - assume(all(v > 0 for v in kwargs["weights"])) - if kwargs["min_value"] is not None: - expected = kwargs["min_value"] - else: - offset = ( - 0 - if kwargs["max_value"] is None - else min(kwargs["max_value"], kwargs["shrink_towards"]) - ) - expected = offset - (2**127) + 1 +def test_compute_max_children_unbounded_integer_ranges(kwargs): + expected = clamped_shrink_towards(kwargs) first = next(all_children("integer", kwargs)) assert expected == first, (expected, first) @@ -541,14 +539,21 @@ def test_forced_nodes_are_trivial(node): kwargs=integer_kw(max_value=10, shrink_towards=1), was_forced=False, ), - # we don't consider shrink_towards for unbounded integers. - # the trivial value should probably be 1 here, not 0. - IRNode( - ir_type="integer", - value=0, - kwargs=integer_kw(shrink_towards=1), - was_forced=False, - ), + # TODO_IR: this *is* trivial by node.trivial, but not by shrinking, because + # the buffer ordering doesn't yet consider shrink_towards for unbounded + # integers this will be fixed (and this test case can be uncommented) when + # we move shrink ordering to the typed choice sequence. 
+ # IRNode( + # ir_type="integer", + # value=1, + # kwargs={ + # "min_value": None, + # "max_value": None, + # "weights": None, + # "shrink_towards": 1, + # }, + # was_forced=False, + # ), ], ) def test_trivial_nodes(node): @@ -718,3 +723,95 @@ def test_node_template_simplest_is_actually_trivial(node): getattr(data, f"draw_{node.ir_type}")(**node.kwargs) assert len(data.ir_nodes) == 1 assert data.ir_nodes[0].trivial + + +@given(ir_types_and_kwargs()) +@example(("boolean", {"p": 0})) +@example(("boolean", {"p": 1})) +def test_choice_indices_are_positive(ir_type_and_kwargs): + (ir_type, kwargs) = ir_type_and_kwargs + v = draw_value(ir_type, kwargs) + assert choice_to_index(v, kwargs) >= 0 + + +@given(integer_kwargs()) +def test_shrink_towards_has_index_0(kwargs): + shrink_towards = clamped_shrink_towards(kwargs) + note({"clamped_shrink_towards": shrink_towards}) + assert choice_to_index(shrink_towards, kwargs) == 0 + assert choice_from_index(0, "integer", kwargs) == shrink_towards + + +@given(ir_types_and_kwargs()) +def test_choice_to_index_injective(ir_type_and_kwargs): + # ir ordering should be injective both ways. + (ir_type, kwargs) = ir_type_and_kwargs + # ...except for floats, which are hard to order bijectively. 
+ assume(ir_type != "float") + # cap to 10k so this test finishes in a reasonable amount of time + cap = min(compute_max_children(ir_type, kwargs), 10_000) + + indices = set() + for i, choice in enumerate(all_children(ir_type, kwargs)): + if i >= cap: + break + index = choice_to_index(choice, kwargs) + assert index not in indices + indices.add(index) + + +@given(ir_types_and_kwargs()) +@example( + ( + "string", + {"min_size": 0, "max_size": 10, "intervals": IntervalSet.from_string("a")}, + ) +) +def test_choice_from_value_injective(ir_type_and_kwargs): + (ir_type, kwargs) = ir_type_and_kwargs + assume(ir_type != "float") + cap = min(compute_max_children(ir_type, kwargs), 10_000) + + choices = set() + for index in range(cap): + choice = choice_from_index(index, ir_type, kwargs) + assert choice not in choices + choices.add(choice) + + +@given(ir_types_and_kwargs()) +def test_choice_index_and_value_are_inverses(ir_type_and_kwargs): + (ir_type, kwargs) = ir_type_and_kwargs + v = draw_value(ir_type, kwargs) + index = choice_to_index(v, kwargs) + note({"v": v, "index": index}) + ir_value_equal(ir_type, choice_from_index(index, ir_type, kwargs), v) + + +@pytest.mark.parametrize( + "ir_type, kwargs, choices", + [ + ("boolean", {"p": 1}, [True]), + ("boolean", {"p": 0}, [False]), + ("integer", integer_kw(min_value=1, shrink_towards=4), range(1, 10)), + ("integer", integer_kw(max_value=5, shrink_towards=2), range(-10, 5 + 1)), + ("integer", integer_kw(max_value=5), range(-10, 5 + 1)), + ("integer", integer_kw(min_value=0, shrink_towards=1), range(10)), + ( + "float", + float_kw(1.0, next_up(next_up(1.0))), + [1.0, next_up(1.0), next_up(next_up(1.0))], + ), + ( + "float", + float_kw(next_down(-0.0), next_up(0.0)), + [next_down(-0.0), -0.0, 0.0, next_up(0.0)], + ), + ], +) +def test_choice_index_and_value_are_inverses_explicit(ir_type, kwargs, choices): + for choice in choices: + index = choice_to_index(choice, kwargs) + assert ir_value_equal( + ir_type, 
choice_from_index(index, ir_type, kwargs), choice + ) From fb243842db08717bb6c58b5b96c042422b4df9b1 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 21 Dec 2024 19:28:04 -0500 Subject: [PATCH 4/6] reduce max_examples in a test for performance --- hypothesis-python/tests/nocover/test_targeting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypothesis-python/tests/nocover/test_targeting.py b/hypothesis-python/tests/nocover/test_targeting.py index e1cab85a98..116b5f78b7 100644 --- a/hypothesis-python/tests/nocover/test_targeting.py +++ b/hypothesis-python/tests/nocover/test_targeting.py @@ -67,7 +67,7 @@ def score(enabled): phases.append(Phase.target) @seed(0) - @settings(database=None, max_examples=200, phases=phases) + @settings(database=None, max_examples=100, phases=phases) @given(strat) def test(ls): nonlocal result From 2f692082d79b7937e6d4e69b5fc491209a56a649 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sun, 22 Dec 2024 00:44:34 -0500 Subject: [PATCH 5/6] linting, more explicit coverage, add release notes --- hypothesis-python/RELEASE.rst | 3 +++ .../src/hypothesis/internal/conjecture/choice.py | 6 ++---- hypothesis-python/tests/conjecture/test_ir.py | 2 ++ 3 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 hypothesis-python/RELEASE.rst diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..634228bd8c --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,3 @@ +RELEASE_TYPE: patch + +This patch lays some groundwork for migrating our internal representation to the typed choice sequence (:issue:`3921`) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/choice.py b/hypothesis-python/src/hypothesis/internal/conjecture/choice.py index b536fa4ba5..5760f7c138 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/choice.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/choice.py @@ -204,21 +204,19 @@ def 
choice_to_index(choice, kwargs): return 0 return int(choice) elif isinstance(choice, bytes): - index = collection_index( + return collection_index( list(choice), min_size=kwargs["min_size"], alphabet_size=2**8, ) - return index elif isinstance(choice, str): intervals = kwargs["intervals"] - index = collection_index( + return collection_index( choice, min_size=kwargs["min_size"], alphabet_size=len(intervals), to_order=intervals.index_from_char_in_shrink_order, ) - return index elif isinstance(choice, float): sign = int(sign_aware_lte(choice, -0.0)) return (sign << 64) | float_to_lex(abs(choice)) diff --git a/hypothesis-python/tests/conjecture/test_ir.py b/hypothesis-python/tests/conjecture/test_ir.py index 52cdfde931..e0c9a343f7 100644 --- a/hypothesis-python/tests/conjecture/test_ir.py +++ b/hypothesis-python/tests/conjecture/test_ir.py @@ -797,6 +797,8 @@ def test_choice_index_and_value_are_inverses(ir_type_and_kwargs): ("integer", integer_kw(max_value=5, shrink_towards=2), range(-10, 5 + 1)), ("integer", integer_kw(max_value=5), range(-10, 5 + 1)), ("integer", integer_kw(min_value=0, shrink_towards=1), range(10)), + ("integer", integer_kw(-5, 5, shrink_towards=3), range(-5, 5 + 1)), + ("integer", integer_kw(-5, 5, shrink_towards=-3), range(-5, 5 + 1)), ( "float", float_kw(1.0, next_up(next_up(1.0))), From e289629d46a19d29986d8fb83e4d5728389cd907 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sun, 22 Dec 2024 14:59:00 -0500 Subject: [PATCH 6/6] change bounded ordering to match semibounded --- .../hypothesis/internal/conjecture/choice.py | 63 ++++++++++--------- hypothesis-python/tests/conjecture/test_ir.py | 30 +++++++++ 2 files changed, 63 insertions(+), 30 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/choice.py b/hypothesis-python/src/hypothesis/internal/conjecture/choice.py index 5760f7c138..8fbfeb3d80 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/choice.py +++ 
b/hypothesis-python/src/hypothesis/internal/conjecture/choice.py @@ -141,12 +141,11 @@ def choice_to_index(choice, kwargs): # Let a = shrink_towards. # * Unbounded: Ordered by (|a - x|, sgn(a - x)). Think of a zigzag. # [a, a + 1, a - 1, a + 2, a - 2, ...] - # * Semi-bounded: Same as unbounded except stop on one side when you hit + # * Semi-bounded: Same as unbounded, except stop on one side when you hit # {min, max}_value. so min_value=-1 a=0 has order # [0, 1, -1, 2, 3, 4, ...] - # * Bounded: Ordered by (sgn(a - x), |a - x|). Count upwards until max_value, - # then count downards. - # [a, a + 1, a + 2, ..., max_value, a - 1, a - 2, ..., min_value] + # * Bounded: Same as unbounded and semibounded, except stop on each side + # when you hit {min, max}_value. # # To simplify and gain intuition about this ordering, you can think about # the most common case where 0 is first (a = 0). We deviate from this only @@ -185,17 +184,28 @@ def choice_to_index(choice, kwargs): # range = [-2, 5] # shrink_towards = 2 # index | 0 1 2 3 4 5 6 7 - # v | 2 3 4 5 1 0 -1 -2 + # v | 2 3 1 4 0 5 -1 -2 # # ^ with zero weights at index = [0, 2, 6] # index | 0 1 2 3 4 - # v | 3 5 1 0 -2 + # v | 3 4 0 5 -2 assert kwargs["weights"] is None or all( w > 0 for w in kwargs["weights"].values() ), "technically possible but really annoying to support zero weights" - if choice >= shrink_towards: - return choice - shrink_towards - return max_value - shrink_towards + abs(choice - shrink_towards) + + # check which side gets exhausted first + if (shrink_towards - min_value) < (max_value - shrink_towards): + # Below shrink_towards gets exhausted first. Equivalent to + # semibounded below + if abs(choice - shrink_towards) <= (shrink_towards - min_value): + return zigzag_index(choice, shrink_towards=shrink_towards) + return choice - min_value + else: + # Above shrink_towards gets exhausted first. 
Equivalent to semibounded + # above + if abs(choice - shrink_towards) <= (max_value - shrink_towards): + return zigzag_index(choice, shrink_towards=shrink_towards) + return max_value - choice elif isinstance(choice, bool): # Ordered by [False, True]. p = kwargs["p"] @@ -241,14 +251,9 @@ def choice_from_index(index, ir_type, kwargs): return zigzag_value(index, shrink_towards=shrink_towards) elif min_value is not None and max_value is None: # case: semibounded below - - # min_value = -2 - # index | 0 1 2 3 4 5 6 7 - # v | 0 1 -1 2 -2 3 4 5 if index <= zigzag_index(min_value, shrink_towards=shrink_towards): return zigzag_value(index, shrink_towards=shrink_towards) return index + min_value - elif max_value is not None and min_value is None: # case: semibounded above if index <= zigzag_index(max_value, shrink_towards=shrink_towards): @@ -256,22 +261,20 @@ def choice_from_index(index, ir_type, kwargs): return max_value - index else: # case: bounded - - # range = [-2, 5] - # shrink_towards = 2 - # index | 0 1 2 3 4 5 6 7 - # v | 2 3 4 5 1 0 -1 -2 - # - # ^ with zero weights at index = [0, 2, 6] - # index | 0 1 2 3 4 - # v | 3 5 1 0 -2 - if kwargs["weights"] is not None: - assert all( - w > 0 for w in kwargs["weights"].values() - ), "possible but really annoying to support zero weightss" - if index <= max_value - shrink_towards: - return shrink_towards + index - return shrink_towards - (index - (max_value - shrink_towards)) + assert kwargs["weights"] is None or all( + w > 0 for w in kwargs["weights"].values() + ), "possible but really annoying to support zero weights" + + if (shrink_towards - min_value) < (max_value - shrink_towards): + # equivalent to semibounded below case + if index <= zigzag_index(min_value, shrink_towards=shrink_towards): + return zigzag_value(index, shrink_towards=shrink_towards) + return index + min_value + else: + # equivalent to semibounded above case + if index <= zigzag_index(max_value, shrink_towards=shrink_towards): + return 
zigzag_value(index, shrink_towards=shrink_towards) + return max_value - index elif ir_type == "boolean": # Ordered by [False, True]. p = kwargs["p"] diff --git a/hypothesis-python/tests/conjecture/test_ir.py b/hypothesis-python/tests/conjecture/test_ir.py index e0c9a343f7..93e232477a 100644 --- a/hypothesis-python/tests/conjecture/test_ir.py +++ b/hypothesis-python/tests/conjecture/test_ir.py @@ -817,3 +817,33 @@ def test_choice_index_and_value_are_inverses_explicit(ir_type, kwargs, choices): assert ir_value_equal( ir_type, choice_from_index(index, ir_type, kwargs), choice ) + + +@pytest.mark.parametrize( + "kwargs, choices", + [ + # unbounded + (integer_kw(), (0, 1, -1, 2, -2, 3, -3)), + (integer_kw(shrink_towards=2), (2, 3, 1, 4, 0, 5, -1, 6, -2)), + # semibounded (below) + (integer_kw(min_value=3), (3, 4, 5, 6, 7)), + (integer_kw(min_value=3, shrink_towards=5), (5, 6, 4, 7, 3, 8, 9)), + (integer_kw(min_value=-3), (0, 1, -1, 2, -2, 3, -3, 4, 5, 6)), + (integer_kw(min_value=-3, shrink_towards=-1), (-1, 0, -2, 1, -3, 2, 3, 4)), + # semibounded (above) + (integer_kw(max_value=3), (0, 1, -1, 2, -2, 3, -3, -4, -5, -6)), + (integer_kw(max_value=3, shrink_towards=1), (1, 2, 0, 3, -1, -2, -3, -4)), + (integer_kw(max_value=-3), (-3, -4, -5, -6, -7)), + (integer_kw(max_value=-3, shrink_towards=-5), (-5, -4, -6, -3, -7, -8, -9)), + # bounded + (integer_kw(-3, 3), (0, 1, -1, 2, -2, 3, -3)), + (integer_kw(-3, 3, shrink_towards=1), (1, 2, 0, 3, -1, -2, -3)), + (integer_kw(-3, 3, shrink_towards=-1), (-1, 0, -2, 1, -3, 2, 3)), + ], + ids=repr, +) +def test_integer_choice_index(kwargs, choices): + # explicit test which checks that the order of `choices` matches the index + # order. + for i, choice in enumerate(choices): + assert choice_to_index(choice, kwargs) == i