Skip to content

Commit

Permalink
Merge pull request #4215 from tybug/sort-key-ir
Browse files Browse the repository at this point in the history
Implement and use `sort_key_ir`
  • Loading branch information
tybug authored Dec 27, 2024
2 parents 3dbfae2 + b089be7 commit 462c5fc
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 77 deletions.
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
RELEASE_TYPE: patch

The shrinker now uses the typed choice sequence (:issue:`3921`) when ordering failing examples. As a result, Hypothesis may now report a different minimal failing example for some tests. We expect most cases to remain unchanged.
4 changes: 2 additions & 2 deletions hypothesis-python/src/hypothesis/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
ensure_free_stackframes,
gc_cumulative_time,
)
from hypothesis.internal.conjecture.shrinker import sort_key
from hypothesis.internal.conjecture.shrinker import sort_key, sort_key_ir
from hypothesis.internal.entropy import deterministic_PRNG
from hypothesis.internal.escalation import (
InterestingOrigin,
Expand Down Expand Up @@ -1226,7 +1226,7 @@ def run_engine(self):
if runner.interesting_examples:
self.falsifying_examples = sorted(
runner.interesting_examples.values(),
key=lambda d: sort_key(d.buffer),
key=lambda d: sort_key_ir(d.ir_nodes),
reverse=True,
)
else:
Expand Down
12 changes: 6 additions & 6 deletions hypothesis-python/src/hypothesis/internal/conjecture/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
startswith,
)
from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser
from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key
from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key, sort_key_ir
from hypothesis.internal.healthcheck import fail_health_check
from hypothesis.reporting import base_report, report

Expand Down Expand Up @@ -562,8 +562,8 @@ def test_function(self, data: ConjectureData) -> None:
if v < existing_score:
continue

if v > existing_score or sort_key(data.buffer) < sort_key(
existing_example.buffer
if v > existing_score or sort_key_ir(data.ir_nodes) < sort_key_ir(
existing_example.ir_nodes
):
data_as_result = data.as_result()
assert not isinstance(data_as_result, _Overrun)
Expand Down Expand Up @@ -619,7 +619,7 @@ def test_function(self, data: ConjectureData) -> None:
if self.first_bug_found_at is None:
self.first_bug_found_at = self.call_count
else:
if sort_key(data.buffer) < sort_key(existing.buffer):
if sort_key_ir(data.ir_nodes) < sort_key_ir(existing.ir_nodes):
self.shrinks += 1
self.downgrade_buffer(existing.buffer)
self.__data_cache.unpin(existing.buffer)
Expand Down Expand Up @@ -1376,7 +1376,7 @@ def shrink_interesting_examples(self) -> None:
self.finish_shrinking_deadline = time.perf_counter() + MAX_SHRINKING_SECONDS

for prev_data in sorted(
self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)
self.interesting_examples.values(), key=lambda d: sort_key_ir(d.ir_nodes)
):
assert prev_data.status == Status.INTERESTING
data = self.new_conjecture_data_ir(prev_data.ir_nodes)
Expand All @@ -1393,7 +1393,7 @@ def shrink_interesting_examples(self) -> None:
for k, v in self.interesting_examples.items()
if k not in self.shrunk_examples
),
key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))),
key=lambda kv: (sort_key_ir(kv[1].ir_nodes), sort_key(repr(kv[0]))),
)
self.debug(f"Shrinking {target!r}: {data.choices}")

Expand Down
12 changes: 7 additions & 5 deletions hypothesis-python/src/hypothesis/internal/conjecture/pareto.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from hypothesis.internal.conjecture.data import ConjectureData, ConjectureResult, Status
from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy, swap
from hypothesis.internal.conjecture.shrinker import sort_key
from hypothesis.internal.conjecture.shrinker import sort_key_ir

NO_SCORE = float("-inf")

Expand Down Expand Up @@ -45,10 +45,12 @@ def dominance(left, right):
more structured or failing tests it can be useful to track, and future work
will depend on it more."""

if left.buffer == right.buffer:
left_key = sort_key_ir(left.ir_nodes)
right_key = sort_key_ir(right.ir_nodes)
if left_key == right_key:
return DominanceRelation.EQUAL

if sort_key(right.buffer) < sort_key(left.buffer):
if right_key < left_key:
result = dominance(left=right, right=left)
if result == DominanceRelation.LEFT_DOMINATES:
return DominanceRelation.RIGHT_DOMINATES
Expand All @@ -60,7 +62,7 @@ def dominance(left, right):
return result

# Either left is better or there is no dominance relationship.
assert sort_key(left.buffer) < sort_key(right.buffer)
assert left_key < right_key

# The right is more interesting
if left.status < right.status:
Expand Down Expand Up @@ -126,7 +128,7 @@ def __init__(self, random):
self.__random = random
self.__eviction_listeners = []

self.front = SortedList(key=lambda d: sort_key(d.buffer))
self.front = SortedList(key=lambda d: sort_key_ir(d.ir_nodes))
self.__pending = None

def add(self, data):
Expand Down
65 changes: 26 additions & 39 deletions hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import attr

from hypothesis.internal.compat import int_from_bytes, int_to_bytes
from hypothesis.internal.conjecture.choice import choice_from_index
from hypothesis.internal.conjecture.choice import choice_from_index, choice_to_index
from hypothesis.internal.conjecture.data import (
ConjectureData,
ConjectureResult,
Expand Down Expand Up @@ -80,6 +80,13 @@ def sort_key(buffer: SortKeyT) -> tuple[int, SortKeyT]:
return (len(buffer), buffer)


def sort_key_ir(nodes: Sequence[IRNode]) -> tuple[int, tuple[int, ...]]:
    """Build an ordering key for a sequence of IR nodes.

    The key is a pair: the number of nodes, followed by a tuple holding
    ``choice_to_index(node.value, node.kwargs)`` for each node, in order.
    Since tuples compare element by element, a shorter sequence always
    orders before a longer one, and sequences of equal length are ordered
    by their per-node indices.

    This is the node-based counterpart of ``sort_key``, which pairs a
    buffer's length with the buffer itself.
    """
    node_count = len(nodes)
    # NOTE(review): presumably a smaller index denotes a simpler choice;
    # confirm against choice_to_index.
    indices = tuple(choice_to_index(node.value, node.kwargs) for node in nodes)
    return (node_count, indices)


SHRINK_PASS_DEFINITIONS: dict[str, "ShrinkPassDefinition"] = {}


Expand Down Expand Up @@ -305,7 +312,7 @@ def __init__(
self.__derived_values: dict = {}
self.__pending_shrink_explanation = None

self.initial_size = len(initial.buffer)
self.initial_size = len(initial.choices)

# We keep track of the current best example on the shrink_target
# attribute.
Expand Down Expand Up @@ -401,7 +408,7 @@ def consider_new_tree(self, tree: Sequence[IRNode]) -> bool:
if startswith(tree, self.nodes):
return True

if startswith(self.nodes, tree):
if sort_key_ir(self.nodes) < sort_key_ir(tree):
return False

previous = self.shrink_target
Expand Down Expand Up @@ -445,7 +452,7 @@ def incorporate_test_data(self, data):
return
if (
self.__predicate(data)
and sort_key(data.buffer) < sort_key(self.shrink_target.buffer)
and sort_key_ir(data.ir_nodes) < sort_key_ir(self.shrink_target.ir_nodes)
and self.__allow_transition(self.shrink_target, data)
):
self.update_shrink_target(data)
Expand Down Expand Up @@ -474,28 +481,6 @@ def shrink(self):
This method is "mostly idempotent" - calling it twice is unlikely to
have any effect, though it has a non-zero probability of doing so.
"""
# We assume that if an all-zero block of bytes is an interesting
# example then we're not going to do better than that.
# This might not technically be true: e.g. for integers() | booleans()
# the simplest example is actually [1, 0]. Missing this case is fairly
# harmless and this allows us to make various simplifying assumptions
# about the structure of the data (principally that we're never
# operating on a block of all zero bytes so can use non-zeroness as a
# signpost of complexity).
if not any(self.shrink_target.buffer) or self.incorporate_new_buffer(
bytes(len(self.shrink_target.buffer))
):
self.explain()
return

# There are multiple buffers that represent the same counterexample, eg
# n=2 (from the 16 bit integer bucket) and n=2 (from the 32 bit integer
# bucket). Before we start shrinking, we need to normalize to the minimal
# such buffer, else a buffer-smaller but ir-larger value may be chosen
# as the minimal counterexample.
data = self.engine.new_conjecture_data_ir(self.nodes)
self.engine.test_function(data)
self.incorporate_test_data(data.as_result())

try:
self.greedy_shrink()
Expand All @@ -509,7 +494,7 @@ def shrink(self):
def s(n):
return "s" if n != 1 else ""

total_deleted = self.initial_size - len(self.shrink_target.buffer)
total_deleted = self.initial_size - len(self.shrink_target.choices)
calls = self.engine.call_count - self.initial_calls
misaligned = self.engine.misaligned_count - self.initial_misaligned

Expand All @@ -518,7 +503,7 @@ def s(n):
"Shrink pass profiling\n"
"---------------------\n\n"
f"Shrinking made a total of {calls} call{s(calls)} of which "
f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} bytes out "
f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} choices out "
f"of {self.initial_size}."
)
for useful in [True, False]:
Expand All @@ -540,7 +525,7 @@ def s(n):
self.debug(
f" * {p.name} made {p.calls} call{s(p.calls)} of which "
f"{p.shrinks} shrank and {p.misaligned} were misaligned, "
f"deleting {p.deletions} byte{s(p.deletions)}."
f"deleting {p.deletions} choice{s(p.deletions)}."
)
self.debug("")
self.explain()
Expand Down Expand Up @@ -797,7 +782,7 @@ def fixate_shrink_passes(self, passes):
# the length are the best.
if self.shrink_target is before_sp:
reordering[sp] = 1
elif len(self.buffer) < len(before_sp.buffer):
elif len(self.choices) < len(before_sp.choices):
reordering[sp] = -1
else:
reordering[sp] = 0
Expand Down Expand Up @@ -988,7 +973,7 @@ def __changed_nodes(self):
assert prev_target is not new_target
prev_nodes = prev_target.ir_nodes
new_nodes = new_target.ir_nodes
assert sort_key(new_target.buffer) < sort_key(prev_target.buffer)
assert sort_key_ir(new_target.ir_nodes) < sort_key_ir(prev_target.ir_nodes)

if len(prev_nodes) != len(new_nodes) or any(
n1.ir_type != n2.ir_type for n1, n2 in zip(prev_nodes, new_nodes)
Expand Down Expand Up @@ -1186,11 +1171,11 @@ def remove_discarded(self):

for ex in self.shrink_target.examples:
if (
ex.length > 0
ex.ir_length > 0
and ex.discarded
and (not discarded or ex.start >= discarded[-1][-1])
and (not discarded or ex.ir_start >= discarded[-1][-1])
):
discarded.append((ex.start, ex.end))
discarded.append((ex.ir_start, ex.ir_end))

# This can happen if we have discards but they are all of
# zero length. This shouldn't happen very often so it's
Expand All @@ -1199,11 +1184,11 @@ def remove_discarded(self):
if not discarded:
break

attempt = bytearray(self.shrink_target.buffer)
attempt = list(self.nodes)
for u, v in reversed(discarded):
del attempt[u:v]

if not self.incorporate_new_buffer(attempt):
if not self.consider_new_tree(tuple(attempt)):
return False
return True

Expand Down Expand Up @@ -1563,7 +1548,9 @@ def test_not_equal(x, y):
],
)
),
key=lambda i: st.buffer[examples[i].start : examples[i].end],
key=lambda i: sort_key_ir(
st.ir_nodes[examples[i].ir_start : examples[i].ir_end]
),
)

def run_node_program(self, i, description, original, repeats=1):
Expand Down Expand Up @@ -1670,7 +1657,7 @@ def step(self, *, random_order=False):
initial_shrinks = self.shrinker.shrinks
initial_calls = self.shrinker.calls
initial_misaligned = self.shrinker.misaligned
size = len(self.shrinker.shrink_target.buffer)
size = len(self.shrinker.shrink_target.choices)
self.shrinker.engine.explain_next_call_as(self.name)

if random_order:
Expand All @@ -1687,7 +1674,7 @@ def step(self, *, random_order=False):
self.calls += self.shrinker.calls - initial_calls
self.misaligned += self.shrinker.misaligned - initial_misaligned
self.shrinks += self.shrinker.shrinks - initial_shrinks
self.deletions += size - len(self.shrinker.shrink_target.buffer)
self.deletions += size - len(self.shrinker.shrink_target.choices)
self.shrinker.engine.clear_call_explanation()
return True

Expand Down
2 changes: 1 addition & 1 deletion hypothesis-python/tests/cover/test_deadline.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_flaky_slow(i):


def test_deadlines_participate_in_shrinking():
@settings(deadline=500, max_examples=1000)
@settings(deadline=500, max_examples=1000, database=None)
@given(st.integers(min_value=0))
def slow_if_large(i):
if i >= 1000:
Expand Down
14 changes: 7 additions & 7 deletions hypothesis-python/tests/nocover/test_duplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ def test(b):
test()
except ValueError:
pass
# There are three circumstances in which a duplicate is allowed: We replay
# the failing test once to check for flakiness, once when shrinking to normalize
# to the minimal buffer, and then we replay the fully minimized failing test
# at the end to display the error. The complication comes from the fact that
# these may or may not be the same test case, so we can see either two test
# cases each run twice or one test case which has been run three times.
assert set(counts.values()) in ({1, 2, 3}, {1, 4})
# There are two circumstances in which a duplicate is allowed: We replay
# the failing test once to check for flakiness, and then we replay the
# fully minimized failing test at the end to display the error. The
# complication comes from the fact that these may or may not be the same
# test case, so we can see either two test cases each run twice or one
# test case which has been run three times.
assert set(counts.values()) in ({1, 2}, {1, 3})
assert len([k for k, v in counts.items() if v > 1]) <= 2
22 changes: 5 additions & 17 deletions hypothesis-python/tests/quality/test_float_shrinking.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,7 @@

import pytest

from hypothesis import (
HealthCheck,
Verbosity,
example,
given,
settings,
strategies as st,
)
from hypothesis import example, given, strategies as st
from hypothesis.internal.compat import ceil

from tests.common.debug import minimal
Expand All @@ -39,21 +32,16 @@ def test_can_shrink_in_variable_sized_context(n):
@example(1.7976931348623157e308)
@example(1.5)
@given(st.floats(min_value=0, allow_infinity=False, allow_nan=False))
@settings(deadline=None, suppress_health_check=list(HealthCheck))
def test_shrinks_downwards_to_integers(f):
g = minimal(
st.floats().filter(lambda x: x >= f),
settings=settings(verbosity=Verbosity.quiet, max_examples=10**6),
)
assert g == ceil(f)
assert minimal(st.floats(min_value=f)) == ceil(f)


@example(1)
@given(st.integers(1, 2**16 - 1))
@settings(deadline=None, suppress_health_check=list(HealthCheck), max_examples=10)
def test_shrinks_downwards_to_integers_when_fractional(b):
g = minimal(
st.floats().filter(lambda x: b < x < 2**53 and int(x) != x),
settings=settings(verbosity=Verbosity.quiet, max_examples=10**6),
st.floats(
min_value=b, max_value=2**53, exclude_min=True, exclude_max=True
).filter(lambda x: int(x) != x)
)
assert g == b + 0.5

0 comments on commit 462c5fc

Please sign in to comment.