Skip to content

Commit

Permalink
roachtest: make window functions deterministic
Browse files Browse the repository at this point in the history
This commit modifies the sqlsmith logic to add all columns to the `OVER`
clause for all window functions except for the ranking functions. This is
necessary because all other window/aggregate functions can produce different
results for different rows within a peer group. Ordering on all columns
ensures that we are ordering on a key, which restricts each peer group to
one row (and therefore ensures there is only one possible order within each
peer group). This commit also reverts #87426, since window functions should
now be deterministic.

Fixes #87353

Release note: None
  • Loading branch information
DrewKimball authored and michae2 committed Jun 27, 2023
1 parent 6030b8d commit 70f305d
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 14 deletions.
3 changes: 0 additions & 3 deletions pkg/cmd/roachtest/tests/query_comparison_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,6 @@ func runOneRoundQueryComparison(
sqlsmith.UnlikelyRandomNulls(), sqlsmith.DisableCrossJoins(),
sqlsmith.DisableIndexHints(), sqlsmith.DisableWith(), sqlsmith.DisableDecimals(),
sqlsmith.LowProbabilityWhereClauseWithJoinTables(),
// TODO(mgartner): Re-enable aggregate functions when we can guarantee
// they are deterministic.
sqlsmith.DisableAggregateFuncs(),
sqlsmith.SetComplexity(.3),
sqlsmith.SetScalarComplexity(.1),
)
Expand Down
19 changes: 8 additions & 11 deletions pkg/internal/sqlsmith/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,18 +473,15 @@ func makeFunc(s *Smither, ctx Context, typ *types.T, refs colRefs) (tree.TypedEx
})
if s.disableNondeterministicFns {
switch fn.def.Name {
case "lead", "lag":
// Lead and lag need all columns to be added to the ORDER BY because
// while they respect the order, they can return any value within the
// partition. This means we need to be consistent about which rows fall
// on the boundary between peer groups.
for _, ref := range refs {
addOrdCol(ref.typedExpr(), ref.typ)
}
case "rank", "dense_rank", "percent_rank":
// The rank functions don't need to add ordering columns because they
// return the same result for all rows in a given peer group.
default:
// Other window functions only need to order by the argument columns.
for _, arg := range args {
addOrdCol(arg, arg.ResolvedType())
// Other window functions care about the relative order of rows within
// each peer group, so we ensure that the ordering is deterministic by
// limiting each peer group to one row (by ordering on all columns).
for _, i := range s.rnd.Perm(len(refs)) {
addOrdCol(refs[i].typedExpr(), refs[i].typ)
}
}
}
Expand Down

0 comments on commit 70f305d

Please sign in to comment.