Skip to content

Commit

Permalink
roachtest: make window functions deterministic
Browse files Browse the repository at this point in the history
This commit modifies the sqlsmith logic to add all columns to the `OVER`
clause for all window functions except for the ranking functions. This is
necessary because all other window/aggregate functions can produce different
results for different rows within a peer group. Ordering on all columns
ensures that we are ordering on a key, which restricts each peer group to
one row (and therefore ensures there is only one possible order within each
peer group). This commit also reverts cockroachdb#86742, since window functions should
now be deterministic.

Fixes cockroachdb#87353

Release note: None
  • Loading branch information
DrewKimball committed Sep 8, 2022
1 parent 07c8664 commit 4ab61af
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 13 deletions.
3 changes: 0 additions & 3 deletions pkg/cmd/roachtest/tests/query_comparison_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,6 @@ func runOneRoundQueryComparison(
sqlsmith.UnlikelyRandomNulls(), sqlsmith.DisableCrossJoins(),
sqlsmith.DisableIndexHints(), sqlsmith.DisableWith(),
sqlsmith.LowProbabilityWhereClauseWithJoinTables(),
// TODO(mgartner): Re-enable aggregate functions when we can guarantee
// they are deterministic.
sqlsmith.DisableAggregateFuncs(),
sqlsmith.SetComplexity(.3),
sqlsmith.SetScalarComplexity(.1),
)
Expand Down
17 changes: 7 additions & 10 deletions pkg/internal/sqlsmith/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -464,19 +464,16 @@ func makeFunc(s *Smither, ctx Context, typ *types.T, refs colRefs) (tree.TypedEx
})
if s.disableNondeterministicFns {
switch fn.def.Name {
case "lead", "lag":
// Lead and lag need all columns to be added to the ORDER BY because
// while they respect the order, they can return any value within the
// partition. This means we need to be consistent about which rows fall
// on the boundary between peer groups.
case "rank", "dense_rank", "percent_rank":
// The rank functions don't need to add ordering columns because they
// return the same result for all rows in a given peer group.
default:
// Other window functions care about the relative order of rows within
// each peer group, so we ensure that the ordering is deterministic by
// limiting each peer group to one row (by ordering on all columns).
for _, ref := range refs {
addOrdCol(ref.typedExpr(), ref.typ)
}
default:
// Other window functions only need to order by the argument columns.
for _, arg := range args {
addOrdCol(arg, arg.ResolvedType())
}
}
}
var frame *tree.WindowFrame
Expand Down

0 comments on commit 4ab61af

Please sign in to comment.