From 4ab61af024c87390e40639940db28efb76477ab1 Mon Sep 17 00:00:00 2001 From: DrewKimball Date: Thu, 8 Sep 2022 02:42:50 -0700 Subject: [PATCH] roachtest: make window functions deterministic This commit modifies the sqlsmith logic to add all columns to the `OVER` clause for all window functions except for the ranking functions. This is necessary because all other window/aggregate functions can produce different results for different rows within a peer group. Ordering on all columns ensures that we are ordering on a key, which restricts each peer group to one row (and therefore ensures there is only one possible order within each peer group). This commit also reverts #86742, since window functions should now be deterministic. Fixes #87353 Release note: None --- .../roachtest/tests/query_comparison_util.go | 3 --- pkg/internal/sqlsmith/scalar.go | 17 +++++++---------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/pkg/cmd/roachtest/tests/query_comparison_util.go b/pkg/cmd/roachtest/tests/query_comparison_util.go index 459b17ee2c67..94f26860b350 100644 --- a/pkg/cmd/roachtest/tests/query_comparison_util.go +++ b/pkg/cmd/roachtest/tests/query_comparison_util.go @@ -182,9 +182,6 @@ func runOneRoundQueryComparison( sqlsmith.UnlikelyRandomNulls(), sqlsmith.DisableCrossJoins(), sqlsmith.DisableIndexHints(), sqlsmith.DisableWith(), sqlsmith.LowProbabilityWhereClauseWithJoinTables(), - // TODO(mgartner): Re-enable aggregate functions when we can guarantee - // they are deterministic. - sqlsmith.DisableAggregateFuncs(), sqlsmith.SetComplexity(.3), sqlsmith.SetScalarComplexity(.1), ) diff --git a/pkg/internal/sqlsmith/scalar.go b/pkg/internal/sqlsmith/scalar.go index 3d5076d8a2d6..a37da1a699bc 100644 --- a/pkg/internal/sqlsmith/scalar.go +++ b/pkg/internal/sqlsmith/scalar.go @@ -464,19 +464,16 @@ func makeFunc(s *Smither, ctx Context, typ *types.T, refs colRefs) (tree.TypedEx }) if s.disableNondeterministicFns { switch fn.def.Name { - case "lead", "lag": - // Lead and lag need all columns to be added to the ORDER BY because - // while they respect the order, they can return any value within the - // partition. This means we need to be consistent about which rows fall - // on the boundary between peer groups. + case "rank", "dense_rank", "percent_rank": + // The rank functions don't need to add ordering columns because they + // return the same result for all rows in a given peer group. + default: + // Other window functions care about the relative order of rows within + // each peer group, so we ensure that the ordering is deterministic by + // limiting each peer group to one row (by ordering on all columns). for _, ref := range refs { addOrdCol(ref.typedExpr(), ref.typ) } - default: - // Other window functions only need to order by the argument columns. - for _, arg := range args { - addOrdCol(arg, arg.ResolvedType()) - } } } var frame *tree.WindowFrame