From 70f305dbbc30429e6d44fa7b559cddd807debd1d Mon Sep 17 00:00:00 2001 From: DrewKimball Date: Thu, 8 Sep 2022 02:42:50 -0700 Subject: [PATCH] roachtest: make window functions deterministic This commit modifies the sqlsmith logic to add all columns to the `OVER` clause for all window functions except for the ranking functions. This is necessary because all other window/aggregate functions can produce different results for different rows within a peer group. Ordering on all columns ensures that we are ordering on a key, which restricts each peer group to one row (and therefore ensures there is only one possible order within each peer group). This commit also reverts #87426, since window functions should now be deterministic. Fixes #87353 Release note: None --- .../roachtest/tests/query_comparison_util.go | 3 --- pkg/internal/sqlsmith/scalar.go | 19 ++++++++----------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/pkg/cmd/roachtest/tests/query_comparison_util.go b/pkg/cmd/roachtest/tests/query_comparison_util.go index 2fe13cf098df..7130f4f72a38 100644 --- a/pkg/cmd/roachtest/tests/query_comparison_util.go +++ b/pkg/cmd/roachtest/tests/query_comparison_util.go @@ -188,9 +188,6 @@ func runOneRoundQueryComparison( sqlsmith.UnlikelyRandomNulls(), sqlsmith.DisableCrossJoins(), sqlsmith.DisableIndexHints(), sqlsmith.DisableWith(), sqlsmith.DisableDecimals(), sqlsmith.LowProbabilityWhereClauseWithJoinTables(), - // TODO(mgartner): Re-enable aggregate functions when we can guarantee - // they are deterministic. - sqlsmith.DisableAggregateFuncs(), sqlsmith.SetComplexity(.3), sqlsmith.SetScalarComplexity(.1), ) diff --git a/pkg/internal/sqlsmith/scalar.go b/pkg/internal/sqlsmith/scalar.go index bd72530792b3..9450f96b51cb 100644 --- a/pkg/internal/sqlsmith/scalar.go +++ b/pkg/internal/sqlsmith/scalar.go @@ -473,18 +473,15 @@ func makeFunc(s *Smither, ctx Context, typ *types.T, refs colRefs) (tree.TypedEx }) if s.disableNondeterministicFns { switch fn.def.Name { - case "lead", "lag": - // Lead and lag need all columns to be added to the ORDER BY because - // while they respect the order, they can return any value within the - // partition. This means we need to be consistent about which rows fall - // on the boundary between peer groups. - for _, ref := range refs { - addOrdCol(ref.typedExpr(), ref.typ) - } + case "rank", "dense_rank", "percent_rank": + // The rank functions don't need to add ordering columns because they + // return the same result for all rows in a given peer group. default: - // Other window functions only need to order by the argument columns. - for _, arg := range args { - addOrdCol(arg, arg.ResolvedType()) + // Other window functions care about the relative order of rows within + // each peer group, so we ensure that the ordering is deterministic by + // limiting each peer group to one row (by ordering on all columns). + for _, i := range s.rnd.Perm(len(refs)) { + addOrdCol(refs[i].typedExpr(), refs[i].typ) } } }