From 70f305dbbc30429e6d44fa7b559cddd807debd1d Mon Sep 17 00:00:00 2001
From: DrewKimball <drewk@cockroachlabs.com>
Date: Thu, 8 Sep 2022 02:42:50 -0700
Subject: [PATCH] roachtest: make window functions deterministic

This commit modifies the sqlsmith logic to add all columns to the `OVER`
clause for all window functions except for the ranking functions. This is
necessary because all other window/aggregate functions can produce different
results for different rows within a peer group. Ordering on all columns
ensures that we are ordering on a key, which restricts each peer group to
one row (and therefore ensures there is only one possible order within each
peer group). This commit also reverts #87426, since window functions should
now be deterministic.

Fixes #87353

Release note: None
---
 .../roachtest/tests/query_comparison_util.go  |  3 ---
 pkg/internal/sqlsmith/scalar.go               | 19 ++++++++-----------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/pkg/cmd/roachtest/tests/query_comparison_util.go b/pkg/cmd/roachtest/tests/query_comparison_util.go
index 2fe13cf098df..7130f4f72a38 100644
--- a/pkg/cmd/roachtest/tests/query_comparison_util.go
+++ b/pkg/cmd/roachtest/tests/query_comparison_util.go
@@ -188,9 +188,6 @@ func runOneRoundQueryComparison(
 		sqlsmith.UnlikelyRandomNulls(), sqlsmith.DisableCrossJoins(),
 		sqlsmith.DisableIndexHints(), sqlsmith.DisableWith(), sqlsmith.DisableDecimals(),
 		sqlsmith.LowProbabilityWhereClauseWithJoinTables(),
-		// TODO(mgartner): Re-enable aggregate functions when we can guarantee
-		// they are deterministic.
-		sqlsmith.DisableAggregateFuncs(),
 		sqlsmith.SetComplexity(.3),
 		sqlsmith.SetScalarComplexity(.1),
 	)
diff --git a/pkg/internal/sqlsmith/scalar.go b/pkg/internal/sqlsmith/scalar.go
index bd72530792b3..9450f96b51cb 100644
--- a/pkg/internal/sqlsmith/scalar.go
+++ b/pkg/internal/sqlsmith/scalar.go
@@ -473,18 +473,15 @@ func makeFunc(s *Smither, ctx Context, typ *types.T, refs colRefs) (tree.TypedEx
 		})
 		if s.disableNondeterministicFns {
 			switch fn.def.Name {
-			case "lead", "lag":
-				// Lead and lag need all columns to be added to the ORDER BY because
-				// while they respect the order, they can return any value within the
-				// partition. This means we need to be consistent about which rows fall
-				// on the boundary between peer groups.
-				for _, ref := range refs {
-					addOrdCol(ref.typedExpr(), ref.typ)
-				}
+			case "rank", "dense_rank", "percent_rank":
+				// The rank functions don't need to add ordering columns because they
+				// return the same result for all rows in a given peer group.
 			default:
-				// Other window functions only need to order by the argument columns.
-				for _, arg := range args {
-					addOrdCol(arg, arg.ResolvedType())
+				// Other window functions care about the relative order of rows within
+				// each peer group, so we ensure that the ordering is deterministic by
+				// limiting each peer group to one row (by ordering on all columns).
+				for _, i := range s.rnd.Perm(len(refs)) {
+					addOrdCol(refs[i].typedExpr(), refs[i].typ)
 				}
 			}
 		}