From 4ab61af024c87390e40639940db28efb76477ab1 Mon Sep 17 00:00:00 2001
From: DrewKimball <drewk@cockroachlabs.com>
Date: Thu, 8 Sep 2022 02:42:50 -0700
Subject: [PATCH] roachtest: make window functions deterministic

This commit modifies the sqlsmith logic to add all columns to the `OVER`
clause for all window functions except for the ranking functions. This is
necessary because all other window/aggregate functions can produce different
results for different rows within a peer group. Ordering on all columns
ensures that we are ordering on a key, which restricts each peer group to
one row (and therefore ensures there is only one possible order within each
peer group). This commit also reverts #86742, since window functions should
now be deterministic.

Fixes #87353

Release note: None
---
 .../roachtest/tests/query_comparison_util.go    |  3 ---
 pkg/internal/sqlsmith/scalar.go                 | 17 +++++++----------
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/pkg/cmd/roachtest/tests/query_comparison_util.go b/pkg/cmd/roachtest/tests/query_comparison_util.go
index 459b17ee2c67..94f26860b350 100644
--- a/pkg/cmd/roachtest/tests/query_comparison_util.go
+++ b/pkg/cmd/roachtest/tests/query_comparison_util.go
@@ -182,9 +182,6 @@ func runOneRoundQueryComparison(
 		sqlsmith.UnlikelyRandomNulls(), sqlsmith.DisableCrossJoins(),
 		sqlsmith.DisableIndexHints(), sqlsmith.DisableWith(),
 		sqlsmith.LowProbabilityWhereClauseWithJoinTables(),
-		// TODO(mgartner): Re-enable aggregate functions when we can guarantee
-		// they are deterministic.
-		sqlsmith.DisableAggregateFuncs(),
 		sqlsmith.SetComplexity(.3),
 		sqlsmith.SetScalarComplexity(.1),
 	)
diff --git a/pkg/internal/sqlsmith/scalar.go b/pkg/internal/sqlsmith/scalar.go
index 3d5076d8a2d6..a37da1a699bc 100644
--- a/pkg/internal/sqlsmith/scalar.go
+++ b/pkg/internal/sqlsmith/scalar.go
@@ -464,19 +464,16 @@ func makeFunc(s *Smither, ctx Context, typ *types.T, refs colRefs) (tree.TypedEx
 		})
 		if s.disableNondeterministicFns {
 			switch fn.def.Name {
-			case "lead", "lag":
-				// Lead and lag need all columns to be added to the ORDER BY because
-				// while they respect the order, they can return any value within the
-				// partition. This means we need to be consistent about which rows fall
-				// on the boundary between peer groups.
+			case "rank", "dense_rank", "percent_rank":
+				// The rank functions don't need to add ordering columns because they
+				// return the same result for all rows in a given peer group.
+			default:
+				// Other window functions care about the relative order of rows within
+				// each peer group, so we ensure that the ordering is deterministic by
+				// limiting each peer group to one row (by ordering on all columns).
 				for _, ref := range refs {
 					addOrdCol(ref.typedExpr(), ref.typ)
 				}
-			default:
-				// Other window functions only need to order by the argument columns.
-				for _, arg := range args {
-					addOrdCol(arg, arg.ResolvedType())
-				}
 			}
 		}
 		var frame *tree.WindowFrame