Merge #89003 #89088 #89113 #89166

89003: storage: CheckSSTConflicts performance fixes r=erikgrinaker a=itsbilal storage: Use range key masking in CheckSSTConflicts This change uses range key masking in case of import cancellation/restart inside CheckSSTConflicts. This prevents point keys shadowed by range tombstones from appearing to CheckSSTConflicts, significantly speeding up its runtime and allowing import restarts to succeed without getting wedged. Unblocks #87309. Release note: None. --- storage: Move no-overlap fast path up in CheckSSTConflicts This change moves up the fast path in CheckSSTConflicts where we check for any engine keys within the overlapping SST keys. If no engine keys are found, we can return from the function immediately. Previously we'd open a range-key sst iterator before doing this check, which has a pretty significant construction cost. Fixes #88702. Release note: None 89088: reduce: remove and simplify index PARTITION BY clauses r=mgartner a=mgartner Release note: None 89113: opt: fix invalid transformation in SplitLimitedSelectIntoUnionSelects r=mgartner a=mgartner #### opt: fix invalid transformation in SplitLimitedSelectIntoUnionSelects This commit removes some untested logic in `SplitLimitedSelectIntoUnionSelects` that created invalid expression transformations. With this logic, this rule could construct an unordered limit below the `UnionAll` which is incorrect. The bug could cause incorrect query results. Fixes #88993 Release note (bug fix): A bug has been fixed that could cause incorrect results in rare cases. The bug could only present if the following conditions were true: 1. A query with `ORDER BY` and `LIMIT` was executed. 2. The table containing the `ORDER BY` columns had an index containing those columns. 3. The index in (2) contained a prefix of columns held to a fixed number of values by the query filter (e.g., `WHERE a IN (1, 3)`), a `CHECK` constraint (e.g., `CHECK (a IN (1, 3))`), inferred by a computed column expression (e.g. `WHERE a IN (1, 3)` and a column `b INT AS (a + 10) STORED`), or inferred by a `PARTITION BY` clause (e.g. `INDEX (a, ...) PARTITION BY LIST (a) (PARTITION p VALUES ((1), (3)))`). This bug was present since version 22.1.0. 89166: storage: improve `BenchmarkUpdateSSTTimestamps` r=erikgrinaker a=erikgrinaker This patch improves `BenchmarkUpdateSSTTimestamps`, by running separate benchmarks varying the number of keys and concurrency. In particular, this exercises the `sstable.RewriteKeySuffixes` fast path, while the old benchmark only used the naïve read/write slow path. Touches #88723. Release note: None Co-authored-by: Bilal Akhtar <[email protected]> Co-authored-by: Marcus Gartner <[email protected]> Co-authored-by: Erik Grinaker <[email protected]>
cockroachdb · Oct 3, 2022 · 73f8ee6 · 73f8ee6
5 parents fca6c42 + 949bd90 + 25676bb + d6ccd10 + a9010ad
commit 73f8ee6
Show file tree

Hide file tree

Showing 10 changed files with 398 additions and 242 deletions.
diff --git a/pkg/ccl/logictestccl/testdata/logic_test/partitioning_constrained_scans b/pkg/ccl/logictestccl/testdata/logic_test/partitioning_constrained_scans
@@ -317,3 +317,4 @@ select
  │         └── [/'foo'/e'bar\x00'/5 - ]
  └── filters
       └── c = 5
+
diff --git a/pkg/ccl/logictestccl/testdata/logic_test/partitioning_index b/pkg/ccl/logictestccl/testdata/logic_test/partitioning_index
@@ -262,3 +262,21 @@ NULL  10    1
 20    NULL  1
 20    20    1
 25    NULL  1
+
+# Regression test for #88993 where a limit pushed down into a union of scans
+# caused incorrect query results.
+statement ok
+CREATE TABLE t88993 (
+  a INT,
+  b INT,
+  c INT,
+  INDEX (b, c, a) PARTITION BY LIST (b, c) (
+    PARTITION p1 VALUES IN ((11, 50))
+  )
+);
+INSERT INTO t88993 (a, b, c) VALUES (1, 10, 150), (0, 11, 100);
+
+query I
+SELECT min(a) FROM t88993
+----
+0
diff --git a/pkg/cmd/reduce/reduce/reducesql/reducesql.go b/pkg/cmd/reduce/reduce/reducesql/reducesql.go
@@ -49,6 +49,10 @@ var SQLPasses = []reduce.Pass{
 	removeIndexCols,
 	removeIndexPredicate,
 	removeIndexStoringCols,
+	removeIndexPartitionBy,
+	removeIndexPartitions,
+	removeIndexPartitionListValues,
+	removeIndexPartitionListCols,
 	removeWindowPartitions,
 	removeDBSchema,
 	removeFroms,
@@ -828,6 +832,101 @@ var (
 		}
 		return 0
 	})
+	removeIndexPartitionBy = walkSQL("remove INDEX PARTITION BY", func(xfi int, node interface{}) int {
+		xf := xfi == 0
+		removePartitionBy := func(idx *tree.IndexTableDef) int {
+			if idx.PartitionByIndex != nil {
+				if xf {
+					idx.PartitionByIndex = nil
+				}
+				return 1
+			}
+			return 0
+		}
+		switch node := node.(type) {
+		case *tree.IndexTableDef:
+			return removePartitionBy(node)
+		case *tree.UniqueConstraintTableDef:
+			return removePartitionBy(&node.IndexTableDef)
+		}
+		return 0
+	})
+	removeIndexPartitions = walkSQL("remove INDEX partitions", func(xfi int, node interface{}) int {
+		removePartitionBy := func(idx *tree.IndexTableDef) int {
+			if idx.PartitionByIndex != nil {
+				n := len(idx.PartitionByIndex.List)
+				if xfi < n {
+					idx.PartitionByIndex.List =
+						append(idx.PartitionByIndex.List[:xfi], idx.PartitionByIndex.List[xfi+1:]...)
+				}
+				return n
+			}
+			return 0
+		}
+		switch node := node.(type) {
+		case *tree.IndexTableDef:
+			return removePartitionBy(node)
+		case *tree.UniqueConstraintTableDef:
+			return removePartitionBy(&node.IndexTableDef)
+		}
+		return 0
+	})
+	removeIndexPartitionListValues = walkSQL("remove INDEX partition list values", func(xfi int, node interface{}) int {
+		removePartitionBy := func(idx *tree.IndexTableDef) int {
+			if idx.PartitionByIndex != nil {
+				n := 0
+				for i := range idx.PartitionByIndex.List {
+					list := &idx.PartitionByIndex.List[i]
+					l := len(list.Exprs)
+					if xfi >= n && xfi < n+l {
+						list.Exprs = append(list.Exprs[:xfi-n], list.Exprs[xfi-n+1:]...)
+					}
+					n += l
+				}
+				return n
+			}
+			return 0
+		}
+		switch node := node.(type) {
+		case *tree.IndexTableDef:
+			return removePartitionBy(node)
+		case *tree.UniqueConstraintTableDef:
+			return removePartitionBy(&node.IndexTableDef)
+		}
+		return 0
+	})
+	removeIndexPartitionListCols = walkSQL("remove INDEX partition list cols", func(xfi int, node interface{}) int {
+		removePartitionBy := func(idx *tree.IndexTableDef) int {
+			if idx.PartitionByIndex != nil && len(idx.PartitionByIndex.List) > 0 {
+				n := len(idx.PartitionByIndex.Fields)
+				if xfi < n {
+					idx.PartitionByIndex.Fields =
+						append(idx.PartitionByIndex.Fields[:xfi], idx.PartitionByIndex.Fields[xfi+1:]...)
+					// Remove the corresponding column from the index columns.
+					idx.Columns = append(idx.Columns[:xfi], idx.Columns[xfi+1:]...)
+					// Remove the corresponding value from every tuple in every
+					// partition.
+					for i := range idx.PartitionByIndex.List {
+						list := &idx.PartitionByIndex.List[i]
+						for j := range list.Exprs {
+							t := list.Exprs[j].(*tree.Tuple)
+							t.Exprs = append(t.Exprs[:xfi], t.Exprs[xfi+1:]...)
+						}
+					}
+
+				}
+				return n
+			}
+			return 0
+		}
+		switch node := node.(type) {
+		case *tree.IndexTableDef:
+			return removePartitionBy(node)
+		case *tree.UniqueConstraintTableDef:
+			return removePartitionBy(&node.IndexTableDef)
+		}
+		return 0
+	})
 	removeWindowPartitions = walkSQL("remove WINDOW partitions", func(xfi int, node interface{}) int {
 		switch node := node.(type) {
 		case *tree.WindowDef:

diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go
@@ -219,7 +219,7 @@ func EvalAddSSTable(
 		leftPeekBound, rightPeekBound := rangeTombstonePeekBounds(
 			args.Key, args.EndKey, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey())
 		statsDelta, err = storage.CheckSSTConflicts(ctx, sst, readWriter, start, end, leftPeekBound, rightPeekBound,
-			args.DisallowShadowing, args.DisallowShadowingBelow, maxIntents, usePrefixSeek)
+			args.DisallowShadowing, args.DisallowShadowingBelow, sstToReqTS, maxIntents, usePrefixSeek)
 		statsDelta.Add(sstReqStatsDelta)
 		if err != nil {
 			return result.Result{}, errors.Wrap(err, "checking for key collisions")

diff --git a/pkg/sql/logictest/testdata/logic_test/aggregate b/pkg/sql/logictest/testdata/logic_test/aggregate
@@ -3786,3 +3786,19 @@ query FFFF
 select covar_pop(y, x), covar_samp(y, x), regr_sxx(y, x), regr_syy(y, x) from corrupt_combine
 ----
 37.5 45 2983.333333333333 17.5
+
+# Regression test for #88993 where a limit pushed down into a union of scans
+# caused incorrect query results.
+statement ok
+CREATE TABLE t2 (
+  a INT,
+  b INT,
+  c INT,
+  INDEX (b, c, a)
+);
+INSERT INTO t2 (a, b, c) VALUES (1, 10, 20), (0, 11, 100);
+
+query I
+SELECT min(a) FROM t2 WHERE (b <= 11 AND c < 50) OR (b = 11 AND c = 50) OR (b >= 11 AND c > 50)
+----
+0
diff --git a/pkg/sql/opt/xform/general_funcs.go b/pkg/sql/opt/xform/general_funcs.go
@@ -545,6 +545,7 @@ func (c *CustomFuncs) splitScanIntoUnionScansOrSelects(
 	// UnionAll tree.
 	var noLimitSpans constraint.Spans
 	var last memo.RelExpr
+	spColList := sp.Cols.ToList()
 	queue := list.New()
 	for i, n := 0, spans.Count(); i < n; i++ {
 		if i >= budgetExceededIndex {
@@ -561,23 +562,35 @@ func (c *CustomFuncs) splitScanIntoUnionScansOrSelects(
 		}
 		for j, m := 0, singleKeySpans.Count(); j < m; j++ {
 			// Construct a new Scan for each span.
+			// Note: newHardLimit will be 0 (i.e., no limit) if there are
+			// filters to be applied in a Select.
 			newScanPrivate := c.makeNewScanPrivate(
 				sp,
 				cons.Columns,
 				newHardLimit,
 				singleKeySpans.Get(j),
 			)
-			newScanOrSelect := c.e.f.ConstructScan(newScanPrivate)
+			newScanOrLimitedSelect := c.e.f.ConstructScan(newScanPrivate)
 			if !filters.IsTrue() {
-				newScanOrSelect = c.wrapScanInLimitedSelect(
-					newScanOrSelect,
-					sp,
-					newScanPrivate,
-					filters,
-					limit,
+				// If there are filters, apply them and a limit. The limit is
+				// not needed if there are no filters because the scan's hard
+				// limit will be set (see newHardLimit).
+				//
+				// TODO(mgartner/msirek): Converting ColSets to ColLists here is
+				// only safe because column IDs are always allocated in a
+				// consistent, ascending order for each duplicated table in the
+				// metadata. If column ID allocation changes, this could break.
+				newColList := newScanPrivate.Cols.ToList()
+				newScanOrLimitedSelect = c.e.f.ConstructLimit(
+					c.e.f.ConstructSelect(
+						newScanOrLimitedSelect,
+						c.RemapScanColsInFilter(filters, sp, newScanPrivate),
+					),
+					c.IntConst(tree.NewDInt(tree.DInt(limit))),
+					ordering.RemapColumns(spColList, newColList),
 				)
 			}
-			queue.PushBack(newScanOrSelect)
+			queue.PushBack(newScanOrLimitedSelect)
 		}
 	}
 
@@ -652,15 +665,21 @@ func (c *CustomFuncs) splitScanIntoUnionScansOrSelects(
 		Columns: cons.Columns.RemapColumns(sp.Table, newScanPrivate.Table),
 		Spans:   noLimitSpans,
 	})
+	// TODO(mgartner): We should be able to add a LIMIT above the Scan or Select
+	// below, as long as we remap the original ordering columns. This could
+	// allow a top-k to be planned instead of a sort.
 	newScanOrSelect := c.e.f.ConstructScan(newScanPrivate)
 	if !filters.IsTrue() {
-		newScanOrSelect = c.wrapScanInLimitedSelect(newScanOrSelect, sp, newScanPrivate, filters, limit)
+		newScanOrSelect = c.e.f.ConstructSelect(
+			newScanOrSelect,
+			c.RemapScanColsInFilter(filters, sp, newScanPrivate),
+		)
 	}
 	// TODO(mgartner/msirek): Converting ColSets to ColLists here is only safe
 	// because column IDs are always allocated in a consistent, ascending order
 	// for each duplicated table in the metadata. If column ID allocation
 	// changes, this could break.
-	return makeNewUnion(last, newScanOrSelect, sp.Cols.ToList()), true
+	return makeNewUnion(last, newScanOrSelect, spColList), true
 }
 
 // numAllowedValues returns the number of allowed values for a column with a
@@ -711,29 +730,6 @@ func (c *CustomFuncs) numAllowedValues(
 	return 0, false
 }
 
-// wrapScanInLimitedSelect wraps "scan" in a SelectExpr with filters mapped from
-// the originalScanPrivate columns to the columns in scan. If limit is non-zero,
-// the SelectExpr is wrapped in a LimitExpr with that limit.
-func (c *CustomFuncs) wrapScanInLimitedSelect(
-	scan memo.RelExpr,
-	originalScanPrivate, newScanPrivate *memo.ScanPrivate,
-	filters memo.FiltersExpr,
-	limit int,
-) (limitedSelect memo.RelExpr) {
-	limitedSelect = c.e.f.ConstructSelect(
-		scan,
-		c.RemapScanColsInFilter(filters, originalScanPrivate, newScanPrivate),
-	)
-	if limit != 0 {
-		limitedSelect = c.e.f.ConstructLimit(
-			limitedSelect,
-			c.IntConst(tree.NewDInt(tree.DInt(limit))),
-			c.EmptyOrdering(),
-		)
-	}
-	return limitedSelect
-}
-
 // indexHasOrderingSequence returns whether the Scan can provide a given
 // ordering under the assumption that we are scanning a single-key span with the
 // given keyLength (and if so, whether we need to scan it in reverse).
Original file line number	Diff line number	Diff line change
Expand Up		@@ -317,3 +317,4 @@ select
		│ └── [/'foo'/e'bar\x00'/5 - ]
		└── filters
		└── c = 5