Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
89003: storage: CheckSSTConflicts performance fixes r=erikgrinaker a=itsbilal

storage: Use range key masking in CheckSSTConflicts

This change uses range key masking in case of import
cancellation/restart inside CheckSSTConflicts. This prevents
point keys shadowed by range tombstones from appearing to
CheckSSTConflicts, significantly speeding up its runtime
and allowing import restarts to succeed without getting wedged.

Unblocks #87309.

Release note: None.

---

storage: Move no-overlap fast path up in CheckSSTConflicts

This change moves up the fast path in CheckSSTConflicts where we
check for any engine keys within the overlapping SST keys. If no
engine keys are found, we can return from the function immediately.
Previously we'd open a range-key sst iterator before doing this check,
which has a pretty significant construction cost.

Fixes #88702.

Release note: None


89088: reduce: remove and simplify index PARTITION BY clauses r=mgartner a=mgartner

Release note: None

89113: opt: fix invalid transformation in SplitLimitedSelectIntoUnionSelects r=mgartner a=mgartner

#### opt: fix invalid transformation in SplitLimitedSelectIntoUnionSelects

This commit removes some untested logic in
`SplitLimitedSelectIntoUnionSelects` that created invalid expression
transformations. With this logic, this rule could construct an unordered
limit below the `UnionAll` which is incorrect. The bug could cause
incorrect query results.

Fixes #88993

Release note (bug fix): A bug has been fixed that could cause incorrect
results in rare cases. The bug could only present if the following
conditions were true:
  1. A query with `ORDER BY` and `LIMIT` was executed.
  2. The table containing the `ORDER BY` columns had an index containing
     those columns.
  3. The index in (2) contained a prefix of columns held to a fixed
     number of values by the query filter (e.g., `WHERE a IN (1, 3)`),
     a `CHECK` constraint (e.g., `CHECK (a IN (1, 3))`), inferred by
     a computed column expression (e.g. `WHERE a IN (1, 3)` and a column
     `b INT AS (a + 10) STORED`), or inferred by a `PARTITION BY` clause
     (e.g. `INDEX (a, ...) PARTITION BY LIST (a) (PARTITION p VALUES
     ((1), (3)))`).
This bug was present since version 22.1.0.


89166: storage: improve `BenchmarkUpdateSSTTimestamps` r=erikgrinaker a=erikgrinaker

This patch improves `BenchmarkUpdateSSTTimestamps`, by running separate benchmarks varying the number of keys and concurrency. In particular, this exercises the `sstable.RewriteKeySuffixes` fast path, while the old benchmark only used the naïve read/write slow path.

Touches #88723.

Release note: None

Co-authored-by: Bilal Akhtar <[email protected]>
Co-authored-by: Marcus Gartner <[email protected]>
Co-authored-by: Erik Grinaker <[email protected]>
  • Loading branch information
4 people committed Oct 3, 2022
5 parents fca6c42 + 949bd90 + 25676bb + d6ccd10 + a9010ad commit 73f8ee6
Show file tree
Hide file tree
Showing 10 changed files with 398 additions and 242 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -317,3 +317,4 @@ select
│ └── [/'foo'/e'bar\x00'/5 - ]
└── filters
└── c = 5

18 changes: 18 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/partitioning_index
Original file line number Diff line number Diff line change
Expand Up @@ -262,3 +262,21 @@ NULL 10 1
20 NULL 1
20 20 1
25 NULL 1

# Regression test for #88993 where a limit pushed down into a union of scans
# caused incorrect query results.
statement ok
CREATE TABLE t88993 (
a INT,
b INT,
c INT,
INDEX (b, c, a) PARTITION BY LIST (b, c) (
PARTITION p1 VALUES IN ((11, 50))
)
);
INSERT INTO t88993 (a, b, c) VALUES (1, 10, 150), (0, 11, 100);

query I
SELECT min(a) FROM t88993
----
0
99 changes: 99 additions & 0 deletions pkg/cmd/reduce/reduce/reducesql/reducesql.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ var SQLPasses = []reduce.Pass{
removeIndexCols,
removeIndexPredicate,
removeIndexStoringCols,
removeIndexPartitionBy,
removeIndexPartitions,
removeIndexPartitionListValues,
removeIndexPartitionListCols,
removeWindowPartitions,
removeDBSchema,
removeFroms,
Expand Down Expand Up @@ -828,6 +832,101 @@ var (
}
return 0
})
removeIndexPartitionBy = walkSQL("remove INDEX PARTITION BY", func(xfi int, node interface{}) int {
xf := xfi == 0
removePartitionBy := func(idx *tree.IndexTableDef) int {
if idx.PartitionByIndex != nil {
if xf {
idx.PartitionByIndex = nil
}
return 1
}
return 0
}
switch node := node.(type) {
case *tree.IndexTableDef:
return removePartitionBy(node)
case *tree.UniqueConstraintTableDef:
return removePartitionBy(&node.IndexTableDef)
}
return 0
})
removeIndexPartitions = walkSQL("remove INDEX partitions", func(xfi int, node interface{}) int {
removePartitionBy := func(idx *tree.IndexTableDef) int {
if idx.PartitionByIndex != nil {
n := len(idx.PartitionByIndex.List)
if xfi < n {
idx.PartitionByIndex.List =
append(idx.PartitionByIndex.List[:xfi], idx.PartitionByIndex.List[xfi+1:]...)
}
return n
}
return 0
}
switch node := node.(type) {
case *tree.IndexTableDef:
return removePartitionBy(node)
case *tree.UniqueConstraintTableDef:
return removePartitionBy(&node.IndexTableDef)
}
return 0
})
removeIndexPartitionListValues = walkSQL("remove INDEX partition list values", func(xfi int, node interface{}) int {
removePartitionBy := func(idx *tree.IndexTableDef) int {
if idx.PartitionByIndex != nil {
n := 0
for i := range idx.PartitionByIndex.List {
list := &idx.PartitionByIndex.List[i]
l := len(list.Exprs)
if xfi >= n && xfi < n+l {
list.Exprs = append(list.Exprs[:xfi-n], list.Exprs[xfi-n+1:]...)
}
n += l
}
return n
}
return 0
}
switch node := node.(type) {
case *tree.IndexTableDef:
return removePartitionBy(node)
case *tree.UniqueConstraintTableDef:
return removePartitionBy(&node.IndexTableDef)
}
return 0
})
removeIndexPartitionListCols = walkSQL("remove INDEX partition list cols", func(xfi int, node interface{}) int {
removePartitionBy := func(idx *tree.IndexTableDef) int {
if idx.PartitionByIndex != nil && len(idx.PartitionByIndex.List) > 0 {
n := len(idx.PartitionByIndex.Fields)
if xfi < n {
idx.PartitionByIndex.Fields =
append(idx.PartitionByIndex.Fields[:xfi], idx.PartitionByIndex.Fields[xfi+1:]...)
// Remove the corresponding column from the index columns.
idx.Columns = append(idx.Columns[:xfi], idx.Columns[xfi+1:]...)
// Remove the corresponding value from every tuple in every
// partition.
for i := range idx.PartitionByIndex.List {
list := &idx.PartitionByIndex.List[i]
for j := range list.Exprs {
t := list.Exprs[j].(*tree.Tuple)
t.Exprs = append(t.Exprs[:xfi], t.Exprs[xfi+1:]...)
}
}

}
return n
}
return 0
}
switch node := node.(type) {
case *tree.IndexTableDef:
return removePartitionBy(node)
case *tree.UniqueConstraintTableDef:
return removePartitionBy(&node.IndexTableDef)
}
return 0
})
removeWindowPartitions = walkSQL("remove WINDOW partitions", func(xfi int, node interface{}) int {
switch node := node.(type) {
case *tree.WindowDef:
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/batcheval/cmd_add_sstable.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ func EvalAddSSTable(
leftPeekBound, rightPeekBound := rangeTombstonePeekBounds(
args.Key, args.EndKey, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey())
statsDelta, err = storage.CheckSSTConflicts(ctx, sst, readWriter, start, end, leftPeekBound, rightPeekBound,
args.DisallowShadowing, args.DisallowShadowingBelow, maxIntents, usePrefixSeek)
args.DisallowShadowing, args.DisallowShadowingBelow, sstToReqTS, maxIntents, usePrefixSeek)
statsDelta.Add(sstReqStatsDelta)
if err != nil {
return result.Result{}, errors.Wrap(err, "checking for key collisions")
Expand Down
16 changes: 16 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -3786,3 +3786,19 @@ query FFFF
select covar_pop(y, x), covar_samp(y, x), regr_sxx(y, x), regr_syy(y, x) from corrupt_combine
----
37.5 45 2983.333333333333 17.5

# Regression test for #88993 where a limit pushed down into a union of scans
# caused incorrect query results.
statement ok
CREATE TABLE t2 (
a INT,
b INT,
c INT,
INDEX (b, c, a)
);
INSERT INTO t2 (a, b, c) VALUES (1, 10, 20), (0, 11, 100);

query I
SELECT min(a) FROM t2 WHERE (b <= 11 AND c < 50) OR (b = 11 AND c = 50) OR (b >= 11 AND c > 50)
----
0
62 changes: 29 additions & 33 deletions pkg/sql/opt/xform/general_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ func (c *CustomFuncs) splitScanIntoUnionScansOrSelects(
// UnionAll tree.
var noLimitSpans constraint.Spans
var last memo.RelExpr
spColList := sp.Cols.ToList()
queue := list.New()
for i, n := 0, spans.Count(); i < n; i++ {
if i >= budgetExceededIndex {
Expand All @@ -561,23 +562,35 @@ func (c *CustomFuncs) splitScanIntoUnionScansOrSelects(
}
for j, m := 0, singleKeySpans.Count(); j < m; j++ {
// Construct a new Scan for each span.
// Note: newHardLimit will be 0 (i.e., no limit) if there are
// filters to be applied in a Select.
newScanPrivate := c.makeNewScanPrivate(
sp,
cons.Columns,
newHardLimit,
singleKeySpans.Get(j),
)
newScanOrSelect := c.e.f.ConstructScan(newScanPrivate)
newScanOrLimitedSelect := c.e.f.ConstructScan(newScanPrivate)
if !filters.IsTrue() {
newScanOrSelect = c.wrapScanInLimitedSelect(
newScanOrSelect,
sp,
newScanPrivate,
filters,
limit,
// If there are filters, apply them and a limit. The limit is
// not needed if there are no filters because the scan's hard
// limit will be set (see newHardLimit).
//
// TODO(mgartner/msirek): Converting ColSets to ColLists here is
// only safe because column IDs are always allocated in a
// consistent, ascending order for each duplicated table in the
// metadata. If column ID allocation changes, this could break.
newColList := newScanPrivate.Cols.ToList()
newScanOrLimitedSelect = c.e.f.ConstructLimit(
c.e.f.ConstructSelect(
newScanOrLimitedSelect,
c.RemapScanColsInFilter(filters, sp, newScanPrivate),
),
c.IntConst(tree.NewDInt(tree.DInt(limit))),
ordering.RemapColumns(spColList, newColList),
)
}
queue.PushBack(newScanOrSelect)
queue.PushBack(newScanOrLimitedSelect)
}
}

Expand Down Expand Up @@ -652,15 +665,21 @@ func (c *CustomFuncs) splitScanIntoUnionScansOrSelects(
Columns: cons.Columns.RemapColumns(sp.Table, newScanPrivate.Table),
Spans: noLimitSpans,
})
// TODO(mgartner): We should be able to add a LIMIT above the Scan or Select
// below, as long as we remap the original ordering columns. This could
// allow a top-k to be planned instead of a sort.
newScanOrSelect := c.e.f.ConstructScan(newScanPrivate)
if !filters.IsTrue() {
newScanOrSelect = c.wrapScanInLimitedSelect(newScanOrSelect, sp, newScanPrivate, filters, limit)
newScanOrSelect = c.e.f.ConstructSelect(
newScanOrSelect,
c.RemapScanColsInFilter(filters, sp, newScanPrivate),
)
}
// TODO(mgartner/msirek): Converting ColSets to ColLists here is only safe
// because column IDs are always allocated in a consistent, ascending order
// for each duplicated table in the metadata. If column ID allocation
// changes, this could break.
return makeNewUnion(last, newScanOrSelect, sp.Cols.ToList()), true
return makeNewUnion(last, newScanOrSelect, spColList), true
}

// numAllowedValues returns the number of allowed values for a column with a
Expand Down Expand Up @@ -711,29 +730,6 @@ func (c *CustomFuncs) numAllowedValues(
return 0, false
}

// wrapScanInLimitedSelect wraps "scan" in a SelectExpr with filters mapped from
// the originalScanPrivate columns to the columns in scan. If limit is non-zero,
// the SelectExpr is wrapped in a LimitExpr with that limit.
func (c *CustomFuncs) wrapScanInLimitedSelect(
scan memo.RelExpr,
originalScanPrivate, newScanPrivate *memo.ScanPrivate,
filters memo.FiltersExpr,
limit int,
) (limitedSelect memo.RelExpr) {
limitedSelect = c.e.f.ConstructSelect(
scan,
c.RemapScanColsInFilter(filters, originalScanPrivate, newScanPrivate),
)
if limit != 0 {
limitedSelect = c.e.f.ConstructLimit(
limitedSelect,
c.IntConst(tree.NewDInt(tree.DInt(limit))),
c.EmptyOrdering(),
)
}
return limitedSelect
}

// indexHasOrderingSequence returns whether the Scan can provide a given
// ordering under the assumption that we are scanning a single-key span with the
// given keyLength (and if so, whether we need to scan it in reverse).
Expand Down
Loading

0 comments on commit 73f8ee6

Please sign in to comment.