Skip to content

Commit

Permalink
opt: ensure we prefer a reverse scan to sorting a forward scan
Browse files Browse the repository at this point in the history
This commit fixes an issue where, in some edge cases, the optimizer would
prefer sorting the output of a forward scan over performing a reverse scan
(when there is no need to sort the output of the reverse scan).

Release note (performance improvement): The optimizer now prefers
performing a reverse scan over a forward scan + sort if the reverse
scan eliminates the need for a sort and the plans are otherwise
equivalent. This was already the behavior in most cases, but some edge
cases with a small number of rows have been fixed.
  • Loading branch information
rytaft committed Jun 28, 2021
1 parent e05dfe0 commit b1fa526
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 14 deletions.
19 changes: 12 additions & 7 deletions pkg/sql/opt/xform/coster.go
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,18 @@ func (c *coster) computeScanCost(scan *memo.ScanExpr, required *physical.Require
baseCost += virtualScanTableDescriptorFetchCost
}

// Performing a reverse scan is more expensive than a forward scan, but it's
// still preferable to sorting the output of a forward scan. To ensure we
// choose a reverse scan over a sort, add the reverse scan cost before we
// alter the row count for unbounded scan penalties below. This cost must also
// be added before adjusting the row count for the limit hint.
if ordering.ScanIsReverse(scan, &required.Ordering) {
if rowCount > 1 {
// Need to do binary search to seek to the previous row.
perRowCost += memo.Cost(math.Log2(rowCount)) * cpuCostFactor
}
}

// Add a penalty to full table scans. All else being equal, we prefer a
// constrained scan. Adding a few rows worth of cost helps prevent surprising
// plans for very small tables.
Expand All @@ -632,13 +644,6 @@ func (c *coster) computeScanCost(scan *memo.ScanExpr, required *physical.Require
rowCount = math.Min(rowCount, required.LimitHint)
}

if ordering.ScanIsReverse(scan, &required.Ordering) {
if rowCount > 1 {
// Need to do binary search to seek to the previous row.
perRowCost += memo.Cost(math.Log2(rowCount)) * cpuCostFactor
}
}

cost := baseCost + memo.Cost(rowCount)*(seqIOCostFactor+perRowCost)

// If this scan is locality optimized, divide the cost by 3 in order to make
Expand Down
23 changes: 23 additions & 0 deletions pkg/sql/opt/xform/testdata/coster/scan
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,29 @@ limit
│ └── i:2 IN (1, 3, 5, 7, 9) [outer=(2), constraints=(/2: [/1 - /1] [/3 - /3] [/5 - /5] [/7 - /7] [/9 - /9]; tight)]
└── 20

exec-ddl
ALTER TABLE a INJECT STATISTICS '[
{
"columns": ["k"],
"created_at": "2019-02-08 04:10:40.001179+00:00",
"row_count": 0,
"distinct_count": 0
}
]'
----

# Ensure that we prefer a reverse scan over sorting.
opt
SELECT * FROM a ORDER BY k DESC
----
scan a,rev
├── columns: k:1!null i:2 s:3 d:4!null
├── stats: [rows=1]
├── cost: 15.89
├── key: (1)
├── fd: (1)-->(2-4)
└── ordering: -1

# Regression test for #35042. Ensure we always prefer constrained scans.
exec-ddl
CREATE TABLE speed_test (id INT PRIMARY KEY DEFAULT unique_rowid())
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/xform/testdata/rules/groupby
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ memo (optimized, ~5KB, required=[presentation: max:7])
├── G2: (scan abc,cols=(1))
│ ├── [ordering: -1] [limit hint: 1.00]
│ │ ├── best: (scan abc,rev,cols=(1))
│ │ └── cost: 5.06
│ │ └── cost: 5.16
│ └── []
│ ├── best: (scan abc,cols=(1))
│ └── cost: 1064.51
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/xform/testdata/rules/limit
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ EliminateProject
└── 5
================================================================================
GenerateIndexScans
Cost: 3556.51
Cost: 3561.68
================================================================================
explain
├── columns: info:7
Expand Down Expand Up @@ -708,7 +708,7 @@ GenerateZigzagJoins (no changes)
--------------------------------------------------------------------------------
================================================================================
Final best expression
Cost: 3556.51
Cost: 3561.68
================================================================================
explain
├── columns: info:7
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/opt/xform/testdata/rules/scan
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ memo (optimized, ~3KB, required=[presentation: k:1,f:3] [ordering: -1])
├── G2: (scan a,cols=(1,3)) (scan a@s_idx,cols=(1,3))
│ ├── [ordering: -1] [limit hint: 10.00]
│ │ ├── best: (scan a,rev,cols=(1,3))
│ │ └── cost: 15.04
│ │ └── cost: 15.71
│ └── []
│ ├── best: (scan a@s_idx,cols=(1,3))
│ └── cost: 1074.61
Expand Down Expand Up @@ -241,10 +241,10 @@ memo (optimized, ~2KB, required=[presentation: s:4,i:2,f:3] [ordering: -4,+2])
└── G1: (scan a,cols=(2-4)) (scan a@s_idx,cols=(2-4))
├── [presentation: s:4,i:2,f:3] [ordering: -4,+2]
│ ├── best: (sort G1="[ordering: -4]")
│ └── cost: 1311.96
│ └── cost: 1311.81
├── [ordering: -4]
│ ├── best: (scan a@s_idx,rev,cols=(2-4))
│ └── cost: 1185.51
│ └── cost: 1185.36
└── []
├── best: (scan a@s_idx,cols=(2-4))
└── cost: 1084.71
Expand Down Expand Up @@ -334,7 +334,7 @@ memo (optimized, ~2KB, required=[presentation: s:4,j:5] [ordering: +4])
└── G1: (scan a,cols=(4,5)) (scan a@si_idx,cols=(4,5))
├── [presentation: s:4,j:5] [ordering: +4]
│ ├── best: (scan a@si_idx,rev,cols=(4,5))
│ └── cost: 1175.41
│ └── cost: 1175.26
└── []
├── best: (scan a@si_idx,cols=(4,5))
└── cost: 1074.61
Expand Down

0 comments on commit b1fa526

Please sign in to comment.