From 0c664a14176427d39020a567fbaaf496a221f0f3 Mon Sep 17 00:00:00 2001
From: Anton Rybochkin <79331145+arybochkin@users.noreply.github.com>
Date: Mon, 15 Jul 2024 19:01:55 +0200
Subject: [PATCH] [#22370] docdb: Cost Based Optimizer changes to take into
 account backward scans improvement

Summary:
The change updates cost based optimizer to take backward scan improvements into account, so that
backward scans are picked instead of the forward scan+sort when fast backward scan feautuer is
enabled via `FLAGS_use_fast_backward_scan`.

Results for TAQO run first. The first 4 columns are the values for 'Best Execution Plan Picked',
the last 4 columns are for number of queries with backward scans in execution plan. Cost based
optimizer is turned on for 'Master' and 'D36614'.

| Model                        | Master | D36614 | PG     | Num queries | Improved | Degraded | Plan changed
| ---------------------------- | ------ | ------ | ------ | ----------- | -------- | -------- | ------------
| basic                        |  91.04 |  91.04 |  96.64 |           0 |        0 |        0 |        0
| complex                      |  85.42 |  84.38 |  86.46 |           3 |        2 |        1 |        1
| cost-validation-joins        |  78.46 |  79.79 |  95.48 |          23 |       23 |        0 |        0
| cost-validation-misc         |  94.43 |   95.3 |  92.86 |          62 |       62 |        0 |        6
| cost-validation-single-table |  96.09 |  96.92 |   98.7 |           0 |        0 |        0 |        0
| join-order-benchmark         |  66.37 |   64.6 |  42.48 |           0 |        0 |        0 |        0
| subqueries                   |     80 |  86.67 |     80 |           0 |        0 |        0 |        0
| more-subqueries              |  77.94 |  76.47 |    100 |           1 |        1 |        0 |        0
| seek-next-estimation         |    100 |    100 |  96.88 |           0 |        0 |        0 |        0
| tpch                         |  72.73 |  68.18 |  72.73 |           0 |        0 |        0 |        0
| tuning_tests                 |  93.63 |  94.12 |  99.06 |          10 |       10 |        0 |        0

Some queries results by model (queries with backward scans):
| complex                          | Master (default) | Master (best) | D36614 (default) | D36614 (best)  | Comment
| -------------------------------- | ---------------- | ------------- | ---------------- | -------------- | ------------
| 59ebf1c77e58cb2291c35486e2f96137 |                  |               |                  |                | plan changed
| Estimated cost                   |          2285.29 |       2285.29 |          1438.33 | 20000002970.96 |
| Execution time                   |             8.20 |          8.20 |           299.57 |          10.46 |
|                                  |                  |               |                  |                |
| 4039d9f16fb8f4ed263f48d5f5232215 |                  |               |                  |                |
| Estimated cost                   |         18222.67 |      18222.67 |         14353.61 |       14353.61 |
| Execution time                   |            22.04 |         22.04 |            13.19 |          13.19 |
|                                  |                  |               |                  |                |
| 5c918663b34f55e514fc6e6edc046556 |                  |               |                  |                |
| Estimated cost                   |         27084.99 |      27084.99 |         23715.02 |       23715.02 |
| Execution time                   |            35.53 |         35.53 |            27.63 |          27.63 |
|                                  |                  |               |                  |                |
| cost-validation-joins            | Master (default) | Master (best) | D36614 (default) | D36614 (best)  | Comment
| -------------------------------- | ---------------- | ------------- | ---------------- | -------------- | ------------
| c8327c54b05e0781b1e095fefe6e314e |                  |               |                  |                |
| Estimated cost                   |           387.39 |        387.39 |           384.04 |         384.04 |
| Execution time                   |              6.3 |          6.30 |            	3.16 |           3.16 |
|                                  |                  |               |                  |                |
| 4ce5afcdad024545f07198bd75eb5312 |                  |               |                  |                |
| Estimated cost                   |           361.45 |        384.96 |           361.41 |         604.03 |
| Execution time                   |           153.51 |          5.99 |            137.8 |           2.41 |
|                                  |                  |               |                  |                |
| 01c77bebc5d6fe9d1444170d92a14ec1 |                  |               |                  |                |
| Estimated cost                   |           387.77 |        387.77 |           384.44 |         384.44 |
| Execution time                   |             6.34 |          6.34 |              3.3 |           3.30 |
|                                  |                  |               |                  |                |
| 3ca0ff16775634091419ecf365bbfcf5 |                  |               |                  |                |
| Estimated cost                   |           362.67 |        386.53 |           362.34 |         389.85 |
| Execution time                   |            21.43 |          6.09 |            16.82 |           2.47 |
|                                  |                  |               |                  |                |
| e4e31014bb2b4b3ab9478f7200516310 |                  |               |                  |                |
| Estimated cost                   |           374.17 |        386.53 |           373.83 |         383.19 |
| Execution time                   |            75.73 |          5.97 |            46.86 |           3.01 |
|                                  |                  |               |                  |                |
| 7a993deaf6d49e2b053987240fe78c02 |                  |               |                  |                |
| Estimated cost                   |           362.68 |        362.68 |           362.34 |         362.34 |
| Execution time                   |            10.21 |         10.21 |              6.7 |           6.70 |
|                                  |                  |               |                  |                |
| cost-validation-misc             | Master (default) | Master (best) | D36614 (default) | D36614 (best)  | Comment
| -------------------------------- | ---------------- | ------------- | ---------------- | -------------- | ------------
| e8b548c59a52976c30bde07e5576fef3 |                  |               |                  |                | best plan changed
| Estimated cost                   |           379.01 |   10000000674 |           372.27 |         372.27 |
| Execution time                   |                1 |          0.73 |             0.65 |           0.65 |
|                                  |                  |               |                  |                |
| 7a353cc0498dfa2d44a441b37c5a1be2 |                  |               |                  |                |
| Estimated cost                   |          13766.3 |       13766.3 |         10364.91 |       10364.91 |
| Execution time                   |            13.91 |         13.91 |             7.62 |           7.62 |
|                                  |                  |               |                  |                |
| 45241b1e1c945e2d13ba488e42fa9f5f |                  |               |                  |                | plan changed
| Estimated cost                   |          1138.17 |       1327.55 |           956.81 |         956.81 |
| Execution time                   |             1.36 |          0.88 |             0.64 |           0.64 |
|                                  |                  |               |                  |                |
| df22d138bac1990b9a2b664478be3940 |                  |               |                  |                | best plan changed
| Estimated cost                   |         81398.55 |     133322.29 |         81398.55 |       99993.43 |
| Execution time                   |            92.79 |         14.05 |             80.1 |           7.41 |
|                                  |                  |               |                  |                |
| bd56ddf3c3edcbb4daed5e90222f3f43 |                  |               |                  |                | plan changed
| Estimated cost                   |          1156.06 |       1214.59 |           880.92 |         880.92 |
| Execution time                   |             1.84 |          0.93 |              0.7 |           0.70 |
|                                  |                  |               |                  |                |
| c2b94854c7334aaeab12fb6a7ad90bbe |                  |               |                  |                | best plan changed
| Estimated cost                   |          6621.15 |      16245.17 |          6621.15 |       11662.79 |
| Execution time                   |              8.3 |          1.06 |             7.53 |           0.87 |
|                                  |                  |               |                  |                |
| 8c35b5522e8da2fbb14154afbb949c17 |                  |               |                  |                | best plan changed
| Estimated cost                   |         61271.63 |     249323.79 |         61271.63 |      185166.06 |
| Execution time                   |            77.92 |          2.98 |            70.86 |           1.89 |
|                                  |                  |               |                  |                |
| 3e17a5426c1240a7cb15413483c6857b |                  |               |                  |                | best plan changed
| Estimated cost                   |         83146.63 |     286833.39 |         83146.63 |      200156.46 |
| Execution time                   |           102.32 |          3.01 |            89.26 |           1.91 |
|                                  |                  |               |                  |                |
| more-subqueries                  | Master (default) | Master (best) | D36614 (default) | D36614 (best)  | Comment
| -------------------------------- | ---------------- | ------------- | ---------------- | -------------- | ------------
| df720fdc87e9d33aa1006ede6868310f |                  |               |                  |                |
| Estimated cost                   |        432396.84 |     432396.84 |        389020.18 |      389020.18 |
| Execution time                   |           268.49 |        268.49 |           220.67 |         220.67 |
|                                  |                  |               |                  |                |
| tuning_tests                     | Master (default) | Master (best) | D36614 (default) | D36614 (best)  | Comment
| -------------------------------- | ---------------- | ------------- | ---------------- | -------------- | ------------
| a7a1a762f96b9445990d3057904a8f9b |                  |               |                  |                |
| Estimated cost                   |        102825.88 |     102825.88 |         56113.48 |       56113.48 |
| Execution time                   |            70.95 |         70.95 |            38.41 |          38.41 |
|                                  |                  |               |                  |                |
| 13026bfac847da1ec9f13fdaad5c39cf |                  |               |                  |                |
| Estimated cost                   |        117489.58 |     117489.58 |         64103.98 |       64103.98 |
| Execution time                   |            79.83 |         79.83 |            42.35 |          42.35 |
|                                  |                  |               |                  |                |
| 90e1ac9dfdbd77fa5ad96b1fe3526a41 |                  |               |                  |                |
| Estimated cost                   |        132153.27 |     132153.27 |         72094.47 |       72094.47 |
| Execution time                   |            90.19 |         90.19 |            46.56 |          46.56 |
|                                  |                  |               |                  |                |
| 90c2ad6894acac53c20efdd886e0fc03 |                  |               |                  |                |
| Estimated cost                   |        146816.97 |     146816.97 |         80084.97 |       80084.97 |
| Execution time                   |           101.19 |        101.19 |            52.69 |          52.69 |

The full report: https://taqo.dev.yugabyte.com/regression/33

Most of the queries with backward scans improved their time of execution. However, there's one
query 59ebf1c77e58cb2291c35486e2f96137 which shows a regression. From other tests it is clearly
seen that new approach gives a good improvement for backward scans, which may mean some other parts
of Cost Based Optimizer may have been tweaked additionally (like costs for seeks, next, etc). This
action requires additional analysis and will be covered by a separate ticket.
Query link: https://taqo.dev.yugabyte.com/reports/b5e885c8e491050e70320e4b801469b0/20240719-115328/tags/distinct.html#59ebf1c77e58cb2291c35486e2f96137

Jira: DB-11271

Test Plan:
Test case #1 (backward scan improvements are turned off).
1. Start a cluster: `./bin/yb-ctl start --rf=1`
2. Open `ysqlsh`
3. Create a table with some data:
`# CREATE TABLE ttable(h INT, r INT, c INT, PRIMARY KEY(h, r ASC));`
`# INSERT INTO ttable SELECT i, i, i FROM generate_series(1, 10) AS i;`
4. Turn CBO on: `# SET yb_enable_base_scans_cost_model TO true;`
5. Run a query `# EXPLAIN ANALYZE SELECT c, r FROM ttable WHERE h = 1 ORDER BY r DESC;`
6. Result:
```
 Sort  (cost=555.50..555.51 rows=5 width=8) (actual time=0.706..0.706 rows=1 loops=1)
   Sort Key: r DESC
   Sort Method: quicksort  Memory: 25kB
   ->  Index Scan using ttable_pkey on ttable  (cost=180.00..555.44 rows=5 width=8) (actual time=0.674..0.677 rows=1 loops=1)
         Index Cond: (h = 1)
 Planning Time: 6.147 ms
 Execution Time: 0.776 ms
 Peak Memory Usage: 60 kB
(8 rows)
```
It is expected to have Forward Scan + Sort in case of fast backward scan is turned off.

Test case #2 (backward scan improvements are turned on).
1. Start a cluster: `./bin/yb-ctl start --rf=1 --tserver_flags=allowed_preview_flags_csv=use_fast_backward_scan,use_fast_backward_scan=true`
2. Open `ysqlsh`
3. Create a table with some data:
`# CREATE TABLE ttable(h INT, r INT, c INT, PRIMARY KEY(h, r ASC));`
`# INSERT INTO ttable SELECT i, i, i FROM generate_series(1, 10) AS i;`
4. Turn CBO on: `# SET yb_enable_base_scans_cost_model TO true;`
5. Run a query `# EXPLAIN ANALYZE SELECT c, r FROM ttable WHERE h = 1 ORDER BY r DESC;`
6. Result:
```
 Index Scan Backward using ttable_pkey on ttable  (cost=180.00..557.77 rows=5 width=8) (actual time=1.075..1.079 rows=1 loops=1)
   Index Cond: (h = 1)
 Planning Time: 0.073 ms
 Execution Time: 1.129 ms
 Peak Memory Usage: 24 kB
(5 rows)
```
It is seen that CBO takes backward scan improvements into account and the planner prefers Index Scan Backward over Forward Scan + Sort.

Reviewers: rthallam, gkukreja, amartsinchyk

Reviewed By: rthallam, gkukreja, amartsinchyk

Subscribers: yql

Differential Revision: https://phorge.dev.yugabyte.com/D36614
---
 src/postgres/src/backend/optimizer/path/costsize.c | 4 +++-
 src/postgres/src/include/optimizer/cost.h          | 8 ++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/postgres/src/backend/optimizer/path/costsize.c b/src/postgres/src/backend/optimizer/path/costsize.c
index fac37a8c7514..3d5c238351b3 100644
--- a/src/postgres/src/backend/optimizer/path/costsize.c
+++ b/src/postgres/src/backend/optimizer/path/costsize.c
@@ -164,6 +164,7 @@ double		yb_random_block_cost = DEFAULT_RANDOM_PAGE_COST;
 double		yb_docdb_next_cpu_cycles = YB_DEFAULT_DOCDB_NEXT_CPU_CYCLES;
 double 		yb_seek_cost_factor = YB_DEFAULT_SEEK_COST_FACTOR;
 double 		yb_backward_seek_cost_factor = YB_DEFAULT_BACKWARD_SEEK_COST_FACTOR;
+double 		yb_fast_backward_seek_cost_factor = YB_DEFAULT_FAST_BACKWARD_SEEK_COST_FACTOR;
 int 		yb_docdb_merge_cpu_cycles = YB_DEFAULT_DOCDB_MERGE_CPU_CYCLES;
 int 		yb_docdb_remote_filter_overhead_cycles = YB_DEFAULT_DOCDB_REMOTE_FILTER_OVERHEAD_CYCLES;
 double		yb_local_latency_cost = YB_DEFAULT_LOCAL_LATENCY_COST;
@@ -7073,7 +7074,8 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 
 	if (path->indexscandir == BackwardScanDirection)
 	{
-		per_next_cost *= yb_backward_seek_cost_factor;
+		per_next_cost *= YbUseFastBackwardScan() ?
+			yb_fast_backward_seek_cost_factor : yb_backward_seek_cost_factor;
 	}
 
 	run_cost += num_seeks * index_per_seek_cost +
diff --git a/src/postgres/src/include/optimizer/cost.h b/src/postgres/src/include/optimizer/cost.h
index cda573f17694..89c0286d4752 100644
--- a/src/postgres/src/include/optimizer/cost.h
+++ b/src/postgres/src/include/optimizer/cost.h
@@ -48,6 +48,14 @@
 #define YB_DEFAULT_SEEK_COST_FACTOR 50
 #define YB_DEFAULT_BACKWARD_SEEK_COST_FACTOR 10
 
+/* 
+ * The value for the fast backward scan seek cost factor has been selected based on the smallest
+ * improvement (2.8 times) for the backward scan related Order By workloads of Featurebench. It
+ * might be good to use a different factor for colocated case, where the smallest improvement
+ * is 3 times higher comparing to non-colocated case; refer to D35894 for the details.
+ */
+#define YB_DEFAULT_FAST_BACKWARD_SEEK_COST_FACTOR (YB_DEFAULT_BACKWARD_SEEK_COST_FACTOR / 3.0)
+
 /* DocDB row decode and process cost */
 #define YB_DEFAULT_DOCDB_MERGE_CPU_CYCLES 50