From 8fe33369d5dbe84f106101c1cb356f669747b3fd Mon Sep 17 00:00:00 2001
From: Gaurav Kukreja
Date: Tue, 16 Jan 2024 19:06:05 +0100
Subject: [PATCH] [#20635] YSQL: Model remote index filter in the base scans
 cost model

Summary:
In the case of an Index Scan, storage index filters are applied on the
secondary index and reduce the number of base table lookups needed. Before
this change, this was not correctly modeled in the base scans cost model.

After this change, we identify filters that can be applied to the secondary
index. These filters are applied to the rows that match the index conditions
used for the LSM index lookup. We compute the selectivity of the index
conditions and storage index filters together to estimate the number of base
table lookups.

Jira: DB-9633

Test Plan: ./yb_build.sh --java-test 'org.yb.pgsql.TestPgCostModelSeekNextEstimation'

Reviewers: tverona, tnayak

Reviewed By: tnayak

Subscribers: yql

Differential Revision: https://phorge.dev.yugabyte.com/D31751
---
 .../TestPgCostModelSeekNextEstimation.java    |  61 +++
 .../src/backend/optimizer/path/costsize.c     | 317 +++++++-----
 .../src/backend/optimizer/plan/createplan.c   | 117 +----
 .../src/backend/optimizer/util/ybcplan.c      | 109 ++++
 src/postgres/src/include/optimizer/ybcplan.h  |  10 +
 .../expected/yb_join_batching_plans.out       |  42 +-
 .../expected/yb_parallel_colocated.out        |  36 +-
 .../expected/yb_planner_taqo_tuning_tests.out | 470 ++++++++++++++++--
 .../regress/expected/yb_select_parallel.out   |  18 +-
 .../sql/yb_planner_taqo_tuning_tests.sql      | 151 +++++-
 10 files changed, 983 insertions(+), 348 deletions(-)

diff --git a/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgCostModelSeekNextEstimation.java b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgCostModelSeekNextEstimation.java
index c277e5ded181..b3aaca90a36a 100644
--- a/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgCostModelSeekNextEstimation.java
+++ b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgCostModelSeekNextEstimation.java
@@ -103,6 +103,42 @@ private void testSeekAndNextEstimationIndexScanHelper(
     }
   }
 
+  private void testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(
+      Statement stmt, String query,
+      String table_name, String index_name,
+      double expected_seeks,
+      double expected_nexts,
+      Integer expected_docdb_result_width) throws Exception {
+    double expected_seeks_lower_bound = expected_seeks * SEEK_LOWER_BOUND_FACTOR
+        - SEEK_FAULT_TOLERANCE_OFFSET;
+    double expected_seeks_upper_bound = expected_seeks * SEEK_UPPER_BOUND_FACTOR
+        + SEEK_FAULT_TOLERANCE_OFFSET;
+    double expected_nexts_lower_bound = expected_nexts * NEXT_LOWER_BOUND_FACTOR
+        - NEXT_FAULT_TOLERANCE_OFFSET;
+    double expected_nexts_upper_bound = expected_nexts * NEXT_UPPER_BOUND_FACTOR
+        + NEXT_FAULT_TOLERANCE_OFFSET;
+    try {
+      testExplainDebug(stmt, query,
+          makeTopLevelBuilder()
+              .plan(makePlanBuilder()
+                  .nodeType(NODE_INDEX_SCAN)
+                  .relationName(table_name)
+                  .indexName(index_name)
+                  .estimatedSeeks(Checkers.closed(expected_seeks_lower_bound,
+                                                  expected_seeks_upper_bound))
+                  .estimatedNexts(Checkers.closed(expected_nexts_lower_bound,
+                                                  expected_nexts_upper_bound))
+                  .estimatedDocdbResultWidth(Checkers.equal(expected_docdb_result_width))
+                  .build())
+              .build());
+    }
+    catch (AssertionError e) {
+      LOG.info("Failed Query: " + query);
+      LOG.info(e.toString());
+      throw e;
+    }
+  }
+
   private void testSeekAndNextEstimationSeqScanHelper(
       Statement stmt, String query,
       String table_name, double expected_seeks,
@@ -423,4 +459,29 @@ public void testSeekNextEstimationSeqScan() throws Exception {
                                      T4_NAME, 67, 160065, 20);
} } + + @Test + public void testSeekNextEstimationStorageIndexFilters() throws Exception { + try (Statement stmt = this.connection2.createStatement()) { + stmt.execute("CREATE TABLE test (k1 INT, v1 INT)"); + stmt.execute("CREATE INDEX test_index_k1 ON test (k1 ASC)"); + stmt.execute("CREATE INDEX test_index_k1_v1 ON test (k1 ASC) INCLUDE (v1)"); + stmt.execute("INSERT INTO test (SELECT s, s FROM generate_series(1, 100000) s)"); + stmt.execute("ANALYZE test"); + + /* All rows matching the filter on k1 will be seeked in the base table, and the filter on v1 + * will be applied on the base table. + */ + testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(stmt, + "/*+IndexScan(test test_index_k1) */ SELECT * FROM test WHERE k1 > 50000 and v1 > 80000", + "test", "test_index_k1", 50000, 50000, 10); + + /* The filter on v1 will be executed on the included column in test_index_k1_v1. As a result, + * fewer seeks will be needed on the base table. + */ + testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(stmt, + "/*+IndexScan(test test_index_k1_v1) */ SELECT * FROM test WHERE k1 > 50000 and v1 > 80000", + "test", "test_index_k1_v1", 10000, 50000, 10); + } + } } diff --git a/src/postgres/src/backend/optimizer/path/costsize.c b/src/postgres/src/backend/optimizer/path/costsize.c index 96d0f4ed1ccd..3b3aabf00b58 100644 --- a/src/postgres/src/backend/optimizer/path/costsize.c +++ b/src/postgres/src/backend/optimizer/path/costsize.c @@ -98,6 +98,7 @@ #include "optimizer/predtest.h" #include "optimizer/restrictinfo.h" #include "optimizer/var.h" +#include "optimizer/ybcplan.h" #include "parser/parsetree.h" #include "utils/lsyscache.h" #include "utils/selfuncs.h" @@ -6133,8 +6134,8 @@ yb_get_docdb_result_width(Path *path, PlannerInfo* root, bool is_index_path, /* Collect the attributes used in each expression in the local filters. 
*/ foreach(lc, local_clauses) { - RestrictInfo *ri = lfirst_node(RestrictInfo, lc); - pull_varattnos_min_attr((Node*) ri->clause, baserel->relid, &attrs, + Expr *local_qual = (Expr*) lfirst(lc); + pull_varattnos_min_attr((Node*) local_qual, baserel->relid, &attrs, YBFirstLowInvalidAttributeNumber + 1); } } @@ -6253,13 +6254,9 @@ yb_cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel, RestrictInfo *ri = lfirst_node(RestrictInfo, lc); if (ri->yb_pushable) - { - pushed_down_clauses = lappend(pushed_down_clauses, ri); - } + pushed_down_clauses = lappend(pushed_down_clauses, ri->clause); else - { - local_clauses = lappend(local_clauses, ri); - } + local_clauses = lappend(local_clauses, ri->clause); } cost_qual_eval(&qual_cost, pushed_down_clauses, root); @@ -6412,10 +6409,12 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count, List *qpquals; Cost startup_cost = 0; Cost run_cost = 0; + Selectivity index_lookup_selectivity; Selectivity index_selectivity; List *qinfos; + double num_index_lookup_tuples; double num_index_tuples; - List *index_bound_quals; + List *index_conditions; int index_col; ListCell *lc; RangeTblEntry *rte; @@ -6423,7 +6422,7 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count, int32 index_tuple_width; /* TODO: Plug here the actual number of key-value pairs per tuple */ int num_key_value_pairs_per_tuple = - YB_DEFAULT_NUM_KEY_VALUE_PAIRS_PER_TUPLE; + YB_DEFAULT_NUM_KEY_VALUE_PAIRS_PER_TUPLE; /* TODO: Plug here the actual number of SST files for this index */ int num_sst_files = YB_DEFAULT_NUM_SST_FILES_PER_TABLE; Cost per_merge_cost; @@ -6432,14 +6431,16 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count, double num_seeks; double num_nexts; QualCost qual_cost; - double remote_filtered_rows; - List *pushed_down_clauses = NIL; + List *base_table_pushed_down_filters = NIL; + List *base_table_colrefs = NIL; + List *index_pushed_down_filters = NIL; + List *index_colrefs = NIL; List *local_clauses = NIL; int index_total_pages; int index_pages_fetched; int index_random_pages_fetched; int index_sequential_pages_fetched; - List **filters_on_each_column; + List **index_quals_on_each_column; List **index_qual_infos_on_each_column; bool previous_column_had_lower_bound; bool previous_column_had_upper_bound; @@ -6456,6 +6457,11 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count, rte = planner_rt_fetch(index->rel->relid, root); Assert(rte->rtekind == RTE_RELATION); baserel_oid = rte->relid; + + if (!enable_indexscan) + startup_cost += disable_cost; + /* we don't need to check enable_index_onlyscan; indxpath.c does that */ + baserel_tuple_width = yb_get_relation_data_width(baserel, baserel_oid); if (partial_path) @@ -6488,12 +6494,13 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count, baserel_oid, is_primary_index); + /* - * Mark the path with the correct row estimate, and identify which quals - * will need to be enforced as qpquals. We need not check any quals that - * are implied by the index's predicate, so we can use indrestrictinfo not - * baserestrictinfo as the list of relevant restriction clauses for the - * rel. + * Extract non-index conditions ie. filters. + * + * We need not check any quals that are implied by the index's predicate, + * so we can use indrestrictinfo not baserestrictinfo as the list of + * relevant restriction clauses for the rel. 
 */
 	if (path->path.param_info)
 	{
@@ -6513,18 +6520,35 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 											  path->indexquals);
 	}
 
-	if (!enable_indexscan)
-		startup_cost += disable_cost;
-	/* we don't need to check enable_index_onlyscan; indxpath.c does that */
+	/*
+	 * Sort the filters into `local_clauses`, `base_table_pushed_down_filters`
+	 * and `index_pushed_down_filters`.
+	 */
+
+	/*
+	 * Remote index filters are needed for secondary index scans.
+	 * For a primary index scan or an index only scan, we group all filters
+	 * under `base_table_pushed_down_filters`.
+	 */
+	bool need_remote_index_filters =
+		!index_only && !index->hypothetical && !is_primary_index;
+
+	extract_pushdown_clauses(qpquals,
+		need_remote_index_filters ? index : NULL,
+		&local_clauses, &base_table_pushed_down_filters, &base_table_colrefs,
+		&index_pushed_down_filters, &index_colrefs);
 
 	/* Do preliminary analysis of indexquals */
 	qinfos = deconstruct_indexquals(path);
 
-	/* Collect the filters for each index in a list of list structure */
-	filters_on_each_column = palloc0(sizeof(List*) * index->nkeycolumns);
+	/*
+	 * Sort the index conditions into `index_quals_on_each_column` and
+	 * `index_qual_infos_on_each_column` for future use.
+	 */
+	index_quals_on_each_column = palloc0(sizeof(List*) * index->nkeycolumns);
 	index_qual_infos_on_each_column =
-		palloc0(sizeof(List*) * index->nkeycolumns);
-	index_bound_quals = NIL;
+		palloc0(sizeof(List*) * index->nkeycolumns);
+	index_conditions = NIL;
 	index_col = 0;
 	foreach(lc, qinfos)
 	{
@@ -6537,17 +6561,33 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 			Assert(index_col < index->nkeycolumns);
 		}
 
-		filters_on_each_column[index_col] =
-			lappend(filters_on_each_column[index_col], rinfo);
+		index_quals_on_each_column[index_col] =
+			lappend(index_quals_on_each_column[index_col], rinfo);
 		index_qual_infos_on_each_column[index_col] =
 			lappend(index_qual_infos_on_each_column[index_col], qinfo);
-		index_bound_quals = lappend(index_bound_quals, rinfo);
+		index_conditions = lappend(index_conditions, rinfo);
 	}
 
 	/*
-	 * In the following logic, we estimate number of seeks and only the number
-	 * of nexts caused by seek forward optimization. Additional seeks are needed
-	 * which will be added later.
+	 * Compute the number of result rows returned from DocDB to pggate, by
+	 * considering the index conditions and all pushed down filters.
+	 */
+	List *all_conditions_and_filters = NIL;
+	all_conditions_and_filters = list_concat(all_conditions_and_filters,
+		list_copy(index_conditions));
+	all_conditions_and_filters = list_concat(all_conditions_and_filters,
+		list_copy(index_pushed_down_filters));
+	all_conditions_and_filters = list_concat(all_conditions_and_filters,
+		list_copy(base_table_pushed_down_filters));
+
+	double num_docdb_result_rows = clamp_row_est(
+		index->rel->tuples *
+		clauselist_selectivity(root, all_conditions_and_filters,
+							   baserel->relid, JOIN_INNER, NULL));
+
+	/*
+	 * Estimate the number of seeks, and only those nexts that are caused by
+	 * hybrid scan.
*/ num_seeks = 0; num_nexts = 0; @@ -6556,7 +6596,7 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count, for (int index_col = index->nkeycolumns - 1; index_col >= 0; --index_col) { List *filtersOnCurrentColumn = - filters_on_each_column[index_col]; + index_quals_on_each_column[index_col]; if (filtersOnCurrentColumn == NIL) { /* No filters on this index column */ @@ -6732,123 +6772,134 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count, } } - List *selectivityQuals; - /* - * If the index is partial, AND the index predicate with the - * index-bound quals to produce a more accurate idea of the number of - * rows covered by the bound conditions. + * Estimate the seek and next costs for the index. */ - selectivityQuals = add_predicate_to_quals(index, index_bound_quals); - - index_selectivity = - clauselist_selectivity(root, selectivityQuals, index->rel->relid, - JOIN_INNER, NULL); - num_index_tuples = - clamp_row_est(index_selectivity * index->rel->tuples); - - /* - * So far we have counted the number of nexts due to Seek Forward - * optimization. We still need to add the number of nexts between seeks. To - * keep things simple, we add one seek for each result row. - */ - num_nexts += num_index_tuples; + per_merge_cost = num_key_value_pairs_per_tuple * + yb_docdb_merge_cpu_cycles * cpu_operator_cost; + per_seek_cost = 0; - /* Non index filters will be executed as remote and local filters. */ - foreach(lc, qpquals) + if (index->rel->tuples > 0) { - RestrictInfo *ri = lfirst_node(RestrictInfo, lc); - - if (ri->yb_pushable) - { - pushed_down_clauses = lappend(pushed_down_clauses, ri); - } - else - { - local_clauses = lappend(local_clauses, ri); - } + per_seek_cost = yb_get_lsm_seek_cost(index->rel->tuples, + num_key_value_pairs_per_tuple, + num_sst_files) + + per_merge_cost; } + per_next_cost = (yb_docdb_next_cpu_cycles * cpu_operator_cost) + + per_merge_cost; - remote_filtered_rows = - clamp_row_est(num_index_tuples * - clauselist_selectivity(root, pushed_down_clauses, - baserel->relid, JOIN_INNER, NULL)); + if (path->indexscandir == BackwardScanDirection) + { + per_next_cost *= yb_backward_seek_cost_factor; + } + /* + * Estimate seeks due to result paging + * + * In case of colocated, we lookup the index, then find the matching rows in + * base table and return the result in pages. Each result page causes one + * additional seek. + * + * TODO: In case of non-colocated, we first scan the index and return the + * ybctid to pggate. pggate, then looks up the base table using the ybctids. + * This round trip is currently not modeled in the cost model, as we focus + * on colocated scenario first. 
+ */ docdb_result_width = yb_get_docdb_result_width(&path->path, root, true /* is_index_path */, is_primary_index, index_only, - index_bound_quals, + index_conditions, local_clauses, baserel_tuple_width, baserel, baserel_oid); path->yb_estimated_docdb_result_width = docdb_result_width; - num_result_pages = yb_get_num_result_pages(remote_filtered_rows, + num_result_pages = yb_get_num_result_pages(num_docdb_result_rows, docdb_result_width); /* Add seeks and nexts for result pages */ num_seeks += num_result_pages; num_nexts += num_result_pages - 1; - path->yb_estimated_num_nexts = num_nexts; - path->yb_estimated_num_seeks = num_seeks; - - /** - * LSM index seek and next costs - */ - per_merge_cost = num_key_value_pairs_per_tuple * - yb_docdb_merge_cpu_cycles * cpu_operator_cost; - per_seek_cost = 0; - - if (index->rel->tuples > 0) + List *index_conditions_and_filters = NIL; + index_conditions_and_filters = list_concat(index_conditions_and_filters, + list_copy(index_conditions)); + if (need_remote_index_filters) { - per_seek_cost = yb_get_lsm_seek_cost(index->rel->tuples, - num_key_value_pairs_per_tuple, - num_sst_files) + - per_merge_cost; + index_conditions_and_filters = list_concat(index_conditions_and_filters, + list_copy(index_pushed_down_filters)); } - per_next_cost = (yb_docdb_next_cpu_cycles * cpu_operator_cost) + - per_merge_cost; - - if (path->indexscandir == BackwardScanDirection) + else { - per_next_cost *= yb_backward_seek_cost_factor; + /* Either index only lookup, or primary index lookup */ + index_conditions_and_filters = list_concat(index_conditions_and_filters, + list_copy(base_table_pushed_down_filters)); } - run_cost += - num_seeks * per_seek_cost + num_nexts * per_next_cost; - - /* Non index filters will be executed as remote and local filters. */ - foreach(lc, qpquals) - { - RestrictInfo *ri = lfirst_node(RestrictInfo, lc); + /* + * The index conditions and filters need to be checked only on index tuples + * that match the index condition. + * + * Additionally, if the index is partial, we include index predicate with + * index conditions to produce a more accurate idea of the number of + * rows covered by the index conditions. + */ + List *index_predicates_and_conditions = NIL; + index_predicates_and_conditions = + add_predicate_to_quals(index, index_conditions); - if (ri->yb_pushable) - { - pushed_down_clauses = lappend(pushed_down_clauses, ri); - } - else - { - local_clauses = lappend(local_clauses, ri); - } - } + index_lookup_selectivity = + clauselist_selectivity(root, index_predicates_and_conditions, + index->rel->relid, JOIN_INNER, NULL); - bool has_pushed_down_clauses = list_length(pushed_down_clauses) > 0; + num_index_lookup_tuples = + clamp_row_est(index_lookup_selectivity * index->rel->tuples); - /** - * DocDB must execute index filter on each row. An overhead is added due to - * context switching between PG and DocDB. + /* + * TODO (#16178) DocDB must check the index conditions on each row. This is + * needed for hybrid scan, but can be avoided in cases where hybrid scan is + * not used. This additional cost is modeled here. For checking the index + * conditions, there is an additional overhead that is modeled using + * yb_docdb_remote_filter_overhead_cycles. + * + * In addition, the remote index filters will be executed for each row + * that matches the index conditions. 
*/ - cost_qual_eval(&qual_cost, index_bound_quals, root); - Cost per_tuple_qual_cost = - qual_cost.per_tuple + - (yb_docdb_remote_filter_overhead_cycles * - cpu_operator_cost * has_pushed_down_clauses); + cost_qual_eval(&qual_cost, index_conditions_and_filters, root); + Cost per_tuple_qual_cost = qual_cost.per_tuple + + (yb_docdb_remote_filter_overhead_cycles * + cpu_operator_cost); startup_cost += qual_cost.startup; - run_cost += per_tuple_qual_cost * num_index_tuples; + run_cost += per_tuple_qual_cost * num_index_lookup_tuples; + + /* + * Additional nexts are needed for each key lookup. We cannot estimate the + * nexts needed for each key, but we add 1 next for each key. + */ + num_nexts += num_index_lookup_tuples; + + /* Add the seek and next costs to the total. */ + run_cost += + num_seeks * per_seek_cost + num_nexts * per_next_cost; + + /* + * Estimate number of index tuples that match the index predicate, + * conditions and remote index filters. + */ + List *index_predicates_conditions_and_filters = NIL; + index_predicates_conditions_and_filters = + add_predicate_to_quals(index, index_conditions_and_filters); + + index_selectivity = + clauselist_selectivity(root, index_predicates_conditions_and_filters, + index->rel->relid, JOIN_INNER, NULL); - /** + num_index_tuples = + clamp_row_est(index_selectivity * index->rel->tuples); + + /* * Compute disk fetch costs. We make following assumptions. * 1. The number of index pages actually fetched is based on selectivity of * the filter. @@ -6901,33 +6952,47 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count, per_merge_cost; } - /* DocDB performs a seek for each lookup in the base table. This may + /* + * DocDB performs a seek for each lookup in the base table. This may * be optimized in the future. */ - int num_baserel_seeks = num_index_tuples; + num_seeks += num_index_tuples; - path->yb_estimated_num_seeks += num_baserel_seeks; + startup_cost += baserel_per_seek_cost; + run_cost += (baserel_per_seek_cost * num_index_tuples); - run_cost += (baserel_per_seek_cost * num_baserel_seeks); + /* + * Base table remote filters will be applied to each base table row that + * is looked up. 
+ */ + if (list_length(base_table_pushed_down_filters) > 0) + { + cost_qual_eval(&qual_cost, base_table_pushed_down_filters, root); + Cost per_tuple_qual_cost = qual_cost.per_tuple + + (yb_docdb_remote_filter_overhead_cycles * + cpu_operator_cost); + + startup_cost += qual_cost.startup; + run_cost += per_tuple_qual_cost * num_index_tuples; + } int num_docdb_blocks_fetched = - ceil(remote_filtered_rows * baserel_tuple_width / YB_DEFAULT_DOCDB_BLOCK_SIZE); + ceil(num_index_tuples * baserel_tuple_width / YB_DEFAULT_DOCDB_BLOCK_SIZE); run_cost += num_docdb_blocks_fetched * yb_random_block_cost; } - cost_qual_eval(&qual_cost, pushed_down_clauses, root); - startup_cost += qual_cost.startup; - run_cost += qual_cost.per_tuple * remote_filtered_rows; + path->yb_estimated_num_nexts = num_nexts; + path->yb_estimated_num_seeks = num_seeks; /* Network latency cost is added to startup cost */ startup_cost += yb_local_latency_cost; - run_cost += yb_compute_result_transfer_cost(remote_filtered_rows, + run_cost += yb_compute_result_transfer_cost(num_docdb_result_rows, docdb_result_width); /* Local filter costs */ cost_qual_eval(&qual_cost, local_clauses, root); startup_cost += qual_cost.startup; - run_cost += qual_cost.per_tuple * remote_filtered_rows; + run_cost += qual_cost.per_tuple * num_docdb_result_rows; /* tlist eval costs are paid per output row, not per tuple scanned */ startup_cost += path->path.pathtarget->cost.startup; diff --git a/src/postgres/src/backend/optimizer/plan/createplan.c b/src/postgres/src/backend/optimizer/plan/createplan.c index ab798b05ac57..0f241588fab3 100644 --- a/src/postgres/src/backend/optimizer/plan/createplan.c +++ b/src/postgres/src/backend/optimizer/plan/createplan.c @@ -43,6 +43,7 @@ #include "optimizer/subselect.h" #include "optimizer/tlist.h" #include "optimizer/var.h" +#include "optimizer/ybcplan.h" #include "parser/parse_clause.h" #include "parser/parsetree.h" #include "partitioning/partprune.h" @@ -145,13 +146,6 @@ static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, List **qual, List **indexqual, List **indexECs); static void bitmap_subplan_mark_shared(Plan *plan); static List *flatten_partitioned_rels(List *partitioned_rels); -static void extract_pushdown_clauses(List *restrictinfo_list, - IndexOptInfo *indexinfo, - List **local_quals, - List **rel_remote_quals, - List **rel_colrefs, - List **idx_remote_quals, - List **idx_colrefs); static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path, List *tlist, List *scan_clauses); static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root, @@ -6444,115 +6438,6 @@ flatten_partitioned_rels(List *partitioned_rels) return newlist; } -/* - * is_index_only_refs - * Check if all column references from the list are available from the - * index described by the indexinfo. - */ -static bool -is_index_only_refs(List *colrefs, IndexOptInfo *indexinfo) -{ - ListCell *lc; - foreach (lc, colrefs) - { - bool found = false; - YbExprColrefDesc *colref = castNode(YbExprColrefDesc, lfirst(lc)); - for (int i = 0; i < indexinfo->ncolumns; i++) - { - if (colref->attno == indexinfo->indexkeys[i]) - { - /* - * If index key can not return, it does not have actual value - * to evaluate the expression. 
- */ - if (indexinfo->canreturn[i]) - { - found = true; - break; - } - else - return false; - } - } - if (!found) - return false; - } - return true; -} - -/* - * extract_pushdown_clauses - * Extract actual clauses from RestrictInfo list and distribute them - * between three groups: - * - local_quals - conditions not eligible for pushdown. They are evaluated - * on the Postgres side on the rows fetched from DocDB; - * - rel_remote_quals - conditions to pushdown with the request to the main - * scanned relation. In the case of sequential scan or index only scan - * the DocDB table or DocDB index respectively is the main (and only) - * scanned relation, so the function returns only two groups; - * - idx_remote_quals - conditions to pushdown with the request to the - * secondary (index) relation. Used with the index scan on a secondary - * index, and caller must provide IndexOptInfo record for the index. - * - rel_colrefs, idx_colrefs are columns referenced by respective - * rel_remote_quals or idx_remote_quals. - * The output parameters local_quals, rel_remote_quals, rel_colrefs must - * point to valid lists. The output parameters idx_remote_quals and - * idx_colrefs may be NULL if the indexinfo is NULL. - */ -static void -extract_pushdown_clauses(List *restrictinfo_list, - IndexOptInfo *indexinfo, - List **local_quals, - List **rel_remote_quals, - List **rel_colrefs, - List **idx_remote_quals, - List **idx_colrefs) -{ - ListCell *lc; - foreach(lc, restrictinfo_list) - { - RestrictInfo *ri = lfirst_node(RestrictInfo, lc); - /* ignore pseudoconstants */ - if (ri->pseudoconstant) - continue; - - if (ri->yb_pushable) - { - List *colrefs = NIL; - bool pushable PG_USED_FOR_ASSERTS_ONLY; - - /* - * Find column references. It has already been determined that - * the expression is pushable. - */ - pushable = YbCanPushdownExpr(ri->clause, &colrefs); - Assert(pushable); - - /* - * If there are both main and secondary (index) relations, - * determine one to pushdown the condition. It is more efficient - * to apply filter earlier, so prefer index, if it has all the - * necessary columns. - */ - if (indexinfo == NULL || - !is_index_only_refs(colrefs, indexinfo)) - { - *rel_colrefs = list_concat(*rel_colrefs, colrefs); - *rel_remote_quals = lappend(*rel_remote_quals, ri->clause); - } - else - { - *idx_colrefs = list_concat(*idx_colrefs, colrefs); - *idx_remote_quals = lappend(*idx_remote_quals, ri->clause); - } - } - else - { - *local_quals = lappend(*local_quals, ri->clause); - } - } -} - /***************************************************************************** * * PLAN NODE BUILDING ROUTINES diff --git a/src/postgres/src/backend/optimizer/util/ybcplan.c b/src/postgres/src/backend/optimizer/util/ybcplan.c index dc6d5a981e49..e3721e36f2c4 100644 --- a/src/postgres/src/backend/optimizer/util/ybcplan.c +++ b/src/postgres/src/backend/optimizer/util/ybcplan.c @@ -187,3 +187,112 @@ bool YBCAllPrimaryKeysProvided(Relation rel, Bitmapset *attrs) /* Verify the sets are the same. */ return bms_equal(attrs, primary_key_attrs); } + +/* + * is_index_only_refs + * Check if all column references from the list are available from the + * index described by the indexinfo. 
+ */ +bool +is_index_only_refs(List *colrefs, IndexOptInfo *indexinfo) +{ + ListCell *lc; + foreach (lc, colrefs) + { + bool found = false; + YbExprColrefDesc *colref = castNode(YbExprColrefDesc, lfirst(lc)); + for (int i = 0; i < indexinfo->ncolumns; i++) + { + if (colref->attno == indexinfo->indexkeys[i]) + { + /* + * If index key can not return, it does not have actual value + * to evaluate the expression. + */ + if (indexinfo->canreturn[i]) + { + found = true; + break; + } + else + return false; + } + } + if (!found) + return false; + } + return true; +} + +/* + * extract_pushdown_clauses + * Extract actual clauses from RestrictInfo list and distribute them + * between three groups: + * - local_quals - conditions not eligible for pushdown. They are evaluated + * on the Postgres side on the rows fetched from DocDB; + * - rel_remote_quals - conditions to pushdown with the request to the main + * scanned relation. In the case of sequential scan or index only scan + * the DocDB table or DocDB index respectively is the main (and only) + * scanned relation, so the function returns only two groups; + * - idx_remote_quals - conditions to pushdown with the request to the + * secondary (index) relation. Used with the index scan on a secondary + * index, and caller must provide IndexOptInfo record for the index. + * - rel_colrefs, idx_colrefs are columns referenced by respective + * rel_remote_quals or idx_remote_quals. + * The output parameters local_quals, rel_remote_quals, rel_colrefs must + * point to valid lists. The output parameters idx_remote_quals and + * idx_colrefs may be NULL if the indexinfo is NULL. + */ +void +extract_pushdown_clauses(List *restrictinfo_list, + IndexOptInfo *indexinfo, + List **local_quals, + List **rel_remote_quals, + List **rel_colrefs, + List **idx_remote_quals, + List **idx_colrefs) +{ + ListCell *lc; + foreach(lc, restrictinfo_list) + { + RestrictInfo *ri = lfirst_node(RestrictInfo, lc); + /* ignore pseudoconstants */ + if (ri->pseudoconstant) + continue; + + if (ri->yb_pushable) + { + List *colrefs = NIL; + bool pushable PG_USED_FOR_ASSERTS_ONLY; + + /* + * Find column references. It has already been determined that + * the expression is pushable. + */ + pushable = YbCanPushdownExpr(ri->clause, &colrefs); + Assert(pushable); + + /* + * If there are both main and secondary (index) relations, + * determine one to pushdown the condition. It is more efficient + * to apply filter earlier, so prefer index, if it has all the + * necessary columns. 
+ */ + if (indexinfo == NULL || + !is_index_only_refs(colrefs, indexinfo)) + { + *rel_colrefs = list_concat(*rel_colrefs, colrefs); + *rel_remote_quals = lappend(*rel_remote_quals, ri->clause); + } + else + { + *idx_colrefs = list_concat(*idx_colrefs, colrefs); + *idx_remote_quals = lappend(*idx_remote_quals, ri->clause); + } + } + else + { + *local_quals = lappend(*local_quals, ri->clause); + } + } +} diff --git a/src/postgres/src/include/optimizer/ybcplan.h b/src/postgres/src/include/optimizer/ybcplan.h index 8e790c2e5ea5..4efe5f7d1080 100644 --- a/src/postgres/src/include/optimizer/ybcplan.h +++ b/src/postgres/src/include/optimizer/ybcplan.h @@ -33,3 +33,13 @@ bool YBCIsSingleRowModify(PlannedStmt *pstmt); bool YbCanSkipFetchingTargetTupleForModifyTable(ModifyTable *modifyTable); bool YBCAllPrimaryKeysProvided(Relation rel, Bitmapset *attrs); + +bool is_index_only_refs(List *colrefs, IndexOptInfo *indexinfo); + +void extract_pushdown_clauses(List *restrictinfo_list, + IndexOptInfo *indexinfo, + List **local_quals, + List **rel_remote_quals, + List **rel_colrefs, + List **idx_remote_quals, + List **idx_colrefs); diff --git a/src/postgres/src/test/regress/expected/yb_join_batching_plans.out b/src/postgres/src/test/regress/expected/yb_join_batching_plans.out index f428916e8716..88703d7913d6 100644 --- a/src/postgres/src/test/regress/expected/yb_join_batching_plans.out +++ b/src/postgres/src/test/regress/expected/yb_join_batching_plans.out @@ -107,28 +107,26 @@ EXPLAIN (COSTS OFF) SELECT * FROM p1 t1 JOIN p2 t2 ON t1.a - 1 = t2.a + 1 WHERE (5 rows) explain (costs off) select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a <= 30; - QUERY PLAN ------------------------------------------------------------------ - Merge Left Join - Merge Cond: ((((p1.a - 1)) = p5.a) AND (((p1.b - 1)) = p5.b)) - -> Sort - Sort Key: ((p1.a - 1)), ((p1.b - 1)) + QUERY PLAN +------------------------------------------------------------ + Hash Right Join + Hash Cond: ((p5.a = (p1.a - 1)) AND (p5.b = (p1.b - 1))) + -> Seq Scan on p5 + -> Hash -> Seq Scan on p1 Storage Filter: (a <= 30) - -> Index Scan using p5_pkey on p5 -(7 rows) +(6 rows) /*+NoYbBatchedNL(p1 p5)*/explain (costs off) select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a <= 30; - QUERY PLAN ------------------------------------------------------------------ - Merge Left Join - Merge Cond: ((((p1.a - 1)) = p5.a) AND (((p1.b - 1)) = p5.b)) - -> Sort - Sort Key: ((p1.a - 1)), ((p1.b - 1)) + QUERY PLAN +------------------------------------------------------------ + Hash Right Join + Hash Cond: ((p5.a = (p1.a - 1)) AND (p5.b = (p1.b - 1))) + -> Seq Scan on p5 + -> Hash -> Seq Scan on p1 Storage Filter: (a <= 30) - -> Index Scan using p5_pkey on p5 -(7 rows) +(6 rows) /*+IndexScan(p5 p5_hash)*/explain (costs off) select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a <= 30; QUERY PLAN @@ -556,8 +554,8 @@ explain (costs off) select q2.c1, q1.c1 from q1 join q2 on q1.c2 = q2.c2 order b create table q3(a int, b int, c name, primary key(a,b)); create index q3_range on q3(a asc); explain (costs off) select * from q1 p1 left join (SELECT p2.c1 as a1, p3.a as a2 from q2 p2 join q3 p3 on true) j1 on j1.a1 = p1.c1; - QUERY PLAN ------------------------------------------------ + QUERY PLAN +------------------------------------- Hash Right Join Hash Cond: (p2.c1 = p1.c1) -> Nested Loop @@ -565,15 +563,15 @@ explain (costs off) select * from q1 p1 left join (SELECT p2.c1 as a1, p3.a as a -> 
Materialize -> Seq Scan on q3 p3 -> Hash - -> Index Scan using q1_pkey on q1 p1 + -> Seq Scan on q1 p1 (8 rows) -- this should not be a batched NL join as it contains an unbatchable clause -- (j1.a2 <= p1.c1) even though the batchable clause (j1.a1 = p1.c1) is also -- present explain (costs off) select * from q1 p1 left join (SELECT p2.c1 as a1, p3.a as a2 from q2 p2 join q3 p3 on true) j1 on j1.a1 = p1.c1 and j1.a2 <= p1.c1; - QUERY PLAN ------------------------------------------------ + QUERY PLAN +------------------------------------- Hash Right Join Hash Cond: (p2.c1 = p1.c1) Join Filter: (p3.a <= p1.c1) @@ -582,7 +580,7 @@ explain (costs off) select * from q1 p1 left join (SELECT p2.c1 as a1, p3.a as a -> Materialize -> Seq Scan on q3 p3 -> Hash - -> Index Scan using q1_pkey on q1 p1 + -> Seq Scan on q1 p1 (9 rows) DROP TABLE q1; diff --git a/src/postgres/src/test/regress/expected/yb_parallel_colocated.out b/src/postgres/src/test/regress/expected/yb_parallel_colocated.out index 27d41303b698..d6fbc6674aee 100644 --- a/src/postgres/src/test/regress/expected/yb_parallel_colocated.out +++ b/src/postgres/src/test/regress/expected/yb_parallel_colocated.out @@ -340,13 +340,13 @@ SELECT c, count(*) FROM pctest1 WHERE c > 40 GROUP BY c; EXPLAIN (costs off) SELECT pctest1.* FROM pctest1, pctest2 WHERE pctest1.a = pctest2.b and pctest1.a % 10 = 0; - QUERY PLAN ---------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------- Gather Workers Planned: 2 -> Parallel Hash Join Hash Cond: (pctest2.b = pctest1.a) - -> Parallel Index Only Scan using pctest2_b_idx on pctest2 + -> Parallel Seq Scan on pctest2 -> Parallel Hash -> Parallel Seq Scan on pctest1 Storage Filter: ((a % 10) = 0) @@ -478,9 +478,9 @@ SELECT x, d FROM Workers Planned: 2 -> Parallel Hash Join Hash Cond: ((pctest1.k = pctest2.k) AND (pctest1.c = pctest2.c)) - -> Parallel Index Scan using pctest1_pkey on pctest1 + -> Parallel Seq Scan on pctest1 -> Parallel Hash - -> Parallel Index Scan using pctest2_pkey on pctest2 + -> Parallel Seq Scan on pctest2 -> Hash -> Values Scan on "*VALUES*" (13 rows) @@ -504,26 +504,28 @@ SELECT * FROM WHERE pctest1.k = pctest2.k AND pctest1.c = pctest2.c) s1 JOIN (SELECT pctest2.* FROM pctest1, pctest2 WHERE pctest1.k = pctest2.k AND pctest1.b = pctest2.b) s2 ON s1.b = s2.c; - QUERY PLAN ----------------------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------------- Nested Loop Join Filter: (pctest1.b = pctest2_1.c) -> Gather Workers Planned: 2 -> Parallel Hash Join Hash Cond: ((pctest1.k = pctest2.k) AND (pctest1.c = pctest2.c)) - -> Parallel Index Scan using pctest1_pkey on pctest1 + -> Parallel Seq Scan on pctest1 -> Parallel Hash - -> Parallel Index Scan using pctest2_pkey on pctest2 + -> Parallel Seq Scan on pctest2 -> Materialize - -> Gather - Workers Planned: 2 - -> Parallel Hash Join - Hash Cond: ((pctest1_1.k = pctest2_1.k) AND (pctest1_1.b = pctest2_1.b)) - -> Parallel Index Scan using pctest1_pkey on pctest1 pctest1_1 - -> Parallel Hash - -> Parallel Index Scan using pctest2_pkey on pctest2 pctest2_1 -(17 rows) + -> Hash Join + Hash Cond: ((pctest2_1.k = pctest1_1.k) AND (pctest2_1.b = pctest1_1.b)) + -> Gather + Workers Planned: 2 + -> Parallel Seq Scan on pctest2 pctest2_1 + -> Hash + -> Gather + Workers Planned: 2 + -> Parallel Seq Scan on pctest1 pctest1_1 +(19 rows) SELECT * FROM (SELECT pctest1.* FROM 
pctest1, pctest2 diff --git a/src/postgres/src/test/regress/expected/yb_planner_taqo_tuning_tests.out b/src/postgres/src/test/regress/expected/yb_planner_taqo_tuning_tests.out index 55c42e5c4ece..019df9e4afe0 100644 --- a/src/postgres/src/test/regress/expected/yb_planner_taqo_tuning_tests.out +++ b/src/postgres/src/test/regress/expected/yb_planner_taqo_tuning_tests.out @@ -1,37 +1,42 @@ CREATE DATABASE taqo_tuning_tests with colocation = true; \c taqo_tuning_tests SET statement_timeout = '7200s'; -SET enable_bitmapscan = false; -- TODO(#20573): update bitmap scan cost model -- CREATE QUERIES -CREATE TABLE t_range_100k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); +CREATE TABLE t_range_100k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); CREATE INDEX tr100kv1 ON t_range_100k (v1 ASC); -CREATE TABLE t_range_200k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); +CREATE TABLE t_range_200k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); CREATE INDEX tr200kv1 ON t_range_200k (v1 ASC); -CREATE TABLE t_range_300k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); +CREATE TABLE t_range_300k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); CREATE INDEX tr300kv1 ON t_range_300k (v1 ASC); -CREATE TABLE t_range_400k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); +CREATE TABLE t_range_400k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); CREATE INDEX tr400kv1 ON t_range_400k (v1 ASC); -CREATE TABLE t_range_500k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); +CREATE TABLE t_range_500k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); CREATE INDEX tr500kv1 ON t_range_500k (v1 ASC); -CREATE TABLE t_range_600k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); +CREATE TABLE t_range_600k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); CREATE INDEX tr600kv1 ON t_range_600k (v1 ASC); -CREATE TABLE t_range_700k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); +CREATE TABLE t_range_700k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); CREATE INDEX tr700kv1 ON t_range_700k (v1 ASC); -CREATE TABLE t_range_800k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); +CREATE TABLE t_range_800k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); CREATE INDEX tr800kv1 ON t_range_800k (v1 ASC); -CREATE TABLE t_range_900k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); +CREATE TABLE t_range_900k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); CREATE INDEX tr900kv1 ON t_range_900k (v1 ASC); -CREATE TABLE t_range_1m (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); +CREATE TABLE t_range_1m (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); CREATE INDEX tr1mv1 ON t_range_1m (v1 ASC); -CREATE TABLE t_range_100k_1update (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); -CREATE TABLE t_range_100k_2update (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); -CREATE TABLE t_range_100k_3update (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); -CREATE TABLE t_range_100k_4update (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT); +CREATE TABLE t_range_100k_1update (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); +CREATE TABLE t_range_100k_2update (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); +CREATE TABLE t_range_100k_3update (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); +CREATE TABLE t_range_100k_4update (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC)); CREATE 
TABLE t_range_100k_1column (id INT PRIMARY KEY); -CREATE TABLE t_range_100k_2column (id INT PRIMARY KEY, v1 INT); -CREATE TABLE t_range_100k_3column (id INT PRIMARY KEY, v1 INT, v2 INT); -CREATE TABLE t_range_100k_4column (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT); -CREATE TABLE t_range_1m_4keys (k1 INT, k2 INT, k3 INT, k4 INT, v1 INT, PRIMARY KEY (k1, k2, k3, k4)); +CREATE TABLE t_range_100k_2column (id INT, v1 INT, PRIMARY KEY (id ASC)); +CREATE TABLE t_range_100k_3column (id INT, v1 INT, v2 INT, PRIMARY KEY (id ASC)); +CREATE TABLE t_range_100k_4column (id INT, v1 INT, v2 INT, v3 INT, PRIMARY KEY (id ASC)); +CREATE TABLE t_range_1m_4keys (k1 INT, k2 INT, k3 INT, k4 INT, v1 INT, PRIMARY KEY (k1 ASC, k2 ASC, k3 ASC, k4 ASC)); +CREATE TABLE t_int_100k (v1 int, v2 int, v3 int, v4 int, v5 int, v6 int, v7 int, v8 int); +CREATE TABLE t_numeric_100k (v1 numeric(12,6), v2 numeric(12,6), v3 numeric(12,6), v4 numeric(12,6), v5 numeric(12,6), v6 numeric(12,6), v7 numeric(12,6), v8 numeric(12,6)); +CREATE TABLE t_real_100k (v1 real, v2 real, v3 real, v4 real, v5 real, v6 real, v7 real, v8 real); +CREATE TABLE t_char4_100k (v1 char(4), v2 char(4), v3 char(4), v4 char(4), v5 char(4), v6 char(4), v7 char(4), v8 char(4)); +CREATE TABLE t_char8_100k (v1 char(8), v2 char(8), v3 char(8), v4 char(8), v5 char(8), v6 char(8), v7 char(8), v8 char(8)); +CREATE TABLE t_char16_100k (v1 char(16), v2 char(16), v3 char(16), v4 char(16), v5 char(16), v6 char(16), v7 char(16), v8 char(16)); SET yb_non_ddl_txn_for_sys_tables_allowed = ON; UPDATE pg_class SET reltuples = 100000, relpages = 0 WHERE relnamespace = 'public'::regnamespace AND (relname = 't_range_100k' OR relname = 't_range_100k_pkey'); UPDATE pg_class SET reltuples = 100000, relpages = 0 WHERE relnamespace = 'public'::regnamespace AND (relname = 't_range_100k_1column' OR relname = 't_range_100k_1column_pkey'); @@ -258,8 +263,7 @@ SET pg_hint_plan.debug_print = ON; SET client_min_messages TO log; SET pg_hint_plan.message_level = debug; SET temp_file_limit="8182MB"; -set yb_bnl_batch_size = 1024; set yb_enable_base_scans_cost_model = true; -SET yb_enable_optimizer_statistics = true; +SET yb_enable_optimizer_statistics = true; set yb_bnl_batch_size = 1024; set yb_enable_base_scans_cost_model = true; -- Query Hash: 1dc3153f1e9ed56fa96c3cd1b27e0b07 EXPLAIN (COSTS OFF) select * from t_range_100k order by id limit 2000; QUERY PLAN @@ -340,6 +344,198 @@ EXPLAIN (COSTS OFF) select * from t_range_100k order by id limit 20000; -> Index Scan using t_range_100k_pkey on t_range_100k (2 rows) +-- Query Hash: d0f652bb9a03d4d0079382c8a675ddf5 +EXPLAIN (COSTS OFF) SELECT v1 FROM t_char4_100k limit 40960; + QUERY PLAN +-------------------------------- + Limit + -> Seq Scan on t_char4_100k +(2 rows) + +-- Query Hash: 00e91b30ef109150e085eccecd169799 +EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_char4_100k limit 20480; + QUERY PLAN +-------------------------------- + Limit + -> Seq Scan on t_char4_100k +(2 rows) + +-- Query Hash: 2f633a7b6847d1b1f71d16509675ba5c +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_char4_100k limit 10240; + QUERY PLAN +-------------------------------- + Limit + -> Seq Scan on t_char4_100k +(2 rows) + +-- Query Hash: b21a31bdafc618945ec67c144587cad6 +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_char4_100k limit 5120; + QUERY PLAN +-------------------------------- + Limit + -> Seq Scan on t_char4_100k +(2 rows) + +-- Query Hash: 14737481d661ae29bbf9c3248e627803 +EXPLAIN (COSTS OFF) SELECT v1 FROM t_char8_100k limit 40960; + QUERY 
PLAN +-------------------------------- + Limit + -> Seq Scan on t_char8_100k +(2 rows) + +-- Query Hash: 6c4e495181e65578bddfa9cc82dce4be +EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_char8_100k limit 20480; + QUERY PLAN +-------------------------------- + Limit + -> Seq Scan on t_char8_100k +(2 rows) + +-- Query Hash: 31106354dd73743581a298fb292373fe +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_char8_100k limit 10240; + QUERY PLAN +-------------------------------- + Limit + -> Seq Scan on t_char8_100k +(2 rows) + +-- Query Hash: adb2fabb5dba6de5021647f0dcd506ba +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_char8_100k limit 5120; + QUERY PLAN +-------------------------------- + Limit + -> Seq Scan on t_char8_100k +(2 rows) + +-- Query Hash: 3ea29929b4cdc5cfe987519cfc418dc1 +EXPLAIN (COSTS OFF) SELECT v1 FROM t_char16_100k limit 40960; + QUERY PLAN +--------------------------------- + Limit + -> Seq Scan on t_char16_100k +(2 rows) + +-- Query Hash: 29e65871465d1e4fd8e3d57d9d82cc5a +EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_char16_100k limit 20480; + QUERY PLAN +--------------------------------- + Limit + -> Seq Scan on t_char16_100k +(2 rows) + +-- Query Hash: 867c2d4aefb45b79edce4a60c0117472 +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_char16_100k limit 10240; + QUERY PLAN +--------------------------------- + Limit + -> Seq Scan on t_char16_100k +(2 rows) + +-- Query Hash: 2e621b26de4391e611d02be624e0cc08 +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_char16_100k limit 5120; + QUERY PLAN +--------------------------------- + Limit + -> Seq Scan on t_char16_100k +(2 rows) + +-- Query Hash: 447c4552a30004c9748c19f967c090a9 +EXPLAIN (COSTS OFF) SELECT v1 FROM t_int_100k limit 40960; + QUERY PLAN +------------------------------ + Limit + -> Seq Scan on t_int_100k +(2 rows) + +-- Query Hash: 42a05c9206ce8da9e8e280b855ef8267 +EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_int_100k limit 20480; + QUERY PLAN +------------------------------ + Limit + -> Seq Scan on t_int_100k +(2 rows) + +-- Query Hash: e70dcebc0338929507048f3d8ee117a6 +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_int_100k limit 10240; + QUERY PLAN +------------------------------ + Limit + -> Seq Scan on t_int_100k +(2 rows) + +-- Query Hash: 9d09c8d070ba139698c7dd036e8bd213 +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_int_100k limit 5120; + QUERY PLAN +------------------------------ + Limit + -> Seq Scan on t_int_100k +(2 rows) + +-- Query Hash: 342457e804c0bd74a91e1dd7b2a39129 +EXPLAIN (COSTS OFF) SELECT v1 FROM t_numeric_100k limit 40960; + QUERY PLAN +---------------------------------- + Limit + -> Seq Scan on t_numeric_100k +(2 rows) + +-- Query Hash: c6690513dd017b932bd5405fab3a45f3 +EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_numeric_100k limit 20480; + QUERY PLAN +---------------------------------- + Limit + -> Seq Scan on t_numeric_100k +(2 rows) + +-- Query Hash: 48f6a609f0b240efb26d9e24022269e5 +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_numeric_100k limit 10240; + QUERY PLAN +---------------------------------- + Limit + -> Seq Scan on t_numeric_100k +(2 rows) + +-- Query Hash: 2e12b7d787ee03f9184d287c5150bbc3 +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_numeric_100k limit 5120; + QUERY PLAN +---------------------------------- + Limit + -> Seq Scan on t_numeric_100k +(2 rows) + +-- Query Hash: 3058b97e0c5b78a7afcc14b01dc7a0f1 +EXPLAIN (COSTS OFF) SELECT v1 FROM t_real_100k limit 40960; + QUERY PLAN +------------------------------- + 
Limit + -> Seq Scan on t_real_100k +(2 rows) + +-- Query Hash: f180e2b23646b995be28029bd68a1c50 +EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_real_100k limit 20480; + QUERY PLAN +------------------------------- + Limit + -> Seq Scan on t_real_100k +(2 rows) + +-- Query Hash: dcc707d034bab4f4b8bf6a45d2b54d19 +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_real_100k limit 10240; + QUERY PLAN +------------------------------- + Limit + -> Seq Scan on t_real_100k +(2 rows) + +-- Query Hash: 32dd0af7a84cabeec056261ba6fb8257 +EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_real_100k limit 5120; + QUERY PLAN +------------------------------- + Limit + -> Seq Scan on t_real_100k +(2 rows) + -- Query Hash: cd9bc3bcc61879e6b7ead7f119f8b650 EXPLAIN (COSTS OFF) select * from t_range_1m_4keys where k1 > 2 and k1 < 8 and k2 > 5 and k2 < 10; QUERY PLAN @@ -428,6 +624,55 @@ EXPLAIN (COSTS OFF) select * from t_range_1m_4keys where k1 > 2 and k1 < 6 and k Index Cond: ((k1 > 2) AND (k1 < 6) AND (k2 > 2) AND (k2 < 18)) (2 rows) +-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k; + QUERY PLAN +------------------------ + Seq Scan on t_int_100k +(1 row) + +-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k; + QUERY PLAN +------------------------ + Seq Scan on t_int_100k +(1 row) + +-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k; + QUERY PLAN +------------------------ + Seq Scan on t_int_100k +(1 row) + +-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k; + QUERY PLAN +------------------------ + Seq Scan on t_int_100k +(1 row) + +-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k; + QUERY PLAN +------------------------ + Seq Scan on t_int_100k +(1 row) + +-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k; + QUERY PLAN +------------------------ + Seq Scan on t_int_100k +(1 row) + +-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k; + QUERY PLAN +------------------------ + Seq Scan on t_int_100k +(1 row) + -- Query Hash: 8317546c890921478dd4eb572fe1270a EXPLAIN (COSTS OFF) SELECT * from t_range_100k_1update; QUERY PLAN @@ -458,34 +703,34 @@ EXPLAIN (COSTS OFF) SELECT * from t_range_100k_4update; -- Query Hash: c84c26737002659979e72554dcc9ce60 EXPLAIN (COSTS OFF) SELECT * FROM t_range_100k where id > 0; - QUERY PLAN ----------------------------------------------------- - Index Scan using t_range_100k_pkey on t_range_100k - Index Cond: (id > 0) + QUERY PLAN +---------------------------- + Seq Scan on t_range_100k + Storage Filter: (id > 0) (2 rows) -- Query Hash: c07f7e18d9f2631cab48c5b81b101312 EXPLAIN (COSTS OFF) SELECT * FROM t_range_100k where id > 1; - QUERY PLAN ----------------------------------------------------- - Index Scan using t_range_100k_pkey on t_range_100k - Index Cond: (id > 1) + QUERY PLAN +---------------------------- + Seq Scan on t_range_100k + Storage Filter: (id > 1) (2 rows) -- Query Hash: 641f7f11c6bdc12df3067ae6f1d8a4f5 EXPLAIN (COSTS OFF) SELECT * FROM t_range_100k where id > 10; - QUERY PLAN ----------------------------------------------------- - Index Scan using t_range_100k_pkey on t_range_100k - Index Cond: (id > 10) + QUERY PLAN +----------------------------- + Seq Scan on t_range_100k + Storage Filter: (id > 10) (2 rows) -- Query Hash: 09c5aed4c09b9f521d3f28ad63bc310c 
EXPLAIN (COSTS OFF) SELECT * FROM t_range_100k where id > 100; - QUERY PLAN ----------------------------------------------------- - Index Scan using t_range_100k_pkey on t_range_100k - Index Cond: (id > 100) + QUERY PLAN +------------------------------ + Seq Scan on t_range_100k + Storage Filter: (id > 100) (2 rows) -- Query Hash: 5760478f38fc87bd0152788749a4586e @@ -1476,6 +1721,150 @@ EXPLAIN (COSTS OFF) SELECT id FROM t_range_100k; Seq Scan on t_range_100k (1 row) +-- Query Hash: b7e2b50a9b41efaebefb79dd43ef269c +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99990; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 99990) +(2 rows) + +-- Query Hash: f9373b83e1cfacf7c9cf80cceb1c7f0f +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99900; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 99900) +(2 rows) + +-- Query Hash: 357081f91736d2ff5e9936fee3e8fedd +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99800; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 99800) +(2 rows) + +-- Query Hash: 698f609142df0e0bf93395e05a1556a5 +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99600; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 99600) +(2 rows) + +-- Query Hash: 6bba00a1f39bf37341734382da29d146 +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99200; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 99200) +(2 rows) + +-- Query Hash: 340f2ffe88d75bd132be8bf9115124fa +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 99000) +(2 rows) + +-- Query Hash: 7f4d0300e2b0866fa2d38c06b4c9cbec +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 98000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 98000) +(2 rows) + +-- Query Hash: 48683ce6ccfba55f1c6e8e7fdd8d1845 +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 96000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 96000) +(2 rows) + +-- Query Hash: 03116268b05f2c133f32c50553b18e80 +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 92000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 92000) +(2 rows) + +-- Query Hash: e97809d9da4483e1f219d27653ec0203 +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 90000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 90000) +(2 rows) + +-- Query Hash: f077961b82832e65dc7117379267656d +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 80000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 80000) +(2 rows) + +-- Query Hash: 8461640eefa124b442bda3bca958ef19 +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 70000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 70000) +(2 rows) + +-- Query Hash: 4082b8f91e6ff1d1e8689a4f6980d91c +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 60000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 60000) +(2 rows) + +-- Query Hash: 4176c4bcbf3370770382aeb78f4e7e86 +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 50000; + 
QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 50000) +(2 rows) + +-- Query Hash: a177fa17c56d623e4957504058165dc3 +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 40000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 40000) +(2 rows) + +-- Query Hash: ad308fcef469d0182caae8a36e99891e +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 30000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 30000) +(2 rows) + +-- Query Hash: d9108e995210c55b60549aebfebc70f0 +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 20000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 20000) +(2 rows) + +-- Query Hash: 2a74d235202052554e1d95396b999872 +EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 10000; + QUERY PLAN +-------------------------------- + Seq Scan on t_int_100k + Storage Filter: (v1 > 10000) +(2 rows) + -- DROP QUERIES; DROP TABLE IF EXISTS t_range_100k CASCADE; DROP TABLE IF EXISTS t_range_200k CASCADE; @@ -1496,3 +1885,10 @@ DROP TABLE IF EXISTS t_range_100k_2column CASCADE; DROP TABLE IF EXISTS t_range_100k_3column CASCADE; DROP TABLE IF EXISTS t_range_100k_4column CASCADE; DROP TABLE IF EXISTS t_range_1m_4keys CASCADE; +DROP TABLE IF EXISTS t_int_100k CASCADE; +DROP TABLE IF EXISTS t_numeric_100k CASCADE; +DROP TABLE IF EXISTS t_real_100k CASCADE; +DROP TABLE IF EXISTS t_char4_100k CASCADE; +DROP TABLE IF EXISTS t_char8_100k CASCADE; +DROP TABLE IF EXISTS t_char16_100k CASCADE; + diff --git a/src/postgres/src/test/regress/expected/yb_select_parallel.out b/src/postgres/src/test/regress/expected/yb_select_parallel.out index 58990bc65ba9..5ea8e7cf595e 100644 --- a/src/postgres/src/test/regress/expected/yb_select_parallel.out +++ b/src/postgres/src/test/regress/expected/yb_select_parallel.out @@ -125,11 +125,11 @@ explain (costs off) explain (costs off) select sum(sp_parallel_restricted(unique1)) from tenk1 group by(sp_parallel_restricted(unique1)); - QUERY PLAN ----------------------------------------------------- + QUERY PLAN +---------------------------------------------- HashAggregate Group Key: sp_parallel_restricted(unique1) - -> Index Only Scan using tenk1_unique1 on tenk1 + -> Seq Scan on tenk1 (3 rows) -- test prepared statement @@ -311,7 +311,7 @@ explain (costs off) Aggregate -> YB Batched Nested Loop Join Join Filter: (tenk1.unique1 = tenk2.unique1) - -> Index Only Scan using tenk2_unique1 on tenk2 + -> Seq Scan on tenk2 -> Index Only Scan using tenk1_unique1 on tenk1 Index Cond: (unique1 = ANY (ARRAY[tenk2.unique1, $1, $2, ..., $1023])) (6 rows) @@ -524,10 +524,10 @@ reset enable_hashagg; -- check parallelized int8 aggregate (bug #14897) explain (costs off) select avg(unique1::int8) from tenk1; - QUERY PLAN ----------------------------------------------------- + QUERY PLAN +------------------------- Aggregate - -> Index Only Scan using tenk1_unique1 on tenk1 + -> Seq Scan on tenk1 (2 rows) select avg(unique1::int8) from tenk1; @@ -665,7 +665,7 @@ explain (costs off, verbose) Output: b.unique1, (row_number() OVER (?)) -> WindowAgg Output: b.unique1, row_number() OVER (?) 
-               ->  Index Only Scan using tenk1_unique1 on public.tenk1 b
+               ->  Seq Scan on public.tenk1 b
                      Output: b.unique1
 (12 rows)
 
@@ -789,7 +789,7 @@ SELECT 1 FROM tenk1_vw_sec WHERE EXISTS (SELECT 1 WHERE unique1 = 0);
 ---------------------------------------------------------
  Subquery Scan on tenk1_vw_sec
    Filter: (alternatives: SubPlan 1 or hashed SubPlan 2)
-   ->  Index Only Scan using tenk1_unique1 on tenk1
+   ->  Seq Scan on tenk1
    SubPlan 1
      ->  Result
            One-Time Filter: (tenk1_vw_sec.unique1 = 0)
diff --git a/src/postgres/src/test/regress/sql/yb_planner_taqo_tuning_tests.sql b/src/postgres/src/test/regress/sql/yb_planner_taqo_tuning_tests.sql
index fb29432401d1..e7320018ca9d 100644
--- a/src/postgres/src/test/regress/sql/yb_planner_taqo_tuning_tests.sql
+++ b/src/postgres/src/test/regress/sql/yb_planner_taqo_tuning_tests.sql
@@ -1,37 +1,42 @@
 CREATE DATABASE taqo_tuning_tests with colocation = true;
 \c taqo_tuning_tests
 SET statement_timeout = '7200s';
-SET enable_bitmapscan = false; -- TODO(#20573): update bitmap scan cost model
 -- CREATE QUERIES
-CREATE TABLE t_range_100k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
+CREATE TABLE t_range_100k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
 CREATE INDEX tr100kv1 ON t_range_100k (v1 ASC);
-CREATE TABLE t_range_200k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
+CREATE TABLE t_range_200k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
 CREATE INDEX tr200kv1 ON t_range_200k (v1 ASC);
-CREATE TABLE t_range_300k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
+CREATE TABLE t_range_300k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
 CREATE INDEX tr300kv1 ON t_range_300k (v1 ASC);
-CREATE TABLE t_range_400k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
+CREATE TABLE t_range_400k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
 CREATE INDEX tr400kv1 ON t_range_400k (v1 ASC);
-CREATE TABLE t_range_500k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
+CREATE TABLE t_range_500k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
 CREATE INDEX tr500kv1 ON t_range_500k (v1 ASC);
-CREATE TABLE t_range_600k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
+CREATE TABLE t_range_600k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
 CREATE INDEX tr600kv1 ON t_range_600k (v1 ASC);
-CREATE TABLE t_range_700k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
+CREATE TABLE t_range_700k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
 CREATE INDEX tr700kv1 ON t_range_700k (v1 ASC);
-CREATE TABLE t_range_800k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
+CREATE TABLE t_range_800k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
 CREATE INDEX tr800kv1 ON t_range_800k (v1 ASC);
-CREATE TABLE t_range_900k (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
+CREATE TABLE t_range_900k (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
 CREATE INDEX tr900kv1 ON t_range_900k (v1 ASC);
-CREATE TABLE t_range_1m (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
+CREATE TABLE t_range_1m (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
 CREATE INDEX tr1mv1 ON t_range_1m (v1 ASC);
-CREATE TABLE t_range_100k_1update (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
-CREATE TABLE t_range_100k_2update (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
-CREATE TABLE t_range_100k_3update (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
-CREATE TABLE t_range_100k_4update (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT, v4 INT);
+CREATE TABLE t_range_100k_1update (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
+CREATE TABLE t_range_100k_2update (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
+CREATE TABLE t_range_100k_3update (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
+CREATE TABLE t_range_100k_4update (id INT, v1 INT, v2 INT, v3 INT, v4 INT, PRIMARY KEY (id ASC));
 CREATE TABLE t_range_100k_1column (id INT PRIMARY KEY);
-CREATE TABLE t_range_100k_2column (id INT PRIMARY KEY, v1 INT);
-CREATE TABLE t_range_100k_3column (id INT PRIMARY KEY, v1 INT, v2 INT);
-CREATE TABLE t_range_100k_4column (id INT PRIMARY KEY, v1 INT, v2 INT, v3 INT);
-CREATE TABLE t_range_1m_4keys (k1 INT, k2 INT, k3 INT, k4 INT, v1 INT, PRIMARY KEY (k1, k2, k3, k4));
+CREATE TABLE t_range_100k_2column (id INT, v1 INT, PRIMARY KEY (id ASC));
+CREATE TABLE t_range_100k_3column (id INT, v1 INT, v2 INT, PRIMARY KEY (id ASC));
+CREATE TABLE t_range_100k_4column (id INT, v1 INT, v2 INT, v3 INT, PRIMARY KEY (id ASC));
+CREATE TABLE t_range_1m_4keys (k1 INT, k2 INT, k3 INT, k4 INT, v1 INT, PRIMARY KEY (k1 ASC, k2 ASC, k3 ASC, k4 ASC));
+CREATE TABLE t_int_100k (v1 int, v2 int, v3 int, v4 int, v5 int, v6 int, v7 int, v8 int);
+CREATE TABLE t_numeric_100k (v1 numeric(12,6), v2 numeric(12,6), v3 numeric(12,6), v4 numeric(12,6), v5 numeric(12,6), v6 numeric(12,6), v7 numeric(12,6), v8 numeric(12,6));
+CREATE TABLE t_real_100k (v1 real, v2 real, v3 real, v4 real, v5 real, v6 real, v7 real, v8 real);
+CREATE TABLE t_char4_100k (v1 char(4), v2 char(4), v3 char(4), v4 char(4), v5 char(4), v6 char(4), v7 char(4), v8 char(4));
+CREATE TABLE t_char8_100k (v1 char(8), v2 char(8), v3 char(8), v4 char(8), v5 char(8), v6 char(8), v7 char(8), v8 char(8));
+CREATE TABLE t_char16_100k (v1 char(16), v2 char(16), v3 char(16), v4 char(16), v5 char(16), v6 char(16), v7 char(16), v8 char(16));
 SET yb_non_ddl_txn_for_sys_tables_allowed = ON;
 UPDATE pg_class SET reltuples = 100000, relpages = 0 WHERE relnamespace = 'public'::regnamespace AND (relname = 't_range_100k' OR relname = 't_range_100k_pkey');
@@ -260,8 +265,7 @@ SET pg_hint_plan.debug_print = ON;
 SET client_min_messages TO log;
 SET pg_hint_plan.message_level = debug;
 SET temp_file_limit="8182MB";
-set yb_bnl_batch_size = 1024; set yb_enable_base_scans_cost_model = true;
-SET yb_enable_optimizer_statistics = true;
+SET yb_enable_optimizer_statistics = true; set yb_bnl_batch_size = 1024; set yb_enable_base_scans_cost_model = true;
 -- Query Hash: 1dc3153f1e9ed56fa96c3cd1b27e0b07
 EXPLAIN (COSTS OFF) select * from t_range_100k order by id limit 2000;
 -- Query Hash: 19e6de1bb5bc99e4b305d7d4f9be0028
@@ -282,6 +286,54 @@ EXPLAIN (COSTS OFF) select * from t_range_100k order by id limit 16000;
 EXPLAIN (COSTS OFF) select * from t_range_100k order by id limit 18000;
 -- Query Hash: bc18b6d99b63eb92ae718494d803ac5c
 EXPLAIN (COSTS OFF) select * from t_range_100k order by id limit 20000;
+-- Query Hash: d0f652bb9a03d4d0079382c8a675ddf5
+EXPLAIN (COSTS OFF) SELECT v1 FROM t_char4_100k limit 40960;
+-- Query Hash: 00e91b30ef109150e085eccecd169799
+EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_char4_100k limit 20480;
+-- Query Hash: 2f633a7b6847d1b1f71d16509675ba5c
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_char4_100k limit 10240;
+-- Query Hash: b21a31bdafc618945ec67c144587cad6
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_char4_100k limit 5120;
+-- Query Hash: 14737481d661ae29bbf9c3248e627803
+EXPLAIN (COSTS OFF) SELECT v1 FROM t_char8_100k limit 40960;
+-- Query Hash: 6c4e495181e65578bddfa9cc82dce4be
+EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_char8_100k limit 20480;
+-- Query Hash: 31106354dd73743581a298fb292373fe
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_char8_100k limit 10240;
+-- Query Hash: adb2fabb5dba6de5021647f0dcd506ba
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_char8_100k limit 5120;
+-- Query Hash: 3ea29929b4cdc5cfe987519cfc418dc1
+EXPLAIN (COSTS OFF) SELECT v1 FROM t_char16_100k limit 40960;
+-- Query Hash: 29e65871465d1e4fd8e3d57d9d82cc5a
+EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_char16_100k limit 20480;
+-- Query Hash: 867c2d4aefb45b79edce4a60c0117472
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_char16_100k limit 10240;
+-- Query Hash: 2e621b26de4391e611d02be624e0cc08
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_char16_100k limit 5120;
+-- Query Hash: 447c4552a30004c9748c19f967c090a9
+EXPLAIN (COSTS OFF) SELECT v1 FROM t_int_100k limit 40960;
+-- Query Hash: 42a05c9206ce8da9e8e280b855ef8267
+EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_int_100k limit 20480;
+-- Query Hash: e70dcebc0338929507048f3d8ee117a6
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_int_100k limit 10240;
+-- Query Hash: 9d09c8d070ba139698c7dd036e8bd213
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_int_100k limit 5120;
+-- Query Hash: 342457e804c0bd74a91e1dd7b2a39129
+EXPLAIN (COSTS OFF) SELECT v1 FROM t_numeric_100k limit 40960;
+-- Query Hash: c6690513dd017b932bd5405fab3a45f3
+EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_numeric_100k limit 20480;
+-- Query Hash: 48f6a609f0b240efb26d9e24022269e5
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_numeric_100k limit 10240;
+-- Query Hash: 2e12b7d787ee03f9184d287c5150bbc3
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_numeric_100k limit 5120;
+-- Query Hash: 3058b97e0c5b78a7afcc14b01dc7a0f1
+EXPLAIN (COSTS OFF) SELECT v1 FROM t_real_100k limit 40960;
+-- Query Hash: f180e2b23646b995be28029bd68a1c50
+EXPLAIN (COSTS OFF) SELECT v1, v2 FROM t_real_100k limit 20480;
+-- Query Hash: dcc707d034bab4f4b8bf6a45d2b54d19
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4 FROM t_real_100k limit 10240;
+-- Query Hash: 32dd0af7a84cabeec056261ba6fb8257
+EXPLAIN (COSTS OFF) SELECT v1, v2, v3, v4, v5, v6, v7, v8 FROM t_real_100k limit 5120;
 -- Query Hash: cd9bc3bcc61879e6b7ead7f119f8b650
 EXPLAIN (COSTS OFF) select * from t_range_1m_4keys where k1 > 2 and k1 < 8 and k2 > 5 and k2 < 10;
 -- Query Hash: f09230315434bedd219aa7fba6db004f
@@ -304,6 +356,20 @@ EXPLAIN (COSTS OFF) select * from t_range_1m_4keys where k1 > 2 and k1 < 6 and k
 EXPLAIN (COSTS OFF) select * from t_range_1m_4keys where k1 > 2 and k1 < 6 and k2 > 2 and k2 < 16;
 -- Query Hash: a725b425b9ebccf156b2011f92ebf306
 EXPLAIN (COSTS OFF) select * from t_range_1m_4keys where k1 > 2 and k1 < 6 and k2 > 2 and k2 < 18;
+-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k;
+-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k;
+-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k;
+-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k;
+-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k;
+-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k;
+-- Query Hash: c17e4e0a745b63c2006e87d41d86be6e
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k;
 -- Query Hash: 8317546c890921478dd4eb572fe1270a
 EXPLAIN (COSTS OFF) SELECT * from t_range_100k_1update;
 -- Query Hash: 5b00b70a296886f5b741d87c7bc0ea54
@@ -572,6 +638,42 @@ EXPLAIN (COSTS OFF) SELECT id FROM t_range_100k_3column;
 EXPLAIN (COSTS OFF) SELECT id FROM t_range_100k_4column;
 -- Query Hash: 59e6de5247cc4e29c81c208f9e5a32f3
 EXPLAIN (COSTS OFF) SELECT id FROM t_range_100k;
+-- Query Hash: b7e2b50a9b41efaebefb79dd43ef269c
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99990;
+-- Query Hash: f9373b83e1cfacf7c9cf80cceb1c7f0f
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99900;
+-- Query Hash: 357081f91736d2ff5e9936fee3e8fedd
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99800;
+-- Query Hash: 698f609142df0e0bf93395e05a1556a5
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99600;
+-- Query Hash: 6bba00a1f39bf37341734382da29d146
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99200;
+-- Query Hash: 340f2ffe88d75bd132be8bf9115124fa
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 99000;
+-- Query Hash: 7f4d0300e2b0866fa2d38c06b4c9cbec
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 98000;
+-- Query Hash: 48683ce6ccfba55f1c6e8e7fdd8d1845
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 96000;
+-- Query Hash: 03116268b05f2c133f32c50553b18e80
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 92000;
+-- Query Hash: e97809d9da4483e1f219d27653ec0203
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 90000;
+-- Query Hash: f077961b82832e65dc7117379267656d
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 80000;
+-- Query Hash: 8461640eefa124b442bda3bca958ef19
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 70000;
+-- Query Hash: 4082b8f91e6ff1d1e8689a4f6980d91c
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 60000;
+-- Query Hash: 4176c4bcbf3370770382aeb78f4e7e86
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 50000;
+-- Query Hash: a177fa17c56d623e4957504058165dc3
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 40000;
+-- Query Hash: ad308fcef469d0182caae8a36e99891e
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 30000;
+-- Query Hash: d9108e995210c55b60549aebfebc70f0
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 20000;
+-- Query Hash: 2a74d235202052554e1d95396b999872
+EXPLAIN (COSTS OFF) SELECT 0 FROM t_int_100k WHERE v1 > 10000;
 -- DROP QUERIES;
 DROP TABLE IF EXISTS t_range_100k CASCADE;
 DROP TABLE IF EXISTS t_range_200k CASCADE;
@@ -592,3 +694,10 @@ DROP TABLE IF EXISTS t_range_100k_2column CASCADE;
 DROP TABLE IF EXISTS t_range_100k_3column CASCADE;
 DROP TABLE IF EXISTS t_range_100k_4column CASCADE;
 DROP TABLE IF EXISTS t_range_1m_4keys CASCADE;
+DROP TABLE IF EXISTS t_int_100k CASCADE;
+DROP TABLE IF EXISTS t_numeric_100k CASCADE;
+DROP TABLE IF EXISTS t_real_100k CASCADE;
+DROP TABLE IF EXISTS t_char4_100k CASCADE;
+DROP TABLE IF EXISTS t_char8_100k CASCADE;
+DROP TABLE IF EXISTS t_char16_100k CASCADE;
+