From ed5383e8a3bea92a19a3de81e5c468cc9153a2f9 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Mon, 14 Sep 2020 22:14:39 -0700 Subject: [PATCH] rowexec: fix column mapping for index join Index join code has been recently merged into the join reader processor. However, the setup of the joiner base has been done slightly incorrectly: the join reader core returns the whole rows from the table (i.e. all columns from the "right") and ignores columns coming from the input (from the "left"), but previously we were incorrectly passing in non-empty types for the left side - this would result in an incorrectly set up ProcOutputHelper. This could result in a crash when a row doesn't pass the filter and the verbose logging is enabled. This had a limited impact though because the output rows matched with the indexed vars referring to the columns of that row. Release note: None --- .../logictest/testdata/logic_test/index_join | 39 +++++++++++++++++++ pkg/sql/rowexec/joinreader.go | 8 +++- 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 pkg/sql/logictest/testdata/logic_test/index_join diff --git a/pkg/sql/logictest/testdata/logic_test/index_join b/pkg/sql/logictest/testdata/logic_test/index_join new file mode 100644 index 000000000000..390e39c989d1 --- /dev/null +++ b/pkg/sql/logictest/testdata/logic_test/index_join @@ -0,0 +1,39 @@ +# Regression test for incorrect post-processing setup in the join reader when +# performing an index join (#54226). 
+ +statement ok +SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false; +CREATE TABLE lineitem +( + l_orderkey int PRIMARY KEY, + l_extendedprice float NOT NULL, + l_shipdate date NOT NULL, + INDEX l_sd (l_shipdate ASC) +); +INSERT INTO lineitem VALUES (1, 200, '1994-01-01'); +ALTER TABLE lineitem INJECT STATISTICS '[ + { + "columns": ["l_orderkey"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 6001215, + "distinct_count": 1500000 + }, + { + "columns": ["l_extendedprice"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 6001215, + "distinct_count": 1000000 + }, + { + "columns": ["l_shipdate"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 6001215, + "distinct_count": 2500 + } +]'; +SELECT crdb_internal.set_vmodule('processorsbase=4') + +query R +SELECT sum(l_extendedprice) FROM lineitem WHERE l_shipdate >= DATE '1994-01-01' AND l_shipdate < DATE '1994-01-01' + INTERVAL '1' YEAR AND l_extendedprice < 100 +---- +NULL diff --git a/pkg/sql/rowexec/joinreader.go b/pkg/sql/rowexec/joinreader.go index 65a567e94587..f134a301b252 100644 --- a/pkg/sql/rowexec/joinreader.go +++ b/pkg/sql/rowexec/joinreader.go @@ -181,7 +181,13 @@ func newJoinReader( var leftEqCols []uint32 switch readerType { case indexJoinReaderType: - leftTypes = columnTypes + // Index join performs a join between a secondary index, the `input`, + // and the primary index of the same table, `desc`, to retrieve columns + // which are not stored in the secondary index. It outputs the looked + // up rows as is (meaning that the output rows before post-processing + // will contain all columns from the table) whereas the columns that + // came from the secondary index (input rows) are ignored. As a result, + // we leave leftTypes as empty. leftEqCols = indexCols case lookupJoinReaderType: leftTypes = input.OutputTypes()