From ed5383e8a3bea92a19a3de81e5c468cc9153a2f9 Mon Sep 17 00:00:00 2001
From: Yahor Yuzefovich <yahor@cockroachlabs.com>
Date: Mon, 14 Sep 2020 22:14:39 -0700
Subject: [PATCH] rowexec: fix column mapping for index join

Index join code has been recently merged into the join reader processor.
However, the setup of the joiner base has been done slightly
incorrectly: the join reader core returns the whole rows from the table
(i.e. all columns from the "right") and ignores columns coming from the
input (from the "left"), but previously we were incorrectly passing in
non-empty types for the left side - this would result in an incorrectly
set up ProcOutputHelper. This could result in a crash when a row doesn't
pass the filter and verbose logging is enabled. This had a limited
impact though because the output rows matched the indexed vars
referring to the columns of that row.

Release note: None
---
 .../logictest/testdata/logic_test/index_join  | 39 +++++++++++++++++++
 pkg/sql/rowexec/joinreader.go                 |  8 +++-
 2 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 pkg/sql/logictest/testdata/logic_test/index_join

diff --git a/pkg/sql/logictest/testdata/logic_test/index_join b/pkg/sql/logictest/testdata/logic_test/index_join
new file mode 100644
index 000000000000..390e39c989d1
--- /dev/null
+++ b/pkg/sql/logictest/testdata/logic_test/index_join
@@ -0,0 +1,39 @@
+# Regression test for incorrect post-processing setup in the join reader when
+# performing an index join (#54226).
+
+statement ok
+SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false;
+CREATE TABLE lineitem
+(
+    l_orderkey int PRIMARY KEY,
+    l_extendedprice float NOT NULL,
+    l_shipdate date NOT NULL,
+    INDEX l_sd (l_shipdate ASC)
+);
+INSERT INTO lineitem VALUES (1, 200, '1994-01-01');
+ALTER TABLE lineitem INJECT STATISTICS '[
+  {
+    "columns": ["l_orderkey"],
+    "created_at": "2018-01-01 1:00:00.00000+00:00",
+    "row_count": 6001215,
+    "distinct_count": 1500000
+  },
+  {
+    "columns": ["l_extendedprice"],
+    "created_at": "2018-01-01 1:00:00.00000+00:00",
+    "row_count": 6001215,
+    "distinct_count": 1000000
+  },
+  {
+    "columns": ["l_shipdate"],
+    "created_at": "2018-01-01 1:00:00.00000+00:00",
+    "row_count": 6001215,
+    "distinct_count": 2500
+  }
+]';
+SELECT crdb_internal.set_vmodule('processorsbase=4')
+
+query R
+SELECT sum(l_extendedprice) FROM lineitem WHERE l_shipdate >= DATE '1994-01-01' AND l_shipdate < DATE '1994-01-01' + INTERVAL '1' YEAR AND l_extendedprice < 100
+----
+NULL
diff --git a/pkg/sql/rowexec/joinreader.go b/pkg/sql/rowexec/joinreader.go
index 65a567e94587..f134a301b252 100644
--- a/pkg/sql/rowexec/joinreader.go
+++ b/pkg/sql/rowexec/joinreader.go
@@ -181,7 +181,13 @@ func newJoinReader(
 	var leftEqCols []uint32
 	switch readerType {
 	case indexJoinReaderType:
-		leftTypes = columnTypes
+		// Index join performs a join between a secondary index, the `input`,
+		// and the primary index of the same table, `desc`, to retrieve columns
+		// which are not stored in the secondary index. It outputs the looked
+		// up rows as is (meaning that the output rows before post-processing
+		// will contain all columns from the table) whereas the columns that
+		// came from the secondary index (input rows) are ignored. As a result,
+		// we leave leftTypes empty.
 		leftEqCols = indexCols
 	case lookupJoinReaderType:
 		leftTypes = input.OutputTypes()