From 88d2aba9abb4cae380a317e3861ecb097f1d3fca Mon Sep 17 00:00:00 2001
From: Marcus Gartner <marcus@cockroachlabs.com>
Date: Tue, 31 Aug 2021 13:02:42 -0400
Subject: [PATCH] colexec: fix IN operator with unsorted tuple

The vectorized implementation of an `element IN tuple` expression
assumes that the contents of `tuple` are sorted by the optimizer. Based
on this assumption, it performs a binary search instead of a linear
search.

However, the assumption that the optimizer sorts all tuples is
incorrect. For example, when a tuple contains a subquery, its contents
are not known at planning time, so the tuple cannot be sorted.
Performing a binary search with an unsorted tuple causes incorrect query
results.

Now, each vectorized IN operator checks its tuple contents once, the
first time it processes a batch, and sorts them if they are not already
sorted.

Fixes #68979

Release justification: This commit fixes a bug with the IN operator that
causes incorrect results.

Release note (bug fix): A bug has been fixed which caused incorrect
evaluation of the `IN` operator when the tuple on the right-hand side
of the operator included a subquery, like
`a IN ('foo', (SELECT s FROM t), 'bar')`.
---
 .../execgen/cmd/execgen/select_in_gen.go      |   4 +-
 pkg/sql/colexec/select_in.eg.go               | 439 +++++++++++++++++-
 pkg/sql/colexec/select_in_test.go             |  14 +
 pkg/sql/colexec/select_in_tmpl.go             |  35 +-
 .../logictest/testdata/logic_test/vectorize   |  15 +
 5 files changed, 481 insertions(+), 26 deletions(-)

diff --git a/pkg/sql/colexec/execgen/cmd/execgen/select_in_gen.go b/pkg/sql/colexec/execgen/cmd/execgen/select_in_gen.go
index b0e6e7587d11..50bae7a4f02f 100644
--- a/pkg/sql/colexec/execgen/cmd/execgen/select_in_gen.go
+++ b/pkg/sql/colexec/execgen/cmd/execgen/select_in_gen.go
@@ -31,8 +31,8 @@ func genSelectIn(inputFileContents string, wr io.Writer) error {
 	)
 	s := r.Replace(inputFileContents)
 
-	assignEq := makeFunctionRegex("_COMPARE", 5)
-	s = assignEq.ReplaceAllString(s, makeTemplateFunctionCall("Compare", 5))
+	compare := makeFunctionRegex("_COMPARE", 5)
+	s = compare.ReplaceAllString(s, makeTemplateFunctionCall("Compare", 5))
 
 	s = replaceManipulationFuncs(s)
 
diff --git a/pkg/sql/colexec/select_in.eg.go b/pkg/sql/colexec/select_in.eg.go
index cb6d6fd5040e..eac5850cd665 100644
--- a/pkg/sql/colexec/select_in.eg.go
+++ b/pkg/sql/colexec/select_in.eg.go
@@ -12,6 +12,7 @@ package colexec
 import (
 	"bytes"
 	"math"
+	"sort"
 	"time"
 
 	"github.com/cockroachdb/apd/v2"
@@ -352,6 +353,7 @@ type selectInOpBool struct {
 	filterRow []bool
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOpBool{}
@@ -364,6 +366,7 @@ type projectInOpBool struct {
 	filterRow []bool
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOpBool{}
@@ -384,11 +387,30 @@ func fillDatumRowBool(t *types.T, datumTuple *tree.DTuple) ([]bool, bool) {
 	return result, hasNulls
 }
 
+func sortDatumRowBool(filterRow []bool, targetCol coldata.Bools) {
+	less := func(i, j int) bool {
+		var cmpResult int
+
+		if !filterRow[i] && filterRow[j] {
+			cmpResult = -1
+		} else if filterRow[i] && !filterRow[j] {
+			cmpResult = 1
+		} else {
+			cmpResult = 0
+		}
+
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpInBool(
 	targetElem bool, targetCol coldata.Bools, filterRow []bool, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRowBool, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -431,6 +453,14 @@ func (si *selectInOpBool) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRowBool because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRowBool(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -501,6 +531,14 @@ func (pi *projectInOpBool) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.Bool()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRowBool because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRowBool(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
@@ -586,6 +624,7 @@ type selectInOpBytes struct {
 	filterRow [][]byte
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOpBytes{}
@@ -598,6 +637,7 @@ type projectInOpBytes struct {
 	filterRow [][]byte
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOpBytes{}
@@ -618,11 +658,22 @@ func fillDatumRowBytes(t *types.T, datumTuple *tree.DTuple) ([][]byte, bool) {
 	return result, hasNulls
 }
 
+func sortDatumRowBytes(filterRow [][]byte, targetCol *coldata.Bytes) {
+	less := func(i, j int) bool {
+		var cmpResult int
+		cmpResult = bytes.Compare(filterRow[i], filterRow[j])
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpInBytes(
 	targetElem []byte, targetCol *coldata.Bytes, filterRow [][]byte, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRowBytes, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -657,6 +708,14 @@ func (si *selectInOpBytes) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRowBytes because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRowBytes(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -725,6 +784,14 @@ func (pi *projectInOpBytes) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.Bytes()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRowBytes because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRowBytes(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
@@ -808,6 +875,7 @@ type selectInOpDecimal struct {
 	filterRow []apd.Decimal
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOpDecimal{}
@@ -820,6 +888,7 @@ type projectInOpDecimal struct {
 	filterRow []apd.Decimal
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOpDecimal{}
@@ -840,11 +909,22 @@ func fillDatumRowDecimal(t *types.T, datumTuple *tree.DTuple) ([]apd.Decimal, bo
 	return result, hasNulls
 }
 
+func sortDatumRowDecimal(filterRow []apd.Decimal, targetCol coldata.Decimals) {
+	less := func(i, j int) bool {
+		var cmpResult int
+		cmpResult = tree.CompareDecimals(&filterRow[i], &filterRow[j])
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpInDecimal(
 	targetElem apd.Decimal, targetCol coldata.Decimals, filterRow []apd.Decimal, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRowDecimal, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -879,6 +959,14 @@ func (si *selectInOpDecimal) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRowDecimal because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRowDecimal(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -949,6 +1037,14 @@ func (pi *projectInOpDecimal) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.Decimal()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRowDecimal because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRowDecimal(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
@@ -1034,6 +1130,7 @@ type selectInOpInt16 struct {
 	filterRow []int16
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOpInt16{}
@@ -1046,6 +1143,7 @@ type projectInOpInt16 struct {
 	filterRow []int16
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOpInt16{}
@@ -1066,11 +1164,33 @@ func fillDatumRowInt16(t *types.T, datumTuple *tree.DTuple) ([]int16, bool) {
 	return result, hasNulls
 }
 
+func sortDatumRowInt16(filterRow []int16, targetCol coldata.Int16s) {
+	less := func(i, j int) bool {
+		var cmpResult int
+
+		{
+			a, b := int64(filterRow[i]), int64(filterRow[j])
+			if a < b {
+				cmpResult = -1
+			} else if a > b {
+				cmpResult = 1
+			} else {
+				cmpResult = 0
+			}
+		}
+
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpInInt16(
 	targetElem int16, targetCol coldata.Int16s, filterRow []int16, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRowInt16, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -1116,6 +1236,14 @@ func (si *selectInOpInt16) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRowInt16 because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRowInt16(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -1186,6 +1314,14 @@ func (pi *projectInOpInt16) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.Int16()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRowInt16 because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRowInt16(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
@@ -1271,6 +1407,7 @@ type selectInOpInt32 struct {
 	filterRow []int32
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOpInt32{}
@@ -1283,6 +1420,7 @@ type projectInOpInt32 struct {
 	filterRow []int32
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOpInt32{}
@@ -1303,11 +1441,33 @@ func fillDatumRowInt32(t *types.T, datumTuple *tree.DTuple) ([]int32, bool) {
 	return result, hasNulls
 }
 
+func sortDatumRowInt32(filterRow []int32, targetCol coldata.Int32s) {
+	less := func(i, j int) bool {
+		var cmpResult int
+
+		{
+			a, b := int64(filterRow[i]), int64(filterRow[j])
+			if a < b {
+				cmpResult = -1
+			} else if a > b {
+				cmpResult = 1
+			} else {
+				cmpResult = 0
+			}
+		}
+
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpInInt32(
 	targetElem int32, targetCol coldata.Int32s, filterRow []int32, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRowInt32, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -1353,6 +1513,14 @@ func (si *selectInOpInt32) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRowInt32 because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRowInt32(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -1423,6 +1591,14 @@ func (pi *projectInOpInt32) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.Int32()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRowInt32 because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRowInt32(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
@@ -1508,6 +1684,7 @@ type selectInOpInt64 struct {
 	filterRow []int64
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOpInt64{}
@@ -1520,6 +1697,7 @@ type projectInOpInt64 struct {
 	filterRow []int64
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOpInt64{}
@@ -1540,11 +1718,33 @@ func fillDatumRowInt64(t *types.T, datumTuple *tree.DTuple) ([]int64, bool) {
 	return result, hasNulls
 }
 
+func sortDatumRowInt64(filterRow []int64, targetCol coldata.Int64s) {
+	less := func(i, j int) bool {
+		var cmpResult int
+
+		{
+			a, b := int64(filterRow[i]), int64(filterRow[j])
+			if a < b {
+				cmpResult = -1
+			} else if a > b {
+				cmpResult = 1
+			} else {
+				cmpResult = 0
+			}
+		}
+
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpInInt64(
 	targetElem int64, targetCol coldata.Int64s, filterRow []int64, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRowInt64, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -1590,6 +1790,14 @@ func (si *selectInOpInt64) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRowInt64 because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRowInt64(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -1660,6 +1868,14 @@ func (pi *projectInOpInt64) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.Int64()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRowInt64 because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRowInt64(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
@@ -1745,6 +1961,7 @@ type selectInOpFloat64 struct {
 	filterRow []float64
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOpFloat64{}
@@ -1757,6 +1974,7 @@ type projectInOpFloat64 struct {
 	filterRow []float64
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOpFloat64{}
@@ -1777,11 +1995,41 @@ func fillDatumRowFloat64(t *types.T, datumTuple *tree.DTuple) ([]float64, bool)
 	return result, hasNulls
 }
 
+func sortDatumRowFloat64(filterRow []float64, targetCol coldata.Float64s) {
+	less := func(i, j int) bool {
+		var cmpResult int
+
+		{
+			a, b := float64(filterRow[i]), float64(filterRow[j])
+			if a < b {
+				cmpResult = -1
+			} else if a > b {
+				cmpResult = 1
+			} else if a == b {
+				cmpResult = 0
+			} else if math.IsNaN(a) {
+				if math.IsNaN(b) {
+					cmpResult = 0
+				} else {
+					cmpResult = -1
+				}
+			} else {
+				cmpResult = 1
+			}
+		}
+
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpInFloat64(
 	targetElem float64, targetCol coldata.Float64s, filterRow []float64, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRowFloat64, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -1835,6 +2083,14 @@ func (si *selectInOpFloat64) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRowFloat64 because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRowFloat64(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -1905,6 +2161,14 @@ func (pi *projectInOpFloat64) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.Float64()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRowFloat64 because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRowFloat64(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
@@ -1990,6 +2254,7 @@ type selectInOpTimestamp struct {
 	filterRow []time.Time
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOpTimestamp{}
@@ -2002,6 +2267,7 @@ type projectInOpTimestamp struct {
 	filterRow []time.Time
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOpTimestamp{}
@@ -2022,11 +2288,29 @@ func fillDatumRowTimestamp(t *types.T, datumTuple *tree.DTuple) ([]time.Time, bo
 	return result, hasNulls
 }
 
+func sortDatumRowTimestamp(filterRow []time.Time, targetCol coldata.Times) {
+	less := func(i, j int) bool {
+		var cmpResult int
+
+		if filterRow[i].Before(filterRow[j]) {
+			cmpResult = -1
+		} else if filterRow[j].Before(filterRow[i]) {
+			cmpResult = 1
+		} else {
+			cmpResult = 0
+		}
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpInTimestamp(
 	targetElem time.Time, targetCol coldata.Times, filterRow []time.Time, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRowTimestamp, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -2068,6 +2352,14 @@ func (si *selectInOpTimestamp) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRowTimestamp because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRowTimestamp(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -2138,6 +2430,14 @@ func (pi *projectInOpTimestamp) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.Timestamp()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRowTimestamp because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRowTimestamp(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
@@ -2223,6 +2523,7 @@ type selectInOpInterval struct {
 	filterRow []duration.Duration
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOpInterval{}
@@ -2235,6 +2536,7 @@ type projectInOpInterval struct {
 	filterRow []duration.Duration
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOpInterval{}
@@ -2255,11 +2557,22 @@ func fillDatumRowInterval(t *types.T, datumTuple *tree.DTuple) ([]duration.Durat
 	return result, hasNulls
 }
 
+func sortDatumRowInterval(filterRow []duration.Duration, targetCol coldata.Durations) {
+	less := func(i, j int) bool {
+		var cmpResult int
+		cmpResult = filterRow[i].Compare(filterRow[j])
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpInInterval(
 	targetElem duration.Duration, targetCol coldata.Durations, filterRow []duration.Duration, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRowInterval, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -2294,6 +2607,14 @@ func (si *selectInOpInterval) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRowInterval because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRowInterval(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -2364,6 +2685,14 @@ func (pi *projectInOpInterval) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.Interval()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRowInterval because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRowInterval(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
@@ -2449,6 +2778,7 @@ type selectInOpJSON struct {
 	filterRow []json.JSON
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOpJSON{}
@@ -2461,6 +2791,7 @@ type projectInOpJSON struct {
 	filterRow []json.JSON
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOpJSON{}
@@ -2481,11 +2812,28 @@ func fillDatumRowJSON(t *types.T, datumTuple *tree.DTuple) ([]json.JSON, bool) {
 	return result, hasNulls
 }
 
+func sortDatumRowJSON(filterRow []json.JSON, targetCol *coldata.JSONs) {
+	less := func(i, j int) bool {
+		var cmpResult int
+
+		var err error
+		cmpResult, err = filterRow[i].Compare(filterRow[j])
+		if err != nil {
+			colexecerror.ExpectedError(err)
+		}
+
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpInJSON(
 	targetElem json.JSON, targetCol *coldata.JSONs, filterRow []json.JSON, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRowJSON, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -2526,6 +2874,14 @@ func (si *selectInOpJSON) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRowJSON because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRowJSON(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -2594,6 +2950,14 @@ func (pi *projectInOpJSON) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.JSON()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRowJSON because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRowJSON(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
@@ -2677,6 +3041,7 @@ type selectInOpDatum struct {
 	filterRow []interface{}
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOpDatum{}
@@ -2689,6 +3054,7 @@ type projectInOpDatum struct {
 	filterRow []interface{}
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOpDatum{}
@@ -2709,11 +3075,24 @@ func fillDatumRowDatum(t *types.T, datumTuple *tree.DTuple) ([]interface{}, bool
 	return result, hasNulls
 }
 
+func sortDatumRowDatum(filterRow []interface{}, targetCol coldata.DatumVec) {
+	less := func(i, j int) bool {
+		var cmpResult int
+
+		cmpResult = coldataext.CompareDatum(filterRow[i], targetCol, filterRow[j])
+
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpInDatum(
 	targetElem interface{}, targetCol coldata.DatumVec, filterRow []interface{}, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRowDatum, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -2750,6 +3129,14 @@ func (si *selectInOpDatum) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRowDatum because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRowDatum(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -2818,6 +3205,14 @@ func (pi *projectInOpDatum) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.Datum()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRowDatum because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRowDatum(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
diff --git a/pkg/sql/colexec/select_in_test.go b/pkg/sql/colexec/select_in_test.go
index 7535fdb1b9a6..d93d7d6eaacb 100644
--- a/pkg/sql/colexec/select_in_test.go
+++ b/pkg/sql/colexec/select_in_test.go
@@ -70,6 +70,14 @@ func TestSelectInInt64(t *testing.T) {
 			hasNulls:     true,
 			negate:       true,
 		},
+		{
+			desc:         "In test with unordered filterRow",
+			inputTuples:  colexectestutils.Tuples{{0}, {1}, {2}},
+			outputTuples: colexectestutils.Tuples{{0}, {1}, {2}},
+			filterRow:    []int64{2, 0, 1},
+			hasNulls:     false,
+			negate:       false,
+		},
 	}
 
 	for _, c := range testCases {
@@ -211,6 +219,12 @@ func TestProjectInInt64(t *testing.T) {
 			outputTuples: colexectestutils.Tuples{{1, false}, {2, false}},
 			inClause:     "IN (3)",
 		},
+		{
+			desc:         "In test with unordered tuple",
+			inputTuples:  colexectestutils.Tuples{{0}, {1}, {2}},
+			outputTuples: colexectestutils.Tuples{{0, true}, {1, true}, {2, true}},
+			inClause:     "IN (2, 0, 1)",
+		},
 	}
 
 	for _, c := range testCases {
diff --git a/pkg/sql/colexec/select_in_tmpl.go b/pkg/sql/colexec/select_in_tmpl.go
index 244070893ed8..3a29e375be8e 100644
--- a/pkg/sql/colexec/select_in_tmpl.go
+++ b/pkg/sql/colexec/select_in_tmpl.go
@@ -20,6 +20,8 @@
 package colexec
 
 import (
+	"sort"
+
 	"github.com/cockroachdb/apd/v2"
 	"github.com/cockroachdb/cockroach/pkg/col/coldata"
 	"github.com/cockroachdb/cockroach/pkg/col/coldataext"
@@ -141,6 +143,7 @@ type selectInOp_TYPE struct {
 	filterRow []_GOTYPE
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &selectInOp_TYPE{}
@@ -153,6 +156,7 @@ type projectInOp_TYPE struct {
 	filterRow []_GOTYPE
 	hasNulls  bool
 	negate    bool
+	sorted    bool
 }
 
 var _ colexecop.Operator = &projectInOp_TYPE{}
@@ -173,11 +177,22 @@ func fillDatumRow_TYPE(t *types.T, datumTuple *tree.DTuple) ([]_GOTYPE, bool) {
 	return result, hasNulls
 }
 
+func sortDatumRow_TYPE(filterRow []_GOTYPE, targetCol _GOTYPESLICE) {
+	less := func(i, j int) bool {
+		var cmpResult int
+		_COMPARE(cmpResult, filterRow[i], filterRow[j], targetCol, _)
+		return cmpResult < 0
+	}
+	if !sort.SliceIsSorted(filterRow, less) {
+		sort.Slice(filterRow, less)
+	}
+}
+
 func cmpIn_TYPE(
 	targetElem _GOTYPE, targetCol _GOTYPESLICE, filterRow []_GOTYPE, hasNulls bool,
 ) comparisonResult {
-	// Filter row input is already sorted due to normalization, so we can use a
-	// binary search right away.
+	// Filter row input was already sorted in sortDatumRow_TYPE, so we can
+	// perform a binary search.
 	lo := 0
 	hi := len(filterRow)
 	for lo < hi {
@@ -212,6 +227,14 @@ func (si *selectInOp_TYPE) Next() coldata.Batch {
 		var idx int
 		n := batch.Length()
 
+		// Sort si.filterRow once. We perform the sort here instead of in
+		// fillDatumRow_TYPE because the compare overload requires the eval
+		// context of a coldata.DatumVec target column.
+		if !si.sorted {
+			sortDatumRow_TYPE(si.filterRow, col)
+			si.sorted = true
+		}
+
 		compVal := siTrue
 		if si.negate {
 			compVal = siFalse
@@ -286,6 +309,14 @@ func (pi *projectInOp_TYPE) Next() coldata.Batch {
 	vec := batch.ColVec(pi.colIdx)
 	col := vec.TemplateType()
 
+	// Sort pi.filterRow once. We perform the sort here instead of in
+	// fillDatumRow_TYPE because the compare overload requires the eval context
+	// of a coldata.DatumVec target column.
+	if !pi.sorted {
+		sortDatumRow_TYPE(pi.filterRow, col)
+		pi.sorted = true
+	}
+
 	projVec := batch.ColVec(pi.outputIdx)
 	projCol := projVec.Bool()
 	projNulls := projVec.Nulls()
diff --git a/pkg/sql/logictest/testdata/logic_test/vectorize b/pkg/sql/logictest/testdata/logic_test/vectorize
index a1f5856c9570..ef6d84316a4d 100644
--- a/pkg/sql/logictest/testdata/logic_test/vectorize
+++ b/pkg/sql/logictest/testdata/logic_test/vectorize
@@ -1253,3 +1253,18 @@ query T
 SELECT c FROM t68040 WHERE c LIKE '%\\%'
 ----
 string with \ backslash
+
+# Regression test for #68979. The IN operator should evaluate correctly when the
+# tuple contents are not sorted by the optimizer.
+statement ok
+CREATE TABLE t68979 (
+  a INT
+)
+
+statement ok
+INSERT INTO t68979 VALUES (0)
+
+query B
+SELECT 'b' IN ('b', (SELECT NULL FROM t68979), 'a') FROM t68979
+----
+true