From ba35f7b5982063d1bd4381067570fe9a07fb1467 Mon Sep 17 00:00:00 2001
From: Peter Mattis <petermattis@gmail.com>
Date: Fri, 28 Aug 2015 11:07:01 -0400
Subject: [PATCH] Add support for scanning multiple spans for a single index.

Index selection now creates multiple independent spans of the index that
must be scanned. Multiple spans are created for expressions like "a
IN (1, 2, 3)" and "a = 1 OR a = 2 or a = 3" (these are equivalent).

Fixes #2140.
See #2142.
---
 examples/sql_bank/main.go |   1 -
 sql/scan.go               |  43 ++++--
 sql/select.go             | 314 +++++++++++++++++++++++++-------------
 sql/testdata/select_index |  47 ++++--
 4 files changed, 275 insertions(+), 130 deletions(-)

diff --git a/examples/sql_bank/main.go b/examples/sql_bank/main.go
index c455f5a308dc..dac0c5c5ebbe 100644
--- a/examples/sql_bank/main.go
+++ b/examples/sql_bank/main.go
@@ -54,7 +54,6 @@ func moveMoney(db *sql.DB) {
 			log.Fatal(err)
 		}
 		startTime := time.Now()
-		// Query is very slow and will be fixed by https://github.com/cockroachdb/cockroach/issues/2140
 		query := fmt.Sprintf("SELECT id, balance FROM accounts WHERE id IN (%d, %d)", from, to)
 		rows, err := tx.Query(query)
 		elapsed := time.Now().Sub(startTime)
diff --git a/sql/scan.go b/sql/scan.go
index 83d8d55ca363..37cda65fb54d 100644
--- a/sql/scan.go
+++ b/sql/scan.go
@@ -59,14 +59,18 @@ func (q *qvalue) String() string {
 type qvalMap map[ColumnID]*qvalue
 type colKindMap map[ColumnID]ColumnType_Kind
 
+type span struct {
+	start proto.Key
+	end   proto.Key
+}
+
 // A scanNode handles scanning over the key/value pairs for a table and
 // reconstructing them into rows.
 type scanNode struct {
 	txn              *client.Txn
 	desc             *TableDescriptor
 	index            *IndexDescriptor
-	startKey         proto.Key
-	endKey           proto.Key
+	spans            []span
 	visibleCols      []ColumnDescriptor
 	isSecondaryIndex bool
 	reverse          bool
@@ -231,22 +235,39 @@ func (n *scanNode) initScan() bool {
 		return true
 	}
 
-	// Retrieve all of the keys that start with our index key prefix.
-	if len(n.startKey) == 0 {
-		n.startKey = proto.Key(MakeIndexKeyPrefix(n.desc.ID, n.index.ID))
-	}
-	if len(n.endKey) == 0 {
-		n.endKey = n.startKey.PrefixEnd()
+	if len(n.spans) == 0 {
+		// If no spans were specified retrieve all of the keys that start with our
+		// index key prefix.
+		start := proto.Key(MakeIndexKeyPrefix(n.desc.ID, n.index.ID))
+		n.spans = append(n.spans, span{
+			start: start,
+			end:   start.PrefixEnd(),
+		})
 	}
+
+	// Retrieve all the spans.
+	b := &client.Batch{}
 	if n.reverse {
-		n.kvs, n.err = n.txn.ReverseScan(n.startKey, n.endKey, 0)
+		for i := len(n.spans) - 1; i >= 0; i-- {
+			b.ReverseScan(n.spans[i].start, n.spans[i].end, 0)
+		}
 	} else {
-		n.kvs, n.err = n.txn.Scan(n.startKey, n.endKey, 0)
+		for i := 0; i < len(n.spans); i++ {
+			b.Scan(n.spans[i].start, n.spans[i].end, 0)
+		}
 	}
-	if n.err != nil {
+	if n.err = n.txn.Run(b); n.err != nil {
 		return false
 	}
 
+	for _, result := range b.Results {
+		if n.kvs == nil {
+			n.kvs = result.Rows
+		} else {
+			n.kvs = append(n.kvs, result.Rows...)
+		}
+	}
+
 	// Prepare our index key vals slice.
 	if n.valTypes, n.err = makeKeyVals(n.desc, n.columnIDs); n.err != nil {
 		return false
diff --git a/sql/select.go b/sql/select.go
index aa05426f41f3..de50272fe557 100644
--- a/sql/select.go
+++ b/sql/select.go
@@ -18,6 +18,8 @@
 package sql
 
 import (
+	"bytes"
+	"fmt"
 	"math"
 	"sort"
 
@@ -205,8 +207,8 @@ func (p *planner) selectIndex(s *scanNode, ordering []int) (planNode, error) {
 
 	if log.V(2) {
 		for i, c := range candidates {
-			log.Infof("%d: selectIndex(%s): %v %s %s: %p",
-				i, c.index.Name, c.cost, c.makeStartKey(), c.makeEndKey(), c)
+			log.Infof("%d: selectIndex(%s): cost=%v constraints=%s",
+				i, c.index.Name, c.cost, c.constraints)
 		}
 	}
 
@@ -215,28 +217,63 @@ func (p *planner) selectIndex(s *scanNode, ordering []int) (planNode, error) {
 	c := candidates[0]
 	s.index = c.index
 	s.isSecondaryIndex = (c.index != &s.desc.PrimaryIndex)
-	s.startKey = c.makeStartKey()
-	s.endKey = c.makeEndKey()
+	s.spans = makeSpans(c.constraints, c.desc.ID, c.index.ID)
 	s.reverse = c.reverse
 	s.initOrdering()
+
+	if log.V(3) {
+		for i, span := range s.spans {
+			log.Infof("%s/%d: start=%s end=%s", c.index.Name, i, span.start, span.end)
+		}
+	}
 	return s, nil
 }
 
+type indexConstraint struct {
+	start *parser.ComparisonExpr
+	end   *parser.ComparisonExpr
+}
+
+type indexConstraints []indexConstraint
+
+func (c indexConstraints) String() string {
+	var buf bytes.Buffer
+	_, _ = buf.WriteString("[")
+	for i := range c {
+		if i > 0 {
+			_, _ = buf.WriteString(", ")
+		}
+		_, _ = buf.WriteString("{")
+		if c[i].start != nil {
+			fmt.Fprintf(&buf, "%s, ", c[i].start)
+		} else {
+			_, _ = buf.WriteString("_, ")
+		}
+		if c[i].end != nil && c[i].end != c[i].start {
+			fmt.Fprintf(&buf, "%s", c[i].end)
+		} else {
+			_, _ = buf.WriteString("_")
+		}
+		_, _ = buf.WriteString("}")
+	}
+	_, _ = buf.WriteString("]")
+	return buf.String()
+}
+
 type indexInfo struct {
-	desc     *TableDescriptor
-	index    *IndexDescriptor
-	start    []*parser.ComparisonExpr
-	end      []*parser.ComparisonExpr
-	cost     float64
-	covering bool // indicates whether the index covers the required qvalues
-	reverse  bool
+	desc        *TableDescriptor
+	index       *IndexDescriptor
+	constraints indexConstraints
+	cost        float64
+	covering    bool // indicates whether the index covers the required qvalues
+	reverse     bool
 }
 
 func (v *indexInfo) init(s *scanNode) {
 	v.covering = v.isCoveringIndex(s.qvals)
 	if !v.covering {
 		// TODO(pmattis): Support non-coverying indexes.
-		v.cost = math.MaxFloat64
+		v.cost = math.Inf(+1)
 		return
 	}
 
@@ -257,19 +294,17 @@ func (v *indexInfo) analyzeRanges(exprs []parser.Exprs) {
 		return
 	}
 
-	v.makeStartInfo(exprs)
-	v.makeEndInfo(exprs)
+	v.makeConstraints(exprs)
 
 	// Count the number of elements used to limit the start and end keys. We then
 	// boost the cost by what fraction of the index keys are being used. The
 	// higher the fraction, the lower the cost.
-	if len(v.start) == 0 && len(v.end) == 0 {
+	if len(v.constraints) == 0 {
 		// The index isn't being restricted at all, bump the cost significantly to
 		// make any index which does restrict the keys more desirable.
 		v.cost *= 1000
 	} else {
-		v.cost *= float64(len(v.index.ColumnIDs)+len(v.index.ColumnIDs)) /
-			float64(len(v.start)+len(v.end))
+		v.cost *= float64(len(v.index.ColumnIDs)) / float64(len(v.constraints))
 	}
 }
 
@@ -318,126 +353,93 @@ func (v *indexInfo) analyzeOrdering(scan *scanNode, ordering []int) {
 	}
 }
 
-// makeStartInfo populates the indexInfo.start slice for the index using the
-// supplied expression. The start info contains the start values for a scan of
-// the index. As such, the construction looks for =, >= and > comparisons of
-// the index columns to literal values.
-func (v *indexInfo) makeStartInfo(exprs []parser.Exprs) {
+// makeConstraints populates the indexInfo.constraints field based on the
+// analyzed expressions. The constraints are a start and end expressions for a
+// prefix of the columns that make up the index. For example, consider an index
+// on the columns (a, b, c). For the expressions "a > 1 AND b > 2" we would
+// have the constraints:
+//
+//   {a: {start: > 1}
+//
+// Why is there no constraint on "b"? Because the start constraint was > and
+// such a constraint does not allow us to consider further columns in the
+// index. What about the expression "a >= 1 AND b > 2":
+//
+//   {a: {start: >= 1}, b: {start: > 2}}
+//
+// Start constraints look for comparison expressions with the operators >, >=,
+// = or IN. End constraints look for comparison expressions with the operators
+// <, <=, = or IN.
+//
+// TODO(pmattis): This needs to be more thoroughly tested.
+func (v *indexInfo) makeConstraints(exprs []parser.Exprs) {
 	if len(exprs) != 1 {
 		return
 	}
+
 	andExprs := exprs[0]
+	startDone := false
+	endDone := false
 
-outer:
 	for _, colID := range v.index.ColumnIDs {
+		constraint := indexConstraint{}
+
 		for _, e := range andExprs {
 			if c, ok := e.(*parser.ComparisonExpr); ok {
 				if q, ok := c.Left.(*qvalue); !ok || q.col.ID != colID {
+					// This expression refers to a column other than the one we're
+					// looking for.
 					continue
 				}
 				if _, ok := c.Right.(parser.Datum); !ok {
 					continue
 				}
-				switch op := c.Operator; op {
-				case parser.NE:
+				if c.Operator == parser.NE {
+					// Give-up when we encounter a != expression.
 					return
-				case parser.EQ, parser.GE, parser.GT:
-					v.start = append(v.start, c)
-					if op == parser.GT {
-						return
-					}
-					continue outer
 				}
-			}
-		}
-		return
-	}
-}
-
-// makeEndInfo populates the indexInfo.end slice for the index using the
-// supplied expression. The end info contains the end values for a scan of the
-// index. As such, the construction looks for =, <= and < comparisons of the
-// index columns to literal values.
-func (v *indexInfo) makeEndInfo(exprs []parser.Exprs) {
-	if len(exprs) != 1 {
-		return
-	}
-	andExprs := exprs[0]
 
-outer:
-	for _, colID := range v.index.ColumnIDs {
-		for _, e := range andExprs {
-			if c, ok := e.(*parser.ComparisonExpr); ok {
-				if q, ok := c.Left.(*qvalue); !ok || q.col.ID != colID {
-					continue
-				}
-				if _, ok := c.Right.(parser.Datum); !ok {
-					continue
+				if !startDone && constraint.start == nil {
+					switch c.Operator {
+					case parser.GT:
+						startDone = true
+						fallthrough
+					case parser.EQ, parser.GE, parser.In:
+						constraint.start = c
+					}
 				}
-				switch op := c.Operator; op {
-				case parser.NE:
-					return
-				case parser.EQ, parser.LE, parser.LT:
-					v.end = append(v.end, c)
-					if op == parser.LT {
-						return
+
+				if !endDone && constraint.end == nil {
+					switch c.Operator {
+					case parser.LT:
+						endDone = true
+						fallthrough
+					case parser.EQ, parser.LE, parser.In:
+						constraint.end = c
 					}
-					continue outer
+				}
+
+				if (startDone || constraint.start != nil) &&
+					(endDone || constraint.end != nil) {
+					break
 				}
 			}
 		}
-		return
-	}
-}
 
-func (v *indexInfo) makeStartKey() proto.Key {
-	key := proto.Key(MakeIndexKeyPrefix(v.desc.ID, v.index.ID))
-	for _, e := range v.start {
-		datum, ok := e.Right.(parser.Datum)
-		if !ok {
-			break
+		if constraint.start != nil || constraint.end != nil {
+			v.constraints = append(v.constraints, constraint)
 		}
-		var err error
-		key, err = encodeTableKey(key, datum)
-		if err != nil {
-			panic(err)
-		}
-		if e.Operator == parser.GT {
-			// "qval > constant": we can't use any of the additional elements for
-			// restricting the key further.
-			key = key.Next()
-			break
-		}
-	}
-	return key
-}
 
-func (v *indexInfo) makeEndKey() proto.Key {
-	key := proto.Key(MakeIndexKeyPrefix(v.desc.ID, v.index.ID))
-	isLT := false
-	for _, e := range v.end {
-		datum, ok := e.Right.(parser.Datum)
-		if !ok {
-			break
+		if constraint.start == nil {
+			startDone = true
 		}
-		var err error
-		key, err = encodeTableKey(key, datum)
-		if err != nil {
-			panic(err)
+		if constraint.end == nil {
+			endDone = true
 		}
-		isLT = e.Operator == parser.LT
-		if isLT {
-			// "qval < constant": we can't use any of the additional elements for
-			// restricting the key further.
+		if startDone && endDone {
 			break
 		}
 	}
-	if !isLT {
-		// If the last element was not "qval < constant" then we want the
-		// "prefix-end" for the end key.
-		key = key.PrefixEnd()
-	}
-	return key
 }
 
 // isCoveringIndex returns true if all of the columns referenced by the target
@@ -471,3 +473,101 @@ func (v indexInfoByCost) Less(i, j int) bool {
 func (v indexInfoByCost) Swap(i, j int) {
 	v[i], v[j] = v[j], v[i]
 }
+
+// makeSpans constructs the spans for an index given a set of constraints.
+//
+// TODO(pmattis): This needs to be more thoroughly tested.
+func makeSpans(constraints indexConstraints, tableID ID, indexID IndexID) []span {
+	prefix := proto.Key(MakeIndexKeyPrefix(tableID, indexID))
+	spans := []span{{
+		start: append(proto.Key(nil), prefix...),
+		end:   append(proto.Key(nil), prefix...),
+	}}
+	var lastStartOp parser.ComparisonOp
+	var lastEndOp parser.ComparisonOp
+	var buf [100]byte
+
+	for _, c := range constraints {
+		if c.start != nil && c.start.Operator == parser.In {
+			// Special handling of IN exprssions. Such expressions apply to both the
+			// start and end key, but also cause an explosion in the number of spans
+			// searched within an index.
+			//
+			// TODO(pmattis): Handle IN expressions of the form:
+			//
+			//   (a, b, c) IN ((1, 2, 3), (4, 5, 6))
+			tuple, ok := c.start.Right.(parser.DTuple)
+			if !ok {
+				break
+			}
+
+			// For each of the existing spans and for each value in the tuple, create
+			// a new span.
+			existingSpans := spans
+			spans = make([]span, 0, len(existingSpans)*len(tuple))
+			for _, datum := range tuple {
+				key, err := encodeTableKey(buf[:0], datum)
+				if err != nil {
+					panic(err)
+				}
+				for _, s := range existingSpans {
+					spans = append(spans, span{
+						start: append(append(proto.Key(nil), s.start...), key...),
+						end:   append(append(proto.Key(nil), s.end...), key...),
+					})
+				}
+			}
+
+			lastStartOp = c.start.Operator
+			lastEndOp = c.start.Operator
+			// TODO(pmattis): Technically, we could continue here instead of
+			// breaking. Need to test and verify that continuing works correctly.
+			break
+		}
+
+		if c.start != nil {
+			// We have a start constraint.
+			if datum, ok := c.start.Right.(parser.Datum); ok {
+				key, err := encodeTableKey(buf[:0], datum)
+				if err != nil {
+					panic(err)
+				}
+				// Append the constraint to all of the existing spans.
+				for i := range spans {
+					spans[i].start = append(spans[i].start, key...)
+				}
+				lastStartOp = c.start.Operator
+			}
+		}
+
+		if c.end != nil {
+			// We have an end constraint.
+			if datum, ok := c.end.Right.(parser.Datum); ok {
+				key, err := encodeTableKey(buf[:0], datum)
+				if err != nil {
+					panic(err)
+				}
+				// Append the constraint to all of the existing spans.
+				for i := range spans {
+					spans[i].end = append(spans[i].end, key...)
+				}
+				lastEndOp = c.end.Operator
+			}
+		}
+	}
+
+	if lastStartOp == parser.GT {
+		// "qval > constant": we can skip to the next key.
+		for i := range spans {
+			spans[i].start = spans[i].start.Next()
+		}
+	}
+	if lastEndOp != parser.LT {
+		// "qval <= constant" or "qval = constant": we want the prefix end.
+		for i := range spans {
+			spans[i].end = spans[i].end.PrefixEnd()
+		}
+	}
+
+	return spans
+}
diff --git a/sql/testdata/select_index b/sql/testdata/select_index
index 4e6b395c390a..0afd815c44cb 100644
--- a/sql/testdata/select_index
+++ b/sql/testdata/select_index
@@ -22,20 +22,29 @@ EXPLAIN (DEBUG) SELECT * FROM t WHERE a = 2
 0 /t/primary/2/'two'/c 22    NULL
 0 /t/primary/2/'two'/d 'bar' true
 
-# TODO(vivek): The following two tests should use "point" lookups
-# to be fixed by https://github.com/cockroachdb/cockroach/issues/2140
 query ITTB
 EXPLAIN (DEBUG) SELECT * FROM t WHERE a IN (1, 3)
 ----
-0 /t/dc/'bar'/22/2/'two'    NULL false
-1 /t/dc/'blah'/33/3/'three' NULL true
-2 /t/dc/'foo'/11/1/'one'    NULL true
+0 /t/primary/1/'one'     NULL   NULL
+0 /t/primary/1/'one'/c   11     NULL
+0 /t/primary/1/'one'/d   'foo'  true
+1 /t/primary/3/'three'   NULL   NULL
+1 /t/primary/3/'three'/c 33     NULL
+1 /t/primary/3/'three'/d 'blah' true
+
+query ITTB
+EXPLAIN (DEBUG) SELECT * FROM t WHERE d = 'foo' OR d = 'bar'
+----
+0 /t/dc/'bar'/22/2/'two' NULL true
+1 /t/dc/'foo'/11/1/'one' NULL true
 
+# TODO(pmattis): This will only read two rows when we add support for
+# multi-column IN expressions.
 query ITTB
-EXPLAIN (DEBUG) SELECT * FROM t WHERE a = 1 OR a = 3
+EXPLAIN (DEBUG) SELECT * FROM t WHERE (d, c) IN (('foo', 11), ('bar', 22))
 ----
-0 /t/dc/'bar'/22/2/'two'    NULL false
-1 /t/dc/'blah'/33/3/'three' NULL true
+0 /t/dc/'bar'/22/2/'two'    NULL true
+1 /t/dc/'blah'/33/3/'three' NULL false
 2 /t/dc/'foo'/11/1/'one'    NULL true
 
 query ITTB
@@ -91,6 +100,23 @@ EXPLAIN (DEBUG) SELECT * FROM t WHERE a = 1 AND b > 'b'
 ----
 0 /t/primary/1/'c' NULL true
 
+query ITTB
+EXPLAIN (DEBUG) SELECT * FROM t WHERE a > 0 AND b > 'b'
+----
+0 /t/primary/1/'a' NULL false
+1 /t/primary/1/'b' NULL false
+2 /t/primary/1/'c' NULL true
+
+# TODO(pmattis): This should scan 0 keys, but the way we scan greater
+# than a key is broken. We should really transfer "a > 0" into "a >=
+# 1". This is possible for integer keys.
+query ITTB
+EXPLAIN (DEBUG) SELECT * FROM t WHERE a > 1 AND b > 'b'
+----
+0 /t/primary/1/'a' NULL false
+1 /t/primary/1/'b' NULL false
+2 /t/primary/1/'c' NULL false
+
 query ITTB
 EXPLAIN (DEBUG) SELECT * FROM t WHERE a = 1 AND 'a' < b AND 'c' > b
 ----
@@ -158,9 +184,8 @@ EXPLAIN (DEBUG) SELECT * FROM t@ab WHERE a >= 3 OR a > 3
 query ITTB
 EXPLAIN (DEBUG) SELECT * FROM t@ab WHERE a = 3 OR a = 5
 ----
-0 /t/ab/1/2 NULL false
-1 /t/ab/3/4 NULL true
-2 /t/ab/5/6 NULL true
+0 /t/ab/3/4 NULL true
+1 /t/ab/5/6 NULL true
 
 query ITTB
 EXPLAIN (DEBUG) SELECT * FROM t@ab WHERE a < 3 OR a > 3