Skip to content

Commit

Permalink
Merge #39464 #39472
Browse files Browse the repository at this point in the history
39464: exec: handle some mixed type comparison expressions r=rafiss a=rafiss

This adds support for int-int comparisons and float-float comparisons
(of different sizes), as well as various string comparisons. This
is necessary to support TPC-H queries 7, 16, 19, and 21 (though there
are still other issues to address before those are fully supported).

There is some refactoring here so that as a next step we can support
comparisons between different types entirely (e.g. int-decimal).

touches #39189

Release note: None

39472: opt: prune RHS of a semi/anti join r=ridwanmsharif a=ridwanmsharif

Fixes #38704.

This change adds a PruneSemiAntiJoinRightCols rule to prune
columns in the RHS of a semi/anti join. Alternatively,
we could just tag the PruneJoinRightCols rule as higher
priority to achieve the same effect (previously that rule
was never triggered because the `EliminateProjects` rule
would fire and remove the projections after the
`PruneJoinLeftCols` rule was applied). I prefer this rule
because it avoids requiring an ordering of transformations.

Release note: None

Co-authored-by: Rafi Shamim <[email protected]>
Co-authored-by: Ridwan Sharif <[email protected]>
  • Loading branch information
3 people committed Aug 16, 2019
3 parents d3e42df + bc09926 + 76411a6 commit 7880622
Show file tree
Hide file tree
Showing 31 changed files with 757 additions and 649 deletions.
18 changes: 18 additions & 0 deletions pkg/col/coltypes/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,28 @@ const (
// AllTypes is a slice of all exec types.
var AllTypes []T

// CompatibleTypes maps a type to a slice of types that can be used with that
// type in a binary expression.
var CompatibleTypes map[T][]T

// init fills in the package-level AllTypes slice and CompatibleTypes map.
func init() {
	// Enumerate every type from Bool up to, but not including, Unhandled.
	for typ := Bool; typ < Unhandled; typ++ {
		AllTypes = append(AllTypes, typ)
	}

	CompatibleTypes = make(map[T][]T)

	// These types are only compatible with themselves.
	for _, typ := range []T{Bool, Bytes, Decimal} {
		CompatibleTypes[typ] = append(CompatibleTypes[typ], typ)
	}

	// Within a numeric family, every width is compatible with every other
	// width (including itself). Appending onto the empty map entry gives each
	// key its own slice rather than a shared one.
	families := [][]T{
		{Int8, Int16, Int32, Int64},
		{Float32, Float64},
	}
	for _, family := range families {
		for _, typ := range family {
			CompatibleTypes[typ] = append(CompatibleTypes[typ], family...)
		}
	}
}

// FromGoType returns the type for a Go value, if applicable. Shouldn't be used at
Expand Down
34 changes: 10 additions & 24 deletions pkg/sql/distsqlrun/column_exec_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,7 @@ func planSelectionOperators(
if err != nil {
return nil, resultIdx, ct, memUsageLeft, err
}
typ := &ct[leftIdx]
lTyp := &ct[leftIdx]
if constArg, ok := t.Right.(tree.Datum); ok {
if t.Operator == tree.Like || t.Operator == tree.NotLike {
negate := t.Operator == tree.NotLike
Expand All @@ -726,17 +726,17 @@ func planSelectionOperators(
err = errors.Errorf("IN is only supported for constant expressions")
return nil, resultIdx, ct, memUsed, err
}
op, err := exec.GetInOperator(typ, leftOp, leftIdx, datumTuple, negate)
op, err := exec.GetInOperator(lTyp, leftOp, leftIdx, datumTuple, negate)
return op, resultIdx, ct, memUsageLeft, err
}
op, err := exec.GetSelectionConstOperator(typ, cmpOp, leftOp, leftIdx, constArg)
op, err := exec.GetSelectionConstOperator(lTyp, t.TypedRight().ResolvedType(), cmpOp, leftOp, leftIdx, constArg)
return op, resultIdx, ct, memUsageLeft, err
}
rightOp, rightIdx, ct, memUsageRight, err := planProjectionOperators(ctx, t.TypedRight(), ct, leftOp)
if err != nil {
return nil, resultIdx, ct, memUsageLeft + memUsageRight, err
}
op, err := exec.GetSelectionOperator(typ, cmpOp, rightOp, leftIdx, rightIdx)
op, err := exec.GetSelectionOperator(lTyp, &ct[rightIdx], cmpOp, rightOp, leftIdx, rightIdx)
return op, resultIdx, ct, memUsageLeft + memUsageRight, err
default:
return nil, resultIdx, nil, memUsed, errors.Errorf("unhandled selection expression type: %s", reflect.TypeOf(t))
Expand Down Expand Up @@ -834,7 +834,7 @@ func planProjectionExpr(
resultIdx = len(ct)
// The projection result will be outputted to a new column which is appended
// to the input batch.
op, err = exec.GetProjectionLConstOperator(&ct[rightIdx], binOp, rightOp, rightIdx, lConstArg, resultIdx)
op, err = exec.GetProjectionLConstOperator(left.ResolvedType(), &ct[rightIdx], binOp, rightOp, rightIdx, lConstArg, resultIdx)
ct = append(ct, *outputType)
if sMem, ok := op.(exec.StaticMemoryOperator); ok {
memUsed += sMem.EstimateStaticMemoryUsage()
Expand Down Expand Up @@ -863,7 +863,7 @@ func planProjectionExpr(
}
op, err = exec.GetInProjectionOperator(&ct[leftIdx], leftOp, leftIdx, resultIdx, datumTuple, negate)
} else {
op, err = exec.GetProjectionRConstOperator(&ct[leftIdx], binOp, leftOp, leftIdx, rConstArg, resultIdx)
op, err = exec.GetProjectionRConstOperator(&ct[leftIdx], right.ResolvedType(), binOp, leftOp, leftIdx, rConstArg, resultIdx)
}
ct = append(ct, *outputType)
if sMem, ok := op.(exec.StaticMemoryOperator); ok {
Expand All @@ -877,18 +877,18 @@ func planProjectionExpr(
return nil, resultIdx, nil, leftMem + rightMem, err
}
resultIdx = len(ct)
op, err = exec.GetProjectionOperator(&ct[leftIdx], binOp, rightOp, leftIdx, rightIdx, resultIdx)
op, err = exec.GetProjectionOperator(&ct[leftIdx], &ct[rightIdx], binOp, rightOp, leftIdx, rightIdx, resultIdx)
ct = append(ct, *outputType)
if sMem, ok := op.(exec.StaticMemoryOperator); ok {
memUsed += sMem.EstimateStaticMemoryUsage()
}
return op, resultIdx, ct, leftMem + rightMem + memUsed, err
}

// assertHomogeneousTypes checks that the left and right sides of an expression
// assertHomogeneousTypes checks that the left and right sides of a BinaryExpr
// have identical types. (Vectorized execution does not yet handle mixed types.)
// For BinaryExprs, it also checks that the result type matches, since this is
// not the case for certain operations like integer division.
// It also checks that the result type matches, since this is not the case for
// certain operations like integer division.
func assertHomogeneousTypes(expr tree.TypedExpr) error {
switch t := expr.(type) {
case *tree.BinaryExpr:
Expand All @@ -901,20 +901,6 @@ func assertHomogeneousTypes(expr tree.TypedExpr) error {
if !left.Identical(result) {
return errors.Errorf("BinaryExpr on %s with %s result is unhandled", left, result)
}
case *tree.ComparisonExpr:
left := t.TypedLeft().ResolvedType()
right := t.TypedRight().ResolvedType()

// Special rules for IN and NOT IN expressions. The type checker
// handles invalid types for the IN and NOT IN operations at this point,
// and we allow a comparison between t and t tuple.
if t.Operator == tree.In || t.Operator == tree.NotIn {
return nil
}

if !left.Identical(right) {
return errors.Errorf("ComparisonExpr on %s and %s is unhandled", left, right)
}
}
return nil
}
Expand Down
12 changes: 12 additions & 0 deletions pkg/sql/exec/float.go → pkg/sql/exec/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,18 @@ package exec

import "math"

// compareInts reports how a orders relative to b: -1 when a is smaller, 1 when
// a is larger, and 0 when the two are equal. Both operands are int64 so that
// mixed-width integer comparisons can be handled by casting each side before
// calling.
func compareInts(a, b int64) int {
	switch {
	case a < b:
		return -1
	case a > b:
		return 1
	default:
		return 0
	}
}

// compareFloats compares two float values. This function is necessary for NaN
// handling. In SQL, NaN is treated as less than all other float values. In Go,
// any comparison with NaN returns false.
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/exec/execgen/cmd/execgen/colvec_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func genColvec(wr io.Writer) error {
return err
}

return tmpl.Execute(wr, comparisonOpToOverloads[tree.NE])
return tmpl.Execute(wr, sameTypeComparisonOpToOverloads[tree.NE])
}
func init() {
registerGenerator(genColvec, "vec.eg.go")
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/exec/execgen/cmd/execgen/distinct_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func genDistinctOps(wr io.Writer) error {
return err
}

return tmpl.Execute(wr, comparisonOpToOverloads[tree.NE])
return tmpl.Execute(wr, sameTypeComparisonOpToOverloads[tree.NE])
}
func init() {
registerGenerator(genDistinctOps, "distinct.eg.go")
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/exec/execgen/cmd/execgen/hashjoiner_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func genHashJoiner(wr io.Writer) error {
return err
}

allOverloads := intersectOverloads(comparisonOpToOverloads[tree.NE], hashOverloads)
allOverloads := intersectOverloads(sameTypeComparisonOpToOverloads[tree.NE], hashOverloads)

return tmpl.Execute(wr, struct {
NETemplate interface{}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/exec/execgen/cmd/execgen/mergejoiner_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func genMergeJoinOps(wr io.Writer) error {
return err
}

allOverloads := intersectOverloads(comparisonOpToOverloads[tree.EQ], comparisonOpToOverloads[tree.LT], comparisonOpToOverloads[tree.GT])
allOverloads := intersectOverloads(sameTypeComparisonOpToOverloads[tree.EQ], sameTypeComparisonOpToOverloads[tree.LT], sameTypeComparisonOpToOverloads[tree.GT])

// Create an mjOverload for each overload combining three overloads so that
// the template code can access all of EQ, LT, and GT in the same range loop.
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/exec/execgen/cmd/execgen/min_max_agg_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ func genMinMaxAgg(wr io.Writer) error {
data := []aggOverloads{
{
Agg: distsqlpb.AggregatorSpec_MIN,
Overloads: comparisonOpToOverloads[tree.LT],
Overloads: sameTypeComparisonOpToOverloads[tree.LT],
},
{
Agg: distsqlpb.AggregatorSpec_MAX,
Overloads: comparisonOpToOverloads[tree.GT],
Overloads: sameTypeComparisonOpToOverloads[tree.GT],
},
}
return tmpl.Execute(wr, data)
Expand Down
121 changes: 73 additions & 48 deletions pkg/sql/exec/execgen/cmd/execgen/overloads.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,13 @@ type overload struct {
CmpOp tree.ComparisonOperator
BinOp tree.BinaryOperator
// OpStr is the string form of whichever of CmpOp and BinOp are set.
OpStr string
LTyp coltypes.T
RTyp coltypes.T
LGoType string
RGoType string
RetTyp coltypes.T
OpStr string
LTyp coltypes.T
RTyp coltypes.T
LGoType string
RGoType string
RetTyp coltypes.T
RetGoType string

AssignFunc assignFunc
CompareFunc compareFunc
Expand All @@ -95,9 +96,13 @@ var comparisonOpOverloads []*overload
// implement it.
var binaryOpToOverloads map[tree.BinaryOperator][]*overload

// comparisonOpToOverloads maps a comparison operator to all of the overloads
// that implement it.
var comparisonOpToOverloads map[tree.ComparisonOperator][]*overload
// sameTypeComparisonOpToOverloads maps a comparison operator to all of the
// overloads that implement that comparison between two values of the same type.
var sameTypeComparisonOpToOverloads map[tree.ComparisonOperator][]*overload

// anyTypeComparisonOpToOverloads maps a comparison operator to all of the
// overloads that implement it, including all mixed type comparisons.
var anyTypeComparisonOpToOverloads map[tree.ComparisonOperator][]*overload

// hashOverloads is a list of all of the overloads that implement the hash
// operation.
Expand Down Expand Up @@ -150,7 +155,8 @@ func init() {
binOps := []tree.BinaryOperator{tree.Plus, tree.Minus, tree.Mult, tree.Div}
cmpOps := []tree.ComparisonOperator{tree.EQ, tree.NE, tree.LT, tree.LE, tree.GT, tree.GE}
binaryOpToOverloads = make(map[tree.BinaryOperator][]*overload, len(binaryOpName))
comparisonOpToOverloads = make(map[tree.ComparisonOperator][]*overload, len(comparisonOpName))
sameTypeComparisonOpToOverloads = make(map[tree.ComparisonOperator][]*overload, len(comparisonOpName))
anyTypeComparisonOpToOverloads = make(map[tree.ComparisonOperator][]*overload, len(comparisonOpName))
for _, t := range inputTypes {
customizer := typeCustomizers[t]
for _, op := range binOps {
Expand All @@ -160,15 +166,16 @@ func init() {
continue
}
ov := &overload{
Name: binaryOpName[op],
BinOp: op,
IsBinOp: true,
OpStr: binaryOpInfix[op],
LTyp: t,
RTyp: t,
LGoType: t.GoTypeName(),
RGoType: t.GoTypeName(),
RetTyp: t,
Name: binaryOpName[op],
BinOp: op,
IsBinOp: true,
OpStr: binaryOpInfix[op],
LTyp: t,
RTyp: t,
LGoType: t.GoTypeName(),
RGoType: t.GoTypeName(),
RetTyp: t,
RetGoType: t.GoTypeName(),
}
if customizer != nil {
if b, ok := customizer.(binOpTypeCustomizer); ok {
Expand All @@ -178,35 +185,6 @@ func init() {
binaryOpOverloads = append(binaryOpOverloads, ov)
binaryOpToOverloads[op] = append(binaryOpToOverloads[op], ov)
}
for _, op := range cmpOps {
opStr := comparisonOpInfix[op]
ov := &overload{
Name: comparisonOpName[op],
CmpOp: op,
IsCmpOp: true,
OpStr: opStr,
LTyp: t,
RTyp: t,
LGoType: t.GoTypeName(),
RGoType: t.GoTypeName(),
RetTyp: coltypes.Bool,
}
if customizer != nil {
if b, ok := customizer.(cmpOpTypeCustomizer); ok {
ov.AssignFunc = func(op overload, target, l, r string) string {
c := b.getCmpOpCompareFunc()(l, r)
if c == "" {
return ""
}
return fmt.Sprintf("%s = %s %s 0", target, c, op.OpStr)
}
ov.CompareFunc = b.getCmpOpCompareFunc()
}
}
comparisonOpOverloads = append(comparisonOpOverloads, ov)
comparisonOpToOverloads[op] = append(comparisonOpToOverloads[op], ov)
}

ov := &overload{
IsHashOp: true,
LTyp: t,
Expand All @@ -219,6 +197,43 @@ func init() {
}
hashOverloads = append(hashOverloads, ov)
}
for _, leftType := range inputTypes {
customizer := typeCustomizers[leftType]
for _, rightType := range coltypes.CompatibleTypes[leftType] {
for _, op := range cmpOps {
opStr := comparisonOpInfix[op]
ov := &overload{
Name: comparisonOpName[op],
CmpOp: op,
IsCmpOp: true,
OpStr: opStr,
LTyp: leftType,
RTyp: rightType,
LGoType: leftType.GoTypeName(),
RGoType: rightType.GoTypeName(),
RetTyp: coltypes.Bool,
RetGoType: coltypes.Bool.GoTypeName(),
}
if customizer != nil {
if b, ok := customizer.(cmpOpTypeCustomizer); ok {
ov.AssignFunc = func(op overload, target, l, r string) string {
c := b.getCmpOpCompareFunc()(l, r)
if c == "" {
return ""
}
return fmt.Sprintf("%s = %s %s 0", target, c, op.OpStr)
}
ov.CompareFunc = b.getCmpOpCompareFunc()
}
}
comparisonOpOverloads = append(comparisonOpOverloads, ov)
anyTypeComparisonOpToOverloads[op] = append(anyTypeComparisonOpToOverloads[op], ov)
if leftType == rightType {
sameTypeComparisonOpToOverloads[op] = append(sameTypeComparisonOpToOverloads[op], ov)
}
}
}
}
}

// typeCustomizer is a marker interface for something that implements one or
Expand All @@ -229,6 +244,8 @@ func init() {
// (==, <, etc) or binary operator (+, -, etc) semantics.
type typeCustomizer interface{}

// TODO(rafi): make this map keyed by (leftType, rightType) so we can have
// customizers for mixed-type operations.
var typeCustomizers map[coltypes.T]typeCustomizer

// registerTypeCustomizer registers a particular type customizer to a type, for
Expand Down Expand Up @@ -352,6 +369,14 @@ func (c intCustomizer) getHashAssignFunc() assignFunc {
}
}

// getCmpOpCompareFunc returns a compareFunc that, given the source text of the
// left and right operands, produces a call to compareInts with both operands
// cast to int64.
func (c intCustomizer) getCmpOpCompareFunc() compareFunc {
	// Both sides are always widened to int64, whatever their original width.
	const callFmt = "compareInts(int64(%s), int64(%s))"
	return func(lhs, rhs string) string {
		return fmt.Sprintf(callFmt, lhs, rhs)
	}
}

func (c intCustomizer) getBinOpAssignFunc() assignFunc {
return func(op overload, target, l, r string) string {
args := map[string]string{"Target": target, "Left": l, "Right": r}
Expand Down
Loading

0 comments on commit 7880622

Please sign in to comment.