Merge pull request cockroachdb#20728 from RaduBerinde/tuples-2

opt: normalize tuple equalities
petermattis · Dec 15, 2017 · b511ba5 · b511ba5
2 parents 036977a + 0abfd96
commit b511ba5
Show file tree

Hide file tree

Showing 6 changed files with 294 additions and 86 deletions.
diff --git a/pkg/sql/opt/expr.go b/pkg/sql/opt/expr.go
@@ -47,6 +47,22 @@ func (e *expr) opClass() operatorClass {
 	return operatorTab[e.op].class
 }
 
+// normalizeExpr applies normalization rules to the expression tree rooted at e.
+func normalizeExpr(e *expr) {
+	for _, input := range e.children {
+		normalizeExpr(input) // children first, so e's own rule sees normalized inputs
+	}
+	normalizeExprNode(e)
+}
+
+// normalizeExprNode applies the normalization function registered for e's
+// operator in operatorTab, if any. Unlike normalizeExpr, it does not itself recurse into e's children.
+func normalizeExprNode(e *expr) {
+	if normalizeFn := operatorTab[e.op].normalizeFn; normalizeFn != nil {
+		normalizeFn(e)
+	}
+}
+
 // formatExprs formats the given expressions as children of the same
 // node. Optionally creates a new parent node (if title is not "", and we have
 // expressions).

diff --git a/pkg/sql/opt/operator.go b/pkg/sql/opt/operator.go
@@ -102,6 +102,8 @@ type operatorInfo struct {
 	name string
 	// class of the operator (see operatorClass).
 	class operatorClass
+
+	normalizeFn func(*expr)
 }
 
 // operatorTab stores static information about all operators.
@@ -119,9 +121,8 @@ func (op operator) String() string {
 // registerOperator initializes the operator's entry in operatorTab.
 // There must be a call to registerOperator in an init() function for every
 // operator.
-func registerOperator(op operator, name string, class operatorClass) {
-	operatorTab[op].name = name
-	operatorTab[op].class = class
+func registerOperator(op operator, info operatorInfo) {
+	operatorTab[op] = info // replaces the whole entry rather than individual fields
 }
 
 // operatorClass implements functionality that is common for a subset of

diff --git a/pkg/sql/opt/opt_test.go b/pkg/sql/opt/opt_test.go
@@ -279,7 +279,7 @@ func TestOpt(t *testing.T) {
 						}
 						buildScalarFn()
 					case "normalize":
-						normalizeScalar(e)
+						normalizeExpr(e)
 					case "index-constraints":
 						if e == nil {
 							d.fatalf(t, "no expression for index-constraints")

diff --git a/pkg/sql/opt/scalar.go b/pkg/sql/opt/scalar.go
@@ -24,60 +24,64 @@ import (
 )
 
 func init() {
-	scalarOpNames := map[operator]string{
-		variableOp:          "variable",
-		constOp:             "const",
-		listOp:              "list",
-		orderedListOp:       "ordered-list",
-		andOp:               "and",
-		orOp:                "or",
-		notOp:               "not",
-		eqOp:                "eq",
-		ltOp:                "lt",
-		gtOp:                "gt",
-		leOp:                "le",
-		geOp:                "ge",
-		neOp:                "ne",
-		inOp:                "in",
-		notInOp:             "not-in",
-		likeOp:              "like",
-		notLikeOp:           "not-like",
-		iLikeOp:             "ilike",
-		notILikeOp:          "not-ilike",
-		similarToOp:         "similar-to",
-		notSimilarToOp:      "not-similar-to",
-		regMatchOp:          "regmatch",
-		notRegMatchOp:       "not-regmatch",
-		regIMatchOp:         "regimatch",
-		notRegIMatchOp:      "not-regimatch",
-		isDistinctFromOp:    "is-distinct-from",
-		isNotDistinctFromOp: "is-not-distinct-from",
-		isOp:                "is",
-		isNotOp:             "is-not",
-		anyOp:               "any",
-		someOp:              "some",
-		allOp:               "all",
-		bitandOp:            "bitand",
-		bitorOp:             "bitor",
-		bitxorOp:            "bitxor",
-		plusOp:              "plus",
-		minusOp:             "minus",
-		multOp:              "mult",
-		divOp:               "div",
-		floorDivOp:          "floor-div",
-		modOp:               "mod",
-		powOp:               "pow",
-		concatOp:            "concat",
-		lShiftOp:            "lshift",
-		rShiftOp:            "rshift",
-		unaryPlusOp:         "unary-plus",
-		unaryMinusOp:        "unary-minus",
-		unaryComplementOp:   "complement",
-		functionCallOp:      "func",
+	// A note on normalization functions for scalar expressions: for now we expect
+	// to build exprs from TypedExprs which have already gone through a
+	// normalization process, so we implement only additional rules.
+	scalarOpInfos := map[operator]operatorInfo{
+		variableOp:          {name: "variable"},
+		constOp:             {name: "const"},
+		listOp:              {name: "list"},
+		orderedListOp:       {name: "ordered-list"},
+		andOp:               {name: "and", normalizeFn: normalizeAndOrOp},
+		orOp:                {name: "or", normalizeFn: normalizeAndOrOp},
+		notOp:               {name: "not"},
+		eqOp:                {name: "eq", normalizeFn: normalizeEqOp},
+		ltOp:                {name: "lt"},
+		gtOp:                {name: "gt"},
+		leOp:                {name: "le"},
+		geOp:                {name: "ge"},
+		neOp:                {name: "ne"},
+		inOp:                {name: "in"},
+		notInOp:             {name: "not-in"},
+		likeOp:              {name: "like"},
+		notLikeOp:           {name: "not-like"},
+		iLikeOp:             {name: "ilike"},
+		notILikeOp:          {name: "not-ilike"},
+		similarToOp:         {name: "similar-to"},
+		notSimilarToOp:      {name: "not-similar-to"},
+		regMatchOp:          {name: "regmatch"},
+		notRegMatchOp:       {name: "not-regmatch"},
+		regIMatchOp:         {name: "regimatch"},
+		notRegIMatchOp:      {name: "not-regimatch"},
+		isDistinctFromOp:    {name: "is-distinct-from"},
+		isNotDistinctFromOp: {name: "is-not-distinct-from"},
+		isOp:                {name: "is"},
+		isNotOp:             {name: "is-not"},
+		anyOp:               {name: "any"},
+		someOp:              {name: "some"},
+		allOp:               {name: "all"},
+		bitandOp:            {name: "bitand"},
+		bitorOp:             {name: "bitor"},
+		bitxorOp:            {name: "bitxor"},
+		plusOp:              {name: "plus"},
+		minusOp:             {name: "minus"},
+		multOp:              {name: "mult"},
+		divOp:               {name: "div"},
+		floorDivOp:          {name: "floor-div"},
+		modOp:               {name: "mod"},
+		powOp:               {name: "pow"},
+		concatOp:            {name: "concat"},
+		lShiftOp:            {name: "lshift"},
+		rShiftOp:            {name: "rshift"},
+		unaryPlusOp:         {name: "unary-plus"},
+		unaryMinusOp:        {name: "unary-minus"},
+		unaryComplementOp:   {name: "complement"},
+		functionCallOp:      {name: "func"},
 	}
 
-	for op, name := range scalarOpNames {
-		registerOperator(op, name, scalarClass{})
+	for op, info := range scalarOpInfos {
+		info.class = scalarClass{} // every operator in scalarOpInfos gets the scalar class
+		registerOperator(op, info)
 	}
 }
 
@@ -165,40 +169,74 @@ func isTupleOfConstants(e *expr) bool {
 	return true
 }
 
-// Applies a set of normalization rules to a scalar expression.
-//
-// For now, we expect to build exprs from TypedExprs which have gone through a
-// normalization process; we include additional rules.
-func normalizeScalar(e *expr) {
-	for _, input := range e.children {
-		normalizeScalar(input)
+// normalizeAndOrOp implements the normalization rule for andOp and orOp: any
+// child with the same operator as e is replaced, in place, by its own children.
+// Example: a and (b and c)  ->  a and b and c
+func normalizeAndOrOp(e *expr) {
+	if e.op != andOp && e.op != orOp {
+		panic(fmt.Sprintf("invalid call on %s", e))
+	}
+	var found bool
+	newNumChildren := len(e.children) // child count after merging; sizes the new slice
+	for _, child := range e.children {
+		if child.op == e.op {
+			found = true
+			// We will add the grandchildren as direct children of this node (and
+			// remove the child). The child has been normalized already, so we don't
+			// need to look deeper.
+			newNumChildren += len(child.children) - 1
+		}
 	}
+	if !found {
+		return // nothing to merge; e.children is left untouched
+	}
+	saved := e.children
+	e.children = make([]*expr, 0, newNumChildren)
+
+	for _, child := range saved {
+		if child.op == e.op {
+			e.children = append(e.children, child.children...)
+		} else {
+			e.children = append(e.children, child)
+		}
+	}
+}
 
-	if e.op == andOp || e.op == orOp {
-		// Merge in any children that have the same operator. Example:
-		//   a and (b and c)  ->  a and b and c
-		var found bool
-		newNumChildren := len(e.children)
-		for _, child := range e.children {
-			if child.op == e.op {
-				found = true
-				// We will add the grandchildren as direct children of this node (and
-				// remove the child). The child has been normalized already, so we don't
-				// need to look deeper.
-				newNumChildren += len(child.children) - 1
-			}
+// normalizeEqOp implements the normalization rules for eqOp:
+//  - a tuple equality (a, b, c) = (x, y, z) is broken up into
+//    (a = x) AND (b = y) AND (c = z); this spares the index constraints code
+//    from having to deal with tuple equalities separately;
+//  - otherwise, (1 = @1) is flipped to (@1 = 1).
+// It panics if e is not an eqOp or if the two tuples have different lengths.
+func normalizeEqOp(e *expr) {
+	if e.op != eqOp {
+		panic(fmt.Sprintf("invalid call on %s", e))
+	}
+	lhs, rhs := e.children[0], e.children[1]
+	if lhs.op == orderedListOp && rhs.op == orderedListOp {
+		// Validate before mutating e, so the panic message shows the original eqOp.
+		if len(lhs.children) != len(rhs.children) {
+			panic(fmt.Sprintf("tuple length mismatch in eqOp: %s", e))
 		}
-		if found {
-			saved := e.children
-			e.children = make([]*expr, 0, newNumChildren)
-
-			for _, child := range saved {
-				if child.op == e.op {
-					e.children = append(e.children, child.children...)
-				} else {
-					e.children = append(e.children, child)
-				}
+		e.op = andOp
+		e.children = make([]*expr, len(lhs.children))
+		for i := range lhs.children {
+			e.children[i] = &expr{
+				op:          eqOp,
+				scalarProps: &scalarProps{typ: types.Bool},
 			}
+			initBinaryExpr(e.children[i], eqOp, lhs.children[i], rhs.children[i])
+			// Normalize the new child node. This is for cases like:
+			// ((a, b), (c, d)) = ((x, y), (z, u))
+			normalizeExprNode(e.children[i])
 		}
+		// Normalize the new expression (other rules, such as coalescing nested
+		// AND operations, might apply now).
+		normalizeExprNode(e)
+	} else if lhs.op != variableOp && rhs.op == variableOp {
+		// Normalize (1 = @1) to (@1 = 1).
+		// Note: this transformation is already performed by the TypedExpr
+		// NormalizeExpr, but we may be creating new such expressions above.
+		e.children[0], e.children[1] = rhs, lhs
 	}
 }