Skip to content

Commit

Permalink
sql: fix tuple IS NULL logic
Browse files Browse the repository at this point in the history
Previously, we treated all cases of `x IS NULL` as `x IS NOT DISTINCT
FROM NULL`, and all cases of `x IS NOT NULL` as `x IS DISTINCT FROM
NULL`. However, these transformations are not equivalent when `x` is a
tuple.

If all elements of `x` are `NULL`, then `x IS NULL` should evaluate to
true, but `x IS NOT DISTINCT FROM NULL` should evaluate to false. If one
element of `x` is `NULL` and one is not null, then `x IS NOT NULL`
should evaluate to false, but `x IS DISTINCT FROM NULL` should evaluate
to true. Therefore, they are not equivalent.

Below is a table of the correct semantics for tuple expressions.

| Tuple        | IS NOT DISTINCT FROM NULL | IS NULL   | IS DISTINCT FROM NULL | IS NOT NULL |
| ------------ | ------------------------- | --------- | --------------------- | ----------- |
| (1, 1)       | false                     | false     | true                  | true        |
| (1, NULL)    | false                     | **false** | true                  | **false**   |
| (NULL, NULL) | false                     | true      | true                  | false       |

Notice that `IS NOT DISTINCT FROM NULL` is always the inverse of
`IS DISTINCT FROM NULL`. However, `IS NULL` and `IS NOT NULL` are not
inverses given the tuple `(1, NULL)`.

This commit introduces new tree expressions for `IS NULL` and `IS NOT
NULL`. These operators have evaluation logic that is different from `IS
NOT DISTINCT FROM NULL` and `IS DISTINCT FROM NULL`, respectively. While
an expression such as `x IS NOT DISTINCT FROM NULL` is parsed as a
`tree.ComparisonExpr` with a `tree.IsNotDistinctFrom` operator,
execbuilder will output the simpler `tree.IsNullExpr` when the two
expressions are equivalent - when x is not a tuple.

This commit also introduces new optimizer expression types,
`IsTupleNull` and `IsTupleNotNull`. Normalization rules have been added
for folding these expressions into boolean values when possible.

Release note (bug fix): Fixes incorrect logic for `IS NULL` and `IS NOT
NULL` operators with tuples, correctly differentiating them from `IS NOT
DISTINCT FROM NULL` and `IS DISTINCT FROM NULL`, respectively.
  • Loading branch information
mgartner committed May 14, 2020
1 parent fbc1595 commit 41fb2c9
Show file tree
Hide file tree
Showing 22 changed files with 1,035 additions and 78 deletions.
102 changes: 69 additions & 33 deletions pkg/sql/colexec/execplan.go
Original file line number Diff line number Diff line change
Expand Up @@ -779,9 +779,7 @@ func NewColOperator(
outputIdx := len(spec.Input[0].ColumnTypes)
result.Op = NewOrdinalityOp(streamingAllocator, inputs[0], outputIdx)
result.IsStreaming = true
result.ColumnTypes = make([]*types.T, outputIdx+1)
copy(result.ColumnTypes, spec.Input[0].ColumnTypes)
result.ColumnTypes[outputIdx] = types.Int
result.ColumnTypes = appendOneType(spec.Input[0].ColumnTypes, types.Int)

case core.HashJoiner != nil:
if err := checkNumIn(inputs, 2); err != nil {
Expand Down Expand Up @@ -1090,10 +1088,7 @@ func NewColOperator(
if err != nil {
return result, err
}
oldColumnTypes := result.ColumnTypes
result.ColumnTypes = make([]*types.T, len(oldColumnTypes)+1)
copy(result.ColumnTypes, oldColumnTypes)
result.ColumnTypes[len(oldColumnTypes)] = returnType
result.ColumnTypes = appendOneType(result.ColumnTypes, returnType)
input = result.Op
}

Expand All @@ -1120,6 +1115,13 @@ func NewColOperator(
ColumnTypes: result.ColumnTypes,
}
err = ppr.planPostProcessSpec(ctx, flowCtx, post, streamingMemAccount)
// TODO(yuzefovich): update unit tests to remove panic-catcher when fallback
// to rowexec is not allowed.
if err != nil && processorConstructor == nil {
// Do not attempt to wrap as a row source if there is no
// processorConstructor because it would fail.
return result, err
}
if err != nil {
log.VEventf(
ctx, 2,
Expand Down Expand Up @@ -1483,6 +1485,18 @@ func planSelectionOperators(
)
op = NewBoolVecToSelOp(op, resultIdx)
return op, resultIdx, typs, internalMemUsed, err
case *tree.IsNullExpr:
op, resultIdx, typs, internalMemUsed, err = planProjectionOperators(
ctx, evalCtx, t.TypedInnerExpr(), columnTypes, input, acc,
)
op = newIsNullSelOp(op, resultIdx, false)
return op, resultIdx, typs, internalMemUsed, err
case *tree.IsNotNullExpr:
op, resultIdx, typs, internalMemUsed, err = planProjectionOperators(
ctx, evalCtx, t.TypedInnerExpr(), columnTypes, input, acc,
)
op = newIsNullSelOp(op, resultIdx, true)
return op, resultIdx, typs, internalMemUsed, err
case *tree.ComparisonExpr:
cmpOp := t.Operator
leftOp, leftIdx, ct, internalMemUsedLeft, err := planProjectionOperators(
Expand Down Expand Up @@ -1514,8 +1528,10 @@ func planSelectionOperators(
err = errors.Errorf("IS DISTINCT FROM and IS NOT DISTINCT FROM are supported only with NULL argument")
return nil, resultIdx, ct, internalMemUsed, err
}
// IS NULL is replaced with IS NOT DISTINCT FROM NULL, so we want to
// negate when IS DISTINCT FROM is used.
// IS NOT DISTINCT FROM NULL is synonymous with IS NULL and IS
// DISTINCT FROM NULL is synonymous with IS NOT NULL (except for
// tuples). Therefore, negate when the operator is IS DISTINCT
// FROM NULL.
negate := t.Operator == tree.IsDistinctFrom
op = newIsNullSelOp(leftOp, leftIdx, negate)
return op, resultIdx, ct, internalMemUsedLeft, err
Expand Down Expand Up @@ -1551,9 +1567,7 @@ func planTypedMaybeNullProjectionOperators(
if expr == tree.DNull {
resultIdx = len(columnTypes)
op = NewConstNullOp(colmem.NewAllocator(ctx, acc), input, resultIdx, exprTyp)
typs = make([]*types.T, len(columnTypes)+1)
copy(typs, columnTypes)
typs[len(columnTypes)] = exprTyp
typs = appendOneType(columnTypes, exprTyp)
return op, resultIdx, typs, internalMemUsed, nil
}
return planProjectionOperators(ctx, evalCtx, expr, columnTypes, input, acc)
Expand Down Expand Up @@ -1592,9 +1606,7 @@ func planCastOperator(
}
outputIdx := len(columnTypes)
op, err = GetCastOperator(colmem.NewAllocator(ctx, acc), input, inputIdx, outputIdx, fromType, toType)
typs = make([]*types.T, len(columnTypes)+1)
copy(typs, columnTypes)
typs[len(columnTypes)] = toType
typs = appendOneType(columnTypes, toType)
return op, outputIdx, typs, err
}

Expand All @@ -1618,6 +1630,11 @@ func planProjectionOperators(
return planProjectionExpr(ctx, evalCtx, t.Operator, t.ResolvedType(), t.TypedLeft(), t.TypedRight(), columnTypes, input, acc)
case *tree.BinaryExpr:
return planProjectionExpr(ctx, evalCtx, t.Operator, t.ResolvedType(), t.TypedLeft(), t.TypedRight(), columnTypes, input, acc)
case *tree.IsNullExpr:
t.TypedInnerExpr()
return planIsNullProjectionOp(ctx, evalCtx, t.ResolvedType(), t.TypedInnerExpr(), columnTypes, input, acc, false)
case *tree.IsNotNullExpr:
return planIsNullProjectionOp(ctx, evalCtx, t.ResolvedType(), t.TypedInnerExpr(), columnTypes, input, acc, true)
case *tree.CastExpr:
expr := t.Expr.(tree.TypedExpr)
// If the expression is NULL, we use planTypedMaybeNullProjectionOperators instead of planProjectionOperators
Expand Down Expand Up @@ -1656,22 +1673,16 @@ func planProjectionOperators(
inputCols = append(inputCols, resultIdx)
internalMemUsed += projectionInternalMem
}
funcOutputType := t.ResolvedType()
resultIdx = len(typs)
oldTyps := typs
typs = make([]*types.T, len(oldTyps)+1)
copy(typs, oldTyps)
typs[len(oldTyps)] = funcOutputType
op, err = NewBuiltinFunctionOperator(
colmem.NewAllocator(ctx, acc), evalCtx, t, typs, inputCols, resultIdx, op,
)
typs = appendOneType(typs, t.ResolvedType())
return op, resultIdx, typs, internalMemUsed, err
case tree.Datum:
datumType := t.ResolvedType()
typs = make([]*types.T, len(columnTypes)+1)
copy(typs, columnTypes)
resultIdx = len(columnTypes)
typs[resultIdx] = datumType
typs = appendOneType(columnTypes, datumType)
if datumType.Family() == types.UnknownFamily {
return nil, resultIdx, typs, internalMemUsed, errors.New("cannot plan null type unknown")
}
Expand Down Expand Up @@ -1709,9 +1720,7 @@ func planProjectionOperators(
"unsupported type %s", caseOutputType)
}
caseOutputIdx := len(columnTypes)
typs = make([]*types.T, len(columnTypes)+1)
copy(typs, columnTypes)
typs[caseOutputIdx] = caseOutputType
typs = appendOneType(columnTypes, caseOutputType)
thenIdxs := make([]int, len(t.Whens)+1)
for i, when := range t.Whens {
// The case operator is assembled from n WHEN arms, n THEN arms, and an
Expand Down Expand Up @@ -1966,10 +1975,7 @@ func planProjectionExpr(
if sMem, ok := op.(InternalMemoryOperator); ok {
internalMemUsed += sMem.InternalMemoryUsage()
}
oldTyps := typs
typs = make([]*types.T, len(oldTyps)+1)
copy(typs, oldTyps)
typs[len(oldTyps)] = actualOutputType
typs = appendOneType(typs, actualOutputType)
if !outputType.Identical(actualOutputType) {
// The projection operator outputs a column of a different type than
// the expected logical type. In order to "synchronize" the reality and
Expand Down Expand Up @@ -2001,9 +2007,7 @@ func planLogicalProjectionOp(
) (op colexecbase.Operator, resultIdx int, typs []*types.T, internalMemUsed int, err error) {
// Add a new boolean column that will store the result of the projection.
resultIdx = len(columnTypes)
typs = make([]*types.T, resultIdx+1)
copy(typs, columnTypes)
typs[resultIdx] = types.Bool
typs = appendOneType(columnTypes, types.Bool)
var (
typedLeft, typedRight tree.TypedExpr
leftProjOpChain, rightProjOpChain, outputOp colexecbase.Operator
Expand Down Expand Up @@ -2053,3 +2057,35 @@ func planLogicalProjectionOp(
}
return outputOp, resultIdx, typs, internalMemUsedLeft + internalMemUsedRight, nil
}

// planIsNullProjectionOp plans the operator for IS NULL and IS NOT NULL
// expressions (tree.IsNullExpr and tree.IsNotNullExpr, respectively).
//
// The inner expression expr is planned first via planProjectionOperators on
// top of input. The resulting operator is then wrapped in an is-null
// projection operator that reads the inner expression's result column and
// writes into a newly appended column of type outputType. Callers pass
// negate=false for IS NULL and negate=true for IS NOT NULL.
//
// On success, the returned resultIdx is the index of the newly appended
// column and typs is the inner plan's type schema extended with outputType.
// If planning the inner expression fails, the values produced by
// planProjectionOperators are returned unchanged together with the error, and
// no wrapping operator is constructed.
func planIsNullProjectionOp(
	ctx context.Context,
	evalCtx *tree.EvalContext,
	outputType *types.T,
	expr tree.TypedExpr,
	columnTypes []*types.T,
	input colexecbase.Operator,
	acc *mon.BoundAccount,
	negate bool,
) (op colexecbase.Operator, resultIdx int, typs []*types.T, internalMemUsed int, err error) {
	op, resultIdx, typs, internalMemUsed, err = planProjectionOperators(
		ctx, evalCtx, expr, columnTypes, input, acc,
	)
	if err != nil {
		// Do not build an operator on top of a failed (possibly nil) input
		// chain; surface the planning error to the caller as is.
		return op, resultIdx, typs, internalMemUsed, err
	}
	// The is-null result goes into a fresh column right after all columns
	// produced by the inner expression's plan.
	outputIdx := len(typs)
	op = newIsNullProjOp(colmem.NewAllocator(ctx, acc), op, resultIdx, outputIdx, negate)
	typs = appendOneType(typs, outputType)
	return op, outputIdx, typs, internalMemUsed, nil
}

// appendOneType returns a new slice holding every element of typs followed by
// t. The input slice is never modified. The result is backed by a freshly
// allocated array with room for exactly len(typs)+1 elements, unlike a plain
// call to the built-in append on typs, which may over-allocate capacity when
// it has to grow the slice.
func appendOneType(typs []*types.T, t *types.T) []*types.T {
	// Reserve exactly the required capacity up front so that neither append
	// below triggers a reallocation.
	extended := make([]*types.T, 0, len(typs)+1)
	extended = append(extended, typs...)
	return append(extended, t)
}
Loading

0 comments on commit 41fb2c9

Please sign in to comment.