Skip to content

Commit

Permalink
sql: fix tuple IS NULL logic
Browse files Browse the repository at this point in the history
Previously, we treated all cases of `x IS NULL` as `x IS NOT DISTINCT
FROM NULL`, and all cases of `x IS NOT NULL` as `x IS DISTINCT FROM
NULL`. However, these transformations are not equivalent when `x` is a
tuple.

If all elements of `x` are `NULL`, then `x IS NULL` should evaluate to
true, but `x IS DISTINCT FROM NULL` should evaluate to false. If one
element of `x` is `NULL` and one is not null, then `x IS NOT NULL`
should evaluate to false, but `x IS DISTINCT FROM NULL` should evaluate
to true. Therefore, they are not equivalent.

Below is a table of the correct semantics for tuple expressions.

| Tuple        | IS NOT DISTINCT FROM NULL | IS NULL   | IS DISTINCT FROM NULL | IS NOT NULL |
| ------------ | ------------------------- | --------- | --------------------- | ----------- |
| (1, 1)       | false                     | false     | true                  | true        |
| (1, NULL)    | false                     | **false** | true                  | **false**   |
| (NULL, NULL) | false                     | true      | true                  | false       |

Notice that `IS NOT DISTINCT FROM NULL` is always the inverse of
`IS DISTINCT FROM NULL`. However, `IS NULL` and `IS NOT NULL` are not
inverses given the tuple `(1, NULL)`.

This commit introduces new tree expressions for `IS NULL` and `IS NOT
NULL`. These operators have evaluation logic that is different from `IS
NOT DISTINCT FROM NULL` and `IS DISTINCT FROM NULL`, respectively.

This commit also introduces new optimizer expression types,
`IsTupleNull` and `IsTupleNotNull`. Normalization rules have been added
for folding these expressions into boolean values when possible.

Fixes   #46675
Informs #46908
Informs #12022

Release note (bug fix): Fixes incorrect logic for `IS NULL` and `IS NOT
NULL` operators with tuples, correctly differentiating them from `IS NOT
DISTINCT FROM NULL` and `IS DISTINCT FROM NULL`, respectively.
  • Loading branch information
mgartner committed May 11, 2020
1 parent 26f1f1e commit 39cfab0
Show file tree
Hide file tree
Showing 22 changed files with 994 additions and 80 deletions.
93 changes: 58 additions & 35 deletions pkg/sql/colexec/execplan.go
Original file line number Diff line number Diff line change
Expand Up @@ -781,9 +781,7 @@ func NewColOperator(
colmem.NewAllocator(ctx, streamingMemAccount), inputs[0], outputIdx,
)
result.IsStreaming = true
result.ColumnTypes = make([]*types.T, outputIdx+1)
copy(result.ColumnTypes, spec.Input[0].ColumnTypes)
result.ColumnTypes[outputIdx] = types.Int
result.ColumnTypes = appendOneType(spec.Input[0].ColumnTypes, types.Int)

case core.HashJoiner != nil:
if err := checkNumIn(inputs, 2); err != nil {
Expand Down Expand Up @@ -1085,10 +1083,7 @@ func NewColOperator(
if err != nil {
return result, err
}
oldColumnTypes := result.ColumnTypes
result.ColumnTypes = make([]*types.T, len(oldColumnTypes)+1)
copy(result.ColumnTypes, oldColumnTypes)
result.ColumnTypes[len(oldColumnTypes)] = returnType
result.ColumnTypes = appendOneType(result.ColumnTypes, returnType)
input = result.Op
}

Expand All @@ -1115,6 +1110,11 @@ func NewColOperator(
ColumnTypes: result.ColumnTypes,
}
err = ppr.planPostProcessSpec(ctx, flowCtx, post, streamingMemAccount)
if err != nil && processorConstructor == nil {
// Do not attempt to wrap as a row source if there is no
// processorConstructor because it would fail.
return result, err
}
if err != nil {
log.VEventf(
ctx, 2,
Expand Down Expand Up @@ -1478,6 +1478,19 @@ func planSelectionOperators(
)
op = NewBoolVecToSelOp(op, resultIdx)
return op, resultIdx, typs, internalMemUsed, err
case *tree.IsNullExpr:
op, resultIdx, typs, internalMemUsed, err = planProjectionOperators(
ctx, evalCtx, t.TypedInnerExpr(), columnTypes, input, acc,
)
_, negate := expr.(*tree.IsNotNullExpr)
op = newIsNullSelOp(op, resultIdx, negate)
return op, resultIdx, typs, internalMemUsed, err
case *tree.IsNotNullExpr:
op, resultIdx, typs, internalMemUsed, err = planProjectionOperators(
ctx, evalCtx, t.TypedInnerExpr(), columnTypes, input, acc,
)
op = newIsNullSelOp(op, resultIdx, true)
return op, resultIdx, typs, internalMemUsed, err
case *tree.ComparisonExpr:
cmpOp := t.Operator
leftOp, leftIdx, ct, internalMemUsedLeft, err := planProjectionOperators(
Expand Down Expand Up @@ -1546,9 +1559,7 @@ func planTypedMaybeNullProjectionOperators(
if expr == tree.DNull {
resultIdx = len(columnTypes)
op = NewConstNullOp(colmem.NewAllocator(ctx, acc), input, resultIdx, exprTyp)
typs = make([]*types.T, len(columnTypes)+1)
copy(typs, columnTypes)
typs[len(columnTypes)] = exprTyp
typs = appendOneType(columnTypes, exprTyp)
return op, resultIdx, typs, internalMemUsed, nil
}
return planProjectionOperators(ctx, evalCtx, expr, columnTypes, input, acc)
Expand Down Expand Up @@ -1587,9 +1598,7 @@ func planCastOperator(
}
outputIdx := len(columnTypes)
op, err = GetCastOperator(colmem.NewAllocator(ctx, acc), input, inputIdx, outputIdx, fromType, toType)
typs = make([]*types.T, len(columnTypes)+1)
copy(typs, columnTypes)
typs[len(columnTypes)] = toType
typs = appendOneType(columnTypes, toType)
return op, outputIdx, typs, err
}

Expand All @@ -1613,6 +1622,22 @@ func planProjectionOperators(
return planProjectionExpr(ctx, evalCtx, t.Operator, t.ResolvedType(), t.TypedLeft(), t.TypedRight(), columnTypes, input, acc)
case *tree.BinaryExpr:
return planProjectionExpr(ctx, evalCtx, t.Operator, t.ResolvedType(), t.TypedLeft(), t.TypedRight(), columnTypes, input, acc)
case *tree.IsNullExpr:
op, resultIdx, typs, internalMemUsed, err = planProjectionOperators(
ctx, evalCtx, t.TypedInnerExpr(), columnTypes, input, acc,
)
typs = appendOneType(typs, t.ResolvedType())
outputIdx := len(typs) - 1
op = newIsNullProjOp(colmem.NewAllocator(ctx, acc), op, resultIdx, outputIdx, false)
return op, outputIdx, typs, internalMemUsed, err
case *tree.IsNotNullExpr:
op, resultIdx, typs, internalMemUsed, err = planProjectionOperators(
ctx, evalCtx, t.TypedInnerExpr(), columnTypes, input, acc,
)
typs = appendOneType(typs, t.ResolvedType())
outputIdx := len(typs) - 1
op = newIsNullProjOp(colmem.NewAllocator(ctx, acc), op, resultIdx, outputIdx, true)
return op, outputIdx, typs, internalMemUsed, err
case *tree.CastExpr:
expr := t.Expr.(tree.TypedExpr)
// If the expression is NULL, we use planTypedMaybeNullProjectionOperators instead of planProjectionOperators
Expand Down Expand Up @@ -1651,22 +1676,16 @@ func planProjectionOperators(
inputCols = append(inputCols, resultIdx)
internalMemUsed += projectionInternalMem
}
funcOutputType := t.ResolvedType()
resultIdx = len(typs)
oldTyps := typs
typs = make([]*types.T, len(oldTyps)+1)
copy(typs, oldTyps)
typs[len(oldTyps)] = funcOutputType
typs = appendOneType(typs, t.ResolvedType())
resultIdx = len(typs) - 1
op, err = NewBuiltinFunctionOperator(
colmem.NewAllocator(ctx, acc), evalCtx, t, typs, inputCols, resultIdx, op,
)
return op, resultIdx, typs, internalMemUsed, err
case tree.Datum:
datumType := t.ResolvedType()
typs = make([]*types.T, len(columnTypes)+1)
copy(typs, columnTypes)
resultIdx = len(columnTypes)
typs[resultIdx] = datumType
typs = appendOneType(columnTypes, datumType)
resultIdx = len(typs) - 1
if datumType.Family() == types.UnknownFamily {
return nil, resultIdx, typs, internalMemUsed, errors.New("cannot plan null type unknown")
}
Expand Down Expand Up @@ -1703,10 +1722,8 @@ func planProjectionOperators(
return nil, resultIdx, typs, internalMemUsed, errors.Newf(
"unsupported type %s", caseOutputType)
}
caseOutputIdx := len(columnTypes)
typs = make([]*types.T, len(columnTypes)+1)
copy(typs, columnTypes)
typs[caseOutputIdx] = caseOutputType
typs = appendOneType(columnTypes, caseOutputType)
caseOutputIdx := len(typs) - 1
thenIdxs := make([]int, len(t.Whens)+1)
for i, when := range t.Whens {
// The case operator is assembled from n WHEN arms, n THEN arms, and an
Expand Down Expand Up @@ -1961,10 +1978,7 @@ func planProjectionExpr(
if sMem, ok := op.(InternalMemoryOperator); ok {
internalMemUsed += sMem.InternalMemoryUsage()
}
oldTyps := typs
typs = make([]*types.T, len(oldTyps)+1)
copy(typs, oldTyps)
typs[len(oldTyps)] = actualOutputType
typs = appendOneType(typs, actualOutputType)
if !outputType.Identical(actualOutputType) {
// The projection operator outputs a column of a different type than
// the expected logical type. In order to "synchronize" the reality and
Expand Down Expand Up @@ -1995,10 +2009,8 @@ func planLogicalProjectionOp(
acc *mon.BoundAccount,
) (op colexecbase.Operator, resultIdx int, typs []*types.T, internalMemUsed int, err error) {
// Add a new boolean column that will store the result of the projection.
resultIdx = len(columnTypes)
typs = make([]*types.T, resultIdx+1)
copy(typs, columnTypes)
typs[resultIdx] = types.Bool
typs = appendOneType(columnTypes, types.Bool)
resultIdx = len(typs) - 1
var (
typedLeft, typedRight tree.TypedExpr
leftProjOpChain, rightProjOpChain, outputOp colexecbase.Operator
Expand Down Expand Up @@ -2048,3 +2060,14 @@ func planLogicalProjectionOp(
}
return outputOp, resultIdx, typs, internalMemUsedLeft + internalMemUsedRight, nil
}

// appendOneType appends a *types.T to then end of a []*types.T. The size of the
// underlying array of the resulting slice is 1 greater than the input slice.
// This differs from the built-in append function, which can double the capacity
// of the slice if its length is less than 1024, or increase by 25% otherwise.
func appendOneType(typs []*types.T, t *types.T) []*types.T {
newTyps := make([]*types.T, len(typs)+1)
copy(newTyps, typs)
newTyps[len(newTyps)-1] = t
return newTyps
}
112 changes: 88 additions & 24 deletions pkg/sql/colexec/is_null_ops_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,56 +41,88 @@ func TestIsNullProjOp(t *testing.T) {
desc string
inputTuples tuples
outputTuples tuples
negate bool
projExpr string
}{
{
desc: "SELECT c, c IS NULL FROM t -- both",
inputTuples: tuples{{0}, {nil}, {1}, {2}, {nil}},
outputTuples: tuples{{0, false}, {nil, true}, {1, false}, {2, false}, {nil, true}},
negate: false,
projExpr: "IS NULL",
},
{
desc: "SELECT c, c IS NULL FROM t -- no NULLs",
inputTuples: tuples{{0}, {1}, {2}},
outputTuples: tuples{{0, false}, {1, false}, {2, false}},
negate: false,
projExpr: "IS NULL",
},
{
desc: "SELECT c, c IS NULL FROM t -- only NULLs",
inputTuples: tuples{{nil}, {nil}},
outputTuples: tuples{{nil, true}, {nil, true}},
negate: false,
projExpr: "IS NULL",
},
{
desc: "SELECT c, c IS NOT NULL FROM t -- both",
inputTuples: tuples{{0}, {nil}, {1}, {2}, {nil}},
outputTuples: tuples{{0, true}, {nil, false}, {1, true}, {2, true}, {nil, false}},
negate: true,
projExpr: "IS NOT NULL",
},
{
desc: "SELECT c, c IS NOT NULL FROM t -- no NULLs",
inputTuples: tuples{{0}, {1}, {2}},
outputTuples: tuples{{0, true}, {1, true}, {2, true}},
negate: true,
projExpr: "IS NOT NULL",
},
{
desc: "SELECT c, c IS NOT NULL FROM t -- only NULLs",
inputTuples: tuples{{nil}, {nil}},
outputTuples: tuples{{nil, false}, {nil, false}},
negate: true,
projExpr: "IS NOT NULL",
},
{
desc: "SELECT c, c IS NOT DISTINCT FROM NULL FROM t -- both",
inputTuples: tuples{{0}, {nil}, {1}, {2}, {nil}},
outputTuples: tuples{{0, false}, {nil, true}, {1, false}, {2, false}, {nil, true}},
projExpr: "IS NOT DISTINCT FROM NULL",
},
{
desc: "SELECT c, c IS NOT DISTINCT FROM NULL FROM t -- no NULLs",
inputTuples: tuples{{0}, {1}, {2}},
outputTuples: tuples{{0, false}, {1, false}, {2, false}},
projExpr: "IS NOT DISTINCT FROM NULL",
},
{
desc: "SELECT c, c IS NOT DISTINCT FROM NULL FROM t -- only NULLs",
inputTuples: tuples{{nil}, {nil}},
outputTuples: tuples{{nil, true}, {nil, true}},
projExpr: "IS NOT DISTINCT FROM NULL",
},
{
desc: "SELECT c, c IS DISTINCT FROM NULL FROM t -- both",
inputTuples: tuples{{0}, {nil}, {1}, {2}, {nil}},
outputTuples: tuples{{0, true}, {nil, false}, {1, true}, {2, true}, {nil, false}},
projExpr: "IS DISTINCT FROM NULL",
},
{
desc: "SELECT c, c IS DISTINCT FROM NULL FROM t -- no NULLs",
inputTuples: tuples{{0}, {1}, {2}},
outputTuples: tuples{{0, true}, {1, true}, {2, true}},
projExpr: "IS DISTINCT FROM NULL",
},
{
desc: "SELECT c, c IS DISTINCT FROM NULL FROM t -- only NULLs",
inputTuples: tuples{{nil}, {nil}},
outputTuples: tuples{{nil, false}, {nil, false}},
projExpr: "IS DISTINCT FROM NULL",
},
}

for _, c := range testCases {
t.Run(c.desc, func(t *testing.T) {
opConstructor := func(input []colexecbase.Operator) (colexecbase.Operator, error) {
projExpr := "IS NULL"
if c.negate {
projExpr = "IS NOT NULL"
}
return createTestProjectingOperator(
ctx, flowCtx, input[0], []*types.T{types.Int},
fmt.Sprintf("@1 %s", projExpr), false, /* canFallbackToRowexec */
fmt.Sprintf("@1 %s", c.projExpr), false, /* canFallbackToRowexec */
)
}
runTests(t, []tuples{c.inputTuples}, c.outputTuples, orderedVerifier, opConstructor)
Expand All @@ -115,60 +147,92 @@ func TestIsNullSelOp(t *testing.T) {
desc string
inputTuples tuples
outputTuples tuples
negate bool
selExpr string
}{
{
desc: "SELECT c FROM t WHERE c IS NULL -- both",
inputTuples: tuples{{0}, {nil}, {1}, {2}, {nil}},
outputTuples: tuples{{nil}, {nil}},
negate: false,
selExpr: "IS NULL",
},
{
desc: "SELECT c FROM t WHERE c IS NULL -- no NULLs",
inputTuples: tuples{{0}, {1}, {2}},
outputTuples: tuples{},
negate: false,
selExpr: "IS NULL",
},
{
desc: "SELECT c FROM t WHERE c IS NULL -- only NULLs",
inputTuples: tuples{{nil}, {nil}},
outputTuples: tuples{{nil}, {nil}},
negate: false,
selExpr: "IS NULL",
},
{
desc: "SELECT c FROM t WHERE c IS NOT NULL -- both",
inputTuples: tuples{{0}, {nil}, {1}, {2}, {nil}},
outputTuples: tuples{{0}, {1}, {2}},
negate: true,
selExpr: "IS NOT NULL",
},
{
desc: "SELECT c FROM t WHERE c IS NOT NULL -- no NULLs",
inputTuples: tuples{{0}, {1}, {2}},
outputTuples: tuples{{0}, {1}, {2}},
negate: true,
selExpr: "IS NOT NULL",
},
{
desc: "SELECT c FROM t WHERE c IS NOT NULL -- only NULLs",
inputTuples: tuples{{nil}, {nil}},
outputTuples: tuples{},
negate: true,
selExpr: "IS NOT NULL",
},
{
desc: "SELECT c FROM t WHERE c IS NOT DISTINCT FROM NULL -- both",
inputTuples: tuples{{0}, {nil}, {1}, {2}, {nil}},
outputTuples: tuples{{nil}, {nil}},
selExpr: "IS NOT DISTINCT FROM NULL",
},
{
desc: "SELECT c FROM t WHERE c IS NOT DISTINCT FROM NULL -- no NULLs",
inputTuples: tuples{{0}, {1}, {2}},
outputTuples: tuples{},
selExpr: "IS NOT DISTINCT FROM NULL",
},
{
desc: "SELECT c FROM t WHERE c IS NOT DISTINCT FROM NULL -- only NULLs",
inputTuples: tuples{{nil}, {nil}},
outputTuples: tuples{{nil}, {nil}},
selExpr: "IS NOT DISTINCT FROM NULL",
},
{
desc: "SELECT c FROM t WHERE c IS DISTINCT FROM NULL -- both",
inputTuples: tuples{{0}, {nil}, {1}, {2}, {nil}},
outputTuples: tuples{{0}, {1}, {2}},
selExpr: "IS DISTINCT FROM NULL",
},
{
desc: "SELECT c FROM t WHERE c IS DISTINCT FROM NULL -- no NULLs",
inputTuples: tuples{{0}, {1}, {2}},
outputTuples: tuples{{0}, {1}, {2}},
selExpr: "IS DISTINCT FROM NULL",
},
{
desc: "SELECT c FROM t WHERE c IS DISTINCT FROM NULL -- only NULLs",
inputTuples: tuples{{nil}, {nil}},
outputTuples: tuples{},
selExpr: "IS DISTINCT FROM NULL",
},
}

for _, c := range testCases {
t.Run(c.desc, func(t *testing.T) {
opConstructor := func(input []colexecbase.Operator) (colexecbase.Operator, error) {
selExpr := "IS NULL"
if c.negate {
selExpr = "IS NOT NULL"
}
spec := &execinfrapb.ProcessorSpec{
Input: []execinfrapb.InputSyncSpec{{ColumnTypes: []*types.T{types.Int}}},
Core: execinfrapb.ProcessorCoreUnion{
Noop: &execinfrapb.NoopCoreSpec{},
},
Post: execinfrapb.PostProcessSpec{
Filter: execinfrapb.Expression{Expr: fmt.Sprintf("@1 %s", selExpr)},
Filter: execinfrapb.Expression{Expr: fmt.Sprintf("@1 %s", c.selExpr)},
},
}
args := NewColOperatorArgs{
Expand Down
Loading

0 comments on commit 39cfab0

Please sign in to comment.