Skip to content

Commit

Permalink
colexec: use tree.DNull when projection is called on null input
Browse files Browse the repository at this point in the history
Most projections skip rows for which one or more arguments are null, and
just output a null for those rows. However, some projections can actually
accept null arguments. Previously, we were using the values from the vec
even when the `Nulls` bitmap was set for that row, which invalidates the
data in the vec for that row. This could cause a non-null value to be
unexpectedly concatenated to an array when an argument was null (nothing
should be added to the array in this case).

This commit modifies the projection operators that operate on datum-backed
vectors to explicitly set the argument to `tree.DNull` in the case when
the `Nulls` bitmap is set. This ensures that the projection is not
performed with the invalid (and arbitrary) value in the datum vec at that
index.

Fixes cockroachdb#87919

Release note (bug fix): Fixed a bug in `Concat` projection operators
for arrays that could cause non-null values to be added to the array
when one of the arguments was null.
  • Loading branch information
DrewKimball committed Sep 22, 2022
1 parent fe0b93c commit 23e557c
Show file tree
Hide file tree
Showing 7 changed files with 3,887 additions and 5,383 deletions.
3,631 changes: 1,539 additions & 2,092 deletions pkg/sql/colexec/colexecproj/proj_non_const_ops.eg.go

Large diffs are not rendered by default.

48 changes: 27 additions & 21 deletions pkg/sql/colexec/colexecproj/proj_non_const_ops_tmpl.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,25 +123,7 @@ func (p _OP_NAME) Next() coldata.Batch {
// of a projection is Null.
// */}}
_outNulls := projVec.Nulls()

// {{/*
// If calledOnNullInput is true, the function’s definition can handle
// null arguments. We would still want to perform the projection, and
// there is no need to call projVec.SetNulls(). The behaviour will just
// be the same as _HAS_NULLS is false. Since currently only
// ConcatDatumDatum needs this calledOnNullInput == true behaviour,
// logic for calledOnNullInput is only added to the if statement for
// function with Datum, Datum. If we later introduce another projection
// operation that has calledOnNullInput == true, we should update this
// code accordingly.
// */}}
// {{if and (eq .Left.VecMethod "Datum") (eq .Right.VecMethod "Datum")}}
hasNullsAndNotCalledOnNullInput :=
(vec1.Nulls().MaybeHasNulls() || vec2.Nulls().MaybeHasNulls()) && !p.calledOnNullInput
// {{else}}
hasNullsAndNotCalledOnNullInput := (vec1.Nulls().MaybeHasNulls() || vec2.Nulls().MaybeHasNulls())
// {{end}}
if hasNullsAndNotCalledOnNullInput {
if vec1.Nulls().MaybeHasNulls() || vec2.Nulls().MaybeHasNulls() {
_SET_PROJECTION(true)
} else {
_SET_PROJECTION(false)
Expand All @@ -158,6 +140,7 @@ func _SET_PROJECTION(_HAS_NULLS bool) {
// {{define "setProjection" -}}
// {{$hasNulls := $.HasNulls}}
// {{with $.Overload}}
// {{$isDatum := (and (eq .Left.VecMethod "Datum") (eq .Right.VecMethod "Datum"))}}
// {{if _HAS_NULLS}}
col1Nulls := vec1.Nulls()
col2Nulls := vec2.Nulls()
Expand All @@ -183,7 +166,13 @@ func _SET_PROJECTION(_HAS_NULLS bool) {
// projVec.Nulls() so there is no need to call projVec.SetNulls().
// */}}
// {{if _HAS_NULLS}}
projVec.SetNulls(_outNulls.Or(*col1Nulls).Or(*col2Nulls))
// {{if $isDatum}}
if !p.calledOnNullInput {
// {{end}}
projVec.SetNulls(_outNulls.Or(*col1Nulls).Or(*col2Nulls))
// {{if $isDatum}}
}
// {{end}}
// {{end}}
// {{end}}
// {{end}}
Expand All @@ -198,19 +187,36 @@ func _SET_SINGLE_TUPLE_PROJECTION(_HAS_NULLS bool, _HAS_SEL bool) { // */}}
// {{$hasNulls := $.HasNulls}}
// {{$hasSel := $.HasSel}}
// {{with $.Overload}}
// {{$isDatum := (and (eq .Left.VecMethod "Datum") (eq .Right.VecMethod "Datum"))}}
// {{if _HAS_NULLS}}
if !col1Nulls.NullAt(i) && !col2Nulls.NullAt(i) {
if p.calledOnNullInput || (!col1Nulls.NullAt(i) && !col2Nulls.NullAt(i)) {
// We only want to perform the projection operation if both values are not
// null.
// {{end}}
// {{if and (.Left.Sliceable) (not _HAS_SEL)}}
//gcassert:bce
// {{end}}
arg1 := col1.Get(i)
// {{if (and _HAS_NULLS $isDatum)}}
if col1Nulls.NullAt(i) {
// If we entered this branch for a null value, calledOnNullInput must be
// true. This means the projection should be calculated on null arguments.
// When a value is null, the underlying data in the slice is invalid and
// can be anything, so we need to overwrite it here. calledOnNullInput is
// currently only true for ConcatDatumDatum, so only the datum case needs
// to be handled.
arg1 = tree.DNull
}
// {{end}}
// {{if and (.Right.Sliceable) (not _HAS_SEL)}}
//gcassert:bce
// {{end}}
arg2 := col2.Get(i)
// {{if (and _HAS_NULLS $isDatum)}}
if col2Nulls.NullAt(i) {
arg2 = tree.DNull
}
// {{end}}
_ASSIGN(projCol[i], arg1, arg2, projCol, col1, col2)
// {{if _HAS_NULLS}}
}
Expand Down
Loading

0 comments on commit 23e557c

Please sign in to comment.