Skip to content

Commit

Permalink
Merge #47591 #47926
Browse files Browse the repository at this point in the history
47591: colexec: optimize hash aggregator a bit r=yuzefovich a=yuzefovich

This commit optimizes the `match` function of the hash aggregator for the case
when we know that the first tuple matches the key of the aggregation
function (which is the case when we have just created that aggregation
function). This shows a slight (on the order of 2-3%) improvement in the
benchmarks in some cases.

Release note: None

47926: vendor: bump pebble to 43f8d507aa62091533b3370120dbbe1fe300d9a3 r=jbowens a=jbowens

* *: Wire up L0SubLevels with iterator and version creation
* sstable: support NeedCompact method for property collectors

Release note: None

Co-authored-by: Yahor Yuzefovich <[email protected]>
Co-authored-by: Jackson Owens <[email protected]>
  • Loading branch information
3 people committed Apr 22, 2020
3 parents 9d536ae + 96da84c + 7652726 commit f02a014
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 45 deletions.
4 changes: 2 additions & 2 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 4 additions & 5 deletions pkg/sql/colexec/hash_aggregator.go
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ func (op *hashAggregator) onlineAgg() {
// allocated for 'sel' to avoid extra allocation and copying.
anyMatched, remaining = aggFunc.match(
remaining, op.scratch, op.groupCols, op.groupTypes, op.keyMapping,
op.scratch.group[:len(remaining)],
op.scratch.group[:len(remaining)], false, /* firstDefiniteMatch */
)
if anyMatched {
aggFunc.compute(op.scratch, op.aggCols)
Expand Down Expand Up @@ -438,8 +438,8 @@ func (op *hashAggregator) onlineAgg() {
Src: op.scratch.ColVec(int(colIdx)),
ColType: op.inputPhysTypes[colIdx],
DestIdx: aggFunc.keyIdx,
SrcStartIdx: remaining[0],
SrcEndIdx: remaining[0] + 1,
SrcStartIdx: groupStartIdx,
SrcEndIdx: groupStartIdx + 1,
})
}
op.keyMapping.SetLength(keyIdx + 1)
Expand All @@ -451,10 +451,9 @@ func (op *hashAggregator) onlineAgg() {
// Select the rest of the tuples that match the current key. We don't need
// to check if there is any match since 'remaining[0]' will always be
// matched.
// TODO(azhng): Refactor match so that we can skip checking remaining[0].
_, remaining = aggFunc.match(
remaining, op.scratch, op.groupCols, op.groupTypes, op.keyMapping,
op.scratch.group[:len(remaining)],
op.scratch.group[:len(remaining)], true, /* firstDefiniteMatch */
)

// Hack required to get aggregation function working. See '.scratch.group'
Expand Down
85 changes: 48 additions & 37 deletions pkg/sql/colexec/hash_aggregator_tmpl.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ var _ tree.Operator
// Dummy import to pull in "math" package.
var _ int = math.MaxInt16

// Dummy import to pull in "coltypes" package.
var _ coltypes.T
// _TYPES_T is the template type variable for coltypes.T. It will be replaced by
// coltypes.Foo for each type Foo in the coltypes.T type.
const _TYPES_T = coltypes.Unhandled

// _ASSIGN_NE is the template function for assigning the result of comparing
// the second input to the third input into the first input.
Expand Down Expand Up @@ -114,6 +115,9 @@ func _MATCH_LOOP(
// This slice needs to be allocated to be at least as big as sel and set to
// all false. diff will be reset to all false when match returns. This is to
// avoid additional slice allocation.
// - firstDefiniteMatch indicates whether we know that the tuple with index
// sel[0] matches the key of the aggregation function, in which case probing
// of that tuple is skipped and it is added to the matched set directly.
// NOTE: the return vector will reuse the memory allocated for the selection
// vector.
func (v hashAggFuncs) match(
Expand All @@ -123,60 +127,67 @@ func (v hashAggFuncs) match(
keyTypes []types.T,
keyMapping coldata.Batch,
diff []bool,
firstDefiniteMatch bool,
) (bool, []int) {
// We want to directly write to the selection vector to avoid extra
// allocation.
b.SetSelection(true)
matched := b.Selection()
matched = matched[:0]
matched := b.Selection()[:0]

aggKeyIdx := v.keyIdx

for keyIdx, colIdx := range keyCols {
lhs := keyMapping.ColVec(keyIdx)
lhsHasNull := lhs.MaybeHasNulls()

rhs := b.ColVec(int(colIdx))
rhsHasNull := rhs.MaybeHasNulls()

keyTyp := keyTypes[keyIdx]
if firstDefiniteMatch {
matched = append(matched, sel[0])
sel = sel[1:]
diff = diff[:len(diff)-1]
}

switch typeconv.FromColumnType(&keyTyp) {
// {{range .}}
case _TYPES_T:
lhsCol := lhs._TemplateType()
rhsCol := rhs._TemplateType()
if lhsHasNull {
lhsNull := lhs.Nulls().NullAt(v.keyIdx)
if rhsHasNull {
_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, true, true)
} else {
_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, true, false)
}
} else {
if rhsHasNull {
_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, false, true)
if len(sel) > 0 {
for keyIdx, colIdx := range keyCols {
lhs := keyMapping.ColVec(keyIdx)
lhsHasNull := lhs.MaybeHasNulls()

rhs := b.ColVec(int(colIdx))
rhsHasNull := rhs.MaybeHasNulls()

keyTyp := keyTypes[keyIdx]

switch typeconv.FromColumnType(&keyTyp) {
// {{range .}}
case _TYPES_T:
lhsCol := lhs._TemplateType()
rhsCol := rhs._TemplateType()
if lhsHasNull {
lhsNull := lhs.Nulls().NullAt(v.keyIdx)
if rhsHasNull {
_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, true, true)
} else {
_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, true, false)
}
} else {
_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, false, false)
if rhsHasNull {
_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, false, true)
} else {
_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, false, false)
}
}
// {{end}}
default:
colexecerror.InternalError(fmt.Sprintf("unhandled type %s", &keyTyp))
}
// {{end}}
default:
colexecerror.InternalError(fmt.Sprintf("unhandled type %s", &keyTyp))
}
}

remaining := sel[:0]
anyMatched := false

for selIdx, isDiff := range diff {
if isDiff {
remaining = append(remaining, sel[selIdx])
for selIdx, tupleIdx := range sel {
if diff[selIdx] {
remaining = append(remaining, tupleIdx)
} else {
matched = append(matched, sel[selIdx])
matched = append(matched, tupleIdx)
}
}

anyMatched := false
if len(matched) > 0 {
b.SetLength(len(matched))
anyMatched = true
Expand Down

0 comments on commit f02a014

Please sign in to comment.