Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

colexec: begin to implement flat decimal columns #57593

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
91b513e
WIP on benchmarks
yuzefovich Dec 5, 2020
79b5599
coldata: add decimal microbenchmark for .Get
jordanlewis Dec 6, 2020
f5e794e
encoding: add flat decimal encoding
jordanlewis Dec 4, 2020
80a4a92
colexec: begin to implement flat decimal columns
jordanlewis Dec 5, 2020
1478f0c
WIP on fixing things up
yuzefovich Dec 5, 2020
0caa66d
WIP switch from value to pointer on Decimals.Set
yuzefovich Dec 5, 2020
3ca41a1
wip: demonstrate the problem (copying apd.Decimals from Get)
jordanlewis Dec 5, 2020
2a85c38
experiments
jordanlewis Dec 5, 2020
6573e15
hybrid approach
jordanlewis Dec 6, 2020
3576f68
fix experiment
jordanlewis Dec 6, 2020
21545ef
restore benchmark
jordanlewis Dec 6, 2020
3682c09
bugfix
jordanlewis Dec 6, 2020
2b9d534
coldata: switch benchmark to use *Decimals
jordanlewis Dec 6, 2020
2558a53
use pointer receiver for decimals.Get
jordanlewis Dec 6, 2020
12c77e2
remove duplicate calls to Bytes.Get
jordanlewis Dec 6, 2020
220de00
Remove unnecessary call to BitLen
jordanlewis Dec 6, 2020
0c7c241
attempt horrible unsafe set pointer hack
jordanlewis Dec 7, 2020
9d41451
optimize EncodeFlatDecimal
jordanlewis Dec 7, 2020
6e41c2e
Remove unnecessary type decl
jordanlewis Dec 7, 2020
6fe5f71
colexec: prototype ~completely flat decimal impl~
jordanlewis Dec 7, 2020
b6fea07
make deswizzling lazy; implement arrow batch converter methods
jordanlewis Dec 11, 2020
f6cdccf
fix bug in set/appendval
jordanlewis Dec 11, 2020
fdd3f73
Merge remote-tracking branch 'origin/master' into flat-dec
jordanlewis Dec 11, 2020
4deffd6
some fixes to flat decimals
jordanlewis Dec 12, 2020
68b17f5
get rid of weird alignment thing that didn't matter so much in the end
jordanlewis Dec 12, 2020
6e8166b
clean up method names
jordanlewis Dec 12, 2020
e88fa5e
Merge remote-tracking branch 'origin/master' into flat-dec
jordanlewis Dec 14, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions pkg/col/coldata/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ func NewMemBatchWithCapacity(typs []*types.T, capacity int, factory ColumnFactor
b := NewMemBatchNoCols(typs, capacity).(*MemBatch)
for i, t := range typs {
b.b[i] = NewMemColumn(t, capacity, factory)
if b.b[i].CanonicalTypeFamily() == types.BytesFamily {
canonicalTypeFamily := b.b[i].CanonicalTypeFamily()
if canonicalTypeFamily == types.BytesFamily || canonicalTypeFamily == types.DecimalFamily {
b.bytesVecIdxs.Add(i)
}
}
Expand Down Expand Up @@ -252,14 +253,19 @@ func (m *MemBatch) SetLength(length int) {
m.length = length
if length > 0 {
for i, ok := m.bytesVecIdxs.Next(0); ok; i, ok = m.bytesVecIdxs.Next(i + 1) {
m.b[i].Bytes().UpdateOffsetsToBeNonDecreasing(length)
if m.b[i].Type().Family() == types.DecimalFamily {
m.b[i].Decimal().UpdateOffsetsToBeNonDecreasing(length)
} else {
m.b[i].Bytes().UpdateOffsetsToBeNonDecreasing(length)
}
}
}
}

// AppendCol implements the Batch interface.
func (m *MemBatch) AppendCol(col Vec) {
if col.CanonicalTypeFamily() == types.BytesFamily {
family := col.CanonicalTypeFamily()
if family == types.BytesFamily || family == types.DecimalFamily {
m.bytesVecIdxs.Add(len(m.b))
}
m.b = append(m.b, col)
Expand Down Expand Up @@ -321,7 +327,11 @@ func (m *MemBatch) ResetInternalBatch() {
}
}
for i, ok := m.bytesVecIdxs.Next(0); ok; i, ok = m.bytesVecIdxs.Next(i + 1) {
m.b[i].Bytes().Reset()
if m.b[i].Type().Family() == types.DecimalFamily {
m.b[i].Decimal().Reset()
} else {
m.b[i].Bytes().Reset()
}
}
}

Expand Down
14 changes: 12 additions & 2 deletions pkg/col/coldata/bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,12 @@ const BytesInitialAllocationFactor = 64
// []byte values. It is legal to call Set on the returned Bytes at this point,
// but Get is undefined until at least one element is Set.
func NewBytes(n int) *Bytes {
return &Bytes{
ret := makeBytes(n)
return &ret
}

func makeBytes(n int) Bytes {
return Bytes{
// Given that the []byte slices are of variable length, we multiply the
// number of elements by some constant factor.
// TODO(asubiotto): Make this tunable.
Expand Down Expand Up @@ -142,6 +147,11 @@ func (b *Bytes) Set(i int, v []byte) {
// is read-only. Window is a lightweight operation that doesn't involve copying
// the underlying data.
func (b *Bytes) Window(start, end int) *Bytes {
// newWindow validates the [start, end) range and builds the window by value;
// this wrapper only takes the address of that value so that callers keep
// receiving a *Bytes.
window := b.newWindow(start, end)
return &window
}

func (b *Bytes) newWindow(start, end int) Bytes {
if start < 0 || start > end || end > b.Len() {
panic(
fmt.Sprintf(
Expand All @@ -155,7 +165,7 @@ func (b *Bytes) Window(start, end int) *Bytes {
if end == 0 {
data = b.data[:0]
}
return &Bytes{
return Bytes{
data: data,
// We use 'end+1' because of the extra offset to know the length of the
// last element of the newly created window.
Expand Down
106 changes: 106 additions & 0 deletions pkg/col/coldata/decimal.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package coldata

import (
"fmt"
"math/big"
"unsafe"

"github.com/cockroachdb/apd/v2"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
)

// AppendSlice appends the decimal values of src in the range
// [srcStartIdx, srcEndIdx) into the receiver starting at destIdx. The values
// are not decoded: the call is forwarded to Bytes.AppendSlice on the embedded
// Bytes of the receiver and of src.
func (d *Decimals) AppendSlice(src *Decimals, destIdx, srcStartIdx, srcEndIdx int) {
d.Bytes.AppendSlice(&src.Bytes, destIdx, srcStartIdx, srcEndIdx)
}

// AppendVal appends the flat encoding of v to the end of the receiver. It
// panics if the receiver is a window.
func (d *Decimals) AppendVal(v apd.Decimal) {
	if d.isWindow {
		panic("AppendVal is called on a window into Decimal")
	}
	d.maybeBackfillOffsets(d.Len())
	// The new value is encoded right after the data that ends at the current
	// last offset; anything past that point in the buffer is overwritten.
	prefix := d.data[:d.offsets[d.Len()]]
	d.data = encoding.EncodeFlatDecimal(&v, prefix)
	d.maxSetIndex = d.Len()
	// Record where the freshly appended element ends.
	endOffset := int32(len(d.data))
	d.offsets = append(d.offsets, endOffset)
}

// unsafeSetNat makes words the backing []big.Word of b. Only the slice header
// is stored, through the pointer returned by encoding.UnsafeGetAbsPtr
// (presumably the address of b's internal word slice - confirm against that
// helper); no words are copied, so b shares memory with words afterwards.
func unsafeSetNat(b *big.Int, words []big.Word) {
	*(*[]big.Word)(encoding.UnsafeGetAbsPtr(b)) = words
}

// CopySlice copies the decimal values of src in the range
// [srcStartIdx, srcEndIdx) into the receiver starting at destIdx. The values
// are not decoded: the call is forwarded to Bytes.CopySlice on the embedded
// Bytes of the receiver and of src. See the comment on Bytes.CopySlice for
// more information.
func (d *Decimals) CopySlice(src *Decimals, destIdx, srcStartIdx, srcEndIdx int) {
d.Bytes.CopySlice(&src.Bytes, destIdx, srcStartIdx, srcEndIdx)
}

// Set stores the flat encoding of v as the ith element of d. Overwriting a
// value that is not at the end of the Decimals is not allowed since it
// complicates memory movement to make/take away necessary space in the flat
// buffer: Set panics if i is smaller than the largest index set so far. It
// also panics if d is a window.
func (d *Decimals) Set(i int, v apd.Decimal) {
	switch {
	case d.isWindow:
		panic("Set is called on a window into Decimals")
	case i < d.maxSetIndex:
		msg := fmt.Sprintf(
			"cannot overwrite value on flat Decimals: maxSetIndex=%d, setIndex=%d, consider using Reset",
			d.maxSetIndex,
			i,
		)
		panic(msg)
	}
	// The element being set might not directly follow the last element that is
	// already present (i.e. there might be gaps in d.offsets), probably because
	// NULL values are stored separately. Backfill the offsets so that they stay
	// non-decreasing.
	d.maybeBackfillOffsets(i)
	// Encode v right after the data that ends at offset i, then record where
	// the new element ends.
	encoded := encoding.EncodeFlatDecimal(&v, d.data[:d.offsets[i]])
	d.data = encoded
	d.offsets[i+1] = int32(len(encoded))
	d.maxSetIndex = i
}

// Window returns a read-only "window" over the elements [start, end) of the
// receiver, similar to slicing in Go. The returned object must *not* be
// modified. The operation is lightweight: the underlying data is not copied,
// the new Decimals simply wraps the window produced by Bytes.newWindow.
func (d *Decimals) Window(start, end int) *Decimals {
	return &Decimals{Bytes: d.Bytes.newWindow(start, end)}
}

// decimalSize is the size in bytes of an apd.Decimal struct value as reported
// by unsafe.Sizeof; memory referenced by the struct is not included.
var decimalSize = unsafe.Sizeof(apd.Decimal{})

// Size returns the total size of the receiver in bytes. The value is whatever
// the embedded Bytes reports for itself.
func (d *Decimals) Size() uintptr {
return d.Bytes.Size()
}

// SetLength sets the length of this Decimals to l. It panics if the receiver
// is a window, and it will also panic if there is not enough capacity in the
// offsets slice to hold l+1 offsets.
func (d *Decimals) SetLength(l int) {
	if d.isWindow {
		// Name the right type in the message, matching Decimals.Set.
		panic("SetLength is called on a window into Decimals")
	}
	// We need +1 for an extra offset at the end.
	d.offsets = d.offsets[:l+1]
}
85 changes: 85 additions & 0 deletions pkg/col/coldata/decimal_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package coldata

import (
"math/big"
"testing"

"github.com/cockroachdb/apd/v2"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/stretchr/testify/assert"
)

// Res is a package-level sink for the benchmark result: BenchmarkSumDecimals
// stores its sum here, just to make sure that the benchmark has to do some
// work.
var Res apd.Decimal

// BenchmarkSumDecimals measures reading every element of a Decimals column
// with Get and summing the results with an exact-precision apd context. The
// final sum is stored in Res so the work cannot be optimized away.
func BenchmarkSumDecimals(b *testing.B) {
	colFactory := defaultColumnFactory{}
	col := colFactory.MakeColumn(types.Decimal, 1024).(*Decimals)
	l := col.Len()
	// Populate the column before the timer starts. Without this, Get would be
	// called on a column in which nothing was ever Set, and the benchmark
	// would only sum empty elements instead of decoding real decimals.
	for j := 0; j < l; j++ {
		var d apd.Decimal
		d.SetInt64(int64(j))
		col.Set(j, d)
	}
	ctx := &apd.Context{
		Precision:   20,
		Rounding:    apd.RoundHalfUp,
		MaxExponent: 2000,
		MinExponent: -2000,
		// Don't error on invalid operation, return NaN instead.
		Traps: apd.DefaultTraps &^ apd.InvalidOperation,
	}
	exactCtx := ctx.WithPrecision(0)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		var sum apd.Decimal
		for j := 0; j < l; j++ {
			d := col.Get(j)
			if _, err := exactCtx.Add(&sum, &sum, &d); err != nil {
				b.Fatal(err)
			}
		}
		Res = sum
	}
}

// TestUnsafeSetNat swaps the backing word slices of two big.Ints with
// unsafeSetNat and checks that their values are swapped as a result.
func TestUnsafeSetNat(t *testing.T) {
	b := big.NewInt(1000)
	c := big.NewInt(2000)

	// Grab both word slices before either big.Int is modified.
	bSlice := b.Bits()
	cSlice := c.Bits()

	unsafeSetNat(b, cSlice)
	unsafeSetNat(c, bSlice)

	// assert.Equal takes the expected value first and the actual value second.
	assert.Equal(t, int64(2000), b.Int64())
	assert.Equal(t, int64(1000), c.Int64())
}

// TestDecimalColumnBasics sets every element of a Decimals column to its own
// index and verifies that Get returns the same integer for each index.
func TestDecimalColumnBasics(t *testing.T) {
	colFactory := defaultColumnFactory{}
	col := colFactory.MakeColumn(types.Decimal, 1024).(*Decimals)
	for i := 0; i < col.Len(); i++ {
		var d apd.Decimal
		d.SetInt64(int64(i))
		col.Set(i, d)
	}

	for i := 0; i < col.Len(); i++ {
		d := col.Get(i)
		got, err := d.Int64()
		if err != nil {
			t.Fatalf("Get(%d).Int64(): %v", i, err)
		}
		// Report both the value read back and the value expected so that a
		// failure can be diagnosed from the test output alone.
		if got != int64(i) {
			t.Fatalf("Get(%d) = %d, want %d", i, got, i)
		}
	}
}
25 changes: 20 additions & 5 deletions pkg/col/coldata/native_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"github.com/cockroachdb/apd/v2"
"github.com/cockroachdb/cockroach/pkg/util/duration"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
)

// Bools is a slice of bool.
Expand All @@ -32,8 +33,10 @@ type Int64s []int64
// Float64s is a slice of float64.
type Float64s []float64

// Decimals is a slice of apd.Decimal.
type Decimals []apd.Decimal
// Decimals is a flat representation of apd.Decimal objects. It embeds Bytes:
// elements are written into the byte buffer with encoding.EncodeFlatDecimal
// and read back with encoding.DecodeFlatDecimal.
type Decimals struct {
Bytes
}

// Times is a slice of time.Time.
type Times []time.Time
Expand Down Expand Up @@ -69,7 +72,16 @@ func (c Float64s) Get(idx int) float64 { return c[idx] }
// Get returns the element at index idx of the vector. The element cannot be
// used anymore once the vector is modified.
//gcassert:inline
func (c Decimals) Get(idx int) apd.Decimal { return c[idx] }
func (c *Decimals) Get(idx int) apd.Decimal {
	var dec apd.Decimal
	// An element with no encoded bytes (e.g. a null) has nothing to decode and
	// is returned as the zero apd.Decimal.
	if encoded := c.Bytes.Get(idx); len(encoded) != 0 {
		encoding.DecodeFlatDecimal(encoded, &dec)
	}
	return dec
}

// Get returns the element at index idx of the vector. The element cannot be
// used anymore once the vector is modified.
Expand All @@ -93,14 +105,17 @@ func (c Int32s) Len() int { return len(c) }
// Len returns the length of the vector.
func (c Int64s) Len() int { return len(c) }

// Set sets the element at index idx of the vector to agg.
func (c Int64s) Set(idx int, agg int64) { c[idx] = agg }

// Len returns the length of the vector.
func (c Float64s) Len() int { return len(c) }

// Len returns the length of the vector.
func (c Decimals) Len() int { return len(c) }
// Set sets the element at index idx of the vector to agg.
func (c Float64s) Set(idx int, agg float64) { c[idx] = agg }

// Len returns the length of the vector.
func (c Times) Len() int { return len(c) }

// Len returns the length of the vector.
func (c Durations) Len() int { return len(c) }

// Set sets the element at index idx of the vector to agg.
func (c Durations) Set(idx int, agg duration.Duration) { c[idx] = agg }
Loading