Skip to content

Commit

Permalink
Merge #40866
Browse files Browse the repository at this point in the history
40866: coldata: make coldata.BatchSize a package function r=yuzefovich a=asubiotto

This will allow us to vary the batchSize private variable int the future
by providing a testing hook for external packages to modify the batch
size at will.

Release justification: Category 1 non-production code change.

Release note: None

The only "interesting" changes are in `coldata/batch.go` and `colexec/utils_test.go` where I made a change to use `BatchSize()` rather than hardcode batch sizes to use. In passing, I also noticed we test batch sizes of 1, 2, and 3 elements. Do we need to do that? If not, we could remove one of them to reduce test runtime by `~2s`.

Co-authored-by: Alfonso Subiotto Marqués <[email protected]>
  • Loading branch information
craig[bot] and asubiotto committed Sep 20, 2019
2 parents 073999b + e9bfb4a commit f59a9cd
Show file tree
Hide file tree
Showing 57 changed files with 327 additions and 325 deletions.
12 changes: 9 additions & 3 deletions pkg/col/coldata/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,20 @@ type Batch interface {

var _ Batch = &MemBatch{}

const maxBatchSize = 1024

var batchSize = uint16(1024)

// BatchSize is the maximum number of tuples that fit in a column batch.
// TODO(jordan): tune
const BatchSize = 1024
func BatchSize() uint16 {
return batchSize
}

// NewMemBatch allocates a new in-memory Batch.
// TODO(jordan): pool these allocations.
func NewMemBatch(types []coltypes.T) Batch {
return NewMemBatchWithSize(types, BatchSize)
return NewMemBatchWithSize(types, int(BatchSize()))
}

// NewMemBatchWithSize allocates a new in-memory Batch with the given column
Expand Down Expand Up @@ -133,7 +139,7 @@ func (m *MemBatch) SetLength(n uint16) {

// AppendCol implements the Batch interface.
func (m *MemBatch) AppendCol(t coltypes.T) {
m.b = append(m.b, NewMemColumn(t, BatchSize))
m.b = append(m.b, NewMemColumn(t, int(BatchSize())))
}

// Reset implements the Batch interface.
Expand Down
4 changes: 2 additions & 2 deletions pkg/col/coldata/bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func (b *Bytes) Len() int {
return len(b.data)
}

var zeroBytesColumn = make([][]byte, BatchSize)
var zeroBytesColumn = make([][]byte, BatchSize())

// Zero zeroes out the underlying bytes.
func (b *Bytes) Zero() {
Expand Down Expand Up @@ -330,7 +330,7 @@ func (b *flatBytes) Len() int {
return len(b.offsets)
}

var zeroInt32Slice = make([]int32, BatchSize)
var zeroInt32Slice = make([]int32, BatchSize())

// Zero zeroes out the underlying bytes. Note that this doesn't change the
// length. Use this instead of Reset if you need to be able to Get zeroed byte
Expand Down
8 changes: 4 additions & 4 deletions pkg/col/coldata/nulls.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@

package coldata

// zeroedNulls is a zeroed out slice representing a bitmap of size BatchSize.
// zeroedNulls is a zeroed out slice representing a bitmap of size maxBatchSize.
// This is copied to efficiently set all nulls.
var zeroedNulls [(BatchSize-1)/8 + 1]byte
var zeroedNulls [(maxBatchSize-1)/8 + 1]byte

// filledNulls is a slice representing a bitmap of size BatchSize with every
// filledNulls is a slice representing a bitmap of size maxBatchSize with every
// single bit set.
var filledNulls [(BatchSize-1)/8 + 1]byte
var filledNulls [(maxBatchSize-1)/8 + 1]byte

// bitMask[i] is a byte with a single bit set at i.
var bitMask = [8]byte{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}
Expand Down
58 changes: 29 additions & 29 deletions pkg/col/coldata/nulls_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,29 @@ var nulls5 Nulls
var nulls10 Nulls

// pos is a collection of interesting boundary indices to use in tests.
var pos = []uint64{0, 1, 63, 64, 65, BatchSize - 1, BatchSize}
var pos = []uint64{0, 1, 63, 64, 65, uint64(BatchSize()) - 1, uint64(BatchSize())}

func init() {
nulls3 = NewNulls(BatchSize)
nulls5 = NewNulls(BatchSize)
nulls10 = NewNulls(BatchSize * 2)
for i := uint16(0); i < BatchSize; i++ {
nulls3 = NewNulls(int(BatchSize()))
nulls5 = NewNulls(int(BatchSize()))
nulls10 = NewNulls(int(BatchSize()) * 2)
for i := uint16(0); i < BatchSize(); i++ {
if i%3 == 0 {
nulls3.SetNull(i)
}
if i%5 == 0 {
nulls5.SetNull(i)
}
}
for i := uint16(0); i < BatchSize*2; i++ {
for i := uint16(0); i < BatchSize()*2; i++ {
if i%10 == 0 {
nulls10.SetNull(i)
}
}
}

func TestNullAt(t *testing.T) {
for i := uint16(0); i < BatchSize; i++ {
for i := uint16(0); i < BatchSize(); i++ {
if i%3 == 0 {
require.True(t, nulls3.NullAt(i))
} else {
Expand All @@ -62,9 +62,9 @@ func TestNullAt(t *testing.T) {
func TestSetNullRange(t *testing.T) {
for _, start := range pos {
for _, end := range pos {
n := NewNulls(BatchSize)
n := NewNulls(int(BatchSize()))
n.SetNullRange(start, end)
for i := uint64(0); i < BatchSize; i++ {
for i := uint64(0); i < uint64(BatchSize()); i++ {
expected := i >= start && i < end
require.Equal(t, expected, n.NullAt64(i),
"NullAt(%d) should be %t after SetNullRange(%d, %d)", i, expected, start, end)
Expand All @@ -76,10 +76,10 @@ func TestSetNullRange(t *testing.T) {
func TestUnsetNullRange(t *testing.T) {
for _, start := range pos {
for _, end := range pos {
n := NewNulls(BatchSize)
n := NewNulls(int(BatchSize()))
n.SetNulls()
n.UnsetNullRange(start, end)
for i := uint64(0); i < BatchSize; i++ {
for i := uint64(0); i < uint64(BatchSize()); i++ {
notExpected := i >= start && i < end
require.NotEqual(t, notExpected, n.NullAt64(i),
"NullAt(%d) saw %t, expected %t, after SetNullRange(%d, %d)", i, n.NullAt64(i), !notExpected, start, end)
Expand All @@ -89,8 +89,8 @@ func TestUnsetNullRange(t *testing.T) {
}

func TestSwapNulls(t *testing.T) {
n := NewNulls(BatchSize)
swapPos := []uint64{0, 1, 63, 64, 65, BatchSize - 1}
n := NewNulls(int(BatchSize()))
swapPos := []uint64{0, 1, 63, 64, 65, uint64(BatchSize()) - 1}
idxInSwapPos := func(idx uint64) bool {
for _, p := range swapPos {
if p == idx {
Expand All @@ -108,7 +108,7 @@ func TestSwapNulls(t *testing.T) {
for _, i := range swapPos {
for _, j := range swapPos {
n.swap(i, j)
for k := uint64(0); k < BatchSize; k++ {
for k := uint64(0); k < uint64(BatchSize()); k++ {
require.Equal(t, idxInSwapPos(k), n.NullAt64(k),
"after swapping NULLS (%d, %d), NullAt(%d) saw %t, expected %t", i, j, k, n.NullAt64(k), idxInSwapPos(k))
}
Expand All @@ -120,7 +120,7 @@ func TestSwapNulls(t *testing.T) {
// Test that swapping null with not null changes things appropriately.
n.UnsetNulls()
swaps := map[uint64]uint64{
0: BatchSize - 1,
0: uint64(BatchSize()) - 1,
1: 62,
2: 3,
63: 65,
Expand All @@ -141,7 +141,7 @@ func TestSwapNulls(t *testing.T) {
n.swap(i, j)
require.Truef(t, n.NullAt64(i), "after swapping not null and null (%d, %d), found null=%t at %d", i, j, n.NullAt64(i), i)
require.Truef(t, !n.NullAt64(j), "after swapping not null and null (%d, %d), found null=%t at %d", i, j, !n.NullAt64(j), j)
for k := uint64(0); k < BatchSize; k++ {
for k := uint64(0); k < uint64(BatchSize()); k++ {
if idxInSwaps(k) {
continue
}
Expand All @@ -160,7 +160,7 @@ func TestSwapNulls(t *testing.T) {
for _, i := range swapPos {
for _, j := range swapPos {
n.swap(i, j)
for k := uint64(0); k < BatchSize; k++ {
for k := uint64(0); k < uint64(BatchSize()); k++ {
require.Equal(t, idxInSwapPos(k), !n.NullAt64(k),
"after swapping NULLS (%d, %d), NullAt(%d) saw %t, expected %t", i, j, k, !n.NullAt64(k), idxInSwapPos(k))
}
Expand All @@ -171,9 +171,9 @@ func TestSwapNulls(t *testing.T) {

func TestNullsTruncate(t *testing.T) {
for _, size := range pos {
n := NewNulls(BatchSize)
n := NewNulls(int(BatchSize()))
n.Truncate(uint16(size))
for i := uint16(0); i < BatchSize; i++ {
for i := uint16(0); i < BatchSize(); i++ {
expected := uint64(i) >= size
require.Equal(t, expected, n.NullAt(i),
"NullAt(%d) should be %t after Truncate(%d)", i, expected, size)
Expand All @@ -183,10 +183,10 @@ func TestNullsTruncate(t *testing.T) {

func TestUnsetNullsAfter(t *testing.T) {
for _, size := range pos {
n := NewNulls(BatchSize)
n := NewNulls(int(BatchSize()))
n.SetNulls()
n.UnsetNullsAfter(uint16(size))
for i := uint16(0); i < BatchSize; i++ {
for i := uint16(0); i < BatchSize(); i++ {
expected := uint64(i) < size
require.Equal(t, expected, n.NullAt(i),
"NullAt(%d) should be %t after UnsetNullsAfter(%d)", i, expected, size)
Expand All @@ -195,19 +195,19 @@ func TestUnsetNullsAfter(t *testing.T) {
}

func TestSetAndUnsetNulls(t *testing.T) {
n := NewNulls(BatchSize)
for i := uint16(0); i < BatchSize; i++ {
n := NewNulls(int(BatchSize()))
for i := uint16(0); i < BatchSize(); i++ {
require.False(t, n.NullAt(i))
}
n.SetNulls()
for i := uint16(0); i < BatchSize; i++ {
for i := uint16(0); i < BatchSize(); i++ {
require.True(t, n.NullAt(i))
}

for i := uint16(0); i < BatchSize; i += 3 {
for i := uint16(0); i < BatchSize(); i += 3 {
n.UnsetNull(i)
}
for i := uint16(0); i < BatchSize; i++ {
for i := uint16(0); i < BatchSize(); i++ {
if i%3 == 0 {
require.False(t, n.NullAt(i))
} else {
Expand All @@ -216,7 +216,7 @@ func TestSetAndUnsetNulls(t *testing.T) {
}

n.UnsetNulls()
for i := uint16(0); i < BatchSize; i++ {
for i := uint16(0); i < BatchSize(); i++ {
require.False(t, n.NullAt(i))
}
}
Expand Down Expand Up @@ -264,7 +264,7 @@ func TestNullsSetWithSel(t *testing.T) {
args := SliceArgs{
// Neither type nor the length here matter.
Src: NewMemColumn(coltypes.Bool, 0),
Sel: make([]uint16, BatchSize),
Sel: make([]uint16, BatchSize()),
}
// Make a selection vector with every even index. (This turns nulls10 into
// nulls5.)
Expand Down Expand Up @@ -318,7 +318,7 @@ func TestSlice(t *testing.T) {
}
}
// Ensure we haven't modified the receiver.
for i := uint16(0); i < BatchSize; i++ {
for i := uint16(0); i < BatchSize(); i++ {
expected := i%3 == 0
require.Equal(t, expected, nulls3.NullAt(i))
}
Expand Down
Loading

0 comments on commit f59a9cd

Please sign in to comment.