Skip to content

Commit

Permalink
Merge #31705
Browse files Browse the repository at this point in the history
31705: exec: get rid of Bools and Bytes; add some testing utils r=jordanlewis a=jordanlewis

opTestInput and opTestOutput allow testing operators without having to
construct ColBatches by hand, with a natural syntax inspired by
ProcessorTestCase.

Also, get rid of `Bools` in favor of `[]bool`, and `Bytes` in favor of `[][]byte`, as the interface-based versions are up to 4x slower, at least on the Distinct benchmark.

Split from #31693.
Based on #31554.

Co-authored-by: Jordan Lewis <[email protected]>
  • Loading branch information
craig[bot] and jordanlewis committed Oct 23, 2018
2 parents a10165c + 3b4fb6e commit d3f39a5
Show file tree
Hide file tree
Showing 8 changed files with 326 additions and 78 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/distsqlrun/columnarizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func (c *columnarizer) Init() {
for i := range typs {
typs[i] = types.FromColumnType(outputTypes[i])
}
c.batch = exec.NewMemBatch(typs...)
c.batch = exec.NewMemBatch(typs)
c.buffered = make(sqlbase.EncDatumRows, exec.ColBatchSize)
for i := range c.buffered {
c.buffered[i] = make(sqlbase.EncDatumRow, len(typs))
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/distsqlrun/materializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func (m *materializer) Next() (sqlbase.EncDatumRow, *ProducerMetadata) {
ct := types[outIdx]
switch ct.SemanticType {
case sqlbase.ColumnType_BOOL:
if col.Bool().At(rowIdx) {
if col.Bool()[rowIdx] {
m.row[outIdx].Datum = tree.DBoolTrue
} else {
m.row[outIdx].Datum = tree.DBoolFalse
Expand All @@ -151,9 +151,9 @@ func (m *materializer) Next() (sqlbase.EncDatumRow, *ProducerMetadata) {
case sqlbase.ColumnType_FLOAT:
m.row[outIdx].Datum = m.da.NewDFloat(tree.DFloat(col.Float64()[rowIdx]))
case sqlbase.ColumnType_BYTES:
m.row[outIdx].Datum = m.da.NewDBytes(tree.DBytes(col.Bytes().At(rowIdx)))
m.row[outIdx].Datum = m.da.NewDBytes(tree.DBytes(col.Bytes()[rowIdx]))
case sqlbase.ColumnType_STRING:
b := col.Bytes().At(rowIdx)
b := col.Bytes()[rowIdx]
m.row[outIdx].Datum = m.da.NewDString(tree.DString(*(*string)(unsafe.Pointer(&b))))
}
}
Expand Down
8 changes: 7 additions & 1 deletion pkg/sql/exec/colbatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ type ColBatch interface {
// densely-packed list of the indices in each column that have not been
// filtered out by a previous step.
Selection() []uint16
// SetSelection sets whether this batch is using its selection vector or not.
SetSelection(bool)
}

var _ ColBatch = &memBatch{}
Expand All @@ -43,7 +45,7 @@ const ColBatchSize = 1024

// NewMemBatch allocates a new in-memory ColBatch.
// TODO(jordan): pool these allocations.
func NewMemBatch(types ...types.T) ColBatch {
func NewMemBatch(types []types.T) ColBatch {
b := &memBatch{}
b.b = make([]ColVec, len(types))

Expand Down Expand Up @@ -81,6 +83,10 @@ func (m *memBatch) Selection() []uint16 {
return m.sel
}

// SetSelection records in m.useSel whether this batch is using its selection
// vector (true) or not (false).
func (m *memBatch) SetSelection(b bool) {
m.useSel = b
}

// SetLength stores n in m.n, the field that holds this batch's length.
func (m *memBatch) SetLength(n uint16) {
m.n = n
}
69 changes: 14 additions & 55 deletions pkg/sql/exec/colvec.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ type ColVec interface {

// TODO(jordan): is a bitmap or slice of bools better?
// Bool returns a bool list.
Bool() Bools
Bool() []bool
// Int8 returns an int8 slice.
Int8() []int8
// Int16 returns an int16 slice.
Expand All @@ -40,8 +40,11 @@ type ColVec interface {
Float32() []float32
// Float64 returns a float64 slice.
Float64() []float64
// Bytes returns a Bytes object, allowing retrieval of multiple byte slices.
Bytes() Bytes
// Bytes returns a [][]byte (a slice of byte slices).
Bytes() [][]byte

// Col returns the raw, typeless backing storage for this ColVec.
Col() interface{}
}

// Nulls represents a list of potentially nullable values.
Expand All @@ -59,22 +62,6 @@ type Nulls interface {
Rank(i uint16) uint16
}

// Bools is an interface that represents a list of bools.
type Bools interface {
// At returns the ith bool in the list.
At(i uint16) bool
// Set sets the ith bool in the list to b.
Set(i uint16, b bool)
}

// Bytes is an interface that represents a list of byte slices.
type Bytes interface {
// At returns the ith byte slice in the list.
At(i uint16) []byte
// Set sets the ith byte slice in the list to b.
Set(i uint16, b []byte)
}

var _ ColVec = memColumn{}

// memColumn is a simple pass-through implementation of ColVec that just casts
Expand All @@ -87,9 +74,9 @@ type memColumn struct {
func newMemColumn(t types.T, n int) memColumn {
switch t {
case types.Bool:
return memColumn{col: newMemBools(n)}
return memColumn{col: make([]bool, n)}
case types.Bytes:
return memColumn{col: newMemBytes(n)}
return memColumn{col: make([][]byte, n)}
case types.Int16:
return memColumn{col: make([]int16, n)}
case types.Int32:
Expand Down Expand Up @@ -119,8 +106,8 @@ func (m memColumn) Rank(i uint16) uint16 {
return i
}

func (m memColumn) Bool() Bools {
return m.col.(memBools)
func (m memColumn) Bool() []bool {
return m.col.([]bool)
}

func (m memColumn) Int8() []int8 {
Expand All @@ -147,38 +134,10 @@ func (m memColumn) Float64() []float64 {
return m.col.([]float64)
}

func (m memColumn) Bytes() Bytes {
return m.col.(memBytes)
}

var _ Bools = memBools{}

type memBools []bool

func newMemBools(n int) memBools {
return make([]bool, n)
}

func (m memBools) At(i uint16) bool {
return m[i]
}

func (m memBools) Set(i uint16, b bool) {
m[i] = b
}

var _ Bytes = memBytes{}

type memBytes [][]byte

func newMemBytes(n int) memBytes {
return make([][]byte, n)
}

func (m memBytes) At(i uint16) []byte {
return m[i]
func (m memColumn) Bytes() [][]byte {
return m.col.([][]byte)
}

func (m memBytes) Set(i uint16, b []byte) {
m[i] = b
// Col returns the raw, typeless backing storage held in m.col, without
// asserting it to any concrete slice type.
func (m memColumn) Col() interface{} {
return m.col
}
15 changes: 3 additions & 12 deletions pkg/sql/exec/execgen/cmd/execgen/rowstovec_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,7 @@ func EncDatumRowsToColVec(
if datum == tree.DNull {
vec.SetNull(i)
} else {
{{if .HasSetMethod}}
col.Set(i, {{.DatumToPhysicalFn}})
{{else}}
col[i] = {{.DatumToPhysicalFn}}
{{end}}
}
}
return nil
Expand All @@ -85,9 +81,6 @@ type columnConversion struct {
// ExecType is the exec.T to which we're converting. It should correspond to
// a method name on exec.ColVec.
ExecType string
// HasSetMethod is true if the ColVec is an interface with a Set method rather
// than just a slice.
HasSetMethod bool
// DatumToPhysicalFn is a stringified function for converting a datum to the
// physical type used in the column vector.
DatumToPhysicalFn string
Expand All @@ -105,11 +98,9 @@ func genRowsToVec(wr io.Writer) error {
continue
}
conversion := columnConversion{
SemanticType: "ColumnType_" + name,
Width: width,
ExecType: t.String(),
// TODO(solon): Determine the following fields via reflection.
HasSetMethod: t == types.Bool || t == types.Bytes,
SemanticType: "ColumnType_" + name,
Width: width,
ExecType: t.String(),
DatumToPhysicalFn: getDatumToPhysicalFn(ct),
}
columnConversions = append(columnConversions, conversion)
Expand Down
12 changes: 6 additions & 6 deletions pkg/sql/exec/rowstovec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ func TestEncDatumRowsToColVecBool(t *testing.T) {
t.Fatal(err)
}
expected := newMemColumn(types.Bool, 2)
expected.Bool().Set(0, false)
expected.Bool().Set(1, true)
expected.Bool()[0] = false
expected.Bool()[1] = true
if !reflect.DeepEqual(vec, expected) {
t.Errorf("expected vector %+v, got %+v", expected, vec)
}
Expand All @@ -55,8 +55,8 @@ func TestEncDatumRowsToColVecBool(t *testing.T) {
if err := EncDatumRowsToColVec(rows, vec, 1 /* columnIdx */, &ct, &alloc); err != nil {
t.Fatal(err)
}
expected.Bool().Set(0, true)
expected.Bool().Set(1, false)
expected.Bool()[0] = true
expected.Bool()[1] = false
if !reflect.DeepEqual(vec, expected) {
t.Errorf("expected vector %+v, got %+v", expected, vec)
}
Expand Down Expand Up @@ -91,8 +91,8 @@ func TestEncDatumRowsToColVecString(t *testing.T) {
t.Fatal(err)
}
expected := newMemColumn(types.Bytes, 2)
expected.Bytes().Set(0, []byte("foo"))
expected.Bytes().Set(1, []byte("bar"))
expected.Bytes()[0] = []byte("foo")
expected.Bytes()[1] = []byte("bar")
if !reflect.DeepEqual(vec, expected) {
t.Errorf("expected vector %+v, got %+v", expected, vec)
}
Expand Down
27 changes: 27 additions & 0 deletions pkg/sql/exec/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,30 @@ func FromColumnType(ct sqlbase.ColumnType) T {
}
return Unhandled
}

// FromGoType maps a Go value to the T that matches its dynamic type. Both int
// and int64 map to Int64, and both []byte and string map to Bytes. Any other
// type causes a panic. Shouldn't be used at runtime.
func FromGoType(v interface{}) T {
	switch v.(type) {
	case bool:
		return Bool
	case int8:
		return Int8
	case int16:
		return Int16
	case int32:
		return Int32
	case int, int64:
		return Int64
	case float32:
		return Float32
	case float64:
		return Float64
	case []byte, string:
		return Bytes
	}
	// No supported type matched; report the value's dynamic type.
	panic(fmt.Sprintf("type %T not supported yet", v))
}
Loading

0 comments on commit d3f39a5

Please sign in to comment.