Commit

Merge pull request #428 from tdakkota/perf/improve-colstr-writing

perf(proto): improve `ColStr` writing performance for small strings

ernado authored Oct 9, 2024
2 parents e4a2d07 + 308ecb3 commit 780d68d
Showing 2 changed files with 80 additions and 22 deletions.
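For context, the wire layout that both the old and the new ColStr.WriteColumn paths produce is the same: each String row is a uvarint length prefix followed by the raw bytes, appended back to back (see proto/col_str.go below). A minimal standalone sketch of that framing, using only the standard library; encodeStrings is a hypothetical helper, not part of the package:

package main

import (
	"encoding/binary"
	"fmt"
)

// encodeStrings mirrors the framing ColStr produces on the wire: for every
// row, a uvarint length prefix followed by the raw string bytes, all
// appended back to back. The helper itself is illustrative only.
func encodeStrings(rows []string) []byte {
	var (
		out []byte
		tmp [binary.MaxVarintLen64]byte
	)
	for _, s := range rows {
		n := binary.PutUvarint(tmp[:], uint64(len(s)))
		out = append(out, tmp[:n]...)
		out = append(out, s...)
	}
	return out
}

func main() {
	fmt.Printf("% x\n", encodeStrings([]string{"ab", "cde"}))
	// Prints: 02 61 62 03 63 64 65
}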
insert_bench_test.go (90 changes: 73 additions & 17 deletions)

@@ -3,6 +3,7 @@ package ch
 import (
 	"context"
 	"fmt"
+	"strings"
 	"testing"
 
 	"github.com/go-faster/errors"
@@ -15,7 +16,7 @@ func BenchmarkInsert(b *testing.B) {
 	cht.Skip(b)
 	srv := cht.New(b)
 
-	bench := func(rows int) func(b *testing.B) {
+	bench := func(data proto.ColInput) func(b *testing.B) {
 		return func(b *testing.B) {
 			ctx := context.Background()
 			c, err := Dial(ctx, Options{
@@ -25,42 +26,97 @@ func BenchmarkInsert(b *testing.B) {
 			if err != nil {
 				b.Fatal(errors.Wrap(err, "dial"))
 			}
-			defer func() { _ = c.Close() }()
 
+			b.Cleanup(func() {
+				if err := c.Do(ctx, Query{
+					Body: "DROP TABLE IF EXISTS test_table",
+				}); err != nil {
+					b.Logf("Cleanup failed: %+v", err)
+				}
+				_ = c.Close()
+			})
 			if err := c.Do(ctx, Query{
-				Body: "CREATE TABLE IF NOT EXISTS test_table (id Int64) ENGINE = Null",
+				Body: fmt.Sprintf("CREATE TABLE IF NOT EXISTS test_table (row %s) ENGINE = Null", data.Type()),
 			}); err != nil {
 				b.Fatal(err)
 			}
 
-			var id proto.ColInt64
-			for i := 0; i < rows; i++ {
-				id = append(id, 1)
-			}
+			var tmp proto.Buffer
+			data.EncodeColumn(&tmp)
 
-			b.SetBytes(int64(rows) * 8)
+			b.SetBytes(int64(len(tmp.Buf)))
 			b.ResetTimer()
 			b.ReportAllocs()
 
 			for i := 0; i < b.N; i++ {
 				if err := c.Do(ctx, Query{
 					Body: "INSERT INTO test_table VALUES",
 					Input: []proto.InputColumn{
-						{Name: "id", Data: id},
+						{Name: "row", Data: data},
 					},
 				}); err != nil {
-					b.Fatal()
+					b.Fatal(err)
 				}
 			}
 		}
 	}
-	for _, rows := range []int{
-		10_000,
-		100_000,
-		1_000_000,
-		10_000_000,
-		100_000_000,
+	for _, gen := range []struct {
+		name    string
+		getData func(rows int) proto.ColInput
+		maxRows int
+	}{
+		{
+			"ColInt64",
+			func(rows int) proto.ColInput {
+				var data proto.ColInt64
+				for i := 0; i < rows; i++ {
+					data.Append(int64(i))
+				}
+				return data
+			},
+			-1,
+		},
+		{
+			"SmallColStr",
+			func(rows int) proto.ColInput {
+				var data proto.ColStr
+				for i := 0; i < rows; i++ {
+					data.Append(fmt.Sprintf("%016x", i))
+				}
+				return data
+			},
+			1_000_000,
+		},
+		{
+			"BigColStr",
+			func(rows int) proto.ColInput {
+				var (
+					data    proto.ColStr
+					scratch = strings.Repeat("abcd", 1024)
+				)
+				for i := 0; i < rows; i++ {
+					data.Append(scratch)
+				}
+				return data
+			},
+			100_000,
+		},
 	} {
-		b.Run(fmt.Sprintf("Rows%d", rows), bench(rows))
+		b.Run(gen.name, func(b *testing.B) {
+			for _, rows := range []int{
+				10_000,
+				100_000,
+				1_000_000,
+				10_000_000,
+				100_000_000,
+			} {
+				if gen.maxRows > 0 && rows > gen.maxRows {
+					continue
+				}
+				data := gen.getData(rows)
+
+				b.Run(fmt.Sprintf("Rows%d", rows), bench(data))
+			}
+		})
 	}
 }
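Assuming a ClickHouse server is available to the cht test harness (cht.Skip(b) is expected to skip the run otherwise), the new cases can presumably be exercised on their own with the standard benchmark flags, for example:

go test -run '^$' -bench 'BenchmarkInsert/SmallColStr' -benchmem

This hypothetical invocation relies only on the subtest names introduced here (BenchmarkInsert/<case>/Rows<N>). Note also that b.SetBytes is now fed the encoded column size, so the reported throughput reflects the actual payload rather than a fixed 8 bytes per row.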
proto/col_str.go (12 changes: 7 additions & 5 deletions)

@@ -79,13 +79,15 @@ func (c ColStr) EncodeColumn(b *Buffer) {
 // WriteColumn writes String rows to *Writer.
 func (c ColStr) WriteColumn(w *Writer) {
 	buf := make([]byte, binary.MaxVarintLen64)
-	for _, p := range c.Pos {
-		w.ChainBuffer(func(b *Buffer) {
+	// Writing values from c.Buf directly might improve performance if [ColStr] contains a few rows of very long strings.
+	// However, most of the time it is quite opposite, so we copy data.
+	w.ChainBuffer(func(b *Buffer) {
+		for _, p := range c.Pos {
 			n := binary.PutUvarint(buf, uint64(p.End-p.Start))
 			b.PutRaw(buf[:n])
-		})
-		w.ChainWrite(c.Buf[p.Start:p.End])
-	}
+			b.PutRaw(c.Buf[p.Start:p.End])
+		}
+	})
 }
 
 // ForEach calls f on each string from column.
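The comment in the new code states the trade-off; structurally, the old loop issued two chained operations per row (a buffered uvarint plus a ChainWrite of the string slice), while the new version copies lengths and bytes for all rows into a single chained buffer, so the number of chained segments no longer grows with the row count. A self-contained sketch of why that matters for many small rows, using only the standard library; countingWriter, perRowWrites, and batchedWrite are hypothetical stand-ins, not proto.Writer internals:

package main

import (
	"encoding/binary"
	"fmt"
	"io"
)

// countingWriter counts Write calls; it stands in for whatever sink the
// chained writes are eventually flushed to. It is purely illustrative.
type countingWriter struct{ calls int }

func (w *countingWriter) Write(p []byte) (int, error) {
	w.calls++
	return len(p), nil
}

// perRowWrites mimics the old shape: one length write plus one payload write
// per row, so the number of writes grows with the row count.
func perRowWrites(w io.Writer, rows []string) {
	buf := make([]byte, binary.MaxVarintLen64)
	for _, s := range rows {
		n := binary.PutUvarint(buf, uint64(len(s)))
		_, _ = w.Write(buf[:n])
		_, _ = w.Write([]byte(s))
	}
}

// batchedWrite mimics the new shape: lengths and payloads are copied into one
// buffer and handed over in a single write, regardless of the row count.
func batchedWrite(w io.Writer, rows []string) {
	buf := make([]byte, binary.MaxVarintLen64)
	var out []byte
	for _, s := range rows {
		n := binary.PutUvarint(buf, uint64(len(s)))
		out = append(out, buf[:n]...)
		out = append(out, s...)
	}
	_, _ = w.Write(out)
}

func main() {
	rows := make([]string, 1000)
	for i := range rows {
		rows[i] = fmt.Sprintf("%016x", i) // 16-byte rows, like the SmallColStr case
	}
	perRow, batched := &countingWriter{}, &countingWriter{}
	perRowWrites(perRow, rows)
	batchedWrite(batched, rows)
	fmt.Println(perRow.calls, batched.calls) // 2000 1
}

Fewer chained segments means less per-write bookkeeping in the many-small-rows case the commit targets; for a handful of very long strings, the extra copy is the cost the in-code comment calls out.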