Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(proto): improve ColStr writing performance for small strings #428

Merged
merged 2 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 73 additions & 17 deletions insert_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package ch
import (
"context"
"fmt"
"strings"
"testing"

"github.com/go-faster/errors"
Expand All @@ -15,7 +16,7 @@ func BenchmarkInsert(b *testing.B) {
cht.Skip(b)
srv := cht.New(b)

bench := func(rows int) func(b *testing.B) {
bench := func(data proto.ColInput) func(b *testing.B) {
return func(b *testing.B) {
ctx := context.Background()
c, err := Dial(ctx, Options{
Expand All @@ -25,42 +26,97 @@ func BenchmarkInsert(b *testing.B) {
if err != nil {
b.Fatal(errors.Wrap(err, "dial"))
}
defer func() { _ = c.Close() }()

b.Cleanup(func() {
if err := c.Do(ctx, Query{
Body: "DROP TABLE IF EXISTS test_table",
}); err != nil {
b.Logf("Cleanup failed: %+v", err)
}
_ = c.Close()
})
if err := c.Do(ctx, Query{
Body: "CREATE TABLE IF NOT EXISTS test_table (id Int64) ENGINE = Null",
Body: fmt.Sprintf("CREATE TABLE IF NOT EXISTS test_table (row %s) ENGINE = Null", data.Type()),
}); err != nil {
b.Fatal(err)
}

var id proto.ColInt64
for i := 0; i < rows; i++ {
id = append(id, 1)
}
var tmp proto.Buffer
data.EncodeColumn(&tmp)

b.SetBytes(int64(rows) * 8)
b.SetBytes(int64(len(tmp.Buf)))
b.ResetTimer()
b.ReportAllocs()

for i := 0; i < b.N; i++ {
if err := c.Do(ctx, Query{
Body: "INSERT INTO test_table VALUES",
Input: []proto.InputColumn{
{Name: "id", Data: id},
{Name: "row", Data: data},
},
}); err != nil {
b.Fatal()
b.Fatal(err)
}
}
}
}
for _, rows := range []int{
10_000,
100_000,
1_000_000,
10_000_000,
100_000_000,
for _, gen := range []struct {
name string
getData func(rows int) proto.ColInput
maxRows int
}{
{
"ColInt64",
func(rows int) proto.ColInput {
var data proto.ColInt64
for i := 0; i < rows; i++ {
data.Append(int64(i))
}
return data
},
-1,
},
{
"SmallColStr",
func(rows int) proto.ColInput {
var data proto.ColStr
for i := 0; i < rows; i++ {
data.Append(fmt.Sprintf("%016x", i))
}
return data
},
1_000_000,
},
{
"BigColStr",
func(rows int) proto.ColInput {
var (
data proto.ColStr
scratch = strings.Repeat("abcd", 1024)
)
for i := 0; i < rows; i++ {
data.Append(scratch)
}
return data
},
100_000,
},
} {
b.Run(fmt.Sprintf("Rows%d", rows), bench(rows))
b.Run(gen.name, func(b *testing.B) {
for _, rows := range []int{
10_000,
100_000,
1_000_000,
10_000_000,
100_000_000,
} {
if gen.maxRows > 0 && rows > gen.maxRows {
continue
}
data := gen.getData(rows)

b.Run(fmt.Sprintf("Rows%d", rows), bench(data))
}
})
}
}
12 changes: 7 additions & 5 deletions proto/col_str.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,15 @@ func (c ColStr) EncodeColumn(b *Buffer) {
// WriteColumn writes String rows to *Writer.
func (c ColStr) WriteColumn(w *Writer) {
buf := make([]byte, binary.MaxVarintLen64)
for _, p := range c.Pos {
w.ChainBuffer(func(b *Buffer) {
// Writing values from c.Buf directly might improve performance if [ColStr] contains only a few rows of very long strings.
// However, most of the time the opposite is true (many small strings), so we copy the data.
w.ChainBuffer(func(b *Buffer) {
for _, p := range c.Pos {
n := binary.PutUvarint(buf, uint64(p.End-p.Start))
b.PutRaw(buf[:n])
})
w.ChainWrite(c.Buf[p.Start:p.End])
}
b.PutRaw(c.Buf[p.Start:p.End])
}
})
}

// ForEach calls f on each string from column.
Expand Down
Loading