diff --git a/insert_bench_test.go b/insert_bench_test.go
index 5a80cc0..ecea345 100644
--- a/insert_bench_test.go
+++ b/insert_bench_test.go
@@ -3,6 +3,7 @@ package ch
 import (
 	"context"
 	"fmt"
+	"strings"
 	"testing"
 
 	"github.com/go-faster/errors"
@@ -15,7 +16,7 @@ func BenchmarkInsert(b *testing.B) {
 	cht.Skip(b)
 	srv := cht.New(b)
 
-	bench := func(rows int) func(b *testing.B) {
+	bench := func(data proto.ColInput) func(b *testing.B) {
 		return func(b *testing.B) {
 			ctx := context.Background()
 			c, err := Dial(ctx, Options{
@@ -25,20 +26,25 @@ func BenchmarkInsert(b *testing.B) {
 			if err != nil {
 				b.Fatal(errors.Wrap(err, "dial"))
 			}
-			defer func() { _ = c.Close() }()
+			b.Cleanup(func() {
+				if err := c.Do(ctx, Query{
+					Body: "DROP TABLE IF EXISTS test_table",
+				}); err != nil {
+					b.Logf("Cleanup failed: %+v", err)
+				}
+				_ = c.Close()
+			})
 
 			if err := c.Do(ctx, Query{
-				Body: "CREATE TABLE IF NOT EXISTS test_table (id Int64) ENGINE = Null",
+				Body: fmt.Sprintf("CREATE TABLE IF NOT EXISTS test_table (row %s) ENGINE = Null", data.Type()),
 			}); err != nil {
 				b.Fatal(err)
 			}
 
-			var id proto.ColInt64
-			for i := 0; i < rows; i++ {
-				id = append(id, 1)
-			}
+			var tmp proto.Buffer
+			data.EncodeColumn(&tmp)
 
-			b.SetBytes(int64(rows) * 8)
+			b.SetBytes(int64(len(tmp.Buf)))
 			b.ResetTimer()
 			b.ReportAllocs()
 
@@ -46,21 +52,71 @@ func BenchmarkInsert(b *testing.B) {
 				if err := c.Do(ctx, Query{
 					Body:  "INSERT INTO test_table VALUES",
 					Input: []proto.InputColumn{
-						{Name: "id", Data: id},
+						{Name: "row", Data: data},
 					},
 				}); err != nil {
-					b.Fatal()
+					b.Fatal(err)
 				}
 			}
 		}
 	}
-	for _, rows := range []int{
-		10_000,
-		100_000,
-		1_000_000,
-		10_000_000,
-		100_000_000,
+	for _, gen := range []struct {
+		name    string
+		getData func(rows int) proto.ColInput
+		maxRows int
+	}{
+		{
+			"ColInt64",
+			func(rows int) proto.ColInput {
+				var data proto.ColInt64
+				for i := 0; i < rows; i++ {
+					data.Append(int64(i))
+				}
+				return data
+			},
+			-1,
+		},
+		{
+			"SmallColStr",
+			func(rows int) proto.ColInput {
+				var data proto.ColStr
+				for i := 0; i < rows; i++ {
+					data.Append(fmt.Sprintf("%016x", i))
+				}
+				return data
+			},
+			1_000_000,
+		},
+		{
+			"BigColStr",
+			func(rows int) proto.ColInput {
+				var (
+					data    proto.ColStr
+					scratch = strings.Repeat("abcd", 1024)
+				)
+				for i := 0; i < rows; i++ {
+					data.Append(scratch)
+				}
+				return data
+			},
+			100_000,
+		},
 	} {
-		b.Run(fmt.Sprintf("Rows%d", rows), bench(rows))
+		b.Run(gen.name, func(b *testing.B) {
+			for _, rows := range []int{
+				10_000,
+				100_000,
+				1_000_000,
+				10_000_000,
+				100_000_000,
+			} {
+				if gen.maxRows > 0 && rows > gen.maxRows {
+					continue
+				}
+				data := gen.getData(rows)
+
+				b.Run(fmt.Sprintf("Rows%d", rows), bench(data))
+			}
+		})
 	}
 }
diff --git a/proto/col_str.go b/proto/col_str.go
index 108c08e..9786f0e 100644
--- a/proto/col_str.go
+++ b/proto/col_str.go
@@ -79,13 +79,15 @@ func (c ColStr) EncodeColumn(b *Buffer) {
 // WriteColumn writes String rows to *Writer.
 func (c ColStr) WriteColumn(w *Writer) {
 	buf := make([]byte, binary.MaxVarintLen64)
-	for _, p := range c.Pos {
-		w.ChainBuffer(func(b *Buffer) {
+	// Writing values from c.Buf directly might improve performance if [ColStr] contains only a few rows of very long strings.
+	// However, most of the time it is quite the opposite, so we copy the data.
+	w.ChainBuffer(func(b *Buffer) {
+		for _, p := range c.Pos {
 			n := binary.PutUvarint(buf, uint64(p.End-p.Start))
 			b.PutRaw(buf[:n])
-		})
-		w.ChainWrite(c.Buf[p.Start:p.End])
-	}
+			b.PutRaw(c.Buf[p.Start:p.End])
+		}
+	})
 }
 
 // ForEach calls f on each string from column.