From c49cb3a404004b344b10a235006018c5dd034b59 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Thu, 26 Sep 2024 06:03:55 +0300 Subject: [PATCH 1/2] test: add `ColStr` inserting benchmarks --- insert_bench_test.go | 90 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 17 deletions(-) diff --git a/insert_bench_test.go b/insert_bench_test.go index 5a80cc0d..ecea345c 100644 --- a/insert_bench_test.go +++ b/insert_bench_test.go @@ -3,6 +3,7 @@ package ch import ( "context" "fmt" + "strings" "testing" "github.com/go-faster/errors" @@ -15,7 +16,7 @@ func BenchmarkInsert(b *testing.B) { cht.Skip(b) srv := cht.New(b) - bench := func(rows int) func(b *testing.B) { + bench := func(data proto.ColInput) func(b *testing.B) { return func(b *testing.B) { ctx := context.Background() c, err := Dial(ctx, Options{ @@ -25,20 +26,25 @@ func BenchmarkInsert(b *testing.B) { if err != nil { b.Fatal(errors.Wrap(err, "dial")) } - defer func() { _ = c.Close() }() + b.Cleanup(func() { + if err := c.Do(ctx, Query{ + Body: "DROP TABLE IF EXISTS test_table", + }); err != nil { + b.Logf("Cleanup failed: %+v", err) + } + _ = c.Close() + }) if err := c.Do(ctx, Query{ - Body: "CREATE TABLE IF NOT EXISTS test_table (id Int64) ENGINE = Null", + Body: fmt.Sprintf("CREATE TABLE IF NOT EXISTS test_table (row %s) ENGINE = Null", data.Type()), }); err != nil { b.Fatal(err) } - var id proto.ColInt64 - for i := 0; i < rows; i++ { - id = append(id, 1) - } + var tmp proto.Buffer + data.EncodeColumn(&tmp) - b.SetBytes(int64(rows) * 8) + b.SetBytes(int64(len(tmp.Buf))) b.ResetTimer() b.ReportAllocs() @@ -46,21 +52,71 @@ func BenchmarkInsert(b *testing.B) { if err := c.Do(ctx, Query{ Body: "INSERT INTO test_table VALUES", Input: []proto.InputColumn{ - {Name: "id", Data: id}, + {Name: "row", Data: data}, }, }); err != nil { - b.Fatal() + b.Fatal(err) } } } } - for _, rows := range []int{ - 10_000, - 100_000, - 1_000_000, - 10_000_000, - 100_000_000, + for _, gen := range []struct { + name 
string + getData func(rows int) proto.ColInput + maxRows int + }{ + { + "ColInt64", + func(rows int) proto.ColInput { + var data proto.ColInt64 + for i := 0; i < rows; i++ { + data.Append(int64(i)) + } + return data + }, + -1, + }, + { + "SmallColStr", + func(rows int) proto.ColInput { + var data proto.ColStr + for i := 0; i < rows; i++ { + data.Append(fmt.Sprintf("%016x", i)) + } + return data + }, + 1_000_000, + }, + { + "BigColStr", + func(rows int) proto.ColInput { + var ( + data proto.ColStr + scratch = strings.Repeat("abcd", 1024) + ) + for i := 0; i < rows; i++ { + data.Append(scratch) + } + return data + }, + 100_000, + }, } { - b.Run(fmt.Sprintf("Rows%d", rows), bench(rows)) + b.Run(gen.name, func(b *testing.B) { + for _, rows := range []int{ + 10_000, + 100_000, + 1_000_000, + 10_000_000, + 100_000_000, + } { + if gen.maxRows > 0 && rows > gen.maxRows { + continue + } + data := gen.getData(rows) + + b.Run(fmt.Sprintf("Rows%d", rows), bench(data)) + } + }) } } From 308ecb31e23aa2e552f88e57221b8a3171cc4292 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Thu, 26 Sep 2024 06:04:46 +0300 Subject: [PATCH 2/2] perf(proto): improve `ColStr` writing performance for small strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` goos: linux goarch: amd64 pkg: github.com/ClickHouse/ch-go cpu: AMD Ryzen 9 5950X 16-Core Processor │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ Insert/SmallColStr/Rows10000-32 2903.0µ ± 4% 634.0µ ± 8% -78.16% (p=0.000 n=15) Insert/SmallColStr/Rows100000-32 28.549m ± 2% 2.584m ± 13% -90.95% (p=0.000 n=15) Insert/SmallColStr/Rows1000000-32 172.39m ± 2% 27.35m ± 2% -84.14% (p=0.000 n=15) Insert/BigColStr/Rows10000-32 25.97m ± 17% 33.78m ± 10% +30.07% (p=0.000 n=15) Insert/BigColStr/Rows100000-32 375.0m ± 11% 1051.0m ± 41% +180.22% (p=0.000 n=15) geomean 20.52m 13.16m -35.87% │ old.txt │ new.txt │ │ B/s │ B/s vs base │ Insert/SmallColStr/Rows10000-32 55.85Mi ± 4% 255.73Mi ± 8% +357.91% (p=0.000 
n=15) Insert/SmallColStr/Rows100000-32 56.79Mi ± 2% 627.37Mi ± 12% +1004.70% (p=0.000 n=15) Insert/SmallColStr/Rows1000000-32 94.04Mi ± 2% 592.83Mi ± 2% +530.39% (p=0.000 n=15) Insert/BigColStr/Rows10000-32 1.469Gi ± 15% 1.130Gi ± 11% -23.12% (p=0.000 n=15) Insert/BigColStr/Rows100000-32 1042.1Mi ± 10% 371.9Mi ± 70% -64.31% (p=0.000 n=15) geomean 407.7Mi 635.8Mi +55.93% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ Insert/SmallColStr/Rows10000-32 2085.57Ki ± 0% 11.14Ki ± 0% -99.47% (p=0.000 n=15) Insert/SmallColStr/Rows100000-32 23133.30Ki ± 0% 28.04Ki ± 4% -99.88% (p=0.000 n=15) Insert/SmallColStr/Rows1000000-32 234.485Mi ± 2% 2.458Mi ± 7% -98.95% (p=0.000 n=15) Insert/BigColStr/Rows10000-32 2.041Mi ± 0% 5.743Mi ± 12% +181.31% (p=0.000 n=15) Insert/BigColStr/Rows100000-32 24.32Mi ± 6% 2409.10Mi ± 50% +9804.48% (p=0.000 n=15) geomean 402.6Ki 135.7Ki -66.31% │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ Insert/SmallColStr/Rows10000-32 335.0 ± 0% 327.0 ± 0% -2.39% (p=0.000 n=15) Insert/SmallColStr/Rows100000-32 343.0 ± 0% 328.0 ± 0% -4.37% (p=0.000 n=15) Insert/SmallColStr/Rows1000000-32 353.0 ± 1% 329.0 ± 0% -6.80% (p=0.000 n=15) Insert/BigColStr/Rows10000-32 335.0 ± 0% 329.0 ± 0% -1.79% (p=0.000 n=15) Insert/BigColStr/Rows100000-32 354.0 ± 1% 381.0 ± 7% +7.63% (p=0.009 n=15) geomean 336.2 333.3 -0.87% ``` --- proto/col_str.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/proto/col_str.go b/proto/col_str.go index 108c08e9..9786f0ee 100644 --- a/proto/col_str.go +++ b/proto/col_str.go @@ -79,13 +79,15 @@ func (c ColStr) EncodeColumn(b *Buffer) { // WriteColumn writes String rows to *Writer. func (c ColStr) WriteColumn(w *Writer) { buf := make([]byte, binary.MaxVarintLen64) - for _, p := range c.Pos { - w.ChainBuffer(func(b *Buffer) { + // Writing values from c.Buf directly might improve performance if [ColStr] contains a few rows of very long strings. + // However, most of the time it is quite the opposite, so we copy data.
+ w.ChainBuffer(func(b *Buffer) { + for _, p := range c.Pos { n := binary.PutUvarint(buf, uint64(p.End-p.Start)) b.PutRaw(buf[:n]) - }) - w.ChainWrite(c.Buf[p.Start:p.End]) - } + b.PutRaw(c.Buf[p.Start:p.End]) + } + }) } // ForEach calls f on each string from column.