From d9bf3bf2d0e056e2faa514a63c42c3966aeea598 Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Tue, 24 Aug 2021 13:00:04 +0800 Subject: [PATCH] executor: make `group_concat` function consider the collation (#27490) --- executor/aggfuncs/func_group_concat.go | 22 +++++++++++++++++----- executor/analyze_test.go | 13 +++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/executor/aggfuncs/func_group_concat.go b/executor/aggfuncs/func_group_concat.go index 87dd948ee003a..1718b22682cfd 100644 --- a/executor/aggfuncs/func_group_concat.go +++ b/executor/aggfuncs/func_group_concat.go @@ -28,8 +28,8 @@ import ( "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/codec" + "github.com/pingcap/tidb/util/collate" "github.com/pingcap/tidb/util/dbterror" - "github.com/pingcap/tidb/util/hack" "github.com/pingcap/tidb/util/set" ) @@ -221,10 +221,16 @@ func (e *groupConcatDistinct) UpdatePartialResult(sctx sessionctx.Context, rowsI memDelta += int64(p.buffer.Cap()) } }() + + collators := make([]collate.Collator, 0, len(e.args)) + for _, arg := range e.args { + collators = append(collators, collate.GetCollator(arg.GetType().Collate)) + } + for _, row := range rowsInGroup { p.valsBuf.Reset() p.encodeBytesBuffer = p.encodeBytesBuffer[:0] - for _, arg := range e.args { + for i, arg := range e.args { v, isNull, err = arg.EvalString(sctx, row) if err != nil { return memDelta, err @@ -232,7 +238,7 @@ func (e *groupConcatDistinct) UpdatePartialResult(sctx sessionctx.Context, rowsI if isNull { break } - p.encodeBytesBuffer = codec.EncodeBytes(p.encodeBytesBuffer, hack.Slice(v)) + p.encodeBytesBuffer = codec.EncodeBytes(p.encodeBytesBuffer, collators[i].Key(v)) p.valsBuf.WriteString(v) } if isNull { @@ -537,10 +543,16 @@ func (e *groupConcatDistinctOrder) UpdatePartialResult(sctx sessionctx.Context, v, isNull := "", false memDelta -= int64(cap(p.encodeBytesBuffer)) defer func() { memDelta += int64(cap(p.encodeBytesBuffer)) }() + + collators := make([]collate.Collator, 0, len(e.args)) + for _, arg := range e.args { + collators = append(collators, collate.GetCollator(arg.GetType().Collate)) + } + for _, row := range rowsInGroup { buffer := new(bytes.Buffer) p.encodeBytesBuffer = p.encodeBytesBuffer[:0] - for _, arg := range e.args { + for i, arg := range e.args { v, isNull, err = arg.EvalString(sctx, row) if err != nil { return memDelta, err @@ -548,7 +560,7 @@ func (e *groupConcatDistinctOrder) UpdatePartialResult(sctx sessionctx.Context, if isNull { break } - p.encodeBytesBuffer = codec.EncodeBytes(p.encodeBytesBuffer, hack.Slice(v)) + p.encodeBytesBuffer = codec.EncodeBytes(p.encodeBytesBuffer, collators[i].Key(v)) buffer.WriteString(v) } if isNull { diff --git a/executor/analyze_test.go b/executor/analyze_test.go index 0230549a089f3..a33b30736c816 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -949,6 +949,19 @@ func (s *testSuite1) TestDefaultValForAnalyze(c *C) { "└─IndexRangeScan_5 1.00 cop[tikv] table:t, index:a(a) range:[1,1], keep order:false")) } +func (s *testSerialSuite2) TestIssue27429(c *C) { + collate.SetNewCollationEnabledForTest(true) + defer collate.SetNewCollationEnabledForTest(false) + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table test.t(id int, value varchar(20) charset utf8mb4 collate utf8mb4_general_ci, value1 varchar(20) charset utf8mb4 collate utf8mb4_bin)") + tk.MustExec("insert into test.t values (1, 'abc', 'abc '),(4, 'Abc', 'abc'),(3,'def', 'def ');") + + tk.MustQuery("select upper(group_concat(distinct value order by 1)) from test.t;").Check(testkit.Rows("ABC,DEF")) + tk.MustQuery("select upper(group_concat(distinct value)) from test.t;").Check(testkit.Rows("ABC,DEF")) +} + func (s *testSerialSuite2) TestIssue20874(c *C) { collate.SetNewCollationEnabledForTest(true) defer collate.SetNewCollationEnabledForTest(false)