From d6ebc600f8c0d3a0f6e28611c144c56a76c190a2 Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Fri, 19 Aug 2022 11:20:52 +0800 Subject: [PATCH] expression: make collation work with json type (#37211) close pingcap/tidb#31640 --- .../r/collation_agg_func_disabled.result | 16 +++++------ .../r/collation_agg_func_enabled.result | 28 ++++++++++++++++--- expression/integration_test.go | 15 ++++++++++ planner/core/expression_rewriter.go | 12 +++++++- 4 files changed, 58 insertions(+), 13 deletions(-) diff --git a/cmd/explaintest/r/collation_agg_func_disabled.result b/cmd/explaintest/r/collation_agg_func_disabled.result index 3344ab2ff55d1..d2503250f063f 100644 --- a/cmd/explaintest/r/collation_agg_func_disabled.result +++ b/cmd/explaintest/r/collation_agg_func_disabled.result @@ -263,13 +263,13 @@ min(d) desc format='brief' select min(d collate utf8mb4_bin) from tt; id estRows task access object operator info StreamAgg 1.00 root funcs:min(Column#8)->Column#6 -└─Projection 1.00 root cast(collation_agg_func.tt.d, json BINARY)->Column#8 +└─Projection 1.00 root cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8 └─Projection 1.00 root collation_agg_func.tt.d └─TopN 1.00 root Column#7, offset:0, count:1 - └─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, json BINARY)->Column#7 + └─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 └─TableReader 1.00 root data:TopN - └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, json BINARY), offset:0, count:1 - └─Selection 8000.00 cop[tikv] not(isnull(cast(cast(collation_agg_func.tt.d, json BINARY), var_string(4294967295)))) + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin), offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin))) └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo select min(d collate utf8mb4_bin) from tt; min(d collate utf8mb4_bin) @@ -288,13 +288,13 @@ max(d) desc format='brief' select max(d collate utf8mb4_bin) from tt; id estRows task access object operator info StreamAgg 1.00 root funcs:max(Column#8)->Column#6 -└─Projection 1.00 root cast(collation_agg_func.tt.d, json BINARY)->Column#8 +└─Projection 1.00 root cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8 └─Projection 1.00 root collation_agg_func.tt.d └─TopN 1.00 root Column#7:desc, offset:0, count:1 - └─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, json BINARY)->Column#7 + └─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 └─TableReader 1.00 root data:TopN - └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, json BINARY):desc, offset:0, count:1 - └─Selection 8000.00 cop[tikv] not(isnull(cast(cast(collation_agg_func.tt.d, json BINARY), var_string(4294967295)))) + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin):desc, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin))) └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo select max(d collate utf8mb4_bin) from tt; max(d collate utf8mb4_bin) diff --git a/cmd/explaintest/r/collation_agg_func_enabled.result b/cmd/explaintest/r/collation_agg_func_enabled.result index a74985243bdab..35c21a1ec469a 100644 --- a/cmd/explaintest/r/collation_agg_func_enabled.result +++ b/cmd/explaintest/r/collation_agg_func_enabled.result @@ -258,9 +258,19 @@ select min(d) from tt; min(d) {"A": "A"} desc format='brief' select min(d collate utf8mb4_bin) from tt; -Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(Column#8)->Column#6 +└─Projection 1.00 root cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8 + └─Projection 1.00 root collation_agg_func.tt.d + └─TopN 1.00 root Column#7, offset:0, count:1 + └─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin), offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo select min(d collate utf8mb4_bin) from tt; -Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +min(d collate utf8mb4_bin) +{"A": "A"} desc format='brief' select max(d) from tt; id estRows task access object operator info StreamAgg 1.00 root funcs:max(collation_agg_func.tt.d)->Column#6 @@ -273,8 +283,18 @@ select max(d) from tt; max(d) {"c": "c"} desc format='brief' select max(d collate utf8mb4_bin) from tt; -Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(Column#8)->Column#6 +└─Projection 1.00 root cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8 + └─Projection 1.00 root collation_agg_func.tt.d + └─TopN 1.00 root Column#7:desc, offset:0, count:1 + └─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin):desc, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo select max(d collate utf8mb4_bin) from tt; -Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +max(d collate utf8mb4_bin) +{"c": "c"} drop database collation_agg_func; use test diff --git a/expression/integration_test.go b/expression/integration_test.go index 8ebf193fb6b6f..a5021ab2e21fd 100644 --- a/expression/integration_test.go +++ b/expression/integration_test.go @@ -7385,6 +7385,21 @@ func TestImcompleteDateFunc(t *testing.T) { tk.MustQuery("select YEARWEEK('1998-00-11')").Check(testkit.Rows("")) } +func TestIssue31640(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + + tk.MustExec("use test") + tk.MustExec("create table t(a json);") + tk.MustExec(`insert into t values ('"a"'), ('"B"'), ('"c"'), ('"D"'), ('{"a": 1}'), ('1'), ('{"b": 2}'), ('[1, 2]'), ('[3, 4]');`) + tk.MustQuery("select min(a) from t;").Check(testkit.Rows("1")) + tk.MustQuery("select max(a) from t;").Check(testkit.Rows("[3, 4]")) + tk.MustQuery("select min(a collate utf8mb4_bin) from t;").Check(testkit.Rows("\"B\"")) + tk.MustQuery("select max(a collate utf8mb4_bin) from t;").Check(testkit.Rows("{\"b\": 2}")) + tk.MustQuery("select min(a collate utf8mb4_unicode_ci) from t;").Check(testkit.Rows("\"a\"")) + tk.MustQuery("select max(a collate utf8mb4_unicode_ci) from t;").Check(testkit.Rows("1")) +} + func TestIssue36279(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 52fa796922b46..dff1739192523 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -1203,21 +1203,31 @@ func (er *expressionRewriter) Leave(originInNode ast.Node) (retNode ast.Node, ok break } chs := arg.GetType().GetCharset() + // if the field is json, the charset is always utf8mb4. + if arg.GetType().GetType() == mysql.TypeJSON { + chs = mysql.UTF8MB4Charset + } if chs != "" && collInfo.CharsetName != chs { er.err = charset.ErrCollationCharsetMismatch.GenWithStackByArgs(collInfo.Name, chs) break } } // SetCollationExpr sets the collation explicitly, even when the evaluation type of the expression is non-string. - if _, ok := arg.(*expression.Column); ok { + if _, ok := arg.(*expression.Column); ok || arg.GetType().GetType() == mysql.TypeJSON { if arg.GetType().GetType() == mysql.TypeEnum || arg.GetType().GetType() == mysql.TypeSet { er.err = ErrNotSupportedYet.GenWithStackByArgs("use collate clause for enum or set") break } // Wrap a cast here to avoid changing the original FieldType of the column expression. exprType := arg.GetType().Clone() + // if arg type is json, we should cast it to longtext if there is collate clause. + if arg.GetType().GetType() == mysql.TypeJSON { + exprType = types.NewFieldType(mysql.TypeLongBlob) + exprType.SetCharset(mysql.UTF8MB4Charset) + } exprType.SetCollate(v.Collate) casted := expression.BuildCastFunction(er.sctx, arg, exprType) + arg = casted er.ctxStackPop(1) er.ctxStackAppend(casted, types.EmptyName) } else {