Skip to content

Commit

Permalink
expression: make collation work with json type (#37211)
Browse files Browse the repository at this point in the history
close #31640
  • Loading branch information
xiongjiwei authored Aug 19, 2022
1 parent 7d401c5 commit d6ebc60
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 13 deletions.
16 changes: 8 additions & 8 deletions cmd/explaintest/r/collation_agg_func_disabled.result
Original file line number Diff line number Diff line change
Expand Up @@ -263,13 +263,13 @@ min(d)
desc format='brief' select min(d collate utf8mb4_bin) from tt;
id estRows task access object operator info
StreamAgg 1.00 root funcs:min(Column#8)->Column#6
└─Projection 1.00 root cast(collation_agg_func.tt.d, json BINARY)->Column#8
└─Projection 1.00 root cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8
└─Projection 1.00 root collation_agg_func.tt.d
└─TopN 1.00 root Column#7, offset:0, count:1
└─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, json BINARY)->Column#7
└─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7
└─TableReader 1.00 root data:TopN
└─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, json BINARY), offset:0, count:1
└─Selection 8000.00 cop[tikv] not(isnull(cast(cast(collation_agg_func.tt.d, json BINARY), var_string(4294967295))))
└─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin), offset:0, count:1
└─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)))
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
select min(d collate utf8mb4_bin) from tt;
min(d collate utf8mb4_bin)
Expand All @@ -288,13 +288,13 @@ max(d)
desc format='brief' select max(d collate utf8mb4_bin) from tt;
id estRows task access object operator info
StreamAgg 1.00 root funcs:max(Column#8)->Column#6
└─Projection 1.00 root cast(collation_agg_func.tt.d, json BINARY)->Column#8
└─Projection 1.00 root cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8
└─Projection 1.00 root collation_agg_func.tt.d
└─TopN 1.00 root Column#7:desc, offset:0, count:1
└─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, json BINARY)->Column#7
└─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7
└─TableReader 1.00 root data:TopN
└─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, json BINARY):desc, offset:0, count:1
└─Selection 8000.00 cop[tikv] not(isnull(cast(cast(collation_agg_func.tt.d, json BINARY), var_string(4294967295))))
└─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin):desc, offset:0, count:1
└─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)))
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
select max(d collate utf8mb4_bin) from tt;
max(d collate utf8mb4_bin)
Expand Down
28 changes: 24 additions & 4 deletions cmd/explaintest/r/collation_agg_func_enabled.result
Original file line number Diff line number Diff line change
Expand Up @@ -258,9 +258,19 @@ select min(d) from tt;
min(d)
{"A": "A"}
desc format='brief' select min(d collate utf8mb4_bin) from tt;
Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary'
id estRows task access object operator info
StreamAgg 1.00 root funcs:min(Column#8)->Column#6
└─Projection 1.00 root cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8
└─Projection 1.00 root collation_agg_func.tt.d
└─TopN 1.00 root Column#7, offset:0, count:1
└─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7
└─TableReader 1.00 root data:TopN
└─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin), offset:0, count:1
└─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)))
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
select min(d collate utf8mb4_bin) from tt;
Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary'
min(d collate utf8mb4_bin)
{"A": "A"}
desc format='brief' select max(d) from tt;
id estRows task access object operator info
StreamAgg 1.00 root funcs:max(collation_agg_func.tt.d)->Column#6
Expand All @@ -273,8 +283,18 @@ select max(d) from tt;
max(d)
{"c": "c"}
desc format='brief' select max(d collate utf8mb4_bin) from tt;
Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary'
id estRows task access object operator info
StreamAgg 1.00 root funcs:max(Column#8)->Column#6
└─Projection 1.00 root cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8
└─Projection 1.00 root collation_agg_func.tt.d
└─TopN 1.00 root Column#7:desc, offset:0, count:1
└─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7
└─TableReader 1.00 root data:TopN
└─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin):desc, offset:0, count:1
└─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)))
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
select max(d collate utf8mb4_bin) from tt;
Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary'
max(d collate utf8mb4_bin)
{"c": "c"}
drop database collation_agg_func;
use test
15 changes: 15 additions & 0 deletions expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7385,6 +7385,21 @@ func TestImcompleteDateFunc(t *testing.T) {
tk.MustQuery("select YEARWEEK('1998-00-11')").Check(testkit.Rows("<nil>"))
}

func TestIssue31640(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)

tk.MustExec("use test")
tk.MustExec("create table t(a json);")
tk.MustExec(`insert into t values ('"a"'), ('"B"'), ('"c"'), ('"D"'), ('{"a": 1}'), ('1'), ('{"b": 2}'), ('[1, 2]'), ('[3, 4]');`)
tk.MustQuery("select min(a) from t;").Check(testkit.Rows("1"))
tk.MustQuery("select max(a) from t;").Check(testkit.Rows("[3, 4]"))
tk.MustQuery("select min(a collate utf8mb4_bin) from t;").Check(testkit.Rows("\"B\""))
tk.MustQuery("select max(a collate utf8mb4_bin) from t;").Check(testkit.Rows("{\"b\": 2}"))
tk.MustQuery("select min(a collate utf8mb4_unicode_ci) from t;").Check(testkit.Rows("\"a\""))
tk.MustQuery("select max(a collate utf8mb4_unicode_ci) from t;").Check(testkit.Rows("1"))
}

func TestIssue36279(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
Expand Down
12 changes: 11 additions & 1 deletion planner/core/expression_rewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -1203,21 +1203,31 @@ func (er *expressionRewriter) Leave(originInNode ast.Node) (retNode ast.Node, ok
break
}
chs := arg.GetType().GetCharset()
// if the field is json, the charset is always utf8mb4.
if arg.GetType().GetType() == mysql.TypeJSON {
chs = mysql.UTF8MB4Charset
}
if chs != "" && collInfo.CharsetName != chs {
er.err = charset.ErrCollationCharsetMismatch.GenWithStackByArgs(collInfo.Name, chs)
break
}
}
// SetCollationExpr sets the collation explicitly, even when the evaluation type of the expression is non-string.
if _, ok := arg.(*expression.Column); ok {
if _, ok := arg.(*expression.Column); ok || arg.GetType().GetType() == mysql.TypeJSON {
if arg.GetType().GetType() == mysql.TypeEnum || arg.GetType().GetType() == mysql.TypeSet {
er.err = ErrNotSupportedYet.GenWithStackByArgs("use collate clause for enum or set")
break
}
// Wrap a cast here to avoid changing the original FieldType of the column expression.
exprType := arg.GetType().Clone()
// if arg type is json, we should cast it to longtext if there is collate clause.
if arg.GetType().GetType() == mysql.TypeJSON {
exprType = types.NewFieldType(mysql.TypeLongBlob)
exprType.SetCharset(mysql.UTF8MB4Charset)
}
exprType.SetCollate(v.Collate)
casted := expression.BuildCastFunction(er.sctx, arg, exprType)
arg = casted
er.ctxStackPop(1)
er.ctxStackAppend(casted, types.EmptyName)
} else {
Expand Down

0 comments on commit d6ebc60

Please sign in to comment.