diff --git a/cmd/explaintest/r/collation_agg_func_disabled.result b/cmd/explaintest/r/collation_agg_func_disabled.result new file mode 100644 index 0000000000000..f0297f70b094c --- /dev/null +++ b/cmd/explaintest/r/collation_agg_func_disabled.result @@ -0,0 +1,294 @@ +create database collation_agg_func; +use collation_agg_func; +create table t(id int, value varchar(20) charset utf8mb4 collate utf8mb4_general_ci, value1 varchar(20) charset utf8mb4 collate utf8mb4_bin); +insert into t values (1, 'abc', 'abc '),(4, 'Abc', 'abc'),(3,'def', 'def '), (5, 'abc', 'ABC'); +desc format='brief' select group_concat(value order by 1) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:group_concat(collation_agg_func.t.value order by collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select group_concat(value order by 1) from t; +group_concat(value order by 1) +Abc,abc,abc,def +desc format='brief' select group_concat(value) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:group_concat(collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select group_concat(value) from t; +group_concat(value) +abc,Abc,def,abc +desc format='brief' select group_concat(value collate utf8mb4_bin) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:group_concat(Column#6 separator ",")->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select group_concat(value collate utf8mb4_bin) from t; +group_concat(value collate utf8mb4_bin) +abc,Abc,def,abc +desc format='brief' 
select group_concat(distinct value order by 1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct collation_agg_func.t.value order by collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value order by 1)) from t; +upper(group_concat(distinct value order by 1)) +ABC,ABC,DEF +desc format='brief' select group_concat(distinct value collate utf8mb4_bin order by 1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct Column#6 order by Column#7 separator ",")->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value collate utf8mb4_bin order by 1)) from t; +upper(group_concat(distinct value collate utf8mb4_bin order by 1)) +ABC,ABC,DEF +desc format='brief' select group_concat(distinct value) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value)) from t; +upper(group_concat(distinct value)) +ABC,ABC,DEF +desc format='brief' select group_concat(distinct value collate utf8mb4_bin) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct Column#6 separator ",")->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE 
utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value collate utf8mb4_bin)) from t; +upper(group_concat(distinct value collate utf8mb4_bin)) +ABC,ABC,DEF +desc format='brief' select count(distinct value) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct collation_agg_func.t.value)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value) from t; +count(distinct value) +3 +desc format='brief' select count(distinct value collate utf8mb4_bin) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct Column#6)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value collate utf8mb4_bin) from t; +count(distinct value collate utf8mb4_bin) +3 +desc format='brief' select count(distinct value, value1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct collation_agg_func.t.value, collation_agg_func.t.value1)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value, value1) from t; +count(distinct value, value1) +4 +desc format='brief' select count(distinct value collate utf8mb4_bin, value1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct Column#6, Column#7)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, collation_agg_func.t.value1 + └─TableReader 
10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value collate utf8mb4_bin, value1) from t; +count(distinct value collate utf8mb4_bin, value1) +4 +desc format='brief' select approx_count_distinct(value) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(collation_agg_func.t.value)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value) from t; +approx_count_distinct(value) +3 +desc format='brief' select approx_count_distinct(value collate utf8mb4_bin) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(Column#6)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value collate utf8mb4_bin) from t; +approx_count_distinct(value collate utf8mb4_bin) +3 +desc format='brief' select approx_count_distinct(value, value1) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(collation_agg_func.t.value, collation_agg_func.t.value1)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value, value1) from t; +approx_count_distinct(value, value1) +4 +desc format='brief' select approx_count_distinct(value collate utf8mb4_bin, value1) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(Column#6, Column#7)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, 
collation_agg_func.t.value1 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value collate utf8mb4_bin, value1) from t; +approx_count_distinct(value collate utf8mb4_bin, value1) +4 +create table tt(a char(10), b enum('a', 'B', 'c'), c set('a', 'B', 'c'), d json) collate utf8mb4_general_ci; +insert into tt values ("a", "a", "a", JSON_OBJECT("a", "a")); +insert into tt values ("A", "A", "A", JSON_OBJECT("A", "A")); +Error 1265: Data truncated for column 'b' at row 1 +insert into tt values ("b", "b", "b", JSON_OBJECT("b", "b")); +Error 1265: Data truncated for column 'b' at row 1 +insert into tt values ("B", "B", "B", JSON_OBJECT("B", "B")); +insert into tt values ("c", "c", "c", JSON_OBJECT("c", "c")); +insert into tt values ("C", "C", "C", JSON_OBJECT("C", "C")); +Error 1265: Data truncated for column 'b' at row 1 +split table tt by (0), (1), (2), (3), (4), (5); +desc format='brief' select min(a) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(collation_agg_func.tt.a)->Column#6 +└─TopN 1.00 root collation_agg_func.tt.a, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.a, offset:0, count:1 + └─Selection 9990.00 cop[tikv] not(isnull(collation_agg_func.tt.a)) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(a) from tt; +min(a) +B +desc format='brief' select min(a collate utf8mb4_bin) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(Column#8)->Column#6 +└─Projection 1.00 root cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8 + └─Projection 1.00 root collation_agg_func.tt.a + └─TopN 1.00 root Column#7, offset:0, count:1 + └─Projection 1.00 root collation_agg_func.tt.a, cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 + 
└─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin), offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(a collate utf8mb4_bin) from tt; +min(a collate utf8mb4_bin) +B +desc format='brief' select max(a) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(collation_agg_func.tt.a)->Column#6 +└─TopN 1.00 root collation_agg_func.tt.a:desc, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.a:desc, offset:0, count:1 + └─Selection 9990.00 cop[tikv] not(isnull(collation_agg_func.tt.a)) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(a) from tt; +max(a) +c +desc format='brief' select max(a collate utf8mb4_bin) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(Column#8)->Column#6 +└─Projection 1.00 root cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8 + └─Projection 1.00 root collation_agg_func.tt.a + └─TopN 1.00 root Column#7:desc, offset:0, count:1 + └─Projection 1.00 root collation_agg_func.tt.a, cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin):desc, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(a collate utf8mb4_bin) from tt; +max(a collate utf8mb4_bin) +c +desc format='brief' select min(b) from tt; +id estRows task access object operator 
info +StreamAgg 1.00 root funcs:min(Column#8)->Column#6 +└─TableReader 1.00 root data:StreamAgg + └─StreamAgg 1.00 cop[tikv] funcs:min(collation_agg_func.tt.b)->Column#8 + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(b) from tt; +min(b) +B +desc format='brief' select min(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select min(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select max(b) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(Column#8)->Column#6 +└─TableReader 1.00 root data:StreamAgg + └─StreamAgg 1.00 cop[tikv] funcs:max(collation_agg_func.tt.b)->Column#8 + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(b) from tt; +max(b) +c +desc format='brief' select max(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select max(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select min(c) from tt; +id estRows task access object operator info +HashAgg 1.00 root funcs:min(collation_agg_func.tt.c)->Column#6 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(c) from tt; +min(c) +B +desc format='brief' select min(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select min(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select max(c) from tt; +id estRows task access object operator info +HashAgg 1.00 root funcs:max(collation_agg_func.tt.c)->Column#6 +└─TableReader 
10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(c) from tt; +max(c) +c +desc format='brief' select max(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select max(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select min(d) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(collation_agg_func.tt.d)->Column#6 +└─TopN 1.00 root collation_agg_func.tt.d, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.d, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, var_string(4294967295)))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(d) from tt; +min(d) +{"B": "B"} +desc format='brief' select min(d collate utf8mb4_bin) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(Column#8)->Column#6 +└─Projection 1.00 root cast(collation_agg_func.tt.d, json BINARY)->Column#8 + └─Projection 1.00 root collation_agg_func.tt.d + └─TopN 1.00 root Column#7, offset:0, count:1 + └─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, json BINARY)->Column#7 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, json BINARY), offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(cast(collation_agg_func.tt.d, json BINARY), var_string(4294967295)))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(d collate utf8mb4_bin) from tt; +min(d collate utf8mb4_bin) +{"B": "B"} +desc format='brief' select max(d) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(collation_agg_func.tt.d)->Column#6 +└─TopN 1.00 root 
collation_agg_func.tt.d:desc, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.d:desc, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, var_string(4294967295)))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(d) from tt; +max(d) +{"c": "c"} +desc format='brief' select max(d collate utf8mb4_bin) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(Column#8)->Column#6 +└─Projection 1.00 root cast(collation_agg_func.tt.d, json BINARY)->Column#8 + └─Projection 1.00 root collation_agg_func.tt.d + └─TopN 1.00 root Column#7:desc, offset:0, count:1 + └─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, json BINARY)->Column#7 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, json BINARY):desc, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(cast(collation_agg_func.tt.d, json BINARY), var_string(4294967295)))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(d collate utf8mb4_bin) from tt; +max(d collate utf8mb4_bin) +{"c": "c"} +drop database collation_agg_func; +use test diff --git a/cmd/explaintest/r/collation_agg_func_enabled.result b/cmd/explaintest/r/collation_agg_func_enabled.result new file mode 100644 index 0000000000000..e40627439cd88 --- /dev/null +++ b/cmd/explaintest/r/collation_agg_func_enabled.result @@ -0,0 +1,271 @@ +create database collation_agg_func; +use collation_agg_func; +create table t(id int, value varchar(20) charset utf8mb4 collate utf8mb4_general_ci, value1 varchar(20) charset utf8mb4 collate utf8mb4_bin); +insert into t values (1, 'abc', 'abc '),(4, 'Abc', 'abc'),(3,'def', 'def '), (5, 'abc', 'ABC'); +desc format='brief' select group_concat(value order by 1) from t; +id estRows task access object operator info +HashAgg 1.00 root 
funcs:group_concat(collation_agg_func.t.value order by collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select group_concat(value order by 1) from t; +group_concat(value order by 1) +Abc,abc,abc,def +desc format='brief' select group_concat(value) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:group_concat(collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select group_concat(value) from t; +group_concat(value) +abc,Abc,def,abc +desc format='brief' select group_concat(value collate utf8mb4_bin) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:group_concat(Column#6 separator ",")->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select group_concat(value collate utf8mb4_bin) from t; +group_concat(value collate utf8mb4_bin) +abc,Abc,def,abc +desc format='brief' select group_concat(distinct value order by 1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct collation_agg_func.t.value order by collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value order by 1)) from t; +upper(group_concat(distinct value order by 1)) +ABC,DEF +desc format='brief' select group_concat(distinct value collate utf8mb4_bin order by 1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct Column#6 order by Column#7 
separator ",")->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value collate utf8mb4_bin order by 1)) from t; +upper(group_concat(distinct value collate utf8mb4_bin order by 1)) +ABC,ABC,DEF +desc format='brief' select group_concat(distinct value) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value)) from t; +upper(group_concat(distinct value)) +ABC,DEF +desc format='brief' select group_concat(distinct value collate utf8mb4_bin) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct Column#6 separator ",")->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value collate utf8mb4_bin)) from t; +upper(group_concat(distinct value collate utf8mb4_bin)) +ABC,ABC,DEF +desc format='brief' select count(distinct value) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct collation_agg_func.t.value)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value) from t; +count(distinct value) +2 +desc format='brief' select count(distinct value 
collate utf8mb4_bin) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct Column#6)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value collate utf8mb4_bin) from t; +count(distinct value collate utf8mb4_bin) +3 +desc format='brief' select count(distinct value, value1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct collation_agg_func.t.value, collation_agg_func.t.value1)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value, value1) from t; +count(distinct value, value1) +3 +desc format='brief' select count(distinct value collate utf8mb4_bin, value1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct Column#6, Column#7)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, collation_agg_func.t.value1 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value collate utf8mb4_bin, value1) from t; +count(distinct value collate utf8mb4_bin, value1) +4 +desc format='brief' select approx_count_distinct(value) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(collation_agg_func.t.value)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value) from t; +approx_count_distinct(value) +2 +desc format='brief' select approx_count_distinct(value collate utf8mb4_bin) from t; 
+id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(Column#6)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value collate utf8mb4_bin) from t; +approx_count_distinct(value collate utf8mb4_bin) +3 +desc format='brief' select approx_count_distinct(value, value1) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(collation_agg_func.t.value, collation_agg_func.t.value1)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value, value1) from t; +approx_count_distinct(value, value1) +3 +desc format='brief' select approx_count_distinct(value collate utf8mb4_bin, value1) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(Column#6, Column#7)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, collation_agg_func.t.value1 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value collate utf8mb4_bin, value1) from t; +approx_count_distinct(value collate utf8mb4_bin, value1) +4 +create table tt(a char(10), b enum('a', 'B', 'c'), c set('a', 'B', 'c'), d json) collate utf8mb4_general_ci; +insert into tt values ("a", "a", "a", JSON_OBJECT("a", "a")); +insert into tt values ("A", "A", "A", JSON_OBJECT("A", "A")); +insert into tt values ("b", "b", "b", JSON_OBJECT("b", "b")); +insert into tt values ("B", "B", "B", JSON_OBJECT("B", "B")); +insert into tt values ("c", "c", "c", JSON_OBJECT("c", "c")); +insert into 
tt values ("C", "C", "C", JSON_OBJECT("C", "C")); +split table tt by (0), (1), (2), (3), (4), (5); +desc format='brief' select min(a) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(collation_agg_func.tt.a)->Column#6 +└─TopN 1.00 root collation_agg_func.tt.a, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.a, offset:0, count:1 + └─Selection 9990.00 cop[tikv] not(isnull(collation_agg_func.tt.a)) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(a) from tt; +min(a) +a +desc format='brief' select min(a collate utf8mb4_bin) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(Column#8)->Column#6 +└─Projection 1.00 root cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8 + └─Projection 1.00 root collation_agg_func.tt.a + └─TopN 1.00 root Column#7, offset:0, count:1 + └─Projection 1.00 root collation_agg_func.tt.a, cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin), offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(a collate utf8mb4_bin) from tt; +min(a collate utf8mb4_bin) +A +desc format='brief' select max(a) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(collation_agg_func.tt.a)->Column#6 +└─TopN 1.00 root collation_agg_func.tt.a:desc, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.a:desc, offset:0, count:1 + └─Selection 9990.00 cop[tikv] not(isnull(collation_agg_func.tt.a)) + └─TableFullScan 10000.00 cop[tikv] table:tt 
keep order:false, stats:pseudo +select max(a) from tt; +max(a) +c +desc format='brief' select max(a collate utf8mb4_bin) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(Column#8)->Column#6 +└─Projection 1.00 root cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8 + └─Projection 1.00 root collation_agg_func.tt.a + └─TopN 1.00 root Column#7:desc, offset:0, count:1 + └─Projection 1.00 root collation_agg_func.tt.a, cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin):desc, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(a collate utf8mb4_bin) from tt; +max(a collate utf8mb4_bin) +c +desc format='brief' select min(b) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(Column#8)->Column#6 +└─TableReader 1.00 root data:StreamAgg + └─StreamAgg 1.00 cop[tikv] funcs:min(collation_agg_func.tt.b)->Column#8 + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(b) from tt; +min(b) +a +desc format='brief' select min(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select min(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select max(b) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(Column#8)->Column#6 +└─TableReader 1.00 root data:StreamAgg + └─StreamAgg 1.00 cop[tikv] funcs:max(collation_agg_func.tt.b)->Column#8 + └─TableFullScan 10000.00 cop[tikv] table:tt keep 
order:false, stats:pseudo +select max(b) from tt; +max(b) +c +desc format='brief' select max(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select max(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select min(c) from tt; +id estRows task access object operator info +HashAgg 1.00 root funcs:min(collation_agg_func.tt.c)->Column#6 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(c) from tt; +min(c) +a +desc format='brief' select min(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select min(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select max(c) from tt; +id estRows task access object operator info +HashAgg 1.00 root funcs:max(collation_agg_func.tt.c)->Column#6 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(c) from tt; +max(c) +c +desc format='brief' select max(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select max(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select min(d) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(collation_agg_func.tt.d)->Column#6 +└─TopN 1.00 root collation_agg_func.tt.d, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.d, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, var_string(4294967295)))) + └─TableFullScan 
10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(d) from tt; +min(d) +{"A": "A"} +desc format='brief' select min(d collate utf8mb4_bin) from tt; +Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +select min(d collate utf8mb4_bin) from tt; +Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +desc format='brief' select max(d) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(collation_agg_func.tt.d)->Column#6 +└─TopN 1.00 root collation_agg_func.tt.d:desc, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.d:desc, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, var_string(4294967295)))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(d) from tt; +max(d) +{"c": "c"} +desc format='brief' select max(d collate utf8mb4_bin) from tt; +Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +select max(d collate utf8mb4_bin) from tt; +Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +drop database collation_agg_func; +use test diff --git a/cmd/explaintest/t/collation_agg_func.test b/cmd/explaintest/t/collation_agg_func.test new file mode 100644 index 0000000000000..160116ac06c96 --- /dev/null +++ b/cmd/explaintest/t/collation_agg_func.test @@ -0,0 +1,106 @@ +# These tests check how aggregate functions behave under different collations. +# NOTE: the results of min/max on enum/set columns are currently wrong; please fix them soon. 
+ +# prepare database +create database collation_agg_func; +use collation_agg_func; + +create table t(id int, value varchar(20) charset utf8mb4 collate utf8mb4_general_ci, value1 varchar(20) charset utf8mb4 collate utf8mb4_bin); +insert into t values (1, 'abc', 'abc '),(4, 'Abc', 'abc'),(3,'def', 'def '), (5, 'abc', 'ABC'); + +# group_concat +desc format='brief' select group_concat(value order by 1) from t; +select group_concat(value order by 1) from t; +desc format='brief' select group_concat(value) from t; +select group_concat(value) from t; +desc format='brief' select group_concat(value collate utf8mb4_bin) from t; +select group_concat(value collate utf8mb4_bin) from t; +desc format='brief' select group_concat(distinct value order by 1) from t; +select upper(group_concat(distinct value order by 1)) from t; +desc format='brief' select group_concat(distinct value collate utf8mb4_bin order by 1) from t; +select upper(group_concat(distinct value collate utf8mb4_bin order by 1)) from t; +desc format='brief' select group_concat(distinct value) from t; +select upper(group_concat(distinct value)) from t; +desc format='brief' select group_concat(distinct value collate utf8mb4_bin) from t; +select upper(group_concat(distinct value collate utf8mb4_bin)) from t; + +# count(distinct) +desc format='brief' select count(distinct value) from t; +select count(distinct value) from t; +desc format='brief' select count(distinct value collate utf8mb4_bin) from t; +select count(distinct value collate utf8mb4_bin) from t; +desc format='brief' select count(distinct value, value1) from t; +select count(distinct value, value1) from t; +desc format='brief' select count(distinct value collate utf8mb4_bin, value1) from t; +select count(distinct value collate utf8mb4_bin, value1) from t; + +# approxCountDistinct +desc format='brief' select approx_count_distinct(value) from t; +select approx_count_distinct(value) from t; +desc format='brief' select approx_count_distinct(value collate 
utf8mb4_bin) from t; +select approx_count_distinct(value collate utf8mb4_bin) from t; +desc format='brief' select approx_count_distinct(value, value1) from t; +select approx_count_distinct(value, value1) from t; +desc format='brief' select approx_count_distinct(value collate utf8mb4_bin, value1) from t; +select approx_count_distinct(value collate utf8mb4_bin, value1) from t; + +# minMax +create table tt(a char(10), b enum('a', 'B', 'c'), c set('a', 'B', 'c'), d json) collate utf8mb4_general_ci; +insert into tt values ("a", "a", "a", JSON_OBJECT("a", "a")); +--error 1265 +insert into tt values ("A", "A", "A", JSON_OBJECT("A", "A")); +--error 1265 +insert into tt values ("b", "b", "b", JSON_OBJECT("b", "b")); +insert into tt values ("B", "B", "B", JSON_OBJECT("B", "B")); +insert into tt values ("c", "c", "c", JSON_OBJECT("c", "c")); +--error 1265 +insert into tt values ("C", "C", "C", JSON_OBJECT("C", "C")); +split table tt by (0), (1), (2), (3), (4), (5); +desc format='brief' select min(a) from tt; +select min(a) from tt; +desc format='brief' select min(a collate utf8mb4_bin) from tt; +select min(a collate utf8mb4_bin) from tt; +desc format='brief' select max(a) from tt; +select max(a) from tt; +desc format='brief' select max(a collate utf8mb4_bin) from tt; +select max(a collate utf8mb4_bin) from tt; +desc format='brief' select min(b) from tt; +select min(b) from tt; +--error 1235 +desc format='brief' select min(b collate utf8mb4_bin) from tt; +--error 1235 +select min(b collate utf8mb4_bin) from tt; +desc format='brief' select max(b) from tt; +select max(b) from tt; +--error 1235 +desc format='brief' select max(b collate utf8mb4_bin) from tt; +--error 1235 +select max(b collate utf8mb4_bin) from tt; +desc format='brief' select min(c) from tt; +select min(c) from tt; +--error 1235 +desc format='brief' select min(c collate utf8mb4_bin) from tt; +--error 1235 +select min(c collate utf8mb4_bin) from tt; +desc format='brief' select max(c) from tt; +select max(c) from 
tt; +--error 1235 +desc format='brief' select max(c collate utf8mb4_bin) from tt; +--error 1235 +select max(c collate utf8mb4_bin) from tt; +desc format='brief' select min(d) from tt; +select min(d) from tt; +--error 1253 +desc format='brief' select min(d collate utf8mb4_bin) from tt; +--error 1253 +select min(d collate utf8mb4_bin) from tt; +desc format='brief' select max(d) from tt; +select max(d) from tt; +--error 1253 +desc format='brief' select max(d collate utf8mb4_bin) from tt; +--error 1253 +select max(d collate utf8mb4_bin) from tt; + +# cleanup environment +drop database collation_agg_func; +use test diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index c088e0f48d582..c4b94a9874eb1 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -1199,6 +1199,10 @@ func (er *expressionRewriter) Leave(originInNode ast.Node) (retNode ast.Node, ok } // SetCollationExpr sets the collation explicitly, even when the evaluation type of the expression is non-string. if _, ok := arg.(*expression.Column); ok { + if arg.GetType().GetType() == mysql.TypeEnum || arg.GetType().GetType() == mysql.TypeSet { + er.err = ErrNotSupportedYet.GenWithStackByArgs("use collate clause for enum or set") + break + } // Wrap a cast here to avoid changing the original FieldType of the column expression. exprType := arg.GetType().Clone() exprType.Collate = v.Collate