-
Notifications
You must be signed in to change notification settings - Fork 5.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
util/collate: implement utf8mb4_0900_ai_ci collation (#45650)
close #37566
- Loading branch information
Showing
36 changed files
with
64,000 additions
and
115 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE | ||
|
||
COPYRIGHT AND PERMISSION NOTICE | ||
|
||
Copyright © 1991-2023 Unicode, Inc. | ||
|
||
NOTICE TO USER: Carefully read the following legal agreement. BY | ||
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR | ||
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE | ||
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT | ||
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a | ||
copy of data files and any associated documentation (the "Data Files") or | ||
software and any associated documentation (the "Software") to deal in the | ||
Data Files or Software without restriction, including without limitation | ||
the rights to use, copy, modify, merge, publish, distribute, and/or sell | ||
copies of the Data Files or Software, and to permit persons to whom the | ||
Data Files or Software are furnished to do so, provided that either (a) | ||
this copyright and permission notice appear with all copies of the Data | ||
Files or Software, or (b) this copyright and permission notice appear in | ||
associated Documentation. | ||
|
||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY | ||
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF | ||
THIRD PARTY RIGHTS. | ||
|
||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE | ||
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, | ||
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | ||
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, | ||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA | ||
FILES OR SOFTWARE. | ||
|
||
Except as contained in this notice, the name of a copyright holder shall | ||
not be used in advertising or otherwise to promote the sale, use or other | ||
dealings in these Data Files or Software without prior written | ||
authorization of the copyright holder. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
load("@io_bazel_rules_go//go:def.bzl", "go_test") | ||
|
||
# Bazel target for the collation tests in this directory. | ||
# It builds uca_test.go and links the shared //testkit helpers. | ||
go_test( | ||
name = "collation_test", | ||
# Run under Bazel's "short" test timeout class. | ||
timeout = "short", | ||
srcs = ["uca_test.go"], | ||
# Declared flaky to Bazel; NOTE(review): confirm this is still needed. | ||
flaky = True, | ||
deps = ["//testkit"], | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Copyright 2023 PingCAP, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package collation | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/pingcap/tidb/testkit" | ||
) | ||
|
||
func TestUTF8MB40900AICIOrder(t *testing.T) { | ||
store := testkit.CreateMockStore(t) | ||
|
||
tk := testkit.NewTestKit(t, store) | ||
tk.MustExec("USE test;") | ||
tk.MustExec("create table t (id int primary key auto_increment, str VARCHAR(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci)") | ||
tk.MustExec("insert into t(str) values ('カ'), ('カ'), ('abc'), ('abuFFFEc'), ('abⓒ'), ('𝒶bc'), ('𝕒bc'), ('ガ'), ('が'), ('abç'), ('äbc'), ('ヵ'), ('か'), ('Abc'), ('abC'), ('File-3'), ('file-12'), ('filé-110'), ('🍣'), ('🍺')") | ||
tk.MustQuery("select min(id) from t group by str order by str").Check(testkit.Rows( | ||
"19", "20", "3", "4", "18", "17", "16", "1")) | ||
} | ||
|
||
func TestUTF8MB40900AICIStrFunc(t *testing.T) { | ||
store := testkit.CreateMockStore(t) | ||
|
||
tk := testkit.NewTestKit(t, store) | ||
tk.MustExec("USE test;") | ||
// test locate | ||
tk.MustQuery("select LOCATE('bar' collate utf8mb4_0900_ai_ci, 'FOOBAR' collate utf8mb4_0900_ai_ci)").Check( | ||
testkit.Rows("4"), | ||
) | ||
// test regexp | ||
tk.MustQuery("select 'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci").Check( | ||
testkit.Rows("1"), | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.