Skip to content

Commit

Permalink
*: fix upper-cased charset and collation name
Browse files Browse the repository at this point in the history
  • Loading branch information
bb7133 committed Apr 28, 2019
1 parent f3ecae0 commit 8218952
Show file tree
Hide file tree
Showing 8 changed files with 3,933 additions and 3,862 deletions.
8 changes: 4 additions & 4 deletions ast/ddl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,12 +234,12 @@ func (ts *testDDLSuite) TestDDLColumnDefRestore(c *C) {
{"id set('''a''','''b''')", "`id` SET('''a''','''b''')"},
{"id set('a\\nb','a'' \\r\\nb','a\\rb')", "`id` SET('a\nb','a'' \r\nb','a\rb')"},
{`id set("a'\nb","a'b\tc")`, "`id` SET('a''\nb','a''b\tc')"},
{"id TEXT CHARACTER SET UTF8 COLLATE UTF8_UNICODE_G", "`id` TEXT CHARACTER SET UTF8 COLLATE UTF8_UNICODE_G"},
{"id TEXT CHARACTER SET UTF8 COLLATE UTF8_UNICODE_CI", "`id` TEXT CHARACTER SET UTF8 COLLATE utf8_unicode_ci"},
{"id text character set UTF8", "`id` TEXT CHARACTER SET UTF8"},
{"id text charset UTF8", "`id` TEXT CHARACTER SET UTF8"},
{"id varchar(50) collate UTF8MB4_CZECH_CI", "`id` VARCHAR(50) COLLATE UTF8MB4_CZECH_CI"},
{"id varchar(50) collate utf8", "`id` VARCHAR(50) COLLATE utf8"},
{"id varchar(50) collate utf8 collate utf8mb4_bin", "`id` VARCHAR(50) COLLATE utf8 COLLATE utf8mb4_bin"},
{"id varchar(50) collate UTF8MB4_CZECH_CI", "`id` VARCHAR(50) COLLATE utf8mb4_czech_ci"},
{"id varchar(50) collate utf8_bin", "`id` VARCHAR(50) COLLATE utf8_bin"},
{"id varchar(50) collate utf8_unicode_ci collate utf8mb4_bin", "`id` VARCHAR(50) COLLATE utf8_unicode_ci COLLATE utf8mb4_bin"},
{"c1 char(10) character set LATIN1 collate latin1_german1_ci", "`c1` CHAR(10) CHARACTER SET LATIN1 COLLATE latin1_german1_ci"},

{"id int(11) PRIMARY KEY", "`id` INT(11) PRIMARY KEY"},
Expand Down
32 changes: 28 additions & 4 deletions charset/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
Expand All @@ -18,6 +18,17 @@ import (

"github.com/pingcap/errors"
"github.com/pingcap/parser/mysql"
"github.com/pingcap/parser/terror"
)

// Error codes used by this package, obtained by converting the corresponding
// MySQL error numbers from the mysql package into terror.ErrCode values.
const (
// codeCollationCharsetMismatch is mysql.ErrCollationCharsetMismatch as a terror.ErrCode.
codeCollationCharsetMismatch = terror.ErrCode(mysql.ErrCollationCharsetMismatch)
// codeUnknownCollation is mysql.ErrUnknownCollation as a terror.ErrCode.
codeUnknownCollation = terror.ErrCode(mysql.ErrUnknownCollation)
)

// Errors of class terror.ClassDDL whose messages are taken from
// mysql.MySQLErrName for the matching MySQL error number.
var (
// ErrUnknownCollation is built from codeUnknownCollation; GetCollationByName
// returns it when the requested collation name is not found.
ErrUnknownCollation = terror.ClassDDL.New(codeUnknownCollation, mysql.MySQLErrName[mysql.ErrUnknownCollation])
// ErrCollationCharsetMismatch is built from codeCollationCharsetMismatch.
// NOTE(review): presumably reported when a collation does not belong to the
// given charset; no use site is visible here, confirm against callers.
ErrCollationCharsetMismatch = terror.ClassDDL.New(codeCollationCharsetMismatch, mysql.MySQLErrName[mysql.ErrCollationCharsetMismatch])
)

// Charset is a charset.
Expand All @@ -40,7 +51,8 @@ type Collation struct {
}

var charsets = make(map[string]*Charset)
var collationsMap = make(map[int]*Collation)
var collationsIDMap = make(map[int]*Collation)
var collationsNameMap = make(map[string]*Collation)
var descs = make([]*Desc, 0, len(charsetInfos))
var supportedCollations = make([]*Collation, 0, len(supportedCollationNames))

Expand Down Expand Up @@ -155,7 +167,7 @@ func GetCharsetInfoByID(coID int) (string, string, error) {
if coID == mysql.DefaultCollationID {
return mysql.DefaultCharset, mysql.DefaultCollationName, nil
}
if collation, ok := collationsMap[coID]; ok {
if collation, ok := collationsIDMap[coID]; ok {
return collation.CharsetName, collation.Name, nil
}
return "", "", errors.Errorf("Unknown charset id %d", coID)
Expand All @@ -166,6 +178,14 @@ func GetCollations() []*Collation {
return collations
}

// GetCollationByName returns the collation registered under name.
//
// The lookup is case-insensitive: name is lower-cased before it is used as the
// key into collationsNameMap. If no collation is registered for the name,
// it returns a nil collation and ErrUnknownCollation carrying the original
// (un-lowered) name.
func GetCollationByName(name string) (*Collation, error) {
	collation, ok := collationsNameMap[strings.ToLower(name)]
	// The name map is populated from mysql.Collations by looking each ID up in
	// collationsIDMap without checking that the ID exists there, so a key may
	// be present with a nil value. Treat that the same as a missing key rather
	// than handing callers a nil collation together with a nil error.
	if !ok || collation == nil {
		return nil, ErrUnknownCollation.GenWithStackByArgs(name)
	}
	return collation, nil
}

const (
// CharsetBin is used for marking binary charset.
CharsetBin = "binary"
Expand Down Expand Up @@ -426,7 +446,7 @@ func init() {
}

for _, c := range collations {
collationsMap[c.ID] = c
collationsIDMap[c.ID] = c

if _, ok := supportedCollationNames[c.Name]; ok {
supportedCollations = append(supportedCollations, c)
Expand All @@ -436,4 +456,8 @@ func init() {
charset.Collations[c.Name] = c
}
}

for id, name := range mysql.Collations {
collationsNameMap[name] = collationsIDMap[int(id)]
}
}
14 changes: 14 additions & 0 deletions charset/charset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,19 @@ func (s *testCharsetSuite) TestGetCharsetDesc(c *C) {
}
}

// TestGetCollationByName checks that every entry of collations can be fetched
// through GetCollationByName using its own name, and that looking up a name
// that is not registered yields the unknown-collation error.
func (s *testCharsetSuite) TestGetCollationByName(c *C) {
	defer testleak.AfterTest(c)()

	// Each known collation must round-trip through the by-name lookup.
	for idx := range collations {
		want := collations[idx]
		got, lookupErr := GetCollationByName(want.Name)
		c.Assert(lookupErr, IsNil)
		c.Assert(got, Equals, want)
	}

	// An unregistered name must be rejected with the expected message.
	_, missingErr := GetCollationByName("non_exist")
	c.Assert(missingErr, ErrorMatches, "\\[ddl:1273\\]Unknown collation: 'non_exist'")
}

func BenchmarkGetCharsetDesc(b *testing.B) {
b.ResetTimer()
charsets := []string{CharsetUTF8, CharsetUTF8MB4, CharsetASCII, CharsetLatin1, CharsetBin}
Expand All @@ -165,4 +178,5 @@ func BenchmarkGetCharsetDesc(b *testing.B) {
for i := 0; i < b.N; i++ {
GetCharsetDesc(cs)
}

}
8 changes: 7 additions & 1 deletion model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,15 @@ const (
// After version V2.1.2 (PR#8738), TiDB added a UTF8 check, so a user who upgraded from v2.0.8 and inserted some UTF8MB4 characters would get an error.
// This is not compatible for the user, so we try to fix this in PR #9820 and increase the version number.
TableInfoVersion2 = uint16(2)
// TableInfoVersion3 means the table info version is 3.
// This version deals with upper-cased charset names in TableInfo stored by versions prior to TiDB v2.1.9:
// TiDB always assumes that all charsets / collations are lower-cased and tries to convert them if they are not.
// However, the conversion was missed in some scenarios before v2.1.9, so for all tables prior to TableInfoVersion3, their
// charsets / collations will be converted to lower-case while loading from the storage.
TableInfoVersion3 = uint16(3)

// CurrLatestTableInfoVersion means the latest table info in the current TiDB.
CurrLatestTableInfoVersion = TableInfoVersion2
CurrLatestTableInfoVersion = TableInfoVersion3
)

// ExtraHandleName is the name of ExtraHandle Column.
Expand Down
2 changes: 2 additions & 0 deletions mysql/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ var Collations = map[uint8]string{
245: "utf8mb4_croatian_ci",
246: "utf8mb4_unicode_520_ci",
247: "utf8mb4_vietnamese_ci",
255: "utf8mb4_0900_ai_ci",
}

// CollationNames maps MySQL collation name to its ID
Expand Down Expand Up @@ -566,6 +567,7 @@ var CollationNames = map[string]uint8{
"utf8mb4_croatian_ci": 245,
"utf8mb4_unicode_520_ci": 246,
"utf8mb4_vietnamese_ci": 247,
"utf8mb4_0900_ai_ci": 255,
}

// MySQL collation information.
Expand Down
Loading

0 comments on commit 8218952

Please sign in to comment.