Skip to content

Commit

Permalink
*: fix upper-cased charset and collation name (#301) (#314)
Browse files Browse the repository at this point in the history
  • Loading branch information
bb7133 authored and winkyao committed May 5, 2019
1 parent 9cdb890 commit b2f7fc7
Show file tree
Hide file tree
Showing 6 changed files with 3,657 additions and 3,588 deletions.
39 changes: 31 additions & 8 deletions charset/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,17 @@ import (

"github.com/pingcap/errors"
"github.com/pingcap/parser/mysql"
"github.com/pingcap/parser/terror"
)

// Error codes for the collation errors of this package, each converted from
// the corresponding MySQL error number.
const (
codeCollationCharsetMismatch = terror.ErrCode(mysql.ErrCollationCharsetMismatch)
codeUnknownCollation = terror.ErrCode(mysql.ErrUnknownCollation)
)

var (
// ErrUnknownCollation is the DDL-class error built from the MySQL
// ErrUnknownCollation code and message. GetCollationByName returns it
// when the requested collation name is not known.
ErrUnknownCollation = terror.ClassDDL.New(codeUnknownCollation, mysql.MySQLErrName[mysql.ErrUnknownCollation])
// ErrCollationCharsetMismatch is the DDL-class error built from the MySQL
// ErrCollationCharsetMismatch code and message.
ErrCollationCharsetMismatch = terror.ClassDDL.New(codeCollationCharsetMismatch, mysql.MySQLErrName[mysql.ErrCollationCharsetMismatch])
)

// Charset is a charset.
Expand All @@ -40,6 +51,8 @@ type Collation struct {
}

// charsets maps a charset name to its Charset; it is filled in init.
var charsets = make(map[string]*Charset)
// collationsIDMap maps a collation ID to its Collation; it is filled in init
// from the collations table.
var collationsIDMap = make(map[int]*Collation)
// collationsNameMap maps a collation name to its Collation; it is filled in
// init from the names listed in mysql.Collations. GetCollationByName
// lower-cases its argument before looking it up here.
var collationsNameMap = make(map[string]*Collation)

// All the supported charsets should be in the following table.
var charsetInfos = []*Charset{
Expand Down Expand Up @@ -150,10 +163,8 @@ func GetCharsetInfoByID(coID int) (string, string, error) {
if coID == mysql.DefaultCollationID {
return mysql.DefaultCharset, mysql.DefaultCollationName, nil
}
for _, collation := range collations {
if coID == collation.ID {
return collation.CharsetName, collation.Name, nil
}
if collation, ok := collationsIDMap[coID]; ok {
return collation.CharsetName, collation.Name, nil
}
return "", "", errors.Errorf("Unknown charset id %d", coID)
}
Expand All @@ -163,6 +174,14 @@ func GetCollations() []*Collation {
return collations
}

// GetCollationByName returns the collation registered under the given name.
// The lookup is case-insensitive: name is lower-cased before it is used as the
// key into collationsNameMap. ErrUnknownCollation is returned when no usable
// collation is registered under that name.
func GetCollationByName(name string) (*Collation, error) {
	collation, ok := collationsNameMap[strings.ToLower(name)]
	// init fills collationsNameMap by looking each ID of mysql.Collations up in
	// collationsIDMap, which yields a nil *Collation for an ID that has no
	// registered Collation. A present key is therefore not enough: a nil entry
	// must be reported as unknown instead of being returned as (nil, nil).
	if !ok || collation == nil {
		return nil, ErrUnknownCollation.GenWithStackByArgs(name)
	}
	return collation, nil
}

const (
// CharsetBin is used for marking binary charset.
CharsetBin = "binary"
Expand Down Expand Up @@ -415,10 +434,14 @@ func init() {
charsets[c.Name] = c
}
for _, c := range collations {
charset, ok := charsets[c.CharsetName]
if !ok {
continue
collationsIDMap[c.ID] = c

if charset, ok := charsets[c.CharsetName]; ok {
charset.Collations[c.Name] = c
}
charset.Collations[c.Name] = c
}

for id, name := range mysql.Collations {
collationsNameMap[name] = collationsIDMap[int(id)]
}
}
13 changes: 13 additions & 0 deletions charset/charset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,16 @@ func (s *testCharsetSuite) TestGetDefaultCollation(c *C) {
testGetDefaultCollation(c, tt.cs, tt.co, tt.succ)
}
}

// TestGetCollationByName checks that every entry of the collations table can
// be fetched back by its name, and that an unregistered name produces the
// unknown-collation error.
func (s *testCharsetSuite) TestGetCollationByName(c *C) {
	defer testleak.AfterTest(c)()

	// Round-trip each known collation through the by-name lookup.
	for i := range collations {
		want := collations[i]
		got, lookupErr := GetCollationByName(want.Name)
		c.Assert(lookupErr, IsNil)
		c.Assert(got, Equals, want)
	}

	// A name that is not registered must be rejected.
	_, err := GetCollationByName("non_exist")
	c.Assert(err, ErrorMatches, "\\[ddl:1273\\]Unknown collation: 'non_exist'")
}
8 changes: 7 additions & 1 deletion model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,15 @@ const (
// After version V2.1.2 (PR#8738), TiDB added a UTF8 check, so users who upgraded from v2.0.8 got an error when inserting UTF8MB4 characters.
// This was incompatible for those users, so we fixed it in PR #9820 and increased the version number.
TableInfoVersion2 = uint16(2)
// TableInfoVersion3 means the table info version is 3.
// This version deals with upper-cased charset names in TableInfo stored by versions prior to TiDB v2.1.9:
// TiDB always assumes that all charsets / collations are lower-cased and tries to convert them if they are not.
// However, the conversion was missed in some scenarios before v2.1.9, so for all tables prior to TableInfoVersion3, their
// charsets / collations are converted to lower case while loading from the storage.
TableInfoVersion3 = uint16(3)

// CurrLatestTableInfoVersion means the latest table info in the current TiDB.
CurrLatestTableInfoVersion = TableInfoVersion2
CurrLatestTableInfoVersion = TableInfoVersion3
)

// ExtraHandleName is the name of ExtraHandle Column.
Expand Down
2 changes: 2 additions & 0 deletions mysql/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ var Collations = map[uint8]string{
245: "utf8mb4_croatian_ci",
246: "utf8mb4_unicode_520_ci",
247: "utf8mb4_vietnamese_ci",
255: "utf8mb4_0900_ai_ci",
}

// CollationNames maps MySQL default collation name to its ID
Expand Down Expand Up @@ -547,6 +548,7 @@ var CollationNames = map[string]uint8{
"utf8mb4_croatian_ci": 245,
"utf8mb4_unicode_520_ci": 246,
"utf8mb4_vietnamese_ci": 247,
"utf8mb4_0900_ai_ci": 255,
}

// MySQL collation information.
Expand Down
Loading

0 comments on commit b2f7fc7

Please sign in to comment.