Skip to content

Commit

Permalink
[FIX] encoding handling, added MS936
Browse files Browse the repository at this point in the history
  • Loading branch information
Valentin-Kaiser committed Aug 28, 2024
1 parent 615bb9e commit 7736553
Showing 1 changed file with 23 additions and 2 deletions.
25 changes: 23 additions & 2 deletions dbase/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import (

"io"

"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
)

Expand All @@ -18,9 +20,11 @@ type EncodingConverter interface {
}

type DefaultConverter struct {
encoding *charmap.Charmap
encoding encoding.Encoding
}

// customEncoding maps a code page mark byte to an encoding added through
// RegisterCustomEncoding. ConverterFromCodePage checks it before its built-in
// switch, and CodePage searches it when the converter's encoding matches none
// of the built-in cases.
// NOTE(review): no locking is visible around this map; presumably entries are
// registered before any concurrent use — confirm.
var customEncoding = map[byte]encoding.Encoding{}

// Decode converts a byte slice in the converter's encoding to a UTF-8 byte slice
func (c DefaultConverter) Decode(in []byte) ([]byte, error) {
if utf8.Valid(in) {
Expand Down Expand Up @@ -74,17 +78,28 @@ func (c DefaultConverter) CodePage() byte {
return 0x7D
case charmap.Windows1256: // Arabic Windows
return 0x7E
case simplifiedchinese.GBK: // Simplified Chinese GBK
return 0x7A
default:
for mark, encoding := range customEncoding {
if encoding == c.encoding {
return mark
}
}
return 0x00
}
}

func NewDefaultConverter(encoding *charmap.Charmap) DefaultConverter {
func NewDefaultConverter(encoding encoding.Encoding) DefaultConverter {
return DefaultConverter{encoding: encoding}
}

// ConverterFromCodePage returns a new EncodingConverter from a code page mark
func ConverterFromCodePage(codePageMark byte) DefaultConverter {
if encoding, ok := customEncoding[codePageMark]; ok {
return NewDefaultConverter(encoding)
}

switch codePageMark {
case 0x01: // U.S. MS-DOS
return NewDefaultConverter(charmap.CodePage437)
Expand Down Expand Up @@ -112,7 +127,13 @@ func ConverterFromCodePage(codePageMark byte) DefaultConverter {
return NewDefaultConverter(charmap.Windows1255)
case 0x7E: // Arabic Windows
return NewDefaultConverter(charmap.Windows1256)
case 0x7A: // Simplified Chinese GBK
return NewDefaultConverter(simplifiedchinese.GBK)
default: // Default to Central European Windows
return NewDefaultConverter(charmap.Windows1250)
}
}

// RegisterCustomEncoding stores encoding in the package-level customEncoding
// map under codePageMark, replacing any entry previously stored for that mark.
//
// ConverterFromCodePage looks in this map before its built-in switch, so a
// registered mark takes precedence over a built-in mapping for the same byte.
// NOTE(review): the map is written without synchronization here; presumably
// callers register encodings during setup, before concurrent use — confirm.
func RegisterCustomEncoding(codePageMark byte, encoding encoding.Encoding) {
customEncoding[codePageMark] = encoding
}

0 comments on commit 7736553

Please sign in to comment.