From 7736553729b6d73238118bccc2f2b82bdb24d350 Mon Sep 17 00:00:00 2001 From: Valentin Kaiser Date: Wed, 28 Aug 2024 10:19:01 +0200 Subject: [PATCH] [FIX] encoding handling, added MS936 --- dbase/encoding.go | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/dbase/encoding.go b/dbase/encoding.go index a0f60f7..787b83f 100644 --- a/dbase/encoding.go +++ b/dbase/encoding.go @@ -6,7 +6,9 @@ import ( "io" + "golang.org/x/text/encoding" "golang.org/x/text/encoding/charmap" + "golang.org/x/text/encoding/simplifiedchinese" "golang.org/x/text/transform" ) @@ -18,9 +20,11 @@ type EncodingConverter interface { } type DefaultConverter struct { - encoding *charmap.Charmap + encoding encoding.Encoding } +var customEncoding = map[byte]encoding.Encoding{} + // Decode decodes a specified encoding to byte slice to a UTF8 byte slice func (c DefaultConverter) Decode(in []byte) ([]byte, error) { if utf8.Valid(in) { @@ -74,17 +78,28 @@ func (c DefaultConverter) CodePage() byte { return 0x7D case charmap.Windows1256: // Arabic Windows return 0x7E + case simplifiedchinese.GBK: // Simplified Chinese GBK + return 0x7A default: + for mark, encoding := range customEncoding { + if encoding == c.encoding { + return mark + } + } return 0x00 } } -func NewDefaultConverter(encoding *charmap.Charmap) DefaultConverter { +func NewDefaultConverter(encoding encoding.Encoding) DefaultConverter { return DefaultConverter{encoding: encoding} } // NewDefaultConverterFromCodePage returns a new EncodingConverter from a code page mark func ConverterFromCodePage(codePageMark byte) DefaultConverter { + if encoding, ok := customEncoding[codePageMark]; ok { + return NewDefaultConverter(encoding) + } + switch codePageMark { case 0x01: // U.S. MS-DOS return NewDefaultConverter(charmap.CodePage437) @@ -112,7 +127,13 @@ func ConverterFromCodePage(codePageMark byte) DefaultConverter { return NewDefaultConverter(charmap.Windows1255) case 0x7E: // Arabic Windows return NewDefaultConverter(charmap.Windows1256) + case 0x7A: // Simplified Chinese GBK + return NewDefaultConverter(simplifiedchinese.GBK) default: // Default to Central European Windows return NewDefaultConverter(charmap.Windows1250) } } + +func RegisterCustomEncoding(codePageMark byte, encoding encoding.Encoding) { + customEncoding[codePageMark] = encoding +}