From 56a4e167786a3cb56894aa41040bca8d0bdcd200 Mon Sep 17 00:00:00 2001 From: Yutaka Ichibangase Date: Sat, 23 Jul 2022 16:46:34 +0900 Subject: [PATCH] generalize small letter char --- engine/lexer.go | 13 +++++++------ engine/lexer_test.go | 9 +++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/engine/lexer.go b/engine/lexer.go index d94bc235..0a538146 100644 --- a/engine/lexer.go +++ b/engine/lexer.go @@ -680,7 +680,12 @@ func (l *Lexer) doubleQuotedListToken() Token { // Characters func isGraphicChar(r rune) bool { - return strings.ContainsRune(`#$&*+-./:<=>?@^~`, r) + return strings.ContainsRune(`#$&*+-./:<=>?@^~`, r) || unicode.In(r, &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 0x2200, Hi: 0x22FF, Stride: 1}, // Mathematical Operators + {Lo: 0x2A00, Hi: 0x2AFF, Stride: 1}, // Supplemental Mathematical Operators + }, + }) } func isAlphanumericChar(r rune) bool { @@ -696,11 +701,7 @@ func isLetterChar(r rune) bool { } func isSmallLetterChar(r rune) bool { - return unicode.IsLower(r) || unicode.In(r, - unicode.Unified_Ideograph, - unicode.Hiragana, - unicode.Katakana, - ) + return unicode.In(r, unicode.Ll, unicode.Lo) } func isCapitalLetterChar(r rune) bool { diff --git a/engine/lexer_test.go b/engine/lexer_test.go index aafd5c46..85aa1520 100644 --- a/engine/lexer_test.go +++ b/engine/lexer_test.go @@ -33,9 +33,18 @@ func TestLexer_Token(t *testing.T) { {input: "/* comment ", token: Token{Kind: TokenInsufficient}}, {input: `/ *`, token: Token{Kind: TokenGraphic, Val: `/`}}, + {input: `改善`, token: Token{Kind: TokenLetterDigit, Val: `改善`}}, + {input: `プロログ`, token: Token{Kind: TokenLetterDigit, Val: `プロログ`}}, + {input: `ぷろろぐ`, token: Token{Kind: TokenLetterDigit, Val: `ぷろろぐ`}}, + {input: `프롤로그`, token: Token{Kind: TokenLetterDigit, Val: `프롤로그`}}, + {input: `برولوغ`, token: Token{Kind: TokenLetterDigit, Val: `برولوغ`}}, + {input: `פרולוג`, token: Token{Kind: TokenLetterDigit, Val: `פרולוג`}}, + {input: `..`, token: Token{Kind: TokenGraphic, Val: `..`}}, {input: `#`, token: Token{Kind: TokenGraphic, Val: `#`}}, {input: `\`, token: Token{Kind: TokenGraphic, Val: `\`}}, + {input: `∀`, token: Token{Kind: TokenGraphic, Val: `∀`}}, + {input: `⨀`, token: Token{Kind: TokenGraphic, Val: `⨀`}}, {input: `'abc'`, token: Token{Kind: TokenQuoted, Val: "'abc'"}}, {input: `'don''t panic'`, token: Token{Kind: TokenQuoted, Val: "'don''t panic'"}},