Skip to content

Commit

Permalink
generalize small letter char
Browse files Browse the repository at this point in the history
  • Loading branch information
ichiban committed Jul 23, 2022
1 parent 862b26a commit 56a4e16
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 6 deletions.
13 changes: 7 additions & 6 deletions engine/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,12 @@ func (l *Lexer) doubleQuotedListToken() Token {
// Characters

// graphicCharRanges holds the Unicode blocks whose members count as graphic
// characters in addition to the ASCII set. It is built once at package
// initialization rather than on every isGraphicChar call.
var graphicCharRanges = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x2200, Hi: 0x22FF, Stride: 1}, // Mathematical Operators
		{Lo: 0x2A00, Hi: 0x2AFF, Stride: 1}, // Supplemental Mathematical Operators
	},
}

// isGraphicChar reports whether r is a graphic character: one of the ASCII
// symbols #$&*+-./:<=>?@^~ or any code point in the Mathematical Operators
// (U+2200..U+22FF) or Supplemental Mathematical Operators (U+2A00..U+2AFF)
// blocks.
func isGraphicChar(r rune) bool {
	return strings.ContainsRune(`#$&*+-./:<=>?@^~`, r) || unicode.In(r, graphicCharRanges)
}

func isAlphanumericChar(r rune) bool {
Expand All @@ -696,11 +701,7 @@ func isLetterChar(r rune) bool {
}

// isSmallLetterChar reports whether r is a small letter character: a
// lowercase letter (Unicode category Ll) or a letter that has no case
// (category Lo). Lo covers ideographs, kana, Hangul syllables, and Arabic
// and Hebrew letters, so no script needs to be listed individually.
func isSmallLetterChar(r rune) bool {
	return unicode.In(r, unicode.Ll, unicode.Lo)
}

func isCapitalLetterChar(r rune) bool {
Expand Down
9 changes: 9 additions & 0 deletions engine/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,18 @@ func TestLexer_Token(t *testing.T) {
{input: "/* comment ", token: Token{Kind: TokenInsufficient}},
{input: `/ *`, token: Token{Kind: TokenGraphic, Val: `/`}},

{input: `改善`, token: Token{Kind: TokenLetterDigit, Val: `改善`}},
{input: `プロログ`, token: Token{Kind: TokenLetterDigit, Val: `プロログ`}},
{input: `ぷろろぐ`, token: Token{Kind: TokenLetterDigit, Val: `ぷろろぐ`}},
{input: `프롤로그`, token: Token{Kind: TokenLetterDigit, Val: `프롤로그`}},
{input: `برولوغ`, token: Token{Kind: TokenLetterDigit, Val: `برولوغ`}},
{input: `פרולוג`, token: Token{Kind: TokenLetterDigit, Val: `פרולוג`}},

{input: `..`, token: Token{Kind: TokenGraphic, Val: `..`}},
{input: `#`, token: Token{Kind: TokenGraphic, Val: `#`}},
{input: `\`, token: Token{Kind: TokenGraphic, Val: `\`}},
{input: `∀`, token: Token{Kind: TokenGraphic, Val: `∀`}},
{input: `⨀`, token: Token{Kind: TokenGraphic, Val: `⨀`}},

{input: `'abc'`, token: Token{Kind: TokenQuoted, Val: "'abc'"}},
{input: `'don''t panic'`, token: Token{Kind: TokenQuoted, Val: "'don''t panic'"}},
Expand Down

0 comments on commit 56a4e16

Please sign in to comment.