From 0faee16367ee5100185888bc93d930a159d53162 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Wed, 6 Nov 2024 16:00:36 +0900 Subject: [PATCH] fix number parsing (#509) --- lexer/lexer_test.go | 62 ++++++++++++++++++++++- token/token.go | 121 +++++++++++++++++++++++--------------------- 2 files changed, 124 insertions(+), 59 deletions(-) diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 810d2d2b..001e886a 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -30,7 +30,7 @@ func TestTokenize(t *testing.T) { YAML: `0_`, Tokens: token.Tokens{ { - Type: token.OctetIntegerType, + Type: token.IntegerType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "0_", @@ -38,6 +38,54 @@ func TestTokenize(t *testing.T) { }, }, }, + { + YAML: `0x_1A_2B_3C`, + Tokens: token.Tokens{ + { + Type: token.HexIntegerType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "0x_1A_2B_3C", + Origin: "0x_1A_2B_3C", + }, + }, + }, + { + YAML: `+0b1010`, + Tokens: token.Tokens{ + { + Type: token.BinaryIntegerType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "+0b1010", + Origin: "+0b1010", + }, + }, + }, + { + YAML: `0100`, + Tokens: token.Tokens{ + { + Type: token.OctetIntegerType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "0100", + Origin: "0100", + }, + }, + }, + { + YAML: `0o10`, + Tokens: token.Tokens{ + { + Type: token.OctetIntegerType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "0o10", + Origin: "0o10", + }, + }, + }, { YAML: `{} `, @@ -2197,6 +2245,18 @@ s: >-3 }, }, }, + { + YAML: `1x0`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "1x0", + Origin: "1x0", + }, + }, + }, } for _, test := range tests { t.Run(test.YAML, func(t *testing.T) { diff --git a/token/token.go b/token/token.go index c2d9a4bc..fce89039 100644 --- a/token/token.go +++ b/token/token.go @@ -523,86 +523,91 @@ type numStat struct { typ numType } -func getNumberStat(str string) *numStat { +func getNumberStat(value string) *numStat { stat := &numStat{} - if str == "" { + if value == "" { return stat } - if str == "-" || str == "." || str == "+" || str == "_" { + dotCount := strings.Count(value, ".") + if dotCount > 1 { return stat } - if str[0] == '_' { + + trimmed := strings.TrimPrefix(strings.TrimPrefix(value, "+"), "-") + + var typ numType + switch { + case strings.HasPrefix(trimmed, "0x"): + trimmed = strings.TrimPrefix(trimmed, "0x") + typ = numTypeHex + case strings.HasPrefix(trimmed, "0o"): + trimmed = strings.TrimPrefix(trimmed, "0o") + typ = numTypeOctet + case strings.HasPrefix(trimmed, "0b"): + trimmed = strings.TrimPrefix(trimmed, "0b") + typ = numTypeBinary + case dotCount == 1: + typ = numTypeFloat + } + + if trimmed == "" { return stat } - dotFound := false - isNegative := false - isExponent := false - if str[0] == '-' { - isNegative = true - } - for idx, c := range str { + + var numCount int + for idx, c := range trimmed { + if isNumber(c) { + numCount++ + continue + } switch c { - case 'x': - if (isNegative && idx == 2) || (!isNegative && idx == 1) { - continue - } - case 'o': - if (isNegative && idx == 2) || (!isNegative && idx == 1) { - continue - } - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + case '_', '.': continue - case 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F': - if (len(str) > 2 && str[0] == '0' && str[1] == 'x') || - (len(str) > 3 && isNegative && str[1] == '0' && str[2] == 'x') { - // hex number - continue + case 'a', 'b', 'c', 'd', 'f', 'A', 'B', 'C', 'D', 'F': + if typ != numTypeHex && typ != numTypeBinary { + return stat } - if c == 'b' && ((isNegative && idx == 2) || (!isNegative && idx == 1)) { - // binary number + case 'e', 'E': + if typ == numTypeHex || typ == numTypeBinary { continue } - if (c == 'e' || c == 'E') && dotFound { - // exponent - isExponent = true - continue + if typ != numTypeFloat { + return stat } - case '.': - if dotFound { - // multiple dot + + // looks like exponent number. + if len(trimmed) <= idx+2 { return stat } - dotFound = true - continue - case '-': - if idx == 0 || isExponent { - continue + sign := trimmed[idx+1] + if sign != '+' && sign != '-' { + return stat } - case '+': - if idx == 0 || isExponent { - continue + for _, c := range trimmed[idx+2:] { + if !isNumber(c) { + return stat + } } - case '_': - continue + stat.isNum = true + stat.typ = typ + return stat + default: + return stat } - return stat } - stat.isNum = true - switch { - case dotFound: - stat.typ = numTypeFloat - case strings.HasPrefix(str, "0b") || strings.HasPrefix(str, "-0b"): - stat.typ = numTypeBinary - case strings.HasPrefix(str, "0x") || strings.HasPrefix(str, "-0x"): - stat.typ = numTypeHex - case strings.HasPrefix(str, "0o") || strings.HasPrefix(str, "-0o"): - stat.typ = numTypeOctet - case (len(str) > 1 && str[0] == '0') || (len(str) > 1 && str[0] == '-' && str[1] == '0'): - stat.typ = numTypeOctet + if numCount > 1 && trimmed[0] == '0' && typ == numTypeNone { + // YAML 1.1 Spec ? + typ = numTypeOctet } + stat.isNum = true + stat.typ = typ return stat } +func isNumber(c rune) bool { + return c >= '0' && c <= '9' +} + func looksLikeTimeValue(value string) bool { for i, c := range value { switch c { @@ -672,7 +677,7 @@ func LiteralBlockHeader(value string) string { } } -// New create reserved keyword token or number token and other string token +// New create reserved keyword token or number token and other string token. func New(value string, org string, pos *Position) *Token { fn := reservedKeywordMap[value] if fn != nil {