From 46a94fdd600fd92717d3eaeb716213656b8457c1 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Wed, 6 Nov 2024 17:25:05 +0900 Subject: [PATCH] fix number parsing (#511) --- ast/ast.go | 112 ++++--------------------------- lexer/lexer_test.go | 48 ++++++++++++++ token/token.go | 158 +++++++++++++++++++++----------------------- 3 files changed, 135 insertions(+), 183 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 47039ec6..e5341df5 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -341,118 +341,30 @@ func Bool(tk *token.Token) *BoolNode { // Integer create node for integer value func Integer(tk *token.Token) *IntegerNode { - switch tk.Type { - case token.BinaryIntegerType: - // skip two characters because binary token starts with '0b' - parsedNum := parseNumber("0b", tk.Value) - if parsedNum.isNegative { - i, _ := strconv.ParseInt(parsedNum.String(), 2, 64) - return &IntegerNode{ - BaseNode: &BaseNode{}, - Token: tk, - Value: i, - } - } - i, _ := strconv.ParseUint(parsedNum.String(), 2, 64) - return &IntegerNode{ - BaseNode: &BaseNode{}, - Token: tk, - Value: i, - } - case token.OctetIntegerType: - // octet token starts with '0o' or '-0o' or '0' or '-0' - parsedNum := parseNumber("0o", tk.Value) - if parsedNum.isNegative { - i, _ := strconv.ParseInt(parsedNum.String(), 8, 64) - return &IntegerNode{ - BaseNode: &BaseNode{}, - Token: tk, - Value: i, - } - } - i, _ := strconv.ParseUint(parsedNum.String(), 8, 64) - return &IntegerNode{ - BaseNode: &BaseNode{}, - Token: tk, - Value: i, - } - case token.HexIntegerType: - // hex token starts with '0x' or '-0x' - parsedNum := parseNumber("0x", tk.Value) - if parsedNum.isNegative { - i, _ := strconv.ParseInt(parsedNum.String(), 16, 64) - return &IntegerNode{ - BaseNode: &BaseNode{}, - Token: tk, - Value: i, - } - } - i, _ := strconv.ParseUint(parsedNum.String(), 16, 64) - return &IntegerNode{ - BaseNode: &BaseNode{}, - Token: tk, - Value: i, - } - } - parsedNum := parseNumber("", tk.Value) - if parsedNum.isNegative { - i, _ := strconv.ParseInt(parsedNum.String(), 10, 64) - return &IntegerNode{ - BaseNode: &BaseNode{}, - Token: tk, - Value: i, - } + var v any + if num := token.ToNumber(tk.Value); num != nil { + v = num.Value } - i, _ := strconv.ParseUint(parsedNum.String(), 10, 64) return &IntegerNode{ BaseNode: &BaseNode{}, Token: tk, - Value: i, - } -} - -type parsedNumber struct { - isNegative bool - num string -} - -func (n *parsedNumber) String() string { - if n.isNegative { - return "-" + n.num - } - return n.num -} - -func parseNumber(prefix, value string) *parsedNumber { - isNegative := value[0] == '-' - trimmed := strings.TrimPrefix(value, "+") - trimmed = strings.TrimPrefix(trimmed, "-") - trimmed = strings.TrimPrefix(trimmed, prefix) - - num := make([]rune, 0, len(trimmed)) - for _, v := range trimmed { - if v == '_' { - continue - } - num = append(num, v) - } - if len(num) == 0 { - num = append(num, '0') - } - return &parsedNumber{ - isNegative: isNegative, - num: string(num), + Value: v, } } // Float create node for float value func Float(tk *token.Token) *FloatNode { - parsedNum := parseNumber("", tk.Value) - f, _ := strconv.ParseFloat(parsedNum.String(), 64) + var v float64 + if num := token.ToNumber(tk.Value); num != nil && num.Type == token.NumberTypeFloat { + value, ok := num.Value.(float64) + if ok { + v = value + } + } return &FloatNode{ BaseNode: &BaseNode{}, Token: tk, - Value: f, + Value: v, } } diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 001e886a..bd2bd24c 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -86,6 +86,18 @@ func TestTokenize(t *testing.T) { }, }, }, + { + YAML: `0.123e+123`, + Tokens: token.Tokens{ + { + Type: token.FloatType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "0.123e+123", + Origin: "0.123e+123", + }, + }, + }, { YAML: `{} `, @@ -2257,6 +2269,42 @@ s: >-3 }, }, }, + { + YAML: `0b98765`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "0b98765", + Origin: "0b98765", + }, + }, + }, + { + YAML: `098765`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "098765", + Origin: "098765", + }, + }, + }, + { + YAML: `0o98765`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "0o98765", + Origin: "0o98765", + }, + }, + }, } for _, test := range tests { t.Run(test.YAML, func(t *testing.T) { diff --git a/token/token.go b/token/token.go index fce89039..2b3d99d6 100644 --- a/token/token.go +++ b/token/token.go @@ -2,6 +2,7 @@ package token import ( "fmt" + "strconv" "strings" ) @@ -508,104 +509,95 @@ var ( } ) -type numType int +type NumberType string const ( - numTypeNone numType = iota - numTypeBinary - numTypeOctet - numTypeHex - numTypeFloat + NumberTypeDecimal NumberType = "decimal" + NumberTypeBinary NumberType = "binary" + NumberTypeOctet NumberType = "octet" + NumberTypeHex NumberType = "hex" + NumberTypeFloat NumberType = "float" ) -type numStat struct { - isNum bool - typ numType +type NumberValue struct { + Type NumberType + Value any + Text string } -func getNumberStat(value string) *numStat { - stat := &numStat{} - if value == "" { - return stat +func ToNumber(value string) *NumberValue { + if len(value) == 0 { + return nil + } + if strings.HasPrefix(value, "_") { + return nil } dotCount := strings.Count(value, ".") if dotCount > 1 { - return stat + return nil } - trimmed := strings.TrimPrefix(strings.TrimPrefix(value, "+"), "-") + isNegative := strings.HasPrefix(value, "-") + normalized := strings.ReplaceAll(strings.TrimPrefix(strings.TrimPrefix(value, "+"), "-"), "_", "") - var typ numType + var ( + typ NumberType + base int + ) switch { - case strings.HasPrefix(trimmed, "0x"): - trimmed = strings.TrimPrefix(trimmed, "0x") - typ = numTypeHex - case strings.HasPrefix(trimmed, "0o"): - trimmed = strings.TrimPrefix(trimmed, "0o") - typ = numTypeOctet - case strings.HasPrefix(trimmed, "0b"): - trimmed = strings.TrimPrefix(trimmed, "0b") - typ = numTypeBinary + case strings.HasPrefix(normalized, "0x"): + normalized = strings.TrimPrefix(normalized, "0x") + base = 16 + typ = NumberTypeHex + case strings.HasPrefix(normalized, "0o"): + normalized = strings.TrimPrefix(normalized, "0o") + base = 8 + typ = NumberTypeOctet + case strings.HasPrefix(normalized, "0b"): + normalized = strings.TrimPrefix(normalized, "0b") + base = 2 + typ = NumberTypeBinary + case strings.HasPrefix(normalized, "0") && len(normalized) > 1 && dotCount == 0: + base = 8 + typ = NumberTypeOctet case dotCount == 1: - typ = numTypeFloat + typ = NumberTypeFloat + default: + typ = NumberTypeDecimal + base = 10 } - if trimmed == "" { - return stat + text := normalized + if isNegative { + text = "-" + text } - var numCount int - for idx, c := range trimmed { - if isNumber(c) { - numCount++ - continue + var v any + if typ == NumberTypeFloat { + f, err := strconv.ParseFloat(text, 64) + if err != nil { + return nil } - switch c { - case '_', '.': - continue - case 'a', 'b', 'c', 'd', 'f', 'A', 'B', 'C', 'D', 'F': - if typ != numTypeHex && typ != numTypeBinary { - return stat - } - case 'e', 'E': - if typ == numTypeHex || typ == numTypeBinary { - continue - } - if typ != numTypeFloat { - return stat - } - - // looks like exponent number. - if len(trimmed) <= idx+2 { - return stat - } - sign := trimmed[idx+1] - if sign != '+' && sign != '-' { - return stat - } - for _, c := range trimmed[idx+2:] { - if !isNumber(c) { - return stat - } - } - stat.isNum = true - stat.typ = typ - return stat - default: - return stat + v = f + } else if isNegative { + i, err := strconv.ParseInt(text, base, 64) + if err != nil { + return nil } + v = i + } else { + u, err := strconv.ParseUint(text, base, 64) + if err != nil { + return nil + } + v = u } - if numCount > 1 && trimmed[0] == '0' && typ == numTypeNone { - // YAML 1.1 Spec ? - typ = numTypeOctet - } - stat.isNum = true - stat.typ = typ - return stat -} -func isNumber(c rune) bool { - return c >= '0' && c <= '9' + return &NumberValue{ + Type: typ, + Value: v, + Text: text, + } } func looksLikeTimeValue(value string) bool { @@ -632,7 +624,7 @@ func IsNeedQuoted(value string) bool { if _, exists := reservedEncKeywordMap[value]; exists { return true } - if stat := getNumberStat(value); stat.isNum { + if num := ToNumber(value); num != nil { return true } first := value[0] @@ -683,7 +675,7 @@ func New(value string, org string, pos *Position) *Token { if fn != nil { return fn(value, org, pos) } - if stat := getNumberStat(value); stat.isNum { + if num := ToNumber(value); num != nil { tk := &Token{ Type: IntegerType, CharacterType: CharacterTypeMiscellaneous, @@ -692,14 +684,14 @@ func New(value string, org string, pos *Position) *Token { Origin: org, Position: pos, } - switch stat.typ { - case numTypeFloat: + switch num.Type { + case NumberTypeFloat: tk.Type = FloatType - case numTypeBinary: + case NumberTypeBinary: tk.Type = BinaryIntegerType - case numTypeOctet: + case NumberTypeOctet: tk.Type = OctetIntegerType - case numTypeHex: + case NumberTypeHex: tk.Type = HexIntegerType } return tk