From 1e3deb47b4fcb370eb8db926cf4f5cd95d260d9c Mon Sep 17 00:00:00 2001 From: Laurent Demailly Date: Tue, 20 Aug 2024 15:10:14 -0700 Subject: [PATCH] Show parsing errors in context of line where they occur (#168) * Show errors in context of line where they occur * noprefix also is about previous token * Fix case where error is at start of line and fix illegal token case for prefix eg @ * who doesn't love linters changing under them * correct comment about error logging in main --- eval/eval.go | 4 ++-- lexer/lexer.go | 22 ++++++++++++++++++++++ main.go | 9 ++++++--- main_test.txtar | 28 ++++++++++++++++++++-------- object/memory.go | 2 +- parser/parser.go | 33 +++++++++++++++++++++++---------- parser/parser_test.go | 15 +++++++++++++++ repl/repl.go | 2 -- token/token.go | 7 ++++++- 9 files changed, 95 insertions(+), 27 deletions(-) diff --git a/eval/eval.go b/eval/eval.go index dd74d26c..7c3f8b3f 100644 --- a/eval/eval.go +++ b/eval/eval.go @@ -327,7 +327,7 @@ func evalIndexExpression(left, index object.Object) object.Object { if idx < 0 || idx >= int64(len(str)) { return object.NULL } - return object.Integer{Value: int64(str[idx])} + return object.Integer{Value: int64(str[idx])} //nolint:gosec // https://github.com/securego/gosec/issues/1185 case left.Type() == object.ARRAY && idxOrZero.Type() == object.INTEGER: return evalArrayIndexExpression(left, idxOrZero) case left.Type() == object.MAP: @@ -725,7 +725,7 @@ func evalIntegerInfixExpression(operator token.Type, left, right object.Object) case token.LEFTSHIFT: return object.Integer{Value: leftVal << rightVal} case token.RIGHTSHIFT: - return object.Integer{Value: int64(uint64(leftVal) >> rightVal)} + return object.Integer{Value: int64(uint64(leftVal) >> rightVal)} //nolint:gosec // we want to be able to shift the hight bit. case token.BITAND: return object.Integer{Value: leftVal & rightVal} case token.BITOR: diff --git a/lexer/lexer.go b/lexer/lexer.go index d43d8c5f..e555e20c 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -1,6 +1,7 @@ package lexer import ( + "bytes" "strings" "grol.io/grol/token" @@ -12,6 +13,7 @@ type Lexer struct { lineMode bool hadWhitespace bool hadNewline bool // newline was seen before current token + lastNewLine int // position just after most recent newline } // Mode with input expected the be complete (multiline/file). @@ -35,6 +37,25 @@ func (l *Lexer) EOLEOF() *token.Token { return token.EOFT } +func (l *Lexer) Pos() int { + return l.pos +} + +func (l *Lexer) LastNewLine() int { + return l.lastNewLine +} + +// For error handling, somewhat expensive. +// Returns the current line and the current position relative in that line. 
+func (l *Lexer) CurrentLine() (string, int) { + p := min(l.pos, len(l.input)) + nextNewline := bytes.IndexByte(l.input[p:], '\n') + if nextNewline == -1 { + nextNewline = len(l.input) - p + } + return string(l.input[l.lastNewLine : p+nextNewline]), p - l.lastNewLine +} + func (l *Lexer) NextToken() *token.Token { l.skipWhitespace() ch := l.readChar() @@ -135,6 +156,7 @@ func (l *Lexer) skipWhitespace() { } if ch == '\n' { l.hadNewline = true + l.lastNewLine = l.pos + 1 } l.hadWhitespace = true l.pos++ diff --git a/main.go b/main.go index fe539de8..3d90cbde 100644 --- a/main.go +++ b/main.go @@ -9,6 +9,7 @@ import ( "os" "path/filepath" "runtime/debug" + "strings" "fortio.org/cli" "fortio.org/log" @@ -113,11 +114,13 @@ func Main() int { } if *commandFlag != "" { res, errs, _ := repl.EvalStringWithOption(options, *commandFlag) - if len(errs) > 0 { - log.Errf("Errors: %v", errs) + // Only parsing errors are already logged, eval errors aren't, we (re)log everything: + numErrs := len(errs) + if numErrs > 0 { + log.Errf("Total %d %s:\n%s", numErrs, cli.Plural(numErrs, "error"), strings.Join(errs, "\n")) } fmt.Print(res) - return len(errs) + return numErrs } if len(flag.Args()) == 0 { return repl.Interactive(options) diff --git a/main_test.txtar b/main_test.txtar index 042e144c..89d8336b 100644 --- a/main_test.txtar +++ b/main_test.txtar @@ -24,7 +24,7 @@ stderr welcome # syntax error non mode, stdout doesn't repeat errors !grol -c 'foo' -stderr 'Errors' +stderr 'Total 1 error' stderr 'identifier not found: foo' !stdout . @@ -148,38 +148,50 @@ stderr 'Read/evaluated: .gr' # max depth -!grol -no-auto -max-depth 12 -c 'func foo(n) {if n<=1 {1} else {self(n-1);n}}; foo(13)' +!grol -max-depth 12 -c 'func foo(n) {if n<=1 {1} else {self(n-1);n}}; foo(13)' stderr 'max depth 13 reached' -grol -no-auto -max-depth 12 -c 'func foo(n) {if n<=1 {1} else {self(n-1);n}}; foo(12)' +grol -max-depth 12 -c 'func foo(n) {if n<=1 {1} else {self(n-1);n}}; foo(12)' stdout '^12$' !stderr 'max depth.*reached' # map don't mutate on append -grol -no-auto -quiet -c 'm={2:"b"};n={1:"a"};println(m+n); println(m)' +grol -quiet -c 'm={2:"b"};n={1:"a"};println(m+n); println(m)' stdout '^{1:"a",2:"b"}\n{2:"b"}$' !stderr . -grol -no-auto -quiet -c 'm={1:1, nil:"foo"}; println(m+{nil:"bar"}); m' +grol -quiet -c 'm={1:1, nil:"foo"}; println(m+{nil:"bar"}); m' stdout '^{1:1,nil:"bar"}\n{1:1,nil:"foo"}$' !stderr . # int -grol -no-auto -quiet -c 'print(int("0xff"), int(PI))' +grol -quiet -c 'print(int("0xff"), int(PI))' stdout '^255 3$' !stderr . # short circuiting -grol -no-auto -quiet -c 'if true || println("not ran") {println("is true")}' +grol -quiet -c 'if true || println("not ran") {println("is true")}' stdout '^is true$' !stderr . !stdout 'not ran' -grol -no-auto -quiet -c 'if false && println("not ran") {true} else {println("is false")}' +grol -quiet -c 'if false && println("not ran") {true} else {println("is false")}' stdout '^is false$' !stderr . !stdout 'not ran' +# parse error context not crashing +!grol -quiet -panic -c '^&@%%^!%^&^&!%^%^&!' +stderr 'parser error' +!stderr panic + +!grol -quiet -panic -c '@' +!stderr panic +!stderr NIL_TOKEN +!stderr '\[CRI\]' +stderr 'parser error' +stderr '@' + -- sample_test.gr -- // Sample file that our gorepl can interpret // <--- comments diff --git a/object/memory.go b/object/memory.go index c7b3d9b4..e188e9f8 100644 --- a/object/memory.go +++ b/object/memory.go @@ -17,7 +17,7 @@ func FreeMemory() int64 { currentAlloc := memStats.HeapAlloc // retrieve the current limit. 
gomemlimit := debug.SetMemoryLimit(-1) - return int64(gomemlimit) - int64(currentAlloc) //nolint:unconvert // necessary, can be negative. + return int64(gomemlimit) - int64(currentAlloc) //nolint:unconvert,gosec // necessary, can be negative. } func SizeOk(n int) (bool, int64) { diff --git a/parser/parser.go b/parser/parser.go index 81a04c82..417150ce 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -45,11 +45,12 @@ type Parser struct { curToken *token.Token peekToken *token.Token - prevNewline bool - nextNewline bool - - errors []string + prevNewline bool + nextNewline bool continuationNeeded bool + prevPos int + + errors []string prefixParseFns map[token.Type]prefixParseFn infixParseFns map[token.Type]infixParseFn @@ -155,6 +156,7 @@ func (p *Parser) Errors() []string { func (p *Parser) nextToken() { p.prevToken = p.curToken p.curToken = p.peekToken + p.prevPos = p.l.Pos() p.peekToken = p.l.NextToken() p.prevNewline = p.nextNewline p.nextNewline = p.l.HadNewline() @@ -273,14 +275,25 @@ func (p *Parser) expectPeek(t token.Type) bool { return false } +// ErrorContext returns the current line and a pointer to the error position. +// If prev is true, the error position is relative to the previous token instead of current one. +func (p *Parser) ErrorContext(prev bool) string { + line, errPos := p.l.CurrentLine() + if prev { + errPos -= (p.l.Pos() - p.prevPos) + } + repeat := max(0, errPos-1) + return line + "\n" + strings.Repeat(" ", repeat) + "^" +} + func (p *Parser) peekError(t token.Type) { - msg := fmt.Sprintf("expected next token to be %s, got %s (%q) instead", - t, p.peekToken.Type(), p.peekToken.Literal()) + msg := fmt.Sprintf("expected next token to be `%s`, got `%s` instead:\n%s", + token.ByType(t).Literal(), p.peekToken.Literal(), p.ErrorContext(false)) p.errors = append(p.errors, msg) } -func (p *Parser) noPrefixParseFnError(t token.Type) { - msg := fmt.Sprintf("no prefix parse function for %s found", t) +func (p *Parser) noPrefixParseFnError(t *token.Token) { + msg := fmt.Sprintf("no prefix parse function for `%s` found:\n%s", t.Literal(), p.ErrorContext(true)) p.errors = append(p.errors, msg) } @@ -293,7 +306,7 @@ func (p *Parser) parseExpression(precedence Priority) ast.Node { } prefix := p.prefixParseFns[p.curToken.Type()] if prefix == nil { - p.noPrefixParseFnError(p.curToken.Type()) + p.noPrefixParseFnError(p.curToken) return nil } leftExp := prefix() @@ -343,7 +356,7 @@ func (p *Parser) parseIntegerLiteral() ast.Node { func (p *Parser) parseFloatLiteral() ast.Node { value, err := strconv.ParseFloat(p.curToken.Literal(), 64) if err != nil { - msg := fmt.Sprintf("could not parse %q as float", p.curToken.Literal()) + msg := fmt.Sprintf("could not parse %q as float:\n%s", p.curToken.Literal(), p.ErrorContext(true)) p.errors = append(p.errors, msg) return nil } diff --git a/parser/parser_test.go b/parser/parser_test.go index d3a04eca..f64502de 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -364,3 +364,18 @@ func TestIncompleteBlockComment(t *testing.T) { } } } + +func TestNilToken(t *testing.T) { + inp := "@" + l := lexer.New(inp) + p := parser.New(l) + _ = p.ParseProgram() + errs := p.Errors() + if len(errs) != 1 { + t.Fatalf("expecting 1 error, got %d", len(errs)) + } + expected := "no prefix parse function for `@` found:\n@\n^" + if errs[0] != expected { + t.Errorf("unexpected error: wanted %q got %q", expected, errs[0]) + } +} diff --git a/repl/repl.go b/repl/repl.go index 50746500..bc37748b 100644 --- a/repl/repl.go +++ b/repl/repl.go @@ -33,8 
+33,6 @@ func logParserErrors(p *parser.Parser) bool { if len(errs) == 0 { return false } - - log.Critf("parser has %d error(s)", len(errs)) for _, msg := range errs { log.Errf("parser error: %s", msg) } diff --git a/token/token.go b/token/token.go index 8f688ca0..58258007 100644 --- a/token/token.go +++ b/token/token.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" + "fortio.org/log" "fortio.org/sets" ) @@ -308,12 +309,16 @@ func LookupIdent(ident string) *Token { // only have one possible instance/value // (ie all the tokens except for the first 4 value tokens). // TODO: codegen all the token constants to avoid needing this function. -// (even though that's better than string comparaisons). +// (even though that's better than string comparisons). func ByType(t Type) *Token { return tToT[t] } func (t *Token) Literal() string { + if t == nil { + log.Critf("Nil token .Literal() called") + return "NIL_TOKEN" + } return t.literal }
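
Note (not part of the patch): a minimal usage sketch of the new error-context behavior, mirroring the TestNilToken expectations in parser/parser_test.go above. The grol.io/grol/lexer and grol.io/grol/parser import paths are assumptions inferred from the module path seen in lexer.go (which imports grol.io/grol/token); only calls that appear in the diff (lexer.New, parser.New, ParseProgram, Errors) are used.

package main

import (
	"fmt"

	"grol.io/grol/lexer"  // assumed import path, inferred from grol.io/grol/token in lexer.go
	"grol.io/grol/parser" // assumed import path, same module layout
)

func main() {
	// Same sequence as TestNilToken: lex and parse an input containing an
	// illegal token, then print the parser errors that were recorded.
	l := lexer.New("@")
	p := parser.New(l)
	_ = p.ParseProgram()
	for _, msg := range p.Errors() {
		// With this patch the message includes the offending line and a caret
		// pointing at the error position, e.g. (per TestNilToken):
		//   no prefix parse function for `@` found:
		//   @
		//   ^
		fmt.Println(msg)
	}
}

The same context line and caret are what repl.logParserErrors now emits via log.Errf for each message, which is why the separate log.Critf summary line was dropped in repl/repl.go.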