Skip to content

Commit

Permalink
Refactor lexer (#653)
Browse files Browse the repository at this point in the history
  • Loading branch information
antonmedv authored May 17, 2024
1 parent 6cf0edb commit 1659c23
Show file tree
Hide file tree
Showing 12 changed files with 135 additions and 159 deletions.
13 changes: 12 additions & 1 deletion expr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1623,7 +1623,10 @@ func TestCompile_exposed_error(t *testing.T) {

b, err := json.Marshal(err)
require.NoError(t, err)
require.Equal(t, `{"Line":1,"Column":2,"Message":"invalid operation: == (mismatched types int and bool)","Snippet":"\n | 1 == true\n | ..^","Prev":null}`, string(b))
require.Equal(t,
`{"from":2,"to":4,"line":1,"column":2,"message":"invalid operation: == (mismatched types int and bool)","snippet":"\n | 1 == true\n | ..^","prev":null}`,
string(b),
)
}

func TestAsBool_exposed_error(t *testing.T) {
Expand Down Expand Up @@ -2667,3 +2670,11 @@ func TestIssue_integer_truncated_by_compiler(t *testing.T) {
_, err = expr.Compile("fn(256)", expr.Env(env))
require.Error(t, err)
}

// TestExpr_crash reads testdata/crash.txt, passes its contents to
// expr.Compile, and requires that compilation returns an error.
func TestExpr_crash(t *testing.T) {
	data, readErr := os.ReadFile("testdata/crash.txt")
	require.NoError(t, readErr)

	// Only the error result matters; the compiled program is discarded.
	_, compileErr := expr.Compile(string(data))
	require.Error(t, compileErr)
}
28 changes: 21 additions & 7 deletions file/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,36 @@ import (

type Error struct {
Location
Message string
Snippet string
Prev error
Line int `json:"line"`
Column int `json:"column"`
Message string `json:"message"`
Snippet string `json:"snippet"`
Prev error `json:"prev"`
}

// Error implements the error interface by returning the result of e.format().
func (e *Error) Error() string {
return e.format()
}

func (e *Error) Bind(source *Source) *Error {
if snippet, found := source.Snippet(e.Location.Line); found {
func (e *Error) Bind(source Source) *Error {
e.Line = 1
for i, r := range source {
if i == e.From {
break
}
if r == '\n' {
e.Line++
e.Column = 0
} else {
e.Column++
}
}
if snippet, found := source.Snippet(e.Line); found {
snippet := strings.Replace(snippet, "\t", " ", -1)
srcLine := "\n | " + snippet
var bytes = []byte(snippet)
var indLine = "\n | "
for i := 0; i < e.Location.Column && len(bytes) > 0; i++ {
for i := 0; i < e.Column && len(bytes) > 0; i++ {
_, sz := utf8.DecodeRune(bytes)
bytes = bytes[sz:]
if sz > 1 {
Expand Down Expand Up @@ -54,7 +68,7 @@ func (e *Error) Wrap(err error) {
}

func (e *Error) format() string {
if e.Location.Empty() {
if e.Snippet == "" {
return e.Message
}
return fmt.Sprintf(
Expand Down
8 changes: 2 additions & 6 deletions file/location.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
package file

type Location struct {
Line int // The 1-based line of the location.
Column int // The 0-based column number of the location.
}

func (l Location) Empty() bool {
return l.Column == 0 && l.Line == 0
From int `json:"from"`
To int `json:"to"`
}
73 changes: 21 additions & 52 deletions file/source.go
Original file line number Diff line number Diff line change
@@ -1,78 +1,47 @@
package file

import (
"encoding/json"
"strings"
"unicode/utf8"
)

type Source struct {
contents []rune
lineOffsets []int32
}

func NewSource(contents string) *Source {
s := &Source{
contents: []rune(contents),
}
s.updateOffsets()
return s
}

func (s *Source) MarshalJSON() ([]byte, error) {
return json.Marshal(s.contents)
}

func (s *Source) UnmarshalJSON(b []byte) error {
contents := make([]rune, 0)
err := json.Unmarshal(b, &contents)
if err != nil {
return err
}
// Source holds source text as a slice of runes.
type Source []rune

s.contents = contents
s.updateOffsets()
return nil
// NewSource converts contents to its runes and returns them as a Source.
func NewSource(contents string) Source {
return []rune(contents)
}

func (s *Source) Content() string {
return string(s.contents)
// String converts the runes of s back into a string.
func (s Source) String() string {
return string(s)
}

func (s *Source) Snippet(line int) (string, bool) {
func (s Source) Snippet(line int) (string, bool) {
if s == nil {
return "", false
}
charStart, found := s.findLineOffset(line)
if !found || len(s.contents) == 0 {
lines := strings.Split(string(s), "\n")
lineOffsets := make([]int, len(lines))
var offset int
for i, line := range lines {
offset = offset + utf8.RuneCountInString(line) + 1
lineOffsets[i] = offset
}
charStart, found := getLineOffset(lineOffsets, line)
if !found || len(s) == 0 {
return "", false
}
charEnd, found := s.findLineOffset(line + 1)
charEnd, found := getLineOffset(lineOffsets, line+1)
if found {
return string(s.contents[charStart : charEnd-1]), true
}
return string(s.contents[charStart:]), true
}

// updateOffsets compute line offsets up front as they are referred to frequently.
func (s *Source) updateOffsets() {
lines := strings.Split(string(s.contents), "\n")
offsets := make([]int32, len(lines))
var offset int32
for i, line := range lines {
offset = offset + int32(utf8.RuneCountInString(line)) + 1
offsets[int32(i)] = offset
return string(s[charStart : charEnd-1]), true
}
s.lineOffsets = offsets
return string(s[charStart:]), true
}

// findLineOffset returns the offset where the (1-indexed) line begins,
// or false if line doesn't exist.
func (s *Source) findLineOffset(line int) (int32, bool) {
func getLineOffset(lineOffsets []int, line int) (int, bool) {
if line == 1 {
return 0, true
} else if line > 1 && line <= len(s.lineOffsets) {
offset := s.lineOffsets[line-2]
} else if line > 1 && line <= len(lineOffsets) {
offset := lineOffsets[line-2]
return offset, true
}
return -1, false
Expand Down
15 changes: 0 additions & 15 deletions file/source_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
package file

import (
"encoding/json"
"testing"

"github.com/expr-lang/expr/internal/testify/assert"
)

const (
Expand Down Expand Up @@ -55,15 +52,3 @@ func TestStringSource_SnippetSingleLine(t *testing.T) {
t.Errorf(unexpectedSnippet, t.Name(), str2, "")
}
}

func TestStringSource_MarshalJSON(t *testing.T) {
source := NewSource("hello, world")
encoded, err := json.Marshal(source)
assert.NoError(t, err)
assert.Equal(t, `[104,101,108,108,111,44,32,119,111,114,108,100]`, string(encoded))

decoded := &Source{}
err = json.Unmarshal(encoded, decoded)
assert.NoError(t, err)
assert.Equal(t, source.Content(), decoded.Content())
}
91 changes: 43 additions & 48 deletions parser/lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,18 @@ package lexer
import (
"fmt"
"strings"
"unicode/utf8"

"github.com/expr-lang/expr/file"
)

func Lex(source *file.Source) ([]Token, error) {
func Lex(source file.Source) ([]Token, error) {
l := &lexer{
input: source.Content(),
source: source,
tokens: make([]Token, 0),
start: 0,
end: 0,
}

l.loc = file.Location{Line: 1, Column: 0}
l.prev = l.loc
l.startLoc = l.loc
l.commit()

for state := root; state != nil; {
state = state(l)
Expand All @@ -30,34 +28,25 @@ func Lex(source *file.Source) ([]Token, error) {
}

type lexer struct {
input string
source file.Source
tokens []Token
start, end int // current position in input
width int // last rune width
startLoc file.Location // start location
prev, loc file.Location // prev location of end location, end location
start, end int
err *file.Error
}

// eof is the sentinel rune that next returns once end has reached the end of the input.
const eof rune = -1

// commit sets start to end, dropping the pending span between them.
func (l *lexer) commit() {
l.start = l.end
}

func (l *lexer) next() rune {
if l.end >= len(l.input) {
l.width = 0
if l.end >= len(l.source) {
l.end++
return eof
}
r, w := utf8.DecodeRuneInString(l.input[l.end:])
l.width = w
l.end += w

l.prev = l.loc
if r == '\n' {
l.loc.Line++
l.loc.Column = 0
} else {
l.loc.Column++
}

r := l.source[l.end]
l.end++
return r
}

Expand All @@ -68,8 +57,7 @@ func (l *lexer) peek() rune {
}

func (l *lexer) backup() {
l.end -= l.width
l.loc = l.prev
l.end--
}

func (l *lexer) emit(t Kind) {
Expand All @@ -78,35 +66,39 @@ func (l *lexer) emit(t Kind) {

func (l *lexer) emitValue(t Kind, value string) {
l.tokens = append(l.tokens, Token{
Location: l.startLoc,
Location: file.Location{From: l.start, To: l.end},
Kind: t,
Value: value,
})
l.start = l.end
l.startLoc = l.loc
l.commit()
}

func (l *lexer) emitEOF() {
from := l.end - 2
if from < 0 {
from = 0
}
to := l.end - 1
if to < 0 {
to = 0
}
l.tokens = append(l.tokens, Token{
Location: l.prev, // Point to previous position for better error messages.
Location: file.Location{From: from, To: to},
Kind: EOF,
})
l.start = l.end
l.startLoc = l.loc
l.commit()
}

func (l *lexer) skip() {
l.start = l.end
l.startLoc = l.loc
l.commit()
}

func (l *lexer) word() string {
return l.input[l.start:l.end]
}

func (l *lexer) ignore() {
l.start = l.end
l.startLoc = l.loc
// TODO: boundary check is NOT needed here, but for some reason CI fuzz tests are failing.
if l.start > len(l.source) || l.end > len(l.source) {
return "__invalid__"
}
return string(l.source[l.start:l.end])
}

func (l *lexer) accept(valid string) bool {
Expand All @@ -132,18 +124,18 @@ func (l *lexer) skipSpaces() {
}

func (l *lexer) acceptWord(word string) bool {
pos, loc, prev := l.end, l.loc, l.prev
pos := l.end

l.skipSpaces()

for _, ch := range word {
if l.next() != ch {
l.end, l.loc, l.prev = pos, loc, prev
l.end = pos
return false
}
}
if r := l.peek(); r != ' ' && r != eof {
l.end, l.loc, l.prev = pos, loc, prev
l.end = pos
return false
}

Expand All @@ -153,8 +145,11 @@ func (l *lexer) acceptWord(word string) bool {
func (l *lexer) error(format string, args ...any) stateFn {
if l.err == nil { // show first error
l.err = &file.Error{
Location: l.loc,
Message: fmt.Sprintf(format, args...),
Location: file.Location{
From: l.end - 1,
To: l.end,
},
Message: fmt.Sprintf(format, args...),
}
}
return nil
Expand Down Expand Up @@ -230,6 +225,6 @@ func (l *lexer) scanRawString(quote rune) (n int) {
ch = l.next()
n++
}
l.emitValue(String, l.input[l.start+1:l.end-1])
l.emitValue(String, string(l.source[l.start+1:l.end-1]))
return
}
Loading

0 comments on commit 1659c23

Please sign in to comment.