Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

!perf: Use string for iteration and error reporting #24

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ func ExampleValidateOne() {
`null`

for offset, x := 0, s; x != ""; offset = len(s) - len(x) {
var err jscan.Error[string]
var err jscan.Error
if x, err = jscan.ValidateOne(x); err.IsErr() {
panic(fmt.Errorf("unexpected error: %w", err))
}
Expand Down
2 changes: 1 addition & 1 deletion internal/strfind/strfind.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ var charMap = [256]byte{
// the whitespace sequence.
// If the returned ctrlChar == true then trailing begins at an
// illegal character that was encountered during the scan.
func EndOfWhitespaceSeq[S ~string | ~[]byte](s S) (trailing S, ctrlChar bool) {
func EndOfWhitespaceSeq(s string) (trailing string, ctrlChar bool) {
for ; len(s) > 15; s = s[16:] {
if charMap[s[0]] != 1 {
goto NONSPACE
Expand Down
69 changes: 43 additions & 26 deletions jscan.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"strconv"
"sync"
"unicode/utf8"
"unsafe"

"github.com/romshark/jscan/v2/internal/keyescape"
)
Expand All @@ -24,10 +25,8 @@ func newValidator[S ~string | ~[]byte]() *Validator[S] {
}

var (
iteratorPoolString = sync.Pool{New: func() any { return newIterator[string]() }}
iteratorPoolBytes = sync.Pool{New: func() any { return newIterator[[]byte]() }}
validatorPoolString = sync.Pool{New: func() any { return newValidator[string]() }}
validatorPoolBytes = sync.Pool{New: func() any { return newValidator[[]byte]() }}
iteratorPool = sync.Pool{New: func() any { return newIterator[string]() }}
validatorPool = sync.Pool{New: func() any { return newValidator[string]() }}
)

type stackNodeType int8
Expand All @@ -47,7 +46,7 @@ type stackNode struct {
// Iterator provides access to the recently encountered value.
type Iterator[S ~string | ~[]byte] struct {
stack []stackNode
src S
src string
pointer []byte

valueType ValueType
Expand Down Expand Up @@ -92,15 +91,31 @@ func (i *Iterator[S]) Key() (key S) {
if i.keyIndex == -1 {
return
}
return i.src[i.keyIndex:i.keyIndexEnd]
s := i.src[i.keyIndex:i.keyIndexEnd]
var z S
switch any(z).(type) {
case string:
return S(s)
case []byte:
return S(unsafeBytesToString([]byte(s)))
}
return z
}

// Value returns the value if any.
func (i *Iterator[S]) Value() (value S) {
if i.valueIndexEnd == -1 {
return
}
return i.src[i.valueIndex:i.valueIndexEnd]
s := i.src[i.valueIndex:i.valueIndexEnd]
var z S
switch any(z).(type) {
case string:
return S(s)
case []byte:
return S(unsafeBytesToString([]byte(s)))
}
return z
}

// ScanStack calls fn for every element in the stack.
Expand Down Expand Up @@ -158,10 +173,10 @@ func (i *Iterator[S]) ViewPointer(fn func(p []byte)) {
i.pointer = i.pointer[:0]
}

func (i *Iterator[S]) getError(c ErrorCode) Error[S] {
return Error[S]{
func (i *Iterator[S]) getError(c ErrorCode) Error {
return Error{
src: i.src,
Code: c,
Src: i.src,
Index: i.valueIndex,
}
}
Expand All @@ -170,9 +185,9 @@ func (i *Iterator[S]) getError(c ErrorCode) Error[S] {
// The only exception is ErrorCodeCallback which indicates a callback
// explicitly breaking by returning true instead of a syntax error.
// (Error).IsErr() returning false is equivalent to err == nil.
type Error[S ~string | ~[]byte] struct {
type Error struct {
// Src refers to the original source.
Src S
src string

// Index points to the error start index in the source.
Index int
Expand All @@ -181,23 +196,17 @@ type Error[S ~string | ~[]byte] struct {
Code ErrorCode
}

var _ error = Error[string]{}
var _ error = Error{}

// IsErr returns true if there is an error, otherwise returns false.
func (e Error[S]) IsErr() bool { return e.Code != 0 }
func (e Error) IsErr() bool { return e.Code != 0 }

// Error stringifies the error implementing the built-in error interface.
// Calling Error should be avoided in performance-critical code as it
// relies on dynamic memory allocation.
func (e Error[S]) Error() string {
if e.Index < len(e.Src) {
var r rune
switch x := any(e.Src).(type) {
case string:
r, _ = utf8.DecodeRuneInString(x[e.Index:])
case []byte:
r, _ = utf8.DecodeRune(x[e.Index:])
}
func (e Error) Error() string {
if e.Index < len(e.src) {
r, _ := utf8.DecodeRuneInString(e.src[e.Index:])
return errorMessage(e.Code, e.Index, r)
}
return errorMessage(e.Code, e.Index, 0)
Expand Down Expand Up @@ -335,10 +344,18 @@ var lutEscape = [256]byte{
}

// getError returns an Error carrying code c whose Index is the
// offset of s within src, computed as len(src) - len(s).
func getError[S ~string | ~[]byte](c ErrorCode, src S, s S) Error[S] {
return Error[S]{
func getError(c ErrorCode, src, s string) Error {
return Error{
Code: c,
Src: src,
src: src,
Index: len(src) - len(s),
}
}

// unsafeStringToBytes reinterprets str as a byte slice without copying.
// The returned slice aliases the string's bytes and must never be
// written to, because Go strings are immutable.
func unsafeStringToBytes(str string) []byte {
	data, size := unsafe.StringData(str), len(str)
	return unsafe.Slice(data, size)
}

// unsafeBytesToString reinterprets bs as a string without copying.
// The returned string aliases bs, so bs must not be modified for as
// long as the string is in use.
func unsafeBytesToString(bs []byte) string {
	size := len(bs)
	return unsafe.String(unsafe.SliceData(bs), size)
}
4 changes: 2 additions & 2 deletions jscan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1191,7 +1191,7 @@ func testControlCharacters[S ~string | ~[]byte](t *testing.T, input S, expectErr
require.True(t, err.IsErr())
require.Equal(t, expectErr, err.Error())
require.Equal(t, jscan.ErrorCodeIllegalControlChar, err.Code)
require.Equal(t, err.Src[err.Index:], trailing)
require.Equal(t, input[err.Index:], trailing)
s = trailing
if err.IsErr() {
break
Expand All @@ -1214,7 +1214,7 @@ func testControlCharacters[S ~string | ~[]byte](t *testing.T, input S, expectErr
require.True(t, err.IsErr())
require.Equal(t, expectErr, err.Error())
require.Equal(t, jscan.ErrorCodeIllegalControlChar, err.Code)
require.Equal(t, err.Src[err.Index:], trailing)
require.Equal(t, input[err.Index:], trailing)
s = trailing
if err.IsErr() {
break
Expand Down
94 changes: 55 additions & 39 deletions scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,28 @@ import (
// WARNING: Don't use or alias *Iterator[S] after fn returns!
func ScanOne[S ~string | ~[]byte](
s S, fn func(*Iterator[S]) (err bool),
) (trailing S, err Error[S]) {
var i *Iterator[S]
switch any(s).(type) {
) (trailing S, err Error) {
x := iteratorPool.Get()
defer iteratorPool.Put(x)
i := x.(*Iterator[S])
reset(i)

switch v := any(s).(type) {
case string:
x := iteratorPoolString.Get()
defer iteratorPoolString.Put(x)
i = x.(*Iterator[S])
i.src = v
case []byte:
x := iteratorPoolBytes.Get()
defer iteratorPoolBytes.Put(x)
i = x.(*Iterator[S])
i.src = unsafeBytesToString(v)
default:
i = newIterator[S]()
i.src = string(s)
}
i.src = s
reset(i)
return scan(i, fn)

if v, ok := any(s).([]byte); ok {
i.src = unsafeBytesToString(v)
t, err := scan(i, fn)
return S(unsafeStringToBytes(t)), err
}
t, err := scan(i, fn)
return S(t), err
}

// Scan calls fn for every encountered value including objects and arrays.
Expand All @@ -66,35 +71,34 @@ func ScanOne[S ~string | ~[]byte](
// WARNING: Don't use or alias *Iterator[S] after fn returns!
func Scan[S ~string | ~[]byte](
s S, fn func(*Iterator[S]) (err bool),
) (err Error[S]) {
var i *Iterator[S]
switch any(s).(type) {
) (err Error) {
x := iteratorPool.Get()
defer iteratorPool.Put(x)
i := x.(*Iterator[S])
reset(i)

switch v := any(s).(type) {
case string:
x := iteratorPoolString.Get()
defer iteratorPoolString.Put(x)
i = x.(*Iterator[S])
i.src = v
case []byte:
x := iteratorPoolBytes.Get()
defer iteratorPoolBytes.Put(x)
i = x.(*Iterator[S])
i.src = unsafeBytesToString(v)
default:
i = newIterator[S]()
i.src = string(s)
}
i.src = s
reset(i)

t, err := scan(i, fn)
if err.IsErr() {
return err
}
var illegalChar bool
t, illegalChar = strfind.EndOfWhitespaceSeq(t)
if illegalChar {
return getError(ErrorCodeIllegalControlChar, s, t)
return getError(ErrorCodeIllegalControlChar, i.src, t)
}
if len(t) > 0 {
return getError(ErrorCodeUnexpectedToken, s, t)
return getError(ErrorCodeUnexpectedToken, i.src, t)
}
return Error[S]{}
return Error{}
}

// Parser wraps an iterator in a reusable instance.
Expand Down Expand Up @@ -127,10 +131,16 @@ func NewParser[S ~string | ~[]byte](preallocStackFrames int) *Parser[S] {
// WARNING: Don't use or alias *Iterator[S] after fn returns!
func (p *Parser[S]) ScanOne(
s S, fn func(*Iterator[S]) (err bool),
) (trailing S, err Error[S]) {
) (trailing S, err Error) {
reset(p.i)
p.i.src = s
return scan(p.i, fn)
if s, ok := any(s).([]byte); ok {
p.i.src = unsafeBytesToString(s)
t, err := scan(p.i, fn)
return S(unsafeStringToBytes(t)), err
}
p.i.src = string(s)
t, err := scan(p.i, fn)
return S(t), err
}

// Scan calls fn for every encountered value including objects and arrays.
Expand All @@ -140,9 +150,15 @@ func (p *Parser[S]) ScanOne(
// WARNING: Don't use or alias *Iterator[S] after fn returns!
func (p *Parser[S]) Scan(
s S, fn func(*Iterator[S]) (err bool),
) Error[S] {
) Error {
reset(p.i)
p.i.src = s

switch v := any(s).(type) {
case string:
p.i.src = v
case []byte:
p.i.src = unsafeBytesToString(v)
}

t, err := scan(p.i, fn)
if err.IsErr() {
Expand All @@ -151,21 +167,21 @@ func (p *Parser[S]) Scan(
var illegalChar bool
t, illegalChar = strfind.EndOfWhitespaceSeq(t)
if illegalChar {
return getError(ErrorCodeIllegalControlChar, s, t)
return getError(ErrorCodeIllegalControlChar, p.i.src, t)
}
if len(t) > 0 {
return getError(ErrorCodeUnexpectedToken, s, t)
return getError(ErrorCodeUnexpectedToken, p.i.src, t)
}
return Error[S]{}
return Error{}
}

// scan calls fn for every value encountered.
// Returns the remainder of i.src and an error if any is encountered.
func scan[S ~string | ~[]byte](
i *Iterator[S], fn func(*Iterator[S]) (err bool),
) (S, Error[S]) {
) (string, Error) {
var (
rollback S // Used as fallback for error report
rollback string // Used as fallback for error report
s = i.src
b bool
ks, ke int
Expand Down Expand Up @@ -699,7 +715,7 @@ VALUE_OR_ARR_TERM:

AFTER_VALUE:
if len(i.stack) == 0 {
return s, Error[S]{}
return s, Error{}
}
if len(s) < 1 {
return s, getError(ErrorCodeUnexpectedEOF, i.src, s)
Expand Down
Loading
Loading