Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
93650: util/json: Correctly handle trailing tokens r=miretskiy a=miretskiy

Fix a bug in fast json parser implementation where it would incorrectly allow arrays (or objects) with trailing comma (`[1,2,]`).

Add test cases for this regression.
Implement a mechanism to generate corrupt random JSON inputs, and use that to build fuzz test comparing
results of parsing using standard Go JSON parser with fast JSON parser.

Fixes cockroachdb#93613
Epic: none

Release note: None

Co-authored-by: Yevgeniy Miretskiy <[email protected]>
  • Loading branch information
craig[bot] and Yevgeniy Miretskiy committed Dec 17, 2022
2 parents fc082cc + b30d307 commit 93ed655
Show file tree
Hide file tree
Showing 3 changed files with 236 additions and 7 deletions.
59 changes: 59 additions & 0 deletions pkg/util/json/json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,26 @@ func TestJSONErrors(t *testing.T) {
testCase(`\u1`, `invalid JSON token`, useFastJSONParser),
testCase(`\u111z`, `unable to decode JSON: invalid character .* looking for beginning of value`, useStdGoJSON),
testCase(`\u111z`, `invalid JSON token`, useFastJSONParser),

// Trailing "," should not be allowed.
testCase(`[,]`, `invalid character ','`, useStdGoJSON),
testCase(`[,]`, `unexpected comma`, useFastJSONParser),
testCase(`[1,]`, `invalid character ']'`, useStdGoJSON),
testCase(`[1,]`, `unexpected comma`, useFastJSONParser),
testCase(`[1, [2,]]`, `invalid character ']'`, useStdGoJSON),
testCase(`[1, [2,]]`, `unexpected comma`, useFastJSONParser),
testCase(`[1, [2, 3],]`, `invalid character ']'`, useStdGoJSON),
testCase(`[1, [2, 3],]`, `unexpected comma`, useFastJSONParser),
testCase(`{"k":,}`, `invalid character ','`, useStdGoJSON),
testCase(`{"k":,}`, `unexpected object token ","`, useFastJSONParser),
testCase(`{"k": [1,]}`, `invalid character ']'`, useStdGoJSON),
testCase(`{"k": [1,]}`, `unexpected comma`, useFastJSONParser),
testCase(`{"b": false, }`, `invalid character '}' looking for beginning of object key`, useStdGoJSON),
testCase(`{"b": false, }`, `stateObjectString: missing string key`, useFastJSONParser),
testCase(`[1, {"a":"b",}]`, `invalid character '}'`, useStdGoJSON),
testCase(`[1, {"a":"b",}]`, `stateObjectString: missing string key`, useFastJSONParser),
testCase(`[1, {"a":"b"},]`, `invalid character ']'`, useStdGoJSON),
testCase(`[1, {"a":"b"},]`, `unexpected comma`, useFastJSONParser),
}
for _, tc := range testCases {
t.Run(fmt.Sprintf("%s/%s", tc.implName, tc.input), func(t *testing.T) {
Expand All @@ -391,6 +411,45 @@ func TestJSONErrors(t *testing.T) {
}
}

// Not a true fuzz test, but we'll generate some number of random JSON objects,
// with or without errors, and attempt to parse them using both standard and
// fast parsers.
func TestParseJSONFuzz(t *testing.T) {
const errProb = 0.005 // ~0.5% chance of an error.
const fuzzTargetErrors = 1000 // ~90k inputs.

rng, seed := randutil.NewTestRand()
t.Log("test seed ", seed)
numInputs := 0
for numErrors := 0; numErrors < fuzzTargetErrors; {
inputJson, err := RandGen(rng)
require.NoError(t, err)

jsonStr := AsStringWithErrorChance(inputJson, rng, errProb)
numInputs++

fastJson, fastErr := ParseJSON(jsonStr, WithFastJSONParser())
stdJson, stdErr := ParseJSON(jsonStr, WithGoStandardParser())

if fastErr == nil && stdErr == nil {
// No errors -- both JSONs must be identical.
// Note: we can't compare against inputJSON since jsonStr (generated with
// error probability), sometimes drops array/object elements.
eq, err := fastJson.Compare(stdJson)
require.NoError(t, err)
require.Equal(t, 0, eq, "unequal JSON objects: std<%s> fast<%s>",
asString(stdJson), asString(fastJson))
} else {
numErrors++
// Both parsers must produce an error.
require.Errorf(t, fastErr, "expected fast parser error for input: %s", jsonStr)
require.Errorf(t, stdErr, "expected std parser error for input: %s", jsonStr)
}
}

t.Logf("Executed fuzz test against %d inputs", numInputs)
}

func TestJSONSize(t *testing.T) {
testCases := []struct {
input string
Expand Down
149 changes: 149 additions & 0 deletions pkg/util/json/random.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
package json

import (
"bytes"
"encoding/json"
"fmt"
"math/rand"
Expand Down Expand Up @@ -192,3 +193,151 @@ func doRandomJSON(rng *rand.Rand, cfg randConfig) interface{} {
return encoded
}
}

// AsStringWithErrorChance returns string representation of JSON object,
// but allows up to specified chance that the returned string will contain
// errors -- i.e. the string should not be parse-able back into JSON.
func AsStringWithErrorChance(j JSON, rng *rand.Rand, p float32) string {
ej := &errJSON{
JSON: j,
errProb: p,
rng: rng,
}
return asString(ej)
}

// garbage is array of strings that will be appended (or prepended) to the
// JSON string to produce invalid JSON string.
var garbage = []string{
" [", " ]", " []", "[1]", " {", " }", "{}", `{"garbage": "in"}`, "-", " garbage", " 0", `" 0"`,
}

// numericGarbage are numeric garbage tokens that will be added
// to the number string to make it invalid.
var numericGarbage = []string{
"0", // okay to append, but prepending should produce an error
"=", "-", "e", "E", "e+", "E+",
}

type errJSON struct {
JSON
errProb float32
rng *rand.Rand
}

func (j *errJSON) injectErr() bool {
r := j.rng.Float32() < j.errProb
j.errProb /= 2 // decay
return r
}

func (j *errJSON) writeGarbage(pile []string, buf *bytes.Buffer) {
if j.injectErr() {
buf.WriteString(pile[j.rng.Intn(len(pile))])
}
}

func (j *errJSON) errJSON(other JSON) JSON {
return &errJSON{
JSON: other,
errProb: j.errProb,
rng: j.rng,
}
}

// Format implements JSON, and overrides underlying JSON object
// implementation in order to inject errors.
func (j *errJSON) Format(buf *bytes.Buffer) {
j.writeGarbage(garbage, buf) // Possibly prefix with garbage data.
defer func() {
j.writeGarbage(garbage, buf) // Possibly append garbage data.
}()

switch t := j.JSON.(type) {
default:
j.JSON.Format(buf)
case jsonObject:
if j.injectErr() {
buf.WriteByte('[') // Oops, array instead of object.
} else {
buf.WriteByte('{')
}
for i := range t {
if i != 0 {
buf.WriteString(", ")
}
// Skip element (which results in trailing or extra ",")
// if injectErr is true.
if !j.injectErr() {
ek := j.errJSON(t[i].k)
ev := j.errJSON(t[i].v)
buf.WriteString(asString(ek))
buf.WriteString(": ")
ev.Format(buf)
}
}
if j.injectErr() {
buf.WriteByte(']') // Oops, array instead of object.
} else {
buf.WriteByte('}')
}
case jsonArray:
if j.injectErr() {
buf.WriteByte('{') // Oops, object instead of array.
} else {
buf.WriteByte('[')
}
for i := range t {
if i != 0 {
buf.WriteString(", ")
}
// Skip element (which results in trailing or extra ",")
// if injectErr is true.
if !j.injectErr() {
j.errJSON(t[i]).Format(buf)
}
}
if j.injectErr() {
buf.WriteByte('}') // Oops, object instead of array.
} else {
buf.WriteByte(']')
}
case jsonString:
t.Format(buf)
if j.injectErr() {
// Drop terminating quote.
buf.Truncate(1)
}
case jsonNumber:
if j.injectErr() {
n := j.rng.Int31n(3) // 0 -> prefix, 1 -> prefix & suffix, 2 -> suffix only
if n <= 1 {
j.writeGarbage(numericGarbage, buf) // Prefix
}
t.Format(buf)
if n >= 1 {
j.writeGarbage(numericGarbage, buf)
}
} else {
t.Format(buf)
}
case jsonNull:
if j.injectErr() {
buf.WriteString("nil")
} else {
buf.WriteString("null")
}
case jsonTrue:
if j.injectErr() {
buf.WriteString("truish")
} else {
buf.WriteString("true")
}
case jsonFalse:
if j.injectErr() {
buf.WriteString("falsy")
} else {
buf.WriteString("false")
}
}
}
35 changes: 28 additions & 7 deletions pkg/util/json/tokenizer/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ import (
type Decoder struct {
scanner Scanner
state func(*Decoder) ([]byte, error)

// mustHaveValue is set when decoder processes
// array or object -- as indicated by stack state.
// In those cases, when we see a comma, there *must*
// be either an array value or a string object key following
// it; and if array/object terminates without seeing
// this value, return an error.
mustHaveValue bool
stack
}

Expand Down Expand Up @@ -119,6 +127,11 @@ func (d *Decoder) stateObjectString() ([]byte, error) {
}
switch tok[0] {
case '}':
if d.mustHaveValue {
d.scanner.offset -= len(tok) + 1 // Rewind to point to comma.
return nil, fmt.Errorf("stateObjectString: missing string key")
}

inObj := d.pop()
switch {
case d.len() == 0:
Expand Down Expand Up @@ -171,7 +184,7 @@ func (d *Decoder) stateObjectValue() ([]byte, error) {
}
}

func (d *Decoder) stateObjectComma() ([]byte, error) {
func (d *Decoder) stateObjectComma() (_ []byte, err error) {
tok := d.scanner.Next()
if len(tok) < 1 {
return nil, io.ErrUnexpectedEOF
Expand All @@ -189,8 +202,10 @@ func (d *Decoder) stateObjectComma() ([]byte, error) {
}
return tok, nil
case Comma:
d.state = (*Decoder).stateObjectString
return d.NextToken()
d.mustHaveValue = true
tok, err = d.stateObjectString()
d.mustHaveValue = false
return tok, err
default:
return tok, fmt.Errorf("stateObjectComma: expecting comma")
}
Expand All @@ -211,6 +226,10 @@ func (d *Decoder) stateArrayValue() ([]byte, error) {
d.push(false)
return tok, nil
case ']':
if d.mustHaveValue {
d.scanner.offset -= len(tok) + 1 // Rewind to point to comma.
return nil, fmt.Errorf("stateArrayValue: unexpected comma")
}
inObj := d.pop()
switch {
case d.len() == 0:
Expand All @@ -221,15 +240,15 @@ func (d *Decoder) stateArrayValue() ([]byte, error) {
d.state = (*Decoder).stateArrayComma
}
return tok, nil
case ',':
case Comma:
return nil, fmt.Errorf("stateArrayValue: unexpected comma")
default:
d.state = (*Decoder).stateArrayComma
return tok, nil
}
}

func (d *Decoder) stateArrayComma() ([]byte, error) {
func (d *Decoder) stateArrayComma() (_ []byte, err error) {
tok := d.scanner.Next()
if len(tok) < 1 {
return nil, io.ErrUnexpectedEOF
Expand All @@ -247,8 +266,10 @@ func (d *Decoder) stateArrayComma() ([]byte, error) {
}
return tok, nil
case Comma:
d.state = (*Decoder).stateArrayValue
return d.NextToken()
d.mustHaveValue = true
tok, err = d.stateArrayValue()
d.mustHaveValue = false
return tok, err
default:
return nil, fmt.Errorf("stateArrayComma: expected comma, %v", d.stack)
}
Expand Down

0 comments on commit 93ed655

Please sign in to comment.