Skip to content
This repository has been archived by the owner on Jul 10, 2024. It is now read-only.

Replace '\C' with '\\' instead of '\' #166

Merged
merged 1 commit into from
Oct 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 30 additions & 8 deletions learn/json_preprocessor.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,23 @@ import (
)

type stripControlCharactersReader struct {
wrapped io.Reader
hasPrecedingBackslash bool
wrapped io.Reader
}

func newStripControlCharactersReader(wrapped io.Reader) stripControlCharactersReader {
return stripControlCharactersReader{wrapped: wrapped}
func newStripControlCharactersReader(wrapped io.Reader) *stripControlCharactersReader {
return &stripControlCharactersReader{wrapped: wrapped}
}

// Read up to len(p) bytes, removing any control characters found.
// Removed characters do not count toward the total bytes read. Returns the
// number of bytes read.
func (r stripControlCharactersReader) Read(p []byte) (n int, err error) {
// Removed characters do not count toward the total bytes read.
//
// If the control character is preceded by an unescaped backslash (i.e. an odd
// number of consecutive backslashes immediately before it), it is replaced
// with a backslash rather than removed, e.g. "\<CR>" becomes "\\". This
// prevents the JSON parser from applying the backslash to escape the next
// character.
//
// Returns the number of bytes read.
func (r *stripControlCharactersReader) Read(p []byte) (n int, err error) {
pIdx := 0
buf := make([]byte, len(p))

Expand All @@ -31,10 +37,26 @@ func (r stripControlCharactersReader) Read(p []byte) (n int, err error) {

// Copy from buf to p, skipping control characters.
for _, c := range bufSlice[:bufN] {
toWrite := c

if c <= 0x1f {
continue
if r.hasPrecedingBackslash {
// If the control character is escaped, replace it with a
// backslash.
toWrite = '\\'
} else {
// Otherwise, just remove it.
continue
}
}

if c == '\\' {
r.hasPrecedingBackslash = !r.hasPrecedingBackslash
} else {
r.hasPrecedingBackslash = false
}
p[pIdx] = c

p[pIdx] = toWrite
pIdx += 1
}

Expand Down
57 changes: 40 additions & 17 deletions learn/json_preprocessor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,9 @@ func TestSkipCarriageReturnReader(t *testing.T) {

func TestSkipControlCharacterReader_JSON(t *testing.T) {
testCases := []struct {
Name string
Input string
Expected interface{}
ExpectedError bool
Name string
Input string
Expected interface{}
}{
{
Name: "no control characters",
Expand All @@ -119,11 +118,11 @@ func TestSkipControlCharacterReader_JSON(t *testing.T) {
},
{
Name: "escaped control char",
Input: "{\"foo\\\r\\\n\": \"bar\"}",
Input: "{\"foo\\\r\": \"bar\"}",

// The Go JSON parser doesn't support escaping control characters.
// However, if someone were to try, the preprocessor would remove
// the control character but leave the backslash.
// the control character and replace it with a backslash.
Expected: map[string]interface{}{`foo\`: "bar"},
},
{
Expand All @@ -133,25 +132,49 @@ func TestSkipControlCharacterReader_JSON(t *testing.T) {
",
"subject": "world"
}`,
Expected: map[string]interface{}{"greeting": "hello", "subject": "world"},

// After the newline is removed, the JSON parser interprets the backslash
// as escaping the quote.
ExpectedError: true,
Expected: map[string]interface{}{"greeting": "hello \\", "subject": "world"},
},
}

for _, tc := range testCases {
// Parse JSON after removing control characters.
var parsed interface{}
var err error

// Try parsing using a JSON decoder.
decoder := json.NewDecoder(newStripControlCharactersReader(strings.NewReader(tc.Input)))
err := decoder.Decode(&parsed)
if tc.ExpectedError {
assert.Error(t, err, tc.Name)
} else {
assert.NoError(t, err, tc.Name)
assert.Equal(t, tc.Expected, parsed, tc.Name)
err = decoder.Decode(&parsed)
assert.NoError(t, err, "["+tc.Name+", decoder] error")
assert.Equal(t, tc.Expected, parsed, "["+tc.Name+", decoder] not equal")

// Try reading all bytes from the reader, one byte at a time, and then
// parsing.
bytesToRead := 1
reader := newStripControlCharactersReader(strings.NewReader(tc.Input))
read := make([]byte, 0, len([]byte(tc.Input)))
buf := make([]byte, bytesToRead)
for {
var n int
n, err = reader.Read(buf)

read = append(read, buf[:n]...)

if err != nil {
break
}

// Clear the buffer.
for i, _ := range buf {
buf[i] = '\x00'
}
}

// Parse.
if err == io.EOF {
err = json.Unmarshal(read, &parsed)
}
assert.NoError(t, err, "["+tc.Name+", byte-by-byte] error")
assert.Equal(t, tc.Expected, parsed, "["+tc.Name+", byte-by-byte] not equal")
}
}

Expand Down