Skip to content

Commit

Permalink
fix: #117, #121 improve repairing of truncated strings
Browse files Browse the repository at this point in the history
  • Loading branch information
josdejong committed Apr 11, 2024
1 parent 576a10d commit 0fe1757
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 14 deletions.
23 changes: 15 additions & 8 deletions src/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ describe.each(implementations)('jsonrepair [$name]', ({ jsonrepair }) => {
expect(jsonrepair('"\\u2605')).toBe('"\\u2605"')
expect(jsonrepair('{"s \\ud')).toBe('{"s": null}')
expect(jsonrepair('{"message": "it\'s working')).toBe('{"message": "it\'s working"}')
expect(jsonrepair('{"text":"Hello Sergey,I hop')).toBe('{"text":"Hello Sergey,I hop"}')
expect(jsonrepair('{"message": "with, multiple, commma\'s, you see?')).toBe('{"message": "with, multiple, commma\'s, you see?"}')
})

test('should add missing start quote', () => {
Expand All @@ -163,6 +165,7 @@ describe.each(implementations)('jsonrepair [$name]', ({ jsonrepair }) => {
expect(jsonrepair('[\n"abc, \n"def"\n]')).toBe('[\n"abc", \n"def"\n]')
expect(jsonrepair('["abc]\n')).toBe('["abc"]\n')
expect(jsonrepair('["abc ]\n')).toBe('["abc" ]\n')
expect(jsonrepair('[\n[\n"abc\n]\n]\n')).toBe('[\n[\n"abc"\n]\n]\n')
})

test('should replace single quotes with double quotes', () => {
Expand Down Expand Up @@ -233,8 +236,9 @@ describe.each(implementations)('jsonrepair [$name]', ({ jsonrepair }) => {
expect(jsonrepair('{"key": "apple "bee" carrot"}')).toBe('{"key": "apple \\"bee\\" carrot"}')

expect(jsonrepair('[",",":"]')).toBe('[",",":"]')
expect(jsonrepair('["a" 2]')).toBe('["a", 2]')
expect(jsonrepair('["a" 2')).toBe('["a", 2]')
expect(jsonrepair('["," 2')).toBe('[""," 2"]') // Ideally it would repair as [",", 2]
expect(jsonrepair('["," 2')).toBe('[",", 2]')
})

test('should replace special white space characters', () => {
Expand Down Expand Up @@ -452,15 +456,15 @@ describe.each(implementations)('jsonrepair [$name]', ({ jsonrepair }) => {
})

test('should repair missing comma between array items', () => {
// The missing comma is expected directly after the preceding item, so any
// whitespace or newline that separated the two items ends up after the comma.
// expect(jsonrepair('{"array": [{}{}]}')).toBe('{"array": [{},{}]}')
// expect(jsonrepair('{"array": [{} {}]}'), '{"array": [{}).toBe({}]}')
// expect(jsonrepair('{"array": [{}\n{}]}')).toBe('{"array": [{},\n{}]}')
// expect(jsonrepair('{"array": [\n{}\n{}\n]}')).toBe('{"array": [\n{},\n{}\n]}')
// expect(jsonrepair('{"array": [\n1\n2\n]}')).toBe('{"array": [\n1,\n2\n]}')
expect(jsonrepair('{"array": [{}{}]}')).toBe('{"array": [{},{}]}')
// NOTE: this line previously passed the expected string as a second argument
// to expect() and never called a matcher, so it asserted nothing.
expect(jsonrepair('{"array": [{} {}]}')).toBe('{"array": [{}, {}]}')
expect(jsonrepair('{"array": [{}\n{}]}')).toBe('{"array": [{},\n{}]}')
expect(jsonrepair('{"array": [\n{}\n{}\n]}')).toBe('{"array": [\n{},\n{}\n]}')
expect(jsonrepair('{"array": [\n1\n2\n]}')).toBe('{"array": [\n1,\n2\n]}')
expect(jsonrepair('{"array": [\n"a"\n"b"\n]}')).toBe('{"array": [\n"a",\n"b"\n]}')

// // should leave normal array as is
// expect(jsonrepair('[\n{},\n{}\n]')).toBe('[\n{},\n{}\n]')
// should leave normal array as is
expect(jsonrepair('[\n{},\n{}\n]')).toBe('[\n{},\n{}\n]')
})

test('should repair missing comma between object properties', () => {
Expand All @@ -478,6 +482,9 @@ describe.each(implementations)('jsonrepair [$name]', ({ jsonrepair }) => {
test('should repair missing colon between object key and value', () => {
expect(jsonrepair('{"a" "b"}')).toBe('{"a": "b"}')
expect(jsonrepair('{"a" 2}')).toBe('{"a": 2}')
expect(jsonrepair('{"a" true}')).toBe('{"a": true}')
expect(jsonrepair('{"a" false}')).toBe('{"a": false}')
expect(jsonrepair('{"a" null}')).toBe('{"a": null}')
expect(jsonrepair('{"a"2}')).toBe('{"a":2}')
expect(jsonrepair('{\n"a" "b"\n}')).toBe('{\n"a": "b"\n}')
expect(jsonrepair('{"a" \'b\'}')).toBe('{"a": "b"}')
Expand Down
16 changes: 13 additions & 3 deletions src/regular/jsonrepair.ts
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,11 @@ export function jsonrepair(text: string): string {
while (true) {
if (i >= text.length) {
// end of text, we are missing an end quote
if (!stopAtDelimiter) {

const iPrev = prevNonWhitespaceIndex(i - 1)
if (!stopAtDelimiter && isDelimiter(text.charAt(iPrev))) {
// The text ends with a delimiter, like ["hello],
// so the missing end quote should be inserted before this delimiter:
// retry parsing the string, stopping at the first delimiter encountered
i = iBefore
output = output.substring(0, oBefore)
Expand All @@ -432,8 +436,14 @@ export function jsonrepair(text: string): string {

parseWhitespaceAndSkipComments()

if (stopAtDelimiter || i >= text.length || isDelimiter(text.charAt(i)) || isQuote(text.charCodeAt(i))) {
// The quote is followed by a delimiter or the end of the text,
if (
stopAtDelimiter ||
i >= text.length ||
isDelimiter(text.charAt(i)) ||
isQuote(text.charCodeAt(i)) ||
isDigit(text.charCodeAt(i))
) {
// The quote is followed by the end of the text, a delimiter, or a next value
// so the quote is indeed the end of the string
parseConcatenatedString()

Expand Down
17 changes: 14 additions & 3 deletions src/streaming/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,12 @@ export function jsonrepairCore({
while (true) {
if (input.isEnd(i)) {
// end of text, we have a missing quote somewhere
if (!stopAtDelimiter) {

const iPrev = prevNonWhitespaceIndex(i - 1)
if (!stopAtDelimiter && isDelimiter(input.charAt(iPrev))) {
// The text ends with a delimiter, like ["hello],
// so the missing end quote should be inserted before this delimiter:
// retry parsing the string, stopping at the first delimiter encountered
i = iBefore
output.remove(oBefore)

Expand All @@ -585,8 +590,14 @@ export function jsonrepairCore({

parseWhitespaceAndSkipComments()

if (stopAtDelimiter || input.isEnd(i) || isDelimiter(input.charAt(i)) || isQuote(input.charCodeAt(i))) {
// The quote is followed by a delimiter or the end of the text,
if (
stopAtDelimiter ||
input.isEnd(i) ||
isDelimiter(input.charAt(i)) ||
isQuote(input.charCodeAt(i)) ||
isDigit(input.charCodeAt(i))
) {
// The quote is followed by the end of the text, a delimiter, or a next value
// so the quote is indeed the end of the string
parseConcatenatedString()

Expand Down

0 comments on commit 0fe1757

Please sign in to comment.