diff --git a/src/functions.rs b/src/functions.rs index 2b002e2..8611a6a 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -871,10 +871,7 @@ fn container_to_string(value: &[u8], offset: &mut usize, json: &mut String) { let jentry_encoded = read_u32(value, jentry_offset).unwrap(); let jentry = JEntry::decode_jentry(jentry_encoded); let key_length = jentry.length as usize; - let key = unsafe { - std::str::from_utf8_unchecked(&value[key_offset..key_offset + key_length]) - }; - keys.push_back(key); + keys.push_back((key_offset, key_offset + key_length)); jentry_offset += 4; key_offset += key_length; } @@ -883,8 +880,8 @@ fn container_to_string(value: &[u8], offset: &mut usize, json: &mut String) { if i > 0 { json.push(','); } - let key = keys.pop_front().unwrap(); - json.extend(format!("{:?}", key).chars()); + let (key_start, key_end) = keys.pop_front().unwrap(); + escape_scalar_string(value, key_start, key_end, json); json.push(':'); scalar_to_string(value, &mut jentry_offset, &mut value_offset, json); } @@ -912,10 +909,7 @@ fn scalar_to_string( json.push_str(&format!("{num}")); } STRING_TAG => { - let val = unsafe { - std::str::from_utf8_unchecked(&value[*value_offset..*value_offset + length]) - }; - json.extend(format!("{:?}", val).chars()); + escape_scalar_string(value, *value_offset, *value_offset + length, json); } CONTAINER_TAG => { container_to_string(value, value_offset, json); @@ -926,6 +920,38 @@ fn scalar_to_string( *value_offset += length; } +fn escape_scalar_string(value: &[u8], start: usize, end: usize, json: &mut String) { + json.push('\"'); + let mut last_start = start; + for i in start..end { + // add backslash for escaped characters. + let c = match value[i] { + 0x5C => "\\\\", + 0x22 => "\\\"", + 0x2F => "\\/", + 0x08 => "\\b", + 0x0C => "\\f", + 0x0A => "\\n", + 0x0D => "\\r", + 0x09 => "\\t", + _ => { + continue; + } + }; + if i > last_start { + let val = unsafe { std::str::from_utf8_unchecked(&value[last_start..i]) }; + json.push_str(val); + } + json.push_str(c); + last_start = i + 1; + } + if last_start < end { + let val = unsafe { std::str::from_utf8_unchecked(&value[last_start..end]) }; + json.push_str(val); + } + json.push('\"'); +} + // Check whether the value is `JSONB` format, // for compatibility with previous `JSON` string. fn is_jsonb(value: &[u8]) -> bool { diff --git a/src/util.rs b/src/util.rs index 03ce12a..e47150c 100644 --- a/src/util.rs +++ b/src/util.rs @@ -65,7 +65,10 @@ pub fn parse_escaped_string<'a>( data = &data[1..]; data.read_exact(numbers.as_mut_slice())?; if data[0] != b'}' { - return Err(Error::Syntax(ParseErrorCode::UnexpectedEndOfHexEscape, *idx)); + return Err(Error::Syntax( + ParseErrorCode::UnexpectedEndOfHexEscape, + *idx, + )); } data = &data[1..]; *idx += 6; @@ -102,7 +105,10 @@ pub fn parse_escaped_string<'a>( data = &data[1..]; data.read_exact(lower_numbers.as_mut_slice())?; if data[0] != b'}' { - return Err(Error::Syntax(ParseErrorCode::UnexpectedEndOfHexEscape, *idx)); + return Err(Error::Syntax( + ParseErrorCode::UnexpectedEndOfHexEscape, + *idx, + )); } data = &data[1..]; *idx += 6; diff --git a/tests/it/functions.rs b/tests/it/functions.rs index 63f4d57..f51f948 100644 --- a/tests/it/functions.rs +++ b/tests/it/functions.rs @@ -662,6 +662,8 @@ fn test_to_string() { (r#"123.4567"#, r#"123.4567"#), (r#""abcdef""#, r#""abcdef""#), (r#""ab\n\"\uD83D\uDC8Eζ΅‹θ―•""#, r#""ab\n\"πŸ’Žζ΅‹θ―•""#), + (r#""α€™α€Όα€”α€Ία€™α€¬α€˜α€¬α€žα€¬""#, r#""α€™α€Όα€”α€Ία€™α€¬α€˜α€¬α€žα€¬""#), + (r#""βš οΈβœ…βŒ""#, r#""βš οΈβœ…βŒ""#), (r#"[1,2,3,4]"#, r#"[1,2,3,4]"#), ( r#"["a","b",true,false,[1,2,3],{"a":"b"}]"#, @@ -675,7 +677,6 @@ fn test_to_string() { let mut buf: Vec = Vec::new(); for (s, expect) in sources { let value = parse_value(s.as_bytes()).unwrap(); - assert_eq!(format!("{}", value), expect); value.write_to_vec(&mut buf); let res = to_string(&buf); assert_eq!(res, expect); diff --git a/tests/it/parser.rs b/tests/it/parser.rs index 07945a5..07555b8 100644 --- a/tests/it/parser.rs +++ b/tests/it/parser.rs @@ -310,10 +310,7 @@ fn test_parse_string() { r#""\"ab\"\uD803\uDC0Bζ΅‹θ―•""#, Value::String(Cow::from("\"ab\"𐰋桋试")), ), - ( - r#""⚠\u{fe0f}""#, - Value::String(Cow::from("⚠\u{fe0f}")), - ), + (r#""⚠\u{fe0f}""#, Value::String(Cow::from("⚠\u{fe0f}"))), ]); }