Skip to content

Commit

Permalink
fix: fuzz serde struct and fix bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
liuq19 committed Dec 7, 2023
1 parent 314dc0b commit cd7ee0d
Show file tree
Hide file tree
Showing 11 changed files with 224 additions and 142 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "sonic-rs"
version = "0.2.4"
version = "0.2.5"
authors = ["Volo Team <[email protected]>"]
edition = "2021"
description = "Sonic-rs is a fast Rust JSON library based on SIMD"
Expand Down
26 changes: 21 additions & 5 deletions fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ members = ["."]
libfuzzer-sys = "0.4"
sonic-rs = { path = ".." }
serde_json = { version = "1.0", features = ["float_roundtrip"] }
faststr = "0.2"
serde = { version = "1.0", features = ["derive"] }

[[bin]]
name = "fuzz_value"
Expand Down
129 changes: 129 additions & 0 deletions fuzz/fuzz_targets/from_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,35 @@ use sonic_rs::JsonNumberTrait;
use sonic_rs::JsonValue;
use sonic_rs::{to_array_iter, to_array_iter_unchecked, to_object_iter, to_object_iter_unchecked};

/// Differential check of `sonic_rs` against `serde_json` for a single type.
///
/// `$ty` is the type to deserialize and `$data` is an expression yielding the
/// raw input bytes. `serde_json` acts as the reference implementation:
///
/// * if it accepts the input, `sonic_rs` must accept it too and produce an
///   equal value; both values are then serialized by their own library,
///   parsed back, and compared again;
/// * if it rejects the input, `sonic_rs` must reject it as well.
///
/// Any disagreement panics, which is what the fuzzer reports as a finding.
macro_rules! test_struct {
    ($ty:ty, $data:expr) => {
        match serde_json::from_slice::<$ty>($data) {
            Ok(jv) => {
                // The message is built inside the closure so that the
                // `format!`-style allocation only happens on failure instead
                // of on every fuzz iteration.
                let sv = sonic_rs::from_slice::<$ty>($data).unwrap_or_else(|err| {
                    panic!(
                        "parse valid json {:?} failed for type {}: {:?}",
                        $data,
                        stringify!($ty),
                        err
                    )
                });
                assert_eq!(sv, jv);

                // fuzz the struct to_string
                let sout = sonic_rs::to_string(&sv).unwrap();
                let jout = serde_json::to_string(&jv).unwrap();
                let sv = sonic_rs::from_str::<$ty>(&sout).unwrap();
                let jv = serde_json::from_str::<$ty>(&jout).unwrap();
                assert_eq!(sv, jv);
            }
            Err(err) => {
                // The reference parser rejected the input, so an `Ok` from
                // sonic_rs is the bug. Report the reference error as well to
                // make the finding easier to triage.
                if let Ok(sv) = sonic_rs::from_slice::<$ty>($data) {
                    panic!(
                        "parse invalid json {:?} wrong for type {}, should error: {}, got: {:?}",
                        $data,
                        stringify!($ty),
                        err,
                        sv
                    );
                }
            }
        }
    };
}

fuzz_target!(|data: &[u8]| {
match serde_json::from_slice::<JValue>(data) {
Ok(jv) => {
Expand Down Expand Up @@ -66,6 +95,12 @@ fuzz_target!(|data: &[u8]| {
let _ = dom_from_slice(data).unwrap_err();
}
}

test_struct!(TestStruct, data);
test_struct!(Foo, data);
test_struct!(Enum, data);
test_struct!(String, data);
test_struct!(f64, data);
});

fn compare_lazyvalue(jv: &JValue, sv: &sonic_rs::LazyValue) {
Expand Down Expand Up @@ -120,3 +155,97 @@ fn compare_value(jv: &JValue, sv: &sonic_rs::Value) -> bool {
}
true
}

use faststr::FastStr;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::{collections::HashMap, hash::Hash, marker::PhantomData};

/// Small two-field struct: a `FastStr` name and an unsigned 64-bit id.
///
/// Also used as the payload of `FieldEnum::Struct`.
#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct Foo {
name: FastStr,
id: u64,
}

/// Fieldless enum with explicit discriminants.
///
/// Derives `Hash` and `Eq` in addition to the usual set, which is what allows
/// it to be the key type of `TestStruct::enummap`.
#[derive(Debug, Deserialize, Serialize, Hash, Eq, PartialEq)]
enum Enum {
Zero = 0,
One = 1,
Two = 2,
}

/// Enum whose variants carry different payload shapes: a plain integer, a
/// tuple wrapped in a single-field variant, a struct (`Foo`) wrapped in a
/// single-field variant, and a unit variant with no payload.
#[derive(Debug, Deserialize, Serialize, PartialEq)]
enum FieldEnum {
Integer(i8),
Tuple((FastStr, i32)),
Struct(Foo),
Unit,
}

/// Enum whose variants carry no data but are written in all three variant
/// syntaxes: an empty tuple variant, an empty struct variant, and a unit
/// variant.
#[derive(Debug, Deserialize, Serialize, PartialEq)]
enum FieldlessEnum {
Tuple(),
Struct {},
Unit,
}

/// Newtype struct around a borrowed string slice with lifetime `'a`.
#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct Wrapper<'a>(&'a str);

/// A unit struct: it has no fields at all.
#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct Unit;

/// A generic struct with one `String` field and a zero-sized `PhantomData<T>`
/// marker, so the type parameter `T` is used without storing a `T` value.
#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct Phan<T> {
phan: String,
_data: PhantomData<T>,
}

/// A tuple struct with two positional fields: an `i32` and an `f32`.
#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct Pair(i32, f32);

/// Aggregate struct that combines many field kinds in one type: enums, scalar
/// primitives (including 128-bit integers and `char`), borrowed and owned
/// strings, sequences and maps, `Option`, tuples, and the helper structs
/// declared alongside it.
///
/// The lifetime `'a` is the lifetime of the fields that borrow string data
/// (`str_`, `cow`, `wrapper`).
#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct TestStruct<'a> {
// enums, including an enum used as a map key
fieldless: FieldlessEnum,
enummap: HashMap<Enum, FieldlessEnum>,
enum_: Enum,

// basic types
boolean: bool,
integer: i32,
float: f64,
int128: i128,
uint128: u128,
char_: char,

// string or bytes
str_: &'a str,
// NOTE(review): the borrowed-bytes field below is disabled; the reason is
// not visible in this file.
// bytes_: &'a [u8],
string: String,
faststr: FastStr,
// `borrow` is requested explicitly for the `Cow` field.
#[serde(borrow)]
cow: Cow<'a, str>,

// containers
vector: Vec<u32>,
array: [u32; 1],
empty_array: [u8; 0],
map: HashMap<FastStr, f64>,
// map whose key type is an `Option`
map_opkey: HashMap<Option<FastStr>, f64>,

// enum types
option: Option<String>,
fieldenum: FieldEnum,

// tuple or struct
tuple: (u64, String),
tuple_struct: Pair,
unit_struct: Unit,

// `borrow` is requested explicitly because the borrowed data sits inside
// the nested `Wrapper` type.
#[serde(borrow)]
wrapper: Wrapper<'a>,
phan_struct: Phan<()>,
}
4 changes: 4 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ impl Error {
| ErrorCode::ExpectedArrayCommaOrEnd
| ErrorCode::ExpectedArrayStart
| ErrorCode::ExpectedObjectStart
| ErrorCode::InvalidSurrogateUnicodeCodePoint
| ErrorCode::RecursionLimitExceeded => Category::Syntax,
}
}
Expand Down Expand Up @@ -252,6 +253,9 @@ pub(crate) enum ErrorCode {

#[error("Unexpected visited type in JSON visitor")]
UnexpectedVisitType,

#[error("Invalid surrogate Unicode code point")]
InvalidSurrogateUnicodeCodePoint,
}

impl Error {
Expand Down
14 changes: 10 additions & 4 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ where
fn error_index(&self) -> usize {
// When parsing strings, we need to record the error position.
// It must be smaller than reader.index().
std::cmp::min(self.error_index, self.read.index() - 1)
std::cmp::min(self.error_index, self.read.index().saturating_sub(1))
}

/// Error caused by a byte from next_char().
Expand Down Expand Up @@ -711,11 +711,12 @@ where
return perr!(self, EofWhileParsing);
};

// Only check surrogates here; the code point itself is validated later, when `codepoint_to_utf8` is used.
if (0xD800..0xDC00).contains(&point1) {
// parse the second `\u` escape, which must be the low half of the surrogate pair
let point2 = if let Some(asc) = self.read.next_n(6) {
if asc[0] != b'\\' || asc[1] != b'u' {
return perr!(self, InvalidUnicodeCodePoint);
return perr!(self, InvalidSurrogateUnicodeCodePoint);
}
unsafe { hex_to_u32_nocheck(&*(asc.as_ptr().add(2) as *const _ as *const [u8; 4])) }
} else {
Expand All @@ -726,13 +727,13 @@ where
let low_bit = point2.wrapping_sub(0xdc00);
if (low_bit >> 10) != 0 {
// invalid surrogate
return perr!(self, InvalidUnicodeCodePoint);
return perr!(self, InvalidSurrogateUnicodeCodePoint);
}

Ok((((point1 - 0xd800) << 10) | low_bit).wrapping_add(0x10000))
} else if (0xDC00..0xE000).contains(&point1) {
// invalid surrogate
perr!(self, InvalidUnicodeCodePoint)
perr!(self, InvalidSurrogateUnicodeCodePoint)
} else {
Ok(point1)
}
Expand All @@ -746,6 +747,9 @@ where
buf.reserve(4);
let ptr = buf.as_mut_ptr().add(buf.len());
let cnt = codepoint_to_utf8(code, ptr);
if cnt == 0 {
return perr!(self, InvalidUnicodeCodePoint);
}
buf.set_len(buf.len() + cnt);
}
Some(c) if ESCAPED_TAB[c as usize] != 0 => {
Expand Down Expand Up @@ -825,6 +829,7 @@ where
self.read.eat(1);
self.parse_escaped_char(buf)?;
}
b'\x00'..=b'\x1f' => return perr!(self, ControlCharacterWhileParsingString),
_ => {
buf.push(c);
self.read.eat(1);
Expand Down Expand Up @@ -897,6 +902,7 @@ where
self.read.eat(1);
return unsafe { self.parse_string_escaped(buf) };
}
b'\x00'..=b'\x1f' => return perr!(self, ControlCharacterWhileParsingString),
_ => self.read.eat(1),
}
}
Expand Down
41 changes: 41 additions & 0 deletions src/serde/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,4 +347,45 @@ mod test {
"Invalid UTF-8 characters in json at line 1 column 4\n\n\t\"\0\0\0��\"\n\t....^..\n"
);
}

/// Differential check of this crate against `serde_json` for a single type.
///
/// `$ty` is the type to deserialize and `$data` is an expression yielding the
/// raw input bytes. `serde_json` acts as the reference implementation:
///
/// * if it accepts the input, this crate must accept it too and produce an
///   equal value; both values are then serialized by their own library,
///   parsed back, and compared again;
/// * if it rejects the input, this crate must reject it as well.
///
/// Any disagreement panics and fails the enclosing test.
macro_rules! test_struct {
    ($ty:ty, $data:expr) => {
        match serde_json::from_slice::<$ty>($data) {
            Ok(jv) => {
                // The message is built inside the closure so that nothing is
                // formatted or allocated unless the parse actually fails.
                let sv = crate::from_slice::<$ty>($data).unwrap_or_else(|err| {
                    panic!(
                        "parse valid json {:?} failed for type {}: {:?}",
                        $data,
                        stringify!($ty),
                        err
                    )
                });
                assert_eq!(sv, jv);

                // fuzz the struct to_string
                let sout = crate::to_string(&sv).unwrap();
                let jout = serde_json::to_string(&jv).unwrap();
                let sv = crate::from_str::<$ty>(&sout).unwrap();
                let jv = serde_json::from_str::<$ty>(&jout).unwrap();
                assert_eq!(sv, jv);
            }
            Err(err) => {
                // The reference parser rejected the input, so an `Ok` from
                // this crate is the bug being tested for.
                if let Ok(sv) = crate::from_slice::<$ty>($data) {
                    panic!(
                        "parse invalid json {:?} wrong for type {}, should error: {}, got: {:?}",
                        $data,
                        stringify!($ty),
                        err,
                        sv
                    );
                }
            }
        }
    };
}

// Regression inputs that were found by the fuzzing tests. Each case is run
// through `test_struct!`, which requires this crate to agree with serde_json
// on whether the input parses and on the resulting value.
#[test]
fn test_more_structs() {
// Bytes are `":7<LF><NUL>" <LF>`: raw control characters (LF, NUL) inside
// the string literal.
test_struct!(String, &[34, 58, 55, 10, 0, 34, 32, 10]);
// Bytes are `"\t<TAB>"`: an escaped `\t` followed by a raw TAB byte inside
// the string literal.
test_struct!(String, &[34, b'\\', b't', 9, 34]);
// Bytes are `"\""`: a string whose only content is an escaped quote.
test_struct!(String, &[34, 92, 34, 34]);
// `\u` escape followed by characters that are not four hex digits.
test_struct!(String, b"\"\\umap9map009\"");
// `Foo` given as a JSON array whose second element is `-0`.
// NOTE(review): presumably this guards the `-0` handling in number
// parsing; confirm against the parser.
test_struct!(Foo, &b"[\"5XXXXXXZX:XXZX:[\",-0]"[..]);
}
}
4 changes: 4 additions & 0 deletions src/util/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ pub(crate) fn parse_number(
*index += 1;

if *index >= data.len() || !matches!(data[*index], b'.' | b'e' | b'E') {
// view -0 as float number
if negative {
return Ok(ParserNumber::Float(0.0));
}
return Ok(ParserNumber::Unsigned(0));
}

Expand Down
Loading

0 comments on commit cd7ee0d

Please sign in to comment.