Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support stream decoder #104

Merged
merged 3 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ rust-
*.data
opensource_git_commit.log
Cargo.lock
*.profraw
*.profdata
8 changes: 5 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "sonic-rs"
version = "0.3.10"
version = "0.3.11"
authors = ["Volo Team <[email protected]>"]
edition = "2021"
description = "Sonic-rs is a fast Rust JSON library based on SIMD"
Expand All @@ -15,7 +15,6 @@ categories = ["encoding", "parser-implementations"]

[dependencies]
cfg-if = "1.0"
arrayref = "0.3"
serde = { version = "1.0", features = ["rc", "derive"] }
itoa = "1.0"
ryu = "1.0"
Expand All @@ -26,7 +25,6 @@ bytes = "1.4"
thiserror = "1.0"
simdutf8 = "0.1"
parking_lot = "0.12"
page_size = "0.6"


[target.'cfg(not(target_env = "msvc"))'.dev-dependencies]
Expand Down Expand Up @@ -78,6 +76,10 @@ harness = false
name = "get_from"
harness = false

[[bench]]
name = "value_operator"
harness = false

[features]
default = []

Expand Down
83 changes: 83 additions & 0 deletions benches/value_operator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#[macro_use]
extern crate criterion;
use std::io::Read;

use criterion::{criterion_group, BatchSize, Criterion};
use sonic_rs::JsonValueTrait;

/// Benchmarks `sonic_rs::Value` against `serde_json::Value` on the
/// `benches/testdata/twitter.json` fixture.
///
/// Four benchmarks are registered in the `value` group:
/// - `sonic-rs::value_get` / `serde_json::value_get`: a deep index chain into
///   the parsed document followed by `as_str()`.
/// - `sonic_rs::value_new` / `serde_json::value_new`: building a 100-element
///   array from scratch.
///
/// # Panics
///
/// Panics if the fixture cannot be read or parsed, or if either library does
/// not return the expected string for the probed path.
fn bench_get(c: &mut Criterion) {
    // Pin the benchmark thread to the first reported core to reduce scheduling
    // noise. Pinning is best-effort: when no core ids are reported we simply
    // run unpinned instead of panicking.
    if let Some(core_id) = core_affinity::get_core_ids().and_then(|ids| ids.into_iter().next()) {
        core_affinity::set_for_current(core_id);
    }

    let mut data = Vec::new();
    let root = env!("CARGO_MANIFEST_DIR").to_owned();
    std::fs::File::open(root + "/benches/testdata/twitter.json")
        .expect("failed to open benches/testdata/twitter.json")
        .read_to_end(&mut data)
        .expect("failed to read benches/testdata/twitter.json");

    let sonic_value: sonic_rs::Value = sonic_rs::from_slice(&data).unwrap();
    let serde_value: serde_json::Value = serde_json::from_slice(&data).unwrap();

    // Sanity check: both libraries must resolve the probed path to the same
    // string before we compare how fast they do it.
    assert_eq!(
        sonic_value["statuses"][4]["entities"]["media"][0]["source_status_id_str"].as_str(),
        Some("439430848190742528")
    );
    assert_eq!(
        serde_value["statuses"][4]["entities"]["media"][0]["source_status_id_str"].as_str(),
        Some("439430848190742528")
    );

    let mut group = c.benchmark_group("value");

    group.bench_with_input("sonic-rs::value_get", &sonic_value, |b, data| {
        b.iter_batched(
            || data,
            |value| {
                // `black_box` on both the input and the result keeps the
                // optimizer from folding or discarding the lookup.
                let value = std::hint::black_box(value);
                std::hint::black_box(
                    value["statuses"][4]["entities"]["media"][0]["source_status_id_str"].as_str(),
                );
            },
            BatchSize::SmallInput,
        )
    });

    group.bench_with_input("serde_json::value_get", &serde_value, |b, data| {
        b.iter_batched(
            || data,
            |value| {
                let value = std::hint::black_box(value);
                std::hint::black_box(
                    value["statuses"][4]["entities"]["media"][0]["source_status_id_str"].as_str(),
                );
            },
            BatchSize::SmallInput,
        )
    });

    group.bench_with_input("sonic_rs::value_new", &sonic_value, |b, data| {
        b.iter_batched(
            || data,
            |_value| {
                let mut value = sonic_rs::Array::new();
                for i in 0..100 {
                    value.push(i);
                }
                // Observe the finished array so its construction cannot be
                // eliminated; it is still dropped inside the timed routine.
                std::hint::black_box(&value);
            },
            BatchSize::SmallInput,
        )
    });

    group.bench_with_input("serde_json::value_new", &serde_value, |b, data| {
        b.iter_batched(
            || data,
            |_value| {
                let mut value = serde_json::Value::Array(Vec::new());
                let array = value.as_array_mut().unwrap();
                for i in 0..100 {
                    // NOTE(review): this pushes `f64` values while the sonic-rs
                    // benchmark above pushes integers — confirm the asymmetry
                    // is intentional.
                    array.push(serde_json::Value::from(i as f64));
                }
                std::hint::black_box(&value);
            },
            BatchSize::SmallInput,
        )
    });

    // Explicitly finish the group so its summary report is generated here.
    group.finish();
}

// Register `bench_get` under the `benches` group and hand that group to
// Criterion's entry-point macro (the bench target is declared with
// `harness = false` in Cargo.toml).
criterion_group!(benches, bench_get);
criterion_main!(benches);
2 changes: 1 addition & 1 deletion src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::{parser::as_str, util::private::Sealed};
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum JsonSlice<'de> {
Raw(&'de [u8]),
FastStr(FastStr),
FastStr(FastStr), // note: a FastStr may be inlined, in which case its bytes live on the stack.
}

impl<'de> JsonSlice<'de> {
Expand Down
38 changes: 17 additions & 21 deletions src/lazyvalue/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,14 @@ use crate::{
/// }
/// }
/// ```
pub struct ObjectJsonIter<'de>(ObjectInner<'de>);
pub struct ObjectJsonIter<'de> {
json: JsonSlice<'de>,
parser: Option<Parser<Read<'de>>>,
strbuf: Vec<u8>,
first: bool,
ending: bool,
check: bool,
}

/// A lazy iterator for JSON array text. It parses the JSON while iterating.
///
Expand Down Expand Up @@ -63,26 +70,15 @@ pub struct ObjectJsonIter<'de>(ObjectInner<'de>);
/// }
/// }
/// ```
pub struct ArrayJsonIter<'de>(ArrayInner<'de>);

struct ObjectInner<'de> {
json: JsonSlice<'de>,
parser: Option<Parser<Read<'de>>>,
strbuf: Vec<u8>,
first: bool,
ending: bool,
check: bool,
}

struct ArrayInner<'de> {
pub struct ArrayJsonIter<'de> {
json: JsonSlice<'de>,
parser: Option<Parser<Read<'de>>>,
first: bool,
ending: bool,
check: bool,
}

impl<'de> ObjectInner<'de> {
impl<'de> ObjectJsonIter<'de> {
fn new(json: JsonSlice<'de>, check: bool) -> Self {
Self {
json,
Expand Down Expand Up @@ -131,7 +127,7 @@ impl<'de> ObjectInner<'de> {
}
}

impl<'de> ArrayInner<'de> {
impl<'de> ArrayJsonIter<'de> {
fn new(json: JsonSlice<'de>, check: bool) -> Self {
Self {
json,
Expand Down Expand Up @@ -219,7 +215,7 @@ impl<'de> ArrayInner<'de> {
/// }
/// ```
pub fn to_object_iter<'de, I: JsonInput<'de>>(json: I) -> ObjectJsonIter<'de> {
ObjectJsonIter(ObjectInner::new(json.to_json_slice(), true))
ObjectJsonIter::new(json.to_json_slice(), true)
}

/// Traverse the JSON array text through a lazy iterator. The JSON parsing will be done when
Expand Down Expand Up @@ -256,7 +252,7 @@ pub fn to_object_iter<'de, I: JsonInput<'de>>(json: I) -> ObjectJsonIter<'de> {
/// }
/// ```
pub fn to_array_iter<'de, I: JsonInput<'de>>(json: I) -> ArrayJsonIter<'de> {
ArrayJsonIter(ArrayInner::new(json.to_json_slice(), true))
ArrayJsonIter::new(json.to_json_slice(), true)
}

/// Traverse the JSON text through a lazy object iterator. The JSON parsing will be done when
Expand Down Expand Up @@ -293,7 +289,7 @@ pub fn to_array_iter<'de, I: JsonInput<'de>>(json: I) -> ArrayJsonIter<'de> {
/// }
/// ```
pub unsafe fn to_object_iter_unchecked<'de, I: JsonInput<'de>>(json: I) -> ObjectJsonIter<'de> {
ObjectJsonIter(ObjectInner::new(json.to_json_slice(), false))
ObjectJsonIter::new(json.to_json_slice(), false)
}

/// Traverse the JSON text through a lazy object iterator. The JSON parsing will be done when
Expand Down Expand Up @@ -327,22 +323,22 @@ pub unsafe fn to_object_iter_unchecked<'de, I: JsonInput<'de>>(json: I) -> Objec
/// }
/// ```
pub unsafe fn to_array_iter_unchecked<'de, I: JsonInput<'de>>(json: I) -> ArrayJsonIter<'de> {
ArrayJsonIter(ArrayInner::new(json.to_json_slice(), false))
ArrayJsonIter::new(json.to_json_slice(), false)
}

impl<'de> Iterator for ObjectJsonIter<'de> {
type Item = Result<(FastStr, LazyValue<'de>)>;

fn next(&mut self) -> Option<Self::Item> {
self.0.next_entry_impl(self.0.check)
self.next_entry_impl(self.check)
}
}

impl<'de> Iterator for ArrayJsonIter<'de> {
type Item = Result<LazyValue<'de>>;

fn next(&mut self) -> Option<Self::Item> {
self.0.next_elem_impl(self.0.check)
self.next_elem_impl(self.check)
}
}

Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub use crate::pointer::{JsonPointer, PointerNode, PointerTree};
pub use crate::serde::{
from_slice, from_slice_unchecked, from_str, to_string, to_string_pretty, to_vec, to_vec_pretty,
to_writer, to_writer_pretty, Deserializer, JsonNumberTrait, Number, RawNumber, Serializer,
StreamDeserializer,
};
#[doc(inline)]
pub use crate::value::{
Expand Down
7 changes: 3 additions & 4 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use std::{
str::from_utf8_unchecked,
};

use arrayref::array_ref;
use faststr::FastStr;
use serde::de::{self, Expected, Unexpected};
use smallvec::SmallVec;
Expand Down Expand Up @@ -1217,7 +1216,7 @@ where
let reader = &mut self.read;

while let Some(chunk) = reader.peek_n(64) {
let input = array_ref![chunk, 0, 64];
let input = unsafe { &*(chunk.as_ptr() as *const [_; 64]) };
if let Some(count) = skip_container_loop(
input,
&mut prev_instring,
Expand Down Expand Up @@ -1291,7 +1290,7 @@ where

// then we use simd to accelerate skipping space
while let Some(chunk) = reader.peek_n(64) {
let chunk = array_ref![chunk, 0, 64];
let chunk = unsafe { &*(chunk.as_ptr() as *const [_; 64]) };
let bitmap = unsafe { get_nonspace_bits(chunk) };
if bitmap != 0 {
self.nospace_bits = bitmap;
Expand Down Expand Up @@ -1351,7 +1350,7 @@ where

// then we use simd to accelerate skipping space
while let Some(chunk) = reader.peek_n(64) {
let chunk = array_ref![chunk, 0, 64];
let chunk = unsafe { &*(chunk.as_ptr() as *const [_; 64]) };
let bitmap = unsafe { get_nonspace_bits(chunk) };
if bitmap != 0 {
self.nospace_bits = bitmap;
Expand Down
Loading
Loading