Skip to content

Commit

Permalink
str various refactor and perf
Browse files Browse the repository at this point in the history
  • Loading branch information
ijl committed Jul 2, 2024
1 parent 9382058 commit c369ea4
Show file tree
Hide file tree
Showing 21 changed files with 668 additions and 514 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ pyo3-ffi = { version = "^0.22", default-features = false, features = ["extension
ryu = { version = "1", default-features = false }
serde = { version = "1", default-features = false }
serde_json = { version = "1", default-features = false, features = ["std", "float_roundtrip"] }
simdutf8 = { version = "0.1", default-features = false, features = ["std", "aarch64_neon"] }
simdutf8 = { version = "0.1", default-features = false, features = ["std", "public_imp", "aarch64_neon"] }
smallvec = { version = "^1.11", default-features = false, features = ["union", "write"] }
unwinding = { version = "0.2", features = ["unwinder"], optional = true }
xxhash-rust = { version = "^0.8", default-features = false, features = ["xxh3"] }
Expand Down
4 changes: 0 additions & 4 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ fn main() {
if env::var("ORJSON_DISABLE_SIMD").is_err() {
if let Some(true) = version_check::supports_feature("portable_simd") {
println!("cargo:rustc-cfg=feature=\"unstable-simd\"");
#[cfg(all(target_arch = "x86_64", target_feature = "avx512vl"))]
if env::var("ORJSON_DISABLE_AVX512").is_err() {
println!("cargo:rustc-cfg=feature=\"avx512\"");
}
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/deserialize/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@ use crate::util::INVALID_STR;
use core::ffi::c_char;
use std::borrow::Cow;

#[cfg(all(target_arch = "x86_64", not(target_feature = "sse4.2")))]
#[cfg(all(target_arch = "x86_64", not(target_feature = "avx2")))]
fn is_valid_utf8(buf: &[u8]) -> bool {
if std::is_x86_feature_detected!("sse4.2") {
simdutf8::basic::from_utf8(buf).is_ok()
if std::is_x86_feature_detected!("avx2") {
unsafe { simdutf8::basic::imp::x86::avx2::validate_utf8(buf).is_ok() }
} else {
encoding_rs::Encoding::utf8_valid_up_to(buf) == buf.len()
}
}

#[cfg(all(target_arch = "x86_64", target_feature = "sse4.2"))]
#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
/// Returns `true` when `buf` is accepted as UTF-8 by
/// `simdutf8::basic::from_utf8`, and `false` otherwise.
///
/// Which variant of this function is compiled is decided by the `cfg`
/// attribute that precedes it.
fn is_valid_utf8(buf: &[u8]) -> bool {
simdutf8::basic::from_utf8(buf).is_ok()
}
Expand Down
10 changes: 2 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

#![cfg_attr(
all(target_arch = "x86_64", feature = "avx512"),
feature(avx512_target_feature)
)]
#![cfg_attr(
all(target_arch = "x86_64", feature = "avx512"),
feature(stdarch_x86_avx512)
)]
#![cfg_attr(feature = "avx512", feature(avx512_target_feature))]
#![cfg_attr(feature = "avx512", feature(stdarch_x86_avx512))]
#![cfg_attr(feature = "intrinsics", feature(core_intrinsics))]
#![cfg_attr(feature = "optimize", feature(optimize_attribute))]
#![cfg_attr(feature = "strict_provenance", feature(strict_provenance))]
Expand Down
15 changes: 9 additions & 6 deletions src/serialize/per_type/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,19 @@ impl StrSerializer {
}

impl Serialize for StrSerializer {
#[inline]
#[inline(always)]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let uni = unicode_to_str(self.ptr);
if unlikely!(uni.is_none()) {
err!(SerializeError::InvalidStr)
}
serializer.serialize_str(uni.unwrap())
let uni = {
let tmp = unicode_to_str(self.ptr);
if unlikely!(tmp.is_none()) {
err!(SerializeError::InvalidStr)
};
tmp.unwrap()
};
serializer.serialize_str(uni)
}
}

Expand Down
10 changes: 10 additions & 0 deletions src/serialize/writer/byteswriter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ pub trait WriteExt: std::io::Write {
let _ = len;
}

/// Default capacity query: ignores `_len` and always answers `false`.
///
/// NOTE(review): presumably writers that manage their own buffer are
/// expected to override this with a real check — confirm against the
/// implementors of this trait.
#[inline]
fn has_capacity(&mut self, _len: usize) -> bool {
false
}

#[inline]
fn set_written(&mut self, len: usize) {
let _ = len;
Expand Down Expand Up @@ -157,6 +162,11 @@ impl WriteExt for &mut BytesWriter {
}
}

/// Reports whether `len` additional bytes fit within `self.cap` given the
/// `self.len` bytes already accounted for.
///
/// The sum `self.len + len` is formed with `checked_add`, so a request that
/// would overflow `usize` is reported as "does not fit" rather than wrapping
/// around (and possibly comparing as in-bounds) or panicking on the addition.
#[inline]
fn has_capacity(&mut self, len: usize) -> bool {
    self.len
        .checked_add(len)
        .is_some_and(|needed| needed <= self.cap)
}

#[inline(always)]
fn set_written(&mut self, len: usize) {
self.len += len;
Expand Down
16 changes: 11 additions & 5 deletions src/serialize/writer/formatter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
use crate::serialize::writer::WriteExt;
use std::io;

/// Debug-only check that the given writer reports room for 4 more bytes.
///
/// Expands to a `debug_assert!` around `has_capacity(4)`, so it compiles to
/// nothing when debug assertions are disabled.
// NOTE(review): the constant 4 presumably mirrors the amount reserved
// elsewhere before punctuation is written — confirm against the reserving code.
macro_rules! debug_assert_has_capacity {
    ($sink:expr) => {
        debug_assert!($sink.has_capacity(4))
    };
}

pub trait Formatter {
#[inline]
fn write_null<W>(&mut self, writer: &mut W) -> io::Result<()>
Expand Down Expand Up @@ -196,7 +202,7 @@ pub trait Formatter {
where
W: ?Sized + io::Write + WriteExt,
{
reserve_minimum!(writer);
debug_assert_has_capacity!(writer);
unsafe { writer.write_reserved_punctuation(b']').unwrap() };
Ok(())
}
Expand All @@ -206,7 +212,7 @@ pub trait Formatter {
where
W: ?Sized + io::Write + WriteExt,
{
reserve_minimum!(writer);
debug_assert_has_capacity!(writer);
if !first {
unsafe { writer.write_reserved_punctuation(b',').unwrap() }
}
Expand Down Expand Up @@ -238,7 +244,7 @@ pub trait Formatter {
where
W: ?Sized + io::Write + WriteExt,
{
reserve_minimum!(writer);
debug_assert_has_capacity!(writer);
unsafe {
writer.write_reserved_punctuation(b'}').unwrap();
}
Expand All @@ -250,7 +256,7 @@ pub trait Formatter {
where
W: ?Sized + io::Write + WriteExt,
{
reserve_minimum!(writer);
debug_assert_has_capacity!(writer);
if !first {
unsafe {
writer.write_reserved_punctuation(b',').unwrap();
Expand All @@ -272,7 +278,7 @@ pub trait Formatter {
where
W: ?Sized + io::Write + WriteExt,
{
reserve_minimum!(writer);
debug_assert_has_capacity!(writer);
unsafe { writer.write_reserved_punctuation(b':') }
}

Expand Down
26 changes: 23 additions & 3 deletions src/serialize/writer/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -572,9 +572,29 @@ macro_rules! reserve_str {
};
}

/// Writes `value` in escaped form into `writer`.
///
/// Steps, in order: `reserve_str!` is invoked for `writer` and `value`, the
/// bytes of `value` are handed to `format_escaped_str_impl_generic_128`
/// together with the writer's buffer pointer, and the count it returns is
/// recorded through `set_written`.
///
/// This variant is compiled when the `unstable-simd` feature is enabled and
/// the target architecture is not `x86_64`.
#[cfg(all(feature = "unstable-simd", not(target_arch = "x86_64")))]
#[inline(always)]
fn format_escaped_str<W>(writer: &mut W, value: &str)
where
    W: ?Sized + io::Write + WriteExt,
{
    // SAFETY (assumed, TODO confirm): `reserve_str!` is presumed to leave
    // enough room at the buffer pointer for the escaped output of `value`.
    unsafe {
        reserve_str!(writer, value);

        // The destination pointer is taken only after the reservation above.
        let dst = writer.as_mut_buffer_ptr();
        let src = value.as_bytes();
        let written = format_escaped_str_impl_generic_128(dst, src.as_ptr(), src.len());

        writer.set_written(written);
    }
}

#[cfg(all(
feature = "unstable-simd",
any(not(target_arch = "x86_64"), not(feature = "avx512"))
target_arch = "x86_64",
not(feature = "avx512")
))]
#[inline(always)]
fn format_escaped_str<W>(writer: &mut W, value: &str)
Expand All @@ -584,7 +604,7 @@ where
unsafe {
reserve_str!(writer, value);

let written = format_escaped_str_impl_128(
let written = format_escaped_str_impl_generic_128(
writer.as_mut_buffer_ptr(),
value.as_bytes().as_ptr(),
value.len(),
Expand All @@ -611,7 +631,7 @@ where
);
writer.set_written(written);
} else {
let written = format_escaped_str_impl_128(
let written = format_escaped_str_impl_generic_128(
writer.as_mut_buffer_ptr(),
value.as_bytes().as_ptr(),
value.len(),
Expand Down
2 changes: 1 addition & 1 deletion src/serialize/writer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

mod byteswriter;
mod formatter;
Expand Down
Loading

0 comments on commit c369ea4

Please sign in to comment.