diff --git a/Cargo.lock b/Cargo.lock index 6785db8d..829472f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -207,6 +207,7 @@ dependencies = [ "itoa", "itoap", "once_cell", + "page_size", "pyo3-build-config", "pyo3-ffi", "ryu", @@ -217,6 +218,16 @@ dependencies = [ "version_check", ] +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "proc-macro2" version = "1.0.78" @@ -363,6 +374,28 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "zerocopy" version = "0.7.32" diff --git a/Cargo.toml b/Cargo.toml index 526ec5fe..68fae48c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ encoding_rs = { version = "0.8", default_features = false } itoa = { version = "1", default_features = false } itoap = { version = "1", features = ["std", "simd"] } once_cell = { version = "1", default_features = false, features = ["race"] } +page_size = { version = "0.6" } pyo3-ffi = { version = "^0.20.2", default_features = false, features = ["extension-module"]} ryu = { version = "1", default_features = false } serde = { version = "1", default_features = false } diff --git a/src/serialize/writer/simd.rs b/src/serialize/writer/simd.rs index fe4a0dcf..94a42ac8 100644 --- a/src/serialize/writer/simd.rs +++ b/src/serialize/writer/simd.rs @@ -2,6 +2,7 @@ // Copyright 2023-2024 liuq19, ijl // adapted from sonic-rs' src/util/string.rs +use crate::typeref::PAGE_SIZE; use core::simd::cmp::{SimdPartialEq, SimdPartialOrd}; macro_rules! impl_escape_unchecked { @@ -9,60 +10,29 @@ macro_rules! impl_escape_unchecked { $nb -= $cn; $dst = $dst.add($cn); $src = $src.add($cn); - let mut mask = $omask << $cn; + $omask >>= $cn; loop { $nb -= 1; - mask = mask << 1; - let replacement = if *($src) == b'"' { - (*b"\\\"\0\0\0\0\0\0", 2) + $omask = $omask >> 1; + + if *($src) == b'"' { + core::ptr::copy_nonoverlapping(b"\\\"".as_ptr(), $dst, 2); + $dst = $dst.add(2); } else if *($src) == b'\\' { - (*b"\\\\\0\0\0\0\0\0", 2) + core::ptr::copy_nonoverlapping(b"\\\\".as_ptr(), $dst, 2); + $dst = $dst.add(2); } else { - match *($src) { - 0 => (*b"\\u0000\0\0", 6), - 1 => (*b"\\u0001\0\0", 6), - 2 => (*b"\\u0002\0\0", 6), - 3 => (*b"\\u0003\0\0", 6), - 4 => (*b"\\u0004\0\0", 6), - 5 => (*b"\\u0005\0\0", 6), - 6 => (*b"\\u0006\0\0", 6), - 7 => (*b"\\u0007\0\0", 6), - 8 => (*b"\\b\0\0\0\0\0\0", 2), - 9 => (*b"\\t\0\0\0\0\0\0", 2), - 10 => (*b"\\n\0\0\0\0\0\0", 2), - 11 => (*b"\\u000b\0\0", 6), - 12 => (*b"\\f\0\0\0\0\0\0", 2), - 13 => (*b"\\r\0\0\0\0\0\0", 2), - 14 => (*b"\\u000e\0\0", 6), - 15 => (*b"\\u000f\0\0", 6), - 16 => (*b"\\u0010\0\0", 6), - 17 => (*b"\\u0011\0\0", 6), - 18 => (*b"\\u0012\0\0", 6), - 19 => (*b"\\u0013\0\0", 6), - 20 => (*b"\\u0014\0\0", 6), - 21 => (*b"\\u0015\0\0", 6), - 22 => (*b"\\u0016\0\0", 6), - 23 => (*b"\\u0017\0\0", 6), - 24 => (*b"\\u0018\0\0", 6), - 25 => (*b"\\u0019\0\0", 6), - 26 => (*b"\\u001a\0\0", 6), - 27 => (*b"\\u001b\0\0", 6), - 28 => (*b"\\u001c\0\0", 6), - 29 => (*b"\\u001d\0\0", 6), - 30 => (*b"\\u001e\0\0", 6), - 31 => (*b"\\u001f\0\0", 6), - _ => unreachable!(), - } + $dst = write_unusual_escape($src, $dst); }; - core::ptr::copy_nonoverlapping(replacement.0.as_ptr(), $dst, 8); - $dst = $dst.add(replacement.1 as usize); + $src = $src.add(1); - if likely!(mask & (1 << (STRIDE - 1)) != 1) { + if likely!($omask & 1 != 1) { break; } } }; } + macro_rules! impl_format_simd { ($odptr:expr, $value_ptr:expr, $value_len:expr) => { let mut dptr = $odptr; @@ -81,7 +51,7 @@ macro_rules! impl_format_simd { while nb >= STRIDE { let v = StrVector::from_slice(core::slice::from_raw_parts(sptr, STRIDE)); v.copy_to_slice(core::slice::from_raw_parts_mut(dptr, STRIDE)); - let mask = + let mut mask = (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask() as u32; if likely!(mask == 0) { @@ -95,12 +65,18 @@ macro_rules! impl_format_simd { } while nb > 0 { - let mut v = StrVector::default(); - v.as_mut_array()[..nb].copy_from_slice(core::slice::from_raw_parts(sptr, nb)); + let v = if unlikely!(is_cross_page!(sptr)) { + let mut v = StrVector::default(); + v.as_mut_array()[..nb].copy_from_slice(core::slice::from_raw_parts(sptr, nb)); + v + } else { + StrVector::from_slice(core::slice::from_raw_parts(sptr, STRIDE)) + }; v.copy_to_slice(core::slice::from_raw_parts_mut(dptr, STRIDE)); - let mask = (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask() + let mut mask = (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask() as u32 - & (STRIDE_SATURATION >> (STRIDE - nb)); + & (STRIDE_SATURATION >> (32 - STRIDE - nb)); + if likely!(mask == 0) { dptr = dptr.add(nb); break; @@ -118,6 +94,57 @@ macro_rules! impl_format_simd { }; } +macro_rules! is_cross_page { + ($src:expr) => { + unsafe { (($src as usize & (PAGE_SIZE - 1)) + STRIDE) > PAGE_SIZE } + }; +} + +#[cold] +#[inline(never)] +fn write_unusual_escape(sptr: *const u8, dptr: *mut u8) -> *mut u8 { + unsafe { + debug_assert!(*sptr < 32); + let replacement = match *(sptr) { + 0 => (*b"\\u0000\0\0", 6), + 1 => (*b"\\u0001\0\0", 6), + 2 => (*b"\\u0002\0\0", 6), + 3 => (*b"\\u0003\0\0", 6), + 4 => (*b"\\u0004\0\0", 6), + 5 => (*b"\\u0005\0\0", 6), + 6 => (*b"\\u0006\0\0", 6), + 7 => (*b"\\u0007\0\0", 6), + 8 => (*b"\\b\0\0\0\0\0\0", 2), + 9 => (*b"\\t\0\0\0\0\0\0", 2), + 10 => (*b"\\n\0\0\0\0\0\0", 2), + 11 => (*b"\\u000b\0\0", 6), + 12 => (*b"\\f\0\0\0\0\0\0", 2), + 13 => (*b"\\r\0\0\0\0\0\0", 2), + 14 => (*b"\\u000e\0\0", 6), + 15 => (*b"\\u000f\0\0", 6), + 16 => (*b"\\u0010\0\0", 6), + 17 => (*b"\\u0011\0\0", 6), + 18 => (*b"\\u0012\0\0", 6), + 19 => (*b"\\u0013\0\0", 6), + 20 => (*b"\\u0014\0\0", 6), + 21 => (*b"\\u0015\0\0", 6), + 22 => (*b"\\u0016\0\0", 6), + 23 => (*b"\\u0017\0\0", 6), + 24 => (*b"\\u0018\0\0", 6), + 25 => (*b"\\u0019\0\0", 6), + 26 => (*b"\\u001a\0\0", 6), + 27 => (*b"\\u001b\0\0", 6), + 28 => (*b"\\u001c\0\0", 6), + 29 => (*b"\\u001d\0\0", 6), + 30 => (*b"\\u001e\0\0", 6), + 31 => (*b"\\u001f\0\0", 6), + _ => unreachable!(), + }; + core::ptr::copy_nonoverlapping(replacement.0.as_ptr(), dptr, 8); + dptr.add(replacement.1 as usize) + } +} + #[inline(never)] pub unsafe fn format_escaped_str_impl_128( odptr: *mut u8, diff --git a/src/typeref.rs b/src/typeref.rs index 486122bb..3689bbb7 100644 --- a/src/typeref.rs +++ b/src/typeref.rs @@ -75,6 +75,9 @@ pub static mut DESCR_STR: *mut PyObject = null_mut(); pub static mut VALUE_STR: *mut PyObject = null_mut(); pub static mut INT_ATTR_STR: *mut PyObject = null_mut(); +#[cfg(feature = "unstable-simd")] +pub static mut PAGE_SIZE: usize = 0; + #[cfg(feature = "yyjson")] pub const YYJSON_BUFFER_SIZE: usize = 1024 * 1024 * 8; @@ -135,6 +138,11 @@ pub fn init_typerefs() { fn _init_typerefs_impl() -> bool { unsafe { debug_assert!(crate::opt::MAX_OPT < u16::MAX as i32); + + #[cfg(feature = "unstable-simd")] + { + PAGE_SIZE = page_size::get(); + } assert!(crate::deserialize::KEY_MAP .set(crate::deserialize::KeyMap::default()) .is_ok());