Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARM NEON support #32

Merged
merged 9 commits into from
Sep 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .drone.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# CI pipeline: build and test the whole workspace on amd64 using every
# instruction set the build host offers.
# NOTE(review): the pipeline name assumes the CI host CPU supports AVX2 — confirm.
kind: pipeline
name: test-on-avx2

platform:
arch: amd64

steps:
- name: test
image: rust:1
environment:
# target-cpu=native enables all target features of the machine running the build.
RUSTFLAGS: '-C target-cpu=native'
commands:
- cargo build --verbose --all
- cargo test --verbose --all
---

# CI pipeline: build and test the whole workspace on amd64 with AVX2 turned off,
# so the non-AVX2 x86 code is what gets compiled and tested.
kind: pipeline
name: test-on-sse42

platform:
arch: amd64

steps:
- name: test
image: rust:1
environment:
# Start from the host CPU's feature set, then explicitly disable AVX2.
RUSTFLAGS: '-C target-cpu=native -C target-feature=-avx2'
commands:
- cargo build --verbose --all
- cargo test --verbose --all

---

# CI pipeline: build and test the whole workspace on amd64 with both AVX2 and
# SSE4.2 turned off.
# NOTE(review): which implementation the crate selects in this configuration
# depends on the cfg gates in src/lib.rs — confirm it is the intended one.
kind: pipeline
name: test-on-pre-sse42

platform:
arch: amd64

steps:
- name: test
image: rust:1
environment:
# Start from the host CPU's feature set, then explicitly disable AVX2 and SSE4.2.
RUSTFLAGS: '-C target-cpu=native -C target-feature=-avx2,-sse4.2'
commands:
- cargo build --verbose --all
- cargo test --verbose --all

---

# CI pipeline: build and test the whole workspace on arm64 with the crate's
# `neon` cargo feature enabled.
kind: pipeline
name: test-on-arm64

platform:
arch: arm64

steps:
- name: test
image: rust:1
commands:
# The build uses the nightly toolchain; presumably required by the unstable
# `feature(...)` gates the crate enables for NEON — TODO confirm.
- rustup default nightly
- rustup update
# Clean first so nothing built by another toolchain is reused.
- cargo clean && cargo +nightly build --verbose --all --features neon
- cargo +nightly test --verbose --all --features neon
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ jemallocator = { version = "0.3", optional = true }
perfcnt = { version = "0.4", optional = true }
getopts = { version = "0.2", optional = true }
colored = { version = "1.7", optional = true }
simd-lite = { version = "0.1.0", optional = true }



Expand All @@ -45,6 +46,8 @@ harness = false

[features]
default = ["swar-number-parsing", "serde_impl"]
# Support for ARM NEON SIMD
neon = ["simd-lite"]
# use 8 number at once parsing strategy
swar-number-parsing = []
# serde compatibility
Expand Down
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# SIMD Json for Rust   [![Build Status]][circleci.com] [![Windows Build Status]][appveyor.com] [![Latest Version]][crates.io]
# SIMD Json for Rust   [![Build Status]][drone.io] [![Windows Build Status]][appveyor.com] [![Latest Version]][crates.io]

[Build Status]: https://circleci.com/gh/Licenser/simdjson-rs/tree/master.svg?style=svg
[circleci.com]: https://circleci.com/gh/Licenser/simdjson-rs/tree/master
[Windows Build Status]: https://ci.appveyor.com/api/projects/status/0kf0v6hj5v2gite9?svg=true

[Build Status]: https://cloud.drone.io/api/badges/simd-lite/simdjson-rs/status.svg
[drone.io]: https://cloud.drone.io/simd-lite/simdjson-rs
[Windows Build Status]: https://ci.appveyor.com/api/projects/status/ffi2ese7dxse6pb8?svg=true
[appveyor.com]: https://ci.appveyor.com/project/Licenser/simdjson-rs
[Latest Version]: https://img.shields.io/crates/v/simd-json.svg
[crates.io]: https://crates.io/crates/simd-json
Expand All @@ -17,6 +18,8 @@

To take advantage of simdjson your system needs to be SIMD compatible. This means compiling with native CPU support and the given features. See [the cargo config in this repository](.cargo/config) for an example.

simd-json.rs supports AVX2, SSE4.2 and NEON.

### jemalloc

If you are writing performance-centric code, make sure to use jemalloc and not the system allocator (which has now become the default in Rust); it gives a very noticeable boost in performance.
Expand Down
1 change: 1 addition & 0 deletions data/fail/fail70.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["string contains bad UTF-8 �"]
1 change: 1 addition & 0 deletions data/fail/fail71.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["contains bad UTF-8 �"]
1 change: 1 addition & 0 deletions data/fail/fail72.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["contains bad UTF-8 �"]
1 change: 1 addition & 0 deletions data/fail/fail73.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["bad UTF-8 �"]
1 change: 1 addition & 0 deletions data/fail/fail74.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["UTF-8 �"]
1 change: 1 addition & 0 deletions data/fail/fail75.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[" �"]
1 change: 1 addition & 0 deletions data/fail/fail76.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["�"]
51 changes: 51 additions & 0 deletions src/avx2/generator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

use crate::value::generator::ESCAPED;
use std::io;

/// Writes the front of `string` to `writer` with JSON string escaping applied,
/// scanning the input 32 bytes at a time with AVX2 byte compares.
///
/// A byte needs escaping when it is `"`, `\`, or lower than `0x20`. For every
/// such byte found, everything before it is written verbatim, followed by its
/// escape sequence: the `ESCAPED` table entry for the byte is either `b'u'`
/// (emit `\u` plus four lowercase hex digits) or the character to put after a
/// backslash. `string` is then re-sliced to start right after the escaped
/// byte, `*idx` is reset to `0`, and `*len` is set to the new slice length.
///
/// The loop stops once fewer than 32 bytes remain between `*idx` and `*len`.
/// The already scanned prefix `string[0..*idx]` is then flushed and `string`
/// is advanced past it; the remaining tail is left for the caller.
/// NOTE(review): `*idx` and `*len` are not adjusted for that final re-slice —
/// confirm the caller accounts for this.
///
/// # Errors
///
/// Returns the first I/O error reported by `writer`.
///
/// # Safety
///
/// The CPU must support AVX2. The 32-byte load is unchecked, so the caller
/// must guarantee `*idx <= *len <= string.len()` on entry.
#[inline(always)]
pub unsafe fn write_str_simd<W>(
    writer: &mut W,
    string: &mut &[u8],
    len: &mut usize,
    idx: &mut usize,
) -> io::Result<()>
where
    W: std::io::Write,
{
    // Number of bytes examined per iteration (one 256-bit register).
    const CHUNK: usize = 32;

    let all_zero = _mm256_set1_epi8(0);
    let low_five_bits = _mm256_set1_epi8(0x1F as i8);
    let quote_lanes = _mm256_set1_epi8(b'"' as i8);
    let backslash_lanes = _mm256_set1_epi8(b'\\' as i8);

    while *len - *idx >= CHUNK {
        // Unaligned load of the next 32 input bytes.
        #[allow(clippy::cast_ptr_alignment)]
        let chunk: __m256i = _mm256_loadu_si256(string.as_ptr().add(*idx) as *const __m256i);

        // Lanes holding a backslash or a double quote.
        let is_backslash = _mm256_cmpeq_epi8(chunk, backslash_lanes);
        let is_quote = _mm256_cmpeq_epi8(chunk, quote_lanes);
        let is_bs_or_quote = _mm256_or_si256(is_backslash, is_quote);

        // A byte is below 0x20 exactly when masking it with 0x1F leaves it
        // unchanged, i.e. when xor-ing the byte with its masked copy gives 0.
        let masked = _mm256_and_si256(chunk, low_five_bits);
        let high_bits = _mm256_xor_si256(chunk, masked);
        let is_control = _mm256_cmpeq_epi8(high_bits, all_zero);

        // One bit per lane; bit i is set when byte i of the chunk needs escaping.
        let escape_mask = _mm256_movemask_epi8(_mm256_or_si256(is_bs_or_quote, is_control));

        if escape_mask == 0 {
            // Nothing to escape in this chunk: keep scanning.
            *idx += CHUNK;
            continue;
        }

        // Position (within `string`) of the first byte that needs escaping.
        let hit = *idx + escape_mask.trailing_zeros() as usize;
        stry!(writer.write_all(&string[0..hit]));

        let ch = string[hit];
        let escape = ESCAPED[ch as usize];
        if escape == b'u' {
            stry!(write!(writer, "\\u{:04x}", ch));
        } else {
            stry!(writer.write_all(&[b'\\', escape]));
        }

        // Restart the scan right behind the byte that was just escaped.
        *string = &string[hit + 1..];
        *idx = 0;
        *len = string.len();
    }

    // Flush what was scanned without finding anything to escape.
    stry!(writer.write_all(&string[0..*idx]));
    *string = &string[*idx..];
    Ok(())
}
3 changes: 2 additions & 1 deletion src/avx2/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod deser;
pub mod stage1;
pub mod utf8check;
pub mod utf8check;
pub mod generator;
35 changes: 31 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
#![deny(warnings)]

#![cfg_attr(target_feature = "neon", feature(
asm,
stdsimd,
repr_simd,
custom_inner_attributes,
aarch64_target_feature,
platform_intrinsics,
stmt_expr_attributes,
simd_ffi,
link_llvm_intrinsics,
rustc_attrs,
)
)]

#![cfg_attr(feature = "hints", feature(core_intrinsics))]
//! simdjson-rs is a Rust port of the simdjson C++ library. It follows
//! most of the design closely with a few exceptions to make it better
Expand Down Expand Up @@ -89,17 +104,25 @@ pub use crate::avx2::deser::*;
#[cfg(target_feature = "avx2")]
use crate::avx2::stage1::SIMDJSON_PADDING;

#[cfg(all(target_feature = "sse4.2", not(target_feature = "avx2")))]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))]
mod sse42;
#[cfg(all(target_feature = "sse4.2", not(target_feature = "avx2")))]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))]
pub use crate::sse42::deser::*;
#[cfg(all(target_feature = "sse4.2", not(target_feature = "avx2")))]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))]
use crate::sse42::stage1::SIMDJSON_PADDING;

#[cfg(all(target_feature = "neon", feature = "neon"))]
mod neon;
#[cfg(all(target_feature = "neon", feature = "neon"))]
pub use crate::neon::deser::*;
#[cfg(all(target_feature = "neon", feature = "neon"))]
use crate::neon::stage1::SIMDJSON_PADDING;

mod stage2;
pub mod value;

use crate::numberparse::Number;
#[cfg(not(target_feature = "neon"))]
use std::mem;
use std::str;

Expand Down Expand Up @@ -163,7 +186,11 @@ impl<'de> Deserializer<'de> {

let counts = Deserializer::validate(input, &structural_indexes)?;

let strings = Vec::with_capacity(len + SIMDJSON_PADDING);
// Set length to allow slice access in ARM code
let mut strings = Vec::with_capacity(len + SIMDJSON_PADDING);
unsafe {
strings.set_len(len + SIMDJSON_PADDING);
}

Ok(Deserializer {
counts,
Expand Down
Loading