Skip to content

Commit

Permalink
perf(simd): avx2 fallback to swar instead of sse4.2 (#181)
Browse files Browse the repository at this point in the history
This has a massive impact on default runtime performance, improving how the code is lowered/inlined (falling back to SSE4.2 for a handful of bytes was wasteful).

Should supersede #175, #156
  • Loading branch information
AaronO authored Sep 3, 2024
1 parent fff851f commit 47853d7
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions src/simd/avx2.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::iter::Bytes;

#[inline]
#[target_feature(enable = "avx2", enable = "sse4.2")]
#[target_feature(enable = "avx2")]
pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_url_char_32_avx(bytes.as_ref());
Expand All @@ -11,8 +11,8 @@ pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
return;
}
}
// do both, since avx2 only works when bytes.len() >= 32
super::sse42::match_uri_vectored(bytes)
// NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2
super::swar::match_uri_vectored(bytes)
}

#[inline(always)]
Expand Down Expand Up @@ -56,7 +56,7 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
r.trailing_zeros() as usize
}

#[target_feature(enable = "avx2", enable = "sse4.2")]
#[target_feature(enable = "avx2")]
pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_header_value_char_32_avx(bytes.as_ref());
Expand All @@ -66,8 +66,8 @@ pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
return;
}
}
// do both, since avx2 only works when bytes.len() >= 32
super::sse42::match_header_value_vectored(bytes)
// NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2
super::swar::match_header_value_vectored(bytes)
}

#[inline(always)]
Expand Down

0 comments on commit 47853d7

Please sign in to comment.