diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml index 92878f19..4fe54a10 100644 --- a/.github/workflows/run_test.yml +++ b/.github/workflows/run_test.yml @@ -4,7 +4,7 @@ name: CI jobs: check: - name: Check+Test + name: Check+Test default features runs-on: ubuntu-latest strategy: matrix: @@ -60,47 +60,58 @@ jobs: command: fmt args: -- --check - check_arm64: - name: Check and test Linux arm 64bit + check_no_features: + name: Check+Test no features runs-on: ubuntu-latest + strategy: + matrix: + rust: + - stable + - beta + - nightly + - 1.37 steps: - name: Checkout sources uses: actions/checkout@v2 - - name: Install stable toolchain + - name: Install toolchain uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - target: aarch64-unknown-linux-gnu + toolchain: ${{ matrix.rust }} override: true - name: Run cargo check uses: actions-rs/cargo@v1 with: command: check - use-cross: true - args: --target aarch64-unknown-linux-gnu + args: --no-default-features - - name: Run cargo test for arm + - name: Run cargo test uses: actions-rs/cargo@v1 with: command: test - use-cross: true - args: --release --target aarch64-unknown-linux-gnu + args: --no-default-features check_arm64_neon: name: Check and test Linux arm 64bit with neon runs-on: ubuntu-latest + strategy: + matrix: + rust: + - stable + - beta + - nightly + - 1.61 steps: - name: Checkout sources uses: actions/checkout@v2 - - name: Install nightly toolchain + - name: Install toolchain uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: nightly + toolchain: ${{ matrix.rust }} target: aarch64-unknown-linux-gnu override: true @@ -109,14 +120,14 @@ jobs: with: command: check use-cross: true - args: --features neon-nightly --target aarch64-unknown-linux-gnu + args: --features neon --target aarch64-unknown-linux-gnu - name: Run cargo test for arm uses: actions-rs/cargo@v1 with: command: test use-cross: true - args: --release --features neon-nightly --target 
aarch64-unknown-linux-gnu + args: --release --features neon --target aarch64-unknown-linux-gnu check_x86: name: Check and test Linux x86 32bit diff --git a/Cargo.toml b/Cargo.toml index 77650620..b742055f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ categories = ["algorithms", "compression", "multimedia::encoding", "science"] license = "MIT OR Apache-2.0" [features] -default = ["avx", "sse"] +default = ["avx", "sse", "neon"] # On x86_64, the "avx" feature enables compilation of AVX-acclerated code. # Similarly, the "sse" feature enables SSE-accelerated code. @@ -22,10 +22,10 @@ default = ["avx", "sse"] # If neither instruction set is available, it will fall back to the scalar code. # On every other platform, these features do nothing, and RustFFT will behave like they are not set. # -# On AArch64, the "neon-nightly" feature enables compilation of Neon-accelerated code. It requires a nightly compiler, and is disabled by default. +# On AArch64, the "neon" feature enables compilation of Neon-accelerated code. avx = [] sse = [] -neon-nightly = [] +neon = [] [dependencies] @@ -39,3 +39,6 @@ primal-check = "0.3.1" [dev-dependencies] rand = "0.8" paste = "1.0.4" + +[build-dependencies] +version_check = "0.9" diff --git a/README.md b/README.md index 43659e73..444f63d7 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,17 @@ RustFFT is a high-performance FFT library written in pure Rust. It can compute FFTs of any size, including prime-number sizes, in O(nlogn) time. +Unlike previous major versions, RustFFT 5.0 has several breaking changes compared to RustFFT 4.0. Check out the [Upgrade Guide](/UpgradeGuide4to5.md) for a walkthrough of the changes RustFFT 5.0 requires. + +## SIMD acceleration +### x86_64 RustFFT supports the AVX instruction set for increased performance. 
No special code is needed to activate AVX: Simply plan a FFT using the FftPlanner on a machine that supports the `avx` and `fma` CPU features, and RustFFT will automatically switch to faster AVX-accelerated algorithms. For machines that do not have AVX, it also supports the SSE4.1 instruction set. As for AVX, this is enabled automatically when using the FftPlanner. -Unlike previous major versions, RustFFT 5.0 has several breaking changes compared to RustFFT 4.0. Check out the [Upgrade Guide](/UpgradeGuide4to5.md) for a walkthrough of the changes RustFFT 5.0 requires. +### AArch64 +RustFFT optionally supports the NEON instruction set in 64-bit Arm, AArch64. This optional feature requires a newer rustc version: Rustc 1.61. See [Features](#features) for more details. + ## Usage @@ -41,7 +47,7 @@ Disabling them reduces compile time and binary size. On other platform than x86_64, these features do nothing and RustFFT will behave like they are not set. -On AArch64, the `neon-nightly` feature enables compilation of Neon-accelerated code. It requires a nightly compiler, and is disabled by default. Be warned that new nightly versions may break RustFFT's Neon support. +On AArch64, the `neon` feature enables compilation of Neon-accelerated code. This requires rustc 1.61 or newer, and is enabled by default. If this feature is disabled, rustc 1.37 or newer is required. ## Stability/Future Breaking Changes diff --git a/build.rs b/build.rs new file mode 100644 index 00000000..aa40b30b --- /dev/null +++ b/build.rs @@ -0,0 +1,36 @@ +extern crate version_check; + +// All platforms except AArch64 with neon support enabled. +static MIN_RUSTC: &str = "1.37.0"; +// On AArch64 with neon support enabled. 
+static MIN_RUSTC_NEON: &str = "1.61.0"; + +// Fails the build with a readable message when rustc is older than this configuration needs. +// +// A build script is compiled for the host, so `cfg(target_arch = ...)` inside it describes the +// machine running the build, not the platform RustFFT is being compiled for. Cargo exports the +// target architecture and the enabled features as environment variables; read those instead. +fn main() { + println!("cargo:rerun-if-changed=build.rs"); + // An unset variable becomes "", which never matches "aarch64". + let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); + let neon_enabled = std::env::var_os("CARGO_FEATURE_NEON").is_some(); + let needs_neon = neon_enabled && target_arch == "aarch64"; + let min_rustc = if needs_neon { MIN_RUSTC_NEON } else { MIN_RUSTC }; + // `None` means the rustc version could not be determined at all. + match version_check::is_min_version(min_rustc) { + Some(true) => {} + Some(false) if needs_neon => panic!( + "\n====\nUnsupported rustc version {}\nRustFFT with neon support needs at least {}\nIf the 'neon' feature flag is disabled, the minimum version is {}\n====\n", + version_check::Version::read().unwrap(), + MIN_RUSTC_NEON, + MIN_RUSTC + ), + Some(false) => panic!( + "\n====\nUnsupported rustc version {}\nRustFFT needs at least {}\n====\n", + version_check::Version::read().unwrap(), + MIN_RUSTC + ), + None => panic!("Unable to determine rustc version."), + }; +} diff --git a/src/lib.rs b/src/lib.rs index 3f920ad3..b52f5ac5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,4 @@ #![cfg_attr(all(feature = "bench", test), feature(test))] -#![cfg_attr( - all(feature = "neon-nightly", target_arch = "aarch64"), - feature(aarch64_target_feature) -)] -#![cfg_attr( - all(feature = "neon-nightly", target_arch = "aarch64"), - feature(stdsimd) -)] //! RustFFT is a high-performance FFT library written in pure Rust. //! @@ -17,7 +9,7 @@ //! For machines that do not have AVX, RustFFT also supports the SSE4.1 instruction set. //! As for AVX, this is enabled automatically when using the FftPlanner. //! -//! Additionally, there is (opt-in, nightly-only) support for the Neon instruction set on AArch64. +//! Additionally, there is support for the Neon instruction set on AArch64 (enabled by default, requires rustc 1.61 or newer). //! //! ### Usage //! @@ -72,11 +64,11 @@ //! supported and its feature flag is enabled, RustFFT will use AVX instead of SSE4.1. //! //! 
On every platform besides x86_64, this feature does nothing, and RustFFT will behave like it's not set. -//! * `neon` (Experimental, disabled by default) +//! * `neon` (Enabled by default) //! //! On AArch64 (64-bit ARM) the `neon` feature enables compilation of Neon-accelerated code. Enabling it improves //! performance, while disabling it reduces compile time and binary size. -//! Note that Rust's Neon support is very new, and the `neon` feature must use a nightly compiler. +//! Note that Rust's Neon support requires using rustc 1.61 or newer. //! //! ### Normalization //! @@ -410,13 +402,13 @@ mod sse { pub use self::sse::sse_planner::FftPlannerSse; -// Algorithms implemented to use Neon instructions. Only compiled on AArch64, and only compiled if the "neon-nightly" feature flag is set. -#[cfg(all(target_arch = "aarch64", feature = "neon-nightly"))] +// Algorithms implemented to use Neon instructions. Only compiled on AArch64, and only compiled if the "neon" feature flag is set. +#[cfg(all(target_arch = "aarch64", feature = "neon"))] mod neon; -// If we're not on AArch64, or if the "neon-nightly" feature was disabled, keep a stub implementation around that has the same API, but does nothing +// If we're not on AArch64, or if the "neon" feature was disabled, keep a stub implementation around that has the same API, but does nothing // That way, users can write code using the Neon planner and compile it on any platform -#[cfg(not(all(target_arch = "aarch64", feature = "neon-nightly")))] +#[cfg(not(all(target_arch = "aarch64", feature = "neon")))] mod neon { pub mod neon_planner { use crate::{Fft, FftDirection, FftNum}; diff --git a/src/neon/neon_butterflies.rs b/src/neon/neon_butterflies.rs index d4fc18d6..c4515d82 100644 --- a/src/neon/neon_butterflies.rs +++ b/src/neon/neon_butterflies.rs @@ -17,8 +17,6 @@ use super::neon_vector::{NeonArray, NeonArrayMut}; macro_rules! 
boilerplate_fft_neon_f32_butterfly { ($struct_name:ident, $len:expr, $direction_fn:expr) => { impl $struct_name { - //#[target_feature(enable = "neon")] - //#[inline(always)] pub(crate) unsafe fn perform_fft_butterfly(&self, buffer: &mut [Complex]) { self.perform_fft_contiguous( RawSlice::new_transmuted(buffer), @@ -26,8 +24,6 @@ macro_rules! boilerplate_fft_neon_f32_butterfly { ); } - //#[target_feature(enable = "neon")] - //#[inline(always)] pub(crate) unsafe fn perform_parallel_fft_butterfly(&self, buffer: &mut [Complex]) { self.perform_parallel_fft_contiguous( RawSlice::new_transmuted(buffer), @@ -36,7 +32,6 @@ macro_rules! boilerplate_fft_neon_f32_butterfly { } // Do multiple ffts over a longer vector inplace, called from "process_with_scratch" of Fft trait - //#[target_feature(enable = "neon")] pub(crate) unsafe fn perform_fft_butterfly_multi( &self, buffer: &mut [Complex], @@ -52,7 +47,6 @@ macro_rules! boilerplate_fft_neon_f32_butterfly { } // Do multiple ffts over a longer vector outofplace, called from "process_outofplace_with_scratch" of Fft trait - //#[target_feature(enable = "neon")] pub(crate) unsafe fn perform_oop_fft_butterfly_multi( &self, input: &mut [Complex], diff --git a/src/neon/neon_planner.rs b/src/neon/neon_planner.rs index d6d39fc8..6d5286a4 100644 --- a/src/neon/neon_planner.rs +++ b/src/neon/neon_planner.rs @@ -160,7 +160,7 @@ impl FftPlannerNeon { /// Returns `Ok(planner_instance)` if this machine has the required instruction sets. /// Returns `Err(())` if some instruction sets are missing. pub fn new() -> Result { - if is_aarch64_feature_detected!("neon") { + if std::arch::is_aarch64_feature_detected!("neon") { // Ideally, we would implement the planner with specialization. // Specialization won't be on stable rust for a long time though, so in the meantime, we can hack around it. //