make simd an optional feature
einstein8612 committed Feb 8, 2024
1 parent d742dd4 commit 213d49a
Showing 10 changed files with 151 additions and 149 deletions.
6 changes: 6 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,6 @@
{
"rust-analyzer.linkedProjects": [
".\\Cargo.toml",
".\\Cargo.toml"
]
}
3 changes: 2 additions & 1 deletion Cargo.toml
@@ -10,7 +10,8 @@ documentation = "https://github.com/einstein8612/linearalgebra-rust/"
repository = "https://github.com/einstein8612/linearalgebra-rust/"
readme = "README.md"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[features]
+simd = []

[dependencies]
rayon = "1.7"
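Note on the new feature flag: because simd is declared as an empty feature, the SIMD code paths are only compiled when the feature is explicitly requested, and a default build no longer needs the nightly portable_simd gate (see the src/lib.rs change below). A minimal sketch of how the feature might be enabled — the crate name and version here are assumptions based on the repository name, not part of this commit:

    # in a downstream crate's Cargo.toml
    [dependencies]
    linearalgebra = { version = "*", features = ["simd"] }

or, when working inside this repository, cargo +nightly build --features simd.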
88 changes: 38 additions & 50 deletions benches/matrix_bench.rs
@@ -20,8 +20,7 @@ fn setup(len: usize) -> (Vec<f64>, Vec<f64>) {

#[bench]
fn matrix_simple_multiplication_bench(b: &mut Bencher) {

-let (m1,m2) = setup(3000 * 32);
+let (m1, m2) = setup(3000 * 32);

let matrix1 = Matrix::new(3000, 32, m1).unwrap();
let matrix2 = Matrix::new(32, 3000, m2).unwrap();
@@ -33,21 +32,17 @@ fn matrix_simple_multiplication_bench(b: &mut Bencher) {

#[bench]
fn matrix_trivial_big_multiplication_bench(b: &mut Bencher) {

-let (m1,m2) = setup(384 * 384);
+let (m1, m2) = setup(384 * 384);

let matrix1 = Matrix::new(384, 384, m1).unwrap();
let matrix2 = Matrix::new(384, 384, m2).unwrap();

-b.iter(|| {
-matrix1.trivial_product_matrix(&matrix2)
-})
+b.iter(|| matrix1.trivial_product_matrix(&matrix2))
}

#[bench]
fn matrix_trivial_multiplication_bench(b: &mut Bencher) {

-let (m1,m2) = setup(3000 * 32);
+let (m1, m2) = setup(3000 * 32);

let matrix1 = Matrix::new(3000, 32, m1).unwrap();
let matrix2 = Matrix::new(32, 3000, m2).unwrap();
@@ -59,8 +54,7 @@ fn matrix_trivial_multiplication_bench(b: &mut Bencher) {

#[bench]
fn matrix_trivial_multiplication_small_bench(b: &mut Bencher) {

-let (m1,m2) = setup(40 * 40);
+let (m1, m2) = setup(40 * 40);

let matrix1 = Matrix::new(40, 40, m1).unwrap();
let matrix2 = Matrix::new(40, 40, m2).unwrap();
@@ -70,36 +64,20 @@ fn matrix_trivial_multiplication_small_bench(b: &mut Bencher) {
})
}

-#[bench]
-fn matrix_multiplication_1k_bench(b: &mut Bencher) {
-
-let (m1,m2) = setup(1024*1024);
-
-let matrix1 = Matrix::new(1024, 1024, m1).unwrap();
-let matrix2 = Matrix::new(1024, 1024, m2).unwrap();
-
-b.iter(|| {
-let _ = matrix1.simd_product_matrix(&matrix2);
-})
-}

#[bench]
fn matrix_multiplication_384_bench(b: &mut Bencher) {

-let (m1,m2) = setup(384 * 384);
+let (m1, m2) = setup(384 * 384);

let matrix1 = Matrix::new(384, 384, m1).unwrap();
let matrix2 = Matrix::new(384, 384, m2).unwrap();

-b.iter(|| {
-matrix1.product_matrix(&matrix2)
-})
+b.iter(|| matrix1.product_matrix(&matrix2))
}

#[test]
fn big_matrix_multiplication_test() {
let pre_random = Instant::now();
-let (m1,m2) = setup(3000 * 32);
+let (m1, m2) = setup(3000 * 32);
println!("{}", pre_random.elapsed().as_millis());

let multiplication = Instant::now();
@@ -111,30 +89,40 @@ fn big_matrix_multiplication_test() {
println!("{}", multiplication.elapsed().as_millis())
}

-#[bench]
-fn simd_f64_matrix_multiplication_384_bench(b: &mut Bencher) {
+#[cfg(feature = "simd")]
+mod simd_tests {
+use super::*;
+#[bench]
+fn simd_f64_matrix_multiplication_1k_bench(b: &mut Bencher) {
+let (m1, m2) = setup(1024 * 1024);

-let (m1,m2) = setup(384 * 384);
+let matrix1 = Matrix::new(1024, 1024, m1).unwrap();
+let matrix2 = Matrix::new(1024, 1024, m2).unwrap();

-let matrix1 = Matrix::new(384, 384, m1).unwrap();
-let matrix2 = Matrix::new(384, 384, m2).unwrap();
+b.iter(|| {
+let _ = matrix1.simd_product_matrix(&matrix2);
+})
+}

-b.iter(|| {
-matrix1.simd_product_matrix(&matrix2)
-})
-}
+#[bench]
+fn simd_f64_matrix_multiplication_384_bench(b: &mut Bencher) {
+let (m1, m2) = setup(384 * 384);

-#[bench]
-fn simd_f32_matrix_multiplication_384_bench(b: &mut Bencher) {
+let matrix1 = Matrix::new(384, 384, m1).unwrap();
+let matrix2 = Matrix::new(384, 384, m2).unwrap();

-let (m1,m2) = setup(384 * 384);
-let m1_32: Vec<f32> = m1.iter().map(|x| *x as f32).collect();
-let m2_32: Vec<f32> = m2.iter().map(|x| *x as f32).collect();
+b.iter(|| matrix1.simd_product_matrix(&matrix2))
+}

-let matrix1 = Matrix::new(384, 384, m1_32).unwrap();
-let matrix2 = Matrix::new(384, 384, m2_32).unwrap();
+#[bench]
+fn simd_f32_matrix_multiplication_384_bench(b: &mut Bencher) {
+let (m1, m2) = setup(384 * 384);
+let m1_32: Vec<f32> = m1.iter().map(|x| *x as f32).collect();
+let m2_32: Vec<f32> = m2.iter().map(|x| *x as f32).collect();

-b.iter(|| {
-matrix1.simd_product_matrix(&matrix2)
-})
-}
+let matrix1 = Matrix::new(384, 384, m1_32).unwrap();
+let matrix2 = Matrix::new(384, 384, m2_32).unwrap();

+b.iter(|| matrix1.simd_product_matrix(&matrix2))
+}
+}
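Note on running these benches: #[bench] and test::Bencher require a nightly toolchain in any case, and the simd_tests module above is additionally gated on the new feature, so the invocations would look roughly as follows (commands are illustrative, not taken from this commit):

    cargo +nightly bench                     # trivial/rayon benches only
    cargo +nightly bench --features simd     # also builds and runs the simd_tests benches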
2 changes: 1 addition & 1 deletion src/lib.rs
@@ -1,4 +1,4 @@
-#![feature(portable_simd)]
+#![cfg_attr(feature = "simd", feature(portable_simd))]

mod numlib;

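portable_simd is an unstable, nightly-only feature gate, so wrapping it in cfg_attr is what lets the crate build on a stable toolchain whenever the simd feature is disabled; code that wants the SIMD kernel has to be gated on the same flag. A rough sketch of how the crate could expose one entry point with a stable fallback — the helper below is hypothetical and not part of this commit:

    use crate::matrix::Matrix;

    // Hypothetical convenience wrapper: use the SIMD kernel when the crate is
    // built with --features simd, otherwise fall back to the rayon-based
    // product_matrix.
    #[cfg(feature = "simd")]
    pub fn multiply(a: &Matrix<f64>, b: &Matrix<f64>) -> Matrix<f64> {
        a.simd_product_matrix(b).unwrap()
    }

    #[cfg(not(feature = "simd"))]
    pub fn multiply(a: &Matrix<f64>, b: &Matrix<f64>) -> Matrix<f64> {
        a.product_matrix(b).unwrap()
    }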
64 changes: 2 additions & 62 deletions src/matrix/mat_mul.rs → src/matrix/mat_mul/mat_mul.rs
@@ -1,12 +1,8 @@
-use super::Matrix;
+use crate::matrix::Matrix;

use rayon::prelude::*;
-const CHUNK_SIZE: usize = 8usize;

-use std::{
-ops::{Add, AddAssign, Mul},
-simd::{Simd, SimdElement},
-};
+use std::ops::{Add, Mul};
use crate::numlib::Zero;

/**
@@ -108,59 +104,3 @@ impl<T: Send + Sync + Copy + Zero + Add<T, Output = T> + Mul<T, Output = T> + st
Matrix::new(other.width, self.height, res)
}
}

impl<T: Send + Sync + Zero + SimdElement + AddAssign> Matrix<T> where Simd<T, 8>: AddAssign + Mul<Output = Simd<T,8>> {
// impl<T> Matrix<T> {
pub fn simd_product_matrix(
&self,
other: &Matrix<T>,
) -> Result<Matrix<T>, &'static str> {
let data = &self.data;

let mut transposed_b = vec![T::zero(); other.width() * other.height()];
for i in 0..other.height() {
for j in 0..other.width() {
transposed_b[j * other.height() + i] = other[(i, j)];
}
}

let chunks = self.width() / CHUNK_SIZE;
let left = chunks * CHUNK_SIZE;

let res = (0..self.height() * other.width())
.into_par_iter()
.map(|index| {
let row = index / other.width();
let column = index % self.height();

let mut total_simd = Simd::<T, CHUNK_SIZE>::splat(T::zero());
for k in 0..chunks {
let simd_a =
Simd::from_slice(&data[row * self.width() + k * CHUNK_SIZE..]);
let simd_b =
Simd::from_slice(&transposed_b[column * other.height() + k * CHUNK_SIZE..]);

let multiplied = simd_a * simd_b;
total_simd += multiplied;
}

let mut a_simd = Simd::splat(T::zero());
let mut b_simd = Simd::splat(T::zero());

for k in left..self.width() {
a_simd[k - left] = data[row * self.width() + k];
b_simd[k - left] = transposed_b[column * other.height() + k];
}

total_simd += a_simd * b_simd;

let mut total = T::zero();
for end in total_simd.to_array().iter() {
total += *end;
}
total
}).collect();

Ok(Matrix::new(other.width(), self.height(), res).unwrap())
}
}
65 changes: 65 additions & 0 deletions src/matrix/mat_mul/mat_mul_simd.rs
@@ -0,0 +1,65 @@
use crate::matrix::Matrix;
use crate::numlib::Zero;
use std::simd::SimdElement;
use std::ops::AddAssign;
use std::simd::Simd;
use std::ops::Mul;
use rayon::prelude::*;

const CHUNK_SIZE: usize = 8usize;

impl<T: Send + Sync + Zero + SimdElement + AddAssign> Matrix<T> where Simd<T, 8>: AddAssign + Mul<Output = Simd<T,8>> {
// impl<T> Matrix<T> {
pub fn simd_product_matrix(
&self,
other: &Matrix<T>,
) -> Result<Matrix<T>, &'static str> {
let data = &self.data;

let mut transposed_b = Vec::with_capacity(other.size);
for row in 0..other.width {
for col in 0..other.height {
transposed_b.push(other[(col, row)]);
}
}

let chunks = self.width() / CHUNK_SIZE;
let left = chunks * CHUNK_SIZE;

let res = (0..self.height() * other.width())
.into_par_iter()
.map(|index| {
let row = index / other.width();
let column = index % self.height();

let mut total_simd = Simd::<T, CHUNK_SIZE>::splat(T::zero());
for k in 0..chunks {
let simd_a =
Simd::from_slice(&data[row * self.width() + k * CHUNK_SIZE..]);
let simd_b =
Simd::from_slice(&transposed_b[column * other.height() + k * CHUNK_SIZE..]);

let multiplied = simd_a * simd_b;
total_simd += multiplied;
}

let mut a_simd = Simd::splat(T::zero());
let mut b_simd = Simd::splat(T::zero());

for k in left..self.width() {
a_simd[k - left] = data[row * self.width() + k];
b_simd[k - left] = transposed_b[column * other.height() + k];
}

total_simd += a_simd * b_simd;

let mut total = T::zero();
for end in total_simd.to_array().iter() {
total += *end;
}
total
}).collect();

Ok(Matrix::new(other.width(), self.height(), res).unwrap())
}
}
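For reference, the kernel above transposes other so both operands are read contiguously, accumulates 8-lane Simd chunks, folds the ragged tail into one final partial vector, and reduces the lanes at the end, with rayon parallelising over output cells. A small gated usage sketch, reusing the values from the test this commit removes from tests/matrix.rs further down — the test name and the cfg gating of the test are additions of this note, not part of the commit:

    #[cfg(feature = "simd")]
    #[test]
    fn simd_product_smoke_test() {
        // 3x3 inputs and expected product taken verbatim from the removed test.
        let a = Matrix::new(3, 3, vec![1f64, 1f64, 2f64, 3f64, 4f64, 5f64, 6f64, 1f64, 2f64]).unwrap();
        let b = Matrix::new(3, 3, vec![1f64, 9f64, 2f64, 7f64, 1f64, 5f64, 3f64, 8f64, 2f64]).unwrap();

        assert_eq!(
            a.simd_product_matrix(&b).unwrap().as_vec(),
            &vec![14f64, 26f64, 11f64, 46f64, 71f64, 36f64, 19f64, 71f64, 21f64]
        )
    }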
4 changes: 4 additions & 0 deletions src/matrix/mat_mul/mod.rs
@@ -0,0 +1,4 @@
pub mod mat_mul;

#[cfg(feature = "simd")]
pub mod mat_mul_simd;
3 changes: 0 additions & 3 deletions src/matrix/mod.rs
@@ -2,9 +2,6 @@ mod mat_impl;
mod mat_mul;
mod mat_display;

-#[cfg(feature = "fast")]
-pub mod fast;

#[derive(Debug, Clone)]
pub struct Matrix<T> {
width: usize,
32 changes: 0 additions & 32 deletions tests/matrix.rs
@@ -93,38 +93,6 @@ mod matrix_tests {
)
}

#[test]
fn matrix_trivial_matrix_product_test() {
let matrix = Matrix::new(
3,
3,
vec![1f64, 1f64, 2f64, 3f64, 4f64, 5f64, 6f64, 1f64, 2f64],
)
.unwrap();
let matrix2 = Matrix::new(
3,
3,
vec![1f64, 9f64, 2f64, 7f64, 1f64, 5f64, 3f64, 8f64, 2f64],
)
.unwrap();

assert_eq!(
matrix.simd_product_matrix(&matrix2).unwrap().as_vec(),
&vec![14f64, 26f64, 11f64, 46f64, 71f64, 36f64, 19f64, 71f64, 21f64]
)
}

#[test]
fn matrix_trivial_matrix_product_nonsquare_test() {
let matrix = Matrix::new(3, 2, vec![3f64, 4f64, 5f64, 6f64, 1f64, 2f64]).unwrap();
let matrix2 = Matrix::new(2, 3, vec![7f64, 1f64, 5f64, 3f64, 8f64, 2f64]).unwrap();

assert_eq!(
matrix.simd_product_matrix(&matrix2).unwrap().as_vec(),
&vec![81f64, 25f64, 63f64, 13f64]
)
}

#[test]
fn transpose_nonsquare_test() {
let matrix = Matrix::new(3, 2, vec![1, 2, 3, 4, 5, 6]).unwrap();