diff --git a/Cargo.toml b/Cargo.toml index cf375d0..b2f4494 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" [lib] name = "rustfrc" -crate-type = ["cdylib"] +crate-type = ["cdylib", "lib"] [dependencies] numpy = "~0.21" @@ -16,7 +16,14 @@ num-traits = "~0.2.18" [dependencies.pyo3] version = "~0.21.1" -features = ["extension-module", "gil-refs"] +features = ["extension-module"] + +[dev-dependencies] +criterion = "0.3" + +[[bench]] +name = "rustfrc_bench" +harness = false [package.metadata.maturin] python-source = "python" diff --git a/benches/rustfrc_bench.rs b/benches/rustfrc_bench.rs new file mode 100644 index 0000000..5d1809a --- /dev/null +++ b/benches/rustfrc_bench.rs @@ -0,0 +1,31 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion, BatchSize}; +use rustfrc::{binom_split, pois_gen, sqr_abs, to_i32}; +use num_complex::Complex; +use ndarray_rand::rand::prelude::{Distribution, thread_rng}; +use ndarray_rand::rand_distr::Poisson; + +pub fn criterion_benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("sample-10"); + // Configure Criterion.rs to detect smaller differences and increase sample size to improve + // precision and counteract the resulting noise. + group.sample_size(10); + let size = [4000, 4000]; + let arr = pois_gen(&size, 200f64).unwrap(); + + let arr_complex = arr.map(|e| { + let mut rng = thread_rng(); + let a = Poisson::new(*e).unwrap().sample(&mut rng); + Complex::new(*e, a.to_owned()) + }); + + let arr_i32 = to_i32(arr.view()); + let pois_shape = [4000, 4000]; + + group.bench_function("binom", |b| b.iter_batched(|| arr_i32.to_owned(), |a| binom_split(black_box(a)), BatchSize::SmallInput)); + group.bench_function("pois", |b| b.iter(|| pois_gen(black_box(&pois_shape), black_box(100f64)))); + group.bench_function("sqr_abs", |b| b.iter_batched(|| arr_complex.to_owned(), |a| sqr_abs(black_box(a)), BatchSize::SmallInput)); + group.finish() +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 0804331..125ff0f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,11 +4,14 @@ use std::fmt::{Display, Formatter}; use std::marker::{Send, Sync}; use std::sync::atomic::{AtomicBool, Ordering}; -use ndarray::{Array, ArrayD, Dimension}; +use ndarray::parallel::prelude::*; +use ndarray::parallel::prelude::IntoParallelIterator; +use ndarray::{Array, ArrayD, ArrayView, Dimension, Zip}; use ndarray_rand::rand::prelude::{Distribution, thread_rng}; use ndarray_rand::rand_distr::{Binomial, Poisson, PoissonError}; use num_complex::{Complex, Complex32, Complex64}; use num_traits::{Float, Zero}; +use num_traits::real::Real; use numpy::{IntoPyArray, PyArrayDyn, PyReadonlyArrayDyn, PyArrayMethods}; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -16,14 +19,14 @@ use pyo3::prelude::*; use pyo3::PyObject; #[pymodule] -fn rustfrc(py: Python<'_>, m: &PyModule) -> PyResult<()> { - let internal = PyModule::new(py, "_internal")?; - internal.add_function(wrap_pyfunction!(binom_split_py, internal)?)?; - internal.add_function(wrap_pyfunction!(sqr_abs32_py, internal)?)?; - internal.add_function(wrap_pyfunction!(sqr_abs64_py, internal)?)?; - internal.add_function(wrap_pyfunction!(pois_gen_py, internal)?)?; +fn rustfrc(m: &Bound<'_, PyModule>) -> PyResult<()> { + let internal = PyModule::new_bound(m.py(), "_internal")?; + internal.add_function(wrap_pyfunction!(binom_split_py, &internal)?)?; + internal.add_function(wrap_pyfunction!(sqr_abs32_py, &internal)?)?; + internal.add_function(wrap_pyfunction!(sqr_abs64_py, &internal)?)?; + internal.add_function(wrap_pyfunction!(pois_gen_py, &internal)?)?; - m.add_submodule(internal)?; + m.add_submodule(&internal)?; Ok(()) } @@ -32,32 +35,32 @@ fn rustfrc(py: Python<'_>, m: &PyModule) -> PyResult<()> { /// binomial distribution (n, p) with n = pixel value and p = 0.5. Returns a single array. #[pyfunction] #[pyo3(text_signature = "a, /")] -fn binom_split_py<'py>(py: Python<'py>, a: PyReadonlyArrayDyn<'py, i32>) -> PyResult<&'py PyArrayDyn> { +fn binom_split_py<'py>(py: Python<'py>, a: PyReadonlyArrayDyn<'py, i32>) -> PyResult>> { let a = a.to_owned_array(); binom_split(a) .map_err(|e| PyValueError::new_err(format!("{}", e))) - .map(|a| a.into_pyarray(py)) + .map(|a| a.into_pyarray_bound(py)) } /// Takes an array (np.ndarray with dtype complex64) and takes the absolute value and then squares /// it, element-wise. #[pyfunction] #[pyo3(text_signature = "a, /")] -fn sqr_abs32_py<'py>(py: Python<'py>, a: PyReadonlyArrayDyn<'py, Complex32>) -> &'py PyArrayDyn { +fn sqr_abs32_py<'py>(py: Python<'py>, a: PyReadonlyArrayDyn<'py, Complex32>) -> Bound<'py, PyArrayDyn> { let a = a.to_owned_array(); - sqr_abs(a).into_pyarray(py) + sqr_abs(a).into_pyarray_bound(py) } /// Takes an array (np.ndarray with dtype complex128) and takes the absolute value and then squares /// it, element-wise. #[pyfunction] #[pyo3(text_signature = "a, /")] -fn sqr_abs64_py<'py>(py: Python<'py>, a: PyReadonlyArrayDyn<'py, Complex64>) -> &'py PyArrayDyn { +fn sqr_abs64_py<'py>(py: Python<'py>, a: PyReadonlyArrayDyn<'py, Complex64>) -> Bound<'py, PyArrayDyn> { let a = a.to_owned_array(); - sqr_abs(a).into_pyarray(py) + sqr_abs(a).into_pyarray_bound(py) } /// Generates an array (np.ndarray with dtype float64) by sampling a Poisson distribution with @@ -65,17 +68,17 @@ fn sqr_abs64_py<'py>(py: Python<'py>, a: PyReadonlyArrayDyn<'py, Complex64>) -> /// non-negative ints. #[pyfunction] #[pyo3(text_signature = "a, /")] -fn pois_gen_py(py: Python, shape: PyObject, lambda: f64 ) -> PyResult<&PyArrayDyn> { +fn pois_gen_py<'py>(py: Python<'py>, shape: PyObject, lambda: f64 ) -> PyResult>> { let shape_vec: Vec = shape.extract(py)?; let shape = shape_vec.as_slice(); pois_gen(shape, lambda) .map_err(|e| PyValueError::new_err(format!("{}", e))) - .map(|a| a.into_pyarray(py)) + .map(|a| a.into_pyarray_bound(py)) } #[derive(Debug)] -struct ToUsizeError {} +pub struct ToUsizeError {} impl Display for ToUsizeError { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { @@ -88,12 +91,33 @@ impl Error for ToUsizeError {} /// Takes an ndarray (i32, dimension D) and splits every pixel value according to the /// binomial distribution (n, p) with n = element value and p = 0.5. Returns a single array. -fn binom_split(mut a: Array) -> Result, ToUsizeError> { +pub fn binom_split(mut a: Array) -> Result, ToUsizeError> { // AtomicBool is thread-safe, and allows for communicating an error state occurred across threads // We initialize it with the value false, since no error occurred let to_unsized_failed = AtomicBool::new(false); // We map all values in parallel, since they do not depend on each other + // let a = a.mapv(|i| { + // if !to_unsized_failed.load(Ordering::Relaxed) { + // // We use thread rng, which is fast + // let mut rng = thread_rng(); + // // We try to convert i32 to u64 (which is required for Binomial) + // // If it fails, we indicate a failure has occurred + // // Unfortunately it is not possible to escape from the loop immediately + // let n = u64::try_from(i).unwrap_or_else(|_| { + // to_unsized_failed.store(true, Ordering::Relaxed); + // 0 + // }); + // // Since the input fit into an i32 and the binomial output is always less, we know the output + // // will also fit into an i32 + // Binomial::new(n, 0.5).unwrap().sample(&mut rng) as i32 + // } + // // We just keep the rest unchanged if a failure occurred + // else { + // i + // } + // }); + a.par_mapv_inplace(|i| { // If no failure has occurred, we continue // We use Relaxed Ordering because the order in which stores and loads occur does not matter @@ -108,6 +132,8 @@ fn binom_split(mut a: Array) -> Result, ToUs to_unsized_failed.store(true, Ordering::Relaxed); 0 }); + // Since the input fit into an i32 and the binomial output is always less, we know the output + // will also fit into an i32 Binomial::new(n, 0.5).unwrap().sample(&mut rng) as i32 } // We just keep the rest unchanged if a failure occurred @@ -125,25 +151,35 @@ fn binom_split(mut a: Array) -> Result, ToUs /// Takes an ndarray (dimension D and complex number of generic float F) and computes the absolute /// value and then square for each element. -fn sqr_abs(mut a: Array, D>) -> Array { - // We map all values in parallel, since they do not depend on each other - // As there is no standard parallel map, we first map in place - a.par_mapv_inplace(|i| { let c: Complex = Complex::from({ - i.norm_sqr() - }); c }); - // Then we get the real part - a.mapv(|i| { - i.re - }) +pub fn sqr_abs(a: Array, D>) -> Array { + let v: Vec = a.par_iter().map(|i| i.norm_sqr()).collect(); + let v_arr = Array::from_shape_vec(a.raw_dim(), v).unwrap(); + + v_arr + + // a.mapv(|i| { + // i.norm_sqr() + // }) } /// Generates an ndarray (dynamic dimension) by sampling a Poisson distribution with parameter /// lambda for each element. Takes a lambda parameter (positive f64) and a shape slice. -fn pois_gen(shape: &[usize], lambda: f64) -> Result, PoissonError> { +pub fn pois_gen(shape: &[usize], lambda: f64) -> Result, PoissonError> { if lambda.is_sign_negative() || lambda.is_infinite() || lambda.is_nan() || lambda.is_zero() { return Err(PoissonError::ShapeTooSmall); } + // let size = shape.iter().product(); + // let mut v = Vec::with_capacity(size); + + // (0..size).into_par_iter().map(|_i| { + // let mut rng = thread_rng(); + + // Poisson::new(lambda).unwrap().sample(&mut rng) + // }).collect_into_vec(&mut v); + + // let a = Array::from_shape_vec(shape, v).unwrap(); + let mut a = ArrayD::::from_elem(shape, lambda); a.par_mapv_inplace(|l| { let mut rng = thread_rng(); @@ -153,6 +189,13 @@ fn pois_gen(shape: &[usize], lambda: f64) -> Result, PoissonError> { Ok(a) } +/// Panics if the input numbers cannot be cast to i32, use with caution! +pub fn to_i32(a: ArrayView) -> Array { + a.map(|l| { + l.to_i32().unwrap() + }) +} + #[cfg(test)] mod tests { use super::*;