diff --git a/kifmm/examples/single_node_helmholtz.rs b/kifmm/examples/single_node_helmholtz.rs index f7e45f23..962a772e 100644 --- a/kifmm/examples/single_node_helmholtz.rs +++ b/kifmm/examples/single_node_helmholtz.rs @@ -1,6 +1,6 @@ use green_kernels::{helmholtz_3d::Helmholtz3dKernel, types::GreenKernelEvalType}; use kifmm::fmm::types::BlasFieldTranslationIa; -use kifmm::{Evaluate, FftFieldTranslation, SingleNodeBuilder}; +use kifmm::{Evaluate, FftFieldTranslation, FmmSvdMode, SingleNodeBuilder}; use kifmm::tree::helpers::points_fixture; use num::{FromPrimitive, One}; @@ -70,7 +70,11 @@ fn main() { &expansion_order, Helmholtz3dKernel::new(wavenumber), GreenKernelEvalType::Value, - BlasFieldTranslationIa::new(singular_value_threshold, None), + BlasFieldTranslationIa::new( + singular_value_threshold, + None, + FmmSvdMode::Deterministic, + ), ) .unwrap() .build() @@ -99,7 +103,11 @@ fn main() { &expansion_order, Helmholtz3dKernel::new(wavenumber), GreenKernelEvalType::Value, - BlasFieldTranslationIa::new(singular_value_threshold, None), + BlasFieldTranslationIa::new( + singular_value_threshold, + None, + FmmSvdMode::Deterministic, + ), ) .unwrap() .build() diff --git a/kifmm/include/kifmm_rs.h b/kifmm/include/kifmm_rs.h index b69977f5..4d3d2038 100644 --- a/kifmm/include/kifmm_rs.h +++ b/kifmm/include/kifmm_rs.h @@ -613,6 +613,114 @@ struct FmmEvaluator *laplace_fft_f64_alloc(bool timed, uint64_t depth, uintptr_t block_size); +/** + * Constructor for F32 Helmholtz FMM with BLAS based M2L translations compressed + * with deterministic SVD. + * + * Note that either `n_crit` or `depth` must be specified. If `n_crit` is specified, depth + * must be set to 0 and vice versa. If `depth` is specified, `expansion_order` must be specified + * at each level, and stored as a buffer of length `depth` + 1. + * + * + * # Parameters + * - `timed`: Modulates whether operators and metadata are timed. + * - `expansion_order`: A pointer to an array of expansion orders. + * - `n_expansion_order`: The number of expansion orders. + * - `eval_type`: true corresponds to evaluating potentials, false corresponds to evaluating potentials and potential derivatives + * - `wavenumber`: The wavenumber. + * - `sources`: A pointer to the source points. + * - `n_sources`: The length of the source points buffer + * - `targets`: A pointer to the target points. + * - `n_targets`: The length of the target points buffer. + * - `charges`: A pointer to the charges associated with the source points. + * - `n_charges`: The length of the charges buffer. + * - `prune_empty`: A boolean flag indicating whether to prune empty leaf nodes, and their ancestors. + * - `n_crit`: Threshold for tree refinement, if set to 0 ignored. Otherwise will refine until threshold is + * reached based on a uniform particle distribution. + * - `depth`: The maximum depth of the tree, max supported depth is 16. + * - `singular_value_threshold`: Threshold for singular values used in compressing M2L matrices. + * - `surface_diff`: Set to 0 to disable, otherwise uses surface_diff+equivalent_surface_expansion_order = check_surface_expansion_order + * - `n_components`: If known, can specify the rank of the M2L matrix for randomised range finding, otherwise set to 0. + * - `n_oversamples`: Optionally choose the number of oversamples for randomised range finding, otherwise set to 10. + * + * # Safety + * This function is intended to be called from C. The caller must ensure that: + * - Input data corresponds to valid pointers + * - That they remain valid for the duration of the function call + */ +struct FmmEvaluator *helmholtz_blas_rsvd_f32_alloc(bool timed, + const uintptr_t *expansion_order, + uintptr_t n_expansion_order, + bool eval_type, + float wavenumber, + const void *sources, + uintptr_t n_sources, + const void *targets, + uintptr_t n_targets, + const void *charges, + uintptr_t n_charges, + bool prune_empty, + uint64_t n_crit, + uint64_t depth, + float singular_value_threshold, + uintptr_t surface_diff, + uintptr_t n_components, + uintptr_t n_oversamples); + +/** + * Constructor for F64 Helmholtz FMM with BLAS based M2L translations compressed + * with deterministic SVD. + * + * Note that either `n_crit` or `depth` must be specified. If `n_crit` is specified, depth + * must be set to 0 and vice versa. If `depth` is specified, `expansion_order` must be specified + * at each level, and stored as a buffer of length `depth` + 1. + * + * + * # Parameters + * - `timed`: Modulates whether operators and metadata are timed. + * - `expansion_order`: A pointer to an array of expansion orders. + * - `n_expansion_order`: The number of expansion orders. + * - `eval_type`: true corresponds to evaluating potentials, false corresponds to evaluating potentials and potential derivatives + * - `wavenumber`: The wavenumber. + * - `sources`: A pointer to the source points. + * - `n_sources`: The length of the source points buffer + * - `targets`: A pointer to the target points. + * - `n_targets`: The length of the target points buffer. + * - `charges`: A pointer to the charges associated with the source points. + * - `n_charges`: The length of the charges buffer. + * - `prune_empty`: A boolean flag indicating whether to prune empty leaf nodes, and their ancestors. + * - `n_crit`: Threshold for tree refinement, if set to 0 ignored. Otherwise will refine until threshold is + * reached based on a uniform particle distribution. + * - `depth`: The maximum depth of the tree, max supported depth is 16. + * - `singular_value_threshold`: Threshold for singular values used in compressing M2L matrices. + * - `surface_diff`: Set to 0 to disable, otherwise uses surface_diff+equivalent_surface_expansion_order = check_surface_expansion_order + * - `n_components`: If known, can specify the rank of the M2L matrix for randomised range finding, otherwise set to 0. + * - `n_oversamples`: Optionally choose the number of oversamples for randomised range finding, otherwise set to 10. + * + * # Safety + * This function is intended to be called from C. The caller must ensure that: + * - Input data corresponds to valid pointers + * - That they remain valid for the duration of the function call + */ +struct FmmEvaluator *helmholtz_blas_rsvd_f64_alloc(bool timed, + const uintptr_t *expansion_order, + uintptr_t n_expansion_order, + bool eval_type, + double wavenumber, + const void *sources, + uintptr_t n_sources, + const void *targets, + uintptr_t n_targets, + const void *charges, + uintptr_t n_charges, + bool prune_empty, + uint64_t n_crit, + uint64_t depth, + double singular_value_threshold, + uintptr_t surface_diff, + uintptr_t n_components, + uintptr_t n_oversamples); + /** * Constructor for F32 Helmholtz FMM with BLAS based M2L translations compressed * with deterministic SVD. diff --git a/kifmm/python/examples/API Usage.ipynb b/kifmm/python/examples/API Usage.ipynb index 39246b5a..e61ff113 100644 --- a/kifmm/python/examples/API Usage.ipynb +++ b/kifmm/python/examples/API Usage.ipynb @@ -149,7 +149,7 @@ { "data": { "text/plain": [ - "{'source_data': 4, 'target_data': 3, 'source_to_target_data': 256}" + "{'source_data': 2, 'target_data': 2, 'source_to_target_data': 252}" ] }, "execution_count": 3, @@ -266,7 +266,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "89147a31-182c-4bd0-920e-000fb5ff5f9f", "metadata": {}, "outputs": [], @@ -288,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "333eaf61-cb42-45d2-ae37-382cf866e9d6", "metadata": {}, "outputs": [], @@ -313,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "363d8d4b-f967-44dd-b93e-c97192295ff4", "metadata": {}, "outputs": [], @@ -351,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "25d4692c-d7ab-4f03-b2aa-ca92da29b961", "metadata": {}, "outputs": [], @@ -390,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 5, "id": "cecccd0e-f229-4d1a-9849-7c73c725b58f", "metadata": {}, "outputs": [ @@ -519,33 +519,66 @@ "direct = np.zeros(n_targets * eval_type.value).astype(ctype)\n", "fmm.evaluate_kernel(EvalType.Value, sources, targets, charges, direct)\n", "\n", - "# assert np.allclose(fmm.all_potentials_u, direct)\n", "assert np.allclose(np.abs(direct), np.abs(fmm.all_potentials_u[0].T), 1e-3)" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "24a5eb2e-b718-4b14-9a6b-e270666df62d", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f3c61f7e-8efa-45da-a754-d02ef29341cb", + "cell_type": "markdown", + "id": "6191bb15-81c4-4516-92e8-491ef6cc765a", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "## Randomised SVD" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "b1e1d1c6-8bf0-4452-b62f-0bf6262fe475", + "execution_count": 14, + "id": "24a5eb2e-b718-4b14-9a6b-e270666df62d", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "np.random.seed(0)\n", + "\n", + "dim = 3\n", + "dtype = np.float32\n", + "ctype = np.complex64\n", + "\n", + "# Set FMM Parameters\n", + "expansion_order = np.array([6], np.uint64) # Single expansion order as using n_crit\n", + "n_vec = 1\n", + "n_crit = 150\n", + "n_sources = 1000\n", + "n_targets = 2000\n", + "wavenumber = dtype(10)\n", + "prune_empty = True\n", + "svd_threshold=1e-5\n", + "\n", + "# Setup source/target/charge data in Fortran order\n", + "sources = np.random.rand(n_sources * dim).astype(dtype)\n", + "targets = np.random.rand(n_targets * dim).astype(dtype)\n", + "charges = np.random.rand(n_sources * n_vec).astype(ctype)\n", + "\n", + "eval_type = EvalType.Value\n", + "\n", + "# EvalType computes either potentials (EvalType.Value) or potentials + derivatives (EvalType.ValueDeriv)\n", + "kernel = HelmholtzKernel(dtype, wavenumber, eval_type)\n", + "\n", + "tree = SingleNodeTree(sources, targets, charges, n_crit=n_crit, prune_empty=prune_empty)\n", + "\n", + "field_translation = BlasFieldTranslation(kernel, svd_threshold, random=True)\n", + "\n", + "# Create FMM runtime object\n", + "fmm = KiFmm(expansion_order, tree, field_translation, timed=True)\n", + "\n", + "# Evaluate FMM\n", + "fmm.evaluate()\n", + "\n", + "# Direct evaluation check\n", + "direct = np.zeros(n_targets * eval_type.value).astype(ctype)\n", + "fmm.evaluate_kernel(EvalType.Value, sources, targets, charges, direct)\n", + "\n", + "assert np.allclose(np.abs(direct), np.abs(fmm.all_potentials_u[0].T), 1e-3)" + ] }, { "cell_type": "code", @@ -572,7 +605,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.15" } }, "nbformat": 4, diff --git a/kifmm/python/kifmm_py/__init__.py b/kifmm/python/kifmm_py/__init__.py index 6c42e225..c43fbda9 100644 --- a/kifmm/python/kifmm_py/__init__.py +++ b/kifmm/python/kifmm_py/__init__.py @@ -66,6 +66,7 @@ def __init__(self, times_p): def __repr__(self): return str(self._times) + class MetadataTimes: """ Helper class to destructure metadata runtimes. @@ -98,6 +99,7 @@ def __init__(self, times_p): def __repr__(self): return str(self._times) + class CommunicationTimes: """ Helper class to destructure communication runtimes. @@ -210,11 +212,9 @@ def __init__( self.surface_diff = surface_diff self.random = random - if isinstance(self.kernel, HelmholtzKernel): - if self.random: - raise TypeError("Randomised compression unimplemented for this kernel") - - elif isinstance(self.kernel, LaplaceKernel): + if isinstance(self.kernel, HelmholtzKernel) or isinstance( + self.kernel, LaplaceKernel + ): if self.random: self.rsvd = RandomSvdSettings( n_components, @@ -628,9 +628,52 @@ def _construct(self): elif isinstance(self._field_translation.kernel, HelmholtzKernel): if self._field_translation.random: - raise TypeError( - "Randomised SVD not yet supported for Helmholtz Kernel" - ) + if self._field_translation.kernel.dtype == np.float32: + self._fmm = lib.helmholtz_blas_rsvd_f32_alloc( + self._timed, + self._expansion_order_c, + self._nexpansion_order, + self._field_translation.kernel.eval_type, + self._field_translation.kernel.wavenumber, + self._tree.sources_c, + self._tree.n_sources, + self._tree.targets_c, + self._tree.n_targets, + self._tree.charges_c, + self._tree.ncharges, + self._tree.prune_empty, + self._tree.n_crit, + self._tree.depth, + self._field_translation.svd_threshold, + self._field_translation.surface_diff, + self._field_translation.rsvd.n_components, + self._field_translation.rsvd.n_oversamples, + ) + self.potential_dtype = np.complex64 + + elif self._field_translation.kernel.dtype == np.float64: + self._fmm = lib.helmholtz_blas_rsvd_f64_alloc( + self._timed, + self._expansion_order_c, + self._nexpansion_order, + self._field_translation.kernel.eval_type, + self._field_translation.kernel.wavenumber, + self._tree.sources_c, + self._tree.n_sources, + self._tree.targets_c, + self._tree.n_targets, + self._tree.charges_c, + self._tree.ncharges, + self._tree.prune_empty, + self._tree.n_crit, + self._tree.depth, + self._field_translation.svd_threshold, + self._field_translation.surface_diff, + self._field_translation.rsvd.n_components, + self._field_translation.rsvd.n_oversamples, + ) + self.potential_dtype = np.complex128 + else: if self._field_translation.kernel.dtype == np.float32: self._fmm = lib.helmholtz_blas_svd_f32_alloc( diff --git a/kifmm/src/bindings.rs b/kifmm/src/bindings.rs index 5c7a2124..62598c6a 100644 --- a/kifmm/src/bindings.rs +++ b/kifmm/src/bindings.rs @@ -286,7 +286,9 @@ pub mod constructors { use green_kernels::{helmholtz_3d::Helmholtz3dKernel, types::GreenKernelEvalType}; - use crate::{BlasFieldTranslationIa, BlasFieldTranslationSaRcmp, FftFieldTranslation}; + use crate::{ + BlasFieldTranslationIa, BlasFieldTranslationSaRcmp, FftFieldTranslation, FmmSvdMode, + }; use super::{ c32, c64, FmmCType, FmmEvaluator, FmmTranslationCType, Laplace3dKernel, SingleNodeBuilder, @@ -907,6 +909,244 @@ pub mod constructors { Box::into_raw(Box::new(evaluator)) } + /// Constructor for F32 Helmholtz FMM with BLAS based M2L translations compressed + /// with deterministic SVD. + /// + /// Note that either `n_crit` or `depth` must be specified. If `n_crit` is specified, depth + /// must be set to 0 and vice versa. If `depth` is specified, `expansion_order` must be specified + /// at each level, and stored as a buffer of length `depth` + 1. + /// + /// + /// # Parameters + /// - `timed`: Modulates whether operators and metadata are timed. + /// - `expansion_order`: A pointer to an array of expansion orders. + /// - `n_expansion_order`: The number of expansion orders. + /// - `eval_type`: true corresponds to evaluating potentials, false corresponds to evaluating potentials and potential derivatives + /// - `wavenumber`: The wavenumber. + /// - `sources`: A pointer to the source points. + /// - `n_sources`: The length of the source points buffer + /// - `targets`: A pointer to the target points. + /// - `n_targets`: The length of the target points buffer. + /// - `charges`: A pointer to the charges associated with the source points. + /// - `n_charges`: The length of the charges buffer. + /// - `prune_empty`: A boolean flag indicating whether to prune empty leaf nodes, and their ancestors. + /// - `n_crit`: Threshold for tree refinement, if set to 0 ignored. Otherwise will refine until threshold is + /// reached based on a uniform particle distribution. + /// - `depth`: The maximum depth of the tree, max supported depth is 16. + /// - `singular_value_threshold`: Threshold for singular values used in compressing M2L matrices. + /// - `surface_diff`: Set to 0 to disable, otherwise uses surface_diff+equivalent_surface_expansion_order = check_surface_expansion_order + /// - `n_components`: If known, can specify the rank of the M2L matrix for randomised range finding, otherwise set to 0. + /// - `n_oversamples`: Optionally choose the number of oversamples for randomised range finding, otherwise set to 10. + /// + /// # Safety + /// This function is intended to be called from C. The caller must ensure that: + /// - Input data corresponds to valid pointers + /// - That they remain valid for the duration of the function call + #[no_mangle] + pub unsafe extern "C" fn helmholtz_blas_rsvd_f32_alloc( + timed: bool, + expansion_order: *const usize, + n_expansion_order: usize, + eval_type: bool, + wavenumber: f32, + sources: *const c_void, + n_sources: usize, + targets: *const c_void, + n_targets: usize, + charges: *const c_void, + n_charges: usize, + prune_empty: bool, + n_crit: u64, + depth: u64, + singular_value_threshold: f32, + surface_diff: usize, + n_components: usize, + n_oversamples: usize, + ) -> *mut FmmEvaluator { + let eval_type = if eval_type { + GreenKernelEvalType::Value + } else { + GreenKernelEvalType::ValueDeriv + }; + + let sources = unsafe { std::slice::from_raw_parts(sources as *const f32, n_sources) }; + let targets = unsafe { std::slice::from_raw_parts(targets as *const f32, n_targets) }; + let charges = unsafe { std::slice::from_raw_parts(charges as *const c32, n_charges) }; + + let expansion_order = + unsafe { std::slice::from_raw_parts(expansion_order, n_expansion_order) }; + let n_crit = if n_crit > 0 { Some(n_crit) } else { None }; + + let depth = if depth > 0 { Some(depth) } else { None }; + let singular_value_threshold = Some(singular_value_threshold); + let surface_diff = if surface_diff > 0 { + Some(surface_diff) + } else { + None + }; + + let n_components = if n_components > 0 { + Some(n_components) + } else { + None + }; + + let n_oversamples = if n_oversamples > 0 { + Some(n_oversamples) + } else { + None + }; + + let field_translation = BlasFieldTranslationIa::new( + singular_value_threshold, + surface_diff, + crate::fmm::types::FmmSvdMode::new(true, None, n_components, n_oversamples, None), + ); + + let fmm = SingleNodeBuilder::new(timed) + .tree(sources, targets, n_crit, depth, prune_empty) + .unwrap() + .parameters( + charges, + expansion_order, + Helmholtz3dKernel::new(wavenumber), + eval_type, + field_translation, + ) + .unwrap() + .build() + .unwrap(); + + let data = Box::into_raw(Box::new(fmm)) as *mut c_void; + + let evaluator = FmmEvaluator { + data, + ctype: FmmCType::Helmholtz32, + ctranslation_type: FmmTranslationCType::Blas, + }; + + Box::into_raw(Box::new(evaluator)) + } + + /// Constructor for F64 Helmholtz FMM with BLAS based M2L translations compressed + /// with deterministic SVD. + /// + /// Note that either `n_crit` or `depth` must be specified. If `n_crit` is specified, depth + /// must be set to 0 and vice versa. If `depth` is specified, `expansion_order` must be specified + /// at each level, and stored as a buffer of length `depth` + 1. + /// + /// + /// # Parameters + /// - `timed`: Modulates whether operators and metadata are timed. + /// - `expansion_order`: A pointer to an array of expansion orders. + /// - `n_expansion_order`: The number of expansion orders. + /// - `eval_type`: true corresponds to evaluating potentials, false corresponds to evaluating potentials and potential derivatives + /// - `wavenumber`: The wavenumber. + /// - `sources`: A pointer to the source points. + /// - `n_sources`: The length of the source points buffer + /// - `targets`: A pointer to the target points. + /// - `n_targets`: The length of the target points buffer. + /// - `charges`: A pointer to the charges associated with the source points. + /// - `n_charges`: The length of the charges buffer. + /// - `prune_empty`: A boolean flag indicating whether to prune empty leaf nodes, and their ancestors. + /// - `n_crit`: Threshold for tree refinement, if set to 0 ignored. Otherwise will refine until threshold is + /// reached based on a uniform particle distribution. + /// - `depth`: The maximum depth of the tree, max supported depth is 16. + /// - `singular_value_threshold`: Threshold for singular values used in compressing M2L matrices. + /// - `surface_diff`: Set to 0 to disable, otherwise uses surface_diff+equivalent_surface_expansion_order = check_surface_expansion_order + /// - `n_components`: If known, can specify the rank of the M2L matrix for randomised range finding, otherwise set to 0. + /// - `n_oversamples`: Optionally choose the number of oversamples for randomised range finding, otherwise set to 10. + /// + /// # Safety + /// This function is intended to be called from C. The caller must ensure that: + /// - Input data corresponds to valid pointers + /// - That they remain valid for the duration of the function call + #[no_mangle] + pub unsafe extern "C" fn helmholtz_blas_rsvd_f64_alloc( + timed: bool, + expansion_order: *const usize, + n_expansion_order: usize, + eval_type: bool, + wavenumber: f64, + sources: *const c_void, + n_sources: usize, + targets: *const c_void, + n_targets: usize, + charges: *const c_void, + n_charges: usize, + prune_empty: bool, + n_crit: u64, + depth: u64, + singular_value_threshold: f64, + surface_diff: usize, + n_components: usize, + n_oversamples: usize, + ) -> *mut FmmEvaluator { + let eval_type = if eval_type { + GreenKernelEvalType::Value + } else { + GreenKernelEvalType::ValueDeriv + }; + + let sources = unsafe { std::slice::from_raw_parts(sources as *const f64, n_sources) }; + let targets = unsafe { std::slice::from_raw_parts(targets as *const f64, n_targets) }; + + let charges = unsafe { std::slice::from_raw_parts(charges as *const c64, n_charges) }; + + let expansion_order = + unsafe { std::slice::from_raw_parts(expansion_order, n_expansion_order) }; + let n_crit = if n_crit > 0 { Some(n_crit) } else { None }; + + let depth = if depth > 0 { Some(depth) } else { None }; + let singular_value_threshold = Some(singular_value_threshold); + let surface_diff = if surface_diff > 0 { + Some(surface_diff) + } else { + None + }; + let n_components = if n_components > 0 { + Some(n_components) + } else { + None + }; + + let n_oversamples = if n_oversamples > 0 { + Some(n_oversamples) + } else { + None + }; + + let field_translation = BlasFieldTranslationIa::new( + singular_value_threshold, + surface_diff, + crate::fmm::types::FmmSvdMode::new(true, None, n_components, n_oversamples, None), + ); + + let fmm = SingleNodeBuilder::new(timed) + .tree(sources, targets, n_crit, depth, prune_empty) + .unwrap() + .parameters( + charges, + expansion_order, + Helmholtz3dKernel::new(wavenumber), + eval_type, + field_translation, + ) + .unwrap() + .build() + .unwrap(); + + let data = Box::into_raw(Box::new(fmm)) as *mut c_void; + + let evaluator = FmmEvaluator { + data, + ctype: FmmCType::Helmholtz64, + ctranslation_type: FmmTranslationCType::Blas, + }; + + Box::into_raw(Box::new(evaluator)) + } + /// Constructor for F32 Helmholtz FMM with BLAS based M2L translations compressed /// with deterministic SVD. /// @@ -979,7 +1219,11 @@ pub mod constructors { None }; - let field_translation = BlasFieldTranslationIa::new(singular_value_threshold, surface_diff); + let field_translation = BlasFieldTranslationIa::new( + singular_value_threshold, + surface_diff, + FmmSvdMode::Deterministic, + ); let fmm = SingleNodeBuilder::new(timed) .tree(sources, targets, n_crit, depth, prune_empty) @@ -1079,7 +1323,11 @@ pub mod constructors { None }; - let field_translation = BlasFieldTranslationIa::new(singular_value_threshold, surface_diff); + let field_translation = BlasFieldTranslationIa::new( + singular_value_threshold, + surface_diff, + FmmSvdMode::Deterministic, + ); let fmm = SingleNodeBuilder::new(timed) .tree(sources, targets, n_crit, depth, prune_empty) diff --git a/kifmm/src/fmm/eval/single_node.rs b/kifmm/src/fmm/eval/single_node.rs index 036b5862..817c234c 100644 --- a/kifmm/src/fmm/eval/single_node.rs +++ b/kifmm/src/fmm/eval/single_node.rs @@ -139,7 +139,7 @@ mod test { tree::{FmmTreeNode, SingleFmmTree, SingleTree}, }, tree::{constants::ALPHA_INNER, helpers::points_fixture, types::MortonKey}, - BlasFieldTranslationSaRcmp, Evaluate, FftFieldTranslation, SingleNodeBuilder, + BlasFieldTranslationSaRcmp, Evaluate, FftFieldTranslation, FmmSvdMode, SingleNodeBuilder, SingleNodeFmmTree, }; @@ -1068,7 +1068,7 @@ mod test { } #[test] - fn test_helmholtz_fmm_vector() { + fn test_foo_bar() { // Setup random sources and targets let n_sources = 9000; let n_targets = 10000; @@ -1102,7 +1102,7 @@ mod test { &expansion_order, Helmholtz3dKernel::new(wavenumber), GreenKernelEvalType::Value, - BlasFieldTranslationIa::new(None, None), + BlasFieldTranslationIa::new(None, None, FmmSvdMode::Deterministic), ) .unwrap() .build() @@ -1124,7 +1124,7 @@ mod test { &expansion_order, Helmholtz3dKernel::new(wavenumber), GreenKernelEvalType::ValueDeriv, - BlasFieldTranslationIa::new(None, None), + BlasFieldTranslationIa::new(None, None, FmmSvdMode::Deterministic), ) .unwrap() .build() @@ -1227,7 +1227,7 @@ mod test { &expansion_order, Helmholtz3dKernel::new(wavenumber), GreenKernelEvalType::Value, - BlasFieldTranslationIa::new(None, None), + BlasFieldTranslationIa::new(None, None, FmmSvdMode::Deterministic), ) .unwrap() .build() @@ -1249,7 +1249,7 @@ mod test { &expansion_order, Helmholtz3dKernel::new(wavenumber), GreenKernelEvalType::ValueDeriv, - BlasFieldTranslationIa::new(None, None), + BlasFieldTranslationIa::new(None, None, FmmSvdMode::Deterministic), ) .unwrap() .build() @@ -1353,7 +1353,7 @@ mod test { &expansion_order, Helmholtz3dKernel::new(wavenumber), GreenKernelEvalType::Value, - BlasFieldTranslationIa::new(None, surface_diff), + BlasFieldTranslationIa::new(None, surface_diff, FmmSvdMode::Deterministic), ) .unwrap() .build() @@ -1376,7 +1376,7 @@ mod test { &expansion_order, Helmholtz3dKernel::new(wavenumber), GreenKernelEvalType::ValueDeriv, - BlasFieldTranslationIa::new(None, None), + BlasFieldTranslationIa::new(None, None, FmmSvdMode::Deterministic), ) .unwrap() .build() @@ -1429,7 +1429,7 @@ mod test { &expansion_order, Helmholtz3dKernel::new(wavenumber), GreenKernelEvalType::Value, - BlasFieldTranslationIa::new(None, surface_diff), + BlasFieldTranslationIa::new(None, surface_diff, FmmSvdMode::Deterministic), ) .unwrap() .build() @@ -1577,7 +1577,11 @@ mod test { &expansion_order, Helmholtz3dKernel::new(wavenumber), eval_type, - BlasFieldTranslationIa::new(singular_value_threshold, None), + BlasFieldTranslationIa::new( + singular_value_threshold, + None, + FmmSvdMode::Deterministic, + ), ) .unwrap() .build() @@ -1599,7 +1603,11 @@ mod test { &expansion_order, Helmholtz3dKernel::new(wavenumber), eval_type, - BlasFieldTranslationIa::new(singular_value_threshold, None), + BlasFieldTranslationIa::new( + singular_value_threshold, + None, + FmmSvdMode::Deterministic, + ), ) .unwrap() .build() diff --git a/kifmm/src/fmm/field_translation/metadata/single_node.rs b/kifmm/src/fmm/field_translation/metadata/single_node.rs index 4f7a0007..22ddccf6 100644 --- a/kifmm/src/fmm/field_translation/metadata/single_node.rs +++ b/kifmm/src/fmm/field_translation/metadata/single_node.rs @@ -622,7 +622,7 @@ where impl SourceToTargetTranslationMetadata for KiFmm, BlasFieldTranslationIa> where - Scalar: RlstScalar + Default + MatrixSvd, + Scalar: RlstScalar + Default + MatrixRsvd, ::Real: Default, { fn displacements(&mut self, start_level: Option) { @@ -785,14 +785,48 @@ where let mut sigma = vec![Scalar::zero().re(); k]; let mut vt = rlst_dynamic_array2!(Scalar, [k, nvt]); - tmp_gram - .into_svd_alloc( - u.view_mut(), - vt.view_mut(), - &mut sigma[..], - SvdMode::Reduced, - ) - .unwrap(); + let target_rank; + + match &self.source_to_target.svd_mode { + &FmmSvdMode::Random { + n_components, + normaliser, + n_oversamples, + random_state, + } => { + // Estimate targget rank if unspecified by user + if let Some(n_components) = n_components { + target_rank = n_components + } else { + let max_equivalent_surface_ncoeffs = + self.n_coeffs_equivalent_surface.iter().max().unwrap(); + let max_check_surface_ncoeffs = + self.n_coeffs_check_surface.iter().max().unwrap(); + target_rank = + *max_equivalent_surface_ncoeffs.max(max_check_surface_ncoeffs); + } + + (sigma, u, vt) = Scalar::rsvd_fixed_rank( + &tmp_gram, + target_rank, + n_oversamples, + normaliser, + random_state, + ) + .unwrap(); + } + + FmmSvdMode::Deterministic => { + tmp_gram + .into_svd_alloc( + u.view_mut(), + vt.view_mut(), + &mut sigma[..], + SvdMode::Reduced, + ) + .unwrap(); + } + } let mut sigma_mat = rlst_dynamic_array2!(Scalar, [k, k]); @@ -835,6 +869,389 @@ where } } +impl SourceToTargetTranslationMetadata + for KiFmm, BlasFieldTranslationSaRcmp> +where + Scalar: RlstScalar + Default + MatrixRsvd, + ::Real: Default, +{ + fn displacements(&mut self, start_level: Option) { + let mut displacements = Vec::new(); + let start_level = if let Some(start_level) = start_level { + if start_level >= 2 { + start_level + } else { + 2 + } + } else { + 2 + }; + + for level in start_level..=self.tree.source_tree().depth() { + let sources = self.tree.source_tree().keys(level).unwrap(); + let n_sources = sources.len(); + let sentinel = -1i32; + + let result = vec![vec![sentinel; n_sources]; 316]; + let result = result.into_iter().map(RwLock::new).collect_vec(); + + sources + .into_par_iter() + .enumerate() + .for_each(|(source_idx, source)| { + // Find interaction list of each source, as this defines scatter locations + let interaction_list = source + .parent() + .neighbors() + .iter() + .flat_map(|pn| pn.children()) + .filter(|pnc| { + !source.is_adjacent(pnc) + && self + .tree + .target_tree() + .all_keys_set() + .unwrap() + .contains(pnc) + }) + .collect_vec(); + + let transfer_vectors = interaction_list + .iter() + .map(|target| source.find_transfer_vector(target).unwrap()) + .collect_vec(); + + let mut transfer_vectors_map = HashMap::new(); + for (i, v) in transfer_vectors.iter().enumerate() { + transfer_vectors_map.insert(v, i); + } + + let transfer_vectors_set: HashSet<_> = + transfer_vectors.iter().cloned().collect(); + + // Mark items in interaction list for scattering + for (tv_idx, tv) in self.source_to_target.transfer_vectors.iter().enumerate() { + let mut all_displacements_lock = result[tv_idx].write().unwrap(); + if transfer_vectors_set.contains(&tv.hash) { + // Look up scatter location in target tree + let target = + &interaction_list[*transfer_vectors_map.get(&tv.hash).unwrap()]; + let &target_idx = self.level_index_pointer_locals[level as usize] + .get(target) + .unwrap(); + all_displacements_lock[source_idx] = target_idx as i32; + } + } + }); + + displacements.push(result); + } + + self.source_to_target.displacements = displacements; + } + + fn source_to_target(&mut self) { + // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) + // Compute interaction matrices between source and unique targets, defined by unique transfer vectors + let depth = self.tree.source_tree().depth(); + + let iterator = if self.equivalent_surface_order.len() > 1 { + (2..=depth) + .zip(self.equivalent_surface_order.iter().skip(2).cloned()) + .zip(self.check_surface_order.iter().skip(2).cloned()) + .collect_vec() + } else { + (2..=depth) + .zip(vec![ + *self.equivalent_surface_order.last().unwrap(); + (depth - 1) as usize + ]) + .zip(vec![ + *self.check_surface_order.last().unwrap(); + (depth - 1) as usize + ]) + .collect_vec() + }; + + for ((level, equivalent_surface_order), check_surface_order) in iterator { + let transfer_vectors = + compute_transfer_vectors_at_level::(level).unwrap(); + + let nrows = ncoeffs_kifmm(check_surface_order); + let ncols = ncoeffs_kifmm(equivalent_surface_order); + + let mut se2tc_fat = + rlst_dynamic_array2!(Scalar, [nrows, ncols * NTRANSFER_VECTORS_KIFMM]); + let mut se2tc_thin = + rlst_dynamic_array2!(Scalar, [nrows * NTRANSFER_VECTORS_KIFMM, ncols]); + let alpha = Scalar::real(ALPHA_INNER); + + transfer_vectors.iter().enumerate().for_each(|(i, t)| { + let source_equivalent_surface = t.source.surface_grid( + equivalent_surface_order, + self.tree.source_tree().domain(), + alpha, + ); + let target_check_surface = t.target.surface_grid( + check_surface_order, + self.tree.source_tree().domain(), + alpha, + ); + + let mut tmp_gram = rlst_dynamic_array2!(Scalar, [nrows, ncols]); + + self.kernel.assemble_st( + GreenKernelEvalType::Value, + &target_check_surface[..], + &source_equivalent_surface[..], + tmp_gram.data_mut(), + ); + + let mut block = se2tc_fat + .view_mut() + .into_subview([0, i * ncols], [nrows, ncols]); + block.fill_from(tmp_gram.view()); + + let mut block_column = se2tc_thin + .view_mut() + .into_subview([i * nrows, 0], [nrows, ncols]); + block_column.fill_from(tmp_gram.view()); + }); + + let mu = se2tc_fat.shape()[0]; + let nvt = se2tc_fat.shape()[1]; + let k = std::cmp::min(mu, nvt); + + let mut u_big = rlst_dynamic_array2!(Scalar, [mu, k]); + let mut sigma = vec![Scalar::zero().re(); k]; + let mut vt_big = rlst_dynamic_array2!(Scalar, [k, nvt]); + + // Target rank defined by max dimension before cutoff + let mut target_rank = k; + + match &self.source_to_target.svd_mode { + &FmmSvdMode::Random { + n_components, + normaliser, + n_oversamples, + random_state, + } => { + // Estimate target rank if unspecified by user + if let Some(n_components) = n_components { + target_rank = n_components + } else { + let max_equivalent_surface_ncoeffs = + self.n_coeffs_equivalent_surface.iter().max().unwrap(); + let max_check_surface_ncoeffs = + self.n_coeffs_check_surface.iter().max().unwrap(); + target_rank = + max_equivalent_surface_ncoeffs.max(max_check_surface_ncoeffs) / 2; + } + + let mut se2tc_fat_transpose = + rlst_dynamic_array2!(Scalar, se2tc_fat.view().transpose().shape()); + se2tc_fat_transpose + .view_mut() + .fill_from(se2tc_fat.view().transpose()); + + let (sigma_t, u_big_t, vt_big_t) = Scalar::rsvd_fixed_rank( + &se2tc_fat_transpose, + target_rank, + n_oversamples, + normaliser, + random_state, + ) + .unwrap(); + u_big = rlst_dynamic_array2!(Scalar, [mu, sigma_t.len()]); + vt_big = rlst_dynamic_array2!(Scalar, [sigma_t.len(), nvt]); + + vt_big.fill_from(u_big_t.transpose()); + u_big.fill_from(vt_big_t.transpose()); + sigma = sigma_t; + } + FmmSvdMode::Deterministic => { + se2tc_fat + .into_svd_alloc( + u_big.view_mut(), + vt_big.view_mut(), + &mut sigma[..], + SvdMode::Reduced, + ) + .unwrap(); + } + } + + // Cutoff rank is the minimum of the target rank and the value found by user threshold + let cutoff_rank = + find_cutoff_rank(&sigma, self.source_to_target.threshold, ncols).min(target_rank); + + let mut u = rlst_dynamic_array2!(Scalar, [mu, cutoff_rank]); + let mut sigma_mat = rlst_dynamic_array2!(Scalar, [cutoff_rank, cutoff_rank]); + let mut vt = rlst_dynamic_array2!(Scalar, [cutoff_rank, nvt]); + + // Store compressed M2L operators + let thin_nrows = se2tc_thin.shape()[0]; + let nst = se2tc_thin.shape()[1]; + let k = std::cmp::min(thin_nrows, nst); + let mut st; + let mut _gamma; + let mut _r; + + if self.source_to_target.surface_diff() == 0 { + st = rlst_dynamic_array2!(Scalar, u_big.view().transpose().shape()); + st.fill_from(u_big.view().transpose()) + } else { + match &self.source_to_target.svd_mode { + &FmmSvdMode::Random { + n_components, + normaliser, + n_oversamples, + random_state, + } => { + let target_rank; + if let Some(n_components) = n_components { + target_rank = n_components + } else { + // Estimate target rank + let max_equivalent_surface_ncoeffs = + self.n_coeffs_equivalent_surface.iter().max().unwrap(); + let max_check_surface_ncoeffs = + self.n_coeffs_check_surface.iter().max().unwrap(); + target_rank = + max_equivalent_surface_ncoeffs.max(max_check_surface_ncoeffs) / 2; + } + + (_gamma, _r, st) = Scalar::rsvd_fixed_rank( + &se2tc_thin, + target_rank, + n_oversamples, + normaliser, + random_state, + ) + .unwrap(); + } + FmmSvdMode::Deterministic => { + _r = rlst_dynamic_array2!(Scalar, [thin_nrows, k]); + _gamma = vec![Scalar::zero().re(); k]; + st = rlst_dynamic_array2!(Scalar, [k, nst]); + se2tc_thin + .into_svd_alloc( + _r.view_mut(), + st.view_mut(), + &mut _gamma[..], + SvdMode::Reduced, + ) + .unwrap(); + } + } + } + + u.fill_from(u_big.into_subview([0, 0], [mu, cutoff_rank])); + vt.fill_from(vt_big.into_subview([0, 0], [cutoff_rank, nvt])); + for (j, s) in sigma.iter().enumerate().take(cutoff_rank) { + unsafe { + *sigma_mat.get_unchecked_mut([j, j]) = Scalar::from(*s).unwrap(); + } + } + + let mut s_trunc = rlst_dynamic_array2!(Scalar, [nst, cutoff_rank]); + for j in 0..cutoff_rank { + for i in 0..nst { + unsafe { *s_trunc.get_unchecked_mut([i, j]) = *st.get_unchecked([j, i]) } + } + } + + let c_u = Mutex::new(Vec::new()); + let c_vt = Mutex::new(Vec::new()); + let directional_cutoff_ranks = + Mutex::new(vec![0usize; self.source_to_target.transfer_vectors.len()]); + + for _ in 0..NTRANSFER_VECTORS_KIFMM { + c_u.lock() + .unwrap() + .push(rlst_dynamic_array2!(Scalar, [1, 1])); + c_vt.lock() + .unwrap() + .push(rlst_dynamic_array2!(Scalar, [1, 1])); + } + + (0..NTRANSFER_VECTORS_KIFMM).into_par_iter().for_each(|i| { + let vt_block = vt.view().into_subview([0, i * ncols], [cutoff_rank, ncols]); + + let tmp = empty_array::().simple_mult_into_resize( + sigma_mat.view(), + empty_array::() + .simple_mult_into_resize(vt_block.view(), s_trunc.view()), + ); + + let mut u_i = rlst_dynamic_array2!(Scalar, [cutoff_rank, cutoff_rank]); + let mut sigma_i = vec![Scalar::zero().re(); cutoff_rank]; + let mut vt_i = rlst_dynamic_array2!(Scalar, [cutoff_rank, cutoff_rank]); + + tmp.into_svd_alloc(u_i.view_mut(), vt_i.view_mut(), &mut sigma_i, SvdMode::Full) + .unwrap(); + + let directional_cutoff_rank = + find_cutoff_rank(&sigma_i, self.source_to_target.threshold, cutoff_rank); + + let mut u_i_compressed = + rlst_dynamic_array2!(Scalar, [cutoff_rank, directional_cutoff_rank]); + let mut vt_i_compressed_ = + rlst_dynamic_array2!(Scalar, [directional_cutoff_rank, cutoff_rank]); + + let mut sigma_mat_i_compressed = rlst_dynamic_array2!( + Scalar, + [directional_cutoff_rank, directional_cutoff_rank] + ); + + u_i_compressed + .fill_from(u_i.into_subview([0, 0], [cutoff_rank, directional_cutoff_rank])); + vt_i_compressed_ + .fill_from(vt_i.into_subview([0, 0], [directional_cutoff_rank, cutoff_rank])); + + for (j, s) in sigma_i.iter().enumerate().take(directional_cutoff_rank) { + unsafe { + *sigma_mat_i_compressed.get_unchecked_mut([j, j]) = + Scalar::from(*s).unwrap(); + } + } + + let vt_i_compressed = empty_array::().simple_mult_into_resize( + sigma_mat_i_compressed.view(), + vt_i_compressed_.view(), + ); + + directional_cutoff_ranks.lock().unwrap()[i] = directional_cutoff_rank; + c_u.lock().unwrap()[i] = u_i_compressed; + c_vt.lock().unwrap()[i] = vt_i_compressed; + }); + + let mut st_trunc = rlst_dynamic_array2!(Scalar, [cutoff_rank, nst]); + st_trunc.fill_from(s_trunc.transpose()); + + let c_vt = std::mem::take(&mut *c_vt.lock().unwrap()); + let c_u = std::mem::take(&mut *c_u.lock().unwrap()); + let directional_cutoff_ranks = + std::mem::take(&mut *directional_cutoff_ranks.lock().unwrap()); + + let result = BlasMetadataSaRcmp { + u, + st: st_trunc, + c_u, + c_vt, + }; + + self.source_to_target.metadata.push(result); + self.source_to_target.cutoff_rank.push(cutoff_rank); + self.source_to_target + .directional_cutoff_ranks + .push(directional_cutoff_ranks); + } + + // self.source_to_target = result; + } +} + impl SourceToTargetTranslationMetadata for KiFmm, FftFieldTranslation> where @@ -2572,12 +2989,17 @@ where { /// Constructor for BLAS based field translations, specify a compression threshold for the SVD compressed operators /// TODO: More docs - pub fn new(threshold: Option, surface_diff: Option) -> Self { + pub fn new( + threshold: Option, + surface_diff: Option, + svd_mode: FmmSvdMode, + ) -> Self { let tmp = Scalar::epsilon().re(); Self { threshold: threshold.unwrap_or(tmp), surface_diff: surface_diff.unwrap_or_default(), + svd_mode, ..Default::default() } } @@ -2750,7 +3172,7 @@ mod test { &expansion_order, Helmholtz3dKernel::new(wavenumber), GreenKernelEvalType::Value, - BlasFieldTranslationIa::new(None, None), + BlasFieldTranslationIa::new(None, None, FmmSvdMode::Deterministic), ) .unwrap() .build() diff --git a/kifmm/src/fmm/field_translation/source_to_target/blas/single_node.rs b/kifmm/src/fmm/field_translation/source_to_target/blas/single_node.rs index f300b8f0..b7a9196a 100644 --- a/kifmm/src/fmm/field_translation/source_to_target/blas/single_node.rs +++ b/kifmm/src/fmm/field_translation/source_to_target/blas/single_node.rs @@ -97,6 +97,10 @@ where let multipoles = rlst_array_from_slice2!(multipoles, [n_coeffs_equivalent_surface, n_sources]); + println!( + "HERE {:?} {:?}", + self.source_to_target.cutoff_rank, m2l_operator_index + ); // Allocate buffer to store compressed check potentials let compressed_check_potentials = rlst_dynamic_array2!( Scalar, diff --git a/kifmm/src/fmm/types.rs b/kifmm/src/fmm/types.rs index 7bf374d1..5f85a4d2 100644 --- a/kifmm/src/fmm/types.rs +++ b/kifmm/src/fmm/types.rs @@ -724,6 +724,9 @@ where /// Difference in expansion order between check and equivalent surface, defaults to 0 pub surface_diff: usize, + + /// Select SVD algorithm for compression, either deterministic or randomised + pub svd_mode: FmmSvdMode, } /// Represents the vector between a source and target boxes encoded by Morton keys.