From 070c3221ffc7e484359e0362c4b0a698c41d22bd Mon Sep 17 00:00:00 2001 From: Manos Papadakis Date: Wed, 6 Dec 2023 11:24:13 +0200 Subject: [PATCH] Squashed commit of the following: commit 79baa1cfdfe158ebafc1821497d19f21100aa80b Author: Manos Papadakis Date: Tue Dec 5 13:29:46 2023 +0200 Update nn.Rd commit 6c1531c2f6ce604878a94e5d5a9e98fc247f9232 Author: Manos Papadakis Date: Tue Dec 5 13:29:43 2023 +0200 Update system.Rd commit ad2c5a79135aece5e89ff7c596a36bece6f29ec1 Author: Manos Papadakis Date: Tue Dec 5 13:29:41 2023 +0200 Update .Rbuildignore commit fc9f68f605c339a0ad9de346f14760fcec06949e Author: Manos Papadakis Date: Mon Dec 4 17:20:24 2023 +0200 Update nn.Rd commit b686cf8855f9059a4798c5dafc447e48b24bcc8c Author: Manos Papadakis Date: Mon Dec 4 17:16:55 2023 +0200 Update NEWS.md commit 6eccaf4eaa603256b6b7e68429a15ce1036cf210 Author: Manos Papadakis Date: Mon Dec 4 17:16:52 2023 +0200 Update DESCRIPTION commit a6c50eda72ad8856fc7647fcf9d3660a0bf3caff Author: Manos Papadakis Date: Mon Dec 4 17:07:11 2023 +0200 Update NEWS.md commit 0273aba15bbf04417c7bf0e96ad63913bb1ef371 Author: Manos Papadakis Date: Mon Dec 4 17:06:58 2023 +0200 Update KDTreeArmadilloAdaptor.hpp commit b46a7c6af2b4f95127d21d52fc8749ccb8c7d1a8 Author: Manos Papadakis Date: Mon Dec 4 17:04:58 2023 +0200 Update helpers.h commit 8af14f4b31286c61d09574970bba808655221715 Author: Manos Papadakis Date: Thu Nov 23 19:25:23 2023 +0200 optimize log calculation commit 226d2578f9dbdeac83079830413b068c17f09c14 Author: Manos Papadakis Date: Thu Nov 23 19:06:20 2023 +0200 Update .gitignore commit f76c078d6622a22ba4ce9631e80df4ec13916810 Author: Manos Papadakis Date: Thu Nov 23 19:05:13 2023 +0200 optimize jensen_shannon commit aa1759e80993315598f22de7ddf4e1e33cbbd135 Author: Manos Papadakis Date: Thu Nov 16 13:28:36 2023 +0200 Update nn.R commit 470bfd5b3c9f4dab8079049a71cd17e10a22104c Author: Manos Papadakis Date: Wed Nov 15 16:41:38 2023 +0200 Update RcppExports.cpp commit 055399c65829c54a86c5538aca111cebdfd5532d Author: Manos Papadakis Date: Wed Nov 15 16:41:34 2023 +0200 Update nn.R commit 9c9542c8322ef7789c682e8a639441ce99e3171d Author: Manos Papadakis Date: Wed Nov 15 16:41:29 2023 +0200 Update .gitignore commit 65c7a2ff82ee309a37331539efc7ec1507b0ef56 Author: Manos Papadakis Date: Wed Nov 15 16:41:17 2023 +0200 upload proj commit 2049da86fc25e9e06057d62efbd87a7aade5ed7a Author: Manos Papadakis Date: Wed Nov 15 15:01:24 2023 +0200 add equations commit b8b19fdb5d74243110b4cb7a606023d1f1252372 Author: Manos Papadakis Date: Wed Nov 15 13:50:39 2023 +0200 Update .gitignore commit e352ba380dd441f7157ada73daf902f84fb488ff Author: Manos Papadakis Date: Wed Nov 15 13:46:08 2023 +0200 fix bugs This reverts commit 99a5c9876fe9235b9c6cfec7ed16e7704f337666. commit d976a3ec123e7d62a25f1ce79bbd4d80ca98ac93 Merge: f20c937 99a5c98 Author: Manos Papadakis Date: Tue Nov 14 22:04:01 2023 +0200 Merge branch 'main' into dev commit f20c9375b0f51af2f302c346e36b4007f9c18c00 Author: Manos Papadakis Date: Wed Nov 1 17:07:37 2023 +0200 Create r.yml (#2) --- .Rbuildignore | 8 +- .gitignore | 11 +- DESCRIPTION | 4 +- NEWS.md | 19 + R/nn.R | 11 +- .../internal/KDTreeArmadilloAdaptor.hpp | 229 ++++- inst/include/internal/dists.hpp | 864 +++++++++++++++++- inst/include/internal/helpers.h | 64 ++ man/macros/system.Rd | 2 + man/nn.Rd | 46 +- src/RcppExports.cpp | 10 +- src/knn.cpp | 247 ++++- 12 files changed, 1384 insertions(+), 131 deletions(-) create mode 100644 inst/include/internal/helpers.h diff --git a/.Rbuildignore b/.Rbuildignore index f46a76c..3af075d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,4 +1,8 @@ ^.*\.Rproj$ ^\.Rproj\.user$ -^.RData -^.Rhistory \ No newline at end of file +^docs +^.todo +^.github +^.html +^.gitignore +^.vscode \ No newline at end of file diff --git a/.gitignore b/.gitignore index 52be4c4..ee8fbf5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,10 @@ - -*src/.dll -*src/.o -*.dll +*.swp *.o +*.o.tmp +*\src-i386 +*\src-x64 +*.dll +*.html *.RData *.Rhistory +.Rproj.user diff --git a/DESCRIPTION b/DESCRIPTION index 3d1b1ef..af1dcde 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: Rnanoflann Type: Package Title: Extremely Fast Nearest Neighbor Search -Version: 0.0.1 -Date: 2023-10-30 +Version: 0.0.2 +Date: 2023-12-04 Authors@R: c( person("Manos Papadakis", email = "papadakm95@gmail.com", role = c("aut", "cre", "cph")), person("Jose Luis Blanco", email = "joseluisblancoc@gmail.com" , role = c("aut","cph")), diff --git a/NEWS.md b/NEWS.md index ea08deb..54245c7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,25 @@ Rnanoflann +### **Version 0.0.2** + +------------------------------------------------------------------------ + +> **New** +> +> | Function | What's new! | +> |----------|------------------------------------------| +> | nn | add new metrics with parallel version, "hellinger","canberra","kullback_leibler","jensen_shannon","itakura_saito","bhattacharyya","jeffries_matusita","minimum","maximum","total_variation","sorensen,"cosine","gower","minkowski","soergel","kulczynski","wave_hedges","motyka","harmonic_mean". | +> | nn | fix bug in metric "euclidean". | +> +> **LinkingTo** +> +> | Function | What's new! | +> |----------|------------------------------------------| +> | nn | add new metrics with parallel version, "hellinger","canberra","kullback_leibler","jensen_shannon","itakura_saito","bhattacharyya","jeffries_matusita","minimum","maximum","total_variation","sorensen,"cosine","gower","minkowski","soergel","kulczynski","wave_hedges","motyka","harmonic_mean". | +> | nn | fix bug in metric "euclidean". | + + ### **Version 0.0.1** ------------------------------------------------------------------------ diff --git a/R/nn.R b/R/nn.R index b9fa056..ced1ddf 100644 --- a/R/nn.R +++ b/R/nn.R @@ -1,8 +1,11 @@ #[export] -nn <- function(data, points = data, k = nrow(data), method = "euclidean", search = "standard", eps = 0.0, square = FALSE, - sorted = FALSE, radius = 0.0, trans = TRUE, leafs = 10, parallel = FALSE, cores = 0){ - res <- .Call(`_Rnanoflann_nn`, t(data), t(points), k, method, search, eps, square, - sorted, radius, leafs, parallel, cores) +nn <- function(data, points, k = nrow(data), method = "euclidean", search = "standard", +eps = 0.0, square = FALSE, sorted = FALSE, radius = 0.0, trans = TRUE, leafs = 10L, p = 0.0, parallel = FALSE, cores = 0L) { + if(method == "hellinger"){ + data <- sqrt(data) + points <- sqrt(points) + } + res <- .Call(`_Rnanoflann_nn`, t(data), t(points), k, method, search, eps, square, sorted, radius, leafs, p, parallel, cores) if(trans){ res$indices <- t(res$indices) res$distances <- t(res$distances) diff --git a/inst/include/internal/KDTreeArmadilloAdaptor.hpp b/inst/include/internal/KDTreeArmadilloAdaptor.hpp index 06ce5f8..d45e161 100644 --- a/inst/include/internal/KDTreeArmadilloAdaptor.hpp +++ b/inst/include/internal/KDTreeArmadilloAdaptor.hpp @@ -6,25 +6,25 @@ using namespace arma; using namespace nanoflann; - -namespace Rnanoflann { +namespace Rnanoflann +{ // Define an Armadillo KDTreeAdaptor class template struct KDTreeArmadilloAdaptor { using self_t = KDTreeArmadilloAdaptor; using num_t = typename MatrixType::elem_type; - using IndexType = uword; - - using metric_t = typename Distance::template traits::distance_t;// You can change the distance metric as needed. - + using IndexType = uword; + + using metric_t = typename Distance::template traits::distance_t; // You can change the distance metric as needed. + using index_t = nanoflann::KDTreeSingleIndexAdaptor; - - index_t* index_; - + + index_t *index_; + const std::reference_wrapper m_data_matrix; - - explicit KDTreeArmadilloAdaptor(const uword dimensionality, const std::reference_wrapper& mat,const uword leafs = 10) + + explicit KDTreeArmadilloAdaptor(const uword dimensionality, const std::reference_wrapper &mat, const uword leafs = 10) : m_data_matrix(mat) { const auto dims = mat.get().n_rows; // Assumes column-major Armadillo matrix @@ -35,47 +35,51 @@ namespace Rnanoflann { index_ = new index_t(dims, *this /* adaptor */, nanoflann::KDTreeSingleIndexAdaptorParams(leafs)); index_->buildIndex(); } - + ~KDTreeArmadilloAdaptor() { delete index_; } - - const self_t& derived() const { return *this; } - self_t& derived() { return *this; } - + + const self_t &derived() const { return *this; } + self_t &derived() { return *this; } + uword kdtree_get_point_count() const { return m_data_matrix.get().n_cols; } - + num_t kdtree_get_pt(uword idx, size_t dim) const { - return m_data_matrix.get()(dim,idx); + return m_data_matrix.get()(dim, idx); + } + + colvec col(uword idx) const + { + return m_data_matrix.get().col(idx); } - + template - bool kdtree_get_bbox(BBOX& bb) const + bool kdtree_get_bbox(BBOX &bb) const { return false; // Optional bounding-box computation (not used in this example) } }; - // Define an Armadillo KDTreeAdaptor class - template + template struct KDTreeArmadilloAdaptor2 { using self_t = KDTreeArmadilloAdaptor2; using num_t = typename MatrixType::elem_type; - using IndexType = uword; - - using metric_t = typename Distance::template traits::distance_t;// You can change the distance metric as needed. - + using IndexType = uword; + + using metric_t = typename Distance::template traits::distance_t; // You can change the distance metric as needed. + using index_t = nanoflann::KDTreeSingleIndexAdaptor; - - index_t* index_; - + + index_t *index_; + const std::reference_wrapper m_data_matrix; - - explicit KDTreeArmadilloAdaptor2(const uword dimensionality, const std::reference_wrapper& mat,const uword leafs = 10) + + explicit KDTreeArmadilloAdaptor2(const uword dimensionality, const std::reference_wrapper &mat, const uword leafs = 10) : m_data_matrix(mat) { const auto dims = mat.get().n_rows; // Assumes column-major Armadillo matrix @@ -86,24 +90,169 @@ namespace Rnanoflann { index_ = new index_t(dims, *this /* adaptor */, nanoflann::KDTreeSingleIndexAdaptorParams(leafs)); index_->buildIndex(); } - + ~KDTreeArmadilloAdaptor2() { delete index_; } - - const self_t& derived() const { return *this; } - self_t& derived() { return *this; } - + + const self_t &derived() const { return *this; } + self_t &derived() { return *this; } + uword kdtree_get_point_count() const { return m_data_matrix.get().n_cols; } - + + colvec col(uword idx) const + { + return m_data_matrix.get().col(idx); + } + num_t kdtree_get_pt(uword idx, size_t dim) const { - return m_data_matrix.get()(dim,idx); + return m_data_matrix.get()(dim, idx); + } + + template + bool kdtree_get_bbox(BBOX &bb) const + { + return false; // Optional bounding-box computation (not used in this example) } - + }; + + template + struct KDTreeArmadilloAdaptor3 + { + using self_t = KDTreeArmadilloAdaptor3; + using num_t = typename MatrixType::elem_type; + using IndexType = uword; + + using metric_t = typename Distance::template traits::distance_t; // You can change the distance metric as needed. + + using index_t = nanoflann::KDTreeSingleIndexAdaptor; + + index_t *index_; + + const double p; + const double p_1; + + const std::reference_wrapper m_data_matrix; + + explicit KDTreeArmadilloAdaptor3(const uword dimensionality, const std::reference_wrapper &mat, const double p, const uword leafs = 10) + : p(p), p_1(1.0 / p), m_data_matrix(mat) + { + const auto dims = mat.get().n_rows; // Assumes column-major Armadillo matrix + if (static_cast(dims) != dimensionality) + throw std::runtime_error("Error: 'dimensionality' must match the column count in the data matrix"); + if (DIM > 0 && static_cast(dims) != DIM) + throw std::runtime_error("Data set dimensionality does not match the 'DIM' template argument"); + index_ = new index_t(dims, *this /* adaptor */, nanoflann::KDTreeSingleIndexAdaptorParams(leafs)); + index_->buildIndex(); + } + + ~KDTreeArmadilloAdaptor3() { delete index_; } + + const self_t &derived() const { return *this; } + self_t &derived() { return *this; } + + uword kdtree_get_point_count() const + { + return m_data_matrix.get().n_cols; + } + + num_t kdtree_get_pt(uword idx, size_t dim) const + { + return m_data_matrix.get()(dim, idx); + } + + colvec col(uword idx) const + { + return m_data_matrix.get().col(idx); + } + + const double getP() const + { + return p; + } + + const double getP_1() const + { + return p_1; + } + + template + bool kdtree_get_bbox(BBOX &bb) const + { + return false; // Optional bounding-box computation (not used in this example) + } + }; + + // Define an Armadillo KDTreeAdaptor class + template + struct KDTreeArmadilloAdaptor4 + { + using self_t = KDTreeArmadilloAdaptor4; + using num_t = typename MatrixType::elem_type; + using IndexType = uword; + + using metric_t = typename Distance::template traits::distance_t; // You can change the distance metric as needed. + + using index_t = nanoflann::KDTreeSingleIndexAdaptor; + + index_t *index_; + + const std::reference_wrapper m_data_matrix; + const MatrixType xlogx, ylogy; + const num_t *begin_points; + + explicit KDTreeArmadilloAdaptor4(const uword dimensionality, const std::reference_wrapper &mat, const MatrixType &points, const uword leafs = 10) + : m_data_matrix(mat), xlogx(mat.get() % arma::log(mat.get())), ylogy(points % arma::log(points)), begin_points(points.memptr()) + { + const auto dims = mat.get().n_rows; // Assumes column-major Armadillo matrix + if (static_cast(dims) != dimensionality) + throw std::runtime_error("Error: 'dimensionality' must match the column count in the data matrix"); + if (DIM > 0 && static_cast(dims) != DIM) + throw std::runtime_error("Data set dimensionality does not match the 'DIM' template argument"); + index_ = new index_t(dims, *this /* adaptor */, nanoflann::KDTreeSingleIndexAdaptorParams(leafs)); + index_->buildIndex(); + } + + ~KDTreeArmadilloAdaptor4() { delete index_; } + + const self_t &derived() const { return *this; } + self_t &derived() { return *this; } + + uword kdtree_get_point_count() const + { + return m_data_matrix.get().n_cols; + } + + num_t kdtree_get_pt(uword idx, size_t dim) const + { + return m_data_matrix.get()(dim, idx); + } + + colvec col(uword idx) const + { + return m_data_matrix.get().col(idx); + } + + colvec col_xlogx(uword idx) const + { + return xlogx.col(idx); + } + + colvec col_ylogy(const num_t *a) const + { + auto index = std::floor((a - begin_points) / ylogy.n_rows); + return ylogy.col(index); + } + + double log0_5() const + { + return std::log(0.5); + } + template - bool kdtree_get_bbox(BBOX& bb) const + bool kdtree_get_bbox(BBOX &bb) const { return false; // Optional bounding-box computation (not used in this example) } diff --git a/inst/include/internal/dists.hpp b/inst/include/internal/dists.hpp index 9c0d946..f2c0d06 100644 --- a/inst/include/internal/dists.hpp +++ b/inst/include/internal/dists.hpp @@ -1,55 +1,861 @@ #pragma once +//[[Rcpp::depends(RcppArmadillo)]] +#include #include "nanoflann.hpp" +#include "Coeff.h" +#include "Dist.h" +#include "helpers.h" using namespace nanoflann; +using namespace arma; +namespace Rnanoflann +{ -namespace Rnanoflann { - template < - class T, class DataSource, bool Square = false, typename _DistanceType = T, - typename IndexType = uint32_t> - struct euclidean_adaptor + struct euclidean : public Metric { - using ElementType = T; - using DistanceType = _DistanceType; + template < + class T, class DataSource, bool Square, typename _DistanceType = T, + typename IndexType = uint32_t> + struct euclidean_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; - const DataSource& data_source; + const DataSource &data_source; - euclidean_adaptor(const DataSource& _data_source) - : data_source(_data_source) + euclidean_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + DistanceType result = Dist::euclidean(y, x); + return Square ? result : std::sqrt(result); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits { - } + using distance_t = euclidean_adaptor; + }; + }; - DistanceType evalMetric( - const T* a, const IndexType b_idx, size_t size) const + struct manhattan : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct manhattan_adaptor { - DistanceType result = DistanceType(); - for (size_t i = 0; i < size; ++i) + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + manhattan_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const { - const DistanceType diff = - a[i] - data_source.kdtree_get_pt(b_idx, i); - result += diff * diff; + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + DistanceType result = Dist::manhattan(y, x); + return result; } - return result; - } - template - DistanceType accum_dist(const U a, const V b, const size_t) const + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits { - return Square ? (a - b) : std::sqrt(a - b); - } + using distance_t = manhattan_adaptor; + }; }; + struct hellinger : public Metric + { + template < + class T, class DataSource, bool Square, typename _DistanceType = T, + typename IndexType = uint32_t> + struct hellinger_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + hellinger_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + DistanceType result = Dist::euclidean(y, x); + return Square ? result * 0.5 : std::sqrt(result) * (1.0 / std::sqrt(2.0)); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; - /** Metaprogramming helper traits class for the L2 (Euclidean) **squared** - * distance metric */ - struct euclidean : public Metric - { template struct traits { - using distance_t = euclidean_adaptor; + using distance_t = hellinger_adaptor; + }; + }; + + struct minkowski : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct minkowski_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + minkowski_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + DistanceType result = DistanceType(); + for (size_t i = 0; i < size; ++i) + { + result += pow(abs(a[i] - data_source.kdtree_get_pt(b_idx, i)), data_source.getP()); + } + return pow(result, data_source.getP_1()); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = minkowski_adaptor; + }; + }; + + struct maximum : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct maximum_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + maximum_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + Col result = abs(x - y); + return result[result.index_max()]; + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = maximum_adaptor; + }; + }; + + struct minimum : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct minimum_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + minimum_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + Col result = abs(x - y); + return result[result.index_min()]; + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = minimum_adaptor; + }; + }; + + struct total_variation : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct total_variation_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + total_variation_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return Dist::manhattan(y, x) * 0.5; + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = total_variation_adaptor; + }; + }; + + struct cosine : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct cosine_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + cosine_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return dot(y, x) / (sqrt(sum(square(x))) * sqrt(sum(square(y)))); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = cosine_adaptor; + }; + }; + + struct gower : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct gower_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + gower_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return Dist::manhattan(y, x) * (1.0 / size); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = gower_adaptor; + }; + }; + + struct sorensen : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct sorensen_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + sorensen_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return sum(abs(x - y) / (y + x)); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = sorensen_adaptor; + }; + }; + + struct canberra : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct canberra_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + canberra_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return sum(abs(x - y) / (abs(x) + abs(y))); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = canberra_adaptor; + }; + }; + + struct kullback_leibler : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct kullback_leibler_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + kullback_leibler_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + DistanceType res = DistanceType(); + for (size_t i = 0; i < size; ++i) + { + DistanceType y = a[i], x = data_source.kdtree_get_pt(b_idx, i); + DistanceType v = (y - x) * (std::log(y) - std::log(x)); + if (std::isfinite(v)) + { + res += v; + } + } + return res; + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = kullback_leibler_adaptor; + }; + }; + + struct jensen_shannon : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct jensen_shannon_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + jensen_shannon_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + Col v = x + y; + return sum_with_condition( + data_source.col_xlogx(b_idx) + data_source.col_ylogy(a) - (arma::log(v) + data_source.log0_5()) % (v)); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = jensen_shannon_adaptor; + }; + }; + + struct itakura_saito : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct itakura_saito_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + itakura_saito_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + DistanceType res = DistanceType(); + for (size_t i = 0; i < size; ++i) + { + DistanceType y = a[i], x = data_source.kdtree_get_pt(b_idx, i); + DistanceType v = (x / y) - (std::log(x) - std::log(y)) - 1.0; // Not symmetric so x/y is not y/x + if (std::isfinite(v)) + { + res += v; + } + } + + return res; + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = itakura_saito_adaptor; + }; + }; + + struct bhattacharyya : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct bhattacharyya_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + bhattacharyya_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return -log(Coeff::bhattacharyya(y, x)); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = bhattacharyya_adaptor; + }; + }; + + struct jeffries_matusita : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct jeffries_matusita_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + jeffries_matusita_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return std::sqrt(2.0 - 2.0 * Coeff::bhattacharyya(y, x)); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = jeffries_matusita_adaptor; }; }; + + struct soergel : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct soergel_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + soergel_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return Dist::manhattan(y, x) / sum_with>(y, x); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = soergel_adaptor; + }; + }; + + struct kulczynski : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct kulczynski_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + kulczynski_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return Dist::manhattan(y, x) / sum_with>(y, x); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = kulczynski_adaptor; + }; + }; + + struct wave_hedges : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct wave_hedges_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + wave_hedges_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return sum(abs(y - x) / elems(y, x)); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = wave_hedges_adaptor; + }; + }; + + struct motyka : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct motyka_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + motyka_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return 1.0 - sum_with>(y, x) / sum(y + x); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = motyka_adaptor; + }; + }; + + struct harmonic_mean : public Metric + { + template < + class T, class DataSource, typename _DistanceType = T, + typename IndexType = uint32_t> + struct harmonic_mean_adaptor + { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource &data_source; + + harmonic_mean_adaptor(const DataSource &_data_source) + : data_source(_data_source) + { + } + + DistanceType evalMetric( + const T *a, const IndexType b_idx, size_t size) const + { + Col y(const_cast(a), size, false); + const Col x = data_source.col(b_idx); + return 2.0 * dot(y, x) / sum(y + x); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const + { + return 0; + } + }; + + template + struct traits + { + using distance_t = harmonic_mean_adaptor; + }; + }; + }; \ No newline at end of file diff --git a/inst/include/internal/helpers.h b/inst/include/internal/helpers.h new file mode 100644 index 0000000..4266c1e --- /dev/null +++ b/inst/include/internal/helpers.h @@ -0,0 +1,64 @@ +#pragma once + +//[[Rcpp::depends(RcppArmadillo)]] +#include +using namespace arma; + +typedef double (*Unary_Function)(double); // unary function + +template +using Mfunction = RET (*)(Args...); + +using Binary_Function = Mfunction; +template +using ConditionFunction = bool (*)(T); + +template +double sum_with(T x, T y) +{ + double a = 0; + typename T::iterator startx = x.begin(); + typename T::iterator starty = y.begin(); + for (; startx != x.end(); ++startx, ++starty) + { + a += F(*startx, *starty); + } + return a; +} + +template COND, class F> +T sum_with_condition(F x) +{ + T a = 0; + for (typename F::iterator start = x.begin(); start != x.end(); ++start) + { + if (COND(*start)) + { + a += *start; + } + } + return a; +} + +template +colvec elems(colvec x, colvec y) +{ + colvec maxs(x.n_elem, fill::none); + for (unsigned int i = 0; i < x.n_elem; ++i) + { + maxs[i] = F(x[i], y[i]); + } + return maxs; +} + +inline bool check_if_is_finite(double x) +{ + return x > 0 and !R_IsNA(x); +} + +template +void fill_with(T1 start,T1 end,T2 startf){ + for(;start!=end;++start,++startf){ + *startf=F(*start); + } +} \ No newline at end of file diff --git a/man/macros/system.Rd b/man/macros/system.Rd index f9a76ed..d301c9c 100644 --- a/man/macros/system.Rd +++ b/man/macros/system.Rd @@ -1,3 +1,4 @@ +%[dont read] % System Rd macros % These macros are automatically loaded whenever R processes an Rd file. @@ -16,6 +17,7 @@ % To get the package author at build time from the DESCRIPTION file + \newcommand{\packagePackage}{\Sexpr[results=rd,stage=build]{utils::packageDescription("#1")[["Package"]]}} \newcommand{\packageType}{\Sexpr[results=rd,stage=build]{utils::packageDescription("#1")[["Type"]]}} \newcommand{\packageVersion}{\Sexpr[results=rd,stage=build]{utils::packageDescription("#1")[["Version"]]}} diff --git a/man/nn.Rd b/man/nn.Rd index 8ced60e..5f88fdc 100644 --- a/man/nn.Rd +++ b/man/nn.Rd @@ -5,9 +5,9 @@ Uses a kd-tree to find the k nearest neighbours for each point in a given dataset. } \usage{ -nn(data, points = data, k = nrow(data), method = "euclidean", search = "standard", -eps = 0.0, square = FALSE, sorted = FALSE, radius = 0.0, trans = TRUE, leafs = 10, -parallel = FALSE, cores = 0) +nn(data, points, k = nrow(data), method = "euclidean", search = "standard", +eps = 0.0, square = FALSE, sorted = FALSE, radius = 0.0, trans = TRUE, +leafs = 10L, p = 0.0, parallel = FALSE, cores = 0L) } \arguments{ @@ -18,10 +18,10 @@ A numerical matrix. The k nearest points will be extracted from this matrix. A numerical matrix. The function will find the nearest neighbours of each row of this matrix. } \item{k}{ -The number of neares neighbours to search for. +The number of nearest neighbours to search for. } \item{method}{ -The type of distance. Currently two distances are supported, the "euclidean" and "manhattan". +The type of distance.See details for the supported metrics. } \item{search}{ The type of search. Apart from the "standard" there is the "radius" option. It searches @@ -45,6 +45,9 @@ The radius of the search, when search = "radius". \item{trans}{ Should the return matrices be transposed? The default value is TRUE. } +\item{p}{ +This is for the the Minkowski, the power of the metric. +} \item{leafs}{ Number of divided points. Default is 10. @@ -63,12 +66,41 @@ Number of threads for parallel version. The default is 0 which means all the ava } } +\details{ +The target of this function is to calculate the distances between xnew and x without having to calculate the whole +distance matrix of xnew and x. The latter does extra calculations, which can be avoided. + +\itemize{ +\item euclidean : \eqn{ \sum \sqrt( \sum | P_i - Q_i |^2)} +\item manhattan : \eqn{ \sum \sum | P_i - Q_i |} +\item minimum : \eqn{ \sum \min | P_i - Q_i |} +\item maximum : \eqn{ \sum \max | P_i - Q_i |} +\item minkowski : \eqn{ \sum ( \sum | P_i - Q_i |^p)^\frac{1}{p}} +\item bhattacharyya : \eqn{ \sum - ln \sum \sqrt(P_i * Q_i)} +\item hellinger : \eqn{ \sum 2 * \sqrt( 1 - \sum \sqrt(P_i * Q_i))} +\item kullback_leibler : \eqn{ \sum \sum P_i * log(\frac{P_i}{Q_i})} +\item jensen_shannon : \eqn{ \sum 0.5 * ( \sum P_i * log(2 * \frac{P_i}{Q_i} + Q_i) + \sum Q_i * log(2 * \frac{Q_i}{P_i} + Q_i))} +\item canberra : \eqn{ \sum \sum \frac{| P_i - Q_i |}{P_i + Q_i}} +\item chi_square \eqn{X}^2 : \eqn{ \sum \sum (\frac{(P_i - Q_i )^2}{P_i + Q_i})} +\item soergel : \eqn{ \sum \frac{\sum | P_i - Q_i |}{\sum \max(P_i , Q_i)}} +\item sorensen : \eqn{ \sum \frac{\sum | P_i - Q_i |}{\sum (P_i + Q_i)}} +\item cosine : \eqn{ \sum \frac{\sum (P_i * Q_i)}{\sqrt(\sum P_i^2) * \sqrt(\sum Q_i^2)}} +\item wave_hedges : \eqn{ \sum \frac{\sum | P_i - Q_i |}{\max(P_i , Q_i)}} +\item motyka : \eqn{ \sum \sum \frac{\min(P_i , Q_i)}{(P_i + Q_i)}} +\item harmonic_mean : \eqn{ \sum 2 * \frac{\sum P_i * Q_i}{P_i + Q_i}} +\item jeffries_matusita : \eqn{ \sum \sqrt( 2 - 2 * \sum \sqrt(P_i * Q_i))} +\item gower : \eqn{ \sum \frac{1}{d} * \sum | P_i - Q_i |} +\item kulczynski : \eqn{ \sum \frac{\sum | P_i - Q_i |}{\sum \min(P_i , Q_i)}} +\item itakura_saito : \eqn{ \sum \frac{P_i}{Q_i} - log(\frac{P_i}{Q_i}) - 1} +} +} + \value{ A list with 2 fields. -\item{inds}{ +\item{indices}{ A matrix with the indices of each nearest neighbour for each of the rows of the matrix "points". } -\item{dists}{ +\item{distances}{ A matrix with the distances between each nearest neighbour and each of the rows of the matrix "points". } } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index dc6658a..07fae13 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1,6 +1,7 @@ // Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 +#include "../inst/include/Rnanoflann.h" #include #include @@ -12,8 +13,8 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // nn -List nn(arma::mat data, arma::mat points, arma::uword k, const std::string method, const std::string search, const double eps, const bool square, const bool sorted, const double radius, const unsigned int leafs, const bool parallel, const unsigned int cores); -RcppExport SEXP _Rnanoflann_nn(SEXP dataSEXP, SEXP pointsSEXP, SEXP kSEXP, SEXP methodSEXP, SEXP searchSEXP, SEXP epsSEXP, SEXP squareSEXP, SEXP sortedSEXP, SEXP radiusSEXP, SEXP leafsSEXP, SEXP parallelSEXP, SEXP coresSEXP) { +List nn(arma::mat data, arma::mat points, arma::uword k, const std::string method, const std::string search, const double eps, const bool square, const bool sorted, const double radius, const unsigned int leafs, const double p, const bool parallel, const unsigned int cores); +RcppExport SEXP _Rnanoflann_nn(SEXP dataSEXP, SEXP pointsSEXP, SEXP kSEXP, SEXP methodSEXP, SEXP searchSEXP, SEXP epsSEXP, SEXP squareSEXP, SEXP sortedSEXP, SEXP radiusSEXP, SEXP leafsSEXP, SEXP pSEXP, SEXP parallelSEXP, SEXP coresSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -27,15 +28,16 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const bool >::type sorted(sortedSEXP); Rcpp::traits::input_parameter< const double >::type radius(radiusSEXP); Rcpp::traits::input_parameter< const unsigned int >::type leafs(leafsSEXP); + Rcpp::traits::input_parameter< const double >::type p(pSEXP); Rcpp::traits::input_parameter< const bool >::type parallel(parallelSEXP); Rcpp::traits::input_parameter< const unsigned int >::type cores(coresSEXP); - rcpp_result_gen = Rcpp::wrap(nn(data, points, k, method, search, eps, square, sorted, radius, leafs, parallel, cores)); + rcpp_result_gen = Rcpp::wrap(nn(data, points, k, method, search, eps, square, sorted, radius, leafs, p, parallel, cores)); return rcpp_result_gen; END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_Rnanoflann_nn", (DL_FUNC) &_Rnanoflann_nn, 12}, + {"_Rnanoflann_nn", (DL_FUNC) &_Rnanoflann_nn, 13}, {NULL, NULL, 0} }; diff --git a/src/knn.cpp b/src/knn.cpp index ee0cbc1..e259590 100644 --- a/src/knn.cpp +++ b/src/knn.cpp @@ -9,67 +9,236 @@ using namespace arma; using namespace Rcpp; -using namespace Rnanoflann; +using Rnanoflann::KDTreeArmadilloAdaptor; +using Rnanoflann::KDTreeArmadilloAdaptor2; +using Rnanoflann::KDTreeArmadilloAdaptor3; +using Rnanoflann::KDTreeArmadilloAdaptor4; template -void nn_helper(T& mat_index, SearchParameters &searchParams, arma::mat &points, arma::uword k, - const std::string& search, const double radius, const bool parallel, - const unsigned int cores, arma::umat &indices, arma::mat &distances){ - if(search == "standard"){ - #ifdef _OPENMP - #pragma omp parallel for if(parallel) num_threads(cores) - #endif - for(uword i=0;iknnSearch(points.colptr(i), k, indices.colptr(i), distances.colptr(i)); +void nn_helper(T &mat_index, SearchParameters &searchParams, arma::mat &points, arma::uword k, + const std::string &search, const double radius, const bool parallel, + const unsigned int cores, arma::umat &indices, arma::mat &distances) +{ + if (search == "standard") + { + if (parallel) + { +#ifdef _OPENMP +#pragma omp parallel for num_threads(cores) +#endif + for (uword i = 0; i < points.n_cols; ++i) + { + mat_index.index_->knnSearch(points.colptr(i), k, indices.colptr(i), distances.colptr(i)); + } } - }else if(search == "radius"){ - #ifdef _OPENMP - #pragma omp parallel for if(parallel) num_threads(cores) - #endif - for(uword i=0;i> radius_search_results; // Store the results - radius_search_results.reserve(k); - mat_index.index_->radiusSearch(points.memptr(), radius, radius_search_results); - - uword* ind = indices.colptr(i); - double* dist = distances.colptr(i); - for(uword j=0;jknnSearch(points.colptr(i), k, indices.colptr(i), distances.colptr(i)); } } } -} + else if (search == "radius") + { + if (parallel) + { +#ifdef _OPENMP +#pragma omp parallel for num_threads(cores) +#endif + for (uword i = 0; i < points.n_cols; ++i) + { + std::vector> radius_search_results; // Store the results + radius_search_results.reserve(k); + mat_index.index_->radiusSearch(points.memptr(), radius, radius_search_results); + uword *ind = indices.colptr(i); + double *dist = distances.colptr(i); + for (uword j = 0; j < radius_search_results.size(); ++j) + { + ind[j] = radius_search_results[j].first; + dist[j] = radius_search_results[j].second; + } + } + } + else + { + for (uword i = 0; i < points.n_cols; ++i) + { + std::vector> radius_search_results; // Store the results + radius_search_results.reserve(k); + mat_index.index_->radiusSearch(points.memptr(), radius, radius_search_results); + + uword *ind = indices.colptr(i); + double *dist = distances.colptr(i); + for (uword j = 0; j < radius_search_results.size(); ++j) + { + ind[j] = radius_search_results[j].first; + dist[j] = radius_search_results[j].second; + } + } + } + } +} //[[Rcpp::export]] -List nn(arma::mat data, arma::mat points, arma::uword k,const std::string method = "euclidean", - const std::string search = "standard", const double eps = 0.0, const bool square = false, - const bool sorted = false, const double radius = 0.0, const unsigned int leafs = 10, +List nn(arma::mat data, arma::mat points, arma::uword k, const std::string method = "euclidean", + const std::string search = "standard", const double eps = 0.0, const bool square = false, + const bool sorted = false, const double radius = 0.0, const unsigned int leafs = 10, const double p = 0.0, const bool parallel = false, const unsigned int cores = 0) { - umat indices(k,points.n_cols); - mat distances(k,points.n_cols); - SearchParameters searchParams(eps,sorted); - + umat indices(k, points.n_cols); + mat distances(k, points.n_cols); + SearchParameters searchParams(eps, sorted); + if (method == "euclidean") { - if(square){ - using my_kd_tree_t = KDTreeArmadilloAdaptor2; + if (square) + { + using my_kd_tree_t = KDTreeArmadilloAdaptor2; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else + { + using my_kd_tree_t = KDTreeArmadilloAdaptor2; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + } + else if (method == "hellinger") + { + if (square) + { + using my_kd_tree_t = KDTreeArmadilloAdaptor2; my_kd_tree_t mat_index(data.n_rows, data, leafs); nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); - }else{ - using my_kd_tree_t = KDTreeArmadilloAdaptor2; + } + else + { + using my_kd_tree_t = KDTreeArmadilloAdaptor2; my_kd_tree_t mat_index(data.n_rows, data, leafs); nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); } } else if (method == "manhattan") { - using my_kd_tree_t = KDTreeArmadilloAdaptor; + using my_kd_tree_t = KDTreeArmadilloAdaptor; my_kd_tree_t mat_index(data.n_rows, data, leafs); nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); } - - return List::create(_["indices"]=indices+1, _["distances"]=distances); + else if (method == "canberra") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "kullback_leibler") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "jensen_shannon") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor4; + my_kd_tree_t mat_index(data.n_rows, data, points, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "itakura_saito") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "bhattacharyya") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "jeffries_matusita") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "minimum") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "maximum") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "total_variation") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "sorensen") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "cosine") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "gower") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "minkowski") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor3; + my_kd_tree_t mat_index(data.n_rows, data, p, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "soergel") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "kulczynski") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "wave_hedges") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "motyka") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else if (method == "harmonic_mean") + { + using my_kd_tree_t = KDTreeArmadilloAdaptor; + my_kd_tree_t mat_index(data.n_rows, data, leafs); + nn_helper(mat_index, searchParams, points, k, search, radius, parallel, cores, indices, distances); + } + else + { + stop("Unsupported Method: %s", method); + } + + return List::create(_["indices"] = indices + 1, _["distances"] = distances); } \ No newline at end of file