diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ab7767e3605..c724dfa763a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -74,7 +74,6 @@ build_exatrkx: - build/ exclude: - build/**/*.o - - build/bin/ActsUnitTest* - build/bin/ActsIntegrationTest* script: @@ -96,13 +95,24 @@ build_exatrkx: -DCMAKE_CUDA_ARCHITECTURES="75;86" -DACTS_BUILD_PLUGIN_EXATRKX=ON -DACTS_BUILD_EXAMPLES_EXATRKX=ON + -DACTS_BUILD_UNITTESTS=ON -DACTS_EXATRKX_ENABLE_TORCH=ON -DACTS_EXATRKX_ENABLE_ONNX=ON -DACTS_BUILD_EXAMPLES_PYTHON_BINDINGS=ON -DACTS_ENABLE_LOG_FAILURE_THRESHOLD=ON - cmake --build build -- -j3 -test_exatrkx: +test_exatrkx_unittests: + stage: test + needs: + - build_exatrkx + image: ghcr.io/acts-project/ubuntu2004_exatrkx:v41 + tags: + - docker-gpu-nvidia + script: + - ctest --test-dir build -R ExaTrkX + +test_exatrkx_python: stage: test needs: - build_exatrkx diff --git a/Examples/Python/tests/root_file_hashes.txt b/Examples/Python/tests/root_file_hashes.txt index b70f52235d5..d2c6c853273 100644 --- a/Examples/Python/tests/root_file_hashes.txt +++ b/Examples/Python/tests/root_file_hashes.txt @@ -86,7 +86,7 @@ test_root_material_writer__material.root: e3b0c44298fc1c149afbf4c8996fb92427ae41 test_root_clusters_writer[configPosConstructor]__clusters.root: 97f04fdd2c0eef4d37dc8732dd25ab49a90bb51925b2638d94826becf5059fae test_root_clusters_writer[configKwConstructor]__clusters.root: 97f04fdd2c0eef4d37dc8732dd25ab49a90bb51925b2638d94826becf5059fae test_root_clusters_writer[kwargsConstructor]__clusters.root: 97f04fdd2c0eef4d37dc8732dd25ab49a90bb51925b2638d94826becf5059fae -test_exatrkx[cpu-torch]__performance_track_finding.root: 926d5056c290f1f35d0564e3781c5a1953f35c7f03095ce6420e8814b6e0ab84 +test_exatrkx[cpu-torch]__performance_track_finding.root: 25c8169fe0a0f12aced3dcd729d15a666c9795514cfc62d68a5567af0bc2a262 test_exatrkx[gpu-onnx]__performance_track_finding.root: c232d638e53f0f5394d94e8343d1c4f34cf551aaab13db3f8ade4b1fb48b26dd test_exatrkx[gpu-torch]__performance_track_finding.root: 25c8169fe0a0f12aced3dcd729d15a666c9795514cfc62d68a5567af0bc2a262 test_ML_Ambiguity_Solver__performance_ambiML.root: 080e183e758b8593a9c233e2d1b4d213f28fdcb18d82acefdac7c9a5a5763bfc diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/buildEdges.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/buildEdges.hpp deleted file mode 100644 index d430e83420e..00000000000 --- a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/buildEdges.hpp +++ /dev/null @@ -1,36 +0,0 @@ -// This file is part of the Acts project. -// -// Copyright (C) 2022 CERN for the benefit of the Acts project -// -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#pragma once - -#include - -namespace at { -class Tensor; -} - -namespace Acts { - -/// Edge building using FRNN and CUDA. If CUDA is not available, -/// a brute-force CPU method is used -/// TODO implement better CPU method (K-D-Tree, ...) 
-/// TODO make parameters concise (the tensor should have the numSpacepoints, dim
-/// info)
-///
-/// @param embedFeatures Tensor of shape (n_nodes, embedding_dim)
-/// @param numSpacepoints number of spacepoints
-/// @param dim embedding embedding dim
-/// @param rVal radius for NN search
-/// @param kVal max number of neighbours in NN search
-/// @param flipDirections if we want to randomly flip directions of the
-/// edges after the edge building
-at::Tensor buildEdges(at::Tensor& embedFeatures, int64_t numSpacepoints,
-                      int dim, float rVal, int kVal,
-                      bool flipDirections = false);
-
-}  // namespace Acts
diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp
new file mode 100644
index 00000000000..4f5de7009f2
--- /dev/null
+++ b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp
@@ -0,0 +1,92 @@
+// This file is part of the Acts project.
+//
+// Copyright (C) 2023 CERN for the benefit of the Acts project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include "Acts/Utilities/Concepts.hpp"
+
+#include <cstdint>
+#include <vector>
+
+#include <torch/torch.h>
+
+namespace Acts::detail {
+
+/// So far this is only needed for integers
+template <typename T>
+struct TorchTypeMap {};
+
+template <>
+struct TorchTypeMap<int64_t> {
+  constexpr static torch::Dtype type = torch::kInt64;
+};
+
+template <>
+struct TorchTypeMap<int32_t> {
+  constexpr static torch::Dtype type = torch::kInt32;
+};
+
+template <>
+struct TorchTypeMap<int16_t> {
+  constexpr static torch::Dtype type = torch::kInt16;
+};
+
+template <>
+struct TorchTypeMap<int8_t> {
+  constexpr static torch::Dtype type = torch::kInt8;
+};
+
+template <>
+struct TorchTypeMap<float> {
+  constexpr static torch::Dtype type = torch::kFloat32;
+};
+
+template <>
+struct TorchTypeMap<double> {
+  constexpr static torch::Dtype type = torch::kFloat64;
+};
+
+/// Converts vector to 2D tensor
+/// Make sure the size of your vector is a multiple of `cols`!
+/// @Note Input must be mutable, due to torch API.
+/// @Note Tensor does not take ownership! `.clone()` afterwards to get
+/// ownership of the data
+template <typename T>
+at::Tensor vectorToTensor2D(std::vector<T> &vec, std::size_t cols) {
+  assert(vec.size() % cols == 0);
+
+  auto opts =
+      at::TensorOptions().dtype(TorchTypeMap<T>::type).device(torch::kCPU);
+
+  return torch::from_blob(
+      vec.data(),
+      {static_cast<long>(vec.size() / cols), static_cast<long>(cols)}, opts);
+}
+
+/// Converts 2D tensor to vector
+/// @Note Automatically converts tensor to target type!
+template <typename T>
+std::vector<T> tensor2DToVector(const at::Tensor &tensor) {
+  assert(tensor.sizes().size() == 2);
+
+  // clone to make sure we own the data
+  // bring to CPU
+  // convert to requested type
+  // ensure the tensor is contiguous (e.g. not the case if indexed with step)
+
+  at::Tensor transformedTensor =
+      tensor.to(torch::kCPU).to(TorchTypeMap<T>::type).contiguous();
+
+  std::vector<T> edgeIndex(
+      transformedTensor.template data_ptr<T>(),
+      transformedTensor.template data_ptr<T>() + transformedTensor.numel());
+
+  return edgeIndex;
+}
+
+}  // namespace Acts::detail
diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/buildEdges.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/buildEdges.hpp
new file mode 100644
index 00000000000..1d41e9c3626
--- /dev/null
+++ b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/buildEdges.hpp
@@ -0,0 +1,47 @@
+// This file is part of the Acts project.
+//
+// Copyright (C) 2022 CERN for the benefit of the Acts project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include <cstdint>
+
+namespace at {
+class Tensor;
+}
+
+namespace Acts {
+namespace detail {
+
+/// Post process edges
+at::Tensor postprocessEdgeTensor(at::Tensor edges, bool removeSelfLoops = true,
+                                 bool removeDuplicates = true,
+                                 bool flipDirections = false);
+
+/// Edge building using FRNN and CUDA.
+/// Raises an exception if not built with CUDA
+at::Tensor buildEdgesFRNN(at::Tensor& embedFeatures, float rVal, int kVal,
+                          bool flipDirections = false);
+
+/// Edge building using the Acts KD-Tree implementation
+/// Note that this implementation has no maximum number of neighbours
+/// in the NN search. kVal is only a hint for reserving memory
+at::Tensor buildEdgesKDTree(at::Tensor& embedFeatures, float rVal, int kVal,
+                            bool flipDirections = false);
+
+/// Dispatches either to FRNN or KD-Tree based edge building
+///
+/// @param embedFeatures Tensor of shape (n_nodes, embedding_dim)
+/// @param rVal radius for NN search
+/// @param kVal max number of neighbours in NN search
+/// @param flipDirections if we want to randomly flip directions of the
+/// edges after the edge building
+at::Tensor buildEdges(at::Tensor& embedFeatures, float rVal, int kVal,
+                      bool flipDirections = false);
+
+}  // namespace detail
+}  // namespace Acts
diff --git a/Plugins/ExaTrkX/src/OnnxMetricLearning.cpp b/Plugins/ExaTrkX/src/OnnxMetricLearning.cpp
index 53ad452ecfb..452fac25f6e 100644
--- a/Plugins/ExaTrkX/src/OnnxMetricLearning.cpp
+++ b/Plugins/ExaTrkX/src/OnnxMetricLearning.cpp
@@ -8,7 +8,7 @@
 
 #include "Acts/Plugins/ExaTrkX/OnnxMetricLearning.hpp"
 
-#include "Acts/Plugins/ExaTrkX/buildEdges.hpp"
+#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp"
 
 #include
 #include
@@ -44,10 +44,9 @@ void OnnxMetricLearning::buildEdgesWrapper(std::vector<float>& embedFeatures,
 
   torch::Tensor embedTensor =
       torch::tensor(embedFeatures, options)
-          .reshape({1, numSpacepoints, m_cfg.embeddingDim});
+          .reshape({numSpacepoints, m_cfg.embeddingDim});
 
-  auto stackedEdges = buildEdges(embedTensor, numSpacepoints,
-                                 m_cfg.embeddingDim, m_cfg.rVal, m_cfg.knnVal);
+  auto stackedEdges = detail::buildEdges(embedTensor, m_cfg.rVal, m_cfg.knnVal);
 
   stackedEdges = stackedEdges.toType(torch::kInt64).to(torch::kCPU);
 
diff --git a/Plugins/ExaTrkX/src/TorchMetricLearning.cpp b/Plugins/ExaTrkX/src/TorchMetricLearning.cpp
index f8a16b25985..b721a9c5415 100644
--- a/Plugins/ExaTrkX/src/TorchMetricLearning.cpp
+++ b/Plugins/ExaTrkX/src/TorchMetricLearning.cpp
@@ -8,13 +8,16 @@
 
 #include "Acts/Plugins/ExaTrkX/TorchMetricLearning.hpp"
 
-#include "Acts/Plugins/ExaTrkX/buildEdges.hpp"
+#include
"Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp" +#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp" #include #include #include "printCudaMemInfo.hpp" +using namespace torch::indexing; + namespace Acts { TorchMetricLearning::TorchMetricLearning(const Config &cfg, @@ -58,18 +61,22 @@ std::tuple TorchMetricLearning::operator()( << *std::min_element(inputValues.begin(), inputValues.end())) printCudaMemInfo(logger()); + auto inputTensor = + detail::vectorToTensor2D(inputValues, m_cfg.spacepointFeatures); + + // If we are on CPU, clone to get ownership (is this necessary?), else bring + // to device. + if (inputTensor.options().device() == device) { + inputTensor = inputTensor.clone(); + } else { + inputTensor = inputTensor.to(device); + } + // ********** // Embedding // ********** - const int64_t numSpacepoints = inputValues.size() / m_cfg.spacepointFeatures; std::vector inputTensors; - auto opts = torch::TensorOptions().dtype(torch::kFloat32); - torch::Tensor inputTensor = - torch::from_blob(inputValues.data(), - {numSpacepoints, m_cfg.spacepointFeatures}, opts) - .to(torch::kFloat32) - .to(device); // Clone models (solve memory leak? members can be const...) auto model = m_model->clone(); @@ -87,8 +94,7 @@ std::tuple TorchMetricLearning::operator()( // Building Edges // **************** - auto edgeList = buildEdges(output, numSpacepoints, m_cfg.embeddingDim, - m_cfg.rVal, m_cfg.knnVal); + auto edgeList = detail::buildEdges(output, m_cfg.rVal, m_cfg.knnVal); ACTS_VERBOSE("Shape of built edges: (" << edgeList.size(0) << ", " << edgeList.size(1)); diff --git a/Plugins/ExaTrkX/src/buildEdges.cpp b/Plugins/ExaTrkX/src/buildEdges.cpp index 455ae72a52a..380833c6da2 100644 --- a/Plugins/ExaTrkX/src/buildEdges.cpp +++ b/Plugins/ExaTrkX/src/buildEdges.cpp @@ -6,7 +6,11 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-#include "Acts/Plugins/ExaTrkX/buildEdges.hpp" +#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp" + +#include "Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp" +#include "Acts/Utilities/Helpers.hpp" +#include "Acts/Utilities/KDTree.hpp" #include #include @@ -25,15 +29,54 @@ #include #endif -namespace { -#ifndef ACTS_EXATRKX_CPUONLY -torch::Tensor buildEdgesFRNN(at::Tensor &embedFeatures, int64_t numSpacepoints, - int dim, float rVal, int kVal, - bool flipDirections) { - using namespace torch::indexing; +using namespace torch::indexing; + +torch::Tensor Acts::detail::postprocessEdgeTensor(torch::Tensor edges, + bool removeSelfLoops, + bool removeDuplicates, + bool flipDirections) { + // Remove self-loops + if (removeSelfLoops) { + torch::Tensor selfLoopMask = edges.index({0}) != edges.index({1}); + edges = edges.index({Slice(), selfLoopMask}); + } + + // Remove duplicates + if (removeDuplicates) { + // This code is wrong, but we keep it here to separate the refactor from the + // fix pull request Replace this after the refactor went in + torch::Tensor duplicate_mask = edges.index({0}) > edges.index({1}); + edges = edges.index({Slice(), duplicate_mask}); + + // This is the correct code: + // torch::Tensor mask = edges.index({0}) > edges.index({1}); + // edges.index_put_({Slice(), mask}, edges.index({Slice(), mask}).flip(0)); + // edges = std::get<0>(torch::unique_dim(edges, -1, false)); + } + // Randomly flip direction + if (flipDirections) { + torch::Tensor random_cut_keep = torch::randint(2, {edges.size(1)}); + torch::Tensor random_cut_flip = 1 - random_cut_keep; + torch::Tensor keep_edges = + edges.index({Slice(), random_cut_keep.to(torch::kBool)}); + torch::Tensor flip_edges = + edges.index({Slice(), random_cut_flip.to(torch::kBool)}).flip({0}); + edges = torch::cat({keep_edges, flip_edges}, 1); + } + + return edges.toType(torch::kInt64); +} + +torch::Tensor Acts::detail::buildEdgesFRNN(torch::Tensor &embedFeatures, + float rVal, int kVal, + bool flipDirections) { +#ifndef ACTS_EXATRKX_CPUONLY torch::Device device(torch::kCUDA); + const int64_t numSpacepoints = embedFeatures.size(0); + const int dim = embedFeatures.size(1); + const int grid_params_size = 8; const int grid_delta_idx = 3; const int grid_total_idx = 7; @@ -119,192 +162,122 @@ torch::Tensor buildEdgesFRNN(at::Tensor &embedFeatures, int64_t numSpacepoints, CountingSortCUDA(embedTensor, lengths.to(torch::kInt64), pc_grid_cell, pc_grid_idx, pc_grid_off, sorted_points, sorted_points_idxs); - std::tuple nbr_output = FindNbrsCUDA( + auto [indices, distances] = FindNbrsCUDA( sorted_points, sorted_points, lengths.to(torch::kInt64), lengths.to(torch::kInt64), pc_grid_off.to(torch::kInt32), sorted_points_idxs, sorted_points_idxs, gridParamsCuda.to(torch::kFloat32), kVal, r_tensor, r_tensor * r_tensor); - torch::Tensor positiveIndices = std::get<0>(nbr_output) >= 0; + torch::Tensor positiveIndices = indices >= 0; torch::Tensor repeatRange = torch::arange(positiveIndices.size(1), device) .repeat({1, positiveIndices.size(2), 1}) .transpose(1, 2); - torch::Tensor stackedEdges = - torch::stack({repeatRange.index({positiveIndices}), - std::get<0>(nbr_output).index({positiveIndices})}); + torch::Tensor stackedEdges = torch::stack( + {repeatRange.index({positiveIndices}), indices.index({positiveIndices})}); - // Remove self-loops - torch::Tensor selfLoopMask = - stackedEdges.index({0}) != stackedEdges.index({1}); - stackedEdges = stackedEdges.index({Slice(), selfLoopMask}); + return postprocessEdgeTensor(std::move(stackedEdges), 
true, true, + flipDirections); +#else + throw std::runtime_error( + "ACTS not compiled with CUDA, cannot run Acts::buildEdgesFRNN"); +#endif +} - // Remove duplicates - torch::Tensor duplicate_mask = - stackedEdges.index({0}) > stackedEdges.index({1}); - stackedEdges = stackedEdges.index({Slice(), duplicate_mask}); +/// This is a very unsophisticated span implementation to avoid data copies in +/// the KDTree search. +/// Should be replaced with std::span when possible +template +struct Span { + T *ptr; - // Randomly flip direction - if (flipDirections) { - torch::Tensor random_cut_keep = torch::randint(2, {stackedEdges.size(1)}); - torch::Tensor random_cut_flip = 1 - random_cut_keep; - torch::Tensor keep_edges = - stackedEdges.index({Slice(), random_cut_keep.to(torch::kBool)}); - torch::Tensor flip_edges = - stackedEdges.index({Slice(), random_cut_flip.to(torch::kBool)}) - .flip({0}); - stackedEdges = torch::cat({keep_edges, flip_edges}, 1); - } else { - stackedEdges = stackedEdges.toType(torch::kInt64).to(torch::kCPU); - } + auto size() const { return S; } - return stackedEdges; -} -#endif + using const_iterator = T const *; + const_iterator cbegin() const { return ptr; } + const_iterator cend() const { return ptr + S; } -torch::Tensor buildEdgesBruteForce(at::Tensor &embedFeatures, - int64_t numSpacepoints, int dim, float rVal, - int) { - auto distance = [](const at::Tensor &a, const at::Tensor &b) { - return std::sqrt(((a - b) * (a - b)).sum().item().to()); - }; + auto operator[](std::size_t i) const { return ptr[i]; } +}; - struct TwoRanges { - int i_min, i_max, j_min, j_max; - }; +template +float dist(const Span &a, const Span &b) { + float s = 0.f; + for (auto i = 0ul; i < Dim; ++i) { + s += (a[i] - b[i]) * (a[i] - b[i]); + } + return std::sqrt(s); +}; - torch::Tensor data = - embedFeatures.reshape({numSpacepoints, dim}).to(torch::kCPU); - std::cout << "data: " << data.size(0) << " " << data.size(1) << "\n"; +template +struct BuildEdgesKDTree { + static torch::Tensor invoke(torch::Tensor &embedFeatures, float rVal, + int kVal) { + assert(embedFeatures.size(1) == Dim); + embedFeatures = embedFeatures.to(torch::kCPU); - const int n_chunks = std::thread::hardware_concurrency(); - const int per_chunk = numSpacepoints / n_chunks; + //////////////// + // Build tree // + //////////////// + using KDTree = Acts::KDTree; - std::vector ranges; + typename KDTree::vector_t features; + features.reserve(embedFeatures.size(0)); - for (int i = 0; i < n_chunks; ++i) { - for (int j = i; j < n_chunks; ++j) { - TwoRanges r; + auto dataPtr = embedFeatures.data_ptr(); - r.i_min = i * per_chunk; + for (int i = 0; i < embedFeatures.size(0); ++i) { + features.push_back({Span{dataPtr + i * Dim}, i}); + } - if (i < n_chunks - 1) { - r.i_max = (i + 1) * per_chunk; - } else { - r.i_max = numSpacepoints; - } + KDTree tree(std::move(features)); - r.j_min = j * per_chunk; + ///////////////// + // Search tree // + ///////////////// + std::vector edges; + edges.reserve(2 * kVal * embedFeatures.size(0)); - if (j < n_chunks - 1) { - r.j_max = (j + 1) * per_chunk; - } else { - r.j_max = numSpacepoints; - } + for (int iself = 0; iself < embedFeatures.size(0); ++iself) { + const Span self{dataPtr + iself * Dim}; - ranges.push_back(r); - } - } - std::cout << "#ranges: " << ranges.size() << "\n"; - - std::vector all_edges; - all_edges.reserve(2 * numSpacepoints * 10); - - std::mutex res_mutex; - std::mutex int_mutex; - std::mutex progress_mutex; - std::vector progress(ranges.size()); - int index = -1; - int print_id = 1; - 
- // This for loop can be easily parallelized, however this is not done here to - // not mess up the dependencies. Possible options are: - // clang-format off - // std::for_each(std::execution::par, ranges.begin(), ranges.end(), [&](const TwoRanges &r) { - // tbb::parallel_for_each(ranges.begin(), ranges.end(), [&](const TwoRanges &r) { - // clang-format on - std::for_each(ranges.begin(), ranges.end(), [&](const TwoRanges &r) { - const int my_id = [&]() { - std::lock_guard guard(int_mutex); - return ++index; - }(); - - std::vector edges; - - auto action = [&](int i, int j) { - const auto d = distance(data[i], data[j]); - - if (d < rVal) { - edges.push_back(i); - edges.push_back(j); - } - }; - - auto print_progress = [&](int i) { - if (i % 50 == 0) { - std::lock_guard guard(progress_mutex); - progress[my_id] = (100.0 * (i - r.i_min)) / (r.i_max - r.i_min); - if (my_id == print_id) { - const float p = - std::accumulate(progress.begin(), progress.end(), 0.f) / - progress.size(); - std::cout << "Average progress: " << p << "% \n"; - print_id = - std::distance(progress.begin(), - std::min_element(progress.begin(), progress.end())); - } - } - }; - - if (r.i_min == r.j_min && r.i_max == r.j_max) { - for (int i = r.i_min; i < r.i_max; ++i) { - print_progress(i); - for (int j = i + 1; j < r.i_max; ++j) { - action(i, j); - } + Acts::RangeXD range; + for (auto j = 0ul; j < Dim; ++j) { + range[j] = Acts::Range1D(self[j] - rVal, self[j] + rVal); } - } else { - for (int i = r.i_min; i < r.i_max; ++i) { - print_progress(i); - for (int j = r.j_min; j < r.j_max; ++j) { - action(i, j); - } - } - } - std::lock_guard guard(res_mutex); - for (auto &p : edges) { - all_edges.emplace_back(std::move(p)); + tree.rangeSearchMapDiscard( + range, [&](const Span &other, const int &iother) { + if (iself != iother && dist(self, other) <= rVal) { + edges.push_back(iself); + edges.push_back(iother); + } + }); } - }); - - auto edge_index = torch::tensor(all_edges) - .clone() - .reshape({static_cast(all_edges.size() / 2), 2}) - .transpose(0, 1); - for (int i = 0; i < 5; ++i) { - if ((edge_index[0][i].item() != all_edges[2 * i]) || - (edge_index[1][i].item() != all_edges[2 * i + 1])) { - throw std::runtime_error("reshape error"); - } + // Transpose is necessary here, clone to get ownership + return Acts::detail::vectorToTensor2D(edges, 2).t().clone(); } +}; + +torch::Tensor Acts::detail::buildEdgesKDTree(torch::Tensor &embedFeatures, + float rVal, int kVal, + bool flipDirections) { + auto tensor = Acts::template_switch( + embedFeatures.size(1), embedFeatures, rVal, kVal); - return edge_index; + return postprocessEdgeTensor(tensor, true, true, flipDirections); } -} // namespace -torch::Tensor Acts::buildEdges(at::Tensor &embedFeatures, - int64_t numSpacepoints, int dim, float rVal, - int kVal, [[maybe_unused]] bool flipDirections) { +torch::Tensor Acts::detail::buildEdges(torch::Tensor &embedFeatures, float rVal, + int kVal, bool flipDirections) { #ifndef ACTS_EXATRKX_CPUONLY if (torch::cuda::is_available()) { - return buildEdgesFRNN(embedFeatures, numSpacepoints, dim, rVal, kVal, - flipDirections); + return detail::buildEdgesFRNN(embedFeatures, rVal, kVal, flipDirections); } else { - return buildEdgesBruteForce(embedFeatures, numSpacepoints, dim, rVal, kVal); + return detail::buildEdgesKDTree(embedFeatures, rVal, kVal, flipDirections); } #else - return buildEdgesBruteForce(embedFeatures, numSpacepoints, dim, rVal, kVal); + return detail::buildEdgesKDTree(embedFeatures, rVal, kVal, flipDirections); #endif } diff --git 
a/Plugins/ExaTrkX/src/printCudaMemInfo.hpp b/Plugins/ExaTrkX/src/printCudaMemInfo.hpp index 2d705a73311..739c89c7921 100644 --- a/Plugins/ExaTrkX/src/printCudaMemInfo.hpp +++ b/Plugins/ExaTrkX/src/printCudaMemInfo.hpp @@ -16,21 +16,27 @@ #include +#include + namespace { inline void printCudaMemInfo(const Acts::Logger& logger) { #ifndef ACTS_EXATRKX_CPUONLY - constexpr float kb = 1024; - constexpr float mb = kb * kb; - - int device; - std::size_t free, total; - cudaMemGetInfo(&free, &total); - cudaGetDevice(&device); - - ACTS_VERBOSE("Current CUDA device: " << device); - ACTS_VERBOSE("Memory (used / total) [in MB]: " << (total - free) / mb << " / " - << total / mb); + if (torch::cuda::is_available()) { + constexpr float kb = 1024; + constexpr float mb = kb * kb; + + int device; + std::size_t free, total; + cudaMemGetInfo(&free, &total); + cudaGetDevice(&device); + + ACTS_VERBOSE("Current CUDA device: " << device); + ACTS_VERBOSE("Memory (used / total) [in MB]: " << (total - free) / mb + << " / " << total / mb); + } else { + ACTS_VERBOSE("No memory info, CUDA disabled"); + } #else ACTS_VERBOSE("No memory info, CUDA disabled"); #endif diff --git a/Tests/UnitTests/Plugins/ExaTrkX/CMakeLists.txt b/Tests/UnitTests/Plugins/ExaTrkX/CMakeLists.txt index a241e679833..842fcac2533 100644 --- a/Tests/UnitTests/Plugins/ExaTrkX/CMakeLists.txt +++ b/Tests/UnitTests/Plugins/ExaTrkX/CMakeLists.txt @@ -1,3 +1,4 @@ set(unittest_extra_libraries ActsPluginExaTrkX ${TORCH_LIBRARIES}) +add_unittest(ExaTrkXTensorConversion ExaTrkXTensorConversionTests.cpp) add_unittest(ExaTrkXEdgeBuilding ExaTrkXEdgeBuildingTest.cpp) diff --git a/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXEdgeBuildingTest.cpp b/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXEdgeBuildingTest.cpp index 21abdf66bab..85e53be8df4 100644 --- a/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXEdgeBuildingTest.cpp +++ b/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXEdgeBuildingTest.cpp @@ -8,7 +8,8 @@ #include -#include "Acts/Plugins/ExaTrkX/buildEdges.hpp" +#include "Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp" +#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp" #include #include @@ -16,7 +17,8 @@ #include #include -namespace { +#define PRINT 0 + float distance(const at::Tensor &a, const at::Tensor &b) { assert(a.sizes() == b.sizes()); assert(a.sizes().size() == 1); @@ -24,31 +26,50 @@ float distance(const at::Tensor &a, const at::Tensor &b) { return std::sqrt(((a - b) * (a - b)).sum().item().to()); } -int cantor_pair(int x, int y) { - return y + ((x + y) * (x + y + 1)) / 2; -} +struct CantorPair { + int value; -std::pair cantor_pair_inverse(int a) { - auto f = [](int w) -> int { return (w * (w + 1)) / 2; }; - auto q = [](int w) -> int { - return std::floor((std::sqrt(8 * w + 1) - 1) / 2); - }; + CantorPair(int x, int y) : value(y + ((x + y) * (x + y + 1)) / 2) {} + + std::pair inverse() const { + auto f = [](int w) -> int { return (w * (w + 1)) / 2; }; + auto q = [](int w) -> int { + return std::floor((std::sqrt(8 * w + 1) - 1) / 2); + }; + + auto y = value - f(q(value)); + auto x = q(value) - y; + + return {x, y}; + } +}; + +#if PRINT +std::ostream &operator<<(std::ostream &os, CantorPair p) { + auto [a, b] = p.inverse(); + os << "(" << a << "," << b << ")"; + return os; +} +#endif - auto y = a - f(q(a)); - auto x = q(a) - y; +bool operator<(CantorPair a, CantorPair b) { + return a.value < b.value; +} - return {x, y}; +bool operator==(CantorPair a, CantorPair b) { + return a.value == b.value; } -} // namespace -void test_random_graph(int emb_dim, int n_nodes, float 
r, int knn) { +template +void test_random_graph(int emb_dim, int n_nodes, float r, int knn, + const edge_builder_t &edgeBuilder) { // Create a random point cloud auto random_features = at::randn({n_nodes, emb_dim}); // Generate the truth via brute-force Eigen::MatrixXf distance_matrix(n_nodes, n_nodes); - std::vector edges_ref_cantor; + std::vector edges_ref_cantor; std::vector edge_counts(n_nodes, 0); for (int i = 0; i < n_nodes; ++i) { @@ -58,87 +79,211 @@ void test_random_graph(int emb_dim, int n_nodes, float r, int knn) { distance_matrix(j, i) = d; if (d < r && i != j) { - edges_ref_cantor.push_back(cantor_pair(i, j)); + edges_ref_cantor.emplace_back(i, j); edge_counts[i]++; } } } - // Check if knn can find all edges - auto max_edges = *std::max_element(edge_counts.begin(), edge_counts.end()); - if (max_edges > knn) { - std::cout << "Warning: edge_count per node can be higher than knn, test " - "will fail\n"; - } + const auto max_edges = + *std::max_element(edge_counts.begin(), edge_counts.end()); - std::cout << "Max edge count: " << max_edges << "\n"; + // If this is not the case, the test is ill-formed + // knn specifies how many edges can be found by the function at max. Thus we + // should design the test in a way, that our brute-force test algorithm does + // not find more edges then the algorithm that we test against it can find + BOOST_REQUIRE(max_edges <= knn); // Run the edge building - auto random_features_cuda = random_features.to(torch::kCUDA); - auto edges_test = - Acts::buildEdges(random_features_cuda, n_nodes, emb_dim, r, knn); + auto edges_test = edgeBuilder(random_features, r, knn); // Map the edges to cantor pairs - std::vector edges_test_cantor; + std::vector edges_test_cantor; for (int i = 0; i < edges_test.size(1); ++i) { const auto a = edges_test[0][i].template item(); const auto b = edges_test[1][i].template item(); - edges_test_cantor.push_back(a < b ? cantor_pair(a, b) : cantor_pair(b, a)); + edges_test_cantor.push_back(a < b ? 
CantorPair(a, b) : CantorPair(b, a)); } std::sort(edges_ref_cantor.begin(), edges_ref_cantor.end()); std::sort(edges_test_cantor.begin(), edges_test_cantor.end()); - // Check - if ((edges_ref_cantor.size() != edges_test_cantor.size()) or - not std::equal(edges_test_cantor.begin(), edges_test_cantor.end(), - edges_ref_cantor.begin())) { - auto print_in_a_but_not_in_b = [&](const auto a, const auto b) { - std::vector diff; - - std::set_difference(a.begin(), a.end(), b.begin(), b.end(), - std::back_inserter(diff)); - - if (diff.empty()) { - std::cout << " none\n"; - return; - } +#if PRINT + std::cout << "test size " << edges_test_cantor.size() << std::endl; + std::cout << "ref size " << edges_ref_cantor.size() << std::endl; + std::cout << "test: "; + std::copy( + edges_test_cantor.begin(), + edges_test_cantor.begin() + std::min(edges_test_cantor.size(), 10ul), + std::ostream_iterator(std::cout, " ")); + std::cout << std::endl; + std::cout << "ref: "; + std::copy(edges_ref_cantor.begin(), + edges_ref_cantor.begin() + std::min(edges_ref_cantor.size(), 10ul), + std::ostream_iterator(std::cout, " ")); + std::cout << std::endl; +#endif - for (auto c : diff) { - const auto [e0, e1] = cantor_pair_inverse(c); - std::cout << " element (" << e0 << "," << e1 - << ") with d = " << distance_matrix(e0, e1) << "\n"; - } - }; - - std::cout << "Elements in ref but not in test:\n"; - print_in_a_but_not_in_b(edges_ref_cantor, edges_test_cantor); - std::cout << "Elements in test but not in ref:\n"; - print_in_a_but_not_in_b(edges_test_cantor, edges_ref_cantor); - - throw std::runtime_error("edges mismatch"); - } - - std::cout << "OK!" << std::endl; + // Check + BOOST_CHECK(edges_ref_cantor.size() == edges_test_cantor.size()); + BOOST_CHECK(std::equal(edges_test_cantor.begin(), edges_test_cantor.end(), + edges_ref_cantor.begin())); } BOOST_AUTO_TEST_CASE(test_cantor_pair_functions) { int a = 345; int b = 23; - const auto [aa, bb] = cantor_pair_inverse(cantor_pair(a, b)); + const auto [aa, bb] = CantorPair(a, b).inverse(); BOOST_CHECK(a == aa); BOOST_CHECK(b == bb); } -BOOST_AUTO_TEST_CASE(test_random_graph_edge_building) { - const int emb_dim = 3; - const int n_nodes = 20; - const float r = 1.5; - const int knn = 50; - const int seed = 42; +const int emb_dim = 3; +const int n_nodes = 20; +const float r = 1.5; +const int knn = 50; +const int seed = 42; + +BOOST_AUTO_TEST_CASE(test_random_graph_edge_building_cuda, + *boost::unit_test::precondition([](auto) { + return torch::cuda::is_available(); + })) { + torch::manual_seed(seed); + + auto cudaEdgeBuilder = [](auto &features, auto radius, auto k) { + auto features_cuda = features.to(torch::kCUDA); + return Acts::detail::buildEdgesFRNN(features_cuda, radius, k); + }; + + test_random_graph(emb_dim, n_nodes, r, knn, cudaEdgeBuilder); +} +BOOST_AUTO_TEST_CASE(test_random_graph_edge_building_kdtree) { torch::manual_seed(seed); - test_random_graph(emb_dim, n_nodes, r, knn); + auto cpuEdgeBuilder = [](auto &features, auto radius, auto k) { + auto features_cpu = features.to(torch::kCPU); + return Acts::detail::buildEdgesKDTree(features_cpu, radius, k); + }; + + test_random_graph(emb_dim, n_nodes, r, knn, cpuEdgeBuilder); +} + +BOOST_AUTO_TEST_CASE(test_self_loop_removal) { + // clang-format off + std::vector edges = { + 1,1, + 2,3, + 2,2, + 5,4, + }; + // clang-format on + + auto opts = torch::TensorOptions().dtype(torch::kInt64); + const auto edgeTensor = + torch::from_blob(edges.data(), {static_cast(edges.size() / 2), 2}, + opts) + .transpose(0, 1); + + const auto 
withoutSelfLoops = + Acts::detail::postprocessEdgeTensor(edgeTensor, true, false, false) + .transpose(1, 0) + .flatten(); + + const std::vector postEdges( + withoutSelfLoops.data_ptr(), + withoutSelfLoops.data_ptr() + withoutSelfLoops.numel()); + + // clang-format off + const std::vector ref = { + 2,3, + 5,4, + }; + // clang-format on + + BOOST_CHECK(ref == postEdges); +} + +// Enable this test in PR where duplicate removal is fixed +/* +BOOST_AUTO_TEST_CASE(test_duplicate_removal) { + // clang-format off + std::vector edges = { + 1,2, + 2,1, // duplicate, flipped + 3,2, + 3,2, // duplicate, not flipped + 7,6, // should be flipped + }; + // clang-format on + + auto opts = torch::TensorOptions().dtype(torch::kInt64); + const auto edgeTensor = + torch::from_blob(edges.data(), {static_cast(edges.size() / 2), 2}, + opts) + .transpose(0, 1); + + const auto withoutDups = + Acts::detail::postprocessEdgeTensor(edgeTensor, false, true, false) + .transpose(1, 0) + .flatten(); + + const std::vector postEdges( + withoutDups.data_ptr(), + withoutDups.data_ptr() + withoutDups.numel()); + + // clang-format off + const std::vector ref = { + 1,2, + 2,3, + 6,7, + }; + // clang-format on + + BOOST_CHECK(ref == postEdges); +} +*/ + +BOOST_AUTO_TEST_CASE(test_random_flip) { + torch::manual_seed(seed); + + // clang-format off + std::vector edges = { + 1,2, + 2,3, + 3,4, + 4,5, + }; + // clang-format on + + auto opts = torch::TensorOptions().dtype(torch::kInt64); + const auto edgeTensor = + torch::from_blob(edges.data(), {static_cast(edges.size() / 2), 2}, + opts) + .transpose(0, 1); + + const auto flipped = + Acts::detail::postprocessEdgeTensor(edgeTensor, false, false, true) + .transpose(0, 1) + .flatten(); + + const std::vector postEdges( + flipped.data_ptr(), + flipped.data_ptr() + flipped.numel()); + + BOOST_CHECK(postEdges.size() == edges.size()); + for (auto preIt = edges.begin(); preIt != edges.end(); preIt += 2) { + int found = 0; + + for (auto postIt = postEdges.begin(); postIt != postEdges.end(); + postIt += 2) { + bool noflp = (*preIt == *postIt) and *(preIt + 1) == *(postIt + 1); + bool flp = *preIt == *(postIt + 1) and *(preIt + 1) == *(postIt); + + found += (flp or noflp); + } + + BOOST_CHECK(found == 1); + } } diff --git a/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXTensorConversionTests.cpp b/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXTensorConversionTests.cpp new file mode 100644 index 00000000000..25ed3cceea0 --- /dev/null +++ b/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXTensorConversionTests.cpp @@ -0,0 +1,109 @@ +// This file is part of the Acts project. +// +// Copyright (C) 2022 CERN for the benefit of the Acts project +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+
+#include <boost/test/unit_test.hpp>
+
+#include "Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp"
+
+#include <torch/torch.h>
+
+BOOST_AUTO_TEST_CASE(test_vector_tensor_conversion_int_2cols) {
+  std::vector<int64_t> start_vec = {
+      // clang-format off
+      0, 1,
+      1, 2,
+      2, 3,
+      3, 4
+      // clang-format on
+  };
+
+  auto tensor = Acts::detail::vectorToTensor2D(start_vec, 2).clone();
+
+  BOOST_CHECK(tensor.options().dtype() == torch::kInt64);
+  BOOST_CHECK(tensor.sizes().size() == 2);
+  BOOST_CHECK(tensor.size(0) == 4);
+  BOOST_CHECK(tensor.size(1) == 2);
+
+  BOOST_CHECK(tensor[0][0].item<int64_t>() == 0);
+  BOOST_CHECK(tensor[0][1].item<int64_t>() == 1);
+
+  BOOST_CHECK(tensor[1][0].item<int64_t>() == 1);
+  BOOST_CHECK(tensor[1][1].item<int64_t>() == 2);
+
+  BOOST_CHECK(tensor[2][0].item<int64_t>() == 2);
+  BOOST_CHECK(tensor[2][1].item<int64_t>() == 3);
+
+  BOOST_CHECK(tensor[3][0].item<int64_t>() == 3);
+  BOOST_CHECK(tensor[3][1].item<int64_t>() == 4);
+
+  auto test_vec = Acts::detail::tensor2DToVector<int64_t>(tensor);
+
+  BOOST_CHECK(test_vec == start_vec);
+}
+
+BOOST_AUTO_TEST_CASE(test_vector_tensor_conversion_float_3cols) {
+  std::vector<float> start_vec = {
+      // clang-format off
+      0.f, 0.f, 0.f,
+      1.f, 1.f, 1.f,
+      2.f, 2.f, 2.f,
+      3.f, 3.f, 3.f
+      // clang-format on
+  };
+
+  auto tensor = Acts::detail::vectorToTensor2D(start_vec, 3).clone();
+
+  BOOST_CHECK(tensor.options().dtype() == torch::kFloat32);
+  BOOST_CHECK(tensor.sizes().size() == 2);
+  BOOST_CHECK(tensor.size(0) == 4);
+  BOOST_CHECK(tensor.size(1) == 3);
+
+  for (auto i : {0, 1, 2, 3}) {
+    BOOST_CHECK(tensor[i][0].item<float>() == static_cast<float>(i));
+    BOOST_CHECK(tensor[i][1].item<float>() == static_cast<float>(i));
+    BOOST_CHECK(tensor[i][2].item<float>() == static_cast<float>(i));
+  }
+
+  auto test_vec = Acts::detail::tensor2DToVector<float>(tensor);
+
+  BOOST_CHECK(test_vec == start_vec);
+}
+
+BOOST_AUTO_TEST_CASE(test_slicing) {
+  std::vector<float> start_vec = {
+      // clang-format off
+      0.f, 4.f, 0.f,
+      1.f, 5.f, 1.f,
+      2.f, 6.f, 2.f,
+      3.f, 7.f, 3.f
+      // clang-format on
+  };
+
+  auto tensor = Acts::detail::vectorToTensor2D(start_vec, 3).clone();
+
+  using namespace torch::indexing;
+  tensor = tensor.index({Slice{}, Slice{0, None, 2}});
+
+  BOOST_CHECK(tensor.size(0) == 4);
+  BOOST_CHECK(tensor.size(1) == 2);
+
+  const std::vector<float> ref_vec = {
+      // clang-format off
+      0.f, 0.f,
+      1.f, 1.f,
+      2.f, 2.f,
+      3.f, 3.f,
+      // clang-format on
+  };
+
+  const auto test_vec = Acts::detail::tensor2DToVector<float>(tensor);
+
+  BOOST_CHECK(test_vec == ref_vec);
+}
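
Usage sketch (not part of the patch): the new detail-level helpers are meant to be chained as below -- wrap a flat feature vector as a 2D tensor, build edges (FRNN when CUDA is available, otherwise the Acts KD-tree), and flatten the resulting [2, numEdges] tensor back into a vector. The wrapper name findEdges and its parameter names are illustrative only; the calls follow the signatures declared in detail/TensorVectorConversion.hpp and detail/buildEdges.hpp.

#include "Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp"
#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp"

#include <cstdint>
#include <vector>

#include <torch/torch.h>

// Illustrative wrapper: embedded features in, flat edge list out.
std::vector<int64_t> findEdges(std::vector<float> &embeddings,
                               std::size_t embeddingDim, float radius,
                               int knn) {
  // View the flat vector as a (numSpacepoints, embeddingDim) tensor.
  // The tensor does not own the data, so `embeddings` must stay alive.
  at::Tensor features =
      Acts::detail::vectorToTensor2D(embeddings, embeddingDim);

  // Radius/knn search; dispatches to FRNN (CUDA) or the KD-tree (CPU)
  // and returns a [2, numEdges] integer tensor.
  at::Tensor edges = Acts::detail::buildEdges(features, radius, knn);

  // Flatten back row-major: first all source indices, then all targets.
  return Acts::detail::tensor2DToVector<int64_t>(edges);
}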
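
For reference, the corrected duplicate removal that the comment inside postprocessEdgeTensor defers to a follow-up pull request, taken from the code that is commented out there; the free-function wrapper removeDuplicateEdges is illustrative only. Each edge is first canonicalised to (min, max) order, then exact duplicates are dropped with unique_dim over the edge dimension.

#include <torch/torch.h>

// Sketch of the intended duplicate removal for a [2, N] edge tensor.
torch::Tensor removeDuplicateEdges(torch::Tensor edges) {
  using namespace torch::indexing;
  // Flip the columns where source > target so every edge reads (min, max).
  torch::Tensor mask = edges.index({0}) > edges.index({1});
  edges.index_put_({Slice(), mask}, edges.index({Slice(), mask}).flip(0));
  // unique_dim over the last (edge) dimension then drops exact duplicates.
  return std::get<0>(torch::unique_dim(edges, -1, false));
}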