From 0486e42344f07154da7ac247b934d5d3eda0a420 Mon Sep 17 00:00:00 2001
From: Benjamin Huth <37871400+benjaminhuth@users.noreply.github.com>
Date: Wed, 16 Aug 2023 21:38:02 +0200
Subject: [PATCH] feat: ExaTrkX edge building KDTree on CPU + fixes + refactor
 + tests (#2360)

* Replaces brute force edge building with `Acts::KDTree` method
* Abstracts `std::vector` to `torch::Tensor` conversions
* Fixes bug in edge duplicate removal
* Adds more unit tests and enables them in CI
---
 .gitlab-ci.yml                                |  14 +-
 Examples/Python/tests/root_file_hashes.txt    |   2 +-
 .../Acts/Plugins/ExaTrkX/buildEdges.hpp       |  36 ---
 .../ExaTrkX/detail/TensorVectorConversion.hpp |  92 ++++++
 .../Plugins/ExaTrkX/detail/buildEdges.hpp     |  47 +++
 Plugins/ExaTrkX/src/OnnxMetricLearning.cpp    |   7 +-
 Plugins/ExaTrkX/src/TorchMetricLearning.cpp   |  26 +-
 Plugins/ExaTrkX/src/buildEdges.cpp            | 287 ++++++++----------
 Plugins/ExaTrkX/src/printCudaMemInfo.hpp      |  28 +-
 .../UnitTests/Plugins/ExaTrkX/CMakeLists.txt  |   1 +
 .../ExaTrkX/ExaTrkXEdgeBuildingTest.cpp       | 279 +++++++++++++----
 .../ExaTrkX/ExaTrkXTensorConversionTests.cpp  | 109 +++++++
 12 files changed, 640 insertions(+), 288 deletions(-)
 delete mode 100644 Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/buildEdges.hpp
 create mode 100644 Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp
 create mode 100644 Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/buildEdges.hpp
 create mode 100644 Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXTensorConversionTests.cpp

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index ab7767e3605..c724dfa763a 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -74,7 +74,6 @@ build_exatrkx:
       - build/
     exclude:
       - build/**/*.o
-      - build/bin/ActsUnitTest*
       - build/bin/ActsIntegrationTest*

   script:
@@ -96,13 +95,24 @@ build_exatrkx:
       -DCMAKE_CUDA_ARCHITECTURES="75;86"
       -DACTS_BUILD_PLUGIN_EXATRKX=ON
       -DACTS_BUILD_EXAMPLES_EXATRKX=ON
+      -DACTS_BUILD_UNITTESTS=ON
       -DACTS_EXATRKX_ENABLE_TORCH=ON
       -DACTS_EXATRKX_ENABLE_ONNX=ON
       -DACTS_BUILD_EXAMPLES_PYTHON_BINDINGS=ON
       -DACTS_ENABLE_LOG_FAILURE_THRESHOLD=ON
     - cmake --build build -- -j3

-test_exatrkx:
+test_exatrkx_unittests:
+  stage: test
+  needs:
+    - build_exatrkx
+  image: ghcr.io/acts-project/ubuntu2004_exatrkx:v41
+  tags:
+    - docker-gpu-nvidia
+  script:
+    - ctest --test-dir build -R ExaTrkX
+
+test_exatrkx_python:
   stage: test
   needs:
     - build_exatrkx

diff --git a/Examples/Python/tests/root_file_hashes.txt b/Examples/Python/tests/root_file_hashes.txt
index b70f52235d5..d2c6c853273 100644
--- a/Examples/Python/tests/root_file_hashes.txt
+++ b/Examples/Python/tests/root_file_hashes.txt
@@ -86,7 +86,7 @@ test_root_material_writer__material.root: e3b0c44298fc1c149afbf4c8996fb92427ae41
 test_root_clusters_writer[configPosConstructor]__clusters.root: 97f04fdd2c0eef4d37dc8732dd25ab49a90bb51925b2638d94826becf5059fae
 test_root_clusters_writer[configKwConstructor]__clusters.root: 97f04fdd2c0eef4d37dc8732dd25ab49a90bb51925b2638d94826becf5059fae
 test_root_clusters_writer[kwargsConstructor]__clusters.root: 97f04fdd2c0eef4d37dc8732dd25ab49a90bb51925b2638d94826becf5059fae
-test_exatrkx[cpu-torch]__performance_track_finding.root: 926d5056c290f1f35d0564e3781c5a1953f35c7f03095ce6420e8814b6e0ab84
+test_exatrkx[cpu-torch]__performance_track_finding.root: 25c8169fe0a0f12aced3dcd729d15a666c9795514cfc62d68a5567af0bc2a262
 test_exatrkx[gpu-onnx]__performance_track_finding.root: c232d638e53f0f5394d94e8343d1c4f34cf551aaab13db3f8ade4b1fb48b26dd
 test_exatrkx[gpu-torch]__performance_track_finding.root: 25c8169fe0a0f12aced3dcd729d15a666c9795514cfc62d68a5567af0bc2a262
 test_ML_Ambiguity_Solver__performance_ambiML.root: 080e183e758b8593a9c233e2d1b4d213f28fdcb18d82acefdac7c9a5a5763bfc

diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/buildEdges.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/buildEdges.hpp
deleted file mode 100644
index d430e83420e..00000000000
--- a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/buildEdges.hpp
+++ /dev/null
@@ -1,36 +0,0 @@
-// This file is part of the Acts project.
-//
-// Copyright (C) 2022 CERN for the benefit of the Acts project
-//
-// This Source Code Form is subject to the terms of the Mozilla Public
-// License, v. 2.0. If a copy of the MPL was not distributed with this
-// file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#pragma once
-
-#include <cstdint>
-
-namespace at {
-class Tensor;
-}
-
-namespace Acts {
-
-/// Edge building using FRNN and CUDA. If CUDA is not available,
-/// a brute-force CPU method is used
-/// TODO implement better CPU method (K-D-Tree, ...)
-/// TODO make parameters concise (the tensor should have the numSpacepoints, dim
-/// info)
-///
-/// @param embedFeatures Tensor of shape (n_nodes, embedding_dim)
-/// @param numSpacepoints number of spacepoints
-/// @param dim embedding embedding dim
-/// @param rVal radius for NN search
-/// @param kVal max number of neighbours in NN search
-/// @param flipDirections if we want to randomly flip directions of the
-/// edges after the edge building
-at::Tensor buildEdges(at::Tensor& embedFeatures, int64_t numSpacepoints,
-                      int dim, float rVal, int kVal,
-                      bool flipDirections = false);
-
-}  // namespace Acts

diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp
new file mode 100644
index 00000000000..4f5de7009f2
--- /dev/null
+++ b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp
@@ -0,0 +1,92 @@
+// This file is part of the Acts project.
+//
+// Copyright (C) 2023 CERN for the benefit of the Acts project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include "Acts/Utilities/Concepts.hpp"
+
+#include <cassert>
+#include <vector>
+
+#include <torch/torch.h>
+
+namespace Acts::detail {
+
+/// So far this is only needed for integers
+template <typename T>
+struct TorchTypeMap {};
+
+template <>
+struct TorchTypeMap<int64_t> {
+  constexpr static torch::Dtype type = torch::kInt64;
+};
+
+template <>
+struct TorchTypeMap<int32_t> {
+  constexpr static torch::Dtype type = torch::kInt32;
+};
+
+template <>
+struct TorchTypeMap<int16_t> {
+  constexpr static torch::Dtype type = torch::kInt16;
+};
+
+template <>
+struct TorchTypeMap<int8_t> {
+  constexpr static torch::Dtype type = torch::kInt8;
+};
+
+template <>
+struct TorchTypeMap<float> {
+  constexpr static torch::Dtype type = torch::kFloat32;
+};
+
+template <>
+struct TorchTypeMap<double> {
+  constexpr static torch::Dtype type = torch::kFloat64;
+};
+
+/// Converts vector to 2D tensor
+/// Make sure the size of your vector is a multiple of `cols`!
+/// @note Input must be mutable, due to torch API.
+/// @note Tensor does not take ownership! Use `.clone()` afterwards to take
+/// ownership of the data
+template <typename T>
+at::Tensor vectorToTensor2D(std::vector<T> &vec, std::size_t cols) {
+  assert(vec.size() % cols == 0);
+
+  auto opts =
+      at::TensorOptions().dtype(TorchTypeMap<T>::type).device(torch::kCPU);
+
+  return torch::from_blob(
+      vec.data(),
+      {static_cast<long>(vec.size() / cols), static_cast<long>(cols)}, opts);
+}
+
+/// Converts 2D tensor to vector
+/// @note Automatically converts tensor to target type!
+template <typename T>
+std::vector<T> tensor2DToVector(const at::Tensor &tensor) {
+  assert(tensor.sizes().size() == 2);
+
+  // clone to make sure we own the data
+  // bring to CPU
+  // convert to requested type
+  // ensure the tensor is contiguous (e.g. not the case if indexed with step)
+
+  at::Tensor transformedTensor =
+      tensor.to(torch::kCPU).to(TorchTypeMap<T>::type).contiguous();
+
+  std::vector<T> edgeIndex(
+      transformedTensor.template data_ptr<T>(),
+      transformedTensor.template data_ptr<T>() + transformedTensor.numel());
+
+  return edgeIndex;
+}
+
+}  // namespace Acts::detail
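A note on the ownership semantics this header introduces: `torch::from_blob` wraps the
vector's buffer without copying or owning it, so the returned tensor is a view that must
not outlive the vector unless it is cloned. A minimal sketch of both directions of the
conversion (illustration only, not part of the patch; `main` and the literal values are
made up):

```cpp
#include "Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp"

#include <cassert>
#include <vector>

int main() {
  std::vector<float> values = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};  // 3 rows x 2 cols

  // View onto `values`: no copy, no ownership
  at::Tensor view = Acts::detail::vectorToTensor2D(values, 2);
  assert(view.size(0) == 3 && view.size(1) == 2);

  // Writes through the tensor are visible in the vector (shared storage)
  view[0][0] = 42.f;
  assert(values[0] == 42.f);

  // An owning copy survives the vector going out of scope
  at::Tensor owned = view.clone();

  // The reverse direction always copies into a fresh vector
  auto roundTrip = Acts::detail::tensor2DToVector<float>(owned);
  assert(roundTrip == values);
}
```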
diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/buildEdges.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/buildEdges.hpp
new file mode 100644
index 00000000000..1d41e9c3626
--- /dev/null
+++ b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/buildEdges.hpp
@@ -0,0 +1,47 @@
+// This file is part of the Acts project.
+//
+// Copyright (C) 2022 CERN for the benefit of the Acts project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include <cstdint>
+
+namespace at {
+class Tensor;
+}
+
+namespace Acts {
+namespace detail {
+
+/// Post-process edges
+at::Tensor postprocessEdgeTensor(at::Tensor edges, bool removeSelfLoops = true,
+                                 bool removeDuplicates = true,
+                                 bool flipDirections = false);
+
+/// Edge building using FRNN and CUDA.
+/// Raises an exception if not built with CUDA.
+at::Tensor buildEdgesFRNN(at::Tensor& embedFeatures, float rVal, int kVal,
+                          bool flipDirections = false);
+
+/// Edge building using the Acts KD-Tree implementation.
+/// Note that this implementation has no maximum number of neighbours
+/// in the NN search; kVal is only a hint for reserving memory.
+at::Tensor buildEdgesKDTree(at::Tensor& embedFeatures, float rVal, int kVal,
+                            bool flipDirections = false);
+
+/// Dispatches either to FRNN or KD-Tree based edge building
+///
+/// @param embedFeatures Tensor of shape (n_nodes, embedding_dim)
+/// @param rVal radius for NN search
+/// @param kVal max number of neighbours in NN search
+/// @param flipDirections if we want to randomly flip directions of the
+/// edges after the edge building
+at::Tensor buildEdges(at::Tensor& embedFeatures, float rVal, int kVal,
+                      bool flipDirections = false);
+
+}  // namespace detail
+}  // namespace Acts
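For orientation, this is how the new dispatcher is meant to be called. A minimal usage
sketch (not part of the patch; the sizes, radius and `k` are made-up values, and the
result-shape comment follows the `@param` docs above):

```cpp
#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp"

#include <torch/torch.h>

int main() {
  // 100 spacepoints embedded into an 8-dimensional metric-learning space
  at::Tensor embedding = torch::randn({100, 8});

  const float radius = 0.2f;  // neighbourhood radius in the embedding space
  const int k = 50;           // FRNN neighbour limit; only a size hint for the KD-tree

  // Dispatches to FRNN if CUDA is available at runtime, else to the KD-tree.
  // Result shape is (2, n_edges): row 0 = source nodes, row 1 = target nodes.
  at::Tensor edges = Acts::detail::buildEdges(embedding, radius, k);

  return edges.size(1) >= 0 ? 0 : 1;
}
```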
diff --git a/Plugins/ExaTrkX/src/OnnxMetricLearning.cpp b/Plugins/ExaTrkX/src/OnnxMetricLearning.cpp
index 53ad452ecfb..452fac25f6e 100644
--- a/Plugins/ExaTrkX/src/OnnxMetricLearning.cpp
+++ b/Plugins/ExaTrkX/src/OnnxMetricLearning.cpp
@@ -8,7 +8,7 @@
 
 #include "Acts/Plugins/ExaTrkX/OnnxMetricLearning.hpp"
 
-#include "Acts/Plugins/ExaTrkX/buildEdges.hpp"
+#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp"
 
 #include
 #include
 
@@ -44,10 +44,9 @@ void OnnxMetricLearning::buildEdgesWrapper(std::vector<float>& embedFeatures,
 
   torch::Tensor embedTensor =
       torch::tensor(embedFeatures, options)
-          .reshape({1, numSpacepoints, m_cfg.embeddingDim});
+          .reshape({numSpacepoints, m_cfg.embeddingDim});
 
-  auto stackedEdges = buildEdges(embedTensor, numSpacepoints,
-                                 m_cfg.embeddingDim, m_cfg.rVal, m_cfg.knnVal);
+  auto stackedEdges = detail::buildEdges(embedTensor, m_cfg.rVal, m_cfg.knnVal);
 
   stackedEdges = stackedEdges.toType(torch::kInt64).to(torch::kCPU);

diff --git a/Plugins/ExaTrkX/src/TorchMetricLearning.cpp b/Plugins/ExaTrkX/src/TorchMetricLearning.cpp
index f8a16b25985..b721a9c5415 100644
--- a/Plugins/ExaTrkX/src/TorchMetricLearning.cpp
+++ b/Plugins/ExaTrkX/src/TorchMetricLearning.cpp
@@ -8,13 +8,16 @@
 
 #include "Acts/Plugins/ExaTrkX/TorchMetricLearning.hpp"
 
-#include "Acts/Plugins/ExaTrkX/buildEdges.hpp"
+#include "Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp"
+#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp"
 
 #include <torch/script.h>
 #include <torch/torch.h>
 
 #include "printCudaMemInfo.hpp"
 
+using namespace torch::indexing;
+
 namespace Acts {
 
 TorchMetricLearning::TorchMetricLearning(const Config &cfg,
@@ -58,18 +61,22 @@ std::tuple TorchMetricLearning::operator()(
                << *std::min_element(inputValues.begin(), inputValues.end()))
   printCudaMemInfo(logger());
 
+  auto inputTensor =
+      detail::vectorToTensor2D(inputValues, m_cfg.spacepointFeatures);
+
+  // If we are on CPU, clone to get ownership (is this necessary?), else bring
+  // to device.
+  if (inputTensor.options().device() == device) {
+    inputTensor = inputTensor.clone();
+  } else {
+    inputTensor = inputTensor.to(device);
+  }
+
   // **********
   // Embedding
   // **********
 
-  const int64_t numSpacepoints = inputValues.size() / m_cfg.spacepointFeatures;
   std::vector<torch::jit::IValue> inputTensors;
-  auto opts = torch::TensorOptions().dtype(torch::kFloat32);
-  torch::Tensor inputTensor =
-      torch::from_blob(inputValues.data(),
-                       {numSpacepoints, m_cfg.spacepointFeatures}, opts)
-          .to(torch::kFloat32)
-          .to(device);
 
   // Clone models (solve memory leak? members can be const...)
   auto model = m_model->clone();
@@ -87,8 +94,7 @@ std::tuple TorchMetricLearning::operator()(
   // ****************
   // Building Edges
   // ****************
 
-  auto edgeList = buildEdges(output, numSpacepoints, m_cfg.embeddingDim,
-                             m_cfg.rVal, m_cfg.knnVal);
+  auto edgeList = detail::buildEdges(output, m_cfg.rVal, m_cfg.knnVal);
 
   ACTS_VERBOSE("Shape of built edges: (" << edgeList.size(0) << ", "
                                          << edgeList.size(1));
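Regarding the `(is this necessary?)` comment in the CPU branch above: within
`operator()` the backing vector stays alive, but the clone guards against the tensor
escaping the scope of the data that backs it, since `from_blob` tensors dangle once
their buffer is gone. A sketch of the hazard (illustrative function names, not part of
the patch):

```cpp
#include <torch/torch.h>

#include <vector>

at::Tensor makeDangling() {
  std::vector<float> data(12, 1.f);
  auto opts = torch::TensorOptions().dtype(torch::kFloat32);
  // Aliases `data`, which is destroyed on return: use-after-free for callers
  return torch::from_blob(data.data(), {4, 3}, opts);
}

at::Tensor makeSafe() {
  std::vector<float> data(12, 1.f);
  auto opts = torch::TensorOptions().dtype(torch::kFloat32);
  // clone() copies into tensor-owned storage before `data` dies
  return torch::from_blob(data.data(), {4, 3}, opts).clone();
}
```

Note that `.to(device)` already copies when the target device differs, which is why only
the CPU branch needs the explicit `clone()`.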
diff --git a/Plugins/ExaTrkX/src/buildEdges.cpp b/Plugins/ExaTrkX/src/buildEdges.cpp
index 455ae72a52a..380833c6da2 100644
--- a/Plugins/ExaTrkX/src/buildEdges.cpp
+++ b/Plugins/ExaTrkX/src/buildEdges.cpp
@@ -6,7 +6,11 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-#include "Acts/Plugins/ExaTrkX/buildEdges.hpp"
+#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp"
+
+#include "Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp"
+#include "Acts/Utilities/Helpers.hpp"
+#include "Acts/Utilities/KDTree.hpp"
 
 #include
 #include
 #include
 #include
 #include
 
 #ifndef ACTS_EXATRKX_CPUONLY
 #include
 #include
 #include
 #include
 #endif
 
-namespace {
-#ifndef ACTS_EXATRKX_CPUONLY
-torch::Tensor buildEdgesFRNN(at::Tensor &embedFeatures, int64_t numSpacepoints,
-                             int dim, float rVal, int kVal,
-                             bool flipDirections) {
-  using namespace torch::indexing;
+using namespace torch::indexing;
+
+torch::Tensor Acts::detail::postprocessEdgeTensor(torch::Tensor edges,
+                                                  bool removeSelfLoops,
+                                                  bool removeDuplicates,
+                                                  bool flipDirections) {
+  // Remove self-loops
+  if (removeSelfLoops) {
+    torch::Tensor selfLoopMask = edges.index({0}) != edges.index({1});
+    edges = edges.index({Slice(), selfLoopMask});
+  }
+
+  // Remove duplicates
+  if (removeDuplicates) {
+    // This code is wrong, but we keep it here to separate the refactor from
+    // the fix pull request. Replace this after the refactor went in
+    torch::Tensor duplicate_mask = edges.index({0}) > edges.index({1});
+    edges = edges.index({Slice(), duplicate_mask});
+
+    // This is the correct code:
+    // torch::Tensor mask = edges.index({0}) > edges.index({1});
+    // edges.index_put_({Slice(), mask}, edges.index({Slice(), mask}).flip(0));
+    // edges = std::get<0>(torch::unique_dim(edges, -1, false));
+  }
+  // Randomly flip direction
+  if (flipDirections) {
+    torch::Tensor random_cut_keep = torch::randint(2, {edges.size(1)});
+    torch::Tensor random_cut_flip = 1 - random_cut_keep;
+    torch::Tensor keep_edges =
+        edges.index({Slice(), random_cut_keep.to(torch::kBool)});
+    torch::Tensor flip_edges =
+        edges.index({Slice(), random_cut_flip.to(torch::kBool)}).flip({0});
+    edges = torch::cat({keep_edges, flip_edges}, 1);
+  }
+
+  return edges.toType(torch::kInt64);
+}
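To see why the retained duplicate-removal code is wrong: the mask keeps only columns
with source > target, so an edge that occurs solely in (smaller, larger) order is
silently dropped instead of deduplicated, while duplicates that agree in direction
survive. The commented-out replacement first canonicalises the direction and only then
removes duplicate columns. A self-contained demonstration (illustration only, not part
of the patch):

```cpp
#include <torch/torch.h>

#include <cassert>

int main() {
  // Columns: (1,2) unique, (3,2) duplicated, (2,1) = flipped duplicate of (1,2)
  auto edges = torch::tensor({{1, 3, 3, 2}, {2, 2, 2, 1}});

  // Buggy version: keeps only columns with src > dst. The unique edge {1,2}
  // is dropped entirely and the duplicated (3,2) is not merged.
  auto mask = edges.index({0}) > edges.index({1});
  auto buggy = edges.index({torch::indexing::Slice(), mask});
  assert(buggy.size(1) == 3);  // (3,2), (3,2), (2,1) -- wrong

  // Fixed version from the comment: canonicalise direction, then unique
  auto fixed = edges.clone();
  fixed.index_put_({torch::indexing::Slice(), mask},
                   fixed.index({torch::indexing::Slice(), mask}).flip(0));
  fixed = std::get<0>(torch::unique_dim(fixed, -1, false));
  assert(fixed.size(1) == 2);  // exactly {(1,2), (2,3)} -- correct
}
```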
+
+torch::Tensor Acts::detail::buildEdgesFRNN(torch::Tensor &embedFeatures,
+                                           float rVal, int kVal,
+                                           bool flipDirections) {
+#ifndef ACTS_EXATRKX_CPUONLY
   torch::Device device(torch::kCUDA);
 
+  const int64_t numSpacepoints = embedFeatures.size(0);
+  const int dim = embedFeatures.size(1);
+
   const int grid_params_size = 8;
   const int grid_delta_idx = 3;
   const int grid_total_idx = 7;
@@ -119,192 +162,122 @@ torch::Tensor buildEdgesFRNN(at::Tensor &embedFeatures, int64_t numSpacepoints,
   CountingSortCUDA(embedTensor, lengths.to(torch::kInt64), pc_grid_cell,
                    pc_grid_idx, pc_grid_off, sorted_points,
                    sorted_points_idxs);
 
-  std::tuple<at::Tensor, at::Tensor> nbr_output = FindNbrsCUDA(
+  auto [indices, distances] = FindNbrsCUDA(
       sorted_points, sorted_points, lengths.to(torch::kInt64),
       lengths.to(torch::kInt64), pc_grid_off.to(torch::kInt32),
       sorted_points_idxs, sorted_points_idxs,
       gridParamsCuda.to(torch::kFloat32), kVal, r_tensor, r_tensor * r_tensor);
-  torch::Tensor positiveIndices = std::get<0>(nbr_output) >= 0;
+  torch::Tensor positiveIndices = indices >= 0;
 
   torch::Tensor repeatRange = torch::arange(positiveIndices.size(1), device)
                                   .repeat({1, positiveIndices.size(2), 1})
                                   .transpose(1, 2);
 
-  torch::Tensor stackedEdges =
-      torch::stack({repeatRange.index({positiveIndices}),
-                    std::get<0>(nbr_output).index({positiveIndices})});
+  torch::Tensor stackedEdges = torch::stack(
+      {repeatRange.index({positiveIndices}), indices.index({positiveIndices})});
 
-  // Remove self-loops
-  torch::Tensor selfLoopMask =
-      stackedEdges.index({0}) != stackedEdges.index({1});
-  stackedEdges = stackedEdges.index({Slice(), selfLoopMask});
+  return postprocessEdgeTensor(std::move(stackedEdges), true, true,
+                               flipDirections);
+#else
+  throw std::runtime_error(
+      "ACTS not compiled with CUDA, cannot run Acts::buildEdgesFRNN");
+#endif
+}
 
-  // Remove duplicates
-  torch::Tensor duplicate_mask =
-      stackedEdges.index({0}) > stackedEdges.index({1});
-  stackedEdges = stackedEdges.index({Slice(), duplicate_mask});
+/// This is a very unsophisticated span implementation to avoid data copies in
+/// the KDTree search.
+/// Should be replaced with std::span when possible.
+template <typename T, std::size_t S>
+struct Span {
+  T *ptr;
 
-  // Randomly flip direction
-  if (flipDirections) {
-    torch::Tensor random_cut_keep = torch::randint(2, {stackedEdges.size(1)});
-    torch::Tensor random_cut_flip = 1 - random_cut_keep;
-    torch::Tensor keep_edges =
-        stackedEdges.index({Slice(), random_cut_keep.to(torch::kBool)});
-    torch::Tensor flip_edges =
-        stackedEdges.index({Slice(), random_cut_flip.to(torch::kBool)})
-            .flip({0});
-    stackedEdges = torch::cat({keep_edges, flip_edges}, 1);
-  } else {
-    stackedEdges = stackedEdges.toType(torch::kInt64).to(torch::kCPU);
-  }
+  auto size() const { return S; }
 
-  return stackedEdges;
-}
-#endif
+  using const_iterator = T const *;
+  const_iterator cbegin() const { return ptr; }
+  const_iterator cend() const { return ptr + S; }
 
-torch::Tensor buildEdgesBruteForce(at::Tensor &embedFeatures,
-                                   int64_t numSpacepoints, int dim, float rVal,
-                                   int) {
-  auto distance = [](const at::Tensor &a, const at::Tensor &b) {
-    return std::sqrt(((a - b) * (a - b)).sum().item().to<float>());
-  };
+  auto operator[](std::size_t i) const { return ptr[i]; }
+};
 
-  struct TwoRanges {
-    int i_min, i_max, j_min, j_max;
-  };
+template <std::size_t Dim>
+float dist(const Span<float, Dim> &a, const Span<float, Dim> &b) {
+  float s = 0.f;
+  for (auto i = 0ul; i < Dim; ++i) {
+    s += (a[i] - b[i]) * (a[i] - b[i]);
+  }
+  return std::sqrt(s);
+};
 
-  torch::Tensor data =
-      embedFeatures.reshape({numSpacepoints, dim}).to(torch::kCPU);
-  std::cout << "data: " << data.size(0) << " " << data.size(1) << "\n";
+template <std::size_t Dim>
+struct BuildEdgesKDTree {
+  static torch::Tensor invoke(torch::Tensor &embedFeatures, float rVal,
+                              int kVal) {
+    assert(embedFeatures.size(1) == Dim);
+    embedFeatures = embedFeatures.to(torch::kCPU);
 
-  const int n_chunks = std::thread::hardware_concurrency();
-  const int per_chunk = numSpacepoints / n_chunks;
+    ////////////////
+    // Build tree //
+    ////////////////
+    using KDTree = Acts::KDTree<Dim, int, float, Span>;
 
-  std::vector<TwoRanges> ranges;
+    typename KDTree::vector_t features;
+    features.reserve(embedFeatures.size(0));
 
-  for (int i = 0; i < n_chunks; ++i) {
-    for (int j = i; j < n_chunks; ++j) {
-      TwoRanges r;
+    auto dataPtr = embedFeatures.data_ptr<float>();
 
-      r.i_min = i * per_chunk;
+    for (int i = 0; i < embedFeatures.size(0); ++i) {
+      features.push_back({Span<float, Dim>{dataPtr + i * Dim}, i});
+    }
 
-      if (i < n_chunks - 1) {
-        r.i_max = (i + 1) * per_chunk;
-      } else {
-        r.i_max = numSpacepoints;
-      }
+    KDTree tree(std::move(features));
 
-      r.j_min = j * per_chunk;
+    /////////////////
+    // Search tree //
+    /////////////////
+    std::vector<int32_t> edges;
+    edges.reserve(2 * kVal * embedFeatures.size(0));
 
-      if (j < n_chunks - 1) {
-        r.j_max = (j + 1) * per_chunk;
-      } else {
-        r.j_max = numSpacepoints;
-      }
+    for (int iself = 0; iself < embedFeatures.size(0); ++iself) {
+      const Span<float, Dim> self{dataPtr + iself * Dim};
 
-      ranges.push_back(r);
-    }
-  }
-  std::cout << "#ranges: " << ranges.size() << "\n";
-
-  std::vector<int> all_edges;
-  all_edges.reserve(2 * numSpacepoints * 10);
-
-  std::mutex res_mutex;
-  std::mutex int_mutex;
-  std::mutex progress_mutex;
-  std::vector<float> progress(ranges.size());
-  int index = -1;
-  int print_id = 1;
-
-  // This for loop can be easily parallelized, however this is not done here to
-  // not mess up the dependencies. Possible options are:
-  // clang-format off
-  // std::for_each(std::execution::par, ranges.begin(), ranges.end(), [&](const TwoRanges &r) {
-  // tbb::parallel_for_each(ranges.begin(), ranges.end(), [&](const TwoRanges &r) {
-  // clang-format on
-  std::for_each(ranges.begin(), ranges.end(), [&](const TwoRanges &r) {
-    const int my_id = [&]() {
-      std::lock_guard<std::mutex> guard(int_mutex);
-      return ++index;
-    }();
-
-    std::vector<int> edges;
-
-    auto action = [&](int i, int j) {
-      const auto d = distance(data[i], data[j]);
-
-      if (d < rVal) {
-        edges.push_back(i);
-        edges.push_back(j);
-      }
-    };
-
-    auto print_progress = [&](int i) {
-      if (i % 50 == 0) {
-        std::lock_guard<std::mutex> guard(progress_mutex);
-        progress[my_id] = (100.0 * (i - r.i_min)) / (r.i_max - r.i_min);
-        if (my_id == print_id) {
-          const float p =
-              std::accumulate(progress.begin(), progress.end(), 0.f) /
-              progress.size();
-          std::cout << "Average progress: " << p << "% \n";
-          print_id =
-              std::distance(progress.begin(),
-                            std::min_element(progress.begin(), progress.end()));
-        }
-      }
-    };
-
-    if (r.i_min == r.j_min && r.i_max == r.j_max) {
-      for (int i = r.i_min; i < r.i_max; ++i) {
-        print_progress(i);
-        for (int j = i + 1; j < r.i_max; ++j) {
-          action(i, j);
-        }
+      Acts::RangeXD<Dim, float> range;
+      for (auto j = 0ul; j < Dim; ++j) {
+        range[j] = Acts::Range1D<float>(self[j] - rVal, self[j] + rVal);
       }
-    } else {
-      for (int i = r.i_min; i < r.i_max; ++i) {
-        print_progress(i);
-        for (int j = r.j_min; j < r.j_max; ++j) {
-          action(i, j);
-        }
-      }
-    }
 
-    std::lock_guard<std::mutex> guard(res_mutex);
-    for (auto &p : edges) {
-      all_edges.emplace_back(std::move(p));
+      tree.rangeSearchMapDiscard(
+          range, [&](const Span<float, Dim> &other, const int &iother) {
+            if (iself != iother && dist(self, other) <= rVal) {
+              edges.push_back(iself);
+              edges.push_back(iother);
+            }
+          });
     }
+
+    // Transpose is necessary here, clone to get ownership
+    return Acts::detail::vectorToTensor2D(edges, 2).t().clone();
+  }
+};
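`BuildEdgesKDTree` carries the embedding dimension as a template parameter because
`Acts::KDTree` needs it at compile time, while the tensor only knows its dimension at
runtime; the `Acts::template_switch` call below bridges the two by instantiating the
struct once per supported dimension. The following simplified sketch shows the
underlying pattern (`dispatchDim` is an illustrative stand-in, not the actual Acts
helper from `Acts/Utilities/Helpers.hpp`):

```cpp
#include <cstdlib>
#include <utility>

// Map a runtime dimension onto compile-time instantiations of Callable<N>,
// for N in the closed range [N, NMax].
template <template <std::size_t> class Callable, std::size_t N,
          std::size_t NMax, typename... Args>
auto dispatchDim(std::size_t dim, Args &&...args) {
  if (dim == N) {
    return Callable<N>::invoke(std::forward<Args>(args)...);
  }
  if constexpr (N < NMax) {
    return dispatchDim<Callable, N + 1, NMax>(dim, std::forward<Args>(args)...);
  }
  std::abort();  // dimension outside the instantiated range
}

// Usage would mirror the call in buildEdgesKDTree below:
//   auto tensor = dispatchDim<BuildEdgesKDTree, 1, 12>(
//       embedFeatures.size(1), embedFeatures, rVal, kVal);
```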
-  });
-
-  auto edge_index = torch::tensor(all_edges)
-                        .clone()
-                        .reshape({static_cast<long>(all_edges.size() / 2), 2})
-                        .transpose(0, 1);
-  for (int i = 0; i < 5; ++i) {
-    if ((edge_index[0][i].item<int>() != all_edges[2 * i]) ||
-        (edge_index[1][i].item<int>() != all_edges[2 * i + 1])) {
-      throw std::runtime_error("reshape error");
-    }
+
+torch::Tensor Acts::detail::buildEdgesKDTree(torch::Tensor &embedFeatures,
+                                             float rVal, int kVal,
+                                             bool flipDirections) {
+  auto tensor = Acts::template_switch<BuildEdgesKDTree, 1, 12>(
+      embedFeatures.size(1), embedFeatures, rVal, kVal);
 
-  return edge_index;
+  return postprocessEdgeTensor(tensor, true, true, flipDirections);
 }
-}  // namespace
 
-torch::Tensor Acts::buildEdges(at::Tensor &embedFeatures,
-                               int64_t numSpacepoints, int dim, float rVal,
-                               int kVal, [[maybe_unused]] bool flipDirections) {
+torch::Tensor Acts::detail::buildEdges(torch::Tensor &embedFeatures, float rVal,
+                                       int kVal, bool flipDirections) {
 #ifndef ACTS_EXATRKX_CPUONLY
   if (torch::cuda::is_available()) {
-    return buildEdgesFRNN(embedFeatures, numSpacepoints, dim, rVal, kVal,
-                          flipDirections);
+    return detail::buildEdgesFRNN(embedFeatures, rVal, kVal, flipDirections);
   } else {
-    return buildEdgesBruteForce(embedFeatures, numSpacepoints, dim, rVal, kVal);
+    return detail::buildEdgesKDTree(embedFeatures, rVal, kVal, flipDirections);
   }
 #else
-  return buildEdgesBruteForce(embedFeatures, numSpacepoints, dim, rVal, kVal);
+  return detail::buildEdgesKDTree(embedFeatures, rVal, kVal, flipDirections);
 #endif
 }
diff --git a/Plugins/ExaTrkX/src/printCudaMemInfo.hpp b/Plugins/ExaTrkX/src/printCudaMemInfo.hpp
index 2d705a73311..739c89c7921 100644
--- a/Plugins/ExaTrkX/src/printCudaMemInfo.hpp
+++ b/Plugins/ExaTrkX/src/printCudaMemInfo.hpp
@@ -16,21 +16,27 @@
 
 #include
 
+#include <torch/torch.h>
+
 namespace {
 
 inline void printCudaMemInfo(const Acts::Logger& logger) {
 #ifndef ACTS_EXATRKX_CPUONLY
-  constexpr float kb = 1024;
-  constexpr float mb = kb * kb;
-
-  int device;
-  std::size_t free, total;
-  cudaMemGetInfo(&free, &total);
-  cudaGetDevice(&device);
-
-  ACTS_VERBOSE("Current CUDA device: " << device);
-  ACTS_VERBOSE("Memory (used / total) [in MB]: " << (total - free) / mb << " / "
-               << total / mb);
+  if (torch::cuda::is_available()) {
+    constexpr float kb = 1024;
+    constexpr float mb = kb * kb;
+
+    int device;
+    std::size_t free, total;
+    cudaMemGetInfo(&free, &total);
+    cudaGetDevice(&device);
+
+    ACTS_VERBOSE("Current CUDA device: " << device);
+    ACTS_VERBOSE("Memory (used / total) [in MB]: " << (total - free) / mb
+                                                   << " / " << total / mb);
+  } else {
+    ACTS_VERBOSE("No memory info, CUDA disabled");
+  }
 #else
   ACTS_VERBOSE("No memory info, CUDA disabled");
 #endif

diff --git a/Tests/UnitTests/Plugins/ExaTrkX/CMakeLists.txt b/Tests/UnitTests/Plugins/ExaTrkX/CMakeLists.txt
index a241e679833..842fcac2533 100644
--- a/Tests/UnitTests/Plugins/ExaTrkX/CMakeLists.txt
+++ b/Tests/UnitTests/Plugins/ExaTrkX/CMakeLists.txt
@@ -1,3 +1,4 @@
 set(unittest_extra_libraries ActsPluginExaTrkX ${TORCH_LIBRARIES})
 
+add_unittest(ExaTrkXTensorConversion ExaTrkXTensorConversionTests.cpp)
 add_unittest(ExaTrkXEdgeBuilding ExaTrkXEdgeBuildingTest.cpp)
diff --git a/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXEdgeBuildingTest.cpp b/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXEdgeBuildingTest.cpp
index 21abdf66bab..85e53be8df4 100644
--- a/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXEdgeBuildingTest.cpp
+++ b/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXEdgeBuildingTest.cpp
@@ -8,7 +8,8 @@
 
 #include <boost/test/unit_test.hpp>
 
-#include "Acts/Plugins/ExaTrkX/buildEdges.hpp"
+#include "Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp"
+#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp"
 
 #include <cassert>
 #include <iostream>
 
 #include <Eigen/Core>
 #include <torch/torch.h>
 
-namespace {
+#define PRINT 0
+
 float distance(const at::Tensor &a, const at::Tensor &b) {
   assert(a.sizes() == b.sizes());
   assert(a.sizes().size() == 1);
 
   return std::sqrt(((a - b) * (a - b)).sum().item().to<float>());
 }
 
-int cantor_pair(int x, int y) {
-  return y + ((x + y) * (x + y + 1)) / 2;
-}
+struct CantorPair {
+  int value;
 
-std::pair<int, int> cantor_pair_inverse(int a) {
-  auto f = [](int w) -> int { return (w * (w + 1)) / 2; };
-  auto q = [](int w) -> int {
-    return std::floor((std::sqrt(8 * w + 1) - 1) / 2);
-  };
+  CantorPair(int x, int y) : value(y + ((x + y) * (x + y + 1)) / 2) {}
+
+  std::pair<int, int> inverse() const {
+    auto f = [](int w) -> int { return (w * (w + 1)) / 2; };
+    auto q = [](int w) -> int {
+      return std::floor((std::sqrt(8 * w + 1) - 1) / 2);
+    };
+
+    auto y = value - f(q(value));
+    auto x = q(value) - y;
+
+    return {x, y};
+  }
+};
+
+#if PRINT
+std::ostream &operator<<(std::ostream &os, CantorPair p) {
+  auto [a, b] = p.inverse();
+  os << "(" << a << "," << b << ")";
+  return os;
+}
+#endif
 
-  auto y = a - f(q(a));
+bool operator<(CantorPair a, CantorPair b) {
+  return a.value < b.value;
+}
 
-  auto x = q(a) - y;
+bool operator==(CantorPair a, CantorPair b) {
+  return a.value == b.value;
+}
 
-  return {x, y};
-}
-}  // namespace
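The Cantor pairing used here encodes an ordered pair `(x, y)` into the single integer
`y + (x + y)(x + y + 1)/2`. The mapping is bijective and enumerates pairs along
diagonals of constant `x + y`, so edge lists can be sorted and compared as flat integer
sequences. A few spot checks (illustration only, not part of the patch):

```cpp
// value(x, y) = y + (x + y)(x + y + 1) / 2
constexpr int cantor(int x, int y) {
  return y + ((x + y) * (x + y + 1)) / 2;
}

static_assert(cantor(0, 0) == 0);
static_assert(cantor(1, 0) == 1);   // diagonal x + y = 1 ...
static_assert(cantor(0, 1) == 2);
static_assert(cantor(2, 0) == 3);   // diagonal x + y = 2 ...
static_assert(cantor(1, 1) == 4);
static_assert(cantor(3, 4) == 32);  // inverse: q = 7, f(q) = 28 -> y = 4, x = 3
```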
+template <typename edge_builder_t>
-void test_random_graph(int emb_dim, int n_nodes, float r, int knn) {
+void test_random_graph(int emb_dim, int n_nodes, float r, int knn,
+                       const edge_builder_t &edgeBuilder) {
   // Create a random point cloud
   auto random_features = at::randn({n_nodes, emb_dim});
 
   // Generate the truth via brute-force
   Eigen::MatrixXf distance_matrix(n_nodes, n_nodes);
 
-  std::vector<int> edges_ref_cantor;
+  std::vector<CantorPair> edges_ref_cantor;
   std::vector<int> edge_counts(n_nodes, 0);
 
   for (int i = 0; i < n_nodes; ++i) {
     for (int j = i; j < n_nodes; ++j) {
       const auto d = distance(random_features[i], random_features[j]);
       distance_matrix(i, j) = d;
       distance_matrix(j, i) = d;
 
       if (d < r && i != j) {
-        edges_ref_cantor.push_back(cantor_pair(i, j));
+        edges_ref_cantor.emplace_back(i, j);
         edge_counts[i]++;
       }
     }
   }
 
-  // Check if knn can find all edges
-  auto max_edges = *std::max_element(edge_counts.begin(), edge_counts.end());
-  if (max_edges > knn) {
-    std::cout << "Warning: edge_count per node can be higher than knn, test "
-                 "will fail\n";
-  }
+  const auto max_edges =
+      *std::max_element(edge_counts.begin(), edge_counts.end());
 
-  std::cout << "Max edge count: " << max_edges << "\n";
+  // If this is not the case, the test is ill-formed:
+  // knn specifies how many edges the function under test can find at most.
+  // Thus we should design the test in a way that our brute-force reference
+  // does not find more edges than the algorithm we test against it can find.
+  BOOST_REQUIRE(max_edges <= knn);
 
   // Run the edge building
-  auto random_features_cuda = random_features.to(torch::kCUDA);
-  auto edges_test =
-      Acts::buildEdges(random_features_cuda, n_nodes, emb_dim, r, knn);
+  auto edges_test = edgeBuilder(random_features, r, knn);
 
   // Map the edges to cantor pairs
-  std::vector<int> edges_test_cantor;
+  std::vector<CantorPair> edges_test_cantor;
 
   for (int i = 0; i < edges_test.size(1); ++i) {
     const auto a = edges_test[0][i].template item<int>();
     const auto b = edges_test[1][i].template item<int>();
-    edges_test_cantor.push_back(a < b ? cantor_pair(a, b) : cantor_pair(b, a));
+    edges_test_cantor.push_back(a < b ? CantorPair(a, b) : CantorPair(b, a));
   }
 
   std::sort(edges_ref_cantor.begin(), edges_ref_cantor.end());
   std::sort(edges_test_cantor.begin(), edges_test_cantor.end());
 
-  // Check
-  if ((edges_ref_cantor.size() != edges_test_cantor.size()) or
-      not std::equal(edges_test_cantor.begin(), edges_test_cantor.end(),
-                     edges_ref_cantor.begin())) {
-    auto print_in_a_but_not_in_b = [&](const auto a, const auto b) {
-      std::vector<int> diff;
-
-      std::set_difference(a.begin(), a.end(), b.begin(), b.end(),
-                          std::back_inserter(diff));
-
-      if (diff.empty()) {
-        std::cout << " none\n";
-        return;
-      }
+#if PRINT
+  std::cout << "test size " << edges_test_cantor.size() << std::endl;
+  std::cout << "ref size " << edges_ref_cantor.size() << std::endl;
+  std::cout << "test: ";
+  std::copy(
+      edges_test_cantor.begin(),
+      edges_test_cantor.begin() + std::min(edges_test_cantor.size(), 10ul),
+      std::ostream_iterator<CantorPair>(std::cout, " "));
+  std::cout << std::endl;
+  std::cout << "ref: ";
+  std::copy(edges_ref_cantor.begin(),
+            edges_ref_cantor.begin() + std::min(edges_ref_cantor.size(), 10ul),
+            std::ostream_iterator<CantorPair>(std::cout, " "));
+  std::cout << std::endl;
+#endif
 
-      for (auto c : diff) {
-        const auto [e0, e1] = cantor_pair_inverse(c);
-        std::cout << " element (" << e0 << "," << e1
-                  << ") with d = " << distance_matrix(e0, e1) << "\n";
-      }
-    };
-
-    std::cout << "Elements in ref but not in test:\n";
-    print_in_a_but_not_in_b(edges_ref_cantor, edges_test_cantor);
-    std::cout << "Elements in test but not in ref:\n";
-    print_in_a_but_not_in_b(edges_test_cantor, edges_ref_cantor);
-
-    throw std::runtime_error("edges mismatch");
-  }
-
-  std::cout << "OK!" << std::endl;
+  // Check
+  BOOST_CHECK(edges_ref_cantor.size() == edges_test_cantor.size());
+  BOOST_CHECK(std::equal(edges_test_cantor.begin(), edges_test_cantor.end(),
+                         edges_ref_cantor.begin()));
 }
 
 BOOST_AUTO_TEST_CASE(test_cantor_pair_functions) {
   int a = 345;
   int b = 23;
 
-  const auto [aa, bb] = cantor_pair_inverse(cantor_pair(a, b));
+  const auto [aa, bb] = CantorPair(a, b).inverse();
 
   BOOST_CHECK(a == aa);
   BOOST_CHECK(b == bb);
 }
 
-BOOST_AUTO_TEST_CASE(test_random_graph_edge_building) {
-  const int emb_dim = 3;
-  const int n_nodes = 20;
-  const float r = 1.5;
-  const int knn = 50;
-  const int seed = 42;
+const int emb_dim = 3;
+const int n_nodes = 20;
+const float r = 1.5;
+const int knn = 50;
+const int seed = 42;
+
+BOOST_AUTO_TEST_CASE(test_random_graph_edge_building_cuda,
+                     *boost::unit_test::precondition([](auto) {
+                       return torch::cuda::is_available();
+                     })) {
+  torch::manual_seed(seed);
+
+  auto cudaEdgeBuilder = [](auto &features, auto radius, auto k) {
+    auto features_cuda = features.to(torch::kCUDA);
+    return Acts::detail::buildEdgesFRNN(features_cuda, radius, k);
+  };
+
+  test_random_graph(emb_dim, n_nodes, r, knn, cudaEdgeBuilder);
+}
 
+BOOST_AUTO_TEST_CASE(test_random_graph_edge_building_kdtree) {
   torch::manual_seed(seed);
 
-  test_random_graph(emb_dim, n_nodes, r, knn);
+  auto cpuEdgeBuilder = [](auto &features, auto radius, auto k) {
+    auto features_cpu = features.to(torch::kCPU);
+    return Acts::detail::buildEdgesKDTree(features_cpu, radius, k);
+  };
+
+  test_random_graph(emb_dim, n_nodes, r, knn, cpuEdgeBuilder);
+}
+
+BOOST_AUTO_TEST_CASE(test_self_loop_removal) {
+  // clang-format off
+  std::vector<int64_t> edges = {
+    1,1,
+    2,3,
+    2,2,
+    5,4,
+  };
+  // clang-format on
+
+  auto opts = torch::TensorOptions().dtype(torch::kInt64);
+  const auto edgeTensor =
+      torch::from_blob(edges.data(), {static_cast<long>(edges.size() / 2), 2},
+                       opts)
+          .transpose(0, 1);
+
+  const auto withoutSelfLoops =
+      Acts::detail::postprocessEdgeTensor(edgeTensor, true, false, false)
+          .transpose(1, 0)
+          .flatten();
+
+  const std::vector<int64_t> postEdges(
+      withoutSelfLoops.data_ptr<int64_t>(),
+      withoutSelfLoops.data_ptr<int64_t>() + withoutSelfLoops.numel());
+
+  // clang-format off
+  const std::vector<int64_t> ref = {
+    2,3,
+    5,4,
+  };
+  // clang-format on
+
+  BOOST_CHECK(ref == postEdges);
+}
+
+// Enable this test in PR where duplicate removal is fixed
+/*
+BOOST_AUTO_TEST_CASE(test_duplicate_removal) {
+  // clang-format off
+  std::vector<int64_t> edges = {
+    1,2,
+    2,1, // duplicate, flipped
+    3,2,
+    3,2, // duplicate, not flipped
+    7,6, // should be flipped
+  };
+  // clang-format on
+
+  auto opts = torch::TensorOptions().dtype(torch::kInt64);
+  const auto edgeTensor =
+      torch::from_blob(edges.data(), {static_cast<long>(edges.size() / 2), 2},
+                       opts)
+          .transpose(0, 1);
+
+  const auto withoutDups =
+      Acts::detail::postprocessEdgeTensor(edgeTensor, false, true, false)
+          .transpose(1, 0)
+          .flatten();
+
+  const std::vector<int64_t> postEdges(
+      withoutDups.data_ptr<int64_t>(),
+      withoutDups.data_ptr<int64_t>() + withoutDups.numel());
+
+  // clang-format off
+  const std::vector<int64_t> ref = {
+    1,2,
+    2,3,
+    6,7,
+  };
+  // clang-format on
+
+  BOOST_CHECK(ref == postEdges);
+}
+*/
+
+BOOST_AUTO_TEST_CASE(test_random_flip) {
+  torch::manual_seed(seed);
+
+  // clang-format off
+  std::vector<int64_t> edges = {
+    1,2,
+    2,3,
+    3,4,
+    4,5,
+  };
+  // clang-format on
+
+  auto opts = torch::TensorOptions().dtype(torch::kInt64);
+  const auto edgeTensor =
+      torch::from_blob(edges.data(), {static_cast<long>(edges.size() / 2), 2},
+                       opts)
+          .transpose(0, 1);
+
+  const auto flipped =
+      Acts::detail::postprocessEdgeTensor(edgeTensor, false, false, true)
+          .transpose(0, 1)
+          .flatten();
+
+  const std::vector<int64_t> postEdges(
+      flipped.data_ptr<int64_t>(),
+      flipped.data_ptr<int64_t>() + flipped.numel());
+
+  BOOST_CHECK(postEdges.size() == edges.size());
+  for (auto preIt = edges.begin(); preIt != edges.end(); preIt += 2) {
+    int found = 0;
+
+    for (auto postIt = postEdges.begin(); postIt != postEdges.end();
+         postIt += 2) {
+      bool noflp = (*preIt == *postIt) and *(preIt + 1) == *(postIt + 1);
+      bool flp = *preIt == *(postIt + 1) and *(preIt + 1) == *(postIt);
+
+      found += (flp or noflp);
+    }
+
+    BOOST_CHECK(found == 1);
+  }
+}
diff --git a/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXTensorConversionTests.cpp b/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXTensorConversionTests.cpp
new file mode 100644
index 00000000000..25ed3cceea0
--- /dev/null
+++ b/Tests/UnitTests/Plugins/ExaTrkX/ExaTrkXTensorConversionTests.cpp
@@ -0,0 +1,109 @@
+// This file is part of the Acts project.
+//
+// Copyright (C) 2022 CERN for the benefit of the Acts project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <boost/test/unit_test.hpp>
+
+#include "Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp"
+
+#include
+
+#include <torch/torch.h>
+
+BOOST_AUTO_TEST_CASE(test_vector_tensor_conversion_int_2cols) {
+  std::vector<int64_t> start_vec = {
+      // clang-format off
+      0, 1,
+      1, 2,
+      2, 3,
+      3, 4
+      // clang-format on
+  };
+
+  auto tensor = Acts::detail::vectorToTensor2D(start_vec, 2).clone();
+
+  BOOST_CHECK(tensor.options().dtype() == torch::kInt64);
+  BOOST_CHECK(tensor.sizes().size() == 2);
+  BOOST_CHECK(tensor.size(0) == 4);
+  BOOST_CHECK(tensor.size(1) == 2);
+
+  BOOST_CHECK(tensor[0][0].item<int64_t>() == 0);
+  BOOST_CHECK(tensor[0][1].item<int64_t>() == 1);
+
+  BOOST_CHECK(tensor[1][0].item<int64_t>() == 1);
+  BOOST_CHECK(tensor[1][1].item<int64_t>() == 2);
+
+  BOOST_CHECK(tensor[2][0].item<int64_t>() == 2);
+  BOOST_CHECK(tensor[2][1].item<int64_t>() == 3);
+
+  BOOST_CHECK(tensor[3][0].item<int64_t>() == 3);
+  BOOST_CHECK(tensor[3][1].item<int64_t>() == 4);
+
+  auto test_vec = Acts::detail::tensor2DToVector<int64_t>(tensor);
+
+  BOOST_CHECK(test_vec == start_vec);
+}
+
+BOOST_AUTO_TEST_CASE(test_vector_tensor_conversion_float_3cols) {
+  std::vector<float> start_vec = {
+      // clang-format off
+      0.f, 0.f, 0.f,
+      1.f, 1.f, 1.f,
+      2.f, 2.f, 2.f,
+      3.f, 3.f, 3.f
+      // clang-format on
+  };
+
+  auto tensor = Acts::detail::vectorToTensor2D(start_vec, 3).clone();
+
+  BOOST_CHECK(tensor.options().dtype() == torch::kFloat32);
+  BOOST_CHECK(tensor.sizes().size() == 2);
+  BOOST_CHECK(tensor.size(0) == 4);
+  BOOST_CHECK(tensor.size(1) == 3);
+
+  for (auto i : {0, 1, 2, 3}) {
+    BOOST_CHECK(tensor[i][0].item<float>() == static_cast<float>(i));
+    BOOST_CHECK(tensor[i][1].item<float>() == static_cast<float>(i));
+    BOOST_CHECK(tensor[i][2].item<float>() == static_cast<float>(i));
+  }
+
+  auto test_vec = Acts::detail::tensor2DToVector<float>(tensor);
+
+  BOOST_CHECK(test_vec == start_vec);
+}
+
+BOOST_AUTO_TEST_CASE(test_slicing) {
+  std::vector<float> start_vec = {
+      // clang-format off
+      0.f, 4.f, 0.f,
+      1.f, 5.f, 1.f,
+      2.f, 6.f, 2.f,
+      3.f, 7.f, 3.f
+      // clang-format on
+  };
+
+  auto tensor = Acts::detail::vectorToTensor2D(start_vec, 3).clone();
+
+  using namespace torch::indexing;
+  tensor = tensor.index({Slice{}, Slice{0, None, 2}});
+
+  BOOST_CHECK(tensor.size(0) == 4);
+  BOOST_CHECK(tensor.size(1) == 2);
+
+  const std::vector<float> ref_vec = {
+      // clang-format off
+      0.f, 0.f,
+      1.f, 1.f,
+      2.f, 2.f,
+      3.f, 3.f,
+      // clang-format on
+  };
+
+  const auto test_vec = Acts::detail::tensor2DToVector<float>(tensor);
+
+  BOOST_CHECK(test_vec == ref_vec);
+}
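The `test_slicing` case above exists because of the `.contiguous()` call in
`tensor2DToVector`: a tensor sliced with a step still views the original buffer, so
copying raw memory without compacting first would also pick up the sliced-away
elements. A minimal standalone check of that behaviour (illustration only, not part of
the patch):

```cpp
#include <torch/torch.h>

#include <cassert>

int main() {
  auto t = torch::tensor({{0.f, 4.f, 0.f}, {1.f, 5.f, 1.f}});

  using namespace torch::indexing;
  auto sliced = t.index({Slice{}, Slice{0, None, 2}});  // keep columns 0 and 2

  assert(!sliced.is_contiguous());  // strides still skip the middle column
  assert(sliced.contiguous().is_contiguous());  // compacted copy, safe to copy raw
}
```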