-
Notifications
You must be signed in to change notification settings - Fork 58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add View of Views debugging tool #267
base: develop
Are you sure you want to change the base?
Changes from all commits
64ca586
a5b4987
63c2d2e
9e7a2d2
a8372b3
497908d
d511c6b
4ab43a2
2808025
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# View-of-Views bug finder tool. kp_add_library is a project helper defined
# elsewhere; presumably the first argument is the library target and the
# second its single source file.
kp_add_library(
  kp_view_of_views_bug_finder
  kp_view_of_views_bug_finder.cpp
)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
//@HEADER | ||
// ************************************************************************ | ||
// | ||
// Kokkos v. 4.0 | ||
// Copyright (2022) National Technology & Engineering | ||
// Solutions of Sandia, LLC (NTESS). | ||
// | ||
// Under the terms of Contract DE-NA0003525 with NTESS, | ||
// the U.S. Government retains certain rights in this software. | ||
// | ||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://kokkos.org/LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//@HEADER | ||
|
||
#include <kp_core.hpp> | ||
|
||
#include <cassert> | ||
#include <cstdint> | ||
#include <cstdlib> | ||
#include <iostream> | ||
#include <map> | ||
#include <mutex> | ||
#include <optional> | ||
#include <string> | ||
|
||
namespace { | ||
|
||
// When true, each diagnostic printed by the callbacks in this file is
// followed by a call to std::abort().
bool abort_on_error{true};
|
||
// Registry of the kernels that have begun but not yet ended, keyed by the
// ID handed out by push(). IDs increase monotonically, so the map is ordered
// by registration order and its first entry is the oldest active kernel.
//
// None of the member functions takes `mutex`; every caller is expected to
// hold it for the duration of its accesses.
class {
  uint64_t count_{0};                     // next ID to hand out
  std::map<uint64_t, std::string> map_;   // active kernels: ID -> name

 public:
  std::mutex mutex;

  // Registers a kernel name and returns the ID to pass to pop() later.
  uint64_t push(std::string s) {
    uint64_t const id = count_++;
    // IDs only grow, so the end hint is always the correct position.
    [[maybe_unused]] auto const it =
        map_.emplace_hint(map_.end(), id, std::move(s));
    // The new entry must be the last one; no side effects inside the assert
    // so debug and release builds execute the same statements.
    assert(it->first == map_.rbegin()->first);
    return id;
  }

  // Removes the kernel registered under ID `x`. Erasing by key is a no-op
  // for an unknown ID, so release builds (where the assert is compiled out)
  // never erase through an end iterator.
  void pop(uint64_t x) {
    [[maybe_unused]] auto const erased = map_.erase(x);
    assert(erased == 1);
  }

  // Name of the active kernel with the smallest ID, i.e. the one that was
  // registered first. Must not be called while the registry is empty.
  std::string const &top() const {
    assert(!map_.empty());
    return map_.begin()->second;
  }

  // True when no kernel is currently registered.
  bool is_empty() const noexcept { return map_.empty(); }
} current;
|
||
// Returns true when the fence label `s` is exactly one of the two
// Kokkos-internal fence names that are exempt from reporting.
bool ignore_fence(std::string_view s) {
  constexpr std::string_view view_init_destroy_fence =
      "Kokkos::Impl::ViewValueFunctor: View init/destroy fence";
  constexpr std::string_view threads_unnamed_fence =
      "Kokkos::ThreadsInternal::fence: Unnamed Instance Fence";
  if (s == view_init_destroy_fence) {
    return true;
  }
  return s == threads_unnamed_fence;
}
|
||
// Returns true when the allocation label `s` starts with "Kokkos::".
// Only the leading characters are compared, so a label that does not match
// is rejected without searching the rest of the string.
bool ignore_alloc(std::string_view s) {
  constexpr std::string_view prefix = "Kokkos::";
  // substr clamps the count, so labels shorter than the prefix are safe.
  return s.substr(0, prefix.size()) == prefix;
}
|
||
// Extracts the text between the first occurrence of `prefix` and the last
// occurrence of `suffix` in `str`.
//
// Returns std::nullopt when `prefix` does not occur. When `suffix` does not
// occur at or after the end of the prefix, everything following the prefix
// is returned.
std::optional<std::string> get_substr(std::string const &str,
                                      std::string_view prefix,
                                      std::string_view suffix) {
  auto const prefix_pos = str.find(prefix);
  if (prefix_pos == std::string::npos) {
    return std::nullopt;
  }
  auto const first = prefix_pos + prefix.length();
  // Searching from the back lets the extracted text itself contain `suffix`.
  auto const last = str.rfind(suffix);
  bool const closed = (last != std::string::npos) && (last >= first);
  return str.substr(first, closed ? last - first : std::string::npos);
}
|
||
void vov_bug_finder_request_tool_settings(const uint32_t, | ||
Kokkos_Tools_ToolSettings *settings) { | ||
settings->requires_global_fencing = false; | ||
} | ||
|
||
void vov_bug_finder_begin_parallel_for(char const *kernelName, | ||
uint32_t /*deviceID*/, | ||
uint64_t *kernelID) { | ||
std::lock_guard lock(current.mutex); | ||
if (!current.is_empty()) { | ||
if (auto lbl = | ||
get_substr(kernelName, "Kokkos::View::initialization [", "]")) { | ||
std::cerr << "constructing view \"" << *lbl | ||
<< "\" within a parallel region \"" << current.top() << "\"\n"; | ||
if (abort_on_error) { | ||
std::abort(); | ||
} | ||
} | ||
} | ||
*kernelID = current.push(kernelName); | ||
} | ||
|
||
// End-parallel-for callback: removes the kernel identified by `kernelID`
// from the set of active kernels, holding the registry mutex while doing so.
void vov_bug_finder_end_parallel_for(uint64_t kernelID) {
  std::lock_guard<std::mutex> guard(current.mutex);
  current.pop(kernelID);
}
|
||
void vov_bug_finder_begin_fence(char const *fenceName, uint32_t /*deviceID*/, | ||
uint64_t * /*fenceID*/) { | ||
std::lock_guard lock(current.mutex); | ||
if (!current.is_empty() && !ignore_fence(fenceName)) { | ||
if (auto lbl = | ||
get_substr(current.top(), "Kokkos::View::destruction [", "]")) { | ||
std::cerr << "view of views \"" << *lbl | ||
<< "\" not properly cleared this fence labelled \"" << fenceName | ||
<< "\" will hang\n"; | ||
if (abort_on_error) { | ||
std::abort(); | ||
} | ||
} | ||
} | ||
} | ||
|
||
// Allocation callback.
//
// Reports on stderr (and aborts when abort_on_error is set) any allocation
// that happens while a kernel is active, unless the allocation label is one
// that ignore_alloc() exempts.
void vov_bug_finder_allocate_data(SpaceHandle /*handle*/, char const *name,
                                  void const * /*ptr*/, uint64_t /*size*/) {
  std::lock_guard<std::mutex> guard(current.mutex);

  if (current.is_empty() || ignore_alloc(name)) {
    return;
  }

  std::cerr << "allocating \"" << name << "\" within parallel region \""
            << current.top() << "\"\n";
  if (abort_on_error) {
    std::abort();
  }
}
|
||
// Deallocation callback.
//
// Reports on stderr (and aborts when abort_on_error is set) any deallocation
// that happens while a kernel is active, unless the allocation label is one
// that ignore_alloc() exempts.
void vov_bug_finder_deallocate_data(SpaceHandle /*handle*/, char const *name,
                                    void const * /*ptr*/, uint64_t /*size*/) {
  std::lock_guard<std::mutex> guard(current.mutex);

  if (current.is_empty() || ignore_alloc(name)) {
    return;
  }

  std::cerr << "deallocating \"" << name << "\" within parallel region \""
            << current.top() << "\"\n";
  if (abort_on_error) {
    std::abort();
  }
}
|
||
} // namespace | ||
|
||
extern "C" { | ||
EXPOSE_TOOL_SETTINGS(vov_bug_finder_request_tool_settings) | ||
EXPOSE_BEGIN_PARALLEL_FOR(vov_bug_finder_begin_parallel_for) | ||
EXPOSE_END_PARALLEL_FOR(vov_bug_finder_end_parallel_for) | ||
EXPOSE_BEGIN_FENCE(vov_bug_finder_begin_fence) | ||
EXPOSE_ALLOCATE(vov_bug_finder_allocate_data) | ||
EXPOSE_DEALLOCATE(vov_bug_finder_deallocate_data) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Test for the View-of-Views bug finder. kp_add_executable_and_test is a
# project helper defined elsewhere; the keyword arguments below are passed to
# it unchanged.
kp_add_executable_and_test(
    TARGET_NAME       test_vov_bug_finder
    SOURCE_FILE       test_view_of_views_bug_finder.cpp
    KOKKOS_TOOLS_LIBS kp_view_of_views_bug_finder
)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
//@HEADER | ||
// ************************************************************************ | ||
// | ||
// Kokkos v. 4.0 | ||
// Copyright (2022) National Technology & Engineering | ||
// Solutions of Sandia, LLC (NTESS). | ||
// | ||
// Under the terms of Contract DE-NA0003525 with NTESS, | ||
// the U.S. Government retains certain rights in this software. | ||
// | ||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://kokkos.org/LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//@HEADER | ||
|
||
#include "Kokkos_Core.hpp" | ||
#include "gtest/gtest.h" | ||
|
||
void test_no_throw_placement_new_in_parallel_for() { | ||
ASSERT_NO_THROW(({ | ||
using V = Kokkos::View<int *>; | ||
Kokkos::View<V **, Kokkos::HostSpace> vov( | ||
Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3); | ||
V a("a", 4); | ||
V b("b", 5); | ||
Kokkos::parallel_for( | ||
"Fine", Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0, 1), | ||
KOKKOS_LAMBDA(int) { | ||
new (&vov(0, 0)) V(a); | ||
new (&vov(0, 1)) V(a); | ||
new (&vov(1, 0)) V(b); | ||
}); | ||
})); | ||
} | ||
|
||
void test_death_allocation_in_parallel_for() { | ||
ASSERT_DEATH( | ||
({ | ||
using V = Kokkos::View<int *>; | ||
Kokkos::View<V **, Kokkos::HostSpace> vov( | ||
Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3); | ||
V a("a", 4); | ||
new (&vov(0, 0)) V(a); | ||
new (&vov(0, 1)) V(a); | ||
Kokkos::parallel_for( | ||
"AllocatesInParallel]For", | ||
Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0, 1), | ||
KOKKOS_LAMBDA(int) { | ||
V b("b", 5); | ||
new (&vov(1, 0)) V(b); | ||
}); | ||
}), | ||
"allocating \"b\" within parallel region \"AllocatesInParallel]For\""); | ||
} | ||
|
||
void test_no_throw_team_scratch_pad_parallel_for() { | ||
ASSERT_NO_THROW(({ | ||
Kokkos::parallel_for( | ||
"L0", | ||
Kokkos::TeamPolicy<>(1, Kokkos::AUTO) | ||
.set_scratch_size(0, Kokkos::PerTeam(1000)), | ||
KOKKOS_LAMBDA(Kokkos::TeamPolicy<>::member_type const &){}); | ||
|
||
Kokkos::parallel_for( | ||
"L1", | ||
Kokkos::TeamPolicy<>(1, Kokkos::AUTO) | ||
.set_scratch_size(1, Kokkos::PerTeam(1000)), | ||
KOKKOS_LAMBDA(Kokkos::TeamPolicy<>::member_type const &){}); | ||
})); | ||
} | ||
|
||
// TODO initialize in main and split unit tests | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've tried to do this, but it does not seem to be that easy. I'll open an issue for it, so we can move the discussion there. |
||
// Single test that initializes Kokkos, runs every scenario in sequence, and
// finalizes Kokkos afterwards.
TEST(ViewOfViews, find_bugs) {
  Kokkos::initialize();
  {
    // Scenario 1: elements are assigned and then reset to empty views before
    // the view of views goes out of scope.
    ASSERT_NO_THROW(({
      using inner_view = Kokkos::View<int *>;
      Kokkos::View<inner_view **, Kokkos::HostSpace> outer("vov", 2, 3);
      inner_view view_a("a", 4);
      inner_view view_b("b", 5);
      outer(0, 0) = view_a;
      outer(0, 1) = view_a;
      outer(1, 0) = view_b;

      outer(0, 0) = inner_view();
      outer(0, 1) = inner_view();
      outer(1, 0) = inner_view();
    }));

    // Scenario 2: uninitialized outer view, elements built with placement
    // new and destroyed by hand.
    ASSERT_NO_THROW(({
      using inner_view = Kokkos::View<int *>;
      Kokkos::View<inner_view **, Kokkos::HostSpace> outer(
          Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3);
      inner_view view_a("a", 4);
      inner_view view_b("b", 5);
      new (&outer(0, 0)) inner_view(view_a);
      new (&outer(0, 1)) inner_view(view_a);
      new (&outer(1, 0)) inner_view(view_b);

      outer(0, 0).~inner_view();
      outer(0, 1).~inner_view();
      // outer(1, 0).~inner_view();
      // ^ leaking "b" but not caught by the tool
    }));

    // Scenario 3: elements are still assigned when the view of views is
    // destroyed, which is expected to terminate the process.
    //
    // NOTE(review): a reviewer reported that, in an OpenMP build, this death
    // test hangs when run with more than one OpenMP thread. It is unclear
    // whether that is related to the team tests that were noted to hang
    // sometimes.
    ASSERT_DEATH(({
                   using inner_view = Kokkos::View<int *>;
                   Kokkos::View<inner_view **, Kokkos::HostSpace> outer(
                       "vo]v", 2, 3);
                   // ^ included a closing square bracket in the label to try
                   //   to trip the substring extraction
                   inner_view view_a("a", 4);
                   inner_view view_b("b", 5);
                   outer(0, 0) = view_a;
                   outer(0, 1) = view_a;
                   outer(1, 0) = view_b;
                 }),
                 "view of views \"vo]v\" not properly cleared");

    test_no_throw_placement_new_in_parallel_for();

    test_death_allocation_in_parallel_for();

    test_no_throw_team_scratch_pad_parallel_for();
  }
  Kokkos::finalize();
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In a Cuda build, the compiler is complaining about this line that: [compiler output omitted] The reason seems to be that we end up in a constructor that isn't marked with Kokkos markup. One option would be to deactivate this part of the test in builds with a device backend. I'm not sure whether there's a better solution to this problem.