From 603a2b9fd628430c500f86b28150ddacd7666a8c Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Tue, 26 Nov 2024 15:54:46 +0100 Subject: [PATCH] Do not disable CGF diagnostics in test_utils::add_*_task This eliminates dead code from an earlier incomplete refactoring. The CGF teardown / reinit was only required by a single test, which was coincidentally also broken and didn't test the feature advertised. This commit splits up the test between runtime_ and runtime_deprecation_tests and also moves runtime-independent sibling tests to task_graph_tests. --- test/runtime_deprecation_tests.cc | 13 +++++ test/runtime_tests.cc | 81 +++---------------------------- test/task_graph_tests.cc | 66 +++++++++++++++++++++++++ test/test_utils.h | 25 +++------- 4 files changed, 93 insertions(+), 92 deletions(-) diff --git a/test/runtime_deprecation_tests.cc b/test/runtime_deprecation_tests.cc index 80ce5361..0d330687 100644 --- a/test/runtime_deprecation_tests.cc +++ b/test/runtime_deprecation_tests.cc @@ -71,6 +71,19 @@ namespace detail { CHECK(my_int == 42); } + TEST_CASE_METHOD(test_utils::runtime_fixture, "get_access can still be called on a const buffer", "[buffer]") { + const range<2> range{32, 64}; + std::vector init(range.size()); + buffer buf_a{init.data(), range}; + const auto cg = invoke_command_group_function([&](handler& cgh) { + auto acc = std::as_const(buf_a).get_access(cgh, celerity::access::one_to_one{}); + cgh.parallel_for(range, [=](item<2>) { (void)acc; }); + }); + CHECK(cg.buffer_accesses.size() == 1); + CHECK(cg.buffer_accesses[0].bid == get_buffer_id(buf_a)); + CHECK(cg.buffer_accesses[0].mode == access_mode::read); + } + TEST_CASE_METHOD(test_utils::runtime_fixture, "experimental::fence continues to work", "[deprecated][fence]") { distr_queue q; diff --git a/test/runtime_tests.cc b/test/runtime_tests.cc index 8bf52515..cbb24249 100644 --- a/test/runtime_tests.cc +++ b/test/runtime_tests.cc @@ -54,82 +54,17 @@ namespace detail { buf_b = buf_c; } - 
TEST_CASE_METHOD(test_utils::runtime_fixture, "get_access can be called on const buffer", "[buffer]") { + TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer::get_access records a buffer access in the command group", "[buffer]") { const range<2> range{32, 64}; std::vector init(range.size()); buffer buf_a{init.data(), range}; - auto& tm = runtime_testspy::get_task_manager(runtime::get_instance()); - const auto tid = test_utils::add_compute_task( - tm, [&](handler& cgh) { buf_a.get_access(cgh, one_to_one{}); }, range); - const auto tsk = test_utils::get_task(runtime_testspy::get_task_graph(runtime::get_instance()), tid); - const auto bufs = tsk->get_buffer_access_map().get_accessed_buffers(); - CHECK(bufs.size() == 1); - CHECK(tsk->get_buffer_access_map().get_nth_access(0) == std::pair{get_buffer_id(buf_a), access_mode::read}); - } - - TEST_CASE("task_manager calls into delegate on task creation", "[task_manager]") { - struct counter_delegate final : public task_manager::delegate { - size_t counter = 0; - void task_created(const task* /* tsk */) override { counter++; } - }; - - counter_delegate delegate; - task_graph tdag; - task_manager tm{1, tdag, nullptr, &delegate}; - tm.generate_epoch_task(epoch_action::init); - CHECK(delegate.counter == 1); - const range<2> gs = {1, 1}; - const id<2> go = {}; - tm.generate_command_group_task(invoke_command_group_function([=](handler& cgh) { cgh.parallel_for(gs, go, [](auto) {}); })); - CHECK(delegate.counter == 2); - tm.generate_command_group_task(invoke_command_group_function([](handler& cgh) { cgh.host_task(on_master_node, [] {}); })); - CHECK(delegate.counter == 3); - } - - TEST_CASE("task_manager correctly records compute task information", "[task_manager][task][device_compute_task]") { - test_utils::task_test_context tt; - auto buf_a = tt.mbf.create_buffer(range<2>(64, 152), true /* host_initialized */); - auto buf_b = tt.mbf.create_buffer(range<3>(7, 21, 99)); - const auto tid = test_utils::add_compute_task( - tt.tm, - 
[&](handler& cgh) { - buf_a.get_access(cgh, one_to_one{}); - buf_b.get_access(cgh, fixed{subrange<3>{{}, {5, 18, 74}}}); - }, - range<2>{32, 128}, id<2>{32, 24}); - - const auto tsk = test_utils::get_task(tt.tdag, tid); - CHECK(tsk->get_type() == task_type::device_compute); - CHECK(tsk->get_dimensions() == 2); - CHECK(tsk->get_global_size() == range<3>{32, 128, 1}); - CHECK(tsk->get_global_offset() == id<3>{32, 24, 0}); - - auto& bam = tsk->get_buffer_access_map(); - const auto bufs = bam.get_accessed_buffers(); - CHECK(bufs.size() == 2); - CHECK(std::find(bufs.cbegin(), bufs.cend(), buf_a.get_id()) != bufs.cend()); - CHECK(std::find(bufs.cbegin(), bufs.cend(), buf_b.get_id()) != bufs.cend()); - CHECK(bam.get_nth_access(0) == std::pair{buf_a.get_id(), access_mode::read}); - CHECK(bam.get_nth_access(1) == std::pair{buf_b.get_id(), access_mode::discard_read_write}); - const auto reqs_a = bam.compute_consumed_region(buf_a.get_id(), subrange{tsk->get_global_offset(), tsk->get_global_size()}); - CHECK(reqs_a == box(subrange<3>({32, 24, 0}, {32, 128, 1}))); - const auto reqs_b = bam.compute_produced_region(buf_b.get_id(), subrange{tsk->get_global_offset(), tsk->get_global_size()}); - CHECK(reqs_b == box(subrange<3>({}, {5, 18, 74}))); - } - - TEST_CASE("buffer_access_map merges multiple accesses with the same mode", "[task][device_compute_task]") { - std::vector accs; - accs.push_back(buffer_access{0, access_mode::read, std::make_unique>>(subrange<2>{{3, 0}, {10, 20}}, range<2>{30, 30})}); - accs.push_back(buffer_access{0, access_mode::read, std::make_unique>>(subrange<2>{{10, 0}, {7, 20}}, range<2>{30, 30})}); - const buffer_access_map bam{std::move(accs), task_geometry{2, {100, 100, 1}, {}, {}}}; - const auto req = bam.compute_consumed_region(0, subrange<3>({0, 0, 0}, {100, 100, 1})); - CHECK(req == box(subrange<3>({3, 0, 0}, {14, 20, 1}))); - } - - TEST_CASE("tasks gracefully handle get_requirements() calls for buffers they don't access", "[task]") { - const 
buffer_access_map bam; - const auto req = bam.compute_consumed_region(0, subrange<3>({0, 0, 0}, {100, 1, 1})); - CHECK(req == box<3>()); + const auto cg = invoke_command_group_function([&](handler& cgh) { + auto acc = buf_a.get_access(cgh, one_to_one{}); + cgh.parallel_for(range, [=](item<2>) { (void)acc; }); + }); + CHECK(cg.buffer_accesses.size() == 1); + CHECK(cg.buffer_accesses[0].bid == get_buffer_id(buf_a)); + CHECK(cg.buffer_accesses[0].mode == access_mode::read); } TEST_CASE_METHOD(test_utils::runtime_fixture, "queue::wait() returns only after all preceding tasks have completed", "[queue][sync][control-flow]") { diff --git a/test/task_graph_tests.cc b/test/task_graph_tests.cc index 5ef3f706..f3fda659 100644 --- a/test/task_graph_tests.cc +++ b/test/task_graph_tests.cc @@ -18,6 +18,72 @@ namespace detail { using celerity::access::all; using celerity::access::fixed; + using celerity::access::one_to_one; + + TEST_CASE("task_manager calls into delegate on task creation", "[task_manager]") { + struct counter_delegate final : public task_manager::delegate { + size_t counter = 0; + void task_created(const task* /* tsk */) override { counter++; } + }; + + counter_delegate delegate; + task_graph tdag; + task_manager tm{1, tdag, nullptr, &delegate}; + tm.generate_epoch_task(epoch_action::init); + CHECK(delegate.counter == 1); + const range<2> gs = {1, 1}; + const id<2> go = {}; + tm.generate_command_group_task(invoke_command_group_function([=](handler& cgh) { cgh.parallel_for(gs, go, [](auto) {}); })); + CHECK(delegate.counter == 2); + tm.generate_command_group_task(invoke_command_group_function([](handler& cgh) { cgh.host_task(on_master_node, [] {}); })); + CHECK(delegate.counter == 3); + } + + TEST_CASE("task_manager correctly records compute task information", "[task_manager][task][device_compute_task]") { + test_utils::task_test_context tt; + auto buf_a = tt.mbf.create_buffer(range<2>(64, 152), true /* host_initialized */); + auto buf_b = 
tt.mbf.create_buffer(range<3>(7, 21, 99)); + const auto tid = test_utils::add_compute_task( + tt.tm, + [&](handler& cgh) { + buf_a.get_access(cgh, one_to_one{}); + buf_b.get_access(cgh, fixed{subrange<3>{{}, {5, 18, 74}}}); + }, + range<2>{32, 128}, id<2>{32, 24}); + + const auto tsk = test_utils::get_task(tt.tdag, tid); + CHECK(tsk->get_type() == task_type::device_compute); + CHECK(tsk->get_dimensions() == 2); + CHECK(tsk->get_global_size() == range<3>{32, 128, 1}); + CHECK(tsk->get_global_offset() == id<3>{32, 24, 0}); + + auto& bam = tsk->get_buffer_access_map(); + const auto bufs = bam.get_accessed_buffers(); + CHECK(bufs.size() == 2); + CHECK(std::find(bufs.cbegin(), bufs.cend(), buf_a.get_id()) != bufs.cend()); + CHECK(std::find(bufs.cbegin(), bufs.cend(), buf_b.get_id()) != bufs.cend()); + CHECK(bam.get_nth_access(0) == std::pair{buf_a.get_id(), access_mode::read}); + CHECK(bam.get_nth_access(1) == std::pair{buf_b.get_id(), access_mode::discard_read_write}); + const auto reqs_a = bam.compute_consumed_region(buf_a.get_id(), subrange{tsk->get_global_offset(), tsk->get_global_size()}); + CHECK(reqs_a == box(subrange<3>({32, 24, 0}, {32, 128, 1}))); + const auto reqs_b = bam.compute_produced_region(buf_b.get_id(), subrange{tsk->get_global_offset(), tsk->get_global_size()}); + CHECK(reqs_b == box(subrange<3>({}, {5, 18, 74}))); + } + + TEST_CASE("buffer_access_map merges multiple accesses with the same mode", "[task][device_compute_task]") { + std::vector accs; + accs.push_back(buffer_access{0, access_mode::read, std::make_unique>>(subrange<2>{{3, 0}, {10, 20}}, range<2>{30, 30})}); + accs.push_back(buffer_access{0, access_mode::read, std::make_unique>>(subrange<2>{{10, 0}, {7, 20}}, range<2>{30, 30})}); + const buffer_access_map bam{std::move(accs), task_geometry{2, {100, 100, 1}, {}, {}}}; + const auto req = bam.compute_consumed_region(0, subrange<3>({0, 0, 0}, {100, 100, 1})); + CHECK(req == box(subrange<3>({3, 0, 0}, {14, 20, 1}))); + } + + TEST_CASE("tasks 
gracefully handle get_requirements() calls for buffers they don't access", "[task]") { + const buffer_access_map bam; + const auto req = bam.compute_consumed_region(0, subrange<3>({0, 0, 0}, {100, 1, 1})); + CHECK(req == box<3>()); + } TEST_CASE("task_manager does not create multiple dependencies between the same tasks", "[task_manager][task-graph]") { using namespace sycl::access; diff --git a/test/test_utils.h b/test/test_utils.h index 202772e4..0366ca8f 100644 --- a/test/test_utils.h +++ b/test/test_utils.h @@ -314,39 +314,26 @@ namespace test_utils { template detail::task_id add_compute_task(detail::task_manager& tm, CGF cgf, range global_size = {1, 1}, id global_offset = {}) { - // Here and below: Using these functions will cause false-positive CGF diagnostic errors, b/c we are not capturing any accessors. - // TODO: For many test cases using these functions it may actually be preferable to circumvent the whole handler mechanism entirely. - detail::cgf_diagnostics::teardown(); - auto cg = detail::invoke_command_group_function([&, gs = global_size, go = global_offset](handler& cgh) { + return tm.generate_command_group_task(detail::invoke_command_group_function([&, gs = global_size, go = global_offset](handler& cgh) { cgf(cgh); cgh.parallel_for(gs, go, [](id) {}); - }); - return tm.generate_command_group_task(std::move(cg)); - detail::cgf_diagnostics::make_available(); + })); } template detail::task_id add_nd_range_compute_task(detail::task_manager& tm, CGF cgf, celerity::nd_range execution_range = {{1, 1}, {1, 1}}) { - // (See above). 
- detail::cgf_diagnostics::teardown(); - auto cg = detail::invoke_command_group_function([&, er = execution_range](handler& cgh) { + return tm.generate_command_group_task(detail::invoke_command_group_function([&, er = execution_range](handler& cgh) { cgf(cgh); cgh.parallel_for(er, [](nd_item) {}); - }); - return tm.generate_command_group_task(std::move(cg)); - detail::cgf_diagnostics::make_available(); + })); } template detail::task_id add_host_task(detail::task_manager& tm, Spec spec, CGF cgf) { - // (See above). - detail::cgf_diagnostics::teardown(); - auto cg = detail::invoke_command_group_function([&](handler& cgh) { + return tm.generate_command_group_task(detail::invoke_command_group_function([&](handler& cgh) { cgf(cgh); cgh.host_task(spec, [](auto...) {}); - }); - return tm.generate_command_group_task(std::move(cg)); - detail::cgf_diagnostics::make_available(); + })); } inline detail::task_id add_fence_task(detail::task_manager& tm, mock_host_object ho) {