From d047bc676268ca67bc61e4109b6e4af25b1ee7da Mon Sep 17 00:00:00 2001
From: Eyal Rozenberg <eyalroz1@gmx.com>
Date: Tue, 10 Sep 2024 23:12:37 +0300
Subject: [PATCH] Fixes #678: Reworked unique_span:

* Dropped the deleter template parameter
* Added a deleter member - always of the same type
* The deleter now takes the span
* Added a unique_span generator for non-default-constructible elements
* Dropped the per-memory-space unique_span types - they're all the same type now
* Made the default constructor explicitly zero things out to avoid spurious deletions of uninitialized / partially uninitialized spans
---
 .../p2pBandwidthLatencyTest.cu                |  12 +-
 .../other/io_compute_overlap_with_streams.cu  |  12 +-
 examples/other/new_cpp_standard/main.cpp      |   4 +-
 src/cuda/api/detail/unique_span.hpp           | 154 ++++++++++++++----
 src/cuda/api/memory.hpp                       | 129 +++++++++++----
 .../api/multi_wrapper_impls/unique_span.hpp   |  13 +-
 6 files changed, 248 insertions(+), 76 deletions(-)
diff --git a/examples/modified_cuda_samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest.cu b/examples/modified_cuda_samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest.cu
index d440b1d7..f7541c36 100644
--- a/examples/modified_cuda_samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest.cu
+++ b/examples/modified_cuda_samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest.cu
@@ -164,8 +164,8 @@ void outputBandwidthMatrix(P2PEngine mechanism, bool test_p2p, P2PDataTransfer p
     int numElems = 10000000;
     int repeat = 5;
 	vector<cuda::stream_t> streams;
-    vector<cuda::memory::device::unique_span<int>> buffers;
-    vector<cuda::memory::device::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
+    vector<cuda::unique_span<int>> buffers;
+    vector<cuda::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
     vector<cuda::event_t> start;
     vector<cuda::event_t> stop;
 
@@ -294,8 +294,8 @@ void outputBidirectionalBandwidthMatrix(P2PEngine p2p_mechanism, bool test_p2p)
 
 	vector<cuda::stream_t> streams_0;
 	vector<cuda::stream_t> streams_1;
-    vector<cuda::memory::device::unique_span<int>> buffers;
-    vector<cuda::memory::device::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
+    vector<cuda::unique_span<int>> buffers;
+    vector<cuda::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
     vector<cuda::event_t> start;
     vector<cuda::event_t> stop;
 
@@ -405,8 +405,8 @@ void outputLatencyMatrix(P2PEngine p2p_mechanism, bool test_p2p, P2PDataTransfer
 	//
 
 	vector<cuda::stream_t> streams;
-    vector<cuda::memory::device::unique_span<int>> buffers;
-    vector<cuda::memory::device::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
+    vector<cuda::unique_span<int>> buffers;
+    vector<cuda::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
     vector<cuda::event_t> start;
     vector<cuda::event_t> stop;
 
diff --git a/examples/other/io_compute_overlap_with_streams.cu b/examples/other/io_compute_overlap_with_streams.cu
index f53b4e9f..08b9e185 100644
--- a/examples/other/io_compute_overlap_with_streams.cu
+++ b/examples/other/io_compute_overlap_with_streams.cu
@@ -46,12 +46,12 @@ constexpr I div_rounding_up(I dividend, const I2 divisor) noexcept
 }
 
 struct buffer_set_t {
-    cuda::memory::host::unique_span<element_t> host_lhs;
-    cuda::memory::host::unique_span<element_t> host_rhs;
-    cuda::memory::host::unique_span<element_t> host_result;
-    cuda::memory::device::unique_span<element_t> device_lhs;
-    cuda::memory::device::unique_span<element_t> device_rhs;
-    cuda::memory::device::unique_span<element_t> device_result;
+    cuda::unique_span<element_t> host_lhs;
+    cuda::unique_span<element_t> host_rhs;
+    cuda::unique_span<element_t> host_result;
+    cuda::unique_span<element_t> device_lhs;
+    cuda::unique_span<element_t> device_rhs;
+    cuda::unique_span<element_t> device_result;
 };
 
 std::vector<buffer_set_t> generate_buffers(
diff --git a/examples/other/new_cpp_standard/main.cpp b/examples/other/new_cpp_standard/main.cpp
index 6e0e876c..b3cae564 100644
--- a/examples/other/new_cpp_standard/main.cpp
+++ b/examples/other/new_cpp_standard/main.cpp
@@ -25,8 +25,8 @@ cuda::device::id_t get_current_device_id()
 
 void unique_spans()
 {
-	cuda::memory::host::unique_span<float> data1(nullptr, 0);
-	cuda::memory::host::unique_span<float> data2(nullptr, 0);
+	cuda::unique_span<float> data1(nullptr, 0, cuda::detail_::default_span_deleter<float>);
+	cuda::unique_span<float> data2(nullptr, 0, cuda::detail_::default_span_deleter<float>);
 
 	data1 = std::move(data2);
 }
diff --git a/src/cuda/api/detail/unique_span.hpp b/src/cuda/api/detail/unique_span.hpp
index 8bd12fa0..f04b60d1 100644
--- a/src/cuda/api/detail/unique_span.hpp
+++ b/src/cuda/api/detail/unique_span.hpp
@@ -33,45 +33,46 @@ namespace cuda {
  * included in, C++14. It can be though of as a variation on std::array, with the the size and capacity
  * set dynamically, at construction time, rather than statically.
  *
- * @note unique_span = unique_span+typing or span+ownership+non_null
+ * @note unique_span = unique_span+typing or span+ownership+non_null . Well, sort of, because this
+ * class supports complex construction-allocation and deletion patterns, through deleter objects.
  *
- * @tparam T the type of individual elements in the unique_span
+ * @tparam T the type of an individual element in the unique_span
  */
-template<typename T, typename Deleter = ::std::default_delete<T[]>>
+template<typename T>
 class unique_span : public ::cuda::span<T> {
 public: // span types
 	using span_type = span<T>;
 
 	// Exposing some span type definitions, strictly for terseness
 	// (they're all visible on the outside anyway)
-	using size_type = typename span<T>::size_type;
-	using pointer = typename span<T>::pointer;
-	using reference = typename span<T>::reference;
-	using deleter_type = Deleter;
+	using size_type = typename span_type::size_type;
+	using pointer = typename span_type::pointer;
+	using reference = typename span_type::reference;
+	using deleter_type = void (*)(span_type);
 
-public: // exposing span data members
-	using span<T>::data;
-	using span<T>::size;
+public: // exposing span data members & adding our own
+	using span_type::data;
+	using span_type::size;
+	deleter_type deleter_;
 
 public: // constructors and destructor
 
-	constexpr unique_span() noexcept = default;
+	// Note: span_type's default ctor will create a {nullptr, 0} empty span.
+	constexpr unique_span() noexcept : span_type(), deleter_{nullptr} {}
 
 	// Disable copy construction - as this class never allocates;
 	unique_span(const unique_span&) = delete;
 	// ... and also match other kinds of unique_span's, which may get converted into
 	// a span and thus leak memory on construction!
-	template<typename U, typename UDeleter>
-	unique_span(const unique_span<U, UDeleter>&) = delete;
+	template<typename U>
+	unique_span(const unique_span<U>&) = delete;
 
 	// Note: This template provides constructibility of unique_span<const T> from unique_span<const T>
-	template<typename U, typename UDeleter>
-	unique_span(unique_span<U,UDeleter>&& other)
-		: unique_span{ other.release() }
+	template<typename U>
+	unique_span(unique_span<U>&& other) : unique_span{ other.release(), other.deleter_ }
 	{
 		static_assert(
-			::std::is_assignable<span_type, span<U>>::value and
-			::std::is_assignable<Deleter, UDeleter>::value,
+			::std::is_assignable<span_type, span<U>>::value,
 			"Invalid unique_span initializer");
 	}
 
@@ -81,25 +82,33 @@ class unique_span : public ::cuda::span<T> {
 	/// of a non-owned span when passing to a function, then trying to release that
 	/// memory returning from it.
 	///@{
-	explicit unique_span(span_type span) noexcept : span_type{span} { }
-	explicit unique_span(pointer data, size_type size) noexcept : unique_span{span_type{data, size}} { }
+	explicit unique_span(span_type span, deleter_type deleter) noexcept
+	: span_type{span}, deleter_(deleter) { }
+	explicit unique_span(pointer data, size_type size, deleter_type deleter) noexcept
+	: unique_span(span_type{data, size}, deleter) { }
+	explicit unique_span(memory::region_t region, deleter_type deleter) NOEXCEPT_IF_NDEBUG
+		: unique_span(span_type{region.start(), region.size() / sizeof(T)}, deleter)
+	{
+#ifndef NDEBUG // debug builds only: reject regions whose size is not a whole multiple of sizeof(T)
+		if (sizeof(T) * size() != region.size()) { // size() is the element count set by the delegated ctor
+			throw ::std::invalid_argument("Attempt to create a unique_span with a memory region which "
+				"does not comprise an integral number of areas of the element type size");
+		}
+#endif // NDEBUG
+	}
+
 	///@}
 
-	// Note: No constructor which also takes a deleter. We do not hold a deleter
-	// member - unlike unique_ptr's. Perhaps we should?
 
-	/** A move constructor.
-	 *
-	 * @note Moving is the only way a unique_span may have its @ref data_ field become
-	 * null; the user is strongly assumed not to use the `unique_span` after moving from
-	 * it.
-	 */
-	unique_span(unique_span&& other) noexcept : unique_span{ other.release() } { }
+	/// A move constructor.
+	///
+	/// @TODO Can we drop this one in favor of the general move ctor?
+	unique_span(unique_span&& other) noexcept : unique_span(other.release(), other.deleter_) { }
 
 	~unique_span() noexcept
 	{
 		if (data() != nullptr) {
-			deleter_type{}(data());
+			deleter_(*this);
 		}
 #ifndef NDEBUG
 		span_type::operator=(span_type{static_cast<T*>(nullptr), 0});
@@ -142,19 +151,41 @@ class unique_span : public ::cuda::span<T> {
 	 * @note This is not marked nodiscard by the same argument as for std::unique_ptr;
 	 * see also @url https://stackoverflow.com/q/60535399/1593077 and
 	 * @url http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0600r1.pdf
+	 *
+	 * @note it is the caller's responsibility to ensure it has a copy of the deleter
+	 * for the released span.
 	 */
 	span_type release() noexcept
 	{
 		span_type released { data(), size() };
 		span_type::operator=(span_type{ static_cast<T*>(nullptr), 0 });
+		// Note that we are _not_ replacing deleter.
 		return released;
 	}
 }; // class unique_span
 
+namespace detail_ {
+
+// @note you can't just use this always. Thus, only one of the make_ functions
+// below uses it.
+//
+// @note that if a nullptr happens to be deleted - that's not a problem;
+// it is supported by the delete operation(s).
+template <typename T>
+inline void default_span_deleter(span<T> sp)
+{
+	delete[] sp.data();
+}
+
+} // namespace detail_
+
+
 /**
  * A parallel of ::std::make_unique_for_overwrite, for @ref unique_span<T>'s, i.e. which maintains
  * the number of elements allocated.
  *
+ * @note The number of elements requested via @p size may legitimately be 0.
+ *
  * @tparam T the type of elements in the allocated @ref unique_span.
  *
  * @param size The number of @tparam T elements to allocate
@@ -162,7 +193,68 @@ class unique_span : public ::cuda::span<T> {
 template <typename T>
 unique_span<T> make_unique_span(size_t size)
 {
-	return unique_span<T>{ new T[size], size };
+	// Note: It _is_ acceptable to pass 0 here.
+	// See https://stackoverflow.com/q/1087042/1593077
+	return unique_span<T>(new T[size], size, detail_::default_span_deleter<T>);
+}
+
+namespace detail_ {
+
+template <typename T>
+inline void elementwise_destruct(span<T> sp)
+{
+	for (auto& element : sp) { element.~T(); }
+}
+
+// Use this structure to wrap a deleter which takes trivially-destructible/raw memory,
+// to then pass on for use with a typed span<T>
+//
+// Note: Ignores alignment.
+template <typename RawDeleter>
+struct deleter_with_elementwise_destruction {
+	template <typename T>
+ 	void operator()(span<T> sp)
+	 {
+		elementwise_destruct(sp);
+		raw_deleter(static_cast<void *>(sp.data()));
+	}
+	RawDeleter raw_deleter;
+};
+
+template <typename T, typename RawDeleter>
+void delete_with_elementwise_destruction(span<T> sp, RawDeleter raw_deleter)
+{
+	elementwise_destruct(sp);
+	raw_deleter(static_cast<void *>(sp.data()));
+}
+
+} // namespace detail_
+
+/**
+ * The alternative to `std::generate` and similar functions, for the unique_span, seeing
+ * how its elements must be constructed as it is constructed.
+ *
+ * @tparam T the type of elements in the allocated @ref unique_span.
+ * @tparam Generator A type invokable with the element index, to produce a T-constructor-argument
+ *
+ * @param size the number of elements in the unique_span to be created. It may legitimately be 0.
+ * @param generator_by_index invoked once per index, in increasing order, to produce each element's value
+ *
+ * @throws whatever allocation, @p generator_by_index or T's constructor throw; nothing is leaked then
+ */
+template <typename T, typename Generator>
+unique_span<T> generate_unique_span(size_t size, Generator generator_by_index)
+{
+	auto deleter = [](span<T> sp) { // destroys the span's elements, then frees the raw storage
+		detail_::delete_with_elementwise_destruction(sp, [](void* ptr) { ::operator delete(ptr); });
+	};
+	// Raw, uninitialized storage, so T need not be default-constructible. Alignment is not checked.
+	auto result_data = static_cast<T*>(::operator new(sizeof(T) * size));
+	size_t i = 0; // doubles as the count of elements constructed so far
+	try {
+		for (; i < size; i++) { new(&result_data[i]) T(generator_by_index(i)); }
+	} catch (...) { deleter(span<T>{result_data, i}); throw; } // undo the partial construction
+	return unique_span<T>(result_data, size, deleter);
 }
 
 } // namespace cuda
diff --git a/src/cuda/api/memory.hpp b/src/cuda/api/memory.hpp
index 7711e5f3..b0a73105 100644
--- a/src/cuda/api/memory.hpp
+++ b/src/cuda/api/memory.hpp
@@ -98,6 +98,18 @@ struct allocation_options {
 
 namespace detail_ {
 
+template <typename T, bool CheckDestructibility = false>
+inline void check_allocation_type() noexcept // compile-time checks only; does nothing at run time
+{
+	static_assert(::std::is_trivially_constructible<T>::value,
+		"Attempt to create a typed buffer of a non-trivially-constructible type");
+	static_assert(not CheckDestructibility or ::std::is_trivially_destructible<T>::value, // opt-in check
+		"Attempt to create a typed buffer of a non-trivially-destructible type "
+		"without allowing for its destruction");
+	static_assert(::std::is_trivially_copyable<T>::value,
+		"Attempt to create a typed buffer of a non-trivially-copyable type");
+}
+
 inline unsigned make_cuda_host_alloc_flags(allocation_options options)
 {
 	return
@@ -325,6 +337,7 @@ namespace detail_ {
 struct allocator {
 	void* operator()(size_t num_bytes) const { return detail_::allocate_in_current_context(num_bytes).start(); }
 };
+
 struct deleter {
 	void operator()(void* ptr) const { cuda::memory::device::free(ptr); }
 };
@@ -2446,25 +2459,63 @@ inline bool is_part_of_a_region_pair(const void* ptr)
 
 } // namespace mapped
 
+namespace detail_ {
+/**
+ * Create a unique_span without default construction, using raw-memory allocator
+ * and deleter gadgets.
+ *
+ * @note We allow this only for "convenient" types; see @ref detail_::check_allocation_type
+ *
+ * @tparam T Element type of the created unique_span
+ * @tparam RawDeleter default-constructible functor, invocable with the start address of the memory to free
+ * @tparam RegionAllocator invocable with a size in bytes, returning the allocated memory region
+ *
+ * @param size number of elements in the unique_span to be created
+ * @param allocator a gadget for allocating untyped memory; it is passed `size * sizeof(T)`
+ * @note no deleter object is passed in; a fresh `RawDeleter{}` is constructed for each deletion
+ * @return the newly-created unique_span
+ */
+template <typename T, typename RawDeleter, typename RegionAllocator>
+unique_span<T> make_convenient_type_unique_span(size_t size, RegionAllocator allocator)
+{
+	memory::detail_::check_allocation_type<T>();
+	auto deleter = [](span<T> sp) {
+		return RawDeleter{}(sp.data());
+	};
+	region_t allocated_region = allocator(size * sizeof(T));
+	return unique_span<T>(
+		allocated_region.as_span<T>(), // no constructor calls - trivial construction
+		deleter // no destructor calls - trivial destruction
+	);
+}
 
+} // namespace detail_
 
 
 namespace device {
 
-/// A unique span of device-global memory
-template <typename T>
-using unique_span = cuda::unique_span<T, detail_::deleter>;
-
 namespace detail_ {
 
 template <typename T>
 unique_span<T> make_unique_span(const context::handle_t context_handle, size_t size)
 {
+	CAW_SET_SCOPE_CONTEXT(context_handle);
+	return memory::detail_::make_convenient_type_unique_span<T, detail_::deleter>(size, allocate_in_current_context);
+/*
+	memory::detail_::check_allocation_type<T>();
 	// Note: _Not_ asserting trivial-copy-constructibility here; so if you want to copy data
-	// to/from the device using this object - it's your own repsonsibility to ensure that's
+	// to/from the device using this object - it's your own responsibility to ensure that's
 	// a valid thing to do.
 	CAW_SET_SCOPE_CONTEXT(context_handle);
-	return unique_span<T>{ allocate_in_current_context(size * sizeof(T)) };
+	auto deleter = [](span<T> sp) {
+		return detail_::deleter{}(sp.data());
+	};
+	auto allocated = allocate_in_current_context(size * sizeof(T)).as_span<T>();
+	return unique_span<T>(
+		allocated, // no constructor calls - trivial construction
+		deleter // no destructor calls - trivial destruction
+	);
+*/
 }
 
 } // namespace detail_
@@ -2510,24 +2561,20 @@ unique_span<T> make_unique_span(size_t size);
 
 /// See @ref `device::make_unique_span(const context_t& context, size_t size)`
 template <typename T>
-inline device::unique_span<T> make_unique_span(const context_t& context, size_t size)
+inline unique_span<T> make_unique_span(const context_t& context, size_t size)
 {
 	return device::make_unique_span<T>(context, size);
 }
 
 /// See @ref `device::make_unique_span(const context_t& context, size_t num_elements)`
 template <typename T>
-inline device::unique_span<T> make_unique_span(const device_t& device, size_t size)
+inline unique_span<T> make_unique_span(const device_t& device, size_t size)
 {
 	return device::make_unique_span<T>(device, size);
 }
 
 namespace host {
 
-/// A unique span of CUDA-driver-allocated, pinned host (=system) memory
-template <typename T>
-using unique_span = cuda::unique_span<T, detail_::deleter>;
-
 /**
  * Allocate memory for a consecutive sequence of typed elements in system
  * (host-side) memory.
@@ -2543,34 +2590,60 @@ using unique_span = cuda::unique_span<T, detail_::deleter>;
  * similar to {@ref cuda::device::make_unique_region}, except that the allocation is
  * conceived as typed elements.
  *
- * @note Typically, this is used for trivially-constructible elements, for which reason the
- * non-construction of individual elements should not pose a problem. But - let the user
- * beware, especially since this is host-side memory.
+ * @note We assume this memory is used for copying to or from device-side memory; hence,
+ * we constrain the type to be trivially constructible, destructible and copyable
+ *
+ * @note ignoring alignment
  */
 template <typename T>
 unique_span<T> make_unique_span(size_t size)
 {
-	return unique_span<T>{ allocate(size * sizeof(T)) };
+	// Need this because allocate takes more arguments and has default ones
+	auto allocator = [](size_t size) { return allocate(size); };
+	return memory::detail_::make_convenient_type_unique_span<T, detail_::deleter>(size, allocator);
+/*
+	memory::detail_::check_allocation_type<T>();
+	// Note: _Not_ asserting trivial-copy-constructibility here; so if you want to copy data
+	// to/from the device using this object - it's your own responsibility to ensure that's
+	// a valid thing to do.
+	auto deleter = [](span<T> sp) {
+		return detail_::deleter{}(sp.data());
+	};
+	auto allocated = allocate(size * sizeof(T)).as_span<T>();
+	return unique_span<T>(
+		allocated, // no constructor calls - trivial construction
+		deleter // no destructor calls - trivial destruction
+	);
+*/
 }
 
 } // namespace host
 
 namespace managed {
 
-/// A unique span of CUDA-driver-allocated managed memory
-template <typename T>
-using unique_span = cuda::unique_span<T, detail_::deleter>;
-
 namespace detail_ {
 
-template <typename T>
+template <typename T, initial_visibility_t InitialVisibility = initial_visibility_t::to_all_devices>
 unique_span<T> make_unique_span(
 	const context::handle_t  context_handle,
-	size_t                   size,
-	initial_visibility_t     initial_visibility = initial_visibility_t::to_all_devices)
+	size_t                   size)
 {
 	CAW_SET_SCOPE_CONTEXT(context_handle);
-	return unique_span<T>{ allocate_in_current_context(size * sizeof(T), initial_visibility) };
+	auto allocator = [](size_t size) {
+		return allocate_in_current_context(size, InitialVisibility);
+	};
+	return memory::detail_::make_convenient_type_unique_span<T, detail_::deleter>(size, allocator);
+/*
+	memory::detail_::check_allocation_type<T>();
+	auto deleter = [](span<T> sp) {
+		return detail_::deleter{}(sp.data());
+	};
+	auto allocated = allocate_in_current_context(size * sizeof(T), initial_visibility).as_span<T>();
+	return unique_span<T>(
+		allocated, // no constructor calls - trivial construction
+		deleter // no destructor calls - trivial destruction
+	);
+*/
 }
 
 } // namespace detail_
@@ -2601,7 +2674,7 @@ template <typename T>
 unique_span<T> make_unique_span(
 	const context_t&      context,
 	size_t                size,
-	initial_visibility_t  initial_visibility = initial_visibility_t::to_all_devices);
+    initial_visibility_t  initial_visibility = initial_visibility_t::to_all_devices);
 
 /**
  * @copydoc make_unique_span(const context_t&, size_t)
@@ -2612,7 +2685,7 @@ template <typename T>
 unique_span<T> make_unique_span(
 	const device_t&       device,
 	size_t                size,
-	initial_visibility_t  initial_visibility = initial_visibility_t::to_all_devices);
+    initial_visibility_t  initial_visibility = initial_visibility_t::to_all_devices);
 
 /**
  * @copydoc make_unique_span(const context_t&, size_t)
@@ -2622,8 +2695,8 @@ unique_span<T> make_unique_span(
  */
 template <typename T>
 unique_span<T> make_unique_span(
-	size_t                size,
-	initial_visibility_t  initial_visibility = initial_visibility_t::to_all_devices);
+    size_t size,
+    initial_visibility_t  initial_visibility = initial_visibility_t::to_all_devices);
 
 } // namespace managed
 
diff --git a/src/cuda/api/multi_wrapper_impls/unique_span.hpp b/src/cuda/api/multi_wrapper_impls/unique_span.hpp
index 0c0ee18b..248f26de 100644
--- a/src/cuda/api/multi_wrapper_impls/unique_span.hpp
+++ b/src/cuda/api/multi_wrapper_impls/unique_span.hpp
@@ -74,17 +74,24 @@ template <typename T>
 unique_span<T> make_unique_span(
 	const context_t&      context,
 	size_t                size,
-	initial_visibility_t  initial_visibility)
+    initial_visibility_t  initial_visibility)
 {
 	CAW_SET_SCOPE_CONTEXT(context.handle());
-	return unique_span<T>{ detail_::allocate_in_current_context(size * sizeof(T), initial_visibility) };
+    switch (initial_visibility) {
+    case initial_visibility_t::to_all_devices:
+        return detail_::make_unique_span<T, initial_visibility_t::to_all_devices>(context.handle(), size);
+    case initial_visibility_t::to_supporters_of_concurrent_managed_access:
+        return detail_::make_unique_span<T, initial_visibility_t::to_supporters_of_concurrent_managed_access>(context.handle(), size);
+    default:
+        throw ::std::logic_error("Library not yet updated to support additional initial visibility values");
+    }
 }
 
 template <typename T>
 unique_span<T> make_unique_span(
 	const device_t&       device,
 	size_t                size,
-	initial_visibility_t  initial_visibility)
+    initial_visibility_t  initial_visibility)
 {
 	auto pc = device.primary_context();
 	return make_unique_span<T>(pc, size, initial_visibility);