Skip to content

Commit

Permalink
Moving executor-specific code into namespaces
Browse files Browse the repository at this point in the history
  • Loading branch information
JOOpdenhoevel committed Sep 2, 2021
1 parent af56949 commit 18726b5
Show file tree
Hide file tree
Showing 25 changed files with 263 additions and 211 deletions.
2 changes: 1 addition & 1 deletion Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,7 @@ WARN_LOGFILE =
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.

INPUT = StencilStream README.md docs
INPUT = StencilStream StencilStream/monotile StencilStream/tiling README.md docs

# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
Expand Down
1 change: 1 addition & 0 deletions StencilStream/AbstractExecutor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "GenericID.hpp"
#include "Index.hpp"
#include <CL/sycl.hpp>

Expand Down
6 changes: 3 additions & 3 deletions StencilStream/MonotileExecutor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* SOFTWARE.
*/
#pragma once
#include "MonotileExecutionKernel.hpp"
#include "SingleQueueExecutor.hpp"
#include "monotile/ExecutionKernel.hpp"

namespace stencil {
template <typename T, uindex_t stencil_radius, typename TransFunc, uindex_t pipeline_length = 1,
Expand Down Expand Up @@ -81,8 +81,8 @@ class MonotileExecutor : public SingleQueueExecutor<T, stencil_radius, TransFunc
using in_pipe = cl::sycl::pipe<class monotile_in_pipe, T>;
using out_pipe = cl::sycl::pipe<class monotile_out_pipe, T>;
using ExecutionKernelImpl =
MonotileExecutionKernel<TransFunc, T, stencil_radius, pipeline_length, tile_width,
tile_height, in_pipe, out_pipe>;
monotile::ExecutionKernel<TransFunc, T, stencil_radius, pipeline_length, tile_width,
tile_height, in_pipe, out_pipe>;

cl::sycl::queue &queue = this->get_queue();

Expand Down
60 changes: 48 additions & 12 deletions StencilStream/SingleQueueExecutor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ namespace stencil {
/**
* \brief An abstract executor with common code for executors that work with a single SYCL queue.
*
* This class contains common code for executors that submit an execution kernel to a single queue
* that computes up to a fixed number of generations. This includes queue selection and management
* as well as event-based runtime analysis and an \ref AbstractExecutor.run implementation that
* incorporates the former. Child classes need to implement \ref SingleQueueExecutor.run_pass,
* submitting their execution kernel to the queue.
* This class contains common code for executors that work with execution kernels running on a
* single queue. This includes queue selection and management as well as event-based runtime
* analysis.
*
* User code that may work with any kind of executor should use pointers to the general \ref
* AbstractExecutor.
*
* \tparam T The cell type.
* \tparam stencil_radius The radius of the stencil buffer supplied to the transition function.
Expand All @@ -40,10 +41,22 @@ namespace stencil {
template <typename T, uindex_t stencil_radius, typename TransFunc>
class SingleQueueExecutor : public AbstractExecutor<T, stencil_radius, TransFunc> {
public:
/**
* \brief Create a new executor.
* \param halo_value The value of cells that are outside the grid.
* \param trans_func The instance of the transition function that should be used to calculate
* new generations.
*/
SingleQueueExecutor(T halo_value, TransFunc trans_func)
: AbstractExecutor<T, stencil_radius, TransFunc>(halo_value, trans_func),
queue(std::nullopt), runtime_sample() {}

/**
* \brief Return the configured queue.
*
* If no queue has been configured yet, this method will configure and return a queue targeting
* the FPGA emulator, without runtime analysis.
*/
cl::sycl::queue &get_queue() {
if (!this->queue.has_value()) {
select_emulator(false);
Expand All @@ -59,12 +72,17 @@ class SingleQueueExecutor : public AbstractExecutor<T, stencil_radius, TransFunc
* lead to exceptions.
*
* In order to use runtime analysis features, the queue has to be configured with the
* `cl::sycl::property::queue::enable_profiling` property.
* `cl::sycl::property::queue::enable_profiling` property. A `std::runtime_error` is thrown if
* `runtime_analysis` is true and the passed queue does not have this property.
*
* \deprecated This method is deprecated since the `runtime_analysis` flag is redundant by now.
* Use the other variant without the `runtime_analysis` flag instead.
*
* \param queue The new SYCL queue to use for execution.
* \param runtime_analysis Enable event-level runtime analysis.
*/
void set_queue(cl::sycl::queue queue, bool runtime_analysis) {
[[deprecated("Use set_queue(cl::sycl::queue) instead")]] void set_queue(cl::sycl::queue queue,
bool runtime_analysis) {
if (runtime_analysis &&
!queue.has_property<cl::sycl::property::queue::enable_profiling>()) {
throw std::runtime_error(
Expand All @@ -74,7 +92,21 @@ class SingleQueueExecutor : public AbstractExecutor<T, stencil_radius, TransFunc
}

/**
* \brief Set up a SYCL queue with the FPGA emulator device.
* \brief Manually set the SYCL queue to use for execution.
*
* Note that as of OneAPI Version 2021.1.1, device code is usually built either for CPU/GPU, for
* the FPGA emulator or for a specific FPGA. Using the wrong queue with the wrong device will
* lead to exceptions.
*
* Runtime analysis is enabled by configuring the queue with the
* `cl::sycl::property::queue::enable_profiling` property.
*
* \param queue The new SYCL queue to use for execution.
*/
void set_queue(cl::sycl::queue queue) {
    // Store the caller-provided queue as the queue used for execution.
    this->queue = queue;
}

/**
* \brief Set up a SYCL queue with the FPGA emulator device, without runtime analysis.
*
* Note that as of OneAPI Version 2021.1.1, device code is usually built either for CPU/GPU, for
* the FPGA emulator or for a specific FPGA. Using the wrong queue with the wrong device will
Expand All @@ -83,7 +115,7 @@ class SingleQueueExecutor : public AbstractExecutor<T, stencil_radius, TransFunc
void select_emulator() { select_emulator(false); }

/**
* \brief Set up a SYCL queue with an FPGA device.
* \brief Set up a SYCL queue with an FPGA device, without runtime analysis.
*
* Note that as of OneAPI Version 2021.1.1, device code is usually built either for CPU/GPU, for
* the FPGA emulator or for a specific FPGA. Using the wrong queue with the wrong device will
Expand Down Expand Up @@ -119,6 +151,11 @@ class SingleQueueExecutor : public AbstractExecutor<T, stencil_radius, TransFunc
get_queue_properties(runtime_analysis));
}

/**
* \brief Check if the configured queue supports runtime analysis.
*
* False if no queue has been configured yet.
*/
bool is_runtime_analysis_enabled() const {
if (queue.has_value()) {
return queue->has_property<cl::sycl::property::queue::enable_profiling>();
Expand All @@ -128,10 +165,9 @@ class SingleQueueExecutor : public AbstractExecutor<T, stencil_radius, TransFunc
}

/**
* \brief Return the runtime information collected from the last \ref StencilExecutor.run call.
* \brief Return a reference to the runtime information struct.
*
* \return The collected runtime information. May be `nullopt` if no runtime analysis was
* configured.
* \return The collected runtime information.
*/
RuntimeSample &get_runtime_sample() {
    // Hand out a mutable reference to the executor's runtime sample member.
    return this->runtime_sample;
}

Expand Down
14 changes: 7 additions & 7 deletions StencilStream/StencilExecutor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "Grid.hpp"
#include "SingleQueueExecutor.hpp"
#include "TilingExecutionKernel.hpp"
#include "tiling/ExecutionKernel.hpp"
#include "tiling/Grid.hpp"

namespace stencil {
/**
Expand Down Expand Up @@ -96,7 +96,7 @@ class StencilExecutor : public SingleQueueExecutor<T, stencil_radius, TransFunc>
/**
* \brief Copy the current state of the grid to the buffer.
*
* The \ref output_buffer has to have the exact range as returned by \ref
* The `output_buffer` has to have the exact range as returned by \ref
* StencilExecutor.get_grid_range.
*
* \param output_buffer Copy the state of the grid to this buffer.
Expand All @@ -116,8 +116,8 @@ class StencilExecutor : public SingleQueueExecutor<T, stencil_radius, TransFunc>
using in_pipe = cl::sycl::pipe<class tiling_in_pipe, T>;
using out_pipe = cl::sycl::pipe<class tiling_out_pipe, T>;
using ExecutionKernelImpl =
TilingExecutionKernel<TransFunc, T, stencil_radius, pipeline_length, tile_width,
tile_height, in_pipe, out_pipe>;
tiling::ExecutionKernel<TransFunc, T, stencil_radius, pipeline_length, tile_width,
tile_height, in_pipe, out_pipe>;

cl::sycl::queue &queue = this->get_queue();

Expand All @@ -126,7 +126,7 @@ class StencilExecutor : public SingleQueueExecutor<T, stencil_radius, TransFunc>
uindex_t grid_height = input_grid.get_grid_range().r;

while (this->get_i_generation() < target_i_generation) {
Grid output_grid = input_grid.make_output_grid();
GridImpl output_grid = input_grid.make_output_grid();

std::vector<cl::sycl::event> events;
events.reserve(input_grid.get_tile_range().c * input_grid.get_tile_range().r);
Expand Down Expand Up @@ -166,7 +166,7 @@ class StencilExecutor : public SingleQueueExecutor<T, stencil_radius, TransFunc>
}

private:
using GridImpl = Grid<T, tile_width, tile_height, halo_radius, burst_length>;
using GridImpl = tiling::Grid<T, tile_width, tile_height, halo_radius, burst_length>;
GridImpl input_grid;
};
} // namespace stencil
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "GenericID.hpp"
#include "Helpers.hpp"
#include "Index.hpp"
#include "Stencil.hpp"
#include "../GenericID.hpp"
#include "../Helpers.hpp"
#include "../Index.hpp"
#include "../Stencil.hpp"
#include <optional>

namespace stencil {
namespace monotile {

/**
* \brief A kernel that executes a stencil transition function using the monotile approach.
Expand All @@ -46,7 +47,7 @@ namespace stencil {
*/
template <typename TransFunc, typename T, uindex_t stencil_radius, uindex_t pipeline_length,
uindex_t tile_width, uindex_t tile_height, typename in_pipe, typename out_pipe>
class MonotileExecutionKernel {
class ExecutionKernel {
public:
static_assert(
std::is_invocable_r<T, TransFunc const, Stencil<T, stencil_radius> const &>::value);
Expand Down Expand Up @@ -90,8 +91,8 @@ class MonotileExecutionKernel {
* \param grid_height The number of cell rows in the grid.
* \param halo_value The value of cells outside the grid.
*/
MonotileExecutionKernel(TransFunc trans_func, uindex_t i_generation, uindex_t n_generations,
uindex_t grid_width, uindex_t grid_height, T halo_value)
ExecutionKernel(TransFunc trans_func, uindex_t i_generation, uindex_t n_generations,
uindex_t grid_width, uindex_t grid_height, T halo_value)
: trans_func(trans_func), i_generation(i_generation), n_generations(n_generations),
grid_width(grid_width), grid_height(grid_height), halo_value(halo_value) {}

Expand Down Expand Up @@ -221,4 +222,5 @@ class MonotileExecutionKernel {
T halo_value;
};

} // namespace monotile
} // namespace stencil
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "GenericID.hpp"
#include "Index.hpp"
#include "Stencil.hpp"
#include "../GenericID.hpp"
#include "../Helpers.hpp"
#include "../Index.hpp"
#include "../Stencil.hpp"
#include <optional>

namespace stencil {
namespace tiling {

/**
* \brief A kernel that executes a stencil transition function on a tile.
Expand All @@ -42,7 +44,7 @@ namespace stencil {
template <typename TransFunc, typename T, uindex_t stencil_radius, uindex_t pipeline_length,
uindex_t output_tile_width, uindex_t output_tile_height, typename in_pipe,
typename out_pipe>
class TilingExecutionKernel {
class ExecutionKernel {
public:
static_assert(
std::is_invocable_r<T, TransFunc const, Stencil<T, stencil_radius> const &>::value);
Expand Down Expand Up @@ -86,9 +88,9 @@ class TilingExecutionKernel {
* relative to the grid's origin. See `grid_c_offset` for details.
* \param grid_width The number of cell columns in the grid.
* \param grid_height The number of cell rows in the grid.
*/
TilingExecutionKernel(TransFunc trans_func, uindex_t i_generation, uindex_t target_i_generation,
uindex_t grid_c_offset, uindex_t grid_r_offset, uindex_t grid_width,
uindex_t grid_height, T halo_value)
ExecutionKernel(TransFunc trans_func, uindex_t i_generation, uindex_t target_i_generation,
uindex_t grid_c_offset, uindex_t grid_r_offset, uindex_t grid_width,
uindex_t grid_height, T halo_value)
: trans_func(trans_func), i_generation(i_generation),
target_i_generation(target_i_generation), grid_c_offset(grid_c_offset),
grid_r_offset(grid_r_offset), grid_width(grid_width), grid_height(grid_height),
Expand Down Expand Up @@ -200,4 +202,5 @@ class TilingExecutionKernel {
T halo_value;
};

} // namespace tiling
} // namespace stencil
12 changes: 7 additions & 5 deletions StencilStream/Grid.hpp → StencilStream/tiling/Grid.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "GenericID.hpp"
#include "../GenericID.hpp"
#include "IOKernel.hpp"
#include "Tile.hpp"
#include <CL/sycl/accessor.hpp>
Expand All @@ -28,14 +28,15 @@
#include <vector>

namespace stencil {
namespace tiling {

/**
* \brief A rectangular container of cells with a dynamic, arbitrary size, used by the \ref
* StencilExecutor.
*
* This class is part of the \ref tiling architecture. It logically contains the grid the transition
* function is applied to and it partitions the grid into tiles of static size. These are the units
* the \ref TilingExecutionKernel works on.
* the \ref ExecutionKernel works on.
*
* Apart from providing copy operations to and from monolithic grid buffers, it also handles the
* input and output kernel submission for a given tile.
Expand All @@ -56,7 +57,7 @@ class Grid {
/**
* \brief Create a grid with undefined contents.
*
* This constructor is used to create the output grid of a \ref TilingExecutionKernel
* This constructor is used to create the output grid of a \ref ExecutionKernel
* invocation. Its contents do not need to be initialized or copied from another buffer since
* it will override cell values from the execution kernel anyway.
*
Expand Down Expand Up @@ -153,7 +154,7 @@ class Grid {
}

/**
* \brief Submit the input kernels required for one execution of the \ref TilingExecutionKernel.
* \brief Submit the input kernels required for one execution of the \ref ExecutionKernel.
*
* This will submit five \ref IOKernel invocations in total, which are executed in order. Those
* kernels write the contents of a tile and its halo to the `in_pipe`.
Expand Down Expand Up @@ -227,7 +228,7 @@ class Grid {

/**
* \brief Submit the output kernels required for one execution of the \ref
* TilingExecutionKernel.
* ExecutionKernel.
*
* This will submit three \ref IOKernel invocations in total, which are executed in order. Those
* kernels will write cells from the `out_pipe` to one of the tiles.
Expand Down Expand Up @@ -356,4 +357,5 @@ class Grid {
cl::sycl::range<2> grid_range;
};

} // namespace tiling
} // namespace stencil
10 changes: 6 additions & 4 deletions StencilStream/IOKernel.hpp → StencilStream/tiling/IOKernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,22 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "CounterID.hpp"
#include "Index.hpp"
#include "../CounterID.hpp"
#include "../Index.hpp"
#include <CL/sycl.hpp>
#include <CL/sycl/accessor.hpp>
#include <array>

namespace stencil {
namespace tiling {

/**
* \brief Generic Input/Output kernel for use with the \ref TilingExecutionKernel and \ref Grid.
* \brief Generic Input/Output kernel for use with the \ref ExecutionKernel and \ref Grid.
*
* This kernel provides IO services to the execution kernel by writing the contents of an input tile
* with halo to the input pipe and writing the output of the execution kernel to an output tile. The
* input and output code only differs by one line. Therefore, both services are provided by the same
* class. Unlike \ref TilingExecutionKernel, this kernel is supposed to be constructed by a lambda
* class. Unlike \ref ExecutionKernel, this kernel is supposed to be constructed by a lambda
* expression that then either executes \ref IOKernel.read or \ref IOKernel.write.
*
* Logically, an instance of the IO kernel receives access to a horizontal slice of the input or
Expand Down Expand Up @@ -156,4 +157,5 @@ class IOKernel {
uindex_t n_columns;
};

} // namespace tiling
} // namespace stencil
Loading

0 comments on commit 18726b5

Please sign in to comment.