Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BP4 engine capable of using device buffers with Put #2777

Merged
merged 5 commits into from
Oct 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ adios_option(SZ "Enable support for SZ transforms" AUTO)
adios_option(LIBPRESSIO "Enable support for LIBPRESSIO transforms" AUTO)
adios_option(MGARD "Enable support for MGARD transforms" AUTO)
adios_option(PNG "Enable support for PNG transforms" AUTO)
adios_option(CUDA "Enable support for Cuda" AUTO)
adios_option(MPI "Enable support for MPI" AUTO)
adios_option(DAOS "Enable support for DAOS" AUTO)
adios_option(DataMan "Enable support for DataMan" AUTO)
Expand Down Expand Up @@ -174,7 +175,7 @@ if(ADIOS2_HAVE_MPI)
endif()

set(ADIOS2_CONFIG_OPTS
Blosc BZip2 ZFP SZ MGARD PNG MPI DataMan DAOS MHS SSC SST BP5 DataSpaces ZeroMQ HDF5 HDF5_VOL IME Python Fortran SysVShMem Profiling Endian_Reverse LIBPRESSIO
Blosc BZip2 ZFP SZ MGARD PNG CUDA MPI DataMan DAOS MHS SSC SST BP5 DataSpaces ZeroMQ HDF5 HDF5_VOL IME Python Fortran SysVShMem Profiling Endian_Reverse LIBPRESSIO
)
GenerateADIOSHeaderConfig(${ADIOS2_CONFIG_OPTS})
configure_file(
Expand Down Expand Up @@ -296,6 +297,9 @@ message(" C++ Compiler : ${CMAKE_CXX_COMPILER_ID} "
"${CMAKE_CXX_COMPILER_WRAPPER}")
message(" ${CMAKE_CXX_COMPILER}")
message("")
if(ADIOS2_HAVE_CUDA)
message(" Cuda Compiler : ${CMAKE_CUDA_COMPILER} ")
endif()
if(ADIOS2_HAVE_Fortran)
message(" Fortran Compiler : ${CMAKE_Fortran_COMPILER_ID} "
"${CMAKE_Fortran_COMPILER_VERSION} "
Expand Down
6 changes: 6 additions & 0 deletions bindings/CXX11/adios2/cxx11/Variable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ namespace adios2
} \
\
template <> \
void Variable<T>::SetMemorySpace(const MemorySpace mem) \
{ \
m_Variable->SetMemorySpace(mem); \
} \
\
template <> \
void Variable<T>::SetShape(const Dims &shape) \
{ \
helper::CheckForNullptr(m_Variable, \
Expand Down
5 changes: 5 additions & 0 deletions bindings/CXX11/adios2/cxx11/Variable.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ class Variable
/** Checks if object is valid, e.g. if( variable ) { //..valid } */
explicit operator bool() const noexcept;

/**
* Sets the memory step for all following Puts
*/
void SetMemorySpace(const MemorySpace mem);

/**
* Set new shape, care must be taken when reading back the variable for
* different steps. Only applies to Global arrays.
Expand Down
10 changes: 10 additions & 0 deletions cmake/DetectOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,16 @@ endif()

set(mpi_find_components C)

# Cuda
if(ADIOS2_USE_CUDA STREQUAL AUTO)
find_package(CUDAToolkit)
elseif(ADIOS2_USE_CUDA)
find_package(CUDAToolkit REQUIRED)
endif()
if(CUDAToolkit_FOUND)
set(ADIOS2_HAVE_CUDA TRUE)
endif()

# Fortran
if(ADIOS2_USE_Fortran STREQUAL AUTO)
include(CheckLanguage)
Expand Down
4 changes: 4 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ endif()
if(ADIOS2_BUILD_EXAMPLES_EXPERIMENTAL)
add_subdirectory(experimental)
endif()

if(ADIOS2_HAVE_CUDA)
add_subdirectory(cuda)
endif()
10 changes: 10 additions & 0 deletions examples/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#------------------------------------------------------------------------------#
# Distributed under the OSI-approved Apache License, Version 2.0. See
# accompanying file Copyright.txt for details.
#------------------------------------------------------------------------------#

enable_language(CUDA)

add_executable(GPUWriteRead_cuda cudaWriteRead.cu)
target_link_libraries(GPUWriteRead_cuda PUBLIC adios2::cxx11 CUDA::cudart CUDA::cuda_driver)
set_target_properties(GPUWriteRead_cuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
103 changes: 103 additions & 0 deletions examples/cuda/cudaWriteRead.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Simple example of writing and reading data
* through ADIOS2 BP engine with multiple simulations steps
* for every IO step.
*/

#include <ios>
#include <vector>
#include <iostream>

#include <adios2.h>

#include <cuda.h>
#include <cuda_runtime.h>

__global__ void update_array(float *vect, int val) {
vect[blockIdx.x] += val;
}

int BPWrite(const std::string fname, const size_t N, int nSteps){
// Initialize the simulation data
float *gpuSimData;
cudaMalloc(&gpuSimData, N * sizeof(float));
cudaMemset(gpuSimData, 0, N);

// Set up the ADIOS structures
adios2::ADIOS adios;
adios2::IO io = adios.DeclareIO("WriteIO");

// Declare an array for the ADIOS data of size (NumOfProcesses * N)
const adios2::Dims shape{static_cast<size_t>(N)};
const adios2::Dims start{static_cast<size_t>(0)};
const adios2::Dims count{N};
auto data = io.DefineVariable<float>("data", shape, start, count);

adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);

// Simulation steps
for (size_t step = 0; step < nSteps; ++step)
{
// Make a 1D selection to describe the local dimensions of the
// variable we write and its offsets in the global spaces
adios2::Box<adios2::Dims> sel({0}, {N});
data.SetSelection(sel);

// Start IO step every write step
bpWriter.BeginStep();
data.SetMemorySpace(adios2::MemorySpace::CUDA);
bpWriter.Put(data, gpuSimData);
bpWriter.EndStep();

// Update values in the simulation data
update_array<<<N,1>>>(gpuSimData, 10);
}

bpWriter.Close();
return 0;
}

int BPRead(const std::string fname, const size_t N, int nSteps){
// Create ADIOS structures
adios2::ADIOS adios;
adios2::IO io = adios.DeclareIO("ReadIO");

adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read);

auto data = io.InquireVariable<float>("data");
std::cout << "Steps expected by the reader: " << bpReader.Steps() << std::endl;
std::cout << "Expecting data per step: " << data.Shape()[0];
std::cout << " elements" << std::endl;

int write_step = bpReader.Steps();
// Create the local buffer and initialize the access point in the ADIOS file
std::vector<float> simData(N); //set size to N
const adios2::Dims start{0};
const adios2::Dims count{N};
const adios2::Box<adios2::Dims> sel(start, count);
data.SetSelection(sel);

// Read the data in each of the ADIOS steps
for (size_t step = 0; step < write_step; step++)
{
data.SetStepSelection({step, 1});
bpReader.Get(data, simData.data());
bpReader.PerformGets();
std::cout << "Simualation step " << step << " : ";
std::cout << simData.size() << " elements: " << simData[1] << std::endl;
}
bpReader.Close();
return 0;
}

int main(int argc, char **argv){
const std::string fname("GPUWriteRead.bp");
const int device_id = 1;
cudaSetDevice(device_id);
const size_t N = 6000;
int nSteps = 10, ret = 0;

ret += BPWrite(fname, N, nSteps);
ret += BPRead(fname, N, nSteps);
return ret;
}
7 changes: 7 additions & 0 deletions source/adios2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ add_library(adios2_core
helper/adiosXML.cpp
helper/adiosXMLUtil.cpp
helper/adiosYAML.cpp
helper/adiosCUDA.cu

#engine derived classes
engine/bp3/BP3Reader.cpp engine/bp3/BP3Reader.tcc
Expand Down Expand Up @@ -100,6 +101,12 @@ add_library(adios2_core
set_property(TARGET adios2_core PROPERTY EXPORT_NAME core)
set_property(TARGET adios2_core PROPERTY OUTPUT_NAME adios2${ADIOS2_LIBRARY_SUFFIX}_core)

if(ADIOS2_HAVE_CUDA)
enable_language(CUDA)
target_link_libraries(adios2_core PUBLIC CUDA::cudart CUDA::cuda_driver)
set_target_properties(adios2_core PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()

target_include_directories(adios2_core
PUBLIC
$<BUILD_INTERFACE:${ADIOS2_SOURCE_DIR}/source>
Expand Down
8 changes: 8 additions & 0 deletions source/adios2/common/ADIOSTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@
namespace adios2
{

/** Memory space for the buffers received with Put */
enum class MemorySpace
{
Detect, ///< Detect the memory space automatically
Host, ///< Host memory space (default)
CUDA ///< GPU memory spaces
};

/** Variable shape type identifier, assigned automatically from the signature of
* DefineVariable */
enum class ShapeID
Expand Down
1 change: 1 addition & 0 deletions source/adios2/core/Variable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ namespace core
info.StepsCount = stepsCount; \
info.Data = const_cast<T *>(data); \
info.Operations = m_Operations; \
info.IsGPU = IsCUDAPointer((void *)data); \
m_BlocksInfo.push_back(info); \
return m_BlocksInfo.back(); \
} \
Expand Down
1 change: 1 addition & 0 deletions source/adios2/core/Variable.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ class Variable : public VariableBase
SelectionType Selection = SelectionType::BoundingBox;
bool IsValue = false;
bool IsReverseDims = false;
bool IsGPU = false;
};

/** use for multiblock info */
Expand Down
21 changes: 21 additions & 0 deletions source/adios2/core/VariableBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,32 @@ VariableBase::VariableBase(const std::string &name, const DataType type,
InitShapeType();
}

bool VariableBase::IsCUDAPointer(void *ptr)
{
if (m_MemorySpace == MemorySpace::CUDA)
return true;
if (m_MemorySpace == MemorySpace::Host)
return false;

#ifdef ADIOS2_HAVE_CUDA
cudaPointerAttributes attr;
cudaPointerGetAttributes(&attr, ptr);
return attr.type == cudaMemoryTypeDevice;
#endif

return false;
}

size_t VariableBase::TotalSize() const noexcept
{
return helper::GetTotalSize(m_Count);
}

void VariableBase::SetMemorySpace(const MemorySpace mem)
{
m_MemorySpace = mem;
}

void VariableBase::SetShape(const adios2::Dims &shape)
{
if (m_Type == helper::GetDataType<std::string>())
Expand Down
13 changes: 13 additions & 0 deletions source/adios2/core/VariableBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class VariableBase
/** Variable -> sizeof(T),
* VariableCompound -> from constructor sizeof(struct) */
const size_t m_ElementSize;
MemorySpace m_MemorySpace = MemorySpace::Host;

ShapeID m_ShapeID = ShapeID::Unknown; ///< see shape types in ADIOSTypes.h
size_t m_BlockID = 0; ///< current block ID for local variables, global = 0
Expand Down Expand Up @@ -124,6 +125,18 @@ class VariableBase
*/
size_t TotalSize() const noexcept;

/**
* Check if buffer is allocated on CUDA space
* @param pointer to the user data
*/
bool IsCUDAPointer(void *ptr);

/**
* Set the memory space
* @param the memory space where the expected buffers were allocated
*/
void SetMemorySpace(const MemorySpace mem);

/**
* Set new shape
* @param shape input shape to be applied to this variable
Expand Down
46 changes: 46 additions & 0 deletions source/adios2/helper/adiosCUDA.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Distributed under the OSI-approved Apache License, Version 2.0. See
* accompanying file Copyright.txt for details.
*
* adiosCUDA.cpp
*
* Created on: May 9, 2021
* Author: Ana Gainaru [email protected]
*/

#ifndef ADIOS2_HELPER_ADIOSCUDA_CU_
#define ADIOS2_HELPER_ADIOSCUDA_CU_

#include <thrust/extrema.h>
#include <thrust/device_ptr.h>
#include "adios2/common/ADIOSMacros.h"

#include "adiosCUDA.h"

namespace {
template <class T>
void CUDAMinMaxImpl(const T *values, const size_t size, T &min, T &max)
{
thrust::device_ptr<const T> dev_ptr(values);
auto res = thrust::minmax_element(dev_ptr, dev_ptr + size);
cudaMemcpy(&min, thrust::raw_pointer_cast(res.first), sizeof(T), cudaMemcpyDeviceToHost);
cudaMemcpy(&max, thrust::raw_pointer_cast(res.second), sizeof(T), cudaMemcpyDeviceToHost);
}
// types non supported on the device
void CUDAMinMaxImpl(const long double *values, const size_t size, long double &min, long double &max) {}
void CUDAMinMaxImpl(const std::complex<float> *values, const size_t size, std::complex<float> &min, std::complex<float> &max) {}
void CUDAMinMaxImpl(const std::complex<double> *values, const size_t size, std::complex<double> &min, std::complex<double> &max) {}
}

template <class T>
void adios2::helper::CUDAMinMax(const T *values, const size_t size, T &min, T &max)
{
CUDAMinMaxImpl(values, size, min, max);
}

#define declare_type(T) \
template void adios2::helper::CUDAMinMax(const T *values, const size_t size, T &min, T &max);
ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(declare_type)
#undef declare_type

#endif /* ADIOS2_HELPER_ADIOSCUDA_CU_ */
29 changes: 29 additions & 0 deletions source/adios2/helper/adiosCUDA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Distributed under the OSI-approved Apache License, Version 2.0. See
* accompanying file Copyright.txt for details.
*
* adiosCUDA.h CUDA functions used in the ADIOS framework
*
* Created on: May 9, 2021
* Author: Ana Gainaru [email protected]
*/

#ifndef ADIOS2_HELPER_ADIOSCUDA_H_
#define ADIOS2_HELPER_ADIOSCUDA_H_

namespace adios2
{
namespace helper
{

/*
* CUDA kernel for computing the min and max from a
* GPU buffer
*/
template <class T>
void CUDAMinMax(const T *values, const size_t size, T &min, T &max);

} // helper
} // adios2

#endif /* ADIOS2_HELPER_ADIOSCUDA_H_ */
1 change: 1 addition & 0 deletions source/adios2/helper/adiosFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#ifndef ADIOS2_HELPER_ADIOSFUNCTIONS_H_
#define ADIOS2_HELPER_ADIOSFUNCTIONS_H_

#include "adios2/helper/adiosCUDA.h" //CUDA functions
#include "adios2/helper/adiosMath.h" //math functions (cmath, algorithm)
#include "adios2/helper/adiosMemory.h" //memcpy, std::copy, insert, resize
#include "adios2/helper/adiosNetwork.h" //network and staging functions
Expand Down
Loading