From 0c3b98d5aa2033e3a6111abaa89e85cff84ae113 Mon Sep 17 00:00:00 2001 From: Ana Gainaru Date: Fri, 8 Oct 2021 13:41:47 -0400 Subject: [PATCH 1/5] Detect the CUDA environment and link adios2 with CUDA --- CMakeLists.txt | 6 +++++- cmake/DetectOptions.cmake | 10 ++++++++++ source/adios2/CMakeLists.txt | 6 ++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 79e9114188..7cd1c3090f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -145,6 +145,7 @@ adios_option(SZ "Enable support for SZ transforms" AUTO) adios_option(LIBPRESSIO "Enable support for LIBPRESSIO transforms" AUTO) adios_option(MGARD "Enable support for MGARD transforms" AUTO) adios_option(PNG "Enable support for PNG transforms" AUTO) +adios_option(CUDA "Enable support for Cuda" AUTO) adios_option(MPI "Enable support for MPI" AUTO) adios_option(DAOS "Enable support for DAOS" AUTO) adios_option(DataMan "Enable support for DataMan" AUTO) @@ -174,7 +175,7 @@ if(ADIOS2_HAVE_MPI) endif() set(ADIOS2_CONFIG_OPTS - Blosc BZip2 ZFP SZ MGARD PNG MPI DataMan DAOS MHS SSC SST BP5 DataSpaces ZeroMQ HDF5 HDF5_VOL IME Python Fortran SysVShMem Profiling Endian_Reverse LIBPRESSIO + Blosc BZip2 ZFP SZ MGARD PNG CUDA MPI DataMan DAOS MHS SSC SST BP5 DataSpaces ZeroMQ HDF5 HDF5_VOL IME Python Fortran SysVShMem Profiling Endian_Reverse LIBPRESSIO ) GenerateADIOSHeaderConfig(${ADIOS2_CONFIG_OPTS}) configure_file( @@ -296,6 +297,9 @@ message(" C++ Compiler : ${CMAKE_CXX_COMPILER_ID} " "${CMAKE_CXX_COMPILER_WRAPPER}") message(" ${CMAKE_CXX_COMPILER}") message("") +if(ADIOS2_HAVE_CUDA) + message(" Cuda Compiler : ${CMAKE_CUDA_COMPILER} ") +endif() if(ADIOS2_HAVE_Fortran) message(" Fortran Compiler : ${CMAKE_Fortran_COMPILER_ID} " "${CMAKE_Fortran_COMPILER_VERSION} " diff --git a/cmake/DetectOptions.cmake b/cmake/DetectOptions.cmake index 0b7b90a924..d89d47da80 100644 --- a/cmake/DetectOptions.cmake +++ b/cmake/DetectOptions.cmake @@ -141,6 +141,16 @@ endif() set(mpi_find_components C) +# Cuda +if(ADIOS2_USE_CUDA STREQUAL AUTO) + find_package(CUDAToolkit) +elseif(ADIOS2_USE_CUDA) + find_package(CUDAToolkit REQUIRED) +endif() +if(CUDAToolkit_FOUND) + set(ADIOS2_HAVE_CUDA TRUE) +endif() + # Fortran if(ADIOS2_USE_Fortran STREQUAL AUTO) include(CheckLanguage) diff --git a/source/adios2/CMakeLists.txt b/source/adios2/CMakeLists.txt index 820dbc6b47..81a90cc471 100644 --- a/source/adios2/CMakeLists.txt +++ b/source/adios2/CMakeLists.txt @@ -100,6 +100,12 @@ add_library(adios2_core set_property(TARGET adios2_core PROPERTY EXPORT_NAME core) set_property(TARGET adios2_core PROPERTY OUTPUT_NAME adios2${ADIOS2_LIBRARY_SUFFIX}_core) +if(ADIOS2_HAVE_CUDA) + enable_language(CUDA) + target_link_libraries(adios2_core PUBLIC CUDA::cudart CUDA::cuda_driver) + set_target_properties(adios2_core PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +endif() + target_include_directories(adios2_core PUBLIC $ From 495764c45da70677d6f309d8c82c32134ae03579 Mon Sep 17 00:00:00 2001 From: Ana Gainaru Date: Fri, 8 Oct 2021 13:42:53 -0400 Subject: [PATCH 2/5] CUDA specific functions (to compute the metadata) --- source/adios2/CMakeLists.txt | 1 + source/adios2/helper/adiosCUDA.cu | 46 +++++++++++++++++++++++++++ source/adios2/helper/adiosCUDA.h | 29 +++++++++++++++++ source/adios2/helper/adiosFunctions.h | 1 + 4 files changed, 77 insertions(+) create mode 100644 source/adios2/helper/adiosCUDA.cu create mode 100644 source/adios2/helper/adiosCUDA.h diff --git a/source/adios2/CMakeLists.txt b/source/adios2/CMakeLists.txt index 81a90cc471..83e9bb44e8 100644 --- a/source/adios2/CMakeLists.txt +++ b/source/adios2/CMakeLists.txt @@ -35,6 +35,7 @@ add_library(adios2_core helper/adiosXML.cpp helper/adiosXMLUtil.cpp helper/adiosYAML.cpp + helper/adiosCUDA.cu #engine derived classes engine/bp3/BP3Reader.cpp engine/bp3/BP3Reader.tcc diff --git a/source/adios2/helper/adiosCUDA.cu b/source/adios2/helper/adiosCUDA.cu new file mode 100644 index 0000000000..b75fd9f4e0 --- /dev/null +++ b/source/adios2/helper/adiosCUDA.cu @@ -0,0 +1,46 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. + * + * adiosCUDA.cpp + * + * Created on: May 9, 2021 + * Author: Ana Gainaru gainarua@ornl.gov + */ + +#ifndef ADIOS2_HELPER_ADIOSCUDA_CU_ +#define ADIOS2_HELPER_ADIOSCUDA_CU_ + +#include +#include +#include "adios2/common/ADIOSMacros.h" + +#include "adiosCUDA.h" + +namespace { +template +void CUDAMinMaxImpl(const T *values, const size_t size, T &min, T &max) +{ + thrust::device_ptr dev_ptr(values); + auto res = thrust::minmax_element(dev_ptr, dev_ptr + size); + cudaMemcpy(&min, thrust::raw_pointer_cast(res.first), sizeof(T), cudaMemcpyDeviceToHost); + cudaMemcpy(&max, thrust::raw_pointer_cast(res.second), sizeof(T), cudaMemcpyDeviceToHost); +} +// types non supported on the device +void CUDAMinMaxImpl(const long double *values, const size_t size, long double &min, long double &max) {} +void CUDAMinMaxImpl(const std::complex *values, const size_t size, std::complex &min, std::complex &max) {} +void CUDAMinMaxImpl(const std::complex *values, const size_t size, std::complex &min, std::complex &max) {} +} + +template +void adios2::helper::CUDAMinMax(const T *values, const size_t size, T &min, T &max) +{ + CUDAMinMaxImpl(values, size, min, max); +} + +#define declare_type(T) \ +template void adios2::helper::CUDAMinMax(const T *values, const size_t size, T &min, T &max); +ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(declare_type) +#undef declare_type + +#endif /* ADIOS2_HELPER_ADIOSCUDA_CU_ */ diff --git a/source/adios2/helper/adiosCUDA.h b/source/adios2/helper/adiosCUDA.h new file mode 100644 index 0000000000..51b1c591d7 --- /dev/null +++ b/source/adios2/helper/adiosCUDA.h @@ -0,0 +1,29 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. + * + * adiosCUDA.h CUDA functions used in the ADIOS framework + * + * Created on: May 9, 2021 + * Author: Ana Gainaru gainarua@ornl.gov + */ + +#ifndef ADIOS2_HELPER_ADIOSCUDA_H_ +#define ADIOS2_HELPER_ADIOSCUDA_H_ + +namespace adios2 +{ +namespace helper +{ + +/* + * CUDA kernel for computing the min and max from a + * GPU buffer + */ +template +void CUDAMinMax(const T *values, const size_t size, T &min, T &max); + +} // helper +} // adios2 + +#endif /* ADIOS2_HELPER_ADIOSCUDA_H_ */ diff --git a/source/adios2/helper/adiosFunctions.h b/source/adios2/helper/adiosFunctions.h index 49dc629c67..0e4ce40f8d 100644 --- a/source/adios2/helper/adiosFunctions.h +++ b/source/adios2/helper/adiosFunctions.h @@ -12,6 +12,7 @@ #ifndef ADIOS2_HELPER_ADIOSFUNCTIONS_H_ #define ADIOS2_HELPER_ADIOSFUNCTIONS_H_ +#include "adios2/helper/adiosCUDA.h" //CUDA functions #include "adios2/helper/adiosMath.h" //math functions (cmath, algorithm) #include "adios2/helper/adiosMemory.h" //memcpy, std::copy, insert, resize #include "adios2/helper/adiosNetwork.h" //network and staging functions From 39c8c999e57b3254710f9381d4b69064c4526024 Mon Sep 17 00:00:00 2001 From: Ana Gainaru Date: Fri, 8 Oct 2021 13:44:27 -0400 Subject: [PATCH 3/5] Add memory space information to BP4 Variables --- bindings/CXX11/adios2/cxx11/Variable.cpp | 6 ++++++ bindings/CXX11/adios2/cxx11/Variable.h | 5 +++++ source/adios2/common/ADIOSTypes.h | 8 ++++++++ source/adios2/core/Variable.cpp | 1 + source/adios2/core/Variable.h | 1 + source/adios2/core/VariableBase.cpp | 21 +++++++++++++++++++++ source/adios2/core/VariableBase.h | 13 +++++++++++++ 7 files changed, 55 insertions(+) diff --git a/bindings/CXX11/adios2/cxx11/Variable.cpp b/bindings/CXX11/adios2/cxx11/Variable.cpp index ce76cfd5f5..3ddb06416f 100644 --- a/bindings/CXX11/adios2/cxx11/Variable.cpp +++ b/bindings/CXX11/adios2/cxx11/Variable.cpp @@ -32,6 +32,12 @@ namespace adios2 } \ \ template <> \ + void Variable::SetMemorySpace(const MemorySpace mem) \ + { \ + m_Variable->SetMemorySpace(mem); \ + } \ + \ + template <> \ void Variable::SetShape(const Dims &shape) \ { \ helper::CheckForNullptr(m_Variable, \ diff --git a/bindings/CXX11/adios2/cxx11/Variable.h b/bindings/CXX11/adios2/cxx11/Variable.h index 1130c81689..1104b32a46 100644 --- a/bindings/CXX11/adios2/cxx11/Variable.h +++ b/bindings/CXX11/adios2/cxx11/Variable.h @@ -147,6 +147,11 @@ class Variable /** Checks if object is valid, e.g. if( variable ) { //..valid } */ explicit operator bool() const noexcept; + /** + * Sets the memory step for all following Puts + */ + void SetMemorySpace(const MemorySpace mem); + /** * Set new shape, care must be taken when reading back the variable for * different steps. Only applies to Global arrays. diff --git a/source/adios2/common/ADIOSTypes.h b/source/adios2/common/ADIOSTypes.h index 5030d0fe9a..2eb869e910 100644 --- a/source/adios2/common/ADIOSTypes.h +++ b/source/adios2/common/ADIOSTypes.h @@ -32,6 +32,14 @@ namespace adios2 { +/** Memory space for the buffers received with Put */ +enum class MemorySpace +{ + Detect, ///< Detect the memory space automatically + Host, ///< Host memory space (default) + CUDA ///< GPU memory spaces +}; + /** Variable shape type identifier, assigned automatically from the signature of * DefineVariable */ enum class ShapeID diff --git a/source/adios2/core/Variable.cpp b/source/adios2/core/Variable.cpp index 01a7494c78..187311e342 100644 --- a/source/adios2/core/Variable.cpp +++ b/source/adios2/core/Variable.cpp @@ -49,6 +49,7 @@ namespace core info.StepsCount = stepsCount; \ info.Data = const_cast(data); \ info.Operations = m_Operations; \ + info.IsGPU = IsCUDAPointer((void *)data); \ m_BlocksInfo.push_back(info); \ return m_BlocksInfo.back(); \ } \ diff --git a/source/adios2/core/Variable.h b/source/adios2/core/Variable.h index cf12d43428..d49f30afb3 100644 --- a/source/adios2/core/Variable.h +++ b/source/adios2/core/Variable.h @@ -112,6 +112,7 @@ class Variable : public VariableBase SelectionType Selection = SelectionType::BoundingBox; bool IsValue = false; bool IsReverseDims = false; + bool IsGPU = false; }; /** use for multiblock info */ diff --git a/source/adios2/core/VariableBase.cpp b/source/adios2/core/VariableBase.cpp index c85b7d865a..ce0640642f 100644 --- a/source/adios2/core/VariableBase.cpp +++ b/source/adios2/core/VariableBase.cpp @@ -38,11 +38,32 @@ VariableBase::VariableBase(const std::string &name, const DataType type, InitShapeType(); } +bool VariableBase::IsCUDAPointer(void *ptr) +{ + if (m_MemorySpace == MemorySpace::CUDA) + return true; + if (m_MemorySpace == MemorySpace::Host) + return false; + +#ifdef ADIOS2_HAVE_CUDA + cudaPointerAttributes attr; + cudaPointerGetAttributes(&attr, ptr); + return attr.type == cudaMemoryTypeDevice; +#endif + + return false; +} + size_t VariableBase::TotalSize() const noexcept { return helper::GetTotalSize(m_Count); } +void VariableBase::SetMemorySpace(const MemorySpace mem) +{ + m_MemorySpace = mem; +} + void VariableBase::SetShape(const adios2::Dims &shape) { if (m_Type == helper::GetDataType()) diff --git a/source/adios2/core/VariableBase.h b/source/adios2/core/VariableBase.h index fab1c719e8..2f46646cdc 100644 --- a/source/adios2/core/VariableBase.h +++ b/source/adios2/core/VariableBase.h @@ -46,6 +46,7 @@ class VariableBase /** Variable -> sizeof(T), * VariableCompound -> from constructor sizeof(struct) */ const size_t m_ElementSize; + MemorySpace m_MemorySpace = MemorySpace::Host; ShapeID m_ShapeID = ShapeID::Unknown; ///< see shape types in ADIOSTypes.h size_t m_BlockID = 0; ///< current block ID for local variables, global = 0 @@ -124,6 +125,18 @@ class VariableBase */ size_t TotalSize() const noexcept; + /** + * Check if buffer is allocated on CUDA space + * @param pointer to the user data + */ + bool IsCUDAPointer(void *ptr); + + /** + * Set the memory space + * @param the memory space where the expected buffers were allocated + */ + void SetMemorySpace(const MemorySpace mem); + /** * Set new shape * @param shape input shape to be applied to this variable From a310850e6f8da5766cee736b561e977275a3e2d2 Mon Sep 17 00:00:00 2001 From: Ana Gainaru Date: Fri, 8 Oct 2021 13:45:44 -0400 Subject: [PATCH 4/5] BP4 engine capable of using device buffers with Put --- source/adios2/helper/adiosMemory.h | 9 +++++++++ source/adios2/helper/adiosMemory.inl | 16 ++++++++++++++++ source/adios2/toolkit/format/bp/BPSerializer.tcc | 12 ++++++++++++ .../toolkit/format/bp/bp4/BP4Serializer.tcc | 9 +++++++++ 4 files changed, 46 insertions(+) diff --git a/source/adios2/helper/adiosMemory.h b/source/adios2/helper/adiosMemory.h index 009cf1b318..9a7fa96ada 100644 --- a/source/adios2/helper/adiosMemory.h +++ b/source/adios2/helper/adiosMemory.h @@ -39,6 +39,15 @@ template void InsertToBuffer(std::vector &buffer, const T *source, const size_t elements = 1) noexcept; +/* + * Copies data from a GPU buffer to a specific location in the adios buffer + */ +#ifdef ADIOS2_HAVE_CUDA +template +void CopyFromGPUToBuffer(std::vector &buffer, size_t &position, + const T *source, const size_t elements = 1) noexcept; +#endif + /** * Copies data to a specific location in the buffer updating position * Does not update vec.size(). diff --git a/source/adios2/helper/adiosMemory.inl b/source/adios2/helper/adiosMemory.inl index 32cb76b23f..724ed50e3d 100644 --- a/source/adios2/helper/adiosMemory.inl +++ b/source/adios2/helper/adiosMemory.inl @@ -20,6 +20,10 @@ #include #include /// \endcond +#ifdef ADIOS2_HAVE_CUDA + #include + #include +#endif #include "adios2/helper/adiosMath.h" #include "adios2/helper/adiosSystem.h" @@ -74,6 +78,18 @@ void InsertToBuffer(std::vector &buffer, const T *source, buffer.insert(buffer.end(), src, src + elements * sizeof(T)); } +#ifdef ADIOS2_HAVE_CUDA +template +void CopyFromGPUToBuffer(std::vector &buffer, size_t &position, + const T *source, const size_t elements) noexcept +{ + const char *src = reinterpret_cast(source); + cudaMemcpy(buffer.data() + position, src, elements * sizeof(T), + cudaMemcpyDeviceToHost); + position += elements * sizeof(T); +} +#endif + template void CopyToBuffer(std::vector &buffer, size_t &position, const T *source, const size_t elements) noexcept diff --git a/source/adios2/toolkit/format/bp/BPSerializer.tcc b/source/adios2/toolkit/format/bp/BPSerializer.tcc index 4deedbb2e7..f33779290d 100644 --- a/source/adios2/toolkit/format/bp/BPSerializer.tcc +++ b/source/adios2/toolkit/format/bp/BPSerializer.tcc @@ -72,6 +72,18 @@ inline void BPSerializer::PutPayloadInBuffer( { const size_t blockSize = helper::GetTotalSize(blockInfo.Count); m_Profiler.Start("memcpy"); + +#ifdef ADIOS2_HAVE_CUDA + if (blockInfo.IsGPU) + { + helper::CopyFromGPUToBuffer(m_Data.m_Buffer, m_Data.m_Position, + blockInfo.Data, blockSize); + m_Profiler.Stop("memcpy"); + m_Data.m_AbsolutePosition += blockSize * sizeof(T); + return; + } +#endif + if (!blockInfo.MemoryStart.empty()) { helper::CopyMemoryBlock( diff --git a/source/adios2/toolkit/format/bp/bp4/BP4Serializer.tcc b/source/adios2/toolkit/format/bp/bp4/BP4Serializer.tcc index 99a8371327..9045dd500a 100644 --- a/source/adios2/toolkit/format/bp/bp4/BP4Serializer.tcc +++ b/source/adios2/toolkit/format/bp/bp4/BP4Serializer.tcc @@ -334,6 +334,15 @@ BP4Serializer::GetBPStats(const bool singleValue, stats.Step = m_MetadataSet.TimeStep; stats.FileIndex = GetFileIndex(); +#ifdef ADIOS2_HAVE_CUDA + if (blockInfo.IsGPU) + { + const size_t size = helper::GetTotalSize(blockInfo.Count); + helper::CUDAMinMax(blockInfo.Data, size, stats.Min, stats.Max); + return stats; + } +#endif + // support span if (blockInfo.Data == nullptr && m_Parameters.StatsLevel > 0) { From 00b97e70f380dcf7f7ef840067c67b588d75e8b9 Mon Sep 17 00:00:00 2001 From: Ana Gainaru Date: Fri, 8 Oct 2021 13:46:38 -0400 Subject: [PATCH 5/5] Example code using device buffers in Put functions --- examples/CMakeLists.txt | 4 ++ examples/cuda/CMakeLists.txt | 10 ++++ examples/cuda/cudaWriteRead.cu | 103 +++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 examples/cuda/CMakeLists.txt create mode 100644 examples/cuda/cudaWriteRead.cu diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 9608088d64..227d6ae2e7 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -16,3 +16,7 @@ endif() if(ADIOS2_BUILD_EXAMPLES_EXPERIMENTAL) add_subdirectory(experimental) endif() + +if(ADIOS2_HAVE_CUDA) + add_subdirectory(cuda) +endif() diff --git a/examples/cuda/CMakeLists.txt b/examples/cuda/CMakeLists.txt new file mode 100644 index 0000000000..ab5a1cfde8 --- /dev/null +++ b/examples/cuda/CMakeLists.txt @@ -0,0 +1,10 @@ +#------------------------------------------------------------------------------# +# Distributed under the OSI-approved Apache License, Version 2.0. See +# accompanying file Copyright.txt for details. +#------------------------------------------------------------------------------# + +enable_language(CUDA) + +add_executable(GPUWriteRead_cuda cudaWriteRead.cu) +target_link_libraries(GPUWriteRead_cuda PUBLIC adios2::cxx11 CUDA::cudart CUDA::cuda_driver) +set_target_properties(GPUWriteRead_cuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) diff --git a/examples/cuda/cudaWriteRead.cu b/examples/cuda/cudaWriteRead.cu new file mode 100644 index 0000000000..e83754ac7c --- /dev/null +++ b/examples/cuda/cudaWriteRead.cu @@ -0,0 +1,103 @@ +/* + * Simple example of writing and reading data + * through ADIOS2 BP engine with multiple simulations steps + * for every IO step. + */ + +#include +#include +#include + +#include + +#include +#include + +__global__ void update_array(float *vect, int val) { + vect[blockIdx.x] += val; +} + +int BPWrite(const std::string fname, const size_t N, int nSteps){ + // Initialize the simulation data + float *gpuSimData; + cudaMalloc(&gpuSimData, N * sizeof(float)); + cudaMemset(gpuSimData, 0, N); + + // Set up the ADIOS structures + adios2::ADIOS adios; + adios2::IO io = adios.DeclareIO("WriteIO"); + + // Declare an array for the ADIOS data of size (NumOfProcesses * N) + const adios2::Dims shape{static_cast(N)}; + const adios2::Dims start{static_cast(0)}; + const adios2::Dims count{N}; + auto data = io.DefineVariable("data", shape, start, count); + + adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write); + + // Simulation steps + for (size_t step = 0; step < nSteps; ++step) + { + // Make a 1D selection to describe the local dimensions of the + // variable we write and its offsets in the global spaces + adios2::Box sel({0}, {N}); + data.SetSelection(sel); + + // Start IO step every write step + bpWriter.BeginStep(); + data.SetMemorySpace(adios2::MemorySpace::CUDA); + bpWriter.Put(data, gpuSimData); + bpWriter.EndStep(); + + // Update values in the simulation data + update_array<<>>(gpuSimData, 10); + } + + bpWriter.Close(); + return 0; +} + +int BPRead(const std::string fname, const size_t N, int nSteps){ + // Create ADIOS structures + adios2::ADIOS adios; + adios2::IO io = adios.DeclareIO("ReadIO"); + + adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read); + + auto data = io.InquireVariable("data"); + std::cout << "Steps expected by the reader: " << bpReader.Steps() << std::endl; + std::cout << "Expecting data per step: " << data.Shape()[0]; + std::cout << " elements" << std::endl; + + int write_step = bpReader.Steps(); + // Create the local buffer and initialize the access point in the ADIOS file + std::vector simData(N); //set size to N + const adios2::Dims start{0}; + const adios2::Dims count{N}; + const adios2::Box sel(start, count); + data.SetSelection(sel); + + // Read the data in each of the ADIOS steps + for (size_t step = 0; step < write_step; step++) + { + data.SetStepSelection({step, 1}); + bpReader.Get(data, simData.data()); + bpReader.PerformGets(); + std::cout << "Simualation step " << step << " : "; + std::cout << simData.size() << " elements: " << simData[1] << std::endl; + } + bpReader.Close(); + return 0; +} + +int main(int argc, char **argv){ + const std::string fname("GPUWriteRead.bp"); + const int device_id = 1; + cudaSetDevice(device_id); + const size_t N = 6000; + int nSteps = 10, ret = 0; + + ret += BPWrite(fname, N, nSteps); + ret += BPRead(fname, N, nSteps); + return ret; +}