diff --git a/include/picongpu/particles/collision/InterCollision.hpp b/include/picongpu/particles/collision/InterCollision.hpp index d91f9fda36..bb45d0ce9a 100644 --- a/include/picongpu/particles/collision/InterCollision.hpp +++ b/include/picongpu/particles/collision/InterCollision.hpp @@ -160,8 +160,22 @@ namespace picongpu::particles::collision using FramePtr0 = typename T_ParBox0::FramePtr; using FramePtr1 = typename T_ParBox1::FramePtr; - detail::cellDensity(worker, forEachCell, parCellList0, densityArray0, accFilter0); - detail::cellDensity(worker, forEachCell, parCellList1, densityArray1, accFilter1); + detail::cellDensity( + worker, + forEachCell, + pb0, + superCellIdx, + parCellList0, + densityArray0, + accFilter0); + detail::cellDensity( + worker, + forEachCell, + pb1, + superCellIdx, + parCellList1, + densityArray1, + accFilter1); worker.sync(); // shuffle indices list of the longest particle list @@ -187,8 +201,8 @@ namespace picongpu::particles::collision superCellIdx, densityArray0[linearIdx], densityArray1[linearIdx], - parCellList0.template getParticlesAccessor(linearIdx), - parCellList1.template getParticlesAccessor(linearIdx), + parCellList0.getParticlesAccessor(pb0, superCellIdx, linearIdx), + parCellList1.getParticlesAccessor(pb1, superCellIdx, linearIdx), linearIdx); }); diff --git a/include/picongpu/particles/collision/IntraCollision.hpp b/include/picongpu/particles/collision/IntraCollision.hpp index 86e2c539be..ae5f76f05f 100644 --- a/include/picongpu/particles/collision/IntraCollision.hpp +++ b/include/picongpu/particles/collision/IntraCollision.hpp @@ -131,7 +131,7 @@ namespace picongpu::particles::collision prepareList(worker, forEachCell, pb, superCellIdx, deviceHeapHandle, parCellList, nppc, accFilter); using FramePtr = typename T_ParBox::FramePtr; - detail::cellDensity(worker, forEachCell, parCellList, densityArray, accFilter); + detail::cellDensity(worker, forEachCell, pb, superCellIdx, parCellList, densityArray, accFilter); worker.sync(); @@ -148,7 +148,7 @@ namespace picongpu::particles::collision auto collisionFunctorCtx = forEachCell( [&](int32_t const idx) { - auto parAccess = parCellList.template getParticlesAccessor(idx); + auto parAccess = parCellList.getParticlesAccessor(pb, superCellIdx, idx); uint32_t const sizeAll = parAccess.size(); uint32_t potentialPartners = sizeAll - 1u + sizeAll % 2u; auto collisionFunctor = srcCollisionFunctor( diff --git a/include/picongpu/particles/collision/detail/ListEntry.hpp b/include/picongpu/particles/collision/detail/ListEntry.hpp index 012d45bb7c..53b5d6ba14 100644 --- a/include/picongpu/particles/collision/detail/ListEntry.hpp +++ b/include/picongpu/particles/collision/detail/ListEntry.hpp @@ -38,19 +38,26 @@ namespace picongpu::particles::collision * * @tparam T_FramePtrType frame pointer type */ - template + template struct ParticleAccessor { - T_FramePtrType* m_framePtrList = nullptr; + using FramePtrType = typename T_ParBox::FramePtr; + FramePtrType m_framePtr; uint32_t* m_parIdxList = nullptr; uint32_t m_numPars = 0u; + T_ParBox const& m_parBox; - static constexpr uint32_t frameSize = T_FramePtrType::type::frameSize; + static constexpr uint32_t frameSize = FramePtrType::type::frameSize; - DINLINE ParticleAccessor(uint32_t* parIdxList, uint32_t const numParticles, T_FramePtrType* framePtrList) - : m_framePtrList(framePtrList) + DINLINE ParticleAccessor( + T_ParBox const& parBox, + uint32_t* parIdxList, + uint32_t const numParticles, + FramePtrType const& framePtr) + : m_framePtr(framePtr) , m_parIdxList(parIdxList) , m_numPars(numParticles) + , m_parBox(parBox) { } @@ -67,8 +74,13 @@ namespace picongpu::particles::collision */ DINLINE auto operator[](uint32_t idx) const { - const uint32_t inSuperCellIdx = m_parIdxList[idx]; - return m_framePtrList[inSuperCellIdx / frameSize][inSuperCellIdx % frameSize]; + uint32_t const inSuperCellIdx = m_parIdxList[idx]; + uint32_t const skipFrames = inSuperCellIdx / frameSize; + + auto frame = m_framePtr; + for(uint32_t i = 0; i < skipFrames; ++i) + frame = m_parBox.getNextFrame(frame); + return frame[inSuperCellIdx % frameSize]; } }; @@ -87,26 +99,12 @@ namespace picongpu::particles::collision //! number of particles per cell memory::Array numParticles; - /** Frame pointer array. - * - * A Frame pointer contains only a pointer therefore storing data as void* is allowed (but not - * nice). This keeps the ListEntry signature equal for all species. - */ - void** framePtr = nullptr; - public: DINLINE uint32_t& size(uint32_t cellIdx) { return numParticles[cellIdx]; } - template - DINLINE T_FramePtrType& frameData(uint32_t frameIdx) - { - static_assert(sizeof(void*) == sizeof(T_FramePtrType)); - return reinterpret_cast(framePtr)[frameIdx]; - } - /** Get particle index array. * * @param cellIdx index of the cell within the supercell @@ -139,31 +137,6 @@ namespace picongpu::particles::collision DataSpace superCellIdx, T_NumParticlesArray& numParArray) { - constexpr uint32_t frameSize = T_ParticlesBox::frameSize; - auto onlyMaster = lockstep::makeMaster(worker); - onlyMaster( - [&]() - { - auto& superCell = pb.getSuperCell(superCellIdx); - uint32_t numParticlesInSupercell = superCell.getNumParticles(); - - uint32_t numFrames = (numParticlesInSupercell + frameSize - 1u) / frameSize; - constexpr uint32_t framePtrBytes = sizeof(typename T_ParticlesBox::FramePtr); - - // Chunk size in bytes based on the typical initial number of frames within a supercell. - constexpr uint32_t frameListChunkSize = cellListChunkSize * framePtrBytes; - framePtr = (void**) - allocMem(worker, numFrames * framePtrBytes, deviceHeapHandle); - - auto frame = pb.getFirstFrame(superCellIdx); - uint32_t frameId = 0u; - while(frame.isValid()) - { - frameData(frameId) = frame; - frame = pb.getNextFrame(frame); - ++frameId; - } - }); auto forEachCell = lockstep::makeForEach(worker); // memory for particle indices forEachCell( @@ -227,20 +200,6 @@ namespace picongpu::particles::collision template DINLINE void finalize(T_Worker const& worker, T_DeviceHeapHandle& deviceHeapHandle) { - auto onlyMaster = lockstep::makeMaster(worker); - onlyMaster( - [&]() - { - if(framePtr != nullptr) - { -#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) - deviceHeapHandle.free(worker.getAcc(), (void*) framePtr); -#else - delete(framePtr); -#endif - framePtr = nullptr; - } - }); auto forEachCell = lockstep::makeForEach(worker); // memory for particle indices forEachCell( @@ -265,13 +224,18 @@ namespace picongpu::particles::collision * @param cellIdx cell index within the supercell, range [0, number of cells in supercell) * @return accessor to access particles via index */ - template - DINLINE auto getParticlesAccessor(uint32_t cellIdx) + template + DINLINE auto getParticlesAccessor( + T_ParBox const& parBox, + DataSpace const& superCellIdx, + uint32_t cellIdx) { - return ParticleAccessor( + using FramePtrType = typename T_ParBox::FramePtr; + return ParticleAccessor( + parBox, particleIds(cellIdx), size(cellIdx), - reinterpret_cast(framePtr)); + parBox.getFirstFrame(superCellIdx)); } private: diff --git a/include/picongpu/particles/collision/detail/cellDensity.hpp b/include/picongpu/particles/collision/detail/cellDensity.hpp index 8ceea4e60a..f7cb163fb2 100644 --- a/include/picongpu/particles/collision/detail/cellDensity.hpp +++ b/include/picongpu/particles/collision/detail/cellDensity.hpp @@ -29,12 +29,15 @@ namespace picongpu::particles::collision::detail typename T_FramePtr, typename T_Worker, typename T_ForEachCell, + typename T_ParBox, typename T_EntryListArray, typename T_Array, typename T_Filter> DINLINE void cellDensity( T_Worker const& worker, T_ForEachCell forEachCell, + T_ParBox const& pb, + DataSpace const& superCellIdx, T_EntryListArray& parCellList, T_Array& densityArray, T_Filter& filter) @@ -42,7 +45,7 @@ namespace picongpu::particles::collision::detail forEachCell( [&](uint32_t const linearIdx) { - auto parAccess = parCellList.template getParticlesAccessor(linearIdx); + auto parAccess = parCellList.getParticlesAccessor(pb, superCellIdx, linearIdx); uint32_t const numParInCell = parAccess.size(); float_X density(0.0); for(uint32_t partIdx = 0; partIdx < numParInCell; partIdx++)