Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix collision particle invalid memory #4810

Draft
wants to merge 1 commit into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions include/picongpu/particles/collision/InterCollision.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,22 @@ namespace picongpu::particles::collision

using FramePtr0 = typename T_ParBox0::FramePtr;
using FramePtr1 = typename T_ParBox1::FramePtr;
detail::cellDensity<FramePtr0>(worker, forEachCell, parCellList0, densityArray0, accFilter0);
detail::cellDensity<FramePtr1>(worker, forEachCell, parCellList1, densityArray1, accFilter1);
detail::cellDensity<FramePtr0>(
worker,
forEachCell,
pb0,
superCellIdx,
parCellList0,
densityArray0,
accFilter0);
detail::cellDensity<FramePtr1>(
worker,
forEachCell,
pb1,
superCellIdx,
parCellList1,
densityArray1,
accFilter1);
worker.sync();

// shuffle indices list of the longest particle list
Expand All @@ -187,8 +201,8 @@ namespace picongpu::particles::collision
superCellIdx,
densityArray0[linearIdx],
densityArray1[linearIdx],
parCellList0.template getParticlesAccessor<FramePtr0>(linearIdx),
parCellList1.template getParticlesAccessor<FramePtr1>(linearIdx),
parCellList0.getParticlesAccessor(pb0, superCellIdx, linearIdx),
parCellList1.getParticlesAccessor(pb1, superCellIdx, linearIdx),
linearIdx);
});

Expand Down
4 changes: 2 additions & 2 deletions include/picongpu/particles/collision/IntraCollision.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ namespace picongpu::particles::collision
prepareList(worker, forEachCell, pb, superCellIdx, deviceHeapHandle, parCellList, nppc, accFilter);

using FramePtr = typename T_ParBox::FramePtr;
detail::cellDensity<FramePtr>(worker, forEachCell, parCellList, densityArray, accFilter);
detail::cellDensity<FramePtr>(worker, forEachCell, pb, superCellIdx, parCellList, densityArray, accFilter);

worker.sync();

Expand All @@ -148,7 +148,7 @@ namespace picongpu::particles::collision
auto collisionFunctorCtx = forEachCell(
[&](int32_t const idx)
{
auto parAccess = parCellList.template getParticlesAccessor<FramePtr>(idx);
auto parAccess = parCellList.getParticlesAccessor(pb, superCellIdx, idx);
uint32_t const sizeAll = parAccess.size();
uint32_t potentialPartners = sizeAll - 1u + sizeAll % 2u;
auto collisionFunctor = srcCollisionFunctor(
Expand Down
92 changes: 28 additions & 64 deletions include/picongpu/particles/collision/detail/ListEntry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,26 @@ namespace picongpu::particles::collision
*
* @tparam T_FramePtrType frame pointer type
*/
template<typename T_FramePtrType>
template<typename T_ParBox>
struct ParticleAccessor
{
T_FramePtrType* m_framePtrList = nullptr;
using FramePtrType = typename T_ParBox::FramePtr;
FramePtrType m_framePtr;
uint32_t* m_parIdxList = nullptr;
uint32_t m_numPars = 0u;
T_ParBox const& m_parBox;

static constexpr uint32_t frameSize = T_FramePtrType::type::frameSize;
static constexpr uint32_t frameSize = FramePtrType::type::frameSize;

DINLINE ParticleAccessor(uint32_t* parIdxList, uint32_t const numParticles, T_FramePtrType* framePtrList)
: m_framePtrList(framePtrList)
DINLINE ParticleAccessor(
T_ParBox const& parBox,
uint32_t* parIdxList,
uint32_t const numParticles,
FramePtrType const& framePtr)
: m_framePtr(framePtr)
, m_parIdxList(parIdxList)
, m_numPars(numParticles)
, m_parBox(parBox)
{
}

Expand All @@ -67,8 +74,13 @@ namespace picongpu::particles::collision
*/
DINLINE auto operator[](uint32_t idx) const
{
const uint32_t inSuperCellIdx = m_parIdxList[idx];
return m_framePtrList[inSuperCellIdx / frameSize][inSuperCellIdx % frameSize];
uint32_t const inSuperCellIdx = m_parIdxList[idx];
uint32_t const skipFrames = inSuperCellIdx / frameSize;

auto frame = m_framePtr;
for(uint32_t i = 0; i < skipFrames; ++i)
frame = m_parBox.getNextFrame(frame);
return frame[inSuperCellIdx % frameSize];
}
};

Expand All @@ -87,26 +99,12 @@ namespace picongpu::particles::collision
//! number of particles per cell
memory::Array<uint32_t, T_numElem> numParticles;

/** Frame pointer array.
*
* A Frame pointer contains only a pointer therefore storing data as void* is allowed (but not
* nice). This keeps the ListEntry signature equal for all species.
*/
void** framePtr = nullptr;

public:
DINLINE uint32_t& size(uint32_t cellIdx)
{
return numParticles[cellIdx];
}

template<typename T_FramePtrType>
DINLINE T_FramePtrType& frameData(uint32_t frameIdx)
{
static_assert(sizeof(void*) == sizeof(T_FramePtrType));
return reinterpret_cast<T_FramePtrType*>(framePtr)[frameIdx];
}

/** Get particle index array.
*
* @param cellIdx index of the cell within the supercell
Expand Down Expand Up @@ -139,31 +137,6 @@ namespace picongpu::particles::collision
DataSpace<simDim> superCellIdx,
T_NumParticlesArray& numParArray)
{
constexpr uint32_t frameSize = T_ParticlesBox::frameSize;
auto onlyMaster = lockstep::makeMaster(worker);
onlyMaster(
[&]()
{
auto& superCell = pb.getSuperCell(superCellIdx);
uint32_t numParticlesInSupercell = superCell.getNumParticles();

uint32_t numFrames = (numParticlesInSupercell + frameSize - 1u) / frameSize;
constexpr uint32_t framePtrBytes = sizeof(typename T_ParticlesBox::FramePtr);

// Chunk size in bytes based on the typical initial number of frames within a supercell.
constexpr uint32_t frameListChunkSize = cellListChunkSize * framePtrBytes;
framePtr = (void**)
allocMem<frameListChunkSize>(worker, numFrames * framePtrBytes, deviceHeapHandle);

auto frame = pb.getFirstFrame(superCellIdx);
uint32_t frameId = 0u;
while(frame.isValid())
{
frameData<decltype(frame)>(frameId) = frame;
frame = pb.getNextFrame(frame);
++frameId;
}
});
auto forEachCell = lockstep::makeForEach<T_numElem>(worker);
// memory for particle indices
forEachCell(
Expand Down Expand Up @@ -227,20 +200,6 @@ namespace picongpu::particles::collision
template<typename T_Worker, typename T_DeviceHeapHandle>
DINLINE void finalize(T_Worker const& worker, T_DeviceHeapHandle& deviceHeapHandle)
{
auto onlyMaster = lockstep::makeMaster(worker);
onlyMaster(
[&]()
{
if(framePtr != nullptr)
{
#if(BOOST_LANG_CUDA || BOOST_COMP_HIP)
deviceHeapHandle.free(worker.getAcc(), (void*) framePtr);
#else
delete(framePtr);
#endif
framePtr = nullptr;
}
});
auto forEachCell = lockstep::makeForEach<T_numElem>(worker);
// memory for particle indices
forEachCell(
Expand All @@ -265,13 +224,18 @@ namespace picongpu::particles::collision
* @param cellIdx cell index within the supercell, range [0, number of cells in supercell)
* @return accessor to access particles via index
*/
template<typename T_FramePtrType>
DINLINE auto getParticlesAccessor(uint32_t cellIdx)
template<typename T_ParBox>
DINLINE auto getParticlesAccessor(
T_ParBox const& parBox,
DataSpace<simDim> const& superCellIdx,
uint32_t cellIdx)
{
return ParticleAccessor<T_FramePtrType>(
using FramePtrType = typename T_ParBox::FramePtr;
return ParticleAccessor<T_ParBox>(
parBox,
particleIds(cellIdx),
size(cellIdx),
reinterpret_cast<T_FramePtrType*>(framePtr));
parBox.getFirstFrame(superCellIdx));
}

private:
Expand Down
5 changes: 4 additions & 1 deletion include/picongpu/particles/collision/detail/cellDensity.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,23 @@ namespace picongpu::particles::collision::detail
typename T_FramePtr,
typename T_Worker,
typename T_ForEachCell,
typename T_ParBox,
typename T_EntryListArray,
typename T_Array,
typename T_Filter>
DINLINE void cellDensity(
T_Worker const& worker,
T_ForEachCell forEachCell,
T_ParBox const& pb,
DataSpace<simDim> const& superCellIdx,
T_EntryListArray& parCellList,
T_Array& densityArray,
T_Filter& filter)
{
forEachCell(
[&](uint32_t const linearIdx)
{
auto parAccess = parCellList.template getParticlesAccessor<T_FramePtr>(linearIdx);
auto parAccess = parCellList.getParticlesAccessor(pb, superCellIdx, linearIdx);
uint32_t const numParInCell = parAccess.size();
float_X density(0.0);
for(uint32_t partIdx = 0; partIdx < numParInCell; partIdx++)
Expand Down