Skip to content

Commit

Permalink
migrate ranges to SoATemplate
Browse files Browse the repository at this point in the history
  • Loading branch information
ariostas committed Oct 24, 2024
1 parent fd99b50 commit 534ac1b
Show file tree
Hide file tree
Showing 16 changed files with 350 additions and 362 deletions.
1 change: 1 addition & 0 deletions RecoTracker/LSTCore/interface/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ namespace lst {
using ArrayUxHits = edm::StdArray<unsigned int, kHits>;
};

using ArrayIx2 = edm::StdArray<int, 2>;
using ArrayUx2 = edm::StdArray<unsigned int, 2>;

} //namespace lst
Expand Down
50 changes: 50 additions & 0 deletions RecoTracker/LSTCore/interface/ObjectRangesSoA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#ifndef RecoTracker_LSTCore_interface_ObjectRangesSoA_h
#define RecoTracker_LSTCore_interface_ObjectRangesSoA_h

// Fixed-width integer types used as column/scalar types below.
#include <cstdint>

#include "DataFormats/SoATemplate/interface/SoALayout.h"
#include "DataFormats/Portable/interface/PortableCollection.h"

// Declares lst::ArrayIx2, used as a column type below. Included explicitly so
// that this header does not depend on the include order of its users.
#include "RecoTracker/LSTCore/interface/Constants.h"

namespace lst {

  // Per-module index ranges of the reconstructed objects.
  //
  // Each ArrayIx2 column stores a pair of ints per element. For mdRanges the
  // kernels write [0] = index of the first mini-doublet of the module and
  // [1] = index of the last one (inclusive), or -1 in both slots when the
  // module has no mini-doublets or no hits.
  // NOTE(review): the other *Ranges columns presumably follow the same
  // first/last convention -- confirm against the kernels that fill them.
  GENERATE_SOA_LAYOUT(ObjectRangesSoALayout,
                      SOA_COLUMN(ArrayIx2, hitRanges),
                      SOA_COLUMN(int, hitRangesLower),
                      SOA_COLUMN(int, hitRangesUpper),
                      SOA_COLUMN(int8_t, hitRangesnLower),
                      SOA_COLUMN(int8_t, hitRangesnUpper),
                      SOA_COLUMN(ArrayIx2, mdRanges),
                      SOA_COLUMN(ArrayIx2, segmentRanges),
                      SOA_COLUMN(ArrayIx2, trackletRanges),
                      SOA_COLUMN(ArrayIx2, tripletRanges),
                      SOA_COLUMN(ArrayIx2, quintupletRanges))

  // Per-module storage bookkeeping for the reconstructed objects.
  //
  // For mini-doublets the kernels use the columns as follows:
  //  - miniDoubletModuleIndices[i] is the offset of module i's first slot in
  //    the flat mini-doublet storage; the entry at index nLowerModules holds
  //    the total number of slots, which is also stored in nTotalMDs.
  //  - miniDoubletModuleOccupancy[i] is the number of slots reserved for
  //    module i; candidates beyond that count are dropped.
  // indicesOfEligibleT5Modules lists module indices, of which the first
  // nEligibleT5Modules entries are read by the quintuplet duplicate removal.
  // NOTE(review): the segment/triplet/quintuplet index and occupancy columns
  // presumably mirror the mini-doublet ones -- confirm against their kernels.
  //
  // triplets and quintuplets end up with an unused pixel entry at the end
  GENERATE_SOA_LAYOUT(ObjectOccupancySoALayout,
                      SOA_COLUMN(int, miniDoubletModuleIndices),
                      SOA_COLUMN(int, miniDoubletModuleOccupancy),
                      SOA_COLUMN(int, segmentModuleIndices),
                      SOA_COLUMN(int, segmentModuleOccupancy),
                      SOA_COLUMN(int, tripletModuleIndices),
                      SOA_COLUMN(int, tripletModuleOccupancy),
                      SOA_COLUMN(int, quintupletModuleIndices),
                      SOA_COLUMN(int, quintupletModuleOccupancy),
                      SOA_COLUMN(uint16_t, indicesOfEligibleT5Modules),
                      SOA_SCALAR(unsigned int, nTotalMDs),
                      SOA_SCALAR(unsigned int, nTotalSegs),
                      SOA_SCALAR(unsigned int, nTotalTrips),
                      SOA_SCALAR(unsigned int, nTotalQuints),
                      SOA_SCALAR(uint16_t, nEligibleT5Modules))

  // Layouts instantiated with their default template arguments.
  using ObjectRangesSoA = ObjectRangesSoALayout<>;
  using ObjectOccupancySoA = ObjectOccupancySoALayout<>;

  // Mutable and read-only views, as passed by value to the kernels.
  using ObjectRanges = ObjectRangesSoA::View;
  using ObjectRangesConst = ObjectRangesSoA::ConstView;
  using ObjectOccupancy = ObjectOccupancySoA::View;
  using ObjectOccupancyConst = ObjectOccupancySoA::ConstView;

  // Host collection bundling both layouts (ranges first, occupancy second).
  using ObjectRangesHostCollection = PortableHostMultiCollection<ObjectRangesSoA, ObjectOccupancySoA>;

}  // namespace lst

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef RecoTracker_LSTCore_interface_alpaka_ObjectRangesDeviceCollection_h
#define RecoTracker_LSTCore_interface_alpaka_ObjectRangesDeviceCollection_h

#include "DataFormats/Portable/interface/alpaka/PortableCollection.h"

#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h"

// Declares the ranges/occupancy collection type for the alpaka backend selected
// by ALPAKA_ACCELERATOR_NAMESPACE.
namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
// Bundles the same two layouts, in the same order, as lst::ObjectRangesHostCollection:
// ObjectRangesSoA first, ObjectOccupancySoA second.
// NOTE(review): ObjectRangesSoA and ObjectOccupancySoA are declared in ::lst, not in this
// namespace; the unqualified names presumably resolve through a using-directive provided
// by another LSTCore header that must be included first -- confirm, or qualify them.
using ObjectRangesDeviceCollection = PortableCollection2<ObjectRangesSoA, ObjectOccupancySoA>;
} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst

#endif
155 changes: 93 additions & 62 deletions RecoTracker/LSTCore/src/alpaka/Event.dev.cc

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions RecoTracker/LSTCore/src/alpaka/Event.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "RecoTracker/LSTCore/interface/alpaka/Constants.h"
#include "RecoTracker/LSTCore/interface/alpaka/LST.h"
#include "RecoTracker/LSTCore/interface/Module.h"
#include "RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h"

#include "Hit.h"
#include "Segment.h"
Expand Down Expand Up @@ -43,8 +44,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
unsigned int nTotalSegments_;

//Device stuff
std::optional<ObjectRanges> rangesInGPU_;
std::optional<ObjectRangesBuffer<Device>> rangesBuffers_;
std::optional<ObjectRangesDeviceCollection> rangesDC_;
std::optional<Hits> hitsInGPU_;
std::optional<HitsBuffer<Device>> hitsBuffers_;
std::optional<MiniDoubletsDeviceCollection> miniDoubletsDC_;
Expand All @@ -60,7 +60,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
std::optional<PixelQuintupletsBuffer<Device>> pixelQuintupletsBuffers_;

//CPU interface stuff
std::optional<ObjectRangesBuffer<DevHost>> rangesInCPU_;
std::optional<ObjectRangesHostCollection> rangesHC_;
std::optional<HitsBuffer<DevHost>> hitsInCPU_;
std::optional<MiniDoubletsHostCollection> miniDoubletsHC_;
std::optional<SegmentsHostCollection> segmentsHC_;
Expand Down Expand Up @@ -180,7 +180,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
// HANDLE WITH CARE
HitsBuffer<DevHost>& getHits(bool sync = true);
HitsBuffer<DevHost>& getHitsInCMSSW(bool sync = true);
ObjectRangesBuffer<DevHost>& getRanges(bool sync = true);
template <typename TSoA, typename TDev = Device>
typename TSoA::ConstView getRanges(bool sync = true);
template <typename TSoA, typename TDev = Device>
typename TSoA::ConstView getMiniDoublets(bool sync = true);
template <typename TSoA, typename TDev = Device>
Expand Down
23 changes: 13 additions & 10 deletions RecoTracker/LSTCore/src/alpaka/Kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

#include "RecoTracker/LSTCore/interface/alpaka/Constants.h"
#include "RecoTracker/LSTCore/interface/Module.h"
#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h"

#include "Hit.h"
#include "MiniDoublet.h"
#include "ObjectRanges.h"
#include "Segment.h"
#include "Triplet.h"
#include "Quintuplet.h"
Expand Down Expand Up @@ -147,14 +147,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
ALPAKA_FN_ACC void operator()(TAcc const& acc,
Modules modulesInGPU,
Quintuplets quintupletsInGPU,
ObjectRanges rangesInGPU) const {
ObjectOccupancyConst objectOccupancy) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (unsigned int lowmod = globalThreadIdx[0]; lowmod < *modulesInGPU.nLowerModules;
lowmod += gridThreadExtent[0]) {
unsigned int nQuintuplets_lowmod = quintupletsInGPU.nQuintuplets[lowmod];
int quintupletModuleIndices_lowmod = rangesInGPU.quintupletModuleIndices[lowmod];
int quintupletModuleIndices_lowmod = objectOccupancy.quintupletModuleIndices()[lowmod];

for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) {
unsigned int ix = quintupletModuleIndices_lowmod + ix1;
Expand Down Expand Up @@ -194,27 +194,30 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {

struct RemoveDupQuintupletsInGPUBeforeTC {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc, Quintuplets quintupletsInGPU, ObjectRanges rangesInGPU) const {
ALPAKA_FN_ACC void operator()(TAcc const& acc,
Quintuplets quintupletsInGPU,
ObjectOccupancyConst objectOccupancy) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < *(rangesInGPU.nEligibleT5Modules);
for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < objectOccupancy.nEligibleT5Modules();
lowmodIdx1 += gridThreadExtent[1]) {
uint16_t lowmod1 = rangesInGPU.indicesOfEligibleT5Modules[lowmodIdx1];
uint16_t lowmod1 = objectOccupancy.indicesOfEligibleT5Modules()[lowmodIdx1];
unsigned int nQuintuplets_lowmod1 = quintupletsInGPU.nQuintuplets[lowmod1];
if (nQuintuplets_lowmod1 == 0)
continue;

unsigned int quintupletModuleIndices_lowmod1 = rangesInGPU.quintupletModuleIndices[lowmod1];
unsigned int quintupletModuleIndices_lowmod1 = objectOccupancy.quintupletModuleIndices()[lowmod1];

for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < *(rangesInGPU.nEligibleT5Modules);
for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1;
lowmodIdx2 < objectOccupancy.nEligibleT5Modules();
lowmodIdx2 += gridThreadExtent[2]) {
uint16_t lowmod2 = rangesInGPU.indicesOfEligibleT5Modules[lowmodIdx2];
uint16_t lowmod2 = objectOccupancy.indicesOfEligibleT5Modules()[lowmodIdx2];
unsigned int nQuintuplets_lowmod2 = quintupletsInGPU.nQuintuplets[lowmod2];
if (nQuintuplets_lowmod2 == 0)
continue;

unsigned int quintupletModuleIndices_lowmod2 = rangesInGPU.quintupletModuleIndices[lowmod2];
unsigned int quintupletModuleIndices_lowmod2 = objectOccupancy.quintupletModuleIndices()[lowmod2];

for (unsigned int ix1 = 0; ix1 < nQuintuplets_lowmod1; ix1 += 1) {
unsigned int ix = quintupletModuleIndices_lowmod1 + ix1;
Expand Down
29 changes: 15 additions & 14 deletions RecoTracker/LSTCore/src/alpaka/MiniDoublet.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
#include "RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h"
#include "RecoTracker/LSTCore/interface/Module.h"
#include "RecoTracker/LSTCore/interface/EndcapGeometry.h"
#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h"

#include "Hit.h"
#include "ObjectRanges.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
template <typename TAcc>
Expand Down Expand Up @@ -698,7 +698,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
Hits hitsInGPU,
MiniDoublets mds,
MiniDoubletsOccupancy mdsOccupancy,
ObjectRanges rangesInGPU) const {
ObjectOccupancyConst objectOccupancy) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

Expand Down Expand Up @@ -757,14 +757,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
if (success) {
int totOccupancyMDs = alpaka::atomicAdd(
acc, &mdsOccupancy.totOccupancyMDs()[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{});
if (totOccupancyMDs >= (rangesInGPU.miniDoubletModuleOccupancy[lowerModuleIndex])) {
if (totOccupancyMDs >= (objectOccupancy.miniDoubletModuleOccupancy()[lowerModuleIndex])) {
#ifdef WARNINGS
printf("Mini-doublet excess alert! Module index = %d\n", lowerModuleIndex);
#endif
} else {
int mdModuleIndex =
alpaka::atomicAdd(acc, &mdsOccupancy.nMDs()[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{});
unsigned int mdIndex = rangesInGPU.miniDoubletModuleIndices[lowerModuleIndex] + mdModuleIndex;
unsigned int mdIndex = objectOccupancy.miniDoubletModuleIndices()[lowerModuleIndex] + mdModuleIndex;

addMDToMemory(acc,
mds,
Expand All @@ -791,7 +791,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {

struct CreateMDArrayRangesGPU {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, ObjectRanges rangesInGPU) const {
ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, ObjectOccupancy objectOccupancy) const {
// implementation is 1D with a single block
static_assert(std::is_same_v<TAcc, ALPAKA_ACCELERATOR_NAMESPACE::Acc1D>, "Should be Acc1D");
ALPAKA_ASSERT_ACC((alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0] == 1));
Expand Down Expand Up @@ -870,15 +870,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {

unsigned int nTotMDs = alpaka::atomicAdd(acc, &nTotalMDs, occupancy, alpaka::hierarchy::Threads{});

rangesInGPU.miniDoubletModuleIndices[i] = nTotMDs;
rangesInGPU.miniDoubletModuleOccupancy[i] = occupancy;
objectOccupancy.miniDoubletModuleIndices()[i] = nTotMDs;
objectOccupancy.miniDoubletModuleOccupancy()[i] = occupancy;
}

// Wait for all threads to finish before reporting final values
alpaka::syncBlockThreads(acc);
if (cms::alpakatools::once_per_block(acc)) {
rangesInGPU.miniDoubletModuleIndices[*modulesInGPU.nLowerModules] = nTotalMDs;
*rangesInGPU.device_nTotalMDs = nTotalMDs;
objectOccupancy.miniDoubletModuleIndices()[*modulesInGPU.nLowerModules] = nTotalMDs;
objectOccupancy.nTotalMDs() = nTotalMDs;
}
}
};
Expand All @@ -888,7 +888,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
ALPAKA_FN_ACC void operator()(TAcc const& acc,
Modules modulesInGPU,
MiniDoubletsOccupancy mdsOccupancy,
ObjectRanges rangesInGPU,
ObjectRanges ranges,
ObjectOccupancyConst objectOccupancy,
Hits hitsInGPU) const {
// implementation is 1D with a single block
static_assert(std::is_same_v<TAcc, ALPAKA_ACCELERATOR_NAMESPACE::Acc1D>, "Should be Acc1D");
Expand All @@ -899,11 +900,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {

for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) {
if (mdsOccupancy.nMDs()[i] == 0 or hitsInGPU.hitRanges[i * 2] == -1) {
rangesInGPU.mdRanges[i * 2] = -1;
rangesInGPU.mdRanges[i * 2 + 1] = -1;
ranges.mdRanges()[i][0] = -1;
ranges.mdRanges()[i][1] = -1;
} else {
rangesInGPU.mdRanges[i * 2] = rangesInGPU.miniDoubletModuleIndices[i];
rangesInGPU.mdRanges[i * 2 + 1] = rangesInGPU.miniDoubletModuleIndices[i] + mdsOccupancy.nMDs()[i] - 1;
ranges.mdRanges()[i][0] = objectOccupancy.miniDoubletModuleIndices()[i];
ranges.mdRanges()[i][1] = objectOccupancy.miniDoubletModuleIndices()[i] + mdsOccupancy.nMDs()[i] - 1;
}
}
}
Expand Down
Loading

0 comments on commit 534ac1b

Please sign in to comment.