diff --git a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc index 0f0c53344de18..d0e103b1e315b 100644 --- a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc +++ b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc @@ -1,5 +1,4 @@ // LST includes -#include "RecoTracker/LSTCore/interface/Module.h" #include "RecoTracker/LSTCore/interface/alpaka/LST.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" diff --git a/RecoTracker/LSTCore/interface/Constants.h b/RecoTracker/LSTCore/interface/Constants.h index 350857ac0b2e5..b1bd4e124eacc 100644 --- a/RecoTracker/LSTCore/interface/Constants.h +++ b/RecoTracker/LSTCore/interface/Constants.h @@ -2,6 +2,15 @@ #define RecoTracker_LSTCore_interface_Constants_h #include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Common/interface/StdArray.h" + +#if defined(FP16_Base) +#if defined ALPAKA_ACC_GPU_CUDA_ENABLED +#include +#elif defined ALPAKA_ACC_GPU_HIP_ENABLED +#include +#endif +#endif #ifdef CACHE_ALLOC #include "HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h" @@ -55,27 +64,68 @@ namespace lst { constexpr unsigned int size_superbins = 45000; +// Half precision wrapper functions. +#if defined(FP16_Base) +#define __F2H __float2half +#define __H2F __half2float + typedef __half float FPX; +#else +#define __F2H +#define __H2F + typedef float FPX; +#endif + +// Needed for files that are compiled by g++ to not throw an error. +// uint4 is defined only for CUDA, so we will have to revisit this soon when running on other backends. +#if !defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_HIP_ENABLED) + struct uint4 { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; +#endif + // Defining the constant host device variables right up here // Currently pixel tracks treated as LSs with 2 double layers (IT layers 1+2 and 3+4) and 4 hits. 
To be potentially handled better in the future. + struct Params_Modules { + using ArrayU16xMaxConnected = edm::StdArray; + }; struct Params_pLS { static constexpr int kLayers = 2, kHits = 4; }; struct Params_LS { static constexpr int kLayers = 2, kHits = 4; + using ArrayUxLayers = edm::StdArray; }; struct Params_T3 { static constexpr int kLayers = 3, kHits = 6; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; }; struct Params_pT3 { static constexpr int kLayers = 5, kHits = 10; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; }; struct Params_T5 { static constexpr int kLayers = 5, kHits = 10; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; }; struct Params_pT5 { static constexpr int kLayers = 7, kHits = 14; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; }; + using ArrayIx2 = edm::StdArray; + using ArrayUx2 = edm::StdArray; + } //namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h b/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h deleted file mode 100644 index ce037b026fc22..0000000000000 --- a/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef RecoTracker_LSTCore_interface_EndcapGeometryBuffers_h -#define RecoTracker_LSTCore_interface_EndcapGeometryBuffers_h - -#include -#include -#include -#include -#include -#include -#include - -#include "RecoTracker/LSTCore/interface/Constants.h" - -namespace lst { - - struct EndcapGeometryDev { - const unsigned int* geoMapDetId; - const float* geoMapPhi; - - template - void setData(TBuff const& buf) { - geoMapDetId = buf.geoMapDetId_buf.data(); - geoMapPhi = buf.geoMapPhi_buf.data(); - } - }; - - template - struct EndcapGeometryBuffer { - Buf geoMapDetId_buf; - Buf 
geoMapPhi_buf; - - EndcapGeometryBuffer(TDev const& dev, unsigned int nEndCapMap) - : geoMapDetId_buf(allocBufWrapper(dev, nEndCapMap)), - geoMapPhi_buf(allocBufWrapper(dev, nEndCapMap)) { - data_.setData(*this); - } - - template - inline void copyFromSrc(TQueue queue, EndcapGeometryBuffer const& src) { - alpaka::memcpy(queue, geoMapDetId_buf, src.geoMapDetId_buf); - alpaka::memcpy(queue, geoMapPhi_buf, src.geoMapPhi_buf); - } - - template - EndcapGeometryBuffer(TQueue queue, EndcapGeometryBuffer const& src, unsigned int nEndCapMap) - : EndcapGeometryBuffer(alpaka::getDev(queue), nEndCapMap) { - copyFromSrc(queue, src); - } - - inline EndcapGeometryDev const* data() const { return &data_; } - - private: - EndcapGeometryDev data_; - }; - -} // namespace lst - -#endif diff --git a/RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h b/RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h new file mode 100644 index 0000000000000..e761ac5942bf8 --- /dev/null +++ b/RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_EndcapGeometryDevHostCollection_h +#define RecoTracker_LSTCore_interface_EndcapGeometryDevHostCollection_h + +#include "RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using EndcapGeometryDevHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h b/RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h new file mode 100644 index 0000000000000..587abfdaec66a --- /dev/null +++ b/RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h @@ -0,0 +1,18 @@ +#ifndef RecoTracker_LSTCore_interface_EndcapGeometryDevSoA_h +#define RecoTracker_LSTCore_interface_EndcapGeometryDevSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +namespace 
lst { + + GENERATE_SOA_LAYOUT(EndcapGeometryDevSoALayout, SOA_COLUMN(unsigned int, geoMapDetId), SOA_COLUMN(float, geoMapPhi)) + + using EndcapGeometryDevSoA = EndcapGeometryDevSoALayout<>; + + using EndcapGeometryDev = EndcapGeometryDevSoA::View; + using EndcapGeometryDevConst = EndcapGeometryDevSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/HitsHostCollection.h b/RecoTracker/LSTCore/interface/HitsHostCollection.h new file mode 100644 index 0000000000000..f26c98c36e069 --- /dev/null +++ b/RecoTracker/LSTCore/interface/HitsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_HitsHostCollection_h +#define RecoTracker_LSTCore_interface_HitsHostCollection_h + +#include "RecoTracker/LSTCore/interface/HitsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using HitsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/HitsSoA.h b/RecoTracker/LSTCore/interface/HitsSoA.h new file mode 100644 index 0000000000000..4aed06e1bc31f --- /dev/null +++ b/RecoTracker/LSTCore/interface/HitsSoA.h @@ -0,0 +1,43 @@ +#ifndef RecoTracker_LSTCore_interface_HitsSoA_h +#define RecoTracker_LSTCore_interface_HitsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(HitsSoALayout, + SOA_COLUMN(float, xs), + SOA_COLUMN(float, ys), + SOA_COLUMN(float, zs), + SOA_COLUMN(uint16_t, moduleIndices), + SOA_COLUMN(unsigned int, idxs), + SOA_COLUMN(unsigned int, detid), + SOA_COLUMN(float, rts), + SOA_COLUMN(float, phis), + SOA_COLUMN(float, etas), + SOA_COLUMN(float, highEdgeXs), + SOA_COLUMN(float, highEdgeYs), + SOA_COLUMN(float, lowEdgeXs), + SOA_COLUMN(float, lowEdgeYs)) + + GENERATE_SOA_LAYOUT(HitsRangesSoALayout, + SOA_COLUMN(ArrayIx2, hitRanges), + 
SOA_COLUMN(int, hitRangesLower), + SOA_COLUMN(int, hitRangesUpper), + SOA_COLUMN(int8_t, hitRangesnLower), + SOA_COLUMN(int8_t, hitRangesnUpper)) + + using HitsSoA = HitsSoALayout<>; + using HitsRangesSoA = HitsRangesSoALayout<>; + + using Hits = HitsSoA::View; + using HitsConst = HitsSoA::ConstView; + using HitsRanges = HitsRangesSoA::View; + using HitsRangesConst = HitsRangesSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/LSTESData.h b/RecoTracker/LSTCore/interface/LSTESData.h index 9f51be48f28b6..83798e92dcb3f 100644 --- a/RecoTracker/LSTCore/interface/LSTESData.h +++ b/RecoTracker/LSTCore/interface/LSTESData.h @@ -2,8 +2,8 @@ #define RecoTracker_LSTCore_interface_LSTESData_h #include "RecoTracker/LSTCore/interface/Constants.h" -#include "RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h" -#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesHostCollection.h" #include "RecoTracker/LSTCore/interface/PixelMap.h" #include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" @@ -19,23 +19,23 @@ namespace lst { uint16_t nLowerModules; unsigned int nPixels; unsigned int nEndCapMap; - ModulesBuffer modulesBuffers; - EndcapGeometryBuffer endcapGeometryBuffers; + std::unique_ptr> modules; + std::unique_ptr> endcapGeometry; std::shared_ptr pixelMapping; LSTESData(uint16_t const& nModulesIn, uint16_t const& nLowerModulesIn, unsigned int const& nPixelsIn, unsigned int const& nEndCapMapIn, - ModulesBuffer const& modulesBuffersIn, - EndcapGeometryBuffer const& endcapGeometryBuffersIn, + std::unique_ptr> modulesIn, + std::unique_ptr> endcapGeometryIn, std::shared_ptr const& pixelMappingIn) : nModules(nModulesIn), nLowerModules(nLowerModulesIn), nPixels(nPixelsIn), nEndCapMap(nEndCapMapIn), - modulesBuffers(modulesBuffersIn), - endcapGeometryBuffers(endcapGeometryBuffersIn), + 
modules(std::move(modulesIn)), + endcapGeometry(std::move(endcapGeometryIn)), pixelMapping(pixelMappingIn) {} }; @@ -44,24 +44,49 @@ namespace lst { } // namespace lst namespace cms::alpakatools { + + // The templated definition in CMSSW doesn't work when using CPU as the device + template <> + struct CopyToDevice> { + template + static auto copyAsync(TQueue& queue, PortableHostCollection const& srcData) { + using TDevice = typename alpaka::trait::DevType::type; + PortableCollection dstData(srcData->metadata().size(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + return dstData; + } + }; + + template <> + struct CopyToDevice> { + template + static auto copyAsync(TQueue& queue, + PortableHostMultiCollection const& srcData) { + using TDevice = typename alpaka::trait::DevType::type; + PortableMultiCollection dstData(srcData.sizes(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + return dstData; + } + }; + template <> struct CopyToDevice> { template static lst::LSTESData> copyAsync(TQueue& queue, lst::LSTESData const& srcData) { - auto deviceModulesBuffers = - lst::ModulesBuffer>(alpaka::getDev(queue), srcData.nModules, srcData.nPixels); - deviceModulesBuffers.copyFromSrc(queue, srcData.modulesBuffers); - auto deviceEndcapGeometryBuffers = - lst::EndcapGeometryBuffer>(alpaka::getDev(queue), srcData.nEndCapMap); - deviceEndcapGeometryBuffers.copyFromSrc(queue, srcData.endcapGeometryBuffers); + auto deviceModules = + std::make_unique, lst::ModulesSoA, lst::ModulesPixelSoA>>( + CopyToDevice>::copyAsync( + queue, *srcData.modules)); + auto deviceEndcapGeometry = std::make_unique>>( + CopyToDevice>::copyAsync(queue, *srcData.endcapGeometry)); return lst::LSTESData>(srcData.nModules, srcData.nLowerModules, srcData.nPixels, srcData.nEndCapMap, - std::move(deviceModulesBuffers), - std::move(deviceEndcapGeometryBuffers), + std::move(deviceModules), + std::move(deviceEndcapGeometry), srcData.pixelMapping); } }; diff --git 
a/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h b/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h new file mode 100644 index 0000000000000..33169a07b9e51 --- /dev/null +++ b/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_MiniDoubletsHostCollection_h +#define RecoTracker_LSTCore_interface_MiniDoubletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using MiniDoubletsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h b/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h new file mode 100644 index 0000000000000..c93a00e1a8aec --- /dev/null +++ b/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h @@ -0,0 +1,58 @@ +#ifndef RecoTracker_LSTCore_interface_MiniDoubletsSoA_h +#define RecoTracker_LSTCore_interface_MiniDoubletsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(MiniDoubletsSoALayout, + SOA_COLUMN(unsigned int, anchorHitIndices), + SOA_COLUMN(unsigned int, outerHitIndices), + SOA_COLUMN(uint16_t, moduleIndices), + SOA_COLUMN(float, dphichanges), + SOA_COLUMN(float, dzs), + SOA_COLUMN(float, dphis), + SOA_COLUMN(float, shiftedXs), + SOA_COLUMN(float, shiftedYs), + SOA_COLUMN(float, shiftedZs), + SOA_COLUMN(float, noShiftedDphis), + SOA_COLUMN(float, noShiftedDphiChanges), + SOA_COLUMN(float, anchorX), + SOA_COLUMN(float, anchorY), + SOA_COLUMN(float, anchorZ), + SOA_COLUMN(float, anchorRt), + SOA_COLUMN(float, anchorPhi), + SOA_COLUMN(float, anchorEta), + SOA_COLUMN(float, anchorHighEdgeX), + SOA_COLUMN(float, anchorHighEdgeY), + SOA_COLUMN(float, anchorLowEdgeX), + SOA_COLUMN(float, anchorLowEdgeY), + SOA_COLUMN(float, anchorLowEdgePhi), + SOA_COLUMN(float, 
anchorHighEdgePhi), + SOA_COLUMN(float, outerX), + SOA_COLUMN(float, outerY), + SOA_COLUMN(float, outerZ), + SOA_COLUMN(float, outerRt), + SOA_COLUMN(float, outerPhi), + SOA_COLUMN(float, outerEta), + SOA_COLUMN(float, outerHighEdgeX), + SOA_COLUMN(float, outerHighEdgeY), + SOA_COLUMN(float, outerLowEdgeX), + SOA_COLUMN(float, outerLowEdgeY)); + + GENERATE_SOA_LAYOUT(MiniDoubletsOccupancySoALayout, + SOA_COLUMN(unsigned int, nMDs), + SOA_COLUMN(unsigned int, totOccupancyMDs)); + + using MiniDoubletsSoA = MiniDoubletsSoALayout<>; + using MiniDoubletsOccupancySoA = MiniDoubletsOccupancySoALayout<>; + + using MiniDoublets = MiniDoubletsSoA::View; + using MiniDoubletsConst = MiniDoubletsSoA::ConstView; + using MiniDoubletsOccupancy = MiniDoubletsOccupancySoA::View; + using MiniDoubletsOccupancyConst = MiniDoubletsOccupancySoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/Module.h b/RecoTracker/LSTCore/interface/Module.h deleted file mode 100644 index 7266ebd7bc49b..0000000000000 --- a/RecoTracker/LSTCore/interface/Module.h +++ /dev/null @@ -1,227 +0,0 @@ -#ifndef RecoTracker_LSTCore_interface_Module_h -#define RecoTracker_LSTCore_interface_Module_h - -#include "RecoTracker/LSTCore/interface/Constants.h" - -namespace lst { - enum SubDet { InnerPixel = 0, Barrel = 5, Endcap = 4 }; - - enum Side { NegZ = 1, PosZ = 2, Center = 3 }; - - enum ModuleType { PS, TwoS, PixelModule }; - - enum ModuleLayerType { Pixel, Strip, InnerPixelLayer }; - - struct Modules { - const unsigned int* detIds; - const uint16_t* moduleMap; - const unsigned int* mapdetId; - const uint16_t* mapIdx; - const uint16_t* nConnectedModules; - const float* drdzs; - const float* dxdys; - const uint16_t* nModules; - const uint16_t* nLowerModules; - const uint16_t* partnerModuleIndices; - - const short* layers; - const short* rings; - const short* modules; - const short* rods; - const short* subdets; - const short* sides; - const float* eta; - const float* r; - const 
bool* isInverted; - const bool* isLower; - const bool* isAnchor; - const ModuleType* moduleType; - const ModuleLayerType* moduleLayerType; - const int* lstLayers; - const unsigned int* connectedPixels; - - static bool parseIsInverted(short subdet, short side, short module, short layer) { - if (subdet == Endcap) { - if (side == NegZ) { - return module % 2 == 1; - } else if (side == PosZ) { - return module % 2 == 0; - } else { - return false; - } - } else if (subdet == Barrel) { - if (side == Center) { - if (layer <= 3) { - return module % 2 == 1; - } else if (layer >= 4) { - return module % 2 == 0; - } else { - return false; - } - } else if (side == NegZ or side == PosZ) { - if (layer <= 2) { - return module % 2 == 1; - } else if (layer == 3) { - return module % 2 == 0; - } else { - return false; - } - } else { - return false; - } - } else { - return false; - } - } - - static bool parseIsLower(bool isInvertedx, unsigned int detId) { - return (isInvertedx) ? !(detId & 1) : (detId & 1); - } - - static unsigned int parsePartnerModuleId(unsigned int detId, bool isLowerx, bool isInvertedx) { - return isLowerx ? (isInvertedx ? detId - 1 : detId + 1) : (isInvertedx ? 
detId + 1 : detId - 1); - } - - template - void setData(TBuff const& buf) { - detIds = buf.detIds_buf.data(); - moduleMap = buf.moduleMap_buf.data(); - mapdetId = buf.mapdetId_buf.data(); - mapIdx = buf.mapIdx_buf.data(); - nConnectedModules = buf.nConnectedModules_buf.data(); - drdzs = buf.drdzs_buf.data(); - dxdys = buf.dxdys_buf.data(); - nModules = buf.nModules_buf.data(); - nLowerModules = buf.nLowerModules_buf.data(); - partnerModuleIndices = buf.partnerModuleIndices_buf.data(); - - layers = buf.layers_buf.data(); - rings = buf.rings_buf.data(); - modules = buf.modules_buf.data(); - rods = buf.rods_buf.data(); - subdets = buf.subdets_buf.data(); - sides = buf.sides_buf.data(); - eta = buf.eta_buf.data(); - r = buf.r_buf.data(); - isInverted = buf.isInverted_buf.data(); - isLower = buf.isLower_buf.data(); - isAnchor = buf.isAnchor_buf.data(); - moduleType = buf.moduleType_buf.data(); - moduleLayerType = buf.moduleLayerType_buf.data(); - lstLayers = buf.lstLayers_buf.data(); - connectedPixels = buf.connectedPixels_buf.data(); - } - }; - - template - struct ModulesBuffer { - Buf detIds_buf; - Buf moduleMap_buf; - Buf mapdetId_buf; - Buf mapIdx_buf; - Buf nConnectedModules_buf; - Buf drdzs_buf; - Buf dxdys_buf; - Buf nModules_buf; - Buf nLowerModules_buf; - Buf partnerModuleIndices_buf; - - Buf layers_buf; - Buf rings_buf; - Buf modules_buf; - Buf rods_buf; - Buf subdets_buf; - Buf sides_buf; - Buf eta_buf; - Buf r_buf; - Buf isInverted_buf; - Buf isLower_buf; - Buf isAnchor_buf; - Buf moduleType_buf; - Buf moduleLayerType_buf; - Buf lstLayers_buf; - Buf connectedPixels_buf; - - Modules data_; - - ModulesBuffer(TDev const& dev, unsigned int nMod, unsigned int nPixs) - : detIds_buf(allocBufWrapper(dev, nMod)), - moduleMap_buf(allocBufWrapper(dev, nMod * max_connected_modules)), - mapdetId_buf(allocBufWrapper(dev, nMod)), - mapIdx_buf(allocBufWrapper(dev, nMod)), - nConnectedModules_buf(allocBufWrapper(dev, nMod)), - drdzs_buf(allocBufWrapper(dev, nMod)), - 
dxdys_buf(allocBufWrapper(dev, nMod)), - nModules_buf(allocBufWrapper(dev, 1)), - nLowerModules_buf(allocBufWrapper(dev, 1)), - partnerModuleIndices_buf(allocBufWrapper(dev, nMod)), - - layers_buf(allocBufWrapper(dev, nMod)), - rings_buf(allocBufWrapper(dev, nMod)), - modules_buf(allocBufWrapper(dev, nMod)), - rods_buf(allocBufWrapper(dev, nMod)), - subdets_buf(allocBufWrapper(dev, nMod)), - sides_buf(allocBufWrapper(dev, nMod)), - eta_buf(allocBufWrapper(dev, nMod)), - r_buf(allocBufWrapper(dev, nMod)), - isInverted_buf(allocBufWrapper(dev, nMod)), - isLower_buf(allocBufWrapper(dev, nMod)), - isAnchor_buf(allocBufWrapper(dev, nMod)), - moduleType_buf(allocBufWrapper(dev, nMod)), - moduleLayerType_buf(allocBufWrapper(dev, nMod)), - lstLayers_buf(allocBufWrapper(dev, nMod)), - connectedPixels_buf(allocBufWrapper(dev, nPixs)) { - data_.setData(*this); - } - - template - inline void copyFromSrc(TQueue queue, ModulesBuffer const& src, bool isFull = true) { - alpaka::memcpy(queue, detIds_buf, src.detIds_buf); - if (isFull) { - alpaka::memcpy(queue, moduleMap_buf, src.moduleMap_buf); - alpaka::memcpy(queue, mapdetId_buf, src.mapdetId_buf); - alpaka::memcpy(queue, mapIdx_buf, src.mapIdx_buf); - alpaka::memcpy(queue, nConnectedModules_buf, src.nConnectedModules_buf); - alpaka::memcpy(queue, drdzs_buf, src.drdzs_buf); - alpaka::memcpy(queue, dxdys_buf, src.dxdys_buf); - } - alpaka::memcpy(queue, nModules_buf, src.nModules_buf); - alpaka::memcpy(queue, nLowerModules_buf, src.nLowerModules_buf); - if (isFull) { - alpaka::memcpy(queue, partnerModuleIndices_buf, src.partnerModuleIndices_buf); - } - - alpaka::memcpy(queue, layers_buf, src.layers_buf); - alpaka::memcpy(queue, rings_buf, src.rings_buf); - alpaka::memcpy(queue, modules_buf, src.modules_buf); - alpaka::memcpy(queue, rods_buf, src.rods_buf); - alpaka::memcpy(queue, subdets_buf, src.subdets_buf); - alpaka::memcpy(queue, sides_buf, src.sides_buf); - alpaka::memcpy(queue, eta_buf, src.eta_buf); - alpaka::memcpy(queue, 
r_buf, src.r_buf); - if (isFull) { - alpaka::memcpy(queue, isInverted_buf, src.isInverted_buf); - } - alpaka::memcpy(queue, isLower_buf, src.isLower_buf); - if (isFull) { - alpaka::memcpy(queue, isAnchor_buf, src.isAnchor_buf); - } - alpaka::memcpy(queue, moduleType_buf, src.moduleType_buf); - if (isFull) { - alpaka::memcpy(queue, moduleLayerType_buf, src.moduleLayerType_buf); - alpaka::memcpy(queue, lstLayers_buf, src.lstLayers_buf); - alpaka::memcpy(queue, connectedPixels_buf, src.connectedPixels_buf); - } - } - - template - ModulesBuffer(TQueue queue, ModulesBuffer const& src, unsigned int nMod, unsigned int nPixs) - : ModulesBuffer(alpaka::getDev(queue), nMod, nPixs) { - copyFromSrc(queue, src); - } - - inline Modules const* data() const { return &data_; } - }; - -} // namespace lst -#endif diff --git a/RecoTracker/LSTCore/interface/ModulesHostCollection.h b/RecoTracker/LSTCore/interface/ModulesHostCollection.h new file mode 100644 index 0000000000000..4119fb6ffb1a2 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ModulesHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_ModulesHostCollection_h +#define RecoTracker_LSTCore_interface_ModulesHostCollection_h + +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using ModulesHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/ModulesSoA.h b/RecoTracker/LSTCore/interface/ModulesSoA.h new file mode 100644 index 0000000000000..8bf1dc9232cd1 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ModulesSoA.h @@ -0,0 +1,57 @@ +#ifndef RecoTracker_LSTCore_interface_ModulesSoA_h +#define RecoTracker_LSTCore_interface_ModulesSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + + enum SubDet { 
InnerPixel = 0, Barrel = 5, Endcap = 4 }; + + enum Side { NegZ = 1, PosZ = 2, Center = 3 }; + + enum ModuleType { PS, TwoS, PixelModule }; + + enum ModuleLayerType { Pixel, Strip, InnerPixelLayer }; + + GENERATE_SOA_LAYOUT(ModulesSoALayout, + SOA_COLUMN(unsigned int, detIds), + SOA_COLUMN(Params_Modules::ArrayU16xMaxConnected, moduleMap), + SOA_COLUMN(unsigned int, mapdetId), + SOA_COLUMN(uint16_t, mapIdx), + SOA_COLUMN(uint16_t, nConnectedModules), + SOA_COLUMN(float, drdzs), + SOA_COLUMN(float, dxdys), + SOA_COLUMN(uint16_t, partnerModuleIndices), + SOA_COLUMN(short, layers), + SOA_COLUMN(short, rings), + SOA_COLUMN(short, modules), + SOA_COLUMN(short, rods), + SOA_COLUMN(short, subdets), + SOA_COLUMN(short, sides), + SOA_COLUMN(float, eta), + SOA_COLUMN(float, r), + SOA_COLUMN(bool, isInverted), + SOA_COLUMN(bool, isLower), + SOA_COLUMN(bool, isAnchor), + SOA_COLUMN(ModuleType, moduleType), + SOA_COLUMN(ModuleLayerType, moduleLayerType), + SOA_COLUMN(int, lstLayers), + SOA_SCALAR(uint16_t, nModules), + SOA_SCALAR(uint16_t, nLowerModules)) + + GENERATE_SOA_LAYOUT(ModulesPixelSoALayout, SOA_COLUMN(unsigned int, connectedPixels)) + + using ModulesSoA = ModulesSoALayout<>; + using ModulesPixelSoA = ModulesPixelSoALayout<>; + + using Modules = ModulesSoA::View; + using ModulesConst = ModulesSoA::ConstView; + using ModulesPixel = ModulesPixelSoA::View; + using ModulesPixelConst = ModulesPixelSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h b/RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h new file mode 100644 index 0000000000000..5a6d3e8ca13b4 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_ObjectRangesHostCollection_h +#define RecoTracker_LSTCore_interface_ObjectRangesHostCollection_h + +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include 
"DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using ObjectRangesHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/ObjectRangesSoA.h b/RecoTracker/LSTCore/interface/ObjectRangesSoA.h new file mode 100644 index 0000000000000..09371cd4b2b56 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ObjectRangesSoA.h @@ -0,0 +1,38 @@ +#ifndef RecoTracker_LSTCore_interface_ObjectRangesSoA_h +#define RecoTracker_LSTCore_interface_ObjectRangesSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(ObjectRangesSoALayout, + SOA_COLUMN(ArrayIx2, mdRanges), + SOA_COLUMN(ArrayIx2, segmentRanges), + SOA_COLUMN(ArrayIx2, tripletRanges), + SOA_COLUMN(ArrayIx2, quintupletRanges), + SOA_COLUMN(int, miniDoubletModuleIndices), + SOA_COLUMN(int, miniDoubletModuleOccupancy), + SOA_COLUMN(int, segmentModuleIndices), + SOA_COLUMN(int, segmentModuleOccupancy), + SOA_COLUMN(int, tripletModuleIndices), + SOA_COLUMN(int, tripletModuleOccupancy), + SOA_COLUMN(int, quintupletModuleIndices), + SOA_COLUMN(int, quintupletModuleOccupancy), + SOA_COLUMN(uint16_t, indicesOfEligibleT5Modules), + SOA_SCALAR(unsigned int, nTotalMDs), + SOA_SCALAR(unsigned int, nTotalSegs), + SOA_SCALAR(unsigned int, nTotalTrips), + SOA_SCALAR(unsigned int, nTotalQuints), + SOA_SCALAR(uint16_t, nEligibleT5Modules)) + + using ObjectRangesSoA = ObjectRangesSoALayout<>; + + using ObjectRanges = ObjectRangesSoA::View; + using ObjectRangesConst = ObjectRangesSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h b/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h new file mode 100644 index 0000000000000..afb2560680621 --- /dev/null +++ 
b/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_PixelQuintupletsHostCollection_h +#define RecoTracker_LSTCore_interface_PixelQuintupletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using PixelQuintupletsHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h b/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h new file mode 100644 index 0000000000000..504594dae6d94 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h @@ -0,0 +1,35 @@ +#ifndef RecoTracker_LSTCore_interface_PixelQuintupletsSoA_h +#define RecoTracker_LSTCore_interface_PixelQuintupletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + GENERATE_SOA_LAYOUT(PixelQuintupletsSoALayout, + SOA_COLUMN(unsigned int, pixelSegmentIndices), + SOA_COLUMN(unsigned int, quintupletIndices), + SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices), // lower module index (OT part) + SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers), // layer ID + SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(float, rPhiChiSquared), // chi2 from pLS to T5 + SOA_COLUMN(float, rPhiChiSquaredInwards), // chi2 from T5 to pLS + SOA_COLUMN(float, rzChiSquared), + SOA_COLUMN(FPX, pixelRadius), // pLS pt converted + SOA_COLUMN(FPX, quintupletRadius), // T5 circle + SOA_COLUMN(FPX, eta), + SOA_COLUMN(FPX, phi), + SOA_COLUMN(FPX, score), // used for ranking (in e.g. 
duplicate cleaning) + SOA_COLUMN(FPX, centerX), // T3-based circle center x + SOA_COLUMN(FPX, centerY), // T3-based circle center y + SOA_COLUMN(bool, isDup), + SOA_SCALAR(unsigned int, nPixelQuintuplets), + SOA_SCALAR(unsigned int, totOccupancyPixelQuintuplets)); + + using PixelQuintupletsSoA = PixelQuintupletsSoALayout<>; + using PixelQuintuplets = PixelQuintupletsSoA::View; + using PixelQuintupletsConst = PixelQuintupletsSoA::ConstView; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h b/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h new file mode 100644 index 0000000000000..67678e64bfc03 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_PixelTripletsHostCollection_h +#define RecoTracker_LSTCore_interface_PixelTripletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using PixelTripletsHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/PixelTripletsSoA.h b/RecoTracker/LSTCore/interface/PixelTripletsSoA.h new file mode 100644 index 0000000000000..bf940e2cd3bd0 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelTripletsSoA.h @@ -0,0 +1,39 @@ +#ifndef RecoTracker_LSTCore_interface_PixelTripletsSoA_h +#define RecoTracker_LSTCore_interface_PixelTripletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + GENERATE_SOA_LAYOUT(PixelTripletsSoALayout, + SOA_COLUMN(unsigned int, pixelSegmentIndices), + SOA_COLUMN(unsigned int, tripletIndices), + SOA_COLUMN(Params_pT3::ArrayU16xLayers, lowerModuleIndices), // lower module index (OT part) + SOA_COLUMN(Params_pT3::ArrayU8xLayers, logicalLayers), // 
layer ID + SOA_COLUMN(Params_pT3::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(float, rPhiChiSquared), // chi2 from pLS to T3 + SOA_COLUMN(float, rPhiChiSquaredInwards), // chi2 from T3 to pLS + SOA_COLUMN(float, rzChiSquared), + SOA_COLUMN(FPX, pixelRadius), // pLS pt converted + SOA_COLUMN(FPX, tripletRadius), // T3 circle + SOA_COLUMN(FPX, pt), + SOA_COLUMN(FPX, eta), + SOA_COLUMN(FPX, phi), + SOA_COLUMN(FPX, eta_pix), // eta from pLS + SOA_COLUMN(FPX, phi_pix), // phi from pLS + SOA_COLUMN(FPX, score), // used for ranking (in e.g. duplicate cleaning) + SOA_COLUMN(FPX, centerX), // T3-based circle center x + SOA_COLUMN(FPX, centerY), // T3-based circle center y + SOA_COLUMN(bool, isDup), + SOA_SCALAR(unsigned int, nPixelTriplets), + SOA_SCALAR(unsigned int, totOccupancyPixelTriplets)); + + using PixelTripletsSoA = PixelTripletsSoALayout<>; + using PixelTriplets = PixelTripletsSoA::View; + using PixelTripletsConst = PixelTripletsSoA::ConstView; + +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h b/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h new file mode 100644 index 0000000000000..734ce03057be7 --- /dev/null +++ b/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_QuintupletsHostCollection_h +#define RecoTracker_LSTCore_interface_QuintupletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using QuintupletsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/QuintupletsSoA.h b/RecoTracker/LSTCore/interface/QuintupletsSoA.h new file mode 100644 index 0000000000000..05da002e5e343 --- /dev/null +++ b/RecoTracker/LSTCore/interface/QuintupletsSoA.h @@ -0,0 +1,46 @@ +#ifndef RecoTracker_LSTCore_interface_QuintupletsSoA_h +#define 
RecoTracker_LSTCore_interface_QuintupletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + GENERATE_SOA_LAYOUT(QuintupletsSoALayout, + SOA_COLUMN(ArrayUx2, tripletIndices), // inner and outer triplet indices + SOA_COLUMN(Params_T5::ArrayU16xLayers, lowerModuleIndices), // lower module index in each layer + SOA_COLUMN(Params_T5::ArrayU8xLayers, logicalLayers), // layer ID + SOA_COLUMN(Params_T5::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(FPX, innerRadius), // inner triplet circle radius + SOA_COLUMN(FPX, bridgeRadius), // "middle"/bridge triplet radius + SOA_COLUMN(FPX, outerRadius), // outer triplet radius + SOA_COLUMN(FPX, pt), + SOA_COLUMN(FPX, eta), + SOA_COLUMN(FPX, phi), + SOA_COLUMN(FPX, score_rphisum), // r-phi based score + SOA_COLUMN(char, isDup), // duplicate flag + SOA_COLUMN(bool, tightCutFlag), // tight pass to be a TC + SOA_COLUMN(bool, partOfPT5), + SOA_COLUMN(float, regressionRadius), + SOA_COLUMN(float, regressionG), + SOA_COLUMN(float, regressionF), + SOA_COLUMN(float, rzChiSquared), // r-z only chi2 + SOA_COLUMN(float, chiSquared), + SOA_COLUMN(float, nonAnchorChiSquared)); + + using QuintupletsSoA = QuintupletsSoALayout<>; + using Quintuplets = QuintupletsSoA::View; + using QuintupletsConst = QuintupletsSoA::ConstView; + + GENERATE_SOA_LAYOUT(QuintupletsOccupancySoALayout, + SOA_COLUMN(unsigned int, nQuintuplets), + SOA_COLUMN(unsigned int, totOccupancyQuintuplets)); + + using QuintupletsOccupancySoA = QuintupletsOccupancySoALayout<>; + using QuintupletsOccupancy = QuintupletsOccupancySoA::View; + using QuintupletsOccupancyConst = QuintupletsOccupancySoA::ConstView; + +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/SegmentsHostCollection.h b/RecoTracker/LSTCore/interface/SegmentsHostCollection.h new file mode 100644 index 0000000000000..2fa6ac912a732 --- 
/dev/null +++ b/RecoTracker/LSTCore/interface/SegmentsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_SegmentsHostCollection_h +#define RecoTracker_LSTCore_interface_SegmentsHostCollection_h + +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using SegmentsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/SegmentsSoA.h b/RecoTracker/LSTCore/interface/SegmentsSoA.h new file mode 100644 index 0000000000000..b5154dea56e49 --- /dev/null +++ b/RecoTracker/LSTCore/interface/SegmentsSoA.h @@ -0,0 +1,65 @@ +#ifndef RecoTracker_LSTCore_interface_SegmentsSoA_h +#define RecoTracker_LSTCore_interface_SegmentsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(SegmentsSoALayout, + SOA_COLUMN(FPX, dPhis), + SOA_COLUMN(FPX, dPhiMins), + SOA_COLUMN(FPX, dPhiMaxs), + SOA_COLUMN(FPX, dPhiChanges), + SOA_COLUMN(FPX, dPhiChangeMins), + SOA_COLUMN(FPX, dPhiChangeMaxs), + SOA_COLUMN(uint16_t, innerLowerModuleIndices), + SOA_COLUMN(uint16_t, outerLowerModuleIndices), + SOA_COLUMN(Params_LS::ArrayUxLayers, mdIndices), + SOA_COLUMN(unsigned int, innerMiniDoubletAnchorHitIndices), + SOA_COLUMN(unsigned int, outerMiniDoubletAnchorHitIndices) + //SOA_SCALAR(unsigned int, nMemoryLocations) + ) + + GENERATE_SOA_LAYOUT(SegmentsOccupancySoALayout, + SOA_COLUMN(unsigned int, nSegments), //number of segments per inner lower module + SOA_COLUMN(unsigned int, totOccupancySegments)) + + GENERATE_SOA_LAYOUT(SegmentsPixelSoALayout, + SOA_COLUMN(unsigned int, seedIdx), + SOA_COLUMN(int, charge), + SOA_COLUMN(int, superbin), + SOA_COLUMN(uint4, pLSHitsIdxs), + SOA_COLUMN(PixelType, pixelType), + SOA_COLUMN(char, isQuad), + SOA_COLUMN(char, isDup), 
+ SOA_COLUMN(bool, partOfPT5), + SOA_COLUMN(float, ptIn), + SOA_COLUMN(float, ptErr), + SOA_COLUMN(float, px), + SOA_COLUMN(float, py), + SOA_COLUMN(float, pz), + SOA_COLUMN(float, etaErr), + SOA_COLUMN(float, eta), + SOA_COLUMN(float, phi), + SOA_COLUMN(float, score), + SOA_COLUMN(float, circleCenterX), + SOA_COLUMN(float, circleCenterY), + SOA_COLUMN(float, circleRadius)) + + using SegmentsSoA = SegmentsSoALayout<>; + using SegmentsOccupancySoA = SegmentsOccupancySoALayout<>; + using SegmentsPixelSoA = SegmentsPixelSoALayout<>; + + using Segments = SegmentsSoA::View; + using SegmentsConst = SegmentsSoA::ConstView; + using SegmentsOccupancy = SegmentsOccupancySoA::View; + using SegmentsOccupancyConst = SegmentsOccupancySoA::ConstView; + using SegmentsPixel = SegmentsPixelSoA::View; + using SegmentsPixelConst = SegmentsPixelSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h b/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h new file mode 100644 index 0000000000000..3ffd2bedf945e --- /dev/null +++ b/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h +#define RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h + +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using TrackCandidatesHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h new file mode 100644 index 0000000000000..18bea1e51c6ba --- /dev/null +++ b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h @@ -0,0 +1,32 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesSoA_h +#define RecoTracker_LSTCore_interface_TrackCandidatesSoA_h + +#include +#include 
"DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + GENERATE_SOA_LAYOUT(TrackCandidatesSoALayout, + SOA_COLUMN(short, trackCandidateType), // 4-T5 5-pT3 7-pT5 8-pLS + SOA_COLUMN(unsigned int, directObjectIndices), // direct indices to each type containers + SOA_COLUMN(ArrayUx2, objectIndices), // tracklet and triplet indices + SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers), // + SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices), // + SOA_COLUMN(int, pixelSeedIndex), // + SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices), // + SOA_COLUMN(FPX, centerX), // + SOA_COLUMN(FPX, centerY), // + SOA_COLUMN(FPX, radius), // + SOA_SCALAR(unsigned int, nTrackCandidates), // + SOA_SCALAR(unsigned int, nTrackCandidatespT3), // + SOA_SCALAR(unsigned int, nTrackCandidatespT5), // + SOA_SCALAR(unsigned int, nTrackCandidatespLS), // + SOA_SCALAR(unsigned int, nTrackCandidatesT5)) // + + using TrackCandidatesSoA = TrackCandidatesSoALayout<>; + using TrackCandidates = TrackCandidatesSoA::View; + using TrackCandidatesConst = TrackCandidatesSoA::ConstView; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/TripletsHostCollection.h b/RecoTracker/LSTCore/interface/TripletsHostCollection.h new file mode 100644 index 0000000000000..6eaebd97e5bf6 --- /dev/null +++ b/RecoTracker/LSTCore/interface/TripletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TripletsHostCollection_h +#define RecoTracker_LSTCore_interface_TripletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using TripletsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/TripletsSoA.h b/RecoTracker/LSTCore/interface/TripletsSoA.h new file mode 100644 index 
0000000000000..e0407ef3a0912 --- /dev/null +++ b/RecoTracker/LSTCore/interface/TripletsSoA.h @@ -0,0 +1,42 @@ +#ifndef RecoTracker_LSTCore_interface_TripletsSoA_h +#define RecoTracker_LSTCore_interface_TripletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + GENERATE_SOA_LAYOUT(TripletsSoALayout, + SOA_COLUMN(ArrayUx2, segmentIndices), // inner and outer segment indices + SOA_COLUMN(Params_T3::ArrayU16xLayers, lowerModuleIndices), // lower module index in each layer + SOA_COLUMN(Params_T3::ArrayU8xLayers, logicalLayers), // layer ID + SOA_COLUMN(Params_T3::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(FPX, betaIn), // beta/chord angle of the inner segment + SOA_COLUMN(float, centerX), // lower/anchor-hit based circle center x + SOA_COLUMN(float, centerY), // lower/anchor-hit based circle center y + SOA_COLUMN(float, radius), // lower/anchor-hit based circle radius +#ifdef CUT_VALUE_DEBUG + SOA_COLUMN(float, zOut), + SOA_COLUMN(float, rtOut), + SOA_COLUMN(float, betaInCut), +#endif + SOA_COLUMN(bool, partOfPT5), // is it used in a pT5 + SOA_COLUMN(bool, partOfT5), // is it used in a T5 + SOA_COLUMN(bool, partOfPT3)); // is it used in a pT3 + + using TripletsSoA = TripletsSoALayout<>; + using Triplets = TripletsSoA::View; + using TripletsConst = TripletsSoA::ConstView; + + GENERATE_SOA_LAYOUT(TripletsOccupancySoALayout, + SOA_COLUMN(unsigned int, nTriplets), + SOA_COLUMN(unsigned int, totOccupancyTriplets)); + + using TripletsOccupancySoA = TripletsOccupancySoALayout<>; + using TripletsOccupancy = TripletsOccupancySoA::View; + using TripletsOccupancyConst = TripletsOccupancySoA::ConstView; + +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h index 1a16dad68420e..208f49cc52538 100644 --- 
a/RecoTracker/LSTCore/interface/alpaka/Constants.h +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -3,40 +3,12 @@ #include "RecoTracker/LSTCore/interface/Constants.h" -#if defined ALPAKA_ACC_GPU_CUDA_ENABLED -#include -#elif defined ALPAKA_ACC_GPU_HIP_ENABLED -#include -#endif - namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { using namespace ::lst; -// Half precision wrapper functions. -#if defined(FP16_Base) -#define __F2H __float2half -#define __H2F __half2float - typedef __half float FPX; -#else -#define __F2H -#define __H2F - typedef float FPX; -#endif - Vec3D constexpr elementsPerThread(Vec3D::all(static_cast(1))); -// Needed for files that are compiled by g++ to not throw an error. -// uint4 is defined only for CUDA, so we will have to revisit this soon when running on other backends. -#if !defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_HIP_ENABLED) - struct uint4 { - unsigned int x; - unsigned int y; - unsigned int z; - unsigned int w; - }; -#endif - // Adjust grid and block sizes based on backend configuration template > ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, diff --git a/RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h new file mode 100644 index 0000000000000..002e1e0423804 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_EndcapGeometryDevDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_EndcapGeometryDevDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using EndcapGeometryDevDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git 
a/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h new file mode 100644 index 0000000000000..99494499271e0 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_HitsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_HitsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/HitsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using HitsDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/LST.h b/RecoTracker/LSTCore/interface/alpaka/LST.h index 1f3c08804540f..df1319462432e 100644 --- a/RecoTracker/LSTCore/interface/alpaka/LST.h +++ b/RecoTracker/LSTCore/interface/alpaka/LST.h @@ -66,10 +66,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::vector const& ph2_z); void getOutput(Event& event); - std::vector getHitIdxs(short trackCandidateType, - unsigned int TCIdx, - unsigned int const* TCHitIndices, - unsigned int const* hitIndices); // Input and output vectors std::vector in_trkX_; diff --git a/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h new file mode 100644 index 0000000000000..3011e1d2f87b7 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_MiniDoubletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_MiniDoubletsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using MiniDoubletsDeviceCollection = PortableCollection2; +} // namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h new file mode 100644 index 0000000000000..a7510feb0d540 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_ModulesDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_ModulesDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using ModulesDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h new file mode 100644 index 0000000000000..edeab720bdbaf --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_ObjectRangesDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_ObjectRangesDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using ObjectRangesDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h new file mode 100644 index 0000000000000..e2553f7b42c50 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_PixelQuintupletsDeviceCollection_h +#define 
RecoTracker_LSTCore_interface_PixelQuintupletsDeviceCollection_h + +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using PixelQuintupletsDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h new file mode 100644 index 0000000000000..ac010b9028ac4 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_PixelTripletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_PixelTripletsDeviceCollection_h + +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using PixelTripletsDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h new file mode 100644 index 0000000000000..df1aa2e554e2d --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_QuintupletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_QuintupletsDeviceCollection_h + +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using QuintupletsDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h 
b/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h new file mode 100644 index 0000000000000..ac634aa51bade --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_SegmentsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_SegmentsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using SegmentsDeviceCollection = PortableCollection3; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h new file mode 100644 index 0000000000000..057d86180d967 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesDeviceCollection_h +#define RecoTracker_LSTCore_interface_TrackCandidatesDeviceCollection_h + +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using TrackCandidatesDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h new file mode 100644 index 0000000000000..ea709a7d78efd --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TripletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_TripletsDeviceCollection_h + +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + 
+namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using TripletsDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/LSTESData.cc b/RecoTracker/LSTCore/src/LSTESData.cc index 1acf085a0f491..18fa3adf3a72f 100644 --- a/RecoTracker/LSTCore/src/LSTESData.cc +++ b/RecoTracker/LSTCore/src/LSTESData.cc @@ -87,12 +87,12 @@ std::unique_ptr> lst::loadAndFillESHost() ModuleConnectionMap moduleConnectionMap; ::loadMapsHost(pLStoLayer, endcapGeometry, tiltedGeometry, moduleConnectionMap); - auto endcapGeometryBuffers = - EndcapGeometryBuffer(cms::alpakatools::host(), endcapGeometry.nEndCapMap); - std::memcpy(endcapGeometryBuffers.geoMapDetId_buf.data(), + auto endcapGeometryDev = + std::make_unique(endcapGeometry.nEndCapMap, cms::alpakatools::host()); + std::memcpy(endcapGeometryDev->view().geoMapDetId(), endcapGeometry.geoMapDetId_buf.data(), endcapGeometry.nEndCapMap * sizeof(unsigned int)); - std::memcpy(endcapGeometryBuffers.geoMapPhi_buf.data(), + std::memcpy(endcapGeometryDev->view().geoMapPhi(), endcapGeometry.geoMapPhi_buf.data(), endcapGeometry.nEndCapMap * sizeof(float)); @@ -113,6 +113,6 @@ std::unique_ptr> lst::loadAndFillESHost() nPixels, endcapGeometry.nEndCapMap, std::move(modulesBuffers), - std::move(endcapGeometryBuffers), + std::move(endcapGeometryDev), pixelMappingPtr); } diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h index bf51e262f69e5..aee17629c7e96 100644 --- a/RecoTracker/LSTCore/src/ModuleMethods.h +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -5,7 +5,8 @@ #include #include "RecoTracker/LSTCore/interface/Constants.h" -#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ModulesHostCollection.h" #include "RecoTracker/LSTCore/interface/TiltedGeometry.h" #include "RecoTracker/LSTCore/interface/EndcapGeometry.h" #include 
"RecoTracker/LSTCore/interface/ModuleConnectionMap.h" @@ -24,14 +25,53 @@ namespace lst { // https://github.com/cms-sw/cmssw/blob/5e809e8e0a625578aa265dc4b128a93830cb5429/Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h#L29 }; - inline void fillPixelMap(ModulesBuffer& modulesBuf, - uint16_t nModules, - unsigned int& nPixels, - PixelMap& pixelMapping, - MapPLStoLayer const& pLStoLayer, - ModuleMetaData const& mmd) { - pixelMapping.pixelModuleIndex = mmd.detIdToIndex.at(1); + inline bool parseIsLower(bool isInvertedx, unsigned int detId) { return (isInvertedx) ? !(detId & 1) : (detId & 1); } + + inline unsigned int parsePartnerModuleId(unsigned int detId, bool isLowerx, bool isInvertedx) { + return isLowerx ? (isInvertedx ? detId - 1 : detId + 1) : (isInvertedx ? detId + 1 : detId - 1); + } + inline bool parseIsInverted(short subdet, short side, short module, short layer) { + if (subdet == Endcap) { + if (side == NegZ) { + return module % 2 == 1; + } else if (side == PosZ) { + return module % 2 == 0; + } else { + return false; + } + } else if (subdet == Barrel) { + if (side == Center) { + if (layer <= 3) { + return module % 2 == 1; + } else if (layer >= 4) { + return module % 2 == 0; + } else { + return false; + } + } else if (side == NegZ or side == PosZ) { + if (layer <= 2) { + return module % 2 == 1; + } else if (layer == 3) { + return module % 2 == 0; + } else { + return false; + } + } else { + return false; + } + } else { + return false; + } + } + + inline std::tuple, + unsigned int, + std::vector, + unsigned int, + std::vector> + getConnectedPixels(uint16_t nModules, unsigned int& nPixels, PixelMap& pixelMapping, MapPLStoLayer const& pLStoLayer) { std::vector connectedModuleDetIds; std::vector connectedModuleDetIds_pos; std::vector connectedModuleDetIds_neg; @@ -77,31 +117,21 @@ namespace lst { totalSizes_neg += sizes_neg; } - unsigned int connectedPix_size = totalSizes + totalSizes_pos + totalSizes_neg; - nPixels = connectedPix_size; - - // Now we re-initialize
connectedPixels_buf since nPixels is now known - modulesBuf.connectedPixels_buf = cms::alpakatools::make_host_buffer(nPixels); - modulesBuf.data_.setData(modulesBuf); - - unsigned int* connectedPixels = modulesBuf.connectedPixels_buf.data(); + nPixels = totalSizes + totalSizes_pos + totalSizes_neg; - for (unsigned int icondet = 0; icondet < totalSizes; icondet++) { - connectedPixels[icondet] = mmd.detIdToIndex.at(connectedModuleDetIds[icondet]); - } - for (unsigned int icondet = 0; icondet < totalSizes_pos; icondet++) { - connectedPixels[icondet + totalSizes] = mmd.detIdToIndex.at(connectedModuleDetIds_pos[icondet]); - } - for (unsigned int icondet = 0; icondet < totalSizes_neg; icondet++) { - connectedPixels[icondet + totalSizes + totalSizes_pos] = mmd.detIdToIndex.at(connectedModuleDetIds_neg[icondet]); - } + return {totalSizes, + connectedModuleDetIds, + totalSizes_pos, + connectedModuleDetIds_pos, + totalSizes_neg, + connectedModuleDetIds_neg}; } - inline void fillConnectedModuleArrayExplicit(ModulesBuffer& modulesBuf, + inline void fillConnectedModuleArrayExplicit(Modules modules, ModuleMetaData const& mmd, ModuleConnectionMap const& moduleConnectionMap) { - uint16_t* moduleMap = modulesBuf.moduleMap_buf.data(); - uint16_t* nConnectedModules = modulesBuf.nConnectedModules_buf.data(); + Params_Modules::ArrayU16xMaxConnected* moduleMap = modules.moduleMap(); + uint16_t* nConnectedModules = modules.nConnectedModules(); for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { unsigned int detId = it->first; @@ -109,14 +139,14 @@ namespace lst { auto& connectedModules = moduleConnectionMap.getConnectedModuleDetIds(detId); nConnectedModules[index] = connectedModules.size(); for (uint16_t i = 0; i < nConnectedModules[index]; i++) { - moduleMap[index * max_connected_modules + i] = mmd.detIdToIndex.at(connectedModules[i]); + moduleMap[index][i] = mmd.detIdToIndex.at(connectedModules[i]); } } } - inline void fillMapArraysExplicit(ModulesBuffer& 
modulesBuf, ModuleMetaData const& mmd) { - uint16_t* mapIdx = modulesBuf.mapIdx_buf.data(); - unsigned int* mapdetId = modulesBuf.mapdetId_buf.data(); + inline void fillMapArraysExplicit(Modules modules, ModuleMetaData const& mmd) { + uint16_t* mapIdx = modules.mapIdx(); + unsigned int* mapdetId = modules.mapdetId(); unsigned int counter = 0; for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { @@ -188,44 +218,54 @@ namespace lst { nModules = counter; } - inline ModulesBuffer loadModulesFromFile(MapPLStoLayer const& pLStoLayer, - const char* moduleMetaDataFilePath, - uint16_t& nModules, - uint16_t& nLowerModules, - unsigned int& nPixels, - PixelMap& pixelMapping, - const EndcapGeometry& endcapGeometry, - const TiltedGeometry& tiltedGeometry, - const ModuleConnectionMap& moduleConnectionMap) { + inline std::unique_ptr loadModulesFromFile(MapPLStoLayer const& pLStoLayer, + const char* moduleMetaDataFilePath, + uint16_t& nModules, + uint16_t& nLowerModules, + unsigned int& nPixels, + PixelMap& pixelMapping, + const EndcapGeometry& endcapGeometry, + const TiltedGeometry& tiltedGeometry, + const ModuleConnectionMap& moduleConnectionMap) { ModuleMetaData mmd; loadCentroidsFromFile(moduleMetaDataFilePath, mmd, nModules); - // Initialize modulesBuf, but with nPixels = 0 - // The fields that require nPixels are re-initialized in fillPixelMap - ModulesBuffer modulesBuf(cms::alpakatools::host(), nModules, 0); + // TODO: this whole section could use some refactoring + auto [totalSizes, + connectedModuleDetIds, + totalSizes_pos, + connectedModuleDetIds_pos, + totalSizes_neg, + connectedModuleDetIds_neg] = getConnectedPixels(nModules, nPixels, pixelMapping, pLStoLayer); + + std::array const modules_sizes{{static_cast(nModules), static_cast(nPixels)}}; + + auto modulesHC = std::make_unique(modules_sizes, cms::alpakatools::host()); + + auto modules_view = modulesHC->view(); // Getting the underlying data pointers - unsigned int* host_detIds = 
modulesBuf.detIds_buf.data(); - short* host_layers = modulesBuf.layers_buf.data(); - short* host_rings = modulesBuf.rings_buf.data(); - short* host_rods = modulesBuf.rods_buf.data(); - short* host_modules = modulesBuf.modules_buf.data(); - short* host_subdets = modulesBuf.subdets_buf.data(); - short* host_sides = modulesBuf.sides_buf.data(); - float* host_eta = modulesBuf.eta_buf.data(); - float* host_r = modulesBuf.r_buf.data(); - bool* host_isInverted = modulesBuf.isInverted_buf.data(); - bool* host_isLower = modulesBuf.isLower_buf.data(); - bool* host_isAnchor = modulesBuf.isAnchor_buf.data(); - ModuleType* host_moduleType = modulesBuf.moduleType_buf.data(); - ModuleLayerType* host_moduleLayerType = modulesBuf.moduleLayerType_buf.data(); - float* host_dxdys = modulesBuf.dxdys_buf.data(); - float* host_drdzs = modulesBuf.drdzs_buf.data(); - uint16_t* host_nModules = modulesBuf.nModules_buf.data(); - uint16_t* host_nLowerModules = modulesBuf.nLowerModules_buf.data(); - uint16_t* host_partnerModuleIndices = modulesBuf.partnerModuleIndices_buf.data(); - int* host_lstLayers = modulesBuf.lstLayers_buf.data(); + unsigned int* host_detIds = modules_view.detIds(); + short* host_layers = modules_view.layers(); + short* host_rings = modules_view.rings(); + short* host_rods = modules_view.rods(); + short* host_modules = modules_view.modules(); + short* host_subdets = modules_view.subdets(); + short* host_sides = modules_view.sides(); + float* host_eta = modules_view.eta(); + float* host_r = modules_view.r(); + bool* host_isInverted = modules_view.isInverted(); + bool* host_isLower = modules_view.isLower(); + bool* host_isAnchor = modules_view.isAnchor(); + ModuleType* host_moduleType = modules_view.moduleType(); + ModuleLayerType* host_moduleLayerType = modules_view.moduleLayerType(); + float* host_dxdys = modules_view.dxdys(); + float* host_drdzs = modules_view.drdzs(); + uint16_t* host_nModules = &modules_view.nModules(); + uint16_t* host_nLowerModules = 
&modules_view.nLowerModules(); + uint16_t* host_partnerModuleIndices = modules_view.partnerModuleIndices(); + int* host_lstLayers = modules_view.lstLayers(); //reassign detIdToIndex indices here nLowerModules = (nModules - 1) / 2; @@ -257,8 +297,8 @@ namespace lst { r = 0; } else { setDerivedQuantities(detId, layer, ring, rod, module, subdet, side, m_x, m_y, m_z, eta, r); - isInverted = lst::Modules::parseIsInverted(subdet, side, module, layer); - isLower = lst::Modules::parseIsLower(isInverted, detId); + isInverted = parseIsInverted(subdet, side, module, layer); + isLower = parseIsLower(isInverted, detId); } if (isLower) { index = lowerModuleCounter; @@ -316,7 +356,7 @@ namespace lst { auto& index = it->second; if (detId != 1) { host_partnerModuleIndices[index] = - mmd.detIdToIndex[lst::Modules::parsePartnerModuleId(detId, host_isLower[index], host_isInverted[index])]; + mmd.detIdToIndex[parsePartnerModuleId(detId, host_isLower[index], host_isInverted[index])]; //add drdz and slope importing stuff here! 
if (host_drdzs[index] == 0) { host_drdzs[index] = host_drdzs[host_partnerModuleIndices[index]]; @@ -327,15 +367,29 @@ namespace lst { } } - fillPixelMap(modulesBuf, nModules, nPixels, pixelMapping, pLStoLayer, mmd); - *host_nModules = nModules; *host_nLowerModules = nLowerModules; - fillConnectedModuleArrayExplicit(modulesBuf, mmd, moduleConnectionMap); - fillMapArraysExplicit(modulesBuf, mmd); + // Fill pixel part + pixelMapping.pixelModuleIndex = mmd.detIdToIndex.at(1); + + auto modulesPixel_view = modulesHC->view(); + auto connectedPixels = alpaka::createView( + cms::alpakatools::host(), modulesPixel_view.connectedPixels(), modulesPixel_view.metadata().size()); + for (unsigned int icondet = 0; icondet < totalSizes; icondet++) { + connectedPixels[icondet] = mmd.detIdToIndex.at(connectedModuleDetIds[icondet]); + } + for (unsigned int icondet = 0; icondet < totalSizes_pos; icondet++) { + connectedPixels[icondet + totalSizes] = mmd.detIdToIndex.at(connectedModuleDetIds_pos[icondet]); + } + for (unsigned int icondet = 0; icondet < totalSizes_neg; icondet++) { + connectedPixels[icondet + totalSizes + totalSizes_pos] = mmd.detIdToIndex.at(connectedModuleDetIds_neg[icondet]); + } + + fillConnectedModuleArrayExplicit(modules_view, mmd, moduleConnectionMap); + fillMapArraysExplicit(modules_view, mmd); - return modulesBuf; + return modulesHC; } } // namespace lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 659591b836ec9..984f8ea5d1e9a 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -2,6 +2,14 @@ #include "Event.h" +#include "MiniDoublet.h" +#include "PixelQuintuplet.h" +#include "PixelTriplet.h" +#include "Quintuplet.h" +#include "Segment.h" +#include "TrackCandidate.h" +#include "Triplet.h" + using Device = ALPAKA_ACCELERATOR_NAMESPACE::Device; using Queue = ALPAKA_ACCELERATOR_NAMESPACE::Queue; using Acc1D = 
ALPAKA_ACCELERATOR_NAMESPACE::Acc1D; @@ -51,35 +59,26 @@ void Event::resetEventSync() { n_quintuplets_by_layer_endcap_[i] = 0; } } - hitsInGPU_.reset(); - hitsBuffers_.reset(); - mdsInGPU_.reset(); - miniDoubletsBuffers_.reset(); - rangesInGPU_.reset(); - rangesBuffers_.reset(); - segmentsInGPU_.reset(); - segmentsBuffers_.reset(); - tripletsInGPU_.reset(); - tripletsBuffers_.reset(); - quintupletsInGPU_.reset(); - quintupletsBuffers_.reset(); - trackCandidatesInGPU_.reset(); - trackCandidatesBuffers_.reset(); - pixelTripletsInGPU_.reset(); - pixelTripletsBuffers_.reset(); - pixelQuintupletsInGPU_.reset(); - pixelQuintupletsBuffers_.reset(); - - hitsInCPU_.reset(); - rangesInCPU_.reset(); - mdsInCPU_.reset(); - segmentsInCPU_.reset(); - tripletsInCPU_.reset(); - quintupletsInCPU_.reset(); - pixelTripletsInCPU_.reset(); - pixelQuintupletsInCPU_.reset(); - trackCandidatesInCPU_.reset(); - modulesInCPU_.reset(); + hitsDC_.reset(); + miniDoubletsDC_.reset(); + rangesDC_.reset(); + segmentsDC_.reset(); + tripletsDC_.reset(); + quintupletsDC_.reset(); + trackCandidatesDC_.reset(); + pixelTripletsDC_.reset(); + pixelQuintupletsDC_.reset(); + + hitsHC_.reset(); + rangesHC_.reset(); + miniDoubletsHC_.reset(); + segmentsHC_.reset(); + tripletsHC_.reset(); + quintupletsHC_.reset(); + pixelTripletsHC_.reset(); + pixelQuintupletsHC_.reset(); + trackCandidatesHC_.reset(); + modulesHC_.reset(); } void Event::addHitToEvent(std::vector const& x, @@ -91,28 +90,41 @@ void Event::addHitToEvent(std::vector const& x, unsigned int nHits = x.size(); // Initialize space on device/host for next event. 
- if (!hitsInGPU_) { - hitsInGPU_.emplace(); - hitsBuffers_.emplace(nModules_, nHits, devAcc_, queue_); - hitsInGPU_->setData(*hitsBuffers_); + if (!hitsDC_) { + std::array const hits_sizes{{static_cast(nHits), static_cast(nModules_)}}; + hitsDC_.emplace(hits_sizes, queue_); + + auto hitsRanges = hitsDC_->view(); + auto hitRanges_view = alpaka::createView(devAcc_, hitsRanges.hitRanges(), hitsRanges.metadata().size()); + auto hitRangesLower_view = alpaka::createView(devAcc_, hitsRanges.hitRangesLower(), hitsRanges.metadata().size()); + auto hitRangesUpper_view = alpaka::createView(devAcc_, hitsRanges.hitRangesUpper(), hitsRanges.metadata().size()); + auto hitRangesnLower_view = alpaka::createView(devAcc_, hitsRanges.hitRangesnLower(), hitsRanges.metadata().size()); + auto hitRangesnUpper_view = alpaka::createView(devAcc_, hitsRanges.hitRangesnUpper(), hitsRanges.metadata().size()); + alpaka::memset(queue_, hitRanges_view, 0xff); + alpaka::memset(queue_, hitRangesLower_view, 0xff); + alpaka::memset(queue_, hitRangesUpper_view, 0xff); + alpaka::memset(queue_, hitRangesnLower_view, 0xff); + alpaka::memset(queue_, hitRangesnUpper_view, 0xff); } - if (!rangesInGPU_) { - rangesInGPU_.emplace(); - rangesBuffers_.emplace(nModules_, nLowerModules_, devAcc_, queue_); - rangesInGPU_->setData(*rangesBuffers_); + if (!rangesDC_) { + rangesDC_.emplace(nLowerModules_ + 1, queue_); + auto buf = rangesDC_->buffer(); + alpaka::memset(queue_, buf, 0xff); } - // Need a view here before transferring to the device. - auto nHits_view = alpaka::createView(cms::alpakatools::host(), &nHits, (Idx)1u); - // Copy the host arrays to the GPU. 
- alpaka::memcpy(queue_, hitsBuffers_->xs_buf, x, nHits); - alpaka::memcpy(queue_, hitsBuffers_->ys_buf, y, nHits); - alpaka::memcpy(queue_, hitsBuffers_->zs_buf, z, nHits); - alpaka::memcpy(queue_, hitsBuffers_->detid_buf, detId, nHits); - alpaka::memcpy(queue_, hitsBuffers_->idxs_buf, idxInNtuple, nHits); - alpaka::memcpy(queue_, hitsBuffers_->nHits_buf, nHits_view); + auto hits = hitsDC_->view(); + auto xs_d = alpaka::createView(devAcc_, hits.xs(), (Idx)hits.metadata().size()); + auto ys_d = alpaka::createView(devAcc_, hits.ys(), (Idx)hits.metadata().size()); + auto zs_d = alpaka::createView(devAcc_, hits.zs(), (Idx)hits.metadata().size()); + auto detId_d = alpaka::createView(devAcc_, hits.detid(), (Idx)hits.metadata().size()); + auto idxs_d = alpaka::createView(devAcc_, hits.idxs(), (Idx)hits.metadata().size()); + alpaka::memcpy(queue_, xs_d, x, (Idx)nHits); + alpaka::memcpy(queue_, ys_d, y, (Idx)nHits); + alpaka::memcpy(queue_, zs_d, z, (Idx)nHits); + alpaka::memcpy(queue_, detId_d, detId, (Idx)nHits); + alpaka::memcpy(queue_, idxs_d, idxInNtuple, (Idx)nHits); alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock1{1, 1, 256}; @@ -126,18 +138,22 @@ void Event::addHitToEvent(std::vector const& x, TwoS, nModules_, nEndCapMap_, - endcapGeometryBuffers_.geoMapDetId_buf.data(), - endcapGeometryBuffers_.geoMapPhi_buf.data(), - *modulesBuffers_.data(), - *hitsInGPU_, + endcapGeometry_.const_view(), + modules_.const_view(), + hitsDC_->view(), + hitsDC_->view(), nHits); Vec3D const threadsPerBlock2{1, 1, 256}; Vec3D const blocksPerGrid2{1, 1, max_blocks}; WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); - alpaka::exec( - queue_, module_ranges_workdiv, ModuleRangesKernel{}, *modulesBuffers_.data(), *hitsInGPU_, nLowerModules_); + alpaka::exec(queue_, + module_ranges_workdiv, + ModuleRangesKernel{}, + modules_.const_view(), + hitsDC_->view(), + 
nLowerModules_); } void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, @@ -172,10 +188,13 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, unsigned int mdSize = 2 * size; uint16_t pixelModuleIndex = pixelMapping_.pixelModuleIndex; - if (!mdsInGPU_) { - // Create a view for the element nLowerModules_ inside rangesBuffers_->miniDoubletModuleOccupancy + if (!miniDoubletsDC_) { + // Create a view for the element nLowerModules_ inside rangesOccupancy->miniDoubletModuleOccupancy + auto rangesOccupancy = rangesDC_->view(); + auto miniDoubletModuleOccupancy_view = alpaka::createView( + devAcc_, rangesOccupancy.miniDoubletModuleOccupancy(), (Idx)rangesOccupancy.metadata().size()); auto dst_view_miniDoubletModuleOccupancy = - alpaka::createSubView(rangesBuffers_->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); + alpaka::createSubView(miniDoubletModuleOccupancy_view, (Idx)1u, (Idx)nLowerModules_); // Create a host buffer for a value to be passed to the device auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); @@ -185,23 +204,31 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); - alpaka::exec( - queue_, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU_); + alpaka::exec(queue_, + createMDArrayRangesGPU_workDiv, + CreateMDArrayRangesGPU{}, + modules_.const_view(), + rangesDC_->view()); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); - alpaka::memcpy(queue_, nTotalMDs_buf_h, rangesBuffers_->device_nTotalMDs_buf); + auto nTotalMDs_buf_d = alpaka::createView(devAcc_, &rangesOccupancy.nTotalMDs(), (Idx)1u); + alpaka::memcpy(queue_, nTotalMDs_buf_h, nTotalMDs_buf_d); alpaka::wait(queue_); // wait to get the data before manipulation *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; unsigned int nTotalMDs = 
*nTotalMDs_buf_h.data(); - mdsInGPU_.emplace(); - miniDoubletsBuffers_.emplace(nTotalMDs, nLowerModules_, devAcc_, queue_); - mdsInGPU_->setData(*miniDoubletsBuffers_); + std::array const mds_sizes{{static_cast(nTotalMDs), static_cast(nLowerModules_ + 1)}}; + miniDoubletsDC_.emplace(mds_sizes, queue_); - alpaka::memcpy(queue_, miniDoubletsBuffers_->nMemoryLocations_buf, nTotalMDs_buf_h); + auto mdsOccupancy = miniDoubletsDC_->view(); + auto nMDs_view = alpaka::createView(devAcc_, mdsOccupancy.nMDs(), mdsOccupancy.metadata().size()); + auto totOccupancyMDs_view = + alpaka::createView(devAcc_, mdsOccupancy.totOccupancyMDs(), mdsOccupancy.metadata().size()); + alpaka::memset(queue_, nMDs_view, 0u); + alpaka::memset(queue_, totOccupancyMDs_view, 0u); } - if (!segmentsInGPU_) { + if (!segmentsDC_) { // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them @@ -210,22 +237,30 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, alpaka::exec(queue_, createSegmentArrayRanges_workDiv, CreateSegmentArrayRanges{}, - *modulesBuffers_.data(), - *rangesInGPU_, - *mdsInGPU_); - - auto nTotalSegments_view = alpaka::createView(cms::alpakatools::host(), &nTotalSegments_, (Idx)1u); - - alpaka::memcpy(queue_, nTotalSegments_view, rangesBuffers_->device_nTotalSegs_buf); + modules_.const_view(), + rangesDC_->view(), + miniDoubletsDC_->const_view()); + + auto rangesOccupancy = rangesDC_->view(); + auto nTotalSegments_view_h = alpaka::createView(cms::alpakatools::host(), &nTotalSegments_, (Idx)1u); + auto nTotalSegments_view_d = alpaka::createView(devAcc_, &rangesOccupancy.nTotalSegs(), (Idx)1u); + alpaka::memcpy(queue_, nTotalSegments_view_h, nTotalSegments_view_d); alpaka::wait(queue_); // wait to get the value before manipulation 
nTotalSegments_ += n_max_pixel_segments_per_module; - segmentsInGPU_.emplace(); - segmentsBuffers_.emplace(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc_, queue_); - segmentsInGPU_->setData(*segmentsBuffers_); - - alpaka::memcpy(queue_, segmentsBuffers_->nMemoryLocations_buf, nTotalSegments_view); + std::array const segments_sizes{{static_cast(nTotalSegments_), + static_cast(nLowerModules_ + 1), + static_cast(n_max_pixel_segments_per_module)}}; + segmentsDC_.emplace(segments_sizes, queue_); + + auto segmentsOccupancy = segmentsDC_->view(); + auto nSegments_view = + alpaka::createView(devAcc_, segmentsOccupancy.nSegments(), segmentsOccupancy.metadata().size()); + auto totOccupancySegments_view = + alpaka::createView(devAcc_, segmentsOccupancy.totOccupancySegments(), segmentsOccupancy.metadata().size()); + alpaka::memset(queue_, nSegments_view, 0u); + alpaka::memset(queue_, totOccupancySegments_view, 0u); } auto hitIndices0_dev = allocBufWrapper(devAcc_, size, queue_); @@ -240,36 +275,44 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, alpaka::memcpy(queue_, hitIndices3_dev, hitIndices3, size); alpaka::memcpy(queue_, dPhiChange_dev, dPhiChange, size); - alpaka::memcpy(queue_, segmentsBuffers_->ptIn_buf, ptIn, size); - alpaka::memcpy(queue_, segmentsBuffers_->ptErr_buf, ptErr, size); - alpaka::memcpy(queue_, segmentsBuffers_->px_buf, px, size); - alpaka::memcpy(queue_, segmentsBuffers_->py_buf, py, size); - alpaka::memcpy(queue_, segmentsBuffers_->pz_buf, pz, size); - alpaka::memcpy(queue_, segmentsBuffers_->etaErr_buf, etaErr, size); - alpaka::memcpy(queue_, segmentsBuffers_->isQuad_buf, isQuad, size); - alpaka::memcpy(queue_, segmentsBuffers_->eta_buf, eta, size); - alpaka::memcpy(queue_, segmentsBuffers_->phi_buf, phi, size); - alpaka::memcpy(queue_, segmentsBuffers_->charge_buf, charge, size); - alpaka::memcpy(queue_, segmentsBuffers_->seedIdx_buf, seedIdx, size); - alpaka::memcpy(queue_, 
segmentsBuffers_->superbin_buf, superbin, size); - alpaka::memcpy(queue_, segmentsBuffers_->pixelType_buf, pixelType, size); + SegmentsPixel segmentsPixel = segmentsDC_->view(); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.ptIn(), size), ptIn, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.ptErr(), size), ptErr, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.px(), size), px, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.py(), size), py, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.pz(), size), pz, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.etaErr(), size), etaErr, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.isQuad(), size), isQuad, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.eta(), size), eta, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.phi(), size), phi, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.charge(), size), charge, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.seedIdx(), size), seedIdx, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.superbin(), size), superbin, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.pixelType(), size), pixelType, size); // Create source views for size and mdSize auto src_view_size = alpaka::createView(cms::alpakatools::host(), &size, (Idx)1u); auto src_view_mdSize = alpaka::createView(cms::alpakatools::host(), &mdSize, (Idx)1u); - auto dst_view_segments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, (Idx)pixelModuleIndex); + auto segmentsOccupancy = segmentsDC_->view(); + auto nSegments_view = + alpaka::createView(devAcc_, segmentsOccupancy.nSegments(), (Idx)segmentsOccupancy.metadata().size()); + auto dst_view_segments = 
alpaka::createSubView(nSegments_view, (Idx)1u, (Idx)pixelModuleIndex); alpaka::memcpy(queue_, dst_view_segments, src_view_size); - auto dst_view_totOccupancySegments = - alpaka::createSubView(segmentsBuffers_->totOccupancySegments_buf, (Idx)1u, (Idx)pixelModuleIndex); + auto totOccupancySegments_view = + alpaka::createView(devAcc_, segmentsOccupancy.totOccupancySegments(), (Idx)segmentsOccupancy.metadata().size()); + auto dst_view_totOccupancySegments = alpaka::createSubView(totOccupancySegments_view, (Idx)1u, (Idx)pixelModuleIndex); alpaka::memcpy(queue_, dst_view_totOccupancySegments, src_view_size); - auto dst_view_nMDs = alpaka::createSubView(miniDoubletsBuffers_->nMDs_buf, (Idx)1u, (Idx)pixelModuleIndex); + auto mdsOccupancy = miniDoubletsDC_->view(); + auto nMDs_view = alpaka::createView(devAcc_, mdsOccupancy.nMDs(), (Idx)mdsOccupancy.metadata().size()); + auto dst_view_nMDs = alpaka::createSubView(nMDs_view, (Idx)1u, (Idx)pixelModuleIndex); alpaka::memcpy(queue_, dst_view_nMDs, src_view_mdSize); - auto dst_view_totOccupancyMDs = - alpaka::createSubView(miniDoubletsBuffers_->totOccupancyMDs_buf, (Idx)1u, (Idx)pixelModuleIndex); + auto totOccupancyMDs_view = + alpaka::createView(devAcc_, mdsOccupancy.totOccupancyMDs(), (Idx)mdsOccupancy.metadata().size()); + auto dst_view_totOccupancyMDs = alpaka::createSubView(totOccupancyMDs_view, (Idx)1u, (Idx)pixelModuleIndex); alpaka::memcpy(queue_, dst_view_totOccupancyMDs, src_view_mdSize); alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory @@ -281,11 +324,12 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, alpaka::exec(queue_, addPixelSegmentToEvent_workdiv, AddPixelSegmentToEventKernel{}, - *modulesBuffers_.data(), - *rangesInGPU_, - *hitsInGPU_, - *mdsInGPU_, - *segmentsInGPU_, + modules_.const_view(), + rangesDC_->const_view(), + hitsDC_->view(), + miniDoubletsDC_->view(), + segmentsDC_->view(), + segmentsDC_->view(), hitIndices0_dev.data(), 
hitIndices1_dev.data(), hitIndices2_dev.data(), @@ -296,9 +340,12 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, } void Event::createMiniDoublets() { - // Create a view for the element nLowerModules_ inside rangesBuffers_->miniDoubletModuleOccupancy + // Create a view for the element nLowerModules_ inside rangesOccupancy->miniDoubletModuleOccupancy + auto rangesOccupancy = rangesDC_->view(); + auto miniDoubletModuleOccupancy_view = + alpaka::createView(devAcc_, rangesOccupancy.miniDoubletModuleOccupancy(), (Idx)rangesOccupancy.metadata().size()); auto dst_view_miniDoubletModuleOccupancy = - alpaka::createSubView(rangesBuffers_->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); + alpaka::createSubView(miniDoubletModuleOccupancy_view, (Idx)1u, (Idx)nLowerModules_); // Create a host buffer for a value to be passed to the device auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); @@ -308,44 +355,56 @@ void Event::createMiniDoublets() { WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); - alpaka::exec( - queue_, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU_); + alpaka::exec(queue_, + createMDArrayRangesGPU_workDiv, + CreateMDArrayRangesGPU{}, + modules_.const_view(), + rangesDC_->view()); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); - alpaka::memcpy(queue_, nTotalMDs_buf_h, rangesBuffers_->device_nTotalMDs_buf); + auto nTotalMDs_buf_d = alpaka::createView(devAcc_, &rangesOccupancy.nTotalMDs(), (Idx)1u); + alpaka::memcpy(queue_, nTotalMDs_buf_h, nTotalMDs_buf_d); alpaka::wait(queue_); // wait to get the data before manipulation *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); - if (!mdsInGPU_) { - mdsInGPU_.emplace(); - miniDoubletsBuffers_.emplace(nTotalMDs, nLowerModules_, devAcc_, queue_); - mdsInGPU_->setData(*miniDoubletsBuffers_); + if 
(!miniDoubletsDC_) { + std::array const mds_sizes{{static_cast(nTotalMDs), static_cast(nLowerModules_ + 1)}}; + miniDoubletsDC_.emplace(mds_sizes, queue_); + + auto mdsOccupancy = miniDoubletsDC_->view(); + auto nMDs_view = alpaka::createView(devAcc_, mdsOccupancy.nMDs(), mdsOccupancy.metadata().size()); + auto totOccupancyMDs_view = + alpaka::createView(devAcc_, mdsOccupancy.totOccupancyMDs(), mdsOccupancy.metadata().size()); + alpaka::memset(queue_, nMDs_view, 0u); + alpaka::memset(queue_, totOccupancyMDs_view, 0u); } - Vec3D const threadsPerBlockCreateMDInGPU{1, 16, 32}; - Vec3D const blocksPerGridCreateMDInGPU{1, nLowerModules_ / threadsPerBlockCreateMDInGPU[1], 1}; - WorkDiv3D const createMiniDoubletsInGPUv2_workDiv = - createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, elementsPerThread); + Vec3D const threadsPerBlockCreateMD{1, 16, 32}; + Vec3D const blocksPerGridCreateMD{1, nLowerModules_ / threadsPerBlockCreateMD[1], 1}; + WorkDiv3D const createMiniDoublets_workDiv = + createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread); alpaka::exec(queue_, - createMiniDoubletsInGPUv2_workDiv, - CreateMiniDoubletsInGPUv2{}, - *modulesBuffers_.data(), - *hitsInGPU_, - *mdsInGPU_, - *rangesInGPU_); + createMiniDoublets_workDiv, + CreateMiniDoublets{}, + modules_.const_view(), + hitsDC_->const_view(), + hitsDC_->const_view(), + miniDoubletsDC_->view(), + miniDoubletsDC_->view(), + rangesDC_->const_view()); WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue_, addMiniDoubletRangesToEventExplicit_workDiv, AddMiniDoubletRangesToEventExplicit{}, - *modulesBuffers_.data(), - *mdsInGPU_, - *rangesInGPU_, - *hitsInGPU_); + modules_.const_view(), + miniDoubletsDC_->view(), + rangesDC_->view(), + hitsDC_->const_view()); if (addObjects_) { addMiniDoubletsToEventExplicit(); @@ -353,33 +412,44 @@ void Event::createMiniDoublets() { } void Event::createSegmentsWithModuleMap() { 
- if (!segmentsInGPU_) { - segmentsInGPU_.emplace(); - segmentsBuffers_.emplace(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc_, queue_); - segmentsInGPU_->setData(*segmentsBuffers_); + if (!segmentsDC_) { + std::array const segments_sizes{{static_cast(nTotalSegments_), + static_cast(nLowerModules_ + 1), + static_cast(n_max_pixel_segments_per_module)}}; + segmentsDC_.emplace(segments_sizes, queue_); + + auto segmentsOccupancy = segmentsDC_->view(); + auto nSegments_view = + alpaka::createView(devAcc_, segmentsOccupancy.nSegments(), segmentsOccupancy.metadata().size()); + auto totOccupancySegments_view = + alpaka::createView(devAcc_, segmentsOccupancy.totOccupancySegments(), segmentsOccupancy.metadata().size()); + alpaka::memset(queue_, nSegments_view, 0u); + alpaka::memset(queue_, totOccupancySegments_view, 0u); } Vec3D const threadsPerBlockCreateSeg{1, 1, 64}; Vec3D const blocksPerGridCreateSeg{1, 1, nLowerModules_}; - WorkDiv3D const createSegmentsInGPUv2_workDiv = + WorkDiv3D const createSegments_workDiv = createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); alpaka::exec(queue_, - createSegmentsInGPUv2_workDiv, - CreateSegmentsInGPUv2{}, - *modulesBuffers_.data(), - *mdsInGPU_, - *segmentsInGPU_, - *rangesInGPU_); + createSegments_workDiv, + CreateSegments{}, + modules_.const_view(), + miniDoubletsDC_->const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->view(), + segmentsDC_->view(), + rangesDC_->const_view()); WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue_, addSegmentRangesToEventExplicit_workDiv, AddSegmentRangesToEventExplicit{}, - *modulesBuffers_.data(), - *segmentsInGPU_, - *rangesInGPU_); + modules_.const_view(), + segmentsDC_->view(), + rangesDC_->view()); if (addObjects_) { addSegmentsToEventExplicit(); @@ -387,27 +457,41 @@ void Event::createSegmentsWithModuleMap() { } void Event::createTriplets() { - if (!tripletsInGPU_) { 
+ if (!tripletsDC_) { WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue_, createTripletArrayRanges_workDiv, CreateTripletArrayRanges{}, - *modulesBuffers_.data(), - *rangesInGPU_, - *segmentsInGPU_); + modules_.const_view(), + rangesDC_->view(), + segmentsDC_->const_view()); // TODO: Why are we pulling this back down only to put it back on the device in a new struct? + auto rangesOccupancy = rangesDC_->view(); auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); - - alpaka::memcpy(queue_, maxTriplets_buf_h, rangesBuffers_->device_nTotalTrips_buf); + auto maxTriplets_buf_d = alpaka::createView(devAcc_, &rangesOccupancy.nTotalTrips(), (Idx)1u); + alpaka::memcpy(queue_, maxTriplets_buf_h, maxTriplets_buf_d); alpaka::wait(queue_); // wait to get the value before using it - tripletsInGPU_.emplace(); - tripletsBuffers_.emplace(*maxTriplets_buf_h.data(), nLowerModules_, devAcc_, queue_); - tripletsInGPU_->setData(*tripletsBuffers_); - - alpaka::memcpy(queue_, tripletsBuffers_->nMemoryLocations_buf, maxTriplets_buf_h); + std::array const triplets_sizes{ + {static_cast(*maxTriplets_buf_h.data()), static_cast(nLowerModules_)}}; + tripletsDC_.emplace(triplets_sizes, queue_); + + auto tripletsOccupancy = tripletsDC_->view(); + auto nTriplets_view = + alpaka::createView(devAcc_, tripletsOccupancy.nTriplets(), tripletsOccupancy.metadata().size()); + alpaka::memset(queue_, nTriplets_view, 0u); + auto totOccupancyTriplets_view = + alpaka::createView(devAcc_, tripletsOccupancy.totOccupancyTriplets(), tripletsOccupancy.metadata().size()); + alpaka::memset(queue_, totOccupancyTriplets_view, 0u); + auto triplets = tripletsDC_->view(); + auto partOfPT5_view = alpaka::createView(devAcc_, triplets.partOfPT5(), triplets.metadata().size()); + alpaka::memset(queue_, partOfPT5_view, 0u); + auto partOfT5_view = alpaka::createView(devAcc_, triplets.partOfT5(), triplets.metadata().size()); + alpaka::memset(queue_, 
partOfT5_view, 0u); + auto partOfPT3_view = alpaka::createView(devAcc_, triplets.partOfPT3(), triplets.metadata().size()); + alpaka::memset(queue_, partOfPT3_view, 0u); } uint16_t nonZeroModules = 0; @@ -415,12 +499,17 @@ void Event::createTriplets() { // Allocate and copy nSegments from device to host (only nLowerModules in OT, not the +1 with pLSs) auto nSegments_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); - alpaka::memcpy(queue_, nSegments_buf_h, segmentsBuffers_->nSegments_buf, nLowerModules_); + auto nSegments_buf_d = + alpaka::createView(devAcc_, segmentsDC_->const_view().nSegments(), nLowerModules_); + alpaka::memcpy(queue_, nSegments_buf_h, nSegments_buf_d, nLowerModules_); // ... same for module_nConnectedModules // FIXME: replace by ES host data + auto modules = modules_.const_view(); auto module_nConnectedModules_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); - alpaka::memcpy(queue_, module_nConnectedModules_buf_h, modulesBuffers_.nConnectedModules_buf, nLowerModules_); + auto module_nConnectedModules_buf_d = + alpaka::createView(devAcc_, modules.nConnectedModules(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_nConnectedModules_buf_h, module_nConnectedModules_buf_d, nLowerModules_); alpaka::wait(queue_); // wait for nSegments and module_nConnectedModules before using @@ -447,17 +536,19 @@ void Event::createTriplets() { Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; - WorkDiv3D const createTripletsInGPUv2_workDiv = + WorkDiv3D const createTriplets_workDiv = createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); alpaka::exec(queue_, - createTripletsInGPUv2_workDiv, - CreateTripletsInGPUv2{}, - *modulesBuffers_.data(), - *mdsInGPU_, - *segmentsInGPU_, - *tripletsInGPU_, - *rangesInGPU_, + createTriplets_workDiv, + CreateTriplets{}, + modules_.const_view(), + miniDoubletsDC_->const_view(), + 
segmentsDC_->const_view(), + segmentsDC_->const_view(), + tripletsDC_->view(), + tripletsDC_->view(), + rangesDC_->const_view(), index_gpu_buf.data(), nonZeroModules); @@ -466,9 +557,9 @@ void Event::createTriplets() { alpaka::exec(queue_, addTripletRangesToEventExplicit_workDiv, AddTripletRangesToEventExplicit{}, - *modulesBuffers_.data(), - *tripletsInGPU_, - *rangesInGPU_); + modules_.const_view(), + tripletsDC_->const_view(), + rangesDC_->view()); if (addObjects_) { addTripletsToEventExplicit(); @@ -476,10 +567,10 @@ void Event::createTriplets() { } void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { - if (!trackCandidatesInGPU_) { - trackCandidatesInGPU_.emplace(); - trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_); - trackCandidatesInGPU_->setData(*trackCandidatesBuffers_); + if (!trackCandidatesDC_) { + trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + auto buf = trackCandidatesDC_->buffer(); + alpaka::memset(queue_, buf, 0u); } Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; @@ -490,39 +581,42 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { alpaka::exec(queue_, crossCleanpT3_workDiv, CrossCleanpT3{}, - *modulesBuffers_.data(), - *rangesInGPU_, - *pixelTripletsInGPU_, - *segmentsInGPU_, - *pixelQuintupletsInGPU_); + modules_.const_view(), + rangesDC_->const_view(), + pixelTripletsDC_->view(), + segmentsDC_->const_view(), + pixelQuintupletsDC_->const_view()); - WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); + WorkDiv1D const addpT3asTrackCandidates_workDiv = createWorkDiv({1}, {512}, {1}); alpaka::exec(queue_, - addpT3asTrackCandidatesInGPU_workDiv, - AddpT3asTrackCandidatesInGPU{}, + addpT3asTrackCandidates_workDiv, + AddpT3asTrackCandidates{}, nLowerModules_, - *pixelTripletsInGPU_, - *trackCandidatesInGPU_, - *segmentsInGPU_, - 
*rangesInGPU_); + pixelTripletsDC_->const_view(), + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + rangesDC_->const_view()); // Pull nEligibleT5Modules from the device. + auto rangesOccupancy = rangesDC_->view(); auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nEligibleModules_buf_h, rangesBuffers_->nEligibleT5Modules_buf); + auto nEligibleModules_buf_d = alpaka::createView(devAcc_, &rangesOccupancy.nEligibleT5Modules(), (Idx)1u); + alpaka::memcpy(queue_, nEligibleModules_buf_h, nEligibleModules_buf_d); alpaka::wait(queue_); // wait to get the value before using auto const nEligibleModules = *nEligibleModules_buf_h.data(); Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; - WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = + WorkDiv3D const removeDupQuintupletsBeforeTC_workDiv = createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); alpaka::exec(queue_, - removeDupQuintupletsInGPUBeforeTC_workDiv, - RemoveDupQuintupletsInGPUBeforeTC{}, - *quintupletsInGPU_, - *rangesInGPU_); + removeDupQuintupletsBeforeTC_workDiv, + RemoveDupQuintupletsBeforeTC{}, + quintupletsDC_->view(), + quintupletsDC_->view(), + rangesDC_->const_view()); Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks}; @@ -532,24 +626,26 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { alpaka::exec(queue_, crossCleanT5_workDiv, CrossCleanT5{}, - *modulesBuffers_.data(), - *quintupletsInGPU_, - *pixelQuintupletsInGPU_, - *pixelTripletsInGPU_, - *rangesInGPU_); - - Vec3D const threadsPerBlock_addT5asTrackCandidateInGPU{1, 8, 128}; - Vec3D const blocksPerGrid_addT5asTrackCandidateInGPU{1, 8, 10}; - WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = createWorkDiv( - 
blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, elementsPerThread); + modules_.const_view(), + quintupletsDC_->view(), + quintupletsDC_->const_view(), + pixelQuintupletsDC_->const_view(), + pixelTripletsDC_->const_view(), + rangesDC_->const_view()); + + Vec3D const threadsPerBlock_addT5asTrackCandidate{1, 8, 128}; + Vec3D const blocksPerGrid_addT5asTrackCandidate{1, 8, 10}; + WorkDiv3D const addT5asTrackCandidate_workDiv = + createWorkDiv(blocksPerGrid_addT5asTrackCandidate, threadsPerBlock_addT5asTrackCandidate, elementsPerThread); alpaka::exec(queue_, - addT5asTrackCandidateInGPU_workDiv, - AddT5asTrackCandidateInGPU{}, + addT5asTrackCandidate_workDiv, + AddT5asTrackCandidate{}, nLowerModules_, - *quintupletsInGPU_, - *trackCandidatesInGPU_, - *rangesInGPU_); + quintupletsDC_->const_view(), + quintupletsDC_->const_view(), + trackCandidatesDC_->view(), + rangesDC_->const_view()); if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; @@ -557,7 +653,13 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { WorkDiv3D const checkHitspLS_workDiv = createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU_, true); + alpaka::exec(queue_, + checkHitspLS_workDiv, + CheckHitspLS{}, + modules_.const_view(), + segmentsDC_->const_view(), + segmentsDC_->view(), + true); } Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; @@ -568,26 +670,29 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { alpaka::exec(queue_, crossCleanpLS_workDiv, CrossCleanpLS{}, - *modulesBuffers_.data(), - *rangesInGPU_, - *pixelTripletsInGPU_, - *trackCandidatesInGPU_, - *segmentsInGPU_, - *mdsInGPU_, - *hitsInGPU_, - *quintupletsInGPU_); - - Vec3D const threadsPerBlock_addpLSasTrackCandidateInGPU{1, 1, 384}; - Vec3D const 
blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, max_blocks}; - WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = createWorkDiv( - blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, elementsPerThread); + modules_.const_view(), + rangesDC_->const_view(), + pixelTripletsDC_->const_view(), + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + segmentsDC_->const_view(), + segmentsDC_->view(), + miniDoubletsDC_->const_view(), + hitsDC_->const_view(), + quintupletsDC_->const_view()); + + Vec3D const threadsPerBlock_addpLSasTrackCandidate{1, 1, 384}; + Vec3D const blocksPerGrid_addpLSasTrackCandidate{1, 1, max_blocks}; + WorkDiv3D const addpLSasTrackCandidate_workDiv = + createWorkDiv(blocksPerGrid_addpLSasTrackCandidate, threadsPerBlock_addpLSasTrackCandidate, elementsPerThread); alpaka::exec(queue_, - addpLSasTrackCandidateInGPU_workDiv, - AddpLSasTrackCandidateInGPU{}, + addpLSasTrackCandidate_workDiv, + AddpLSasTrackCandidate{}, nLowerModules_, - *trackCandidatesInGPU_, - *segmentsInGPU_, + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + segmentsDC_->const_view(), tc_pls_triplets); // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached @@ -595,10 +700,14 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { auto nTrackCanpT3Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); auto nTrackCanpLSHost_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); auto nTrackCanT5Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); - alpaka::memcpy(queue_, nTrackCanpT5Host_buf, trackCandidatesBuffers_->nTrackCandidatespT5_buf); - alpaka::memcpy(queue_, nTrackCanpT3Host_buf, trackCandidatesBuffers_->nTrackCandidatespT3_buf); - alpaka::memcpy(queue_, nTrackCanpLSHost_buf, trackCandidatesBuffers_->nTrackCandidatespLS_buf); - alpaka::memcpy(queue_, nTrackCanT5Host_buf, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + 
alpaka::memcpy( + queue_, nTrackCanpT5Host_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT5(), 1u)); + alpaka::memcpy( + queue_, nTrackCanpT3Host_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT3(), 1u)); + alpaka::memcpy( + queue_, nTrackCanpLSHost_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespLS(), 1u)); + alpaka::memcpy( + queue_, nTrackCanT5Host_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); // wait to get the values before using them auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data(); @@ -617,17 +726,24 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { } void Event::createPixelTriplets() { - if (!pixelTripletsInGPU_) { - pixelTripletsInGPU_.emplace(); - pixelTripletsBuffers_.emplace(n_max_pixel_triplets, devAcc_, queue_); - pixelTripletsInGPU_->setData(*pixelTripletsBuffers_); + if (!pixelTripletsDC_) { + pixelTripletsDC_.emplace(n_max_pixel_triplets, queue_); + auto nPixelTriplets_view = alpaka::createView(devAcc_, &(*pixelTripletsDC_)->nPixelTriplets(), 1u); + alpaka::memset(queue_, nPixelTriplets_view, 0u); + auto totOccupancyPixelTriplets_view = + alpaka::createView(devAcc_, &(*pixelTripletsDC_)->totOccupancyPixelTriplets(), 1u); + alpaka::memset(queue_, totOccupancyPixelTriplets_view, 0u); } + SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); + SegmentsPixelConst segmentsPixel = segmentsDC_->view(); auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); auto pixelTypes_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); - alpaka::memcpy(queue_, superbins_buf, segmentsBuffers_->superbin_buf); - alpaka::memcpy(queue_, pixelTypes_buf, segmentsBuffers_->pixelType_buf); + alpaka::memcpy( + queue_, superbins_buf, alpaka::createView(devAcc_, segmentsPixel.superbin(), 
n_max_pixel_segments_per_module)); + alpaka::memcpy( + queue_, pixelTypes_buf, alpaka::createView(devAcc_, segmentsPixel.pixelType(), n_max_pixel_segments_per_module)); auto const* superbins = superbins_buf.data(); auto const* pixelTypes = pixelTypes_buf.data(); @@ -635,7 +751,9 @@ void Event::createPixelTriplets() { auto nInnerSegments_src_view = alpaka::createView(cms::alpakatools::host(), &nInnerSegments, (size_t)1u); // Create a sub-view for the device buffer - auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); + unsigned int totalModules = nLowerModules_ + 1; + auto dev_view_nSegments_buf = alpaka::createView(devAcc_, segmentsOccupancy.nSegments(), totalModules); + auto dev_view_nSegments = alpaka::createSubView(dev_view_nSegments_buf, (Idx)1u, (Idx)nLowerModules_); alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using @@ -696,18 +814,20 @@ void Event::createPixelTriplets() { Vec3D const threadsPerBlock{1, 4, 32}; Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; - WorkDiv3D const createPixelTripletsInGPUFromMapv2_workDiv = - createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + WorkDiv3D const createPixelTripletsFromMap_workDiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); alpaka::exec(queue_, - createPixelTripletsInGPUFromMapv2_workDiv, - CreatePixelTripletsInGPUFromMapv2{}, - *modulesBuffers_.data(), - *rangesInGPU_, - *mdsInGPU_, - *segmentsInGPU_, - *tripletsInGPU_, - *pixelTripletsInGPU_, + createPixelTripletsFromMap_workDiv, + CreatePixelTripletsFromMap{}, + modules_.const_view(), + modules_.const_view(), + rangesDC_->const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->const_view(), + segmentsDC_->const_view(), + tripletsDC_->view(), + tripletsDC_->const_view(), + pixelTripletsDC_->view(), 
connectedPixelSize_dev_buf.data(), connectedPixelIndex_dev_buf.data(), nInnerSegments); @@ -715,7 +835,7 @@ void Event::createPixelTriplets() { #ifdef WARNINGS auto nPixelTriplets_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); - alpaka::memcpy(queue_, nPixelTriplets_buf, pixelTripletsBuffers_->nPixelTriplets_buf); + alpaka::memcpy(queue_, nPixelTriplets_buf, alpaka::createView(devAcc_, &(*pixelTripletsDC_)->nPixelTriplets(), 1u)); alpaka::wait(queue_); // wait to get the value before using it std::cout << "number of pixel triplets = " << *nPixelTriplets_buf.data() << std::endl; @@ -725,77 +845,93 @@ void Event::createPixelTriplets() { Vec3D const threadsPerBlockDupPixTrip{1, 16, 16}; //seems like more blocks lead to conflicting writes Vec3D const blocksPerGridDupPixTrip{1, 40, 1}; - WorkDiv3D const removeDupPixelTripletsInGPUFromMap_workDiv = + WorkDiv3D const removeDupPixelTripletsFromMap_workDiv = createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); alpaka::exec( - queue_, removeDupPixelTripletsInGPUFromMap_workDiv, RemoveDupPixelTripletsInGPUFromMap{}, *pixelTripletsInGPU_); + queue_, removeDupPixelTripletsFromMap_workDiv, RemoveDupPixelTripletsFromMap{}, pixelTripletsDC_->view()); } void Event::createQuintuplets() { - WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createEligibleModulesListForQuintuplets_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue_, - createEligibleModulesListForQuintupletsGPU_workDiv, - CreateEligibleModulesListForQuintupletsGPU{}, - *modulesBuffers_.data(), - *tripletsInGPU_, - *rangesInGPU_); + createEligibleModulesListForQuintuplets_workDiv, + CreateEligibleModulesListForQuintuplets{}, + modules_.const_view(), + tripletsDC_->const_view(), + rangesDC_->view()); auto nEligibleT5Modules_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); auto nTotalQuintuplets_buf = 
allocBufWrapper(cms::alpakatools::host(), 1, queue_); - - alpaka::memcpy(queue_, nEligibleT5Modules_buf, rangesBuffers_->nEligibleT5Modules_buf); - alpaka::memcpy(queue_, nTotalQuintuplets_buf, rangesBuffers_->device_nTotalQuints_buf); + auto rangesOccupancy = rangesDC_->view(); + auto nEligibleT5Modules_view_d = alpaka::createView(devAcc_, &rangesOccupancy.nEligibleT5Modules(), (Idx)1u); + auto nTotalQuintuplets_view_d = alpaka::createView(devAcc_, &rangesOccupancy.nTotalQuints(), (Idx)1u); + alpaka::memcpy(queue_, nEligibleT5Modules_buf, nEligibleT5Modules_view_d); + alpaka::memcpy(queue_, nTotalQuintuplets_buf, nTotalQuintuplets_view_d); alpaka::wait(queue_); // wait for the values before using them auto nEligibleT5Modules = *nEligibleT5Modules_buf.data(); auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); - if (!quintupletsInGPU_) { - quintupletsInGPU_.emplace(); - quintupletsBuffers_.emplace(nTotalQuintuplets, nLowerModules_, devAcc_, queue_); - quintupletsInGPU_->setData(*quintupletsBuffers_); - - alpaka::memcpy(queue_, quintupletsBuffers_->nMemoryLocations_buf, nTotalQuintuplets_buf); + if (!quintupletsDC_) { + std::array const quintuplets_sizes{{static_cast(nTotalQuintuplets), static_cast(nLowerModules_)}}; + quintupletsDC_.emplace(quintuplets_sizes, queue_); + auto quintupletsOccupancy = quintupletsDC_->view(); + auto nQuintuplets_view = + alpaka::createView(devAcc_, quintupletsOccupancy.nQuintuplets(), quintupletsOccupancy.metadata().size()); + alpaka::memset(queue_, nQuintuplets_view, 0u); + auto totOccupancyQuintuplets_view = alpaka::createView( + devAcc_, quintupletsOccupancy.totOccupancyQuintuplets(), quintupletsOccupancy.metadata().size()); + alpaka::memset(queue_, totOccupancyQuintuplets_view, 0u); + auto quintuplets = quintupletsDC_->view(); + auto isDup_view = alpaka::createView(devAcc_, quintuplets.isDup(), quintuplets.metadata().size()); + alpaka::memset(queue_, isDup_view, 0u); + auto tightCutFlag_view = alpaka::createView(devAcc_, 
quintuplets.tightCutFlag(), quintuplets.metadata().size()); + alpaka::memset(queue_, tightCutFlag_view, 0u); + auto partOfPT5_view = alpaka::createView(devAcc_, quintuplets.partOfPT5(), quintuplets.metadata().size()); + alpaka::memset(queue_, partOfPT5_view, 0u); } Vec3D const threadsPerBlockQuints{1, 8, 32}; Vec3D const blocksPerGridQuints{std::max((int)nEligibleT5Modules, 1), 1, 1}; - WorkDiv3D const createQuintupletsInGPUv2_workDiv = + WorkDiv3D const createQuintuplets_workDiv = createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); alpaka::exec(queue_, - createQuintupletsInGPUv2_workDiv, - CreateQuintupletsInGPUv2{}, - *modulesBuffers_.data(), - *mdsInGPU_, - *segmentsInGPU_, - *tripletsInGPU_, - *quintupletsInGPU_, - *rangesInGPU_, + createQuintuplets_workDiv, + CreateQuintuplets{}, + modules_.const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->const_view(), + tripletsDC_->view(), + tripletsDC_->const_view(), + quintupletsDC_->view(), + quintupletsDC_->view(), + rangesDC_->const_view(), nEligibleT5Modules); Vec3D const threadsPerBlockDupQuint{1, 16, 16}; Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1}; - WorkDiv3D const removeDupQuintupletsInGPUAfterBuild_workDiv = + WorkDiv3D const removeDupQuintupletsAfterBuild_workDiv = createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); alpaka::exec(queue_, - removeDupQuintupletsInGPUAfterBuild_workDiv, - RemoveDupQuintupletsInGPUAfterBuild{}, - *modulesBuffers_.data(), - *quintupletsInGPU_, - *rangesInGPU_); + removeDupQuintupletsAfterBuild_workDiv, + RemoveDupQuintupletsAfterBuild{}, + modules_.const_view(), + quintupletsDC_->view(), + quintupletsDC_->const_view(), + rangesDC_->const_view()); WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue_, addQuintupletRangesToEventExplicit_workDiv, AddQuintupletRangesToEventExplicit{}, - *modulesBuffers_.data(), - *quintupletsInGPU_, - *rangesInGPU_); + 
modules_.const_view(), + quintupletsDC_->const_view(), + rangesDC_->view()); if (addObjects_) { addQuintupletsToEventExplicit(); @@ -809,27 +945,40 @@ void Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { WorkDiv3D const checkHitspLS_workDiv = createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU_, false); + alpaka::exec(queue_, + checkHitspLS_workDiv, + CheckHitspLS{}, + modules_.const_view(), + segmentsDC_->const_view(), + segmentsDC_->view(), + false); } } void Event::createPixelQuintuplets() { - if (!pixelQuintupletsInGPU_) { - pixelQuintupletsInGPU_.emplace(); - pixelQuintupletsBuffers_.emplace(n_max_pixel_quintuplets, devAcc_, queue_); - pixelQuintupletsInGPU_->setData(*pixelQuintupletsBuffers_); + if (!pixelQuintupletsDC_) { + pixelQuintupletsDC_.emplace(n_max_pixel_quintuplets, queue_); + auto nPixelQuintuplets_view = alpaka::createView(devAcc_, &(*pixelQuintupletsDC_)->nPixelQuintuplets(), 1u); + alpaka::memset(queue_, nPixelQuintuplets_view, 0u); + auto totOccupancyPixelQuintuplets_view = + alpaka::createView(devAcc_, &(*pixelQuintupletsDC_)->totOccupancyPixelQuintuplets(), 1u); + alpaka::memset(queue_, totOccupancyPixelQuintuplets_view, 0u); } - if (!trackCandidatesInGPU_) { - trackCandidatesInGPU_.emplace(); - trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_); - trackCandidatesInGPU_->setData(*trackCandidatesBuffers_); + if (!trackCandidatesDC_) { + trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + auto buf = trackCandidatesDC_->buffer(); + alpaka::memset(queue_, buf, 0u); } + SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); + SegmentsPixelConst segmentsPixel = segmentsDC_->view(); auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); auto 
pixelTypes_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); - alpaka::memcpy(queue_, superbins_buf, segmentsBuffers_->superbin_buf); - alpaka::memcpy(queue_, pixelTypes_buf, segmentsBuffers_->pixelType_buf); + alpaka::memcpy( + queue_, superbins_buf, alpaka::createView(devAcc_, segmentsPixel.superbin(), n_max_pixel_segments_per_module)); + alpaka::memcpy( + queue_, pixelTypes_buf, alpaka::createView(devAcc_, segmentsPixel.pixelType(), n_max_pixel_segments_per_module)); auto const* superbins = superbins_buf.data(); auto const* pixelTypes = pixelTypes_buf.data(); @@ -837,7 +986,9 @@ void Event::createPixelQuintuplets() { auto nInnerSegments_src_view = alpaka::createView(cms::alpakatools::host(), &nInnerSegments, (size_t)1u); // Create a sub-view for the device buffer - auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); + unsigned int totalModules = nLowerModules_ + 1; + auto dev_view_nSegments_buf = alpaka::createView(devAcc_, segmentsOccupancy.nSegments(), totalModules); + auto dev_view_nSegments = alpaka::createSubView(dev_view_nSegments_buf, (Idx)1u, (Idx)nLowerModules_); alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using @@ -897,48 +1048,52 @@ void Event::createPixelQuintuplets() { Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1}; - WorkDiv3D const createPixelQuintupletsInGPUFromMapv2_workDiv = + WorkDiv3D const createPixelQuintupletsFromMap_workDiv = createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); alpaka::exec(queue_, - createPixelQuintupletsInGPUFromMapv2_workDiv, - CreatePixelQuintupletsInGPUFromMapv2{}, - *modulesBuffers_.data(), - *mdsInGPU_, - *segmentsInGPU_, - *tripletsInGPU_, - *quintupletsInGPU_, - *pixelQuintupletsInGPU_, + 
createPixelQuintupletsFromMap_workDiv, + CreatePixelQuintupletsFromMap{}, + modules_.const_view(), + modules_.const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->const_view(), + segmentsDC_->view(), + tripletsDC_->view(), + quintupletsDC_->view(), + quintupletsDC_->const_view(), + pixelQuintupletsDC_->view(), connectedPixelSize_dev_buf.data(), connectedPixelIndex_dev_buf.data(), nInnerSegments, - *rangesInGPU_); + rangesDC_->const_view()); Vec3D const threadsPerBlockDupPix{1, 16, 16}; Vec3D const blocksPerGridDupPix{1, max_blocks, 1}; - WorkDiv3D const removeDupPixelQuintupletsInGPUFromMap_workDiv = + WorkDiv3D const removeDupPixelQuintupletsFromMap_workDiv = createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread); alpaka::exec(queue_, - removeDupPixelQuintupletsInGPUFromMap_workDiv, - RemoveDupPixelQuintupletsInGPUFromMap{}, - *pixelQuintupletsInGPU_); + removeDupPixelQuintupletsFromMap_workDiv, + RemoveDupPixelQuintupletsFromMap{}, + pixelQuintupletsDC_->view()); - WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = createWorkDiv({1}, {256}, {1}); + WorkDiv1D const addpT5asTrackCandidate_workDiv = createWorkDiv({1}, {256}, {1}); alpaka::exec(queue_, - addpT5asTrackCandidateInGPU_workDiv, - AddpT5asTrackCandidateInGPU{}, + addpT5asTrackCandidate_workDiv, + AddpT5asTrackCandidate{}, nLowerModules_, - *pixelQuintupletsInGPU_, - *trackCandidatesInGPU_, - *segmentsInGPU_, - *rangesInGPU_); + pixelQuintupletsDC_->const_view(), + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + rangesDC_->const_view()); #ifdef WARNINGS auto nPixelQuintuplets_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); - alpaka::memcpy(queue_, nPixelQuintuplets_buf, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); + alpaka::memcpy( + queue_, nPixelQuintuplets_buf, alpaka::createView(devAcc_, &(*pixelQuintupletsDC_)->nPixelQuintuplets(), 1u)); alpaka::wait(queue_); // wait to get the value before using it std::cout << "number of pixel quintuplets 
= " << *nPixelQuintuplets_buf.data() << std::endl; @@ -947,17 +1102,25 @@ void Event::createPixelQuintuplets() { void Event::addMiniDoubletsToEventExplicit() { auto nMDsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, nMDsCPU_buf, miniDoubletsBuffers_->nMDs_buf, nLowerModules_); + auto mdsOccupancy = miniDoubletsDC_->const_view(); + auto nMDs_view = alpaka::createView(devAcc_, mdsOccupancy.nMDs(), nLowerModules_); // exclude pixel part + alpaka::memcpy(queue_, nMDsCPU_buf, nMDs_view, nLowerModules_); + + auto modules = modules_.const_view(); // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); + auto module_subdets_view = alpaka::createView(devAcc_, modules.subdets(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_); auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); + auto module_layers_view = alpaka::createView(devAcc_, modules.layers(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); - auto module_hitRanges_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_ * 2, queue_); - alpaka::memcpy(queue_, module_hitRanges_buf, hitsBuffers_->hitRanges_buf, nLowerModules_ * 2u); + auto module_hitRanges_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + auto hits = hitsDC_->view(); + auto hitRanges_view = alpaka::createView(devAcc_, hits.hitRanges(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_hitRanges_buf, hitRanges_view, nLowerModules_); alpaka::wait(queue_); // wait for inputs before using them @@ -967,7 +1130,7 @@ void 
Event::addMiniDoubletsToEventExplicit() { auto const* module_hitRanges = module_hitRanges_buf.data(); for (unsigned int i = 0; i < nLowerModules_; i++) { - if (!(nMDsCPU[i] == 0 or module_hitRanges[i * 2] == -1)) { + if (!(nMDsCPU[i] == 0 or module_hitRanges[i][0] == -1)) { if (module_subdets[i] == Barrel) { n_minidoublets_by_layer_barrel_[module_layers[i] - 1] += nMDsCPU[i]; } else { @@ -979,14 +1142,20 @@ void Event::addMiniDoubletsToEventExplicit() { void Event::addSegmentsToEventExplicit() { auto nSegmentsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, nSegmentsCPU_buf, segmentsBuffers_->nSegments_buf, nLowerModules_); + auto nSegments_buf = + alpaka::createView(devAcc_, segmentsDC_->const_view().nSegments(), nLowerModules_); + alpaka::memcpy(queue_, nSegmentsCPU_buf, nSegments_buf, nLowerModules_); + + auto modules = modules_.const_view(); // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); + auto module_subdets_view = alpaka::createView(devAcc_, modules.subdets(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_); auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); + auto module_layers_view = alpaka::createView(devAcc_, modules.layers(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); alpaka::wait(queue_); // wait for inputs before using them @@ -1006,18 +1175,27 @@ void Event::addSegmentsToEventExplicit() { } void Event::addQuintupletsToEventExplicit() { + auto quintupletsOccupancy = quintupletsDC_->const_view(); + auto nQuintuplets_view = alpaka::createView(devAcc_, 
quintupletsOccupancy.nQuintuplets(), nLowerModules_); auto nQuintupletsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, nQuintupletsCPU_buf, quintupletsBuffers_->nQuintuplets_buf); + alpaka::memcpy(queue_, nQuintupletsCPU_buf, nQuintuplets_view); + + auto modules = modules_.const_view(); // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nModules_, queue_); - alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_); + auto module_subdets_view = alpaka::createView(devAcc_, modules.subdets(), modules.metadata().size()); + alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nModules_); auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); + auto module_layers_view = alpaka::createView(devAcc_, modules.layers(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); auto module_quintupletModuleIndices_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, module_quintupletModuleIndices_buf, rangesBuffers_->quintupletModuleIndices_buf); + auto rangesOccupancy = rangesDC_->view(); + auto quintupletModuleIndices_view_d = + alpaka::createView(devAcc_, rangesOccupancy.quintupletModuleIndices(), nLowerModules_); + alpaka::memcpy(queue_, module_quintupletModuleIndices_buf, quintupletModuleIndices_view_d); alpaka::wait(queue_); // wait for inputs before using them @@ -1038,15 +1216,21 @@ void Event::addQuintupletsToEventExplicit() { } void Event::addTripletsToEventExplicit() { + auto tripletsOccupancy = tripletsDC_->const_view(); + auto nTriplets_view = alpaka::createView(devAcc_, tripletsOccupancy.nTriplets(), nLowerModules_); auto nTripletsCPU_buf = allocBufWrapper(cms::alpakatools::host(), 
nLowerModules_, queue_); - alpaka::memcpy(queue_, nTripletsCPU_buf, tripletsBuffers_->nTriplets_buf); + alpaka::memcpy(queue_, nTripletsCPU_buf, nTriplets_view); + + auto modules = modules_.const_view(); // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); + auto module_subdets_view = alpaka::createView(devAcc_, modules.subdets(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_); auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); + auto module_layers_view = alpaka::createView(devAcc_, modules.layers(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); alpaka::wait(queue_); // wait for inputs before using them @@ -1164,7 +1348,7 @@ unsigned int Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { retur int Event::getNumberOfPixelTriplets() { auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nPixelTriplets_buf_h, pixelTripletsBuffers_->nPixelTriplets_buf); + alpaka::memcpy(queue_, nPixelTriplets_buf_h, alpaka::createView(devAcc_, &(*pixelTripletsDC_)->nPixelTriplets(), 1u)); alpaka::wait(queue_); return *nPixelTriplets_buf_h.data(); @@ -1173,7 +1357,8 @@ int Event::getNumberOfPixelTriplets() { int Event::getNumberOfPixelQuintuplets() { auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); + alpaka::memcpy( + queue_, nPixelQuintuplets_buf_h, alpaka::createView(devAcc_, &(*pixelQuintupletsDC_)->nPixelQuintuplets(), 1u)); alpaka::wait(queue_); return 
*nPixelQuintuplets_buf_h.data(); @@ -1209,7 +1394,8 @@ unsigned int Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { int Event::getNumberOfTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); + alpaka::memcpy( + queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); alpaka::wait(queue_); return *nTrackCandidates_buf_h.data(); @@ -1218,7 +1404,9 @@ int Event::getNumberOfTrackCandidates() { int Event::getNumberOfPT5TrackCandidates() { auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers_->nTrackCandidatespT5_buf); + alpaka::memcpy(queue_, + nTrackCandidatesPT5_buf_h, + alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT5(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesPT5_buf_h.data(); @@ -1227,7 +1415,9 @@ int Event::getNumberOfPT5TrackCandidates() { int Event::getNumberOfPT3TrackCandidates() { auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers_->nTrackCandidatespT3_buf); + alpaka::memcpy(queue_, + nTrackCandidatesPT3_buf_h, + alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT3(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesPT3_buf_h.data(); @@ -1236,7 +1426,9 @@ int Event::getNumberOfPT3TrackCandidates() { int Event::getNumberOfPLSTrackCandidates() { auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers_->nTrackCandidatespLS_buf); + alpaka::memcpy(queue_, + nTrackCandidatesPLS_buf_h, + alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespLS(), 1u)); alpaka::wait(queue_); return 
*nTrackCandidatesPLS_buf_h.data(); @@ -1246,8 +1438,10 @@ int Event::getNumberOfPixelTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); - alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::memcpy( + queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); + alpaka::memcpy( + queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); @@ -1256,368 +1450,242 @@ int Event::getNumberOfPixelTrackCandidates() { int Event::getNumberOfT5TrackCandidates() { auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::memcpy( + queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesT5_buf_h.data(); } -HitsBuffer& Event::getHits(bool sync) //std::shared_ptr should take care of garbage collection +template +typename TSoA::ConstView Event::getHits(bool sync) //std::shared_ptr should take care of garbage collection { - if (!hitsInCPU_) { - auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nHits_buf_h, hitsBuffers_->nHits_buf); - alpaka::wait(queue_); // wait for the value before using - - auto const nHits = *nHits_buf_h.data(); - hitsInCPU_.emplace(nModules_, nHits, cms::alpakatools::host(), queue_); - hitsInCPU_->setData(*hitsInCPU_); - - alpaka::memcpy(queue_, hitsInCPU_->nHits_buf, hitsBuffers_->nHits_buf); - 
alpaka::memcpy(queue_, hitsInCPU_->idxs_buf, hitsBuffers_->idxs_buf, nHits); - alpaka::memcpy(queue_, hitsInCPU_->detid_buf, hitsBuffers_->detid_buf, nHits); - alpaka::memcpy(queue_, hitsInCPU_->xs_buf, hitsBuffers_->xs_buf, nHits); - alpaka::memcpy(queue_, hitsInCPU_->ys_buf, hitsBuffers_->ys_buf, nHits); - alpaka::memcpy(queue_, hitsInCPU_->zs_buf, hitsBuffers_->zs_buf, nHits); - alpaka::memcpy(queue_, hitsInCPU_->moduleIndices_buf, hitsBuffers_->moduleIndices_buf, nHits); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data + if constexpr (std::is_same_v) { + return hitsDC_->const_view(); + } else { + if (!hitsHC_) { + hitsHC_.emplace(cms::alpakatools::CopyToHost>::copyAsync( + queue_, *hitsDC_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return hitsHC_->const_view(); } - return hitsInCPU_.value(); } - -HitsBuffer& Event::getHitsInCMSSW(bool sync) { - if (!hitsInCPU_) { - auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nHits_buf_h, hitsBuffers_->nHits_buf); - alpaka::wait(queue_); // wait for the value before using - - auto const nHits = *nHits_buf_h.data(); - hitsInCPU_.emplace(nModules_, nHits, cms::alpakatools::host(), queue_); - hitsInCPU_->setData(*hitsInCPU_); - - alpaka::memcpy(queue_, hitsInCPU_->nHits_buf, hitsBuffers_->nHits_buf); - alpaka::memcpy(queue_, hitsInCPU_->idxs_buf, hitsBuffers_->idxs_buf, nHits); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data +template HitsConst Event::getHits(bool); +template HitsRangesConst Event::getHits(bool); + +template +typename TSoA::ConstView Event::getHitsInCMSSW(bool sync) { + if constexpr (std::is_same_v) { + return hitsDC_->const_view(); + } else { + if (!hitsHC_) { + auto hits_d = hitsDC_->view(); + auto nHits = hits_d.metadata().size(); + std::array const hits_sizes{{static_cast(nHits), static_cast(nModules_)}}; + hitsHC_.emplace(hits_sizes, queue_); + auto hits_h = hitsHC_->view(); + 
auto idxs_h = alpaka::createView(cms::alpakatools::host(), hits_h.idxs(), nHits); + auto idxs_d = alpaka::createView(devAcc_, hits_d.idxs(), nHits); + alpaka::memcpy(queue_, idxs_h, idxs_d); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return hitsHC_->const_view(); } - return hitsInCPU_.value(); } - -ObjectRangesBuffer& Event::getRanges(bool sync) { - if (!rangesInCPU_) { - rangesInCPU_.emplace(nModules_, nLowerModules_, cms::alpakatools::host(), queue_); - rangesInCPU_->setData(*rangesInCPU_); - - alpaka::memcpy(queue_, rangesInCPU_->hitRanges_buf, rangesBuffers_->hitRanges_buf); - alpaka::memcpy(queue_, rangesInCPU_->quintupletModuleIndices_buf, rangesBuffers_->quintupletModuleIndices_buf); - alpaka::memcpy(queue_, rangesInCPU_->miniDoubletModuleIndices_buf, rangesBuffers_->miniDoubletModuleIndices_buf); - alpaka::memcpy(queue_, rangesInCPU_->segmentModuleIndices_buf, rangesBuffers_->segmentModuleIndices_buf); - alpaka::memcpy(queue_, rangesInCPU_->tripletModuleIndices_buf, rangesBuffers_->tripletModuleIndices_buf); - if (sync) - alpaka::wait(queue_); // wait to get completed host data +template HitsConst Event::getHitsInCMSSW(bool); +template HitsRangesConst Event::getHitsInCMSSW(bool); + +template +ObjectRangesConst Event::getRanges(bool sync) { + if constexpr (std::is_same_v) { + return rangesDC_->const_view(); + } else { + if (!rangesHC_) { + rangesHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync(queue_, *rangesDC_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return rangesHC_->const_view(); } - return rangesInCPU_.value(); } - -MiniDoubletsBuffer& Event::getMiniDoublets(bool sync) { - if (!mdsInCPU_) { - // Get nMemoryLocations parameter to initialize host based mdsInCPU_ - auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nMemHost_buf_h, miniDoubletsBuffers_->nMemoryLocations_buf); - alpaka::wait(queue_); // wait for the value before 
using - - auto const nMemHost = *nMemHost_buf_h.data(); - mdsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_); - mdsInCPU_->setData(*mdsInCPU_); - - alpaka::memcpy(queue_, mdsInCPU_->nMemoryLocations_buf, miniDoubletsBuffers_->nMemoryLocations_buf); - alpaka::memcpy(queue_, mdsInCPU_->anchorHitIndices_buf, miniDoubletsBuffers_->anchorHitIndices_buf, nMemHost); - alpaka::memcpy(queue_, mdsInCPU_->outerHitIndices_buf, miniDoubletsBuffers_->outerHitIndices_buf, nMemHost); - alpaka::memcpy(queue_, mdsInCPU_->dphichanges_buf, miniDoubletsBuffers_->dphichanges_buf, nMemHost); - alpaka::memcpy(queue_, mdsInCPU_->nMDs_buf, miniDoubletsBuffers_->nMDs_buf); - alpaka::memcpy(queue_, mdsInCPU_->totOccupancyMDs_buf, miniDoubletsBuffers_->totOccupancyMDs_buf); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data +template ObjectRangesConst Event::getRanges<>(bool); + +template +typename TSoA::ConstView Event::getMiniDoublets(bool sync) { + if constexpr (std::is_same_v) { + return miniDoubletsDC_->const_view(); + } else { + if (!miniDoubletsHC_) { + miniDoubletsHC_.emplace( + cms::alpakatools::CopyToHost< + PortableMultiCollection>::copyAsync(queue_, + *miniDoubletsDC_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return miniDoubletsHC_->const_view(); } - return mdsInCPU_.value(); } - -SegmentsBuffer& Event::getSegments(bool sync) { - if (!segmentsInCPU_) { - // Get nMemoryLocations parameter to initialize host based segmentsInCPU_ - auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nMemHost_buf_h, segmentsBuffers_->nMemoryLocations_buf); - alpaka::wait(queue_); // wait for the value before using - - auto const nMemHost = *nMemHost_buf_h.data(); - segmentsInCPU_.emplace(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, cms::alpakatools::host(), queue_); - segmentsInCPU_->setData(*segmentsInCPU_); - - alpaka::memcpy(queue_, 
segmentsInCPU_->nMemoryLocations_buf, segmentsBuffers_->nMemoryLocations_buf); - alpaka::memcpy(queue_, segmentsInCPU_->nSegments_buf, segmentsBuffers_->nSegments_buf); - alpaka::memcpy(queue_, segmentsInCPU_->mdIndices_buf, segmentsBuffers_->mdIndices_buf, 2u * nMemHost); - alpaka::memcpy(queue_, - segmentsInCPU_->innerMiniDoubletAnchorHitIndices_buf, - segmentsBuffers_->innerMiniDoubletAnchorHitIndices_buf, - nMemHost); - alpaka::memcpy(queue_, - segmentsInCPU_->outerMiniDoubletAnchorHitIndices_buf, - segmentsBuffers_->outerMiniDoubletAnchorHitIndices_buf, - nMemHost); - alpaka::memcpy(queue_, segmentsInCPU_->totOccupancySegments_buf, segmentsBuffers_->totOccupancySegments_buf); - alpaka::memcpy(queue_, segmentsInCPU_->ptIn_buf, segmentsBuffers_->ptIn_buf); - alpaka::memcpy(queue_, segmentsInCPU_->eta_buf, segmentsBuffers_->eta_buf); - alpaka::memcpy(queue_, segmentsInCPU_->phi_buf, segmentsBuffers_->phi_buf); - alpaka::memcpy(queue_, segmentsInCPU_->seedIdx_buf, segmentsBuffers_->seedIdx_buf); - alpaka::memcpy(queue_, segmentsInCPU_->isDup_buf, segmentsBuffers_->isDup_buf); - alpaka::memcpy(queue_, segmentsInCPU_->isQuad_buf, segmentsBuffers_->isQuad_buf); - alpaka::memcpy(queue_, segmentsInCPU_->score_buf, segmentsBuffers_->score_buf); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data +template MiniDoubletsConst Event::getMiniDoublets(bool); +template MiniDoubletsOccupancyConst Event::getMiniDoublets(bool); + +template +typename TSoA::ConstView Event::getSegments(bool sync) { + if constexpr (std::is_same_v) { + return segmentsDC_->const_view(); + } else { + if (!segmentsHC_) { + segmentsHC_.emplace( + cms::alpakatools:: + CopyToHost>::copyAsync( + queue_, *segmentsDC_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return segmentsHC_->const_view(); } - return segmentsInCPU_.value(); } - -TripletsBuffer& Event::getTriplets(bool sync) { - if (!tripletsInCPU_) { - // Get nMemoryLocations parameter to 
initialize host based tripletsInCPU_ - auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nMemHost_buf_h, tripletsBuffers_->nMemoryLocations_buf); - alpaka::wait(queue_); // wait for the value before using - - auto const nMemHost = *nMemHost_buf_h.data(); - tripletsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_); - tripletsInCPU_->setData(*tripletsInCPU_); - - alpaka::memcpy(queue_, tripletsInCPU_->nMemoryLocations_buf, tripletsBuffers_->nMemoryLocations_buf); -#ifdef CUT_VALUE_DEBUG - alpaka::memcpy(queue_, tripletsInCPU_->zOut_buf, tripletsBuffers_->zOut_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->zLo_buf, tripletsBuffers_->zLo_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->zHi_buf, tripletsBuffers_->zHi_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->zLoPointed_buf, tripletsBuffers_->zLoPointed_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->zHiPointed_buf, tripletsBuffers_->zHiPointed_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->dPhiCut_buf, tripletsBuffers_->dPhiCut_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->betaInCut_buf, tripletsBuffers_->betaInCut_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->rtLo_buf, tripletsBuffers_->rtLo_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->rtHi_buf, tripletsBuffers_->rtHi_buf, nMemHost); -#endif - alpaka::memcpy( - queue_, tripletsInCPU_->hitIndices_buf, tripletsBuffers_->hitIndices_buf, Params_T3::kHits * nMemHost); - alpaka::memcpy( - queue_, tripletsInCPU_->logicalLayers_buf, tripletsBuffers_->logicalLayers_buf, Params_T3::kLayers * nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->segmentIndices_buf, tripletsBuffers_->segmentIndices_buf, 2 * nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->betaIn_buf, tripletsBuffers_->betaIn_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->circleRadius_buf, tripletsBuffers_->circleRadius_buf, nMemHost); - 
alpaka::memcpy(queue_, tripletsInCPU_->nTriplets_buf, tripletsBuffers_->nTriplets_buf); - alpaka::memcpy(queue_, tripletsInCPU_->totOccupancyTriplets_buf, tripletsBuffers_->totOccupancyTriplets_buf); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data +template SegmentsConst Event::getSegments(bool); +template SegmentsOccupancyConst Event::getSegments(bool); +template SegmentsPixelConst Event::getSegments(bool); + +template +typename TSoA::ConstView Event::getTriplets(bool sync) { + if constexpr (std::is_same_v) { + return tripletsDC_->const_view(); + } else { + if (!tripletsHC_) { + tripletsHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync( + queue_, *tripletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } } - return tripletsInCPU_.value(); + return tripletsHC_->const_view(); } - -QuintupletsBuffer& Event::getQuintuplets(bool sync) { - if (!quintupletsInCPU_) { - // Get nMemoryLocations parameter to initialize host based quintupletsInCPU_ - auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nMemHost_buf_h, quintupletsBuffers_->nMemoryLocations_buf); - alpaka::wait(queue_); // wait for the value before using - - auto const nMemHost = *nMemHost_buf_h.data(); - quintupletsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_); - quintupletsInCPU_->setData(*quintupletsInCPU_); - - alpaka::memcpy(queue_, quintupletsInCPU_->nMemoryLocations_buf, quintupletsBuffers_->nMemoryLocations_buf); - alpaka::memcpy(queue_, quintupletsInCPU_->nQuintuplets_buf, quintupletsBuffers_->nQuintuplets_buf); - alpaka::memcpy( - queue_, quintupletsInCPU_->totOccupancyQuintuplets_buf, quintupletsBuffers_->totOccupancyQuintuplets_buf); - alpaka::memcpy( - queue_, quintupletsInCPU_->tripletIndices_buf, quintupletsBuffers_->tripletIndices_buf, 2 * nMemHost); - alpaka::memcpy(queue_, - quintupletsInCPU_->lowerModuleIndices_buf, - 
quintupletsBuffers_->lowerModuleIndices_buf, - Params_T5::kLayers * nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->innerRadius_buf, quintupletsBuffers_->innerRadius_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->bridgeRadius_buf, quintupletsBuffers_->bridgeRadius_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->outerRadius_buf, quintupletsBuffers_->outerRadius_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->isDup_buf, quintupletsBuffers_->isDup_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->score_rphisum_buf, quintupletsBuffers_->score_rphisum_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->eta_buf, quintupletsBuffers_->eta_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->phi_buf, quintupletsBuffers_->phi_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->chiSquared_buf, quintupletsBuffers_->chiSquared_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->rzChiSquared_buf, quintupletsBuffers_->rzChiSquared_buf, nMemHost); - alpaka::memcpy( - queue_, quintupletsInCPU_->nonAnchorChiSquared_buf, quintupletsBuffers_->nonAnchorChiSquared_buf, nMemHost); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data +template TripletsConst Event::getTriplets(bool); +template TripletsOccupancyConst Event::getTriplets(bool); + +template +typename TSoA::ConstView Event::getQuintuplets(bool sync) { + if constexpr (std::is_same_v) { + return quintupletsDC_->const_view(); + } else { + if (!quintupletsHC_) { + quintupletsHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync( + queue_, *quintupletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } } - return quintupletsInCPU_.value(); + return quintupletsHC_->const_view(); } - -PixelTripletsBuffer& Event::getPixelTriplets(bool sync) { - if (!pixelTripletsInCPU_) { - // Get nPixelTriplets parameter to initialize host based quintupletsInCPU_ - auto nPixelTriplets_buf_h = 
cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nPixelTriplets_buf_h, pixelTripletsBuffers_->nPixelTriplets_buf); - alpaka::wait(queue_); // wait for the value before using - - auto const nPixelTriplets = *nPixelTriplets_buf_h.data(); - pixelTripletsInCPU_.emplace(nPixelTriplets, cms::alpakatools::host(), queue_); - pixelTripletsInCPU_->setData(*pixelTripletsInCPU_); - - alpaka::memcpy(queue_, pixelTripletsInCPU_->nPixelTriplets_buf, pixelTripletsBuffers_->nPixelTriplets_buf); - alpaka::memcpy(queue_, - pixelTripletsInCPU_->totOccupancyPixelTriplets_buf, - pixelTripletsBuffers_->totOccupancyPixelTriplets_buf); - alpaka::memcpy( - queue_, pixelTripletsInCPU_->rzChiSquared_buf, pixelTripletsBuffers_->rzChiSquared_buf, nPixelTriplets); - alpaka::memcpy( - queue_, pixelTripletsInCPU_->rPhiChiSquared_buf, pixelTripletsBuffers_->rPhiChiSquared_buf, nPixelTriplets); - alpaka::memcpy(queue_, - pixelTripletsInCPU_->rPhiChiSquaredInwards_buf, - pixelTripletsBuffers_->rPhiChiSquaredInwards_buf, - nPixelTriplets); - alpaka::memcpy( - queue_, pixelTripletsInCPU_->tripletIndices_buf, pixelTripletsBuffers_->tripletIndices_buf, nPixelTriplets); - alpaka::memcpy(queue_, - pixelTripletsInCPU_->pixelSegmentIndices_buf, - pixelTripletsBuffers_->pixelSegmentIndices_buf, - nPixelTriplets); - alpaka::memcpy( - queue_, pixelTripletsInCPU_->pixelRadius_buf, pixelTripletsBuffers_->pixelRadius_buf, nPixelTriplets); - alpaka::memcpy( - queue_, pixelTripletsInCPU_->tripletRadius_buf, pixelTripletsBuffers_->tripletRadius_buf, nPixelTriplets); - alpaka::memcpy(queue_, pixelTripletsInCPU_->isDup_buf, pixelTripletsBuffers_->isDup_buf, nPixelTriplets); - alpaka::memcpy(queue_, pixelTripletsInCPU_->eta_buf, pixelTripletsBuffers_->eta_buf, nPixelTriplets); - alpaka::memcpy(queue_, pixelTripletsInCPU_->phi_buf, pixelTripletsBuffers_->phi_buf, nPixelTriplets); - alpaka::memcpy(queue_, pixelTripletsInCPU_->score_buf, pixelTripletsBuffers_->score_buf, nPixelTriplets); - if (sync) 
- alpaka::wait(queue_); // host consumers expect filled data +template QuintupletsConst Event::getQuintuplets(bool); +template QuintupletsOccupancyConst Event::getQuintuplets(bool); + +template +PixelTripletsConst Event::getPixelTriplets(bool sync) { + if constexpr (std::is_same_v) { + return pixelTripletsDC_->const_view(); + } else { + if (!pixelTripletsHC_) { + pixelTripletsHC_.emplace(cms::alpakatools::CopyToHost<::PortableCollection>::copyAsync( + queue_, *pixelTripletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } } - return pixelTripletsInCPU_.value(); + return pixelTripletsHC_->const_view(); } - -PixelQuintupletsBuffer& Event::getPixelQuintuplets(bool sync) { - if (!pixelQuintupletsInCPU_) { - // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU_ - auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); - alpaka::wait(queue_); // wait for the value before using - - auto const nPixelQuintuplets = *nPixelQuintuplets_buf_h.data(); - pixelQuintupletsInCPU_.emplace(nPixelQuintuplets, cms::alpakatools::host(), queue_); - pixelQuintupletsInCPU_->setData(*pixelQuintupletsInCPU_); - - alpaka::memcpy( - queue_, pixelQuintupletsInCPU_->nPixelQuintuplets_buf, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); - alpaka::memcpy(queue_, - pixelQuintupletsInCPU_->totOccupancyPixelQuintuplets_buf, - pixelQuintupletsBuffers_->totOccupancyPixelQuintuplets_buf); - alpaka::memcpy(queue_, - pixelQuintupletsInCPU_->rzChiSquared_buf, - pixelQuintupletsBuffers_->rzChiSquared_buf, - nPixelQuintuplets); - alpaka::memcpy(queue_, - pixelQuintupletsInCPU_->rPhiChiSquared_buf, - pixelQuintupletsBuffers_->rPhiChiSquared_buf, - nPixelQuintuplets); - alpaka::memcpy(queue_, - pixelQuintupletsInCPU_->rPhiChiSquaredInwards_buf, - pixelQuintupletsBuffers_->rPhiChiSquaredInwards_buf, - nPixelQuintuplets); - 
alpaka::memcpy(queue_, - pixelQuintupletsInCPU_->pixelIndices_buf, - pixelQuintupletsBuffers_->pixelIndices_buf, - nPixelQuintuplets); - alpaka::memcpy( - queue_, pixelQuintupletsInCPU_->T5Indices_buf, pixelQuintupletsBuffers_->T5Indices_buf, nPixelQuintuplets); - alpaka::memcpy(queue_, pixelQuintupletsInCPU_->isDup_buf, pixelQuintupletsBuffers_->isDup_buf, nPixelQuintuplets); - alpaka::memcpy(queue_, pixelQuintupletsInCPU_->score_buf, pixelQuintupletsBuffers_->score_buf, nPixelQuintuplets); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data +template PixelTripletsConst Event::getPixelTriplets<>(bool); + +template +PixelQuintupletsConst Event::getPixelQuintuplets(bool sync) { + if constexpr (std::is_same_v) { + return pixelQuintupletsDC_->const_view(); + } else { + if (!pixelQuintupletsHC_) { + pixelQuintupletsHC_.emplace( + cms::alpakatools::CopyToHost<::PortableCollection>::copyAsync( + queue_, *pixelQuintupletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } } - return pixelQuintupletsInCPU_.value(); + return pixelQuintupletsHC_->const_view(); } +template PixelQuintupletsConst Event::getPixelQuintuplets<>(bool); -TrackCandidatesBuffer& Event::getTrackCandidates(bool sync) { - if (!trackCandidatesInCPU_) { - // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU_ +const TrackCandidatesConst& Event::getTrackCandidatesWithSelection(bool inCMSSW, bool sync) { + if (!trackCandidatesHC_) { + // Get nTrackCanHost parameter to initialize host based instance auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); - trackCandidatesInCPU_.emplace( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_); - trackCandidatesInCPU_->setData(*trackCandidatesInCPU_); - alpaka::wait(queue_); // wait here before we get nTrackCanHost and 
trackCandidatesInCPU_ becomes usable - - auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - - *trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost; - alpaka::memcpy(queue_, - trackCandidatesInCPU_->hitIndices_buf, - trackCandidatesBuffers_->hitIndices_buf, - Params_pT5::kHits * nTrackCanHost); alpaka::memcpy( - queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost); - alpaka::memcpy(queue_, - trackCandidatesInCPU_->logicalLayers_buf, - trackCandidatesBuffers_->logicalLayers_buf, - Params_pT5::kLayers * nTrackCanHost); - alpaka::memcpy(queue_, - trackCandidatesInCPU_->directObjectIndices_buf, - trackCandidatesBuffers_->directObjectIndices_buf, - nTrackCanHost); - alpaka::memcpy(queue_, - trackCandidatesInCPU_->objectIndices_buf, - trackCandidatesBuffers_->objectIndices_buf, - 2 * nTrackCanHost); - alpaka::memcpy(queue_, - trackCandidatesInCPU_->trackCandidateType_buf, - trackCandidatesBuffers_->trackCandidateType_buf, - nTrackCanHost); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data - } - return trackCandidatesInCPU_.value(); -} - -TrackCandidatesBuffer& Event::getTrackCandidatesInCMSSW(bool sync) { - if (!trackCandidatesInCPU_) { - // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU_ - auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); - trackCandidatesInCPU_.emplace( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_); - trackCandidatesInCPU_->setData(*trackCandidatesInCPU_); - alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU_ becomes usable + queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); + alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU 
becomes usable auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); + trackCandidatesHC_.emplace(nTrackCanHost, queue_); - *trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost; - alpaka::memcpy(queue_, - trackCandidatesInCPU_->hitIndices_buf, - trackCandidatesBuffers_->hitIndices_buf, - Params_pT5::kHits * nTrackCanHost); + (*trackCandidatesHC_)->nTrackCandidates() = nTrackCanHost; alpaka::memcpy( - queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost); + queue_, + alpaka::createView( + cms::alpakatools::host(), (*trackCandidatesHC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); alpaka::memcpy(queue_, - trackCandidatesInCPU_->trackCandidateType_buf, - trackCandidatesBuffers_->trackCandidateType_buf, - nTrackCanHost); + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->pixelSeedIndex(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->pixelSeedIndex(), nTrackCanHost)); + if (not inCMSSW) { + alpaka::memcpy(queue_, + alpaka::createView(cms::alpakatools::host(), + (*trackCandidatesHC_)->logicalLayers()->data(), + Params_pT5::kLayers * nTrackCanHost), + alpaka::createView( + devAcc_, (*trackCandidatesDC_)->logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->directObjectIndices(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->directObjectIndices(), nTrackCanHost)); + alpaka::memcpy(queue_, + alpaka::createView( + cms::alpakatools::host(), (*trackCandidatesHC_)->objectIndices()->data(), 2 * nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->objectIndices()->data(), 2 * nTrackCanHost)); + } + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), 
(*trackCandidatesHC_)->trackCandidateType(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->trackCandidateType(), nTrackCanHost)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } - return trackCandidatesInCPU_.value(); + return trackCandidatesHC_.value().const_view(); } -ModulesBuffer& Event::getModules(bool isFull, bool sync) { - if (!modulesInCPU_) { - // The last input here is just a small placeholder for the allocation. - modulesInCPU_.emplace(cms::alpakatools::host(), nModules_, nPixels_); - - modulesInCPU_->copyFromSrc(queue_, modulesBuffers_, isFull); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data +template +typename TSoA::ConstView Event::getModules(bool sync) { + if constexpr (std::is_same_v) { + return modules_.const_view(); + } else { + if (!modulesHC_) { + modulesHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync( + queue_, modules_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return modulesHC_->const_view(); } - return modulesInCPU_.value(); } +template ModulesConst Event::getModules(bool); +template ModulesPixelConst Event::getModules(bool); diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 2b09565cf4176..c692dd9ed43bb 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -3,19 +3,31 @@ #include +#include "RecoTracker/LSTCore/interface/HitsHostCollection.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/QuintupletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/SegmentsHostCollection.h" +#include "RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h" +#include "RecoTracker/LSTCore/interface/TripletsHostCollection.h" 
+#include "RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesHostCollection.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/alpaka/LST.h" -#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h" #include "Hit.h" -#include "Segment.h" -#include "Triplet.h" #include "Kernels.h" -#include "Quintuplet.h" -#include "MiniDoublet.h" -#include "PixelQuintuplet.h" -#include "PixelTriplet.h" -#include "TrackCandidate.h" #include "HeterogeneousCore/AlpakaInterface/interface/host.h" @@ -42,36 +54,27 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int nTotalSegments_; //Device stuff - std::optional rangesInGPU_; - std::optional> rangesBuffers_; - std::optional hitsInGPU_; - std::optional> hitsBuffers_; - std::optional mdsInGPU_; - std::optional> miniDoubletsBuffers_; - std::optional segmentsInGPU_; - std::optional> segmentsBuffers_; - std::optional tripletsInGPU_; - std::optional> tripletsBuffers_; - std::optional quintupletsInGPU_; - std::optional> quintupletsBuffers_; - std::optional trackCandidatesInGPU_; - std::optional> trackCandidatesBuffers_; - std::optional 
pixelTripletsInGPU_; - std::optional> pixelTripletsBuffers_; - std::optional pixelQuintupletsInGPU_; - std::optional> pixelQuintupletsBuffers_; + std::optional rangesDC_; + std::optional hitsDC_; + std::optional miniDoubletsDC_; + std::optional segmentsDC_; + std::optional tripletsDC_; + std::optional quintupletsDC_; + std::optional trackCandidatesDC_; + std::optional pixelTripletsDC_; + std::optional pixelQuintupletsDC_; //CPU interface stuff - std::optional> rangesInCPU_; - std::optional> hitsInCPU_; - std::optional> mdsInCPU_; - std::optional> segmentsInCPU_; - std::optional> tripletsInCPU_; - std::optional> trackCandidatesInCPU_; - std::optional> modulesInCPU_; - std::optional> quintupletsInCPU_; - std::optional> pixelTripletsInCPU_; - std::optional> pixelQuintupletsInCPU_; + std::optional rangesHC_; + std::optional hitsHC_; + std::optional miniDoubletsHC_; + std::optional segmentsHC_; + std::optional tripletsHC_; + std::optional trackCandidatesHC_; + std::optional modulesHC_; + std::optional quintupletsHC_; + std::optional pixelTripletsHC_; + std::optional pixelQuintupletsHC_; void initSync(bool verbose); @@ -79,9 +82,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const uint16_t nLowerModules_; const unsigned int nPixels_; const unsigned int nEndCapMap_; - ModulesBuffer const& modulesBuffers_; + ModulesDeviceCollection const& modules_; PixelMap const& pixelMapping_; - EndcapGeometryBuffer const& endcapGeometryBuffers_; + EndcapGeometryDevDeviceCollection const& endcapGeometry_; public: // Constructor used for CMSSW integration. Uses an external queue. 
@@ -92,9 +95,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { nLowerModules_(deviceESData->nLowerModules), nPixels_(deviceESData->nPixels), nEndCapMap_(deviceESData->nEndCapMap), - modulesBuffers_(deviceESData->modulesBuffers), + modules_(*deviceESData->modules), pixelMapping_(*deviceESData->pixelMapping), - endcapGeometryBuffers_(deviceESData->endcapGeometryBuffers) { + endcapGeometry_(*deviceESData->endcapGeometry) { initSync(verbose); } void resetEventSync(); // synchronizes @@ -180,18 +183,33 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // (has no effect on repeated calls) // set to false may allow faster operation with concurrent calls of get* // HANDLE WITH CARE - HitsBuffer& getHits(bool sync = true); - HitsBuffer& getHitsInCMSSW(bool sync = true); - ObjectRangesBuffer& getRanges(bool sync = true); - MiniDoubletsBuffer& getMiniDoublets(bool sync = true); - SegmentsBuffer& getSegments(bool sync = true); - TripletsBuffer& getTriplets(bool sync = true); - QuintupletsBuffer& getQuintuplets(bool sync = true); - PixelTripletsBuffer& getPixelTriplets(bool sync = true); - PixelQuintupletsBuffer& getPixelQuintuplets(bool sync = true); - TrackCandidatesBuffer& getTrackCandidates(bool sync = true); - TrackCandidatesBuffer& getTrackCandidatesInCMSSW(bool sync = true); - ModulesBuffer& getModules(bool isFull = false, bool sync = true); + template + typename TSoA::ConstView getHits(bool sync = true); + template + typename TSoA::ConstView getHitsInCMSSW(bool sync = true); + template + ObjectRangesConst getRanges(bool sync = true); + template + typename TSoA::ConstView getMiniDoublets(bool sync = true); + template + typename TSoA::ConstView getSegments(bool sync = true); + template + typename TSoA::ConstView getTriplets(bool sync = true); + template + typename TSoA::ConstView getQuintuplets(bool sync = true); + template + PixelTripletsConst getPixelTriplets(bool sync = true); + template + PixelQuintupletsConst getPixelQuintuplets(bool sync = true); + const 
TrackCandidatesConst& getTrackCandidatesWithSelection(bool inCMSSW, bool sync); + const TrackCandidatesConst& getTrackCandidates(bool sync = true) { + return getTrackCandidatesWithSelection(false, sync); + } + const TrackCandidatesConst& getTrackCandidatesInCMSSW(bool sync = true) { + return getTrackCandidatesWithSelection(true, sync); + } + template + typename TSoA::ConstView getModules(bool sync = true); }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h index 3f559f4492df7..758ea03afb992 100644 --- a/RecoTracker/LSTCore/src/alpaka/Hit.h +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -2,109 +2,10 @@ #define RecoTracker_LSTCore_src_alpaka_Hit_h #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" -#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct Hits { - unsigned int* nHits; - float* xs; - float* ys; - float* zs; - uint16_t* moduleIndices; - unsigned int* idxs; - unsigned int* detid; - float* rts; - float* phis; - float* etas; - float* highEdgeXs; - float* highEdgeYs; - float* lowEdgeXs; - float* lowEdgeYs; - int* hitRanges; - int* hitRangesLower; - int* hitRangesUpper; - int8_t* hitRangesnLower; - int8_t* hitRangesnUpper; - - template - void setData(TBuff& buf) { - nHits = buf.nHits_buf.data(); - xs = buf.xs_buf.data(); - ys = buf.ys_buf.data(); - zs = buf.zs_buf.data(); - moduleIndices = buf.moduleIndices_buf.data(); - idxs = buf.idxs_buf.data(); - detid = buf.detid_buf.data(); - rts = buf.rts_buf.data(); - phis = buf.phis_buf.data(); - etas = buf.etas_buf.data(); - highEdgeXs = buf.highEdgeXs_buf.data(); - highEdgeYs = buf.highEdgeYs_buf.data(); - lowEdgeXs = buf.lowEdgeXs_buf.data(); - lowEdgeYs = buf.lowEdgeYs_buf.data(); - hitRanges = buf.hitRanges_buf.data(); - hitRangesLower = 
buf.hitRangesLower_buf.data(); - hitRangesUpper = buf.hitRangesUpper_buf.data(); - hitRangesnLower = buf.hitRangesnLower_buf.data(); - hitRangesnUpper = buf.hitRangesnUpper_buf.data(); - } - }; - - template - struct HitsBuffer { - Buf nHits_buf; - Buf xs_buf; - Buf ys_buf; - Buf zs_buf; - Buf moduleIndices_buf; - Buf idxs_buf; - Buf detid_buf; - Buf rts_buf; - Buf phis_buf; - Buf etas_buf; - Buf highEdgeXs_buf; - Buf highEdgeYs_buf; - Buf lowEdgeXs_buf; - Buf lowEdgeYs_buf; - Buf hitRanges_buf; - Buf hitRangesLower_buf; - Buf hitRangesUpper_buf; - Buf hitRangesnLower_buf; - Buf hitRangesnUpper_buf; - - Hits data_; - - template - HitsBuffer(unsigned int nModules, unsigned int nMaxHits, TDevAcc const& devAccIn, TQueue& queue) - : nHits_buf(allocBufWrapper(devAccIn, 1u, queue)), - xs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - ys_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - zs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - moduleIndices_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - idxs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - detid_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - rts_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - phis_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - etas_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - highEdgeXs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - highEdgeYs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - lowEdgeXs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - lowEdgeYs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), - hitRanges_buf(allocBufWrapper(devAccIn, nModules * 2, queue)), - hitRangesLower_buf(allocBufWrapper(devAccIn, nModules, queue)), - hitRangesUpper_buf(allocBufWrapper(devAccIn, nModules, queue)), - hitRangesnLower_buf(allocBufWrapper(devAccIn, nModules, queue)), - hitRangesnUpper_buf(allocBufWrapper(devAccIn, nModules, queue)) { - alpaka::memset(queue, hitRanges_buf, 0xff); - alpaka::memset(queue, hitRangesLower_buf, 0xff); - alpaka::memset(queue, 
hitRangesUpper_buf, 0xff); - alpaka::memset(queue, hitRangesnLower_buf, 0xff); - alpaka::memset(queue, hitRangesnUpper_buf, 0xff); - } - - inline Hits const* data() const { return &data_; } - inline void setData(HitsBuffer& buf) { data_.setData(buf); } - }; template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float eta(TAcc const& acc, float x, float y, float z) { @@ -178,19 +79,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct ModuleRangesKernel { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, Hits hitsInGPU, int nLowerModules) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + HitsRanges hitsRanges, + int nLowerModules) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); for (int lowerIndex = globalThreadIdx[2]; lowerIndex < nLowerModules; lowerIndex += gridThreadExtent[2]) { - uint16_t upperIndex = modulesInGPU.partnerModuleIndices[lowerIndex]; - if (hitsInGPU.hitRanges[lowerIndex * 2] != -1 && hitsInGPU.hitRanges[upperIndex * 2] != -1) { - hitsInGPU.hitRangesLower[lowerIndex] = hitsInGPU.hitRanges[lowerIndex * 2]; - hitsInGPU.hitRangesUpper[lowerIndex] = hitsInGPU.hitRanges[upperIndex * 2]; - hitsInGPU.hitRangesnLower[lowerIndex] = - hitsInGPU.hitRanges[lowerIndex * 2 + 1] - hitsInGPU.hitRanges[lowerIndex * 2] + 1; - hitsInGPU.hitRangesnUpper[lowerIndex] = - hitsInGPU.hitRanges[upperIndex * 2 + 1] - hitsInGPU.hitRanges[upperIndex * 2] + 1; + uint16_t upperIndex = modules.partnerModuleIndices()[lowerIndex]; + if (hitsRanges.hitRanges()[lowerIndex][0] != -1 && hitsRanges.hitRanges()[upperIndex][0] != -1) { + hitsRanges.hitRangesLower()[lowerIndex] = hitsRanges.hitRanges()[lowerIndex][0]; + hitsRanges.hitRangesUpper()[lowerIndex] = hitsRanges.hitRanges()[upperIndex][0]; + hitsRanges.hitRangesnLower()[lowerIndex] = + hitsRanges.hitRanges()[lowerIndex][1] - hitsRanges.hitRanges()[lowerIndex][0] + 1; + hitsRanges.hitRangesnUpper()[lowerIndex] 
= + hitsRanges.hitRanges()[upperIndex][1] - hitsRanges.hitRanges()[upperIndex][0] + 1; } } } @@ -199,56 +103,60 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct HitLoopKernel { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - uint16_t Endcap, // Integer corresponding to endcap in module subdets - uint16_t TwoS, // Integer corresponding to TwoS in moduleType - unsigned int nModules, // Number of modules - unsigned int nEndCapMap, // Number of elements in endcap map - const unsigned int* geoMapDetId, // DetId's from endcap map - const float* geoMapPhi, // Phi values from endcap map - Modules modulesInGPU, - Hits hitsInGPU, + uint16_t Endcap, // Integer corresponding to endcap in module subdets + uint16_t TwoS, // Integer corresponding to TwoS in moduleType + unsigned int nModules, // Number of modules + unsigned int nEndCapMap, // Number of elements in endcap map + EndcapGeometryDevConst endcapGeometry, + ModulesConst modules, + Hits hits, + HitsRanges hitsRanges, unsigned int nHits) const // Total number of hits in event { + auto geoMapDetId = endcapGeometry.geoMapDetId(); // DetId's from endcap map + auto geoMapPhi = endcapGeometry.geoMapPhi(); // Phi values from endcap map auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); for (unsigned int ihit = globalThreadIdx[2]; ihit < nHits; ihit += gridThreadExtent[2]) { - float ihit_x = hitsInGPU.xs[ihit]; - float ihit_y = hitsInGPU.ys[ihit]; - float ihit_z = hitsInGPU.zs[ihit]; - int iDetId = hitsInGPU.detid[ihit]; - - hitsInGPU.rts[ihit] = alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y); - hitsInGPU.phis[ihit] = phi(acc, ihit_x, ihit_y); - hitsInGPU.etas[ihit] = + float ihit_x = hits.xs()[ihit]; + float ihit_y = hits.ys()[ihit]; + float ihit_z = hits.zs()[ihit]; + int iDetId = hits.detid()[ihit]; + + hits.rts()[ihit] = alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y); + hits.phis()[ihit] = phi(acc, ihit_x, ihit_y); + 
hits.etas()[ihit] = ((ihit_z > 0) - (ihit_z < 0)) * alpaka::math::acosh( - acc, - alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y + ihit_z * ihit_z) / hitsInGPU.rts[ihit]); - int found_index = binary_search(modulesInGPU.mapdetId, iDetId, nModules); - uint16_t lastModuleIndex = modulesInGPU.mapIdx[found_index]; + acc, alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y + ihit_z * ihit_z) / hits.rts()[ihit]); + int found_index = binary_search(modules.mapdetId(), iDetId, nModules); + uint16_t lastModuleIndex = modules.mapIdx()[found_index]; - hitsInGPU.moduleIndices[ihit] = lastModuleIndex; + hits.moduleIndices()[ihit] = lastModuleIndex; - if (modulesInGPU.subdets[lastModuleIndex] == Endcap && modulesInGPU.moduleType[lastModuleIndex] == TwoS) { + if (modules.subdets()[lastModuleIndex] == Endcap && modules.moduleType()[lastModuleIndex] == TwoS) { found_index = binary_search(geoMapDetId, iDetId, nEndCapMap); float phi = geoMapPhi[found_index]; float cos_phi = alpaka::math::cos(acc, phi); - hitsInGPU.highEdgeXs[ihit] = ihit_x + 2.5f * cos_phi; - hitsInGPU.lowEdgeXs[ihit] = ihit_x - 2.5f * cos_phi; + hits.highEdgeXs()[ihit] = ihit_x + 2.5f * cos_phi; + hits.lowEdgeXs()[ihit] = ihit_x - 2.5f * cos_phi; float sin_phi = alpaka::math::sin(acc, phi); - hitsInGPU.highEdgeYs[ihit] = ihit_y + 2.5f * sin_phi; - hitsInGPU.lowEdgeYs[ihit] = ihit_y - 2.5f * sin_phi; + hits.highEdgeYs()[ihit] = ihit_y + 2.5f * sin_phi; + hits.lowEdgeYs()[ihit] = ihit_y - 2.5f * sin_phi; } // Need to set initial value if index hasn't been seen before. - int old = alpaka::atomicCas( - acc, &(hitsInGPU.hitRanges[lastModuleIndex * 2]), -1, static_cast(ihit), alpaka::hierarchy::Threads{}); + int old = alpaka::atomicCas(acc, + &(hitsRanges.hitRanges()[lastModuleIndex][0]), + -1, + static_cast(ihit), + alpaka::hierarchy::Threads{}); // For subsequent visits, stores the min value. 
if (old != -1) alpaka::atomicMin( - acc, &hitsInGPU.hitRanges[lastModuleIndex * 2], static_cast(ihit), alpaka::hierarchy::Threads{}); + acc, &hitsRanges.hitRanges()[lastModuleIndex][0], static_cast(ihit), alpaka::hierarchy::Threads{}); alpaka::atomicMax( - acc, &hitsInGPU.hitRanges[lastModuleIndex * 2 + 1], static_cast(ihit), alpaka::hierarchy::Threads{}); + acc, &hitsRanges.hitRanges()[lastModuleIndex][1], static_cast(ihit), alpaka::hierarchy::Threads{}); } } }; diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h index bc284d052cc05..525d1bf23eecb 100644 --- a/RecoTracker/LSTCore/src/alpaka/Kernels.h +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -2,49 +2,45 @@ #define RecoTracker_LSTCore_src_alpaka_Kernels_h #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" -#include "RecoTracker/LSTCore/interface/Module.h" - -#include "Hit.h" -#include "MiniDoublet.h" -#include "ObjectRanges.h" -#include "Segment.h" -#include "Triplet.h" -#include "Quintuplet.h" -#include "PixelQuintuplet.h" -#include "PixelTriplet.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(Quintuplets& quintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(Quintuplets quintuplets, unsigned int quintupletIndex, bool secondpass = false) { - quintupletsInGPU.isDup[quintupletIndex] |= 1 + secondpass; + quintuplets.isDup()[quintupletIndex] |= 1 + secondpass; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void 
rmPixelTripletFromMemory(PixelTriplets& pixelTripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(PixelTriplets pixelTriplets, unsigned int pixelTripletIndex) { - pixelTripletsInGPU.isDup[pixelTripletIndex] = true; + pixelTriplets.isDup()[pixelTripletIndex] = true; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(PixelQuintuplets& pixelQuintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(PixelQuintuplets pixelQuintuplets, unsigned int pixelQuintupletIndex) { - pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = true; + pixelQuintuplets.isDup()[pixelQuintupletIndex] = true; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(Segments& segmentsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(SegmentsPixel segmentsPixel, unsigned int pixelSegmentArrayIndex, bool secondpass = false) { - segmentsInGPU.isDup[pixelSegmentArrayIndex] |= 1 + secondpass; + segmentsPixel.isDup()[pixelSegmentArrayIndex] |= 1 + secondpass; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, - unsigned int jx, - Quintuplets const& quintupletsInGPU) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, unsigned int jx, QuintupletsConst quintuplets) { unsigned int hits1[Params_T5::kHits]; unsigned int hits2[Params_T5::kHits]; for (int i = 0; i < Params_T5::kHits; i++) { - hits1[i] = quintupletsInGPU.hitIndices[Params_T5::kHits * ix + i]; - hits2[i] = quintupletsInGPU.hitIndices[Params_T5::kHits * jx + i]; + hits1[i] = quintuplets.hitIndices()[ix][i]; + hits2[i] = quintuplets.hitIndices()[jx][i]; } int nMatched = 0; @@ -65,13 +61,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitspT5(unsigned int ix, unsigned int jx, - PixelQuintuplets const& pixelQuintupletsInGPU) { + PixelQuintupletsConst pixelQuintuplets) { unsigned int hits1[Params_pT5::kHits]; unsigned int hits2[Params_pT5::kHits]; for (int i = 0; i < 
Params_pT5::kHits; i++) { - hits1[i] = pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * ix + i]; - hits2[i] = pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * jx + i]; + hits1[i] = pixelQuintuplets.hitIndices()[ix][i]; + hits2[i] = pixelQuintuplets.hitIndices()[jx][i]; } int nMatched = 0; @@ -92,14 +88,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE void checkHitspT3(unsigned int ix, unsigned int jx, - PixelTriplets const& pixelTripletsInGPU, + PixelTripletsConst pixelTriplets, int* matched) { int phits1[Params_pLS::kHits]; int phits2[Params_pLS::kHits]; for (int i = 0; i < Params_pLS::kHits; i++) { - phits1[i] = pixelTripletsInGPU.hitIndices[Params_pT3::kHits * ix + i]; - phits2[i] = pixelTripletsInGPU.hitIndices[Params_pT3::kHits * jx + i]; + phits1[i] = pixelTriplets.hitIndices()[ix][i]; + phits2[i] = pixelTriplets.hitIndices()[jx][i]; } int npMatched = 0; @@ -120,8 +116,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { int hits2[Params_T3::kHits]; for (int i = 0; i < Params_T3::kHits; i++) { - hits1[i] = pixelTripletsInGPU.hitIndices[Params_pT3::kHits * ix + i + 4]; // Omitting the pLS hits - hits2[i] = pixelTripletsInGPU.hitIndices[Params_pT3::kHits * jx + i + 4]; // Omitting the pLS hits + hits1[i] = pixelTriplets.hitIndices()[ix][i + 4]; // Omitting the pLS hits + hits2[i] = pixelTriplets.hitIndices()[jx][i + 4]; // Omitting the pLS hits } int nMatched = 0; @@ -142,34 +138,34 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { matched[1] = nMatched; } - struct RemoveDupQuintupletsInGPUAfterBuild { + struct RemoveDupQuintupletsAfterBuild { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - Quintuplets quintupletsInGPU, - ObjectRanges rangesInGPU) const { + ModulesConst modules, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + ObjectRangesConst ranges) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = 
alpaka::getWorkDiv(acc); - for (unsigned int lowmod = globalThreadIdx[0]; lowmod < *modulesInGPU.nLowerModules; - lowmod += gridThreadExtent[0]) { - unsigned int nQuintuplets_lowmod = quintupletsInGPU.nQuintuplets[lowmod]; - int quintupletModuleIndices_lowmod = rangesInGPU.quintupletModuleIndices[lowmod]; + for (unsigned int lowmod = globalThreadIdx[0]; lowmod < modules.nLowerModules(); lowmod += gridThreadExtent[0]) { + unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets()[lowmod]; + int quintupletModuleIndices_lowmod = ranges.quintupletModuleIndices()[lowmod]; for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) { unsigned int ix = quintupletModuleIndices_lowmod + ix1; - float eta1 = __H2F(quintupletsInGPU.eta[ix]); - float phi1 = __H2F(quintupletsInGPU.phi[ix]); - float score_rphisum1 = __H2F(quintupletsInGPU.score_rphisum[ix]); + float eta1 = __H2F(quintuplets.eta()[ix]); + float phi1 = __H2F(quintuplets.phi()[ix]); + float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) { unsigned int jx = quintupletModuleIndices_lowmod + jx1; - float eta2 = __H2F(quintupletsInGPU.eta[jx]); - float phi2 = __H2F(quintupletsInGPU.phi[jx]); + float eta2 = __H2F(quintuplets.eta()[jx]); + float phi2 = __H2F(quintuplets.phi()[jx]); float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = calculate_dPhi(phi1, phi2); - float score_rphisum2 = __H2F(quintupletsInGPU.score_rphisum[jx]); + float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); if (dEta > 0.1f) continue; @@ -177,13 +173,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (alpaka::math::abs(acc, dPhi) > 0.1f) continue; - int nMatched = checkHitsT5(ix, jx, quintupletsInGPU); + int nMatched = checkHitsT5(ix, jx, quintuplets); const int minNHitsForDup_T5 = 7; if (nMatched >= minNHitsForDup_T5) { if (score_rphisum1 >= score_rphisum2) 
{ - rmQuintupletFromMemory(quintupletsInGPU, ix); + rmQuintupletFromMemory(quintuplets, ix); } else { - rmQuintupletFromMemory(quintupletsInGPU, jx); + rmQuintupletFromMemory(quintuplets, jx); } } } @@ -192,33 +188,36 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct RemoveDupQuintupletsInGPUBeforeTC { + struct RemoveDupQuintupletsBeforeTC { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, Quintuplets quintupletsInGPU, ObjectRanges rangesInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + ObjectRangesConst ranges) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < *(rangesInGPU.nEligibleT5Modules); + for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < ranges.nEligibleT5Modules(); lowmodIdx1 += gridThreadExtent[1]) { - uint16_t lowmod1 = rangesInGPU.indicesOfEligibleT5Modules[lowmodIdx1]; - unsigned int nQuintuplets_lowmod1 = quintupletsInGPU.nQuintuplets[lowmod1]; + uint16_t lowmod1 = ranges.indicesOfEligibleT5Modules()[lowmodIdx1]; + unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets()[lowmod1]; if (nQuintuplets_lowmod1 == 0) continue; - unsigned int quintupletModuleIndices_lowmod1 = rangesInGPU.quintupletModuleIndices[lowmod1]; + unsigned int quintupletModuleIndices_lowmod1 = ranges.quintupletModuleIndices()[lowmod1]; - for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < *(rangesInGPU.nEligibleT5Modules); + for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < ranges.nEligibleT5Modules(); lowmodIdx2 += gridThreadExtent[2]) { - uint16_t lowmod2 = rangesInGPU.indicesOfEligibleT5Modules[lowmodIdx2]; - unsigned int nQuintuplets_lowmod2 = quintupletsInGPU.nQuintuplets[lowmod2]; + uint16_t lowmod2 = ranges.indicesOfEligibleT5Modules()[lowmodIdx2]; + 
unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets()[lowmod2]; if (nQuintuplets_lowmod2 == 0) continue; - unsigned int quintupletModuleIndices_lowmod2 = rangesInGPU.quintupletModuleIndices[lowmod2]; + unsigned int quintupletModuleIndices_lowmod2 = ranges.quintupletModuleIndices()[lowmod2]; for (unsigned int ix1 = 0; ix1 < nQuintuplets_lowmod1; ix1 += 1) { unsigned int ix = quintupletModuleIndices_lowmod1 + ix1; - if (quintupletsInGPU.partOfPT5[ix] || (quintupletsInGPU.isDup[ix] & 1)) + if (quintuplets.partOfPT5()[ix] || (quintuplets.isDup()[ix] & 1)) continue; for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) { @@ -226,16 +225,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (ix == jx) continue; - if (quintupletsInGPU.partOfPT5[jx] || (quintupletsInGPU.isDup[jx] & 1)) + if (quintuplets.partOfPT5()[jx] || (quintuplets.isDup()[jx] & 1)) continue; - float eta1 = __H2F(quintupletsInGPU.eta[ix]); - float phi1 = __H2F(quintupletsInGPU.phi[ix]); - float score_rphisum1 = __H2F(quintupletsInGPU.score_rphisum[ix]); + float eta1 = __H2F(quintuplets.eta()[ix]); + float phi1 = __H2F(quintuplets.phi()[ix]); + float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); - float eta2 = __H2F(quintupletsInGPU.eta[jx]); - float phi2 = __H2F(quintupletsInGPU.phi[jx]); - float score_rphisum2 = __H2F(quintupletsInGPU.score_rphisum[jx]); + float eta2 = __H2F(quintuplets.eta()[jx]); + float phi2 = __H2F(quintuplets.phi()[jx]); + float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = calculate_dPhi(phi1, phi2); @@ -247,15 +246,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; float dR2 = dEta * dEta + dPhi * dPhi; - int nMatched = checkHitsT5(ix, jx, quintupletsInGPU); + int nMatched = checkHitsT5(ix, jx, quintuplets); const int minNHitsForDup_T5 = 5; if (dR2 < 0.001f || nMatched >= minNHitsForDup_T5) { if (score_rphisum1 > score_rphisum2) { - 
rmQuintupletFromMemory(quintupletsInGPU, ix, true); + rmQuintupletFromMemory(quintuplets, ix, true); } else if (score_rphisum1 < score_rphisum2) { - rmQuintupletFromMemory(quintupletsInGPU, jx, true); + rmQuintupletFromMemory(quintuplets, jx, true); } else { - rmQuintupletFromMemory(quintupletsInGPU, (ix < jx ? ix : jx), true); + rmQuintupletFromMemory(quintuplets, (ix < jx ? ix : jx), true); } } } @@ -265,35 +264,32 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct RemoveDupPixelTripletsInGPUFromMap { + struct RemoveDupPixelTripletsFromMap { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTripletsInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTriplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (unsigned int ix = globalThreadIdx[1]; ix < *pixelTripletsInGPU.nPixelTriplets; ix += gridThreadExtent[1]) { - for (unsigned int jx = globalThreadIdx[2]; jx < *pixelTripletsInGPU.nPixelTriplets; jx += gridThreadExtent[2]) { + for (unsigned int ix = globalThreadIdx[1]; ix < pixelTriplets.nPixelTriplets(); ix += gridThreadExtent[1]) { + for (unsigned int jx = globalThreadIdx[2]; jx < pixelTriplets.nPixelTriplets(); jx += gridThreadExtent[2]) { if (ix == jx) continue; int nMatched[2]; - checkHitspT3(ix, jx, pixelTripletsInGPU, nMatched); + checkHitspT3(ix, jx, pixelTriplets, nMatched); const int minNHitsForDup_pT3 = 5; if ((nMatched[0] + nMatched[1]) >= minNHitsForDup_pT3) { // Check the layers - if (pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * jx + 2] < - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * ix + 2]) { - rmPixelTripletFromMemory(pixelTripletsInGPU, ix); + if (pixelTriplets.logicalLayers()[jx][2] < pixelTriplets.logicalLayers()[ix][2]) { + rmPixelTripletFromMemory(pixelTriplets, ix); break; - } else if (pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * ix + 2] == - 
pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * jx + 2] && - __H2F(pixelTripletsInGPU.score[ix]) > __H2F(pixelTripletsInGPU.score[jx])) { - rmPixelTripletFromMemory(pixelTripletsInGPU, ix); + } else if (pixelTriplets.logicalLayers()[ix][2] == pixelTriplets.logicalLayers()[jx][2] && + __H2F(pixelTriplets.score()[ix]) > __H2F(pixelTriplets.score()[jx])) { + rmPixelTripletFromMemory(pixelTriplets, ix); break; - } else if (pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * ix + 2] == - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * jx + 2] && - (__H2F(pixelTripletsInGPU.score[ix]) == __H2F(pixelTripletsInGPU.score[jx])) && (ix < jx)) { - rmPixelTripletFromMemory(pixelTripletsInGPU, ix); + } else if (pixelTriplets.logicalLayers()[ix][2] == pixelTriplets.logicalLayers()[jx][2] && + (__H2F(pixelTriplets.score()[ix]) == __H2F(pixelTriplets.score()[jx])) && (ix < jx)) { + rmPixelTripletFromMemory(pixelTriplets, ix); break; } } @@ -302,25 +298,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct RemoveDupPixelQuintupletsInGPUFromMap { + struct RemoveDupPixelQuintupletsFromMap { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintupletsInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintuplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - unsigned int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets; + unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) { - float score1 = __H2F(pixelQuintupletsInGPU.score[ix]); + float score1 = __H2F(pixelQuintuplets.score()[ix]); for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) { if (ix == jx) continue; - int nMatched = checkHitspT5(ix, jx, pixelQuintupletsInGPU); - float score2 = 
__H2F(pixelQuintupletsInGPU.score[jx]); + int nMatched = checkHitspT5(ix, jx, pixelQuintuplets); + float score2 = __H2F(pixelQuintuplets.score()[jx]); const int minNHitsForDup_pT5 = 7; if (nMatched >= minNHitsForDup_pT5) { if (score1 > score2 or ((score1 == score2) and (ix > jx))) { - rmPixelQuintupletFromMemory(pixelQuintupletsInGPU, ix); + rmPixelQuintupletFromMemory(pixelQuintuplets, ix); break; } } @@ -331,40 +327,44 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct CheckHitspLS { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, Segments segmentsInGPU, bool secondpass) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixel segmentsPixel, + bool secondpass) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - int pixelModuleIndex = *modulesInGPU.nLowerModules; - unsigned int nPixelSegments = segmentsInGPU.nSegments[pixelModuleIndex]; + int pixelModuleIndex = modules.nLowerModules(); + unsigned int nPixelSegments = segmentsOccupancy.nSegments()[pixelModuleIndex]; if (nPixelSegments > n_max_pixel_segments_per_module) nPixelSegments = n_max_pixel_segments_per_module; for (unsigned int ix = globalThreadIdx[1]; ix < nPixelSegments; ix += gridThreadExtent[1]) { - if (secondpass && (!segmentsInGPU.isQuad[ix] || (segmentsInGPU.isDup[ix] & 1))) + if (secondpass && (!segmentsPixel.isQuad()[ix] || (segmentsPixel.isDup()[ix] & 1))) continue; unsigned int phits1[Params_pLS::kHits]; - phits1[0] = segmentsInGPU.pLSHitsIdxs[ix].x; - phits1[1] = segmentsInGPU.pLSHitsIdxs[ix].y; - phits1[2] = segmentsInGPU.pLSHitsIdxs[ix].z; - phits1[3] = segmentsInGPU.pLSHitsIdxs[ix].w; - float eta_pix1 = segmentsInGPU.eta[ix]; - float phi_pix1 = segmentsInGPU.phi[ix]; + phits1[0] = segmentsPixel.pLSHitsIdxs()[ix].x; + phits1[1] = segmentsPixel.pLSHitsIdxs()[ix].y; + phits1[2] = segmentsPixel.pLSHitsIdxs()[ix].z; 
+ phits1[3] = segmentsPixel.pLSHitsIdxs()[ix].w; + float eta_pix1 = segmentsPixel.eta()[ix]; + float phi_pix1 = segmentsPixel.phi()[ix]; for (unsigned int jx = ix + 1 + globalThreadIdx[2]; jx < nPixelSegments; jx += gridThreadExtent[2]) { - float eta_pix2 = segmentsInGPU.eta[jx]; - float phi_pix2 = segmentsInGPU.phi[jx]; + float eta_pix2 = segmentsPixel.eta()[jx]; + float phi_pix2 = segmentsPixel.phi()[jx]; if (alpaka::math::abs(acc, eta_pix2 - eta_pix1) > 0.1f) continue; - if (secondpass && (!segmentsInGPU.isQuad[jx] || (segmentsInGPU.isDup[jx] & 1))) + if (secondpass && (!segmentsPixel.isQuad()[jx] || (segmentsPixel.isDup()[jx] & 1))) continue; - int8_t quad_diff = segmentsInGPU.isQuad[ix] - segmentsInGPU.isQuad[jx]; - float score_diff = segmentsInGPU.score[ix] - segmentsInGPU.score[jx]; + int8_t quad_diff = segmentsPixel.isQuad()[ix] - segmentsPixel.isQuad()[jx]; + float score_diff = segmentsPixel.score()[ix] - segmentsPixel.score()[jx]; // Always keep quads over trips. If they are the same, we want the object with better score int idxToRemove; if (quad_diff > 0) @@ -379,10 +379,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { idxToRemove = ix; unsigned int phits2[Params_pLS::kHits]; - phits2[0] = segmentsInGPU.pLSHitsIdxs[jx].x; - phits2[1] = segmentsInGPU.pLSHitsIdxs[jx].y; - phits2[2] = segmentsInGPU.pLSHitsIdxs[jx].z; - phits2[3] = segmentsInGPU.pLSHitsIdxs[jx].w; + phits2[0] = segmentsPixel.pLSHitsIdxs()[jx].x; + phits2[1] = segmentsPixel.pLSHitsIdxs()[jx].y; + phits2[2] = segmentsPixel.pLSHitsIdxs()[jx].z; + phits2[3] = segmentsPixel.pLSHitsIdxs()[jx].w; int npMatched = 0; for (int i = 0; i < Params_pLS::kHits; i++) { @@ -402,7 +402,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } const int minNHitsForDup_pLS = 3; if (npMatched >= minNHitsForDup_pLS) { - rmPixelSegmentFromMemory(segmentsInGPU, idxToRemove, secondpass); + rmPixelSegmentFromMemory(segmentsPixel, idxToRemove, secondpass); } if (secondpass) { float dEta = alpaka::math::abs(acc, eta_pix1 
- eta_pix2); @@ -410,7 +410,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dR2 = dEta * dEta + dPhi * dPhi; if ((npMatched >= 1) || (dR2 < 1e-5f)) { - rmPixelSegmentFromMemory(segmentsInGPU, idxToRemove, secondpass); + rmPixelSegmentFromMemory(segmentsPixel, idxToRemove, secondpass); } } } diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index 65543720a1d34..9ab82ec99f64d 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -18,6 +18,42 @@ namespace { const float vy = dxy * p3.x() / pt - p3.y() / p * p3.z() / p * dz; return {vx, vy, vz}; } + + using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + std::vector getHitIdxs(short trackCandidateType, + Params_pT5::ArrayUxHits const& tcHitIndices, + unsigned int const* hitIndices) { + std::vector hits; + + unsigned int maxNHits = 0; + if (trackCandidateType == 7) + maxNHits = Params_pT5::kHits; // pT5 + else if (trackCandidateType == 5) + maxNHits = Params_pT3::kHits; // pT3 + else if (trackCandidateType == 4) + maxNHits = Params_T5::kHits; // T5 + else if (trackCandidateType == 8) + maxNHits = Params_pLS::kHits; // pLS + + for (unsigned int i = 0; i < maxNHits; i++) { + unsigned int hitIdxDev = tcHitIndices[i]; + unsigned int hitIdx = + (trackCandidateType == 8) + ? hitIdxDev + : hitIndices[hitIdxDev]; // Hit indices are stored differently in the standalone for pLS. + + // For p objects, the 3rd and 4th hit maybe the same, + // due to the way pLS hits are stored in the standalone. + // This is because pixel seeds can be either triplets or quadruplets. + if (trackCandidateType != 4 && hits.size() == 3 && hits.back() == hitIdx) // Remove duplicate 4th hits. 
+ continue; + + hits.push_back(hitIdx); + } + + return hits; + } + } // namespace void LST::prepareInput(std::vector const& see_px, @@ -212,60 +248,24 @@ void LST::prepareInput(std::vector const& see_px, in_isQuad_vec_ = isQuad_vec; } -std::vector LST::getHitIdxs(short trackCandidateType, - unsigned int TCIdx, - unsigned int const* TCHitIndices, - unsigned int const* hitIndices) { - std::vector hits; - - unsigned int maxNHits = 0; - if (trackCandidateType == 7) - maxNHits = Params_pT5::kHits; // pT5 - else if (trackCandidateType == 5) - maxNHits = Params_pT3::kHits; // pT3 - else if (trackCandidateType == 4) - maxNHits = Params_T5::kHits; // T5 - else if (trackCandidateType == 8) - maxNHits = Params_pLS::kHits; // pLS - - for (unsigned int i = 0; i < maxNHits; i++) { - unsigned int hitIdxInGPU = TCHitIndices[Params_pT5::kHits * TCIdx + i]; - unsigned int hitIdx = - (trackCandidateType == 8) - ? hitIdxInGPU - : hitIndices[hitIdxInGPU]; // Hit indices are stored differently in the standalone for pLS. - - // For p objects, the 3rd and 4th hit maybe the same, - // due to the way pLS hits are stored in the standalone. - // This is because pixel seeds can be either triplets or quadruplets. - if (trackCandidateType != 4 && hits.size() == 3 && hits.back() == hitIdx) // Remove duplicate 4th hits. 
- continue; - - hits.push_back(hitIdx); - } - - return hits; -} - void LST::getOutput(Event& event) { std::vector> tc_hitIdxs; std::vector tc_len; std::vector tc_seedIdx; std::vector tc_trackCandidateType; - HitsBuffer& hitsInGPU = event.getHitsInCMSSW(false); // sync on next line - TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW().data(); + auto const hits = event.getHitsInCMSSW(false); // sync on next line + auto const& trackCandidates = event.getTrackCandidatesInCMSSW(); - unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { - short trackCandidateType = trackCandidates->trackCandidateType[idx]; - std::vector hit_idx = - getHitIdxs(trackCandidateType, idx, trackCandidates->hitIndices, hitsInGPU.data()->idxs); + short trackCandidateType = trackCandidates.trackCandidateType()[idx]; + std::vector hit_idx = getHitIdxs(trackCandidateType, trackCandidates.hitIndices()[idx], hits.idxs()); tc_hitIdxs.push_back(hit_idx); tc_len.push_back(hit_idx.size()); - tc_seedIdx.push_back(trackCandidates->pixelSeedIndex[idx]); + tc_seedIdx.push_back(trackCandidates.pixelSeedIndex()[idx]); tc_trackCandidateType.push_back(trackCandidateType); } diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index 27ce7b97bffdd..94adbd43dedb7 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -4,194 +4,20 @@ #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" -#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" #include 
"RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" #include "Hit.h" -#include "ObjectRanges.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct MiniDoublets { - unsigned int* nMemoryLocations; - - unsigned int* anchorHitIndices; - unsigned int* outerHitIndices; - uint16_t* moduleIndices; - unsigned int* nMDs; //counter per module - unsigned int* totOccupancyMDs; //counter per module - float* dphichanges; - - float* dzs; //will store drt if the module is endcap - float* dphis; - - float* shiftedXs; - float* shiftedYs; - float* shiftedZs; - float* noShiftedDphis; //if shifted module - float* noShiftedDphiChanges; //if shifted module - - float* anchorX; - float* anchorY; - float* anchorZ; - float* anchorRt; - float* anchorPhi; - float* anchorEta; - float* anchorHighEdgeX; - float* anchorHighEdgeY; - float* anchorLowEdgeX; - float* anchorLowEdgeY; - float* anchorLowEdgePhi; - float* anchorHighEdgePhi; - - float* outerX; - float* outerY; - float* outerZ; - float* outerRt; - float* outerPhi; - float* outerEta; - float* outerHighEdgeX; - float* outerHighEdgeY; - float* outerLowEdgeX; - float* outerLowEdgeY; - - template - void setData(TBuf& buf) { - nMemoryLocations = buf.nMemoryLocations_buf.data(); - anchorHitIndices = buf.anchorHitIndices_buf.data(); - outerHitIndices = buf.outerHitIndices_buf.data(); - moduleIndices = buf.moduleIndices_buf.data(); - nMDs = buf.nMDs_buf.data(); - totOccupancyMDs = buf.totOccupancyMDs_buf.data(); - dphichanges = buf.dphichanges_buf.data(); - dzs = buf.dzs_buf.data(); - dphis = buf.dphis_buf.data(); - shiftedXs = buf.shiftedXs_buf.data(); - shiftedYs = buf.shiftedYs_buf.data(); - shiftedZs = buf.shiftedZs_buf.data(); - noShiftedDphis = buf.noShiftedDphis_buf.data(); - noShiftedDphiChanges = buf.noShiftedDphiChanges_buf.data(); - anchorX = buf.anchorX_buf.data(); - anchorY = buf.anchorY_buf.data(); - anchorZ = buf.anchorZ_buf.data(); - anchorRt = buf.anchorRt_buf.data(); - 
anchorPhi = buf.anchorPhi_buf.data(); - anchorEta = buf.anchorEta_buf.data(); - anchorHighEdgeX = buf.anchorHighEdgeX_buf.data(); - anchorHighEdgeY = buf.anchorHighEdgeY_buf.data(); - anchorLowEdgeX = buf.anchorLowEdgeX_buf.data(); - anchorLowEdgeY = buf.anchorLowEdgeY_buf.data(); - outerX = buf.outerX_buf.data(); - outerY = buf.outerY_buf.data(); - outerZ = buf.outerZ_buf.data(); - outerRt = buf.outerRt_buf.data(); - outerPhi = buf.outerPhi_buf.data(); - outerEta = buf.outerEta_buf.data(); - outerHighEdgeX = buf.outerHighEdgeX_buf.data(); - outerHighEdgeY = buf.outerHighEdgeY_buf.data(); - outerLowEdgeX = buf.outerLowEdgeX_buf.data(); - outerLowEdgeY = buf.outerLowEdgeY_buf.data(); - anchorLowEdgePhi = buf.anchorLowEdgePhi_buf.data(); - anchorHighEdgePhi = buf.anchorHighEdgePhi_buf.data(); - } - }; - - template - struct MiniDoubletsBuffer { - Buf nMemoryLocations_buf; - - Buf anchorHitIndices_buf; - Buf outerHitIndices_buf; - Buf moduleIndices_buf; - Buf nMDs_buf; - Buf totOccupancyMDs_buf; - Buf dphichanges_buf; - - Buf dzs_buf; - Buf dphis_buf; - - Buf shiftedXs_buf; - Buf shiftedYs_buf; - Buf shiftedZs_buf; - Buf noShiftedDphis_buf; - Buf noShiftedDphiChanges_buf; - - Buf anchorX_buf; - Buf anchorY_buf; - Buf anchorZ_buf; - Buf anchorRt_buf; - Buf anchorPhi_buf; - Buf anchorEta_buf; - Buf anchorHighEdgeX_buf; - Buf anchorHighEdgeY_buf; - Buf anchorLowEdgeX_buf; - Buf anchorLowEdgeY_buf; - Buf anchorLowEdgePhi_buf; - Buf anchorHighEdgePhi_buf; - - Buf outerX_buf; - Buf outerY_buf; - Buf outerZ_buf; - Buf outerRt_buf; - Buf outerPhi_buf; - Buf outerEta_buf; - Buf outerHighEdgeX_buf; - Buf outerHighEdgeY_buf; - Buf outerLowEdgeX_buf; - Buf outerLowEdgeY_buf; - - MiniDoublets data_; - - template - MiniDoubletsBuffer(unsigned int nMemoryLoc, uint16_t nLowerModules, TDevAcc const& devAccIn, TQueue& queue) - : nMemoryLocations_buf(allocBufWrapper(devAccIn, 1, queue)), - anchorHitIndices_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - 
outerHitIndices_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - moduleIndices_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - nMDs_buf(allocBufWrapper(devAccIn, nLowerModules + 1, queue)), - totOccupancyMDs_buf(allocBufWrapper(devAccIn, nLowerModules + 1, queue)), - dphichanges_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - dzs_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - dphis_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - shiftedXs_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - shiftedYs_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - shiftedZs_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - noShiftedDphis_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - noShiftedDphiChanges_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorX_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorZ_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorRt_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorPhi_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorEta_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorHighEdgeX_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorHighEdgeY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorLowEdgeX_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorLowEdgeY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorLowEdgePhi_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - anchorHighEdgePhi_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - outerX_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - outerY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - outerZ_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - outerRt_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - outerPhi_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - outerEta_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - outerHighEdgeX_buf(allocBufWrapper(devAccIn, nMemoryLoc, 
queue)), - outerHighEdgeY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - outerLowEdgeX_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), - outerLowEdgeY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)) { - alpaka::memset(queue, nMDs_buf, 0u); - alpaka::memset(queue, totOccupancyMDs_buf, 0u); - } - - inline MiniDoublets const* data() const { return &data_; } - inline void setData(MiniDoubletsBuffer& buf) { data_.setData(buf); } - }; - template ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc, - MiniDoublets& mdsInGPU, - Hits const& hitsInGPU, - Modules const& modulesInGPU, + MiniDoublets mds, + HitsConst hits, + ModulesConst modules, unsigned int lowerHitIdx, unsigned int upperHitIdx, uint16_t lowerModuleIdx, @@ -207,68 +33,66 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //the index into which this MD needs to be written will be computed in the kernel //nMDs variable will be incremented in the kernel, no need to worry about that here - mdsInGPU.moduleIndices[idx] = lowerModuleIdx; + mds.moduleIndices()[idx] = lowerModuleIdx; unsigned int anchorHitIndex, outerHitIndex; - if (modulesInGPU.moduleType[lowerModuleIdx] == PS and modulesInGPU.moduleLayerType[lowerModuleIdx] == Strip) { - mdsInGPU.anchorHitIndices[idx] = upperHitIdx; - mdsInGPU.outerHitIndices[idx] = lowerHitIdx; + if (modules.moduleType()[lowerModuleIdx] == PS and modules.moduleLayerType()[lowerModuleIdx] == Strip) { + mds.anchorHitIndices()[idx] = upperHitIdx; + mds.outerHitIndices()[idx] = lowerHitIdx; anchorHitIndex = upperHitIdx; outerHitIndex = lowerHitIdx; } else { - mdsInGPU.anchorHitIndices[idx] = lowerHitIdx; - mdsInGPU.outerHitIndices[idx] = upperHitIdx; + mds.anchorHitIndices()[idx] = lowerHitIdx; + mds.outerHitIndices()[idx] = upperHitIdx; anchorHitIndex = lowerHitIdx; outerHitIndex = upperHitIdx; } - mdsInGPU.dphichanges[idx] = dPhiChange; - - mdsInGPU.dphis[idx] = dPhi; - mdsInGPU.dzs[idx] = dz; - mdsInGPU.shiftedXs[idx] = shiftedX; - mdsInGPU.shiftedYs[idx] = 
shiftedY; - mdsInGPU.shiftedZs[idx] = shiftedZ; - - mdsInGPU.noShiftedDphis[idx] = noShiftedDphi; - mdsInGPU.noShiftedDphiChanges[idx] = noShiftedDPhiChange; - - mdsInGPU.anchorX[idx] = hitsInGPU.xs[anchorHitIndex]; - mdsInGPU.anchorY[idx] = hitsInGPU.ys[anchorHitIndex]; - mdsInGPU.anchorZ[idx] = hitsInGPU.zs[anchorHitIndex]; - mdsInGPU.anchorRt[idx] = hitsInGPU.rts[anchorHitIndex]; - mdsInGPU.anchorPhi[idx] = hitsInGPU.phis[anchorHitIndex]; - mdsInGPU.anchorEta[idx] = hitsInGPU.etas[anchorHitIndex]; - mdsInGPU.anchorHighEdgeX[idx] = hitsInGPU.highEdgeXs[anchorHitIndex]; - mdsInGPU.anchorHighEdgeY[idx] = hitsInGPU.highEdgeYs[anchorHitIndex]; - mdsInGPU.anchorLowEdgeX[idx] = hitsInGPU.lowEdgeXs[anchorHitIndex]; - mdsInGPU.anchorLowEdgeY[idx] = hitsInGPU.lowEdgeYs[anchorHitIndex]; - mdsInGPU.anchorHighEdgePhi[idx] = - alpaka::math::atan2(acc, mdsInGPU.anchorHighEdgeY[idx], mdsInGPU.anchorHighEdgeX[idx]); - mdsInGPU.anchorLowEdgePhi[idx] = - alpaka::math::atan2(acc, mdsInGPU.anchorLowEdgeY[idx], mdsInGPU.anchorLowEdgeX[idx]); - - mdsInGPU.outerX[idx] = hitsInGPU.xs[outerHitIndex]; - mdsInGPU.outerY[idx] = hitsInGPU.ys[outerHitIndex]; - mdsInGPU.outerZ[idx] = hitsInGPU.zs[outerHitIndex]; - mdsInGPU.outerRt[idx] = hitsInGPU.rts[outerHitIndex]; - mdsInGPU.outerPhi[idx] = hitsInGPU.phis[outerHitIndex]; - mdsInGPU.outerEta[idx] = hitsInGPU.etas[outerHitIndex]; - mdsInGPU.outerHighEdgeX[idx] = hitsInGPU.highEdgeXs[outerHitIndex]; - mdsInGPU.outerHighEdgeY[idx] = hitsInGPU.highEdgeYs[outerHitIndex]; - mdsInGPU.outerLowEdgeX[idx] = hitsInGPU.lowEdgeXs[outerHitIndex]; - mdsInGPU.outerLowEdgeY[idx] = hitsInGPU.lowEdgeYs[outerHitIndex]; + mds.dphichanges()[idx] = dPhiChange; + + mds.dphis()[idx] = dPhi; + mds.dzs()[idx] = dz; + mds.shiftedXs()[idx] = shiftedX; + mds.shiftedYs()[idx] = shiftedY; + mds.shiftedZs()[idx] = shiftedZ; + + mds.noShiftedDphis()[idx] = noShiftedDphi; + mds.noShiftedDphiChanges()[idx] = noShiftedDPhiChange; + + mds.anchorX()[idx] = 
hits.xs()[anchorHitIndex]; + mds.anchorY()[idx] = hits.ys()[anchorHitIndex]; + mds.anchorZ()[idx] = hits.zs()[anchorHitIndex]; + mds.anchorRt()[idx] = hits.rts()[anchorHitIndex]; + mds.anchorPhi()[idx] = hits.phis()[anchorHitIndex]; + mds.anchorEta()[idx] = hits.etas()[anchorHitIndex]; + mds.anchorHighEdgeX()[idx] = hits.highEdgeXs()[anchorHitIndex]; + mds.anchorHighEdgeY()[idx] = hits.highEdgeYs()[anchorHitIndex]; + mds.anchorLowEdgeX()[idx] = hits.lowEdgeXs()[anchorHitIndex]; + mds.anchorLowEdgeY()[idx] = hits.lowEdgeYs()[anchorHitIndex]; + mds.anchorHighEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorHighEdgeY()[idx], mds.anchorHighEdgeX()[idx]); + mds.anchorLowEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorLowEdgeY()[idx], mds.anchorLowEdgeX()[idx]); + + mds.outerX()[idx] = hits.xs()[outerHitIndex]; + mds.outerY()[idx] = hits.ys()[outerHitIndex]; + mds.outerZ()[idx] = hits.zs()[outerHitIndex]; + mds.outerRt()[idx] = hits.rts()[outerHitIndex]; + mds.outerPhi()[idx] = hits.phis()[outerHitIndex]; + mds.outerEta()[idx] = hits.etas()[outerHitIndex]; + mds.outerHighEdgeX()[idx] = hits.highEdgeXs()[outerHitIndex]; + mds.outerHighEdgeY()[idx] = hits.highEdgeYs()[outerHitIndex]; + mds.outerLowEdgeX()[idx] = hits.lowEdgeXs()[outerHitIndex]; + mds.outerLowEdgeY()[idx] = hits.lowEdgeYs()[outerHitIndex]; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules(Modules const& modulesInGPU, uint16_t moduleIndex) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules(ModulesConst modules, uint16_t moduleIndex) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing // This is the same as what was previously considered as"isNormalTiltedModules" // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf - short subdet = modulesInGPU.subdets[moduleIndex]; - short layer = modulesInGPU.layers[moduleIndex]; - short side = modulesInGPU.sides[moduleIndex]; - short rod = modulesInGPU.rods[moduleIndex]; + 
short subdet = modules.subdets()[moduleIndex]; + short layer = modules.layers()[moduleIndex]; + short side = modules.sides()[moduleIndex]; + short rod = modules.rods()[moduleIndex]; if (subdet == Barrel) { if ((side != Center and layer == 3) or (side == NegZ and layer == 2 and rod > 5) or @@ -281,7 +105,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(Modules const& modulesInGPU, uint16_t moduleIndex) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(ModulesConst modules, uint16_t moduleIndex) { float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; @@ -311,16 +135,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } } - unsigned int iL = modulesInGPU.layers[moduleIndex] - 1; - unsigned int iR = modulesInGPU.rings[moduleIndex] - 1; - short subdet = modulesInGPU.subdets[moduleIndex]; - short side = modulesInGPU.sides[moduleIndex]; + unsigned int iL = modules.layers()[moduleIndex] - 1; + unsigned int iR = modules.rings()[moduleIndex] - 1; + short subdet = modules.subdets()[moduleIndex]; + short side = modules.sides()[moduleIndex]; float moduleSeparation = 0; if (subdet == Barrel and side == Center) { moduleSeparation = miniDeltaFlat[iL]; - } else if (isTighterTiltedModules(modulesInGPU, moduleIndex)) { + } else if (isTighterTiltedModules(modules, moduleIndex)) { moduleSeparation = miniDeltaTilted[iL]; } else if (subdet == Endcap) { moduleSeparation = miniDeltaEndcap[iL][iR]; @@ -334,7 +158,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold( - TAcc const& acc, float rt, Modules const& modulesInGPU, uint16_t moduleIndex, float dPhi = 0, float dz = 0) { + TAcc const& acc, float rt, ModulesConst modules, uint16_t moduleIndex, float dPhi = 0, float dz = 0) { // 
================================================================= // Various constants // ================================================================= @@ -344,28 +168,28 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // Computing some components that make up the cut threshold // ================================================================= - unsigned int iL = modulesInGPU.layers[moduleIndex] - 1; + unsigned int iL = modules.layers()[moduleIndex] - 1; const float miniSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax)); const float rLayNominal = - ((modulesInGPU.subdets[moduleIndex] == Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]); + ((modules.subdets()[moduleIndex] == Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]); const float miniPVoff = 0.1f / rLayNominal; - const float miniMuls = ((modulesInGPU.subdets[moduleIndex] == Barrel) ? kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut - : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut); - const bool isTilted = modulesInGPU.subdets[moduleIndex] == Barrel and modulesInGPU.sides[moduleIndex] != Center; + const float miniMuls = ((modules.subdets()[moduleIndex] == Barrel) ? kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut); + const bool isTilted = modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] != Center; //the lower module is sent in irrespective of its layer type. 
We need to fetch the drdz properly float drdz; if (isTilted) { - if (modulesInGPU.moduleType[moduleIndex] == PS and modulesInGPU.moduleLayerType[moduleIndex] == Strip) { - drdz = modulesInGPU.drdzs[moduleIndex]; + if (modules.moduleType()[moduleIndex] == PS and modules.moduleLayerType()[moduleIndex] == Strip) { + drdz = modules.drdzs()[moduleIndex]; } else { - drdz = modulesInGPU.drdzs[modulesInGPU.partnerModuleIndices[moduleIndex]]; + drdz = modules.drdzs()[modules.partnerModuleIndices()[moduleIndex]]; } } else { drdz = 0; } const float miniTilt2 = ((isTilted) ? (0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdz * drdz) / - (1.f + drdz * drdz) / moduleGapSize(modulesInGPU, moduleIndex) + (1.f + drdz * drdz) / moduleGapSize(modules, moduleIndex) : 0); // Compute luminous region requirement for endcap @@ -375,12 +199,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // Return the threshold value // ================================================================= // Following condition is met if the module is central and flatly lying - if (modulesInGPU.subdets[moduleIndex] == Barrel and modulesInGPU.sides[moduleIndex] == Center) { + if (modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] == Center) { return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff); } // Following condition is met if the module is central and tilted - else if (modulesInGPU.subdets[moduleIndex] == Barrel and - modulesInGPU.sides[moduleIndex] != Center) //all types of tilted modules + else if (modules.subdets()[moduleIndex] == Barrel and + modules.sides()[moduleIndex] != Center) //all types of tilted modules { return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniTilt2 * miniSlope * miniSlope); @@ -393,7 +217,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_INLINE ALPAKA_FN_ACC void shiftStripHits(TAcc const& acc, - Modules const& modulesInGPU, + ModulesConst modules, 
uint16_t lowerModuleIndex, uint16_t upperModuleIndex, unsigned int lowerHitIndex, @@ -443,16 +267,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float drprime_x; // x-component of drprime float drprime_y; // y-component of drprime const float& slope = - modulesInGPU.dxdys[lowerModuleIndex]; // The slope of the possible strip hits for a given module in x-y plane + modules.dxdys()[lowerModuleIndex]; // The slope of the possible strip hits for a given module in x-y plane float absArctanSlope; float angleM; // the angle M is the angle of rotation of the module in x-y plane if the possible strip hits are along the x-axis, then angleM = 0, and if the possible strip hits are along y-axis angleM = 90 degrees float absdzprime; // The distance between the two points after shifting - const float& drdz_ = modulesInGPU.drdzs[lowerModuleIndex]; + const float& drdz_ = modules.drdzs()[lowerModuleIndex]; // Assign hit pointers based on their hit type - if (modulesInGPU.moduleType[lowerModuleIndex] == PS) { + if (modules.moduleType()[lowerModuleIndex] == PS) { // TODO: This is somewhat of an mystery.... somewhat confused why this is the case - if (modulesInGPU.subdets[lowerModuleIndex] == Barrel ? modulesInGPU.moduleLayerType[lowerModuleIndex] != Pixel - : modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + if (modules.subdets()[lowerModuleIndex] == Barrel ? 
modules.moduleLayerType()[lowerModuleIndex] != Pixel + : modules.moduleLayerType()[lowerModuleIndex] == Pixel) { xo = xUpper; yo = yUpper; xp = xLower; @@ -477,7 +301,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } // If it is endcap some of the math gets simplified (and also computers don't like infinities) - isEndcap = modulesInGPU.subdets[lowerModuleIndex] == Endcap; + isEndcap = modules.subdets()[lowerModuleIndex] == Endcap; // NOTE: TODO: Keep in mind that the sin(atan) function can be simplified to something like x / sqrt(1 + x^2) and similar for cos // I am not sure how slow sin, atan, cos, functions are in c++. If x / sqrt(1 + x^2) are faster change this later to reduce arithmetic computation time @@ -489,10 +313,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { acc, drdz_)); // The tilt module on the positive z-axis has negative drdz slope in r-z plane and vice versa - moduleSeparation = moduleGapSize(modulesInGPU, lowerModuleIndex); + moduleSeparation = moduleGapSize(modules, lowerModuleIndex); // Sign flips if the pixel is later layer - if (modulesInGPU.moduleType[lowerModuleIndex] == PS and modulesInGPU.moduleLayerType[lowerModuleIndex] != Pixel) { + if (modules.moduleType()[lowerModuleIndex] == PS and modules.moduleLayerType()[lowerModuleIndex] != Pixel) { moduleSeparation *= -1; } @@ -544,7 +368,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { angleA)); // module separation sign is for shifting in radial direction for z-axis direction take care of the sign later // Depending on which one as closer to the interactin point compute the new z wrt to the pixel properly - if (modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { abszn = alpaka::math::abs(acc, zp) + absdzprime; } else { abszn = alpaka::math::abs(acc, zp) - absdzprime; @@ -559,7 +383,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, - 
Modules const& modulesInGPU, + ModulesConst modules, uint16_t lowerModuleIndex, uint16_t upperModuleIndex, unsigned int lowerHitIndex, @@ -581,7 +405,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float zUpper, float rtUpper) { dz = zLower - zUpper; - const float dzCut = modulesInGPU.moduleType[lowerModuleIndex] == PS ? 2.f : 10.f; + const float dzCut = modules.moduleType()[lowerModuleIndex] == PS ? 2.f : 10.f; const float sign = ((dz > 0) - (dz < 0)) * ((zLower > 0) - (zLower < 0)); const float invertedcrossercut = (alpaka::math::abs(acc, dz) > 2) * sign; @@ -590,20 +414,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float miniCut = 0; - miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel - ? dPhiThreshold(acc, rtLower, modulesInGPU, lowerModuleIndex) - : dPhiThreshold(acc, rtUpper, modulesInGPU, lowerModuleIndex); + miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel + ? dPhiThreshold(acc, rtLower, modules, lowerModuleIndex) + : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex); // Cut #2: dphi difference // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3085 float xn = 0.f, yn = 0.f; // , zn = 0; float shiftedRt2; - if (modulesInGPU.sides[lowerModuleIndex] != Center) // If barrel and not center it is tilted + if (modules.sides()[lowerModuleIndex] != Center) // If barrel and not center it is tilted { // Shift the hits and calculate new xn, yn position float shiftedCoords[3]; shiftStripHits(acc, - modulesInGPU, + modules, lowerModuleIndex, upperModuleIndex, lowerHitIndex, @@ -621,7 +445,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { yn = shiftedCoords[1]; // Lower or the upper hit needs to be modified depending on which one was actually shifted - if (modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; @@ -650,10 +474,10 @@ 
namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // Cut #3: The dphi change going from lower Hit to upper Hit // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3076 - if (modulesInGPU.sides[lowerModuleIndex] != Center) { + if (modules.sides()[lowerModuleIndex] != Center) { // When it is tilted, use the new shifted positions // TODO: This is somewhat of an mystery.... somewhat confused why this is the case - if (modulesInGPU.moduleLayerType[lowerModuleIndex] != Pixel) { + if (modules.moduleLayerType()[lowerModuleIndex] != Pixel) { // dPhi Change should be calculated so that the upper hit has higher rt. // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) @@ -686,7 +510,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, - Modules const& modulesInGPU, + ModulesConst modules, uint16_t lowerModuleIndex, uint16_t upperModuleIndex, unsigned int lowerHitIndex, @@ -720,7 +544,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; // Cut #2 : drt cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3100 - const float drtCut = modulesInGPU.moduleType[lowerModuleIndex] == PS ? 2.f : 10.f; + const float drtCut = modules.moduleType()[lowerModuleIndex] == PS ? 
2.f : 10.f; drt = rtLower - rtUpper; if (alpaka::math::abs(acc, drt) >= drtCut) return false; @@ -729,7 +553,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float shiftedCoords[3]; shiftStripHits(acc, - modulesInGPU, + modules, lowerModuleIndex, upperModuleIndex, lowerHitIndex, @@ -748,9 +572,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { yn = shiftedCoords[1]; zn = shiftedCoords[2]; - if (modulesInGPU.moduleType[lowerModuleIndex] == PS) { + if (modules.moduleType()[lowerModuleIndex] == PS) { // Appropriate lower or upper hit is modified after checking which one was actually shifted - if (modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; @@ -773,14 +597,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // dz needs to change if it is a PS module where the strip hits are shifted in order to properly account for the case when a tilted module falls under "endcap logic" // if it was an endcap it will have zero effect - if (modulesInGPU.moduleType[lowerModuleIndex] == PS) { - dz = modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel ? zLower - zn : zUpper - zn; + if (modules.moduleType()[lowerModuleIndex] == PS) { + dz = modules.moduleLayerType()[lowerModuleIndex] == Pixel ? zLower - zn : zUpper - zn; } float miniCut = 0; - miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel - ? dPhiThreshold(acc, rtLower, modulesInGPU, lowerModuleIndex, dPhi, dz) - : dPhiThreshold(acc, rtUpper, modulesInGPU, lowerModuleIndex, dPhi, dz); + miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel + ? 
dPhiThreshold(acc, rtLower, modules, lowerModuleIndex, dPhi, dz) + : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex, dPhi, dz); if (alpaka::math::abs(acc, dPhi) >= miniCut) return false; @@ -797,7 +621,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, - Modules const& modulesInGPU, + ModulesConst modules, uint16_t lowerModuleIndex, uint16_t upperModuleIndex, unsigned int lowerHitIndex, @@ -818,9 +642,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float yUpper, float zUpper, float rtUpper) { - if (modulesInGPU.subdets[lowerModuleIndex] == Barrel) { + if (modules.subdets()[lowerModuleIndex] == Barrel) { return runMiniDoubletDefaultAlgoBarrel(acc, - modulesInGPU, + modules, lowerModuleIndex, upperModuleIndex, lowerHitIndex, @@ -843,7 +667,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { rtUpper); } else { return runMiniDoubletDefaultAlgoEndcap(acc, - modulesInGPU, + modules, lowerModuleIndex, upperModuleIndex, lowerHitIndex, @@ -867,22 +691,27 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } } - struct CreateMiniDoubletsInGPUv2 { + struct CreateMiniDoublets { template - ALPAKA_FN_ACC void operator()( - TAcc const& acc, Modules modulesInGPU, Hits hitsInGPU, MiniDoublets mdsInGPU, ObjectRanges rangesInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + HitsConst hits, + HitsRangesConst hitsRanges, + MiniDoublets mds, + MiniDoubletsOccupancy mdsOccupancy, + ObjectRangesConst ranges) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t lowerModuleIndex = globalThreadIdx[1]; lowerModuleIndex < (*modulesInGPU.nLowerModules); + for (uint16_t lowerModuleIndex = globalThreadIdx[1]; lowerModuleIndex < modules.nLowerModules(); lowerModuleIndex += gridThreadExtent[1]) { - uint16_t upperModuleIndex = modulesInGPU.partnerModuleIndices[lowerModuleIndex]; - int nLowerHits = 
hitsInGPU.hitRangesnLower[lowerModuleIndex]; - int nUpperHits = hitsInGPU.hitRangesnUpper[lowerModuleIndex]; - if (hitsInGPU.hitRangesLower[lowerModuleIndex] == -1) + uint16_t upperModuleIndex = modules.partnerModuleIndices()[lowerModuleIndex]; + int nLowerHits = hitsRanges.hitRangesnLower()[lowerModuleIndex]; + int nUpperHits = hitsRanges.hitRangesnUpper()[lowerModuleIndex]; + if (hitsRanges.hitRangesLower()[lowerModuleIndex] == -1) continue; - unsigned int upHitArrayIndex = hitsInGPU.hitRangesUpper[lowerModuleIndex]; - unsigned int loHitArrayIndex = hitsInGPU.hitRangesLower[lowerModuleIndex]; + unsigned int upHitArrayIndex = hitsRanges.hitRangesUpper()[lowerModuleIndex]; + unsigned int loHitArrayIndex = hitsRanges.hitRangesLower()[lowerModuleIndex]; int limit = nUpperHits * nLowerHits; for (int hitIndex = globalThreadIdx[2]; hitIndex < limit; hitIndex += gridThreadExtent[2]) { @@ -893,19 +722,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (lowerHitIndex >= nLowerHits) continue; unsigned int lowerHitArrayIndex = loHitArrayIndex + lowerHitIndex; - float xLower = hitsInGPU.xs[lowerHitArrayIndex]; - float yLower = hitsInGPU.ys[lowerHitArrayIndex]; - float zLower = hitsInGPU.zs[lowerHitArrayIndex]; - float rtLower = hitsInGPU.rts[lowerHitArrayIndex]; + float xLower = hits.xs()[lowerHitArrayIndex]; + float yLower = hits.ys()[lowerHitArrayIndex]; + float zLower = hits.zs()[lowerHitArrayIndex]; + float rtLower = hits.rts()[lowerHitArrayIndex]; unsigned int upperHitArrayIndex = upHitArrayIndex + upperHitIndex; - float xUpper = hitsInGPU.xs[upperHitArrayIndex]; - float yUpper = hitsInGPU.ys[upperHitArrayIndex]; - float zUpper = hitsInGPU.zs[upperHitArrayIndex]; - float rtUpper = hitsInGPU.rts[upperHitArrayIndex]; + float xUpper = hits.xs()[upperHitArrayIndex]; + float yUpper = hits.ys()[upperHitArrayIndex]; + float zUpper = hits.zs()[upperHitArrayIndex]; + float rtUpper = hits.rts()[upperHitArrayIndex]; float dz, dphi, dphichange, shiftedX, shiftedY, shiftedZ, 
noShiftedDphi, noShiftedDphiChange; bool success = runMiniDoubletDefaultAlgo(acc, - modulesInGPU, + modules, lowerModuleIndex, upperModuleIndex, lowerHitArrayIndex, @@ -927,21 +756,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { zUpper, rtUpper); if (success) { - int totOccupancyMDs = - alpaka::atomicAdd(acc, &mdsInGPU.totOccupancyMDs[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); - if (totOccupancyMDs >= (rangesInGPU.miniDoubletModuleOccupancy[lowerModuleIndex])) { + int totOccupancyMDs = alpaka::atomicAdd( + acc, &mdsOccupancy.totOccupancyMDs()[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyMDs >= (ranges.miniDoubletModuleOccupancy()[lowerModuleIndex])) { #ifdef WARNINGS printf("Mini-doublet excess alert! Module index = %d\n", lowerModuleIndex); #endif } else { int mdModuleIndex = - alpaka::atomicAdd(acc, &mdsInGPU.nMDs[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); - unsigned int mdIndex = rangesInGPU.miniDoubletModuleIndices[lowerModuleIndex] + mdModuleIndex; + alpaka::atomicAdd(acc, &mdsOccupancy.nMDs()[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int mdIndex = ranges.miniDoubletModuleIndices()[lowerModuleIndex] + mdModuleIndex; addMDToMemory(acc, - mdsInGPU, - hitsInGPU, - modulesInGPU, + mds, + hits, + modules, lowerHitArrayIndex, upperHitArrayIndex, lowerModuleIndex, @@ -963,7 +792,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct CreateMDArrayRangesGPU { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, ObjectRanges rangesInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, ModulesConst modules, ObjectRanges ranges) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -981,11 +810,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // Create variables outside of the for loop. 
int occupancy, category_number, eta_number; - for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { - short module_rings = modulesInGPU.rings[i]; - short module_layers = modulesInGPU.layers[i]; - short module_subdets = modulesInGPU.subdets[i]; - float module_eta = alpaka::math::abs(acc, modulesInGPU.eta[i]); + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); if (module_layers <= 3 && module_subdets == 5) category_number = 0; @@ -1042,23 +871,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int nTotMDs = alpaka::atomicAdd(acc, &nTotalMDs, occupancy, alpaka::hierarchy::Threads{}); - rangesInGPU.miniDoubletModuleIndices[i] = nTotMDs; - rangesInGPU.miniDoubletModuleOccupancy[i] = occupancy; + ranges.miniDoubletModuleIndices()[i] = nTotMDs; + ranges.miniDoubletModuleOccupancy()[i] = occupancy; } // Wait for all threads to finish before reporting final values alpaka::syncBlockThreads(acc); if (cms::alpakatools::once_per_block(acc)) { - rangesInGPU.miniDoubletModuleIndices[*modulesInGPU.nLowerModules] = nTotalMDs; - *rangesInGPU.device_nTotalMDs = nTotalMDs; + ranges.miniDoubletModuleIndices()[modules.nLowerModules()] = nTotalMDs; + ranges.nTotalMDs() = nTotalMDs; } } }; struct AddMiniDoubletRangesToEventExplicit { template - ALPAKA_FN_ACC void operator()( - TAcc const& acc, Modules modulesInGPU, MiniDoublets mdsInGPU, ObjectRanges rangesInGPU, Hits hitsInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + MiniDoubletsOccupancy mdsOccupancy, + ObjectRanges ranges, + HitsRangesConst hitsRanges) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 
1)); @@ -1066,16 +898,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { - if (mdsInGPU.nMDs[i] == 0 or hitsInGPU.hitRanges[i * 2] == -1) { - rangesInGPU.mdRanges[i * 2] = -1; - rangesInGPU.mdRanges[i * 2 + 1] = -1; + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (mdsOccupancy.nMDs()[i] == 0 or hitsRanges.hitRanges()[i][0] == -1) { + ranges.mdRanges()[i][0] = -1; + ranges.mdRanges()[i][1] = -1; } else { - rangesInGPU.mdRanges[i * 2] = rangesInGPU.miniDoubletModuleIndices[i]; - rangesInGPU.mdRanges[i * 2 + 1] = rangesInGPU.miniDoubletModuleIndices[i] + mdsInGPU.nMDs[i] - 1; + ranges.mdRanges()[i][0] = ranges.miniDoubletModuleIndices()[i]; + ranges.mdRanges()[i][1] = ranges.miniDoubletModuleIndices()[i] + mdsOccupancy.nMDs()[i] - 1; } } } }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + #endif diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h index 85b7b08dc075b..e027222890702 100644 --- a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h @@ -2,13 +2,13 @@ #define RecoTracker_LSTCore_src_alpaka_NeuralNetwork_h #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" -#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/HitsSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" #include "NeuralNetworkWeights.h" -#include "Segment.h" -#include "MiniDoublet.h" -#include "Hit.h" -#include "Triplet.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { @@ -16,10 +16,10 @@ namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float runInference(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, - Triplets const& tripletsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + TripletsConst triplets, const float* xVec, const float* yVec, const unsigned int* mdIndices, @@ -55,54 +55,54 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t lowerModuleIndex5 = lowerModuleIndices[4]; // Compute some convenience variables short layer2_adjustment = 0; - if (modulesInGPU.layers[lowerModuleIndex1] == 1) { + if (modules.layers()[lowerModuleIndex1] == 1) { layer2_adjustment = 1; // get upper segment to be in second layer } unsigned int md_idx_for_t5_eta_phi = - segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + layer2_adjustment]]; - bool is_endcap1 = (modulesInGPU.subdets[lowerModuleIndex1] == 4); // true if anchor hit 1 is in the endcap - bool is_endcap2 = (modulesInGPU.subdets[lowerModuleIndex2] == 4); // true if anchor hit 2 is in the endcap - bool is_endcap3 = (modulesInGPU.subdets[lowerModuleIndex3] == 4); // true if anchor hit 3 is in the endcap - bool is_endcap4 = (modulesInGPU.subdets[lowerModuleIndex4] == 4); // true if anchor hit 4 is in the endcap - bool is_endcap5 = (modulesInGPU.subdets[lowerModuleIndex5] == 4); // true if anchor hit 5 is in the endcap + segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]][layer2_adjustment]; + bool is_endcap1 = (modules.subdets()[lowerModuleIndex1] == 4); // true if anchor hit 1 is in the endcap + bool is_endcap2 = (modules.subdets()[lowerModuleIndex2] == 4); // true if anchor hit 2 is in the endcap + bool is_endcap3 = (modules.subdets()[lowerModuleIndex3] == 4); // true if anchor hit 3 is in the endcap + bool is_endcap4 = (modules.subdets()[lowerModuleIndex4] == 4); // true if anchor hit 4 is in the endcap + bool is_endcap5 = 
(modules.subdets()[lowerModuleIndex5] == 4); // true if anchor hit 5 is in the endcap // Build DNN input vector (corresponding output N-tuple branch noted in parenthetical in comment) float x[38] = { - alpaka::math::log10(acc, 2 * k2Rinv1GeVf * innerRadius), // inner T3 pT (t3_pt) - mdsInGPU.anchorEta[mdIndex1], // inner T3 anchor hit 1 eta (t3_0_eta) - mdsInGPU.anchorPhi[mdIndex1], // inner T3 anchor hit 1 phi (t3_0_phi) - mdsInGPU.anchorZ[mdIndex1], // inner T3 anchor hit 1 z (t3_0_z) - alpaka::math::sqrt(acc, x1 * x1 + y1 * y1), // inner T3 anchor hit 1 r (t3_0_r) - float(modulesInGPU.layers[lowerModuleIndex1] + 6 * is_endcap1), // inner T3 anchor hit 1 layer (t3_0_layer) - mdsInGPU.anchorEta[mdIndex2], // inner T3 anchor hit 2 eta (t3_2_eta) - mdsInGPU.anchorPhi[mdIndex2], // inner T3 anchor hit 2 phi (t3_2_phi) - mdsInGPU.anchorZ[mdIndex2], // inner T3 anchor hit 2 z (t3_2_z) - alpaka::math::sqrt(acc, x2 * x2 + y2 * y2), // inner T3 anchor hit 2 r (t3_2_r) - float(modulesInGPU.layers[lowerModuleIndex2] + 6 * is_endcap2), // inner T3 anchor hit 2 layer (t3_2_layer) - mdsInGPU.anchorEta[mdIndex3], // inner T3 anchor hit 3 eta (t3_4_eta) - mdsInGPU.anchorPhi[mdIndex3], // inner T3 anchor hit 3 phi (t3_4_phi) - mdsInGPU.anchorZ[mdIndex3], // inner T3 anchor hit 3 z (t3_4_z) - alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // inner T3 anchor hit 3 r (t3_4_r) - float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // inner T3 anchor hit 3 layer (t3_4_layer) - alpaka::math::log10(acc, 2 * k2Rinv1GeVf * outerRadius), // outer T3 pT (t3_pt) - mdsInGPU.anchorEta[mdIndex3], // outer T3 anchor hit 4 eta (t3_0_eta) - mdsInGPU.anchorPhi[mdIndex3], // outer T3 anchor hit 4 phi (t3_0_phi) - mdsInGPU.anchorZ[mdIndex3], // outer T3 anchor hit 3 eta (t3_0_z) - alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // outer T3 anchor hit 3 r (t3_0_r) - float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // outer T3 anchor hit 3 layer (t3_0_layer) - 
mdsInGPU.anchorEta[mdIndex4], // outer T3 anchor hit 4 eta (t3_2_eta) - mdsInGPU.anchorPhi[mdIndex4], // outer T3 anchor hit 4 phi (t3_2_phi) - mdsInGPU.anchorZ[mdIndex4], // outer T3 anchor hit 4 z (t3_2_z) - alpaka::math::sqrt(acc, x4 * x4 + y4 * y4), // outer T3 anchor hit 4 r (t3_2_r) - float(modulesInGPU.layers[lowerModuleIndex4] + 6 * is_endcap4), // outer T3 anchor hit 4 layer (t3_2_layer) - mdsInGPU.anchorEta[mdIndex5], // outer T3 anchor hit 5 eta (t3_4_eta) - mdsInGPU.anchorPhi[mdIndex5], // outer T3 anchor hit 5 phi (t3_4_phi) - mdsInGPU.anchorZ[mdIndex5], // outer T3 anchor hit 5 z (t3_4_z) - alpaka::math::sqrt(acc, x5 * x5 + y5 * y5), // outer T3 anchor hit 5 r (t3_4_r) - float(modulesInGPU.layers[lowerModuleIndex5] + 6 * is_endcap5), // outer T3 anchor hit 5 layer (t3_4_layer) + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * innerRadius), // inner T3 pT (t3_pt) + mds.anchorEta()[mdIndex1], // inner T3 anchor hit 1 eta (t3_0_eta) + mds.anchorPhi()[mdIndex1], // inner T3 anchor hit 1 phi (t3_0_phi) + mds.anchorZ()[mdIndex1], // inner T3 anchor hit 1 z (t3_0_z) + alpaka::math::sqrt(acc, x1 * x1 + y1 * y1), // inner T3 anchor hit 1 r (t3_0_r) + float(modules.layers()[lowerModuleIndex1] + 6 * is_endcap1), // inner T3 anchor hit 1 layer (t3_0_layer) + mds.anchorEta()[mdIndex2], // inner T3 anchor hit 2 eta (t3_2_eta) + mds.anchorPhi()[mdIndex2], // inner T3 anchor hit 2 phi (t3_2_phi) + mds.anchorZ()[mdIndex2], // inner T3 anchor hit 2 z (t3_2_z) + alpaka::math::sqrt(acc, x2 * x2 + y2 * y2), // inner T3 anchor hit 2 r (t3_2_r) + float(modules.layers()[lowerModuleIndex2] + 6 * is_endcap2), // inner T3 anchor hit 2 layer (t3_2_layer) + mds.anchorEta()[mdIndex3], // inner T3 anchor hit 3 eta (t3_4_eta) + mds.anchorPhi()[mdIndex3], // inner T3 anchor hit 3 phi (t3_4_phi) + mds.anchorZ()[mdIndex3], // inner T3 anchor hit 3 z (t3_4_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // inner T3 anchor hit 3 r (t3_4_r) + float(modules.layers()[lowerModuleIndex3] + 6 * 
is_endcap3), // inner T3 anchor hit 3 layer (t3_4_layer) + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * outerRadius), // outer T3 pT (t3_pt) + mds.anchorEta()[mdIndex3], // outer T3 anchor hit 4 eta (t3_0_eta) + mds.anchorPhi()[mdIndex3], // outer T3 anchor hit 4 phi (t3_0_phi) + mds.anchorZ()[mdIndex3], // outer T3 anchor hit 3 eta (t3_0_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // outer T3 anchor hit 3 r (t3_0_r) + float(modules.layers()[lowerModuleIndex3] + 6 * is_endcap3), // outer T3 anchor hit 3 layer (t3_0_layer) + mds.anchorEta()[mdIndex4], // outer T3 anchor hit 4 eta (t3_2_eta) + mds.anchorPhi()[mdIndex4], // outer T3 anchor hit 4 phi (t3_2_phi) + mds.anchorZ()[mdIndex4], // outer T3 anchor hit 4 z (t3_2_z) + alpaka::math::sqrt(acc, x4 * x4 + y4 * y4), // outer T3 anchor hit 4 r (t3_2_r) + float(modules.layers()[lowerModuleIndex4] + 6 * is_endcap4), // outer T3 anchor hit 4 layer (t3_2_layer) + mds.anchorEta()[mdIndex5], // outer T3 anchor hit 5 eta (t3_4_eta) + mds.anchorPhi()[mdIndex5], // outer T3 anchor hit 5 phi (t3_4_phi) + mds.anchorZ()[mdIndex5], // outer T3 anchor hit 5 z (t3_4_z) + alpaka::math::sqrt(acc, x5 * x5 + y5 * y5), // outer T3 anchor hit 5 r (t3_4_r) + float(modules.layers()[lowerModuleIndex5] + 6 * is_endcap5), // outer T3 anchor hit 5 layer (t3_4_layer) alpaka::math::log10(acc, (innerRadius + outerRadius) * k2Rinv1GeVf), // T5 pT (t5_pt) - mdsInGPU.anchorEta[md_idx_for_t5_eta_phi], // T5 eta (t5_eta) - mdsInGPU.anchorPhi[md_idx_for_t5_eta_phi], // T5 phi (t5_phi) + mds.anchorEta()[md_idx_for_t5_eta_phi], // T5 eta (t5_eta) + mds.anchorPhi()[md_idx_for_t5_eta_phi], // T5 phi (t5_phi) alpaka::math::log10(acc, innerRadius), // T5 inner radius (t5_innerRadius) alpaka::math::log10(acc, bridgeRadius), // T5 bridge radius (t5_bridgeRadius) alpaka::math::log10(acc, outerRadius) // T5 outer radius (t5_outerRadius) diff --git a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h deleted file mode 
100644 index 81e4358ab30d6..0000000000000 --- a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h +++ /dev/null @@ -1,154 +0,0 @@ -#ifndef RecoTracker_LSTCore_interface_ObjectRanges_h -#define RecoTracker_LSTCore_interface_ObjectRanges_h - -#include "RecoTracker/LSTCore/interface/Constants.h" - -namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - - struct ObjectRanges { - int* hitRanges; - int* hitRangesLower; - int* hitRangesUpper; - int8_t* hitRangesnLower; - int8_t* hitRangesnUpper; - int* mdRanges; - int* segmentRanges; - int* trackletRanges; - int* tripletRanges; - int* trackCandidateRanges; - // Others will be added later - int* quintupletRanges; - - // This number is just nEligibleModules - 1, but still we want this to be independent of the TC kernel - uint16_t* nEligibleT5Modules; - // Will be allocated in createQuintuplets kernel! - uint16_t* indicesOfEligibleT5Modules; - // To store different starting points for variable occupancy stuff - int* quintupletModuleIndices; - int* quintupletModuleOccupancy; - int* miniDoubletModuleIndices; - int* miniDoubletModuleOccupancy; - int* segmentModuleIndices; - int* segmentModuleOccupancy; - int* tripletModuleIndices; - int* tripletModuleOccupancy; - - unsigned int* device_nTotalMDs; - unsigned int* device_nTotalSegs; - unsigned int* device_nTotalTrips; - unsigned int* device_nTotalQuints; - - template - void setData(TBuff& buf) { - hitRanges = buf.hitRanges_buf.data(); - hitRangesLower = buf.hitRangesLower_buf.data(); - hitRangesUpper = buf.hitRangesUpper_buf.data(); - hitRangesnLower = buf.hitRangesnLower_buf.data(); - hitRangesnUpper = buf.hitRangesnUpper_buf.data(); - mdRanges = buf.mdRanges_buf.data(); - segmentRanges = buf.segmentRanges_buf.data(); - trackletRanges = buf.trackletRanges_buf.data(); - tripletRanges = buf.tripletRanges_buf.data(); - trackCandidateRanges = buf.trackCandidateRanges_buf.data(); - quintupletRanges = buf.quintupletRanges_buf.data(); - - nEligibleT5Modules = buf.nEligibleT5Modules_buf.data(); - 
indicesOfEligibleT5Modules = buf.indicesOfEligibleT5Modules_buf.data(); - - quintupletModuleIndices = buf.quintupletModuleIndices_buf.data(); - quintupletModuleOccupancy = buf.quintupletModuleOccupancy_buf.data(); - miniDoubletModuleIndices = buf.miniDoubletModuleIndices_buf.data(); - miniDoubletModuleOccupancy = buf.miniDoubletModuleOccupancy_buf.data(); - segmentModuleIndices = buf.segmentModuleIndices_buf.data(); - segmentModuleOccupancy = buf.segmentModuleOccupancy_buf.data(); - tripletModuleIndices = buf.tripletModuleIndices_buf.data(); - tripletModuleOccupancy = buf.tripletModuleOccupancy_buf.data(); - - device_nTotalMDs = buf.device_nTotalMDs_buf.data(); - device_nTotalSegs = buf.device_nTotalSegs_buf.data(); - device_nTotalTrips = buf.device_nTotalTrips_buf.data(); - device_nTotalQuints = buf.device_nTotalQuints_buf.data(); - } - }; - - template - struct ObjectRangesBuffer { - Buf hitRanges_buf; - Buf hitRangesLower_buf; - Buf hitRangesUpper_buf; - Buf hitRangesnLower_buf; - Buf hitRangesnUpper_buf; - Buf mdRanges_buf; - Buf segmentRanges_buf; - Buf trackletRanges_buf; - Buf tripletRanges_buf; - Buf trackCandidateRanges_buf; - Buf quintupletRanges_buf; - - Buf nEligibleT5Modules_buf; - Buf indicesOfEligibleT5Modules_buf; - - Buf quintupletModuleIndices_buf; - Buf quintupletModuleOccupancy_buf; - Buf miniDoubletModuleIndices_buf; - Buf miniDoubletModuleOccupancy_buf; - Buf segmentModuleIndices_buf; - Buf segmentModuleOccupancy_buf; - Buf tripletModuleIndices_buf; - Buf tripletModuleOccupancy_buf; - - Buf device_nTotalMDs_buf; - Buf device_nTotalSegs_buf; - Buf device_nTotalTrips_buf; - Buf device_nTotalQuints_buf; - - ObjectRanges data_; - - template - ObjectRangesBuffer(unsigned int nMod, unsigned int nLowerMod, TDevAcc const& devAccIn, TQueue& queue) - : hitRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), - hitRangesLower_buf(allocBufWrapper(devAccIn, nMod, queue)), - hitRangesUpper_buf(allocBufWrapper(devAccIn, nMod, queue)), - 
hitRangesnLower_buf(allocBufWrapper(devAccIn, nMod, queue)), - hitRangesnUpper_buf(allocBufWrapper(devAccIn, nMod, queue)), - mdRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), - segmentRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), - trackletRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), - tripletRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), - trackCandidateRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), - quintupletRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), - nEligibleT5Modules_buf(allocBufWrapper(devAccIn, 1, queue)), - indicesOfEligibleT5Modules_buf(allocBufWrapper(devAccIn, nLowerMod, queue)), - quintupletModuleIndices_buf(allocBufWrapper(devAccIn, nLowerMod, queue)), - quintupletModuleOccupancy_buf(allocBufWrapper(devAccIn, nLowerMod, queue)), - miniDoubletModuleIndices_buf(allocBufWrapper(devAccIn, nLowerMod + 1, queue)), - miniDoubletModuleOccupancy_buf(allocBufWrapper(devAccIn, nLowerMod + 1, queue)), - segmentModuleIndices_buf(allocBufWrapper(devAccIn, nLowerMod + 1, queue)), - segmentModuleOccupancy_buf(allocBufWrapper(devAccIn, nLowerMod + 1, queue)), - tripletModuleIndices_buf(allocBufWrapper(devAccIn, nLowerMod, queue)), - tripletModuleOccupancy_buf(allocBufWrapper(devAccIn, nLowerMod, queue)), - device_nTotalMDs_buf(allocBufWrapper(devAccIn, 1, queue)), - device_nTotalSegs_buf(allocBufWrapper(devAccIn, 1, queue)), - device_nTotalTrips_buf(allocBufWrapper(devAccIn, 1, queue)), - device_nTotalQuints_buf(allocBufWrapper(devAccIn, 1, queue)) { - alpaka::memset(queue, hitRanges_buf, 0xff); - alpaka::memset(queue, hitRangesLower_buf, 0xff); - alpaka::memset(queue, hitRangesUpper_buf, 0xff); - alpaka::memset(queue, hitRangesnLower_buf, 0xff); - alpaka::memset(queue, hitRangesnUpper_buf, 0xff); - alpaka::memset(queue, mdRanges_buf, 0xff); - alpaka::memset(queue, segmentRanges_buf, 0xff); - alpaka::memset(queue, trackletRanges_buf, 0xff); - alpaka::memset(queue, tripletRanges_buf, 0xff); - 
alpaka::memset(queue, trackCandidateRanges_buf, 0xff); - alpaka::memset(queue, quintupletRanges_buf, 0xff); - alpaka::memset(queue, quintupletModuleIndices_buf, 0xff); - data_.setData(*this); - } - - inline ObjectRanges const* data() const { return &data_; } - void setData(ObjectRangesBuffer& buf) { data_.setData(buf); } - }; - -} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst -#endif diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index e773bdf9ce5b0..30a48ac101b54 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -2,117 +2,25 @@ #define RecoTracker_LSTCore_src_alpaka_PixelQuintuplet_h #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" -#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" -#include "Segment.h" -#include "MiniDoublet.h" #include "Hit.h" -#include "Triplet.h" -#include "Quintuplet.h" #include "PixelTriplet.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct PixelQuintuplets { - unsigned int* pixelIndices; - unsigned int* T5Indices; - unsigned int* nPixelQuintuplets; - unsigned int* totOccupancyPixelQuintuplets; - bool* isDup; - FPX* score; - FPX* eta; - FPX* phi; - uint8_t* logicalLayers; - unsigned int* hitIndices; - uint16_t* lowerModuleIndices; - FPX* pixelRadius; - FPX* quintupletRadius; - FPX* centerX; - FPX* centerY; - float* rzChiSquared; - float* rPhiChiSquared; - float* rPhiChiSquaredInwards; - - template - void setData(TBuff& buf) { - pixelIndices = buf.pixelIndices_buf.data(); - T5Indices = 
buf.T5Indices_buf.data(); - nPixelQuintuplets = buf.nPixelQuintuplets_buf.data(); - totOccupancyPixelQuintuplets = buf.totOccupancyPixelQuintuplets_buf.data(); - isDup = buf.isDup_buf.data(); - score = buf.score_buf.data(); - eta = buf.eta_buf.data(); - phi = buf.phi_buf.data(); - logicalLayers = buf.logicalLayers_buf.data(); - hitIndices = buf.hitIndices_buf.data(); - lowerModuleIndices = buf.lowerModuleIndices_buf.data(); - pixelRadius = buf.pixelRadius_buf.data(); - quintupletRadius = buf.quintupletRadius_buf.data(); - centerX = buf.centerX_buf.data(); - centerY = buf.centerY_buf.data(); - rzChiSquared = buf.rzChiSquared_buf.data(); - rPhiChiSquared = buf.rPhiChiSquared_buf.data(); - rPhiChiSquaredInwards = buf.rPhiChiSquaredInwards_buf.data(); - } - }; - - template - struct PixelQuintupletsBuffer { - Buf pixelIndices_buf; - Buf T5Indices_buf; - Buf nPixelQuintuplets_buf; - Buf totOccupancyPixelQuintuplets_buf; - Buf isDup_buf; - Buf score_buf; - Buf eta_buf; - Buf phi_buf; - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf lowerModuleIndices_buf; - Buf pixelRadius_buf; - Buf quintupletRadius_buf; - Buf centerX_buf; - Buf centerY_buf; - Buf rzChiSquared_buf; - Buf rPhiChiSquared_buf; - Buf rPhiChiSquaredInwards_buf; - - PixelQuintuplets data_; - - template - PixelQuintupletsBuffer(unsigned int maxPixelQuintuplets, TDevAcc const& devAccIn, TQueue& queue) - : pixelIndices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - T5Indices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - nPixelQuintuplets_buf(allocBufWrapper(devAccIn, 1, queue)), - totOccupancyPixelQuintuplets_buf(allocBufWrapper(devAccIn, 1, queue)), - isDup_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - score_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - eta_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - phi_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - logicalLayers_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets 
* Params_pT5::kLayers, queue)), - hitIndices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets * Params_pT5::kHits, queue)), - lowerModuleIndices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets * Params_pT5::kLayers, queue)), - pixelRadius_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - quintupletRadius_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - centerX_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - centerY_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - rzChiSquared_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - rPhiChiSquared_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - rPhiChiSquaredInwards_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)) { - alpaka::memset(queue, nPixelQuintuplets_buf, 0u); - alpaka::memset(queue, totOccupancyPixelQuintuplets_buf, 0u); - } - - inline PixelQuintuplets const* data() const { return &data_; } - inline void setData(PixelQuintupletsBuffer& buf) { data_.setData(buf); } - }; - - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelQuintupletToMemory(Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, - Quintuplets const& quintupletsInGPU, - PixelQuintuplets& pixelQuintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelQuintupletToMemory(ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + QuintupletsConst quintuplets, + PixelQuintuplets pixelQuintuplets, unsigned int pixelIndex, - unsigned int T5Index, + unsigned int t5Index, unsigned int pixelQuintupletIndex, float rzChiSquared, float rPhiChiSquared, @@ -124,85 +32,59 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float quintupletRadius, float centerX, float centerY) { - pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex] = pixelIndex; - pixelQuintupletsInGPU.T5Indices[pixelQuintupletIndex] = T5Index; - pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = false; - pixelQuintupletsInGPU.score[pixelQuintupletIndex] 
= __F2H(score); - pixelQuintupletsInGPU.eta[pixelQuintupletIndex] = __F2H(eta); - pixelQuintupletsInGPU.phi[pixelQuintupletIndex] = __F2H(phi); - - pixelQuintupletsInGPU.pixelRadius[pixelQuintupletIndex] = __F2H(pixelRadius); - pixelQuintupletsInGPU.quintupletRadius[pixelQuintupletIndex] = __F2H(quintupletRadius); - pixelQuintupletsInGPU.centerX[pixelQuintupletIndex] = __F2H(centerX); - pixelQuintupletsInGPU.centerY[pixelQuintupletIndex] = __F2H(centerY); - - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex] = 0; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 1] = 0; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 2] = - quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers]; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 3] = - quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 1]; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 4] = - quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 2]; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 5] = - quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 3]; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 6] = - quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 4]; - - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex] = - segmentsInGPU.innerLowerModuleIndices[pixelIndex]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 1] = - segmentsInGPU.outerLowerModuleIndices[pixelIndex]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 2] = - quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 3] = - 
quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 1]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 4] = - quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 2]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 5] = - quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 3]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 6] = - quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 4]; - - unsigned int pixelInnerMD = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelIndex]; - unsigned int pixelOuterMD = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelIndex + 1]; - - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex] = - mdsInGPU.anchorHitIndices[pixelInnerMD]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 1] = - mdsInGPU.outerHitIndices[pixelInnerMD]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 2] = - mdsInGPU.anchorHitIndices[pixelOuterMD]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 3] = - mdsInGPU.outerHitIndices[pixelOuterMD]; - - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 4] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 5] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 1]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 6] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 2]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 7] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 3]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 8] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 4]; - 
pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 9] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 5]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 10] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 6]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 11] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 7]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 12] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 8]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 13] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 9]; - - pixelQuintupletsInGPU.rzChiSquared[pixelQuintupletIndex] = rzChiSquared; - pixelQuintupletsInGPU.rPhiChiSquared[pixelQuintupletIndex] = rPhiChiSquared; - pixelQuintupletsInGPU.rPhiChiSquaredInwards[pixelQuintupletIndex] = rPhiChiSquaredInwards; + pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex] = pixelIndex; + pixelQuintuplets.quintupletIndices()[pixelQuintupletIndex] = t5Index; + pixelQuintuplets.isDup()[pixelQuintupletIndex] = false; + pixelQuintuplets.score()[pixelQuintupletIndex] = __F2H(score); + pixelQuintuplets.eta()[pixelQuintupletIndex] = __F2H(eta); + pixelQuintuplets.phi()[pixelQuintupletIndex] = __F2H(phi); + + pixelQuintuplets.pixelRadius()[pixelQuintupletIndex] = __F2H(pixelRadius); + pixelQuintuplets.quintupletRadius()[pixelQuintupletIndex] = __F2H(quintupletRadius); + pixelQuintuplets.centerX()[pixelQuintupletIndex] = __F2H(centerX); + pixelQuintuplets.centerY()[pixelQuintupletIndex] = __F2H(centerY); + + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][0] = 0; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][1] = 0; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][2] = quintuplets.logicalLayers()[t5Index][0]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][3] = 
quintuplets.logicalLayers()[t5Index][1]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][4] = quintuplets.logicalLayers()[t5Index][2]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][5] = quintuplets.logicalLayers()[t5Index][3]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][6] = quintuplets.logicalLayers()[t5Index][4]; + + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][0] = segments.innerLowerModuleIndices()[pixelIndex]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][1] = segments.outerLowerModuleIndices()[pixelIndex]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][2] = quintuplets.lowerModuleIndices()[t5Index][0]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][3] = quintuplets.lowerModuleIndices()[t5Index][1]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][4] = quintuplets.lowerModuleIndices()[t5Index][2]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][5] = quintuplets.lowerModuleIndices()[t5Index][3]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][6] = quintuplets.lowerModuleIndices()[t5Index][4]; + + unsigned int pixelInnerMD = segments.mdIndices()[pixelIndex][0]; + unsigned int pixelOuterMD = segments.mdIndices()[pixelIndex][1]; + + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][0] = mds.anchorHitIndices()[pixelInnerMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][1] = mds.outerHitIndices()[pixelInnerMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][2] = mds.anchorHitIndices()[pixelOuterMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][3] = mds.outerHitIndices()[pixelOuterMD]; + + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][4] = quintuplets.hitIndices()[t5Index][0]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][5] = quintuplets.hitIndices()[t5Index][1]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][6] = quintuplets.hitIndices()[t5Index][2]; + 
pixelQuintuplets.hitIndices()[pixelQuintupletIndex][7] = quintuplets.hitIndices()[t5Index][3]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][8] = quintuplets.hitIndices()[t5Index][4]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][9] = quintuplets.hitIndices()[t5Index][5]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][10] = quintuplets.hitIndices()[t5Index][6]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][11] = quintuplets.hitIndices()[t5Index][7]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][12] = quintuplets.hitIndices()[t5Index][8]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][13] = quintuplets.hitIndices()[t5Index][9]; + + pixelQuintuplets.rzChiSquared()[pixelQuintupletIndex] = rzChiSquared; + pixelQuintuplets.rPhiChiSquared()[pixelQuintupletIndex] = rPhiChiSquared; + pixelQuintuplets.rPhiChiSquaredInwards()[pixelQuintupletIndex] = rPhiChiSquaredInwards; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(ModulesConst modules, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -210,20 +92,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t lowerModuleIndex5, float rzChiSquared) { const int layer1 = - modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS); + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); const int layer2 = - modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS); + 
modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); const int layer3 = - modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); const int layer4 = - modulesInGPU.layers[lowerModuleIndex4] + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == TwoS); + modules.layers()[lowerModuleIndex4] + 6 * (modules.subdets()[lowerModuleIndex4] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS); const int layer5 = - modulesInGPU.layers[lowerModuleIndex5] + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex5] == Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == TwoS); + modules.layers()[lowerModuleIndex5] + 6 * (modules.subdets()[lowerModuleIndex5] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS); if (layer1 == 1 and layer2 == 2 and layer3 == 3) { if (layer4 == 12 and layer5 == 13) { @@ -287,7 +169,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return true; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredCuts(Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredCuts(ModulesConst modules, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -295,20 +177,20 @@ namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t lowerModuleIndex5, float rPhiChiSquared) { const int layer1 = - modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS); + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); const int layer2 = - modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS); + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); const int layer3 = - modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); const int layer4 = - modulesInGPU.layers[lowerModuleIndex4] + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == TwoS); + modules.layers()[lowerModuleIndex4] + 6 * (modules.subdets()[lowerModuleIndex4] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS); const int layer5 = - modulesInGPU.layers[lowerModuleIndex5] + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == Endcap) + - 5 * 
(modulesInGPU.subdets[lowerModuleIndex5] == Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == TwoS); + modules.layers()[lowerModuleIndex5] + 6 * (modules.subdets()[lowerModuleIndex5] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS); if (layer1 == 1 and layer2 == 2 and layer3 == 3) { if (layer4 == 12 and layer5 == 13) { @@ -420,7 +302,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression_pT5(TAcc const& acc, - Modules const& modulesInGPU, + ModulesConst modules, const uint16_t* lowerModuleIndices, float* delta1, float* delta2, @@ -442,11 +324,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float inv2 = kPixelPSZpitch / kWidth2S; float inv3 = kStripPSZpitch / kWidth2S; for (size_t i = 0; i < nPoints; i++) { - moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; - moduleSubdet = modulesInGPU.subdets[lowerModuleIndices[i]]; - moduleSide = modulesInGPU.sides[lowerModuleIndices[i]]; - const float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; - slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; + moduleType = modules.moduleType()[lowerModuleIndices[i]]; + moduleSubdet = modules.subdets()[lowerModuleIndices[i]]; + moduleSide = modules.sides()[lowerModuleIndices[i]]; + const float& drdz = modules.drdzs()[lowerModuleIndices[i]]; + slopes[i] = modules.dxdys()[lowerModuleIndices[i]]; //category 1 - barrel PS flat if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { delta1[i] = inv1; @@ -506,7 +388,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquared(TAcc const& acc, - Modules const& modulesInGPU, + ModulesConst modules, uint16_t* lowerModuleIndices, float g, float f, @@ -521,7 +403,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { bool isFlat[5]; float chiSquared = 0; - computeSigmasForRegression_pT5(acc, modulesInGPU, 
lowerModuleIndices, delta1, delta2, slopes, isFlat); + computeSigmasForRegression_pT5(acc, modules, lowerModuleIndices, delta1, delta2, slopes, isFlat); chiSquared = computeChiSquaredpT5(acc, 5, xs, ys, delta1, delta2, slopes, isFlat, g, f, radius); return chiSquared; @@ -542,7 +424,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return chiSquared; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredInwardsCuts(Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredInwardsCuts(ModulesConst modules, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -550,20 +432,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t lowerModuleIndex5, float rPhiChiSquared) { const int layer1 = - modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS); + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); const int layer2 = - modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS); + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); const int layer3 = - modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] 
== Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); const int layer4 = - modulesInGPU.layers[lowerModuleIndex4] + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == TwoS); + modules.layers()[lowerModuleIndex4] + 6 * (modules.subdets()[lowerModuleIndex4] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS); const int layer5 = - modulesInGPU.layers[lowerModuleIndex5] + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex5] == Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == TwoS); + modules.layers()[lowerModuleIndex5] + 6 * (modules.subdets()[lowerModuleIndex5] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS); if (layer1 == 1 and layer2 == 2 and layer3 == 3) { if (layer4 == 12 and layer5 == 13) { @@ -629,7 +511,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RZChiSquared(TAcc const& acc, - Modules const& modulesInGPU, + ModulesConst modules, uint16_t* lowerModuleIndices, float* rtPix, float* zPix, @@ -645,13 +527,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float RMSE = 0; for (size_t i = 0; i < Params_T5::kLayers; i++) { uint16_t& lowerModuleIndex = lowerModuleIndices[i]; - const int moduleType = modulesInGPU.moduleType[lowerModuleIndex]; - const int moduleSide = modulesInGPU.sides[lowerModuleIndex]; - const int moduleSubdet = modulesInGPU.subdets[lowerModuleIndex]; + const int moduleType = modules.moduleType()[lowerModuleIndex]; + const int moduleSide = modules.sides()[lowerModuleIndex]; + const int moduleSubdet = modules.subdets()[lowerModuleIndex]; residual = (moduleSubdet == Barrel) ? 
(zs[i] - zPix[0]) - slope * (rts[i] - rtPix[0]) : (rts[i] - rtPix[0]) - (zs[i] - zPix[0]) / slope; - const float& drdz = modulesInGPU.drdzs[lowerModuleIndex]; + const float& drdz = modules.drdzs()[lowerModuleIndex]; //PS Modules if (moduleType == 0) { error2 = kPixelPSZpitch * kPixelPSZpitch; @@ -673,12 +555,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelQuintupletDefaultAlgo(TAcc const& acc, - Modules const& modulesInGPU, - ObjectRanges const& rangesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, - Triplets const& tripletsInGPU, - Quintuplets const& quintupletsInGPU, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + TripletsConst triplets, + QuintupletsConst quintuplets, unsigned int pixelSegmentIndex, unsigned int quintupletIndex, float& rzChiSquared, @@ -689,20 +572,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& centerX, float& centerY, unsigned int pixelSegmentArrayIndex) { - unsigned int T5InnerT3Index = quintupletsInGPU.tripletIndices[2 * quintupletIndex]; - unsigned int T5OuterT3Index = quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1]; + unsigned int t5InnerT3Index = quintuplets.tripletIndices()[quintupletIndex][0]; + unsigned int t5OuterT3Index = quintuplets.tripletIndices()[quintupletIndex][1]; float pixelRadiusTemp, tripletRadius, rPhiChiSquaredTemp, rzChiSquaredTemp, rPhiChiSquaredInwardsTemp, centerXTemp, centerYTemp; if (not runPixelTripletDefaultAlgo(acc, - modulesInGPU, - rangesInGPU, - mdsInGPU, - segmentsInGPU, - tripletsInGPU, + modules, + ranges, + mds, + segments, + segmentsPixel, + triplets, pixelSegmentIndex, - T5InnerT3Index, + t5InnerT3Index, pixelRadiusTemp, tripletRadius, centerXTemp, @@ -713,45 +597,45 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { false)) return false; - unsigned int firstSegmentIndex = tripletsInGPU.segmentIndices[2 * 
T5InnerT3Index]; - unsigned int secondSegmentIndex = tripletsInGPU.segmentIndices[2 * T5InnerT3Index + 1]; - unsigned int thirdSegmentIndex = tripletsInGPU.segmentIndices[2 * T5OuterT3Index]; - unsigned int fourthSegmentIndex = tripletsInGPU.segmentIndices[2 * T5OuterT3Index + 1]; - - unsigned int pixelInnerMDIndex = segmentsInGPU.mdIndices[2 * pixelSegmentIndex]; - unsigned int pixelOuterMDIndex = segmentsInGPU.mdIndices[2 * pixelSegmentIndex + 1]; - unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * firstSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; - unsigned int fourthMDIndex = segmentsInGPU.mdIndices[2 * thirdSegmentIndex + 1]; - unsigned int fifthMDIndex = segmentsInGPU.mdIndices[2 * fourthSegmentIndex + 1]; - - uint16_t lowerModuleIndex1 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex]; - uint16_t lowerModuleIndex2 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 1]; - uint16_t lowerModuleIndex3 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 2]; - uint16_t lowerModuleIndex4 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 3]; - uint16_t lowerModuleIndex5 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 4]; + unsigned int firstSegmentIndex = triplets.segmentIndices()[t5InnerT3Index][0]; + unsigned int secondSegmentIndex = triplets.segmentIndices()[t5InnerT3Index][1]; + unsigned int thirdSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][0]; + unsigned int fourthSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][1]; + + unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; + unsigned int firstMDIndex = segments.mdIndices()[firstSegmentIndex][0]; + unsigned int secondMDIndex = 
segments.mdIndices()[secondSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[secondSegmentIndex][1]; + unsigned int fourthMDIndex = segments.mdIndices()[thirdSegmentIndex][1]; + unsigned int fifthMDIndex = segments.mdIndices()[fourthSegmentIndex][1]; + + uint16_t lowerModuleIndex1 = quintuplets.lowerModuleIndices()[quintupletIndex][0]; + uint16_t lowerModuleIndex2 = quintuplets.lowerModuleIndices()[quintupletIndex][1]; + uint16_t lowerModuleIndex3 = quintuplets.lowerModuleIndices()[quintupletIndex][2]; + uint16_t lowerModuleIndex4 = quintuplets.lowerModuleIndices()[quintupletIndex][3]; + uint16_t lowerModuleIndex5 = quintuplets.lowerModuleIndices()[quintupletIndex][4]; uint16_t lowerModuleIndices[Params_T5::kLayers] = { lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; - float zPix[Params_pLS::kLayers] = {mdsInGPU.anchorZ[pixelInnerMDIndex], mdsInGPU.anchorZ[pixelOuterMDIndex]}; - float rtPix[Params_pLS::kLayers] = {mdsInGPU.anchorRt[pixelInnerMDIndex], mdsInGPU.anchorRt[pixelOuterMDIndex]}; - float zs[Params_T5::kLayers] = {mdsInGPU.anchorZ[firstMDIndex], - mdsInGPU.anchorZ[secondMDIndex], - mdsInGPU.anchorZ[thirdMDIndex], - mdsInGPU.anchorZ[fourthMDIndex], - mdsInGPU.anchorZ[fifthMDIndex]}; - float rts[Params_T5::kLayers] = {mdsInGPU.anchorRt[firstMDIndex], - mdsInGPU.anchorRt[secondMDIndex], - mdsInGPU.anchorRt[thirdMDIndex], - mdsInGPU.anchorRt[fourthMDIndex], - mdsInGPU.anchorRt[fifthMDIndex]}; - - rzChiSquared = computePT5RZChiSquared(acc, modulesInGPU, lowerModuleIndices, rtPix, zPix, rts, zs); + float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]}; + float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]}; + float zs[Params_T5::kLayers] = {mds.anchorZ()[firstMDIndex], + mds.anchorZ()[secondMDIndex], + mds.anchorZ()[thirdMDIndex], + mds.anchorZ()[fourthMDIndex], + mds.anchorZ()[fifthMDIndex]}; + 
float rts[Params_T5::kLayers] = {mds.anchorRt()[firstMDIndex], + mds.anchorRt()[secondMDIndex], + mds.anchorRt()[thirdMDIndex], + mds.anchorRt()[fourthMDIndex], + mds.anchorRt()[fifthMDIndex]}; + + rzChiSquared = computePT5RZChiSquared(acc, modules, lowerModuleIndices, rtPix, zPix, rts, zs); if (/*pixelRadius*/ 0 < 5.0f * kR1GeVf) { // FIXME: pixelRadius is not defined yet - if (not passPT5RZChiSquaredCuts(modulesInGPU, + if (not passPT5RZChiSquaredCuts(modules, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, @@ -762,31 +646,30 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } //outer T5 - float xs[Params_T5::kLayers] = {mdsInGPU.anchorX[firstMDIndex], - mdsInGPU.anchorX[secondMDIndex], - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorX[fourthMDIndex], - mdsInGPU.anchorX[fifthMDIndex]}; - float ys[Params_T5::kLayers] = {mdsInGPU.anchorY[firstMDIndex], - mdsInGPU.anchorY[secondMDIndex], - mdsInGPU.anchorY[thirdMDIndex], - mdsInGPU.anchorY[fourthMDIndex], - mdsInGPU.anchorY[fifthMDIndex]}; + float xs[Params_T5::kLayers] = {mds.anchorX()[firstMDIndex], + mds.anchorX()[secondMDIndex], + mds.anchorX()[thirdMDIndex], + mds.anchorX()[fourthMDIndex], + mds.anchorX()[fifthMDIndex]}; + float ys[Params_T5::kLayers] = {mds.anchorY()[firstMDIndex], + mds.anchorY()[secondMDIndex], + mds.anchorY()[thirdMDIndex], + mds.anchorY()[fourthMDIndex], + mds.anchorY()[fifthMDIndex]}; //get the appropriate radii and centers - centerX = segmentsInGPU.circleCenterX[pixelSegmentArrayIndex]; - centerY = segmentsInGPU.circleCenterY[pixelSegmentArrayIndex]; - pixelRadius = segmentsInGPU.circleRadius[pixelSegmentArrayIndex]; + centerX = segmentsPixel.circleCenterX()[pixelSegmentArrayIndex]; + centerY = segmentsPixel.circleCenterY()[pixelSegmentArrayIndex]; + pixelRadius = segmentsPixel.circleRadius()[pixelSegmentArrayIndex]; - float T5CenterX = quintupletsInGPU.regressionG[quintupletIndex]; - float T5CenterY = quintupletsInGPU.regressionF[quintupletIndex]; - quintupletRadius = 
quintupletsInGPU.regressionRadius[quintupletIndex]; + float T5CenterX = quintuplets.regressionG()[quintupletIndex]; + float T5CenterY = quintuplets.regressionF()[quintupletIndex]; + quintupletRadius = quintuplets.regressionRadius()[quintupletIndex]; - rPhiChiSquared = - computePT5RPhiChiSquared(acc, modulesInGPU, lowerModuleIndices, centerX, centerY, pixelRadius, xs, ys); + rPhiChiSquared = computePT5RPhiChiSquared(acc, modules, lowerModuleIndices, centerX, centerY, pixelRadius, xs, ys); if (pixelRadius < 5.0f * kR1GeVf) { - if (not passPT5RPhiChiSquaredCuts(modulesInGPU, + if (not passPT5RPhiChiSquaredCuts(modules, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, @@ -796,12 +679,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; } - float xPix[] = {mdsInGPU.anchorX[pixelInnerMDIndex], mdsInGPU.anchorX[pixelOuterMDIndex]}; - float yPix[] = {mdsInGPU.anchorY[pixelInnerMDIndex], mdsInGPU.anchorY[pixelOuterMDIndex]}; + float xPix[] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]}; + float yPix[] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]}; rPhiChiSquaredInwards = computePT5RPhiChiSquaredInwards(T5CenterX, T5CenterY, quintupletRadius, xPix, yPix); - if (quintupletsInGPU.regressionRadius[quintupletIndex] < 5.0f * kR1GeVf) { - if (not passPT5RPhiChiSquaredInwardsCuts(modulesInGPU, + if (quintuplets.regressionRadius()[quintupletIndex] < 5.0f * kR1GeVf) { + if (not passPT5RPhiChiSquaredInwardsCuts(modules, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, @@ -817,19 +700,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return true; } - struct CreatePixelQuintupletsInGPUFromMapv2 { + struct CreatePixelQuintupletsFromMap { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - MiniDoublets mdsInGPU, - Segments segmentsInGPU, - Triplets tripletsInGPU, - Quintuplets quintupletsInGPU, - PixelQuintuplets pixelQuintupletsInGPU, + ModulesConst modules, + ModulesPixelConst 
modulesPixel, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixel segmentsPixel, + Triplets triplets, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + PixelQuintuplets pixelQuintuplets, unsigned int* connectedPixelSize, unsigned int* connectedPixelIndex, unsigned int nPixelSegments, - ObjectRanges rangesInGPU) const { + ObjectRangesConst ranges) const { auto const globalBlockIdx = alpaka::getIdx(acc); auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridBlockExtent = alpaka::getWorkDiv(acc); @@ -840,40 +726,41 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { for (unsigned int iLSModule = connectedPixelIndex[i_pLS] + globalBlockIdx[0]; iLSModule < iLSModule_max; iLSModule += gridBlockExtent[0]) { //these are actual module indices - uint16_t quintupletLowerModuleIndex = modulesInGPU.connectedPixels[iLSModule]; - if (quintupletLowerModuleIndex >= *modulesInGPU.nLowerModules) + uint16_t quintupletLowerModuleIndex = modulesPixel.connectedPixels()[iLSModule]; + if (quintupletLowerModuleIndex >= modules.nLowerModules()) continue; - if (modulesInGPU.moduleType[quintupletLowerModuleIndex] == TwoS) + if (modules.moduleType()[quintupletLowerModuleIndex] == TwoS) continue; - uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules; - if (segmentsInGPU.isDup[i_pLS]) + uint16_t pixelModuleIndex = modules.nLowerModules(); + if (segmentsPixel.isDup()[i_pLS]) continue; - unsigned int nOuterQuintuplets = quintupletsInGPU.nQuintuplets[quintupletLowerModuleIndex]; + unsigned int nOuterQuintuplets = quintupletsOccupancy.nQuintuplets()[quintupletLowerModuleIndex]; if (nOuterQuintuplets == 0) continue; - unsigned int pixelSegmentIndex = rangesInGPU.segmentModuleIndices[pixelModuleIndex] + i_pLS; + unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS; //fetch the quintuplet for (unsigned int outerQuintupletArrayIndex = globalThreadIdx[2]; outerQuintupletArrayIndex < nOuterQuintuplets; 
outerQuintupletArrayIndex += gridThreadExtent[2]) { unsigned int quintupletIndex = - rangesInGPU.quintupletModuleIndices[quintupletLowerModuleIndex] + outerQuintupletArrayIndex; + ranges.quintupletModuleIndices()[quintupletLowerModuleIndex] + outerQuintupletArrayIndex; - if (quintupletsInGPU.isDup[quintupletIndex]) + if (quintuplets.isDup()[quintupletIndex]) continue; float rzChiSquared, rPhiChiSquared, rPhiChiSquaredInwards, pixelRadius, quintupletRadius, centerX, centerY; bool success = runPixelQuintupletDefaultAlgo(acc, - modulesInGPU, - rangesInGPU, - mdsInGPU, - segmentsInGPU, - tripletsInGPU, - quintupletsInGPU, + modules, + ranges, + mds, + segments, + segmentsPixel, + triplets, + quintuplets, pixelSegmentIndex, quintupletIndex, rzChiSquared, @@ -886,22 +773,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { static_cast(i_pLS)); if (success) { unsigned int totOccupancyPixelQuintuplets = alpaka::atomicAdd( - acc, pixelQuintupletsInGPU.totOccupancyPixelQuintuplets, 1u, alpaka::hierarchy::Threads{}); + acc, &pixelQuintuplets.totOccupancyPixelQuintuplets(), 1u, alpaka::hierarchy::Threads{}); if (totOccupancyPixelQuintuplets >= n_max_pixel_quintuplets) { #ifdef WARNINGS printf("Pixel Quintuplet excess alert!\n"); #endif } else { unsigned int pixelQuintupletIndex = - alpaka::atomicAdd(acc, pixelQuintupletsInGPU.nPixelQuintuplets, 1u, alpaka::hierarchy::Threads{}); - float eta = __H2F(quintupletsInGPU.eta[quintupletIndex]); - float phi = __H2F(quintupletsInGPU.phi[quintupletIndex]); - - addPixelQuintupletToMemory(modulesInGPU, - mdsInGPU, - segmentsInGPU, - quintupletsInGPU, - pixelQuintupletsInGPU, + alpaka::atomicAdd(acc, &pixelQuintuplets.nPixelQuintuplets(), 1u, alpaka::hierarchy::Threads{}); + float eta = __H2F(quintuplets.eta()[quintupletIndex]); + float phi = __H2F(quintuplets.phi()[quintupletIndex]); + + addPixelQuintupletToMemory(modules, + mds, + segments, + quintuplets, + pixelQuintuplets, pixelSegmentIndex, quintupletIndex, pixelQuintupletIndex, @@ 
-916,10 +803,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { centerX, centerY); - tripletsInGPU.partOfPT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex]] = true; - tripletsInGPU.partOfPT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1]] = true; - segmentsInGPU.partOfPT5[i_pLS] = true; - quintupletsInGPU.partOfPT5[quintupletIndex] = true; + triplets.partOfPT5()[quintuplets.tripletIndices()[quintupletIndex][0]] = true; + triplets.partOfPT5()[quintuplets.tripletIndices()[quintupletIndex][1]] = true; + segmentsPixel.partOfPT5()[i_pLS] = true; + quintuplets.partOfPT5()[quintupletIndex] = true; } // tot occupancy } // end success } // end T5 diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 70c269dce6c10..71822bb2544cb 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -2,137 +2,53 @@ #define RecoTracker_LSTCore_src_alpaka_PixelTriplet_h #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" -#include "RecoTracker/LSTCore/interface/Module.h" - -#include "Triplet.h" -#include "Segment.h" -#include "MiniDoublet.h" -#include "Hit.h" -#include "ObjectRanges.h" -#include "Quintuplet.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - // One pixel segment, one outer tracker triplet! 
- struct PixelTriplets { - unsigned int* pixelSegmentIndices; - unsigned int* tripletIndices; - unsigned int* nPixelTriplets; - unsigned int* totOccupancyPixelTriplets; - - float* rPhiChiSquared; - float* rPhiChiSquaredInwards; - float* rzChiSquared; - - FPX* pixelRadius; - FPX* tripletRadius; - FPX* pt; - FPX* eta; - FPX* phi; - FPX* eta_pix; - FPX* phi_pix; - FPX* score; - bool* isDup; - bool* partOfPT5; - - uint8_t* logicalLayers; - unsigned int* hitIndices; - uint16_t* lowerModuleIndices; - FPX* centerX; - FPX* centerY; - - template - void setData(TBuff& buf) { - pixelSegmentIndices = buf.pixelSegmentIndices_buf.data(); - tripletIndices = buf.tripletIndices_buf.data(); - nPixelTriplets = buf.nPixelTriplets_buf.data(); - totOccupancyPixelTriplets = buf.totOccupancyPixelTriplets_buf.data(); - pixelRadius = buf.pixelRadius_buf.data(); - tripletRadius = buf.tripletRadius_buf.data(); - pt = buf.pt_buf.data(); - eta = buf.eta_buf.data(); - phi = buf.phi_buf.data(); - eta_pix = buf.eta_pix_buf.data(); - phi_pix = buf.phi_pix_buf.data(); - score = buf.score_buf.data(); - isDup = buf.isDup_buf.data(); - partOfPT5 = buf.partOfPT5_buf.data(); - logicalLayers = buf.logicalLayers_buf.data(); - hitIndices = buf.hitIndices_buf.data(); - lowerModuleIndices = buf.lowerModuleIndices_buf.data(); - centerX = buf.centerX_buf.data(); - centerY = buf.centerY_buf.data(); - rPhiChiSquared = buf.rPhiChiSquared_buf.data(); - rPhiChiSquaredInwards = buf.rPhiChiSquaredInwards_buf.data(); - rzChiSquared = buf.rzChiSquared_buf.data(); - } - }; - template - struct PixelTripletsBuffer { - Buf pixelSegmentIndices_buf; - Buf tripletIndices_buf; - Buf nPixelTriplets_buf; - Buf totOccupancyPixelTriplets_buf; - Buf pixelRadius_buf; - Buf tripletRadius_buf; - Buf pt_buf; - Buf eta_buf; - Buf phi_buf; - Buf eta_pix_buf; - Buf phi_pix_buf; - Buf score_buf; - Buf isDup_buf; - Buf partOfPT5_buf; - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf lowerModuleIndices_buf; - Buf centerX_buf; - Buf 
centerY_buf; - Buf pixelRadiusError_buf; - Buf rPhiChiSquared_buf; - Buf rPhiChiSquaredInwards_buf; - Buf rzChiSquared_buf; - - PixelTriplets data_; - - template - PixelTripletsBuffer(unsigned int maxPixelTriplets, TDevAcc const& devAccIn, TQueue& queue) - : pixelSegmentIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - tripletIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - nPixelTriplets_buf(allocBufWrapper(devAccIn, 1, queue)), - totOccupancyPixelTriplets_buf(allocBufWrapper(devAccIn, 1, queue)), - pixelRadius_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - tripletRadius_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - pt_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - eta_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - phi_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - eta_pix_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - phi_pix_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - score_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - isDup_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - partOfPT5_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - logicalLayers_buf(allocBufWrapper(devAccIn, maxPixelTriplets * Params_pT3::kLayers, queue)), - hitIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets * Params_pT3::kHits, queue)), - lowerModuleIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets * Params_pT3::kLayers, queue)), - centerX_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - centerY_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - pixelRadiusError_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - rPhiChiSquared_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - rPhiChiSquaredInwards_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - rzChiSquared_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)) { - alpaka::memset(queue, nPixelTriplets_buf, 0u); - alpaka::memset(queue, 
totOccupancyPixelTriplets_buf, 0u); - alpaka::memset(queue, partOfPT5_buf, false); - } - - inline PixelTriplets const* data() const { return &data_; } - inline void setData(PixelTripletsBuffer& buf) { data_.setData(buf); } - }; + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + uint16_t pixelModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex); + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + uint16_t pixelModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex); - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelTripletToMemory(MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, - Triplets const& tripletsInGPU, - PixelTriplets& pixelTripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelTripletToMemory(MiniDoubletsConst mds, + SegmentsConst segments, + TripletsConst triplets, + PixelTriplets pixelTriplets, unsigned int pixelSegmentIndex, unsigned int tripletIndex, float pixelRadius, @@ -149,92 +65,80 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float eta_pix, float phi_pix, float score) { - pixelTripletsInGPU.pixelSegmentIndices[pixelTripletIndex] = pixelSegmentIndex; - pixelTripletsInGPU.tripletIndices[pixelTripletIndex] = tripletIndex; - 
pixelTripletsInGPU.pixelRadius[pixelTripletIndex] = __F2H(pixelRadius); - pixelTripletsInGPU.tripletRadius[pixelTripletIndex] = __F2H(tripletRadius); - pixelTripletsInGPU.pt[pixelTripletIndex] = __F2H(pt); - pixelTripletsInGPU.eta[pixelTripletIndex] = __F2H(eta); - pixelTripletsInGPU.phi[pixelTripletIndex] = __F2H(phi); - pixelTripletsInGPU.eta_pix[pixelTripletIndex] = __F2H(eta_pix); - pixelTripletsInGPU.phi_pix[pixelTripletIndex] = __F2H(phi_pix); - pixelTripletsInGPU.isDup[pixelTripletIndex] = false; - pixelTripletsInGPU.score[pixelTripletIndex] = __F2H(score); - - pixelTripletsInGPU.centerX[pixelTripletIndex] = __F2H(centerX); - pixelTripletsInGPU.centerY[pixelTripletIndex] = __F2H(centerY); - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex] = 0; - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex + 1] = 0; - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex + 2] = - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers]; - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex + 3] = - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 1]; - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex + 4] = - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 2]; - - pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex] = - segmentsInGPU.innerLowerModuleIndices[pixelSegmentIndex]; - pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 1] = - segmentsInGPU.outerLowerModuleIndices[pixelSegmentIndex]; - pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 2] = - tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex]; - pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 3] = - tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 1]; - pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * 
pixelTripletIndex + 4] = - tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 2]; - - unsigned int pixelInnerMD = segmentsInGPU.mdIndices[2 * pixelSegmentIndex]; - unsigned int pixelOuterMD = segmentsInGPU.mdIndices[2 * pixelSegmentIndex + 1]; - - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex] = mdsInGPU.anchorHitIndices[pixelInnerMD]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 1] = mdsInGPU.outerHitIndices[pixelInnerMD]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 2] = mdsInGPU.anchorHitIndices[pixelOuterMD]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 3] = mdsInGPU.outerHitIndices[pixelOuterMD]; - - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 4] = - tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 5] = - tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 1]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 6] = - tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 2]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 7] = - tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 3]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 8] = - tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 4]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 9] = - tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 5]; - pixelTripletsInGPU.rPhiChiSquared[pixelTripletIndex] = rPhiChiSquared; - pixelTripletsInGPU.rPhiChiSquaredInwards[pixelTripletIndex] = rPhiChiSquaredInwards; - pixelTripletsInGPU.rzChiSquared[pixelTripletIndex] = rzChiSquared; + pixelTriplets.pixelSegmentIndices()[pixelTripletIndex] = pixelSegmentIndex; + pixelTriplets.tripletIndices()[pixelTripletIndex] = tripletIndex; + 
pixelTriplets.pixelRadius()[pixelTripletIndex] = __F2H(pixelRadius); + pixelTriplets.tripletRadius()[pixelTripletIndex] = __F2H(tripletRadius); + pixelTriplets.pt()[pixelTripletIndex] = __F2H(pt); + pixelTriplets.eta()[pixelTripletIndex] = __F2H(eta); + pixelTriplets.phi()[pixelTripletIndex] = __F2H(phi); + pixelTriplets.eta_pix()[pixelTripletIndex] = __F2H(eta_pix); + pixelTriplets.phi_pix()[pixelTripletIndex] = __F2H(phi_pix); + pixelTriplets.isDup()[pixelTripletIndex] = false; + pixelTriplets.score()[pixelTripletIndex] = __F2H(score); + + pixelTriplets.centerX()[pixelTripletIndex] = __F2H(centerX); + pixelTriplets.centerY()[pixelTripletIndex] = __F2H(centerY); + pixelTriplets.logicalLayers()[pixelTripletIndex][0] = 0; + pixelTriplets.logicalLayers()[pixelTripletIndex][1] = 0; + pixelTriplets.logicalLayers()[pixelTripletIndex][2] = triplets.logicalLayers()[tripletIndex][0]; + pixelTriplets.logicalLayers()[pixelTripletIndex][3] = triplets.logicalLayers()[tripletIndex][1]; + pixelTriplets.logicalLayers()[pixelTripletIndex][4] = triplets.logicalLayers()[tripletIndex][2]; + + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][0] = segments.innerLowerModuleIndices()[pixelSegmentIndex]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][1] = segments.outerLowerModuleIndices()[pixelSegmentIndex]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][2] = triplets.lowerModuleIndices()[tripletIndex][0]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][3] = triplets.lowerModuleIndices()[tripletIndex][1]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][4] = triplets.lowerModuleIndices()[tripletIndex][2]; + + unsigned int pixelInnerMD = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelOuterMD = segments.mdIndices()[pixelSegmentIndex][1]; + + pixelTriplets.hitIndices()[pixelTripletIndex][0] = mds.anchorHitIndices()[pixelInnerMD]; + pixelTriplets.hitIndices()[pixelTripletIndex][1] = mds.outerHitIndices()[pixelInnerMD]; + 
pixelTriplets.hitIndices()[pixelTripletIndex][2] = mds.anchorHitIndices()[pixelOuterMD]; + pixelTriplets.hitIndices()[pixelTripletIndex][3] = mds.outerHitIndices()[pixelOuterMD]; + + pixelTriplets.hitIndices()[pixelTripletIndex][4] = triplets.hitIndices()[tripletIndex][0]; + pixelTriplets.hitIndices()[pixelTripletIndex][5] = triplets.hitIndices()[tripletIndex][1]; + pixelTriplets.hitIndices()[pixelTripletIndex][6] = triplets.hitIndices()[tripletIndex][2]; + pixelTriplets.hitIndices()[pixelTripletIndex][7] = triplets.hitIndices()[tripletIndex][3]; + pixelTriplets.hitIndices()[pixelTripletIndex][8] = triplets.hitIndices()[tripletIndex][4]; + pixelTriplets.hitIndices()[pixelTripletIndex][9] = triplets.hitIndices()[tripletIndex][5]; + pixelTriplets.rPhiChiSquared()[pixelTripletIndex] = rPhiChiSquared; + pixelTriplets.rPhiChiSquaredInwards()[pixelTripletIndex] = rPhiChiSquaredInwards; + pixelTriplets.rzChiSquared()[pixelTripletIndex] = rzChiSquared; }; template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTrackletDefaultAlgopT3(TAcc const& acc, - Modules const& modulesInGPU, - ObjectRanges const& rangesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, uint16_t pixelLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, unsigned int innerSegmentIndex, unsigned int outerSegmentIndex) { - short outerInnerLowerModuleSubdet = modulesInGPU.subdets[outerInnerLowerModuleIndex]; - short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; + short outerInnerLowerModuleSubdet = modules.subdets()[outerInnerLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; - unsigned int firstMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * innerSegmentIndex]; - unsigned int secondMDIndex = 
segmentsInGPU.mdIndices[Params_LS::kLayers * innerSegmentIndex + 1]; + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * outerSegmentIndex]; - unsigned int fourthMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * outerSegmentIndex + 1]; + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][0]; + unsigned int fourthMDIndex = segments.mdIndices()[outerSegmentIndex][1]; if (outerInnerLowerModuleSubdet == Barrel and (outerOuterLowerModuleSubdet == Barrel or outerOuterLowerModuleSubdet == Endcap)) { return runTripletDefaultAlgoPPBB(acc, - modulesInGPU, - rangesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + ranges, + mds, + segments, + segmentsPixel, pixelLowerModuleIndex, outerInnerLowerModuleIndex, outerOuterLowerModuleIndex, @@ -246,10 +150,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { fourthMDIndex); } else if (outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { return runTripletDefaultAlgoPPEE(acc, - modulesInGPU, - rangesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + ranges, + mds, + segments, + segmentsPixel, pixelLowerModuleIndex, outerInnerLowerModuleIndex, outerOuterLowerModuleIndex, @@ -263,20 +168,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RZChiSquaredCuts(Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RZChiSquaredCuts(ModulesConst modules, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, float rzChiSquared) { const int layer1 = - modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS); + modules.layers()[lowerModuleIndex1] + 6 * 
(modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); const int layer2 = - modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS); + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); const int layer3 = - modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); if (layer1 == 8 and layer2 == 9 and layer3 == 10) { return rzChiSquared < 13.6067f; @@ -363,7 +268,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //TODO: merge this one and the pT5 function later into a single function template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RPhiChiSquared(TAcc const& acc, - Modules const& modulesInGPU, + ModulesConst modules, uint16_t* lowerModuleIndices, float g, float f, @@ -376,11 +281,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float inv1 = kWidthPS / kWidth2S; float inv2 = kPixelPSZpitch / kWidth2S; for (size_t i = 0; i < 3; i++) { - ModuleType moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; - short moduleSubdet = modulesInGPU.subdets[lowerModuleIndices[i]]; - short moduleSide = modulesInGPU.sides[lowerModuleIndices[i]]; - float drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; - slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; + ModuleType moduleType = 
modules.moduleType()[lowerModuleIndices[i]]; + short moduleSubdet = modules.subdets()[lowerModuleIndices[i]]; + short moduleSide = modules.sides()[lowerModuleIndices[i]]; + float drdz = modules.drdzs()[lowerModuleIndices[i]]; + slopes[i] = modules.dxdys()[lowerModuleIndices[i]]; //category 1 - barrel PS flat if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { delta1[i] = inv1; @@ -444,20 +349,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { }; //90pc threshold - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredCuts(Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredCuts(ModulesConst modules, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, float chiSquared) { const int layer1 = - modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS); + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); const int layer2 = - modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS); + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); const int layer3 = - modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * 
(modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); if (layer1 == 8 and layer2 == 9 and layer3 == 10) { return chiSquared < 7.003f; @@ -488,20 +393,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return true; }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredInwardsCuts(Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredInwardsCuts(ModulesConst modules, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, float chiSquared) { const int layer1 = - modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS); + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); const int layer2 = - modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS); + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); const int layer3 = - modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); if (layer1 == 7 and layer2 == 8 and layer3 == 9) // endcap layer 1,2,3, ps { @@ -654,18 +559,18 @@ namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterion(TAcc const& acc, - Modules const& modulesInGPU, + ModulesConst modules, float pixelRadius, float pixelRadiusError, float tripletRadius, int16_t lowerModuleIndex, uint16_t middleModuleIndex, uint16_t upperModuleIndex) { - if (modulesInGPU.subdets[lowerModuleIndex] == Endcap) { + if (modules.subdets()[lowerModuleIndex] == Endcap) { return passRadiusCriterionEEE(acc, pixelRadius, pixelRadiusError, tripletRadius); - } else if (modulesInGPU.subdets[middleModuleIndex] == Endcap) { + } else if (modules.subdets()[middleModuleIndex] == Endcap) { return passRadiusCriterionBEE(acc, pixelRadius, pixelRadiusError, tripletRadius); - } else if (modulesInGPU.subdets[upperModuleIndex] == Endcap) { + } else if (modules.subdets()[upperModuleIndex] == Endcap) { return passRadiusCriterionBBE(acc, pixelRadius, pixelRadiusError, tripletRadius); } else { return passRadiusCriterionBBB(acc, pixelRadius, pixelRadiusError, tripletRadius); @@ -674,7 +579,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RZChiSquared(TAcc const& acc, - Modules const& modulesInGPU, + ModulesConst modules, const uint16_t* lowerModuleIndices, const float* rtPix, const float* xPix, @@ -706,9 +611,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float zsi = zs[i] / 100; float rtsi = rts[i] / 100; uint16_t lowerModuleIndex = lowerModuleIndices[i]; - const int moduleType = modulesInGPU.moduleType[lowerModuleIndex]; - const int moduleSide = modulesInGPU.sides[lowerModuleIndex]; - const int moduleSubdet = modulesInGPU.subdets[lowerModuleIndex]; + const int moduleType = modules.moduleType()[lowerModuleIndex]; + const int moduleSide = modules.sides()[lowerModuleIndex]; + const int moduleSubdet = modules.subdets()[lowerModuleIndex]; // calculation is detailed documented here 
https://indico.cern.ch/event/1185895/contributions/4982756/attachments/2526561/4345805/helix%20pT3%20summarize.pdf float diffr, diffz; @@ -750,7 +655,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //special dispensation to tilted PS modules! if (moduleType == 0 and moduleSubdet == Barrel and moduleSide != Center) { - float drdz = modulesInGPU.drdzs[lowerModuleIndex]; + float drdz = modules.drdzs()[lowerModuleIndex]; error2 /= (1 + drdz * drdz); } RMSE += (residual * residual) / error2; @@ -763,11 +668,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTripletDefaultAlgo(TAcc const& acc, - Modules const& modulesInGPU, - ObjectRanges const& rangesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, - Triplets const& tripletsInGPU, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + TripletsConst triplets, unsigned int pixelSegmentIndex, unsigned int tripletIndex, float& pixelRadius, @@ -779,77 +685,79 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& rPhiChiSquaredInwards, bool runChiSquaredCuts = true) { //run pT4 compatibility between the pixel segment and inner segment, and between the pixel and outer segment of the triplet - uint16_t pixelModuleIndex = segmentsInGPU.innerLowerModuleIndices[pixelSegmentIndex]; + uint16_t pixelModuleIndex = segments.innerLowerModuleIndices()[pixelSegmentIndex]; - uint16_t lowerModuleIndex = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex]; - uint16_t middleModuleIndex = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 1]; - uint16_t upperModuleIndex = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 2]; + uint16_t lowerModuleIndex = triplets.lowerModuleIndices()[tripletIndex][0]; + uint16_t middleModuleIndex = triplets.lowerModuleIndices()[tripletIndex][1]; + uint16_t upperModuleIndex = 
triplets.lowerModuleIndices()[tripletIndex][2]; { // pixel segment vs inner segment of the triplet if (not runPixelTrackletDefaultAlgopT3(acc, - modulesInGPU, - rangesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + ranges, + mds, + segments, + segmentsPixel, pixelModuleIndex, lowerModuleIndex, middleModuleIndex, pixelSegmentIndex, - tripletsInGPU.segmentIndices[Params_LS::kLayers * tripletIndex])) + triplets.segmentIndices()[tripletIndex][0])) return false; //pixel segment vs outer segment of triplet if (not runPixelTrackletDefaultAlgopT3(acc, - modulesInGPU, - rangesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + ranges, + mds, + segments, + segmentsPixel, pixelModuleIndex, middleModuleIndex, upperModuleIndex, pixelSegmentIndex, - tripletsInGPU.segmentIndices[Params_LS::kLayers * tripletIndex + 1])) + triplets.segmentIndices()[tripletIndex][1])) return false; } //pt matching between the pixel ptin and the triplet circle pt - unsigned int pixelSegmentArrayIndex = pixelSegmentIndex - rangesInGPU.segmentModuleIndices[pixelModuleIndex]; - float pixelSegmentPt = segmentsInGPU.ptIn[pixelSegmentArrayIndex]; - float pixelSegmentPtError = segmentsInGPU.ptErr[pixelSegmentArrayIndex]; - float pixelSegmentPx = segmentsInGPU.px[pixelSegmentArrayIndex]; - float pixelSegmentPy = segmentsInGPU.py[pixelSegmentArrayIndex]; - float pixelSegmentPz = segmentsInGPU.pz[pixelSegmentArrayIndex]; - int pixelSegmentCharge = segmentsInGPU.charge[pixelSegmentArrayIndex]; + unsigned int pixelSegmentArrayIndex = pixelSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; + float pixelSegmentPt = segmentsPixel.ptIn()[pixelSegmentArrayIndex]; + float pixelSegmentPtError = segmentsPixel.ptErr()[pixelSegmentArrayIndex]; + float pixelSegmentPx = segmentsPixel.px()[pixelSegmentArrayIndex]; + float pixelSegmentPy = segmentsPixel.py()[pixelSegmentArrayIndex]; + float pixelSegmentPz = segmentsPixel.pz()[pixelSegmentArrayIndex]; + int pixelSegmentCharge = 
segmentsPixel.charge()[pixelSegmentArrayIndex]; - float pixelG = segmentsInGPU.circleCenterX[pixelSegmentArrayIndex]; - float pixelF = segmentsInGPU.circleCenterY[pixelSegmentArrayIndex]; - float pixelRadiusPCA = segmentsInGPU.circleRadius[pixelSegmentArrayIndex]; + float pixelG = segmentsPixel.circleCenterX()[pixelSegmentArrayIndex]; + float pixelF = segmentsPixel.circleCenterY()[pixelSegmentArrayIndex]; + float pixelRadiusPCA = segmentsPixel.circleRadius()[pixelSegmentArrayIndex]; - unsigned int pixelInnerMDIndex = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelSegmentIndex]; - unsigned int pixelOuterMDIndex = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelSegmentIndex + 1]; + unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; pixelRadius = pixelSegmentPt * kR1GeVf; float pixelRadiusError = pixelSegmentPtError * kR1GeVf; - unsigned int tripletInnerSegmentIndex = tripletsInGPU.segmentIndices[2 * tripletIndex]; - unsigned int tripletOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * tripletIndex + 1]; + unsigned int tripletInnerSegmentIndex = triplets.segmentIndices()[tripletIndex][0]; + unsigned int tripletOuterSegmentIndex = triplets.segmentIndices()[tripletIndex][1]; - unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * tripletInnerSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * tripletInnerSegmentIndex + 1]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * tripletOuterSegmentIndex + 1]; + unsigned int firstMDIndex = segments.mdIndices()[tripletInnerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[tripletInnerSegmentIndex][1]; + unsigned int thirdMDIndex = segments.mdIndices()[tripletOuterSegmentIndex][1]; float xs[Params_T3::kLayers] = { - mdsInGPU.anchorX[firstMDIndex], mdsInGPU.anchorX[secondMDIndex], mdsInGPU.anchorX[thirdMDIndex]}; + mds.anchorX()[firstMDIndex], 
mds.anchorX()[secondMDIndex], mds.anchorX()[thirdMDIndex]}; float ys[Params_T3::kLayers] = { - mdsInGPU.anchorY[firstMDIndex], mdsInGPU.anchorY[secondMDIndex], mdsInGPU.anchorY[thirdMDIndex]}; + mds.anchorY()[firstMDIndex], mds.anchorY()[secondMDIndex], mds.anchorY()[thirdMDIndex]}; float g, f; - tripletRadius = tripletsInGPU.circleRadius[tripletIndex]; - g = tripletsInGPU.circleCenterX[tripletIndex]; - f = tripletsInGPU.circleCenterY[tripletIndex]; + tripletRadius = triplets.radius()[tripletIndex]; + g = triplets.centerX()[tripletIndex]; + f = triplets.centerY()[tripletIndex]; if (not passRadiusCriterion(acc, - modulesInGPU, + modules, pixelRadius, pixelRadiusError, tripletRadius, @@ -862,16 +770,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (runChiSquaredCuts and pixelSegmentPt < 5.0f) { float rts[Params_T3::kLayers] = { - mdsInGPU.anchorRt[firstMDIndex], mdsInGPU.anchorRt[secondMDIndex], mdsInGPU.anchorRt[thirdMDIndex]}; + mds.anchorRt()[firstMDIndex], mds.anchorRt()[secondMDIndex], mds.anchorRt()[thirdMDIndex]}; float zs[Params_T3::kLayers] = { - mdsInGPU.anchorZ[firstMDIndex], mdsInGPU.anchorZ[secondMDIndex], mdsInGPU.anchorZ[thirdMDIndex]}; - float rtPix[Params_pLS::kLayers] = {mdsInGPU.anchorRt[pixelInnerMDIndex], mdsInGPU.anchorRt[pixelOuterMDIndex]}; - float xPix[Params_pLS::kLayers] = {mdsInGPU.anchorX[pixelInnerMDIndex], mdsInGPU.anchorX[pixelOuterMDIndex]}; - float yPix[Params_pLS::kLayers] = {mdsInGPU.anchorY[pixelInnerMDIndex], mdsInGPU.anchorY[pixelOuterMDIndex]}; - float zPix[Params_pLS::kLayers] = {mdsInGPU.anchorZ[pixelInnerMDIndex], mdsInGPU.anchorZ[pixelOuterMDIndex]}; + mds.anchorZ()[firstMDIndex], mds.anchorZ()[secondMDIndex], mds.anchorZ()[thirdMDIndex]}; + float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]}; + float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]}; + float yPix[Params_pLS::kLayers] = 
{mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]}; + float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]}; rzChiSquared = computePT3RZChiSquared(acc, - modulesInGPU, + modules, lowerModuleIndices, rtPix, xPix, @@ -886,29 +794,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { pixelSegmentPy, pixelSegmentPz, pixelSegmentCharge); - if (not passPT3RZChiSquaredCuts( - modulesInGPU, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rzChiSquared)) + if (not passPT3RZChiSquaredCuts(modules, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rzChiSquared)) return false; } else { rzChiSquared = -1; } - rPhiChiSquared = - computePT3RPhiChiSquared(acc, modulesInGPU, lowerModuleIndices, pixelG, pixelF, pixelRadiusPCA, xs, ys); + rPhiChiSquared = computePT3RPhiChiSquared(acc, modules, lowerModuleIndices, pixelG, pixelF, pixelRadiusPCA, xs, ys); if (runChiSquaredCuts and pixelSegmentPt < 5.0f) { - if (not passPT3RPhiChiSquaredCuts( - modulesInGPU, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rPhiChiSquared)) + if (not passPT3RPhiChiSquaredCuts(modules, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rPhiChiSquared)) return false; } - float xPix[Params_pLS::kLayers] = {mdsInGPU.anchorX[pixelInnerMDIndex], mdsInGPU.anchorX[pixelOuterMDIndex]}; - float yPix[Params_pLS::kLayers] = {mdsInGPU.anchorY[pixelInnerMDIndex], mdsInGPU.anchorY[pixelOuterMDIndex]}; + float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]}; + float yPix[Params_pLS::kLayers] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]}; rPhiChiSquaredInwards = computePT3RPhiChiSquaredInwards(g, f, tripletRadius, xPix, yPix); if (runChiSquaredCuts and pixelSegmentPt < 5.0f) { if (not passPT3RPhiChiSquaredInwardsCuts( - modulesInGPU, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rPhiChiSquaredInwards)) + modules, lowerModuleIndex, middleModuleIndex, 
upperModuleIndex, rPhiChiSquaredInwards)) return false; } centerX = 0; @@ -916,15 +821,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return true; }; - struct CreatePixelTripletsInGPUFromMapv2 { + struct CreatePixelTripletsFromMap { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - ObjectRanges rangesInGPU, - MiniDoublets mdsInGPU, - Segments segmentsInGPU, - Triplets tripletsInGPU, - PixelTriplets pixelTripletsInGPU, + ModulesConst modules, + ModulesPixelConst modulesPixel, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + Triplets triplets, + TripletsOccupancyConst tripletsOccupancy, + PixelTriplets pixelTriplets, unsigned int* connectedPixelSize, unsigned int* connectedPixelIndex, unsigned int nPixelSegments) const { @@ -939,37 +847,37 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { for (unsigned int iLSModule = connectedPixelIndex[i_pLS] + globalBlockIdx[0]; iLSModule < iLSModule_max; iLSModule += gridBlockExtent[0]) { uint16_t tripletLowerModuleIndex = - modulesInGPU - .connectedPixels[iLSModule]; //connected pixels will have the appropriate lower module index by default! + modulesPixel.connectedPixels() + [iLSModule]; //connected pixels will have the appropriate lower module index by default! 
#ifdef WARNINGS - if (tripletLowerModuleIndex >= *modulesInGPU.nLowerModules) { - printf("tripletLowerModuleIndex %d >= modulesInGPU.nLowerModules %d \n", + if (tripletLowerModuleIndex >= modules.nLowerModules()) { + printf("tripletLowerModuleIndex %d >= modules.nLowerModules %d \n", tripletLowerModuleIndex, - *modulesInGPU.nLowerModules); + modules.nLowerModules()); continue; //sanity check } #endif //Removes 2S-2S :FIXME: filter these out in the pixel map - if (modulesInGPU.moduleType[tripletLowerModuleIndex] == TwoS) + if (modules.moduleType()[tripletLowerModuleIndex] == TwoS) continue; - uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules; - unsigned int nOuterTriplets = tripletsInGPU.nTriplets[tripletLowerModuleIndex]; + uint16_t pixelModuleIndex = modules.nLowerModules(); + unsigned int nOuterTriplets = tripletsOccupancy.nTriplets()[tripletLowerModuleIndex]; if (nOuterTriplets == 0) continue; - unsigned int pixelSegmentIndex = rangesInGPU.segmentModuleIndices[pixelModuleIndex] + i_pLS; + unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS; - if (segmentsInGPU.isDup[i_pLS]) + if (segmentsPixel.isDup()[i_pLS]) continue; - if (segmentsInGPU.partOfPT5[i_pLS]) + if (segmentsPixel.partOfPT5()[i_pLS]) continue; //don't make pT3s for those pixels that are part of pT5 short layer2_adjustment; - if (modulesInGPU.layers[tripletLowerModuleIndex] == 1) { + if (modules.layers()[tripletLowerModuleIndex] == 1) { layer2_adjustment = 1; } //get upper segment to be in second layer - else if (modulesInGPU.layers[tripletLowerModuleIndex] == 2) { + else if (modules.layers()[tripletLowerModuleIndex] == 2) { layer2_adjustment = 0; } // get lower segment to be in second layer else { @@ -980,20 +888,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { for (unsigned int outerTripletArrayIndex = globalThreadIdx[2]; outerTripletArrayIndex < nOuterTriplets; outerTripletArrayIndex += gridThreadExtent[2]) { unsigned int outerTripletIndex = - 
rangesInGPU.tripletModuleIndices[tripletLowerModuleIndex] + outerTripletArrayIndex; - if (modulesInGPU.moduleType[tripletsInGPU.lowerModuleIndices[3 * outerTripletIndex + 1]] == TwoS) + ranges.tripletModuleIndices()[tripletLowerModuleIndex] + outerTripletArrayIndex; + if (modules.moduleType()[triplets.lowerModuleIndices()[outerTripletIndex][1]] == TwoS) continue; //REMOVES PS-2S - if (tripletsInGPU.partOfPT5[outerTripletIndex]) + if (triplets.partOfPT5()[outerTripletIndex]) continue; //don't create pT3s for T3s accounted in pT5s float pixelRadius, tripletRadius, rPhiChiSquared, rzChiSquared, rPhiChiSquaredInwards, centerX, centerY; bool success = runPixelTripletDefaultAlgo(acc, - modulesInGPU, - rangesInGPU, - mdsInGPU, - segmentsInGPU, - tripletsInGPU, + modules, + ranges, + mds, + segments, + segmentsPixel, + triplets, pixelSegmentIndex, outerTripletIndex, pixelRadius, @@ -1006,28 +915,28 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (success) { float phi = - mdsInGPU.anchorPhi[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * outerTripletIndex] + - layer2_adjustment]]; + mds.anchorPhi()[segments + .mdIndices()[triplets.segmentIndices()[outerTripletIndex][0]][layer2_adjustment]]; float eta = - mdsInGPU.anchorEta[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * outerTripletIndex] + - layer2_adjustment]]; - float eta_pix = segmentsInGPU.eta[i_pLS]; - float phi_pix = segmentsInGPU.phi[i_pLS]; - float pt = segmentsInGPU.ptIn[i_pLS]; + mds.anchorEta()[segments + .mdIndices()[triplets.segmentIndices()[outerTripletIndex][0]][layer2_adjustment]]; + float eta_pix = segmentsPixel.eta()[i_pLS]; + float phi_pix = segmentsPixel.phi()[i_pLS]; + float pt = segmentsPixel.ptIn()[i_pLS]; float score = rPhiChiSquared + rPhiChiSquaredInwards; - unsigned int totOccupancyPixelTriplets = alpaka::atomicAdd( - acc, pixelTripletsInGPU.totOccupancyPixelTriplets, 1u, alpaka::hierarchy::Threads{}); + unsigned int totOccupancyPixelTriplets = + 
alpaka::atomicAdd(acc, &pixelTriplets.totOccupancyPixelTriplets(), 1u, alpaka::hierarchy::Threads{}); if (totOccupancyPixelTriplets >= n_max_pixel_triplets) { #ifdef WARNINGS printf("Pixel Triplet excess alert!\n"); #endif } else { unsigned int pixelTripletIndex = - alpaka::atomicAdd(acc, pixelTripletsInGPU.nPixelTriplets, 1u, alpaka::hierarchy::Threads{}); - addPixelTripletToMemory(mdsInGPU, - segmentsInGPU, - tripletsInGPU, - pixelTripletsInGPU, + alpaka::atomicAdd(acc, &pixelTriplets.nPixelTriplets(), 1u, alpaka::hierarchy::Threads{}); + addPixelTripletToMemory(mds, + segments, + triplets, + pixelTriplets, pixelSegmentIndex, outerTripletIndex, pixelRadius, @@ -1044,7 +953,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { eta_pix, phi_pix, score); - tripletsInGPU.partOfPT3[outerTripletIndex] = true; + triplets.partOfPT3()[outerTripletIndex] = true; } } } // for outerTripletArrayIndex @@ -1154,10 +1063,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, - Modules const& modulesInGPU, - ObjectRanges const& rangesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, uint16_t pixelModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -1169,39 +1079,39 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int fourthMDIndex) { float dPhi, betaIn, betaOut, pt_beta, zLo, zHi, zLoPointed, zHiPointed, dPhiCut, betaOutCut; - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS); + bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS); - float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; - float rt_InUp = mdsInGPU.anchorRt[secondMDIndex]; - float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; - float rt_OutUp = mdsInGPU.anchorRt[fourthMDIndex]; + float 
rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InUp = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; + float rt_OutUp = mds.anchorRt()[fourthMDIndex]; - float z_InUp = mdsInGPU.anchorZ[secondMDIndex]; - float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; + float z_InUp = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; - float x_InLo = mdsInGPU.anchorX[firstMDIndex]; - float x_InUp = mdsInGPU.anchorX[secondMDIndex]; - float x_OutLo = mdsInGPU.anchorX[thirdMDIndex]; - float x_OutUp = mdsInGPU.anchorX[fourthMDIndex]; + float x_InLo = mds.anchorX()[firstMDIndex]; + float x_InUp = mds.anchorX()[secondMDIndex]; + float x_OutLo = mds.anchorX()[thirdMDIndex]; + float x_OutUp = mds.anchorX()[fourthMDIndex]; - float y_InLo = mdsInGPU.anchorY[firstMDIndex]; - float y_InUp = mdsInGPU.anchorY[secondMDIndex]; - float y_OutLo = mdsInGPU.anchorY[thirdMDIndex]; - float y_OutUp = mdsInGPU.anchorY[fourthMDIndex]; + float y_InLo = mds.anchorY()[firstMDIndex]; + float y_InUp = mds.anchorY()[secondMDIndex]; + float y_OutLo = mds.anchorY()[thirdMDIndex]; + float y_OutUp = mds.anchorY()[fourthMDIndex]; float rt_InOut = rt_InUp; if (alpaka::math::abs(acc, deltaPhi(acc, x_InUp, y_InUp, x_OutLo, y_OutLo)) > 0.5f * float(M_PI)) return false; - unsigned int pixelSegmentArrayIndex = innerSegmentIndex - rangesInGPU.segmentModuleIndices[pixelModuleIndex]; - float ptIn = segmentsInGPU.ptIn[pixelSegmentArrayIndex]; + unsigned int pixelSegmentArrayIndex = innerSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; + float ptIn = segmentsPixel.ptIn()[pixelSegmentArrayIndex]; float ptSLo = ptIn; - float px = segmentsInGPU.px[pixelSegmentArrayIndex]; - float py = segmentsInGPU.py[pixelSegmentArrayIndex]; - float pz = segmentsInGPU.pz[pixelSegmentArrayIndex]; - float ptErr = segmentsInGPU.ptErr[pixelSegmentArrayIndex]; - float etaErr = segmentsInGPU.etaErr[pixelSegmentArrayIndex]; + float px = 
segmentsPixel.px()[pixelSegmentArrayIndex]; + float py = segmentsPixel.py()[pixelSegmentArrayIndex]; + float pz = segmentsPixel.pz()[pixelSegmentArrayIndex]; + float ptErr = segmentsPixel.ptErr()[pixelSegmentArrayIndex]; + float etaErr = segmentsPixel.etaErr()[pixelSegmentArrayIndex]; ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); @@ -1272,11 +1182,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //lots of array accesses below this... - float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); - bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == Endcap and - modulesInGPU.moduleType[outerOuterLowerModuleIndex] == TwoS; + bool isEC_lastLayer = modules.subdets()[outerOuterLowerModuleIndex] == Endcap and + modules.moduleType()[outerOuterLowerModuleIndex] == TwoS; float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; alpha_OutUp = deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); @@ -1304,29 +1214,29 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (isEC_lastLayer) { alpha_OutUp_highEdge = deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], - mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo); + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], + mds.anchorHighEdgeX()[fourthMDIndex] - x_OutLo, + mds.anchorHighEdgeY()[fourthMDIndex] - y_OutLo); alpha_OutUp_lowEdge = deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex], - mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorLowEdgeY[fourthMDIndex] - 
y_OutLo); + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], + mds.anchorLowEdgeX()[fourthMDIndex] - x_OutLo, + mds.anchorLowEdgeY()[fourthMDIndex] - y_OutLo); - tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_InUp; - tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_InUp; - tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_InUp; - tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_InUp; + tl_axis_highEdge_x = mds.anchorHighEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_highEdge_y = mds.anchorHighEdgeY()[fourthMDIndex] - y_InUp; + tl_axis_lowEdge_x = mds.anchorLowEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_lowEdge_y = mds.anchorLowEdgeY()[fourthMDIndex] - y_InUp; betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], tl_axis_highEdge_x, tl_axis_highEdge_y); betaOutRHmax = -alpha_OutUp_lowEdge + deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex], + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], tl_axis_lowEdge_x, tl_axis_lowEdge_y); } @@ -1381,14 +1291,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dBetaROut = 0; if (isEC_lastLayer) { - dBetaROut = - (alpaka::math::sqrt(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + - mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) - - alpaka::math::sqrt(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] + - mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) * - sinDPhi / drt_tl_axis; + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * 
mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / drt_tl_axis; } const float dBetaROut2 = dBetaROut * dBetaROut; @@ -1412,10 +1321,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, - Modules const& modulesInGPU, - ObjectRanges const& rangesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, uint16_t pixelModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -1427,38 +1337,38 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int fourthMDIndex) { float dPhi, betaIn, betaOut, pt_beta, rtLo, rtHi, dPhiCut, betaOutCut; - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS); + bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS); - float z_InUp = mdsInGPU.anchorZ[secondMDIndex]; - float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; + float z_InUp = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; if (z_InUp * z_OutLo <= 0) return false; - float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; - float rt_InUp = mdsInGPU.anchorRt[secondMDIndex]; - float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; - float rt_OutUp = mdsInGPU.anchorRt[fourthMDIndex]; + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InUp = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; + float rt_OutUp = mds.anchorRt()[fourthMDIndex]; - float x_InLo = mdsInGPU.anchorX[firstMDIndex]; - float x_InUp = mdsInGPU.anchorX[secondMDIndex]; - float x_OutLo = mdsInGPU.anchorX[thirdMDIndex]; - float x_OutUp = mdsInGPU.anchorX[fourthMDIndex]; + 
float x_InLo = mds.anchorX()[firstMDIndex]; + float x_InUp = mds.anchorX()[secondMDIndex]; + float x_OutLo = mds.anchorX()[thirdMDIndex]; + float x_OutUp = mds.anchorX()[fourthMDIndex]; - float y_InLo = mdsInGPU.anchorY[firstMDIndex]; - float y_InUp = mdsInGPU.anchorY[secondMDIndex]; - float y_OutLo = mdsInGPU.anchorY[thirdMDIndex]; - float y_OutUp = mdsInGPU.anchorY[fourthMDIndex]; + float y_InLo = mds.anchorY()[firstMDIndex]; + float y_InUp = mds.anchorY()[secondMDIndex]; + float y_OutLo = mds.anchorY()[thirdMDIndex]; + float y_OutUp = mds.anchorY()[fourthMDIndex]; - unsigned int pixelSegmentArrayIndex = innerSegmentIndex - rangesInGPU.segmentModuleIndices[pixelModuleIndex]; + unsigned int pixelSegmentArrayIndex = innerSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; - float ptIn = segmentsInGPU.ptIn[pixelSegmentArrayIndex]; + float ptIn = segmentsPixel.ptIn()[pixelSegmentArrayIndex]; float ptSLo = ptIn; - float px = segmentsInGPU.px[pixelSegmentArrayIndex]; - float py = segmentsInGPU.py[pixelSegmentArrayIndex]; - float pz = segmentsInGPU.pz[pixelSegmentArrayIndex]; - float ptErr = segmentsInGPU.ptErr[pixelSegmentArrayIndex]; - float etaErr = segmentsInGPU.etaErr[pixelSegmentArrayIndex]; + float px = segmentsPixel.px()[pixelSegmentArrayIndex]; + float py = segmentsPixel.py()[pixelSegmentArrayIndex]; + float pz = segmentsPixel.pz()[pixelSegmentArrayIndex]; + float ptErr = segmentsPixel.ptErr()[pixelSegmentArrayIndex]; + float etaErr = segmentsPixel.etaErr()[pixelSegmentArrayIndex]; ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); @@ -1471,7 +1381,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float dzDrtScale = alpaka::math::tan(acc, slope) / slope; //FIXME: need approximate value const float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InUp); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS; + bool 
isOutSgInnerMDPS = modules.moduleType()[outerInnerLowerModuleIndex] == PS; const float rtGeom1 = isOutSgInnerMDPS ? kPixelPSZpitch @@ -1534,11 +1444,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; - float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); - bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == Endcap and - modulesInGPU.moduleType[outerOuterLowerModuleIndex] == TwoS; + bool isEC_lastLayer = modules.subdets()[outerOuterLowerModuleIndex] == Endcap and + modules.moduleType()[outerOuterLowerModuleIndex] == TwoS; float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; @@ -1565,29 +1475,29 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (isEC_lastLayer) { alpha_OutUp_highEdge = deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], - mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo); + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], + mds.anchorHighEdgeX()[fourthMDIndex] - x_OutLo, + mds.anchorHighEdgeY()[fourthMDIndex] - y_OutLo); alpha_OutUp_lowEdge = deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex], - mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_OutLo); + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], + mds.anchorLowEdgeX()[fourthMDIndex] - x_OutLo, + mds.anchorLowEdgeY()[fourthMDIndex] - y_OutLo); - tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_InUp; - tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_InUp; - tl_axis_lowEdge_x = 
mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_InUp; - tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_InUp; + tl_axis_highEdge_x = mds.anchorHighEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_highEdge_y = mds.anchorHighEdgeY()[fourthMDIndex] - y_InUp; + tl_axis_lowEdge_x = mds.anchorLowEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_lowEdge_y = mds.anchorLowEdgeY()[fourthMDIndex] - y_InUp; betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], tl_axis_highEdge_x, tl_axis_highEdge_y); betaOutRHmax = -alpha_OutUp_lowEdge + deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex], + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], tl_axis_lowEdge_x, tl_axis_lowEdge_y); } @@ -1641,14 +1551,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dBetaROut = 0; if (isEC_lastLayer) { - dBetaROut = - (alpaka::math::sqrt(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + - mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) - - alpaka::math::sqrt(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] + - mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) * - sinDPhi / drt_tl_axis; + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / drt_tl_axis; } const float dBetaROut2 = dBetaROut * dBetaROut; diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h 
b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 1b75100c874e8..e5388851ce8aa 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -3,145 +3,21 @@ #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" -#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" #include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" #include "NeuralNetwork.h" -#include "Segment.h" -#include "MiniDoublet.h" #include "Hit.h" -#include "ObjectRanges.h" -#include "Triplet.h" +#include "Triplet.h" // FIXME: need to refactor common functions to a common place namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct Quintuplets { - unsigned int* tripletIndices; - uint16_t* lowerModuleIndices; - unsigned int* nQuintuplets; - unsigned int* totOccupancyQuintuplets; - unsigned int* nMemoryLocations; - - FPX* innerRadius; - FPX* bridgeRadius; - FPX* outerRadius; - FPX* pt; - FPX* eta; - FPX* phi; - FPX* score_rphisum; - uint8_t* layer; - char* isDup; - bool* TightCutFlag; - bool* partOfPT5; - - float* regressionRadius; - float* regressionG; - float* regressionF; - - uint8_t* logicalLayers; - unsigned int* hitIndices; - float* rzChiSquared; - float* chiSquared; - float* nonAnchorChiSquared; - - template - void setData(TBuff& buf) { - tripletIndices = buf.tripletIndices_buf.data(); - lowerModuleIndices = buf.lowerModuleIndices_buf.data(); - nQuintuplets = buf.nQuintuplets_buf.data(); - totOccupancyQuintuplets = buf.totOccupancyQuintuplets_buf.data(); - nMemoryLocations = 
buf.nMemoryLocations_buf.data(); - innerRadius = buf.innerRadius_buf.data(); - bridgeRadius = buf.bridgeRadius_buf.data(); - outerRadius = buf.outerRadius_buf.data(); - pt = buf.pt_buf.data(); - eta = buf.eta_buf.data(); - phi = buf.phi_buf.data(); - score_rphisum = buf.score_rphisum_buf.data(); - layer = buf.layer_buf.data(); - isDup = buf.isDup_buf.data(); - TightCutFlag = buf.TightCutFlag_buf.data(); - partOfPT5 = buf.partOfPT5_buf.data(); - regressionRadius = buf.regressionRadius_buf.data(); - regressionG = buf.regressionG_buf.data(); - regressionF = buf.regressionF_buf.data(); - logicalLayers = buf.logicalLayers_buf.data(); - hitIndices = buf.hitIndices_buf.data(); - rzChiSquared = buf.rzChiSquared_buf.data(); - chiSquared = buf.chiSquared_buf.data(); - nonAnchorChiSquared = buf.nonAnchorChiSquared_buf.data(); - } - }; - - template - struct QuintupletsBuffer { - Buf tripletIndices_buf; - Buf lowerModuleIndices_buf; - Buf nQuintuplets_buf; - Buf totOccupancyQuintuplets_buf; - Buf nMemoryLocations_buf; - - Buf innerRadius_buf; - Buf bridgeRadius_buf; - Buf outerRadius_buf; - Buf pt_buf; - Buf eta_buf; - Buf phi_buf; - Buf score_rphisum_buf; - Buf layer_buf; - Buf isDup_buf; - Buf TightCutFlag_buf; - Buf partOfPT5_buf; - - Buf regressionRadius_buf; - Buf regressionG_buf; - Buf regressionF_buf; - - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf rzChiSquared_buf; - Buf chiSquared_buf; - Buf nonAnchorChiSquared_buf; - - Quintuplets data_; - - template - QuintupletsBuffer(unsigned int nTotalQuintuplets, unsigned int nLowerModules, TDevAcc const& devAccIn, TQueue& queue) - : tripletIndices_buf(allocBufWrapper(devAccIn, 2 * nTotalQuintuplets, queue)), - lowerModuleIndices_buf(allocBufWrapper(devAccIn, Params_T5::kLayers * nTotalQuintuplets, queue)), - nQuintuplets_buf(allocBufWrapper(devAccIn, nLowerModules, queue)), - totOccupancyQuintuplets_buf(allocBufWrapper(devAccIn, nLowerModules, queue)), - nMemoryLocations_buf(allocBufWrapper(devAccIn, 1, queue)), - 
innerRadius_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - bridgeRadius_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - outerRadius_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - pt_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - eta_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - phi_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - score_rphisum_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - layer_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - isDup_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - TightCutFlag_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - partOfPT5_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - regressionRadius_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - regressionG_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - regressionF_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - logicalLayers_buf(allocBufWrapper(devAccIn, Params_T5::kLayers * nTotalQuintuplets, queue)), - hitIndices_buf(allocBufWrapper(devAccIn, Params_T5::kHits * nTotalQuintuplets, queue)), - rzChiSquared_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - chiSquared_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - nonAnchorChiSquared_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)) { - alpaka::memset(queue, nQuintuplets_buf, 0u); - alpaka::memset(queue, totOccupancyQuintuplets_buf, 0u); - alpaka::memset(queue, isDup_buf, 0u); - alpaka::memset(queue, TightCutFlag_buf, false); - alpaka::memset(queue, partOfPT5_buf, false); - } - - inline Quintuplets const* data() const { return &data_; } - inline void setData(QuintupletsBuffer& buf) { data_.setData(buf); } - }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool checkIntervalOverlap(float firstMin, float firstMax, float secondMin, @@ -149,8 +25,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return ((firstMin <= secondMin) && (secondMin < 
firstMax)) || ((secondMin < firstMin) && (firstMin < secondMax)); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(Triplets const& tripletsInGPU, - Quintuplets& quintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(TripletsConst triplets, + Quintuplets quintuplets, unsigned int innerTripletIndex, unsigned int outerTripletIndex, uint16_t lowerModule1, @@ -173,66 +49,50 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float scores, uint8_t layer, unsigned int quintupletIndex, - bool TightCutFlag) { - quintupletsInGPU.tripletIndices[2 * quintupletIndex] = innerTripletIndex; - quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1] = outerTripletIndex; - - quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex] = lowerModule1; - quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 1] = lowerModule2; - quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 2] = lowerModule3; - quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 3] = lowerModule4; - quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 4] = lowerModule5; - quintupletsInGPU.innerRadius[quintupletIndex] = __F2H(innerRadius); - quintupletsInGPU.outerRadius[quintupletIndex] = __F2H(outerRadius); - quintupletsInGPU.pt[quintupletIndex] = __F2H(pt); - quintupletsInGPU.eta[quintupletIndex] = __F2H(eta); - quintupletsInGPU.phi[quintupletIndex] = __F2H(phi); - quintupletsInGPU.score_rphisum[quintupletIndex] = __F2H(scores); - quintupletsInGPU.layer[quintupletIndex] = layer; - quintupletsInGPU.isDup[quintupletIndex] = 0; - quintupletsInGPU.TightCutFlag[quintupletIndex] = TightCutFlag; - quintupletsInGPU.regressionRadius[quintupletIndex] = regressionRadius; - quintupletsInGPU.regressionG[quintupletIndex] = regressionG; - quintupletsInGPU.regressionF[quintupletIndex] = regressionF; - quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex] = - 
tripletsInGPU.logicalLayers[Params_T3::kLayers * innerTripletIndex]; - quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 1] = - tripletsInGPU.logicalLayers[Params_T3::kLayers * innerTripletIndex + 1]; - quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 2] = - tripletsInGPU.logicalLayers[Params_T3::kLayers * innerTripletIndex + 2]; - quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 3] = - tripletsInGPU.logicalLayers[Params_T3::kLayers * outerTripletIndex + 1]; - quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 4] = - tripletsInGPU.logicalLayers[Params_T3::kLayers * outerTripletIndex + 2]; - - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 1] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 1]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 2] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 2]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 3] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 3]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 4] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 4]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 5] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 5]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 6] = - tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 2]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 7] = - tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 3]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 8] = - tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 4]; - quintupletsInGPU.hitIndices[Params_T5::kHits * 
quintupletIndex + 9] = - tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 5]; - quintupletsInGPU.bridgeRadius[quintupletIndex] = bridgeRadius; - quintupletsInGPU.rzChiSquared[quintupletIndex] = rzChiSquared; - quintupletsInGPU.chiSquared[quintupletIndex] = rPhiChiSquared; - quintupletsInGPU.nonAnchorChiSquared[quintupletIndex] = nonAnchorChiSquared; + bool tightCutFlag) { + quintuplets.tripletIndices()[quintupletIndex][0] = innerTripletIndex; + quintuplets.tripletIndices()[quintupletIndex][1] = outerTripletIndex; + + quintuplets.lowerModuleIndices()[quintupletIndex][0] = lowerModule1; + quintuplets.lowerModuleIndices()[quintupletIndex][1] = lowerModule2; + quintuplets.lowerModuleIndices()[quintupletIndex][2] = lowerModule3; + quintuplets.lowerModuleIndices()[quintupletIndex][3] = lowerModule4; + quintuplets.lowerModuleIndices()[quintupletIndex][4] = lowerModule5; + quintuplets.innerRadius()[quintupletIndex] = __F2H(innerRadius); + quintuplets.outerRadius()[quintupletIndex] = __F2H(outerRadius); + quintuplets.pt()[quintupletIndex] = __F2H(pt); + quintuplets.eta()[quintupletIndex] = __F2H(eta); + quintuplets.phi()[quintupletIndex] = __F2H(phi); + quintuplets.score_rphisum()[quintupletIndex] = __F2H(scores); + quintuplets.isDup()[quintupletIndex] = 0; + quintuplets.tightCutFlag()[quintupletIndex] = tightCutFlag; + quintuplets.regressionRadius()[quintupletIndex] = regressionRadius; + quintuplets.regressionG()[quintupletIndex] = regressionG; + quintuplets.regressionF()[quintupletIndex] = regressionF; + quintuplets.logicalLayers()[quintupletIndex][0] = triplets.logicalLayers()[innerTripletIndex][0]; + quintuplets.logicalLayers()[quintupletIndex][1] = triplets.logicalLayers()[innerTripletIndex][1]; + quintuplets.logicalLayers()[quintupletIndex][2] = triplets.logicalLayers()[innerTripletIndex][2]; + quintuplets.logicalLayers()[quintupletIndex][3] = triplets.logicalLayers()[outerTripletIndex][1]; + quintuplets.logicalLayers()[quintupletIndex][4] = 
triplets.logicalLayers()[outerTripletIndex][2]; + + quintuplets.hitIndices()[quintupletIndex][0] = triplets.hitIndices()[innerTripletIndex][0]; + quintuplets.hitIndices()[quintupletIndex][1] = triplets.hitIndices()[innerTripletIndex][1]; + quintuplets.hitIndices()[quintupletIndex][2] = triplets.hitIndices()[innerTripletIndex][2]; + quintuplets.hitIndices()[quintupletIndex][3] = triplets.hitIndices()[innerTripletIndex][3]; + quintuplets.hitIndices()[quintupletIndex][4] = triplets.hitIndices()[innerTripletIndex][4]; + quintuplets.hitIndices()[quintupletIndex][5] = triplets.hitIndices()[innerTripletIndex][5]; + quintuplets.hitIndices()[quintupletIndex][6] = triplets.hitIndices()[outerTripletIndex][2]; + quintuplets.hitIndices()[quintupletIndex][7] = triplets.hitIndices()[outerTripletIndex][3]; + quintuplets.hitIndices()[quintupletIndex][8] = triplets.hitIndices()[outerTripletIndex][4]; + quintuplets.hitIndices()[quintupletIndex][9] = triplets.hitIndices()[outerTripletIndex][5]; + quintuplets.bridgeRadius()[quintupletIndex] = bridgeRadius; + quintuplets.rzChiSquared()[quintupletIndex] = rzChiSquared; + quintuplets.chiSquared()[quintupletIndex] = rPhiChiSquared; + quintuplets.nonAnchorChiSquared()[quintupletIndex] = nonAnchorChiSquared; } //90% constraint - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(ModulesConst modules, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -240,11 +100,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t lowerModuleIndex5, float chiSquared) { // Using lstLayer numbering convention defined in ModuleMethods.h - const int layer1 = modulesInGPU.lstLayers[lowerModuleIndex1]; - const int layer2 = modulesInGPU.lstLayers[lowerModuleIndex2]; - const int layer3 = modulesInGPU.lstLayers[lowerModuleIndex3]; - const int layer4 = modulesInGPU.lstLayers[lowerModuleIndex4]; - const int layer5 = 
modulesInGPU.lstLayers[lowerModuleIndex5]; + const int layer1 = modules.lstLayers()[lowerModuleIndex1]; + const int layer2 = modules.lstLayers()[lowerModuleIndex2]; + const int layer3 = modules.lstLayers()[lowerModuleIndex3]; + const int layer4 = modules.lstLayers()[lowerModuleIndex4]; + const int layer5 = modules.lstLayers()[lowerModuleIndex5]; if (layer1 == 7 and layer2 == 8 and layer3 == 9) { if (layer4 == 10 and layer5 == 11) { @@ -317,8 +177,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //bounds can be found at http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_RZFix/t5_rz_thresholds.txt template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passT5RZConstraint(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, unsigned int firstMDIndex, unsigned int secondMDIndex, unsigned int thirdMDIndex, @@ -336,55 +196,55 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float f, bool& TightCutFlag) { //(g,f) is the center of the circle fitted by the innermost 3 points on x,y coordinates - const float& rt1 = mdsInGPU.anchorRt[firstMDIndex] / 100; //in the unit of m instead of cm - const float& rt2 = mdsInGPU.anchorRt[secondMDIndex] / 100; - const float& rt3 = mdsInGPU.anchorRt[thirdMDIndex] / 100; - const float& rt4 = mdsInGPU.anchorRt[fourthMDIndex] / 100; - const float& rt5 = mdsInGPU.anchorRt[fifthMDIndex] / 100; - - const float& z1 = mdsInGPU.anchorZ[firstMDIndex] / 100; - const float& z2 = mdsInGPU.anchorZ[secondMDIndex] / 100; - const float& z3 = mdsInGPU.anchorZ[thirdMDIndex] / 100; - const float& z4 = mdsInGPU.anchorZ[fourthMDIndex] / 100; - const float& z5 = mdsInGPU.anchorZ[fifthMDIndex] / 100; + const float& rt1 = mds.anchorRt()[firstMDIndex] / 100; //in the unit of m instead of cm + const float& rt2 = mds.anchorRt()[secondMDIndex] / 100; + const float& rt3 = mds.anchorRt()[thirdMDIndex] / 100; + const float& rt4 = mds.anchorRt()[fourthMDIndex] / 100; + const float& rt5 = 
mds.anchorRt()[fifthMDIndex] / 100; + + const float& z1 = mds.anchorZ()[firstMDIndex] / 100; + const float& z2 = mds.anchorZ()[secondMDIndex] / 100; + const float& z3 = mds.anchorZ()[thirdMDIndex] / 100; + const float& z4 = mds.anchorZ()[fourthMDIndex] / 100; + const float& z5 = mds.anchorZ()[fifthMDIndex] / 100; // Using lst_layer numbering convention defined in ModuleMethods.h - const int layer1 = modulesInGPU.lstLayers[lowerModuleIndex1]; - const int layer2 = modulesInGPU.lstLayers[lowerModuleIndex2]; - const int layer3 = modulesInGPU.lstLayers[lowerModuleIndex3]; - const int layer4 = modulesInGPU.lstLayers[lowerModuleIndex4]; - const int layer5 = modulesInGPU.lstLayers[lowerModuleIndex5]; + const int layer1 = modules.lstLayers()[lowerModuleIndex1]; + const int layer2 = modules.lstLayers()[lowerModuleIndex2]; + const int layer3 = modules.lstLayers()[lowerModuleIndex3]; + const int layer4 = modules.lstLayers()[lowerModuleIndex4]; + const int layer5 = modules.lstLayers()[lowerModuleIndex5]; //slope computed using the internal T3s - const int moduleType1 = modulesInGPU.moduleType[lowerModuleIndex1]; //0 is ps, 1 is 2s - const int moduleType2 = modulesInGPU.moduleType[lowerModuleIndex2]; - const int moduleType3 = modulesInGPU.moduleType[lowerModuleIndex3]; - const int moduleType4 = modulesInGPU.moduleType[lowerModuleIndex4]; - const int moduleType5 = modulesInGPU.moduleType[lowerModuleIndex5]; - - const float& x1 = mdsInGPU.anchorX[firstMDIndex] / 100; - const float& x2 = mdsInGPU.anchorX[secondMDIndex] / 100; - const float& x3 = mdsInGPU.anchorX[thirdMDIndex] / 100; - const float& x4 = mdsInGPU.anchorX[fourthMDIndex] / 100; - const float& y1 = mdsInGPU.anchorY[firstMDIndex] / 100; - const float& y2 = mdsInGPU.anchorY[secondMDIndex] / 100; - const float& y3 = mdsInGPU.anchorY[thirdMDIndex] / 100; - const float& y4 = mdsInGPU.anchorY[fourthMDIndex] / 100; + const int moduleType1 = modules.moduleType()[lowerModuleIndex1]; //0 is ps, 1 is 2s + const int moduleType2 = 
modules.moduleType()[lowerModuleIndex2]; + const int moduleType3 = modules.moduleType()[lowerModuleIndex3]; + const int moduleType4 = modules.moduleType()[lowerModuleIndex4]; + const int moduleType5 = modules.moduleType()[lowerModuleIndex5]; + + const float& x1 = mds.anchorX()[firstMDIndex] / 100; + const float& x2 = mds.anchorX()[secondMDIndex] / 100; + const float& x3 = mds.anchorX()[thirdMDIndex] / 100; + const float& x4 = mds.anchorX()[fourthMDIndex] / 100; + const float& y1 = mds.anchorY()[firstMDIndex] / 100; + const float& y2 = mds.anchorY()[secondMDIndex] / 100; + const float& y3 = mds.anchorY()[thirdMDIndex] / 100; + const float& y4 = mds.anchorY()[fourthMDIndex] / 100; float residual = 0; float error2 = 0; float x_center = g / 100, y_center = f / 100; - float x_init = mdsInGPU.anchorX[thirdMDIndex] / 100; - float y_init = mdsInGPU.anchorY[thirdMDIndex] / 100; - float z_init = mdsInGPU.anchorZ[thirdMDIndex] / 100; - float rt_init = mdsInGPU.anchorRt[thirdMDIndex] / 100; //use the second MD as initial point + float x_init = mds.anchorX()[thirdMDIndex] / 100; + float y_init = mds.anchorY()[thirdMDIndex] / 100; + float z_init = mds.anchorZ()[thirdMDIndex] / 100; + float rt_init = mds.anchorRt()[thirdMDIndex] / 100; //use the second MD as initial point if (moduleType3 == 1) // 1: if MD3 is in 2s layer { - x_init = mdsInGPU.anchorX[secondMDIndex] / 100; - y_init = mdsInGPU.anchorY[secondMDIndex] / 100; - z_init = mdsInGPU.anchorZ[secondMDIndex] / 100; - rt_init = mdsInGPU.anchorRt[secondMDIndex] / 100; + x_init = mds.anchorX()[secondMDIndex] / 100; + y_init = mds.anchorY()[secondMDIndex] / 100; + z_init = mds.anchorZ()[secondMDIndex] / 100; + rt_init = mds.anchorRt()[secondMDIndex] / 100; } // start from a circle of inner T3. 
@@ -576,14 +436,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float drdz; short side, subdets; if (i == 2) { - drdz = alpaka::math::abs(acc, modulesInGPU.drdzs[lowerModuleIndex2]); - side = modulesInGPU.sides[lowerModuleIndex2]; - subdets = modulesInGPU.subdets[lowerModuleIndex2]; + drdz = alpaka::math::abs(acc, modules.drdzs()[lowerModuleIndex2]); + side = modules.sides()[lowerModuleIndex2]; + subdets = modules.subdets()[lowerModuleIndex2]; } if (i == 3) { - drdz = alpaka::math::abs(acc, modulesInGPU.drdzs[lowerModuleIndex3]); - side = modulesInGPU.sides[lowerModuleIndex3]; - subdets = modulesInGPU.subdets[lowerModuleIndex3]; + drdz = alpaka::math::abs(acc, modules.drdzs()[lowerModuleIndex3]); + side = modules.sides()[lowerModuleIndex3]; + subdets = modules.subdets()[lowerModuleIndex3]; } if (i == 2 || i == 3) { residual = (layeri <= 6 && ((side == Center) or (drdz < 1))) ? diffz : diffr; @@ -750,16 +610,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } template - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(Triplets const& tripletsInGPU, - Segments const& segmentsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(TripletsConst triplets, + SegmentsConst segments, unsigned int innerTripletIndex, unsigned int outerTripletIndex) { - unsigned int innerOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; - unsigned int outerInnerSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex]; + unsigned int innerOuterSegmentIndex = triplets.segmentIndices()[innerTripletIndex][1]; + unsigned int outerInnerSegmentIndex = triplets.segmentIndices()[outerTripletIndex][0]; unsigned int innerOuterOuterMiniDoubletIndex = - segmentsInGPU.mdIndices[2 * innerOuterSegmentIndex + 1]; //inner triplet outer segment outer MD index + segments.mdIndices()[innerOuterSegmentIndex][1]; //inner triplet outer segment outer MD index unsigned int outerInnerInnerMiniDoubletIndex = - segmentsInGPU.mdIndices[2 * 
outerInnerSegmentIndex]; //outer triplet inner segment inner MD index + segments.mdIndices()[outerInnerSegmentIndex][0]; //outer triplet inner segment inner MD index return (innerOuterOuterMiniDoubletIndex == outerInnerInnerMiniDoubletIndex); } @@ -1011,7 +871,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression(TAcc const& acc, - Modules const& modulesInGPU, + ModulesConst modules, const uint16_t* lowerModuleIndices, float* delta1, float* delta2, @@ -1034,11 +894,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float inv2 = kPixelPSZpitch / kWidth2S; float inv3 = kStripPSZpitch / kWidth2S; for (size_t i = 0; i < nPoints; i++) { - moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; - moduleSubdet = modulesInGPU.subdets[lowerModuleIndices[i]]; - moduleSide = modulesInGPU.sides[lowerModuleIndices[i]]; - const float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; - slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; + moduleType = modules.moduleType()[lowerModuleIndices[i]]; + moduleSubdet = modules.subdets()[lowerModuleIndices[i]]; + moduleSide = modules.sides()[lowerModuleIndices[i]]; + const float& drdz = modules.drdzs()[lowerModuleIndices[i]]; + slopes[i] = modules.dxdys()[lowerModuleIndices[i]]; //category 1 - barrel PS flat if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { delta1[i] = inv1; @@ -1341,9 +1201,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBBB(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1354,16 +1214,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int secondMDIndex, unsigned int thirdMDIndex, 
unsigned int fourthMDIndex) { - bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS); - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS); + bool isPS_InLo = (modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS); - float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; - float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; - float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; - float z_InLo = mdsInGPU.anchorZ[firstMDIndex]; - float z_InOut = mdsInGPU.anchorZ[secondMDIndex]; - float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; + float z_InLo = mds.anchorZ()[firstMDIndex]; + float z_InOut = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); @@ -1413,15 +1273,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float pvOffset = 0.1f / rt_OutLo; float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); - float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); + float deltaPhiPos = phi_mpi_pi(acc, mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[secondMDIndex]); // Cut #3: FIXME:deltaPhiPos can be tighter if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) return false; - float midPointX = 0.5f * (mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]); - float midPointY = 0.5f * (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]); - float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float midPointX = 0.5f * (mds.anchorX()[firstMDIndex] + 
mds.anchorX()[thirdMDIndex]); + float midPointY = 0.5f * (mds.anchorY()[firstMDIndex] + mds.anchorY()[thirdMDIndex]); + float diffX = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float diffY = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); @@ -1430,36 +1290,35 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; // First obtaining the raw betaIn and betaOut values without any correction and just purely based on the mini-doublet hit positions + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); - float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); - - bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == Endcap and - modulesInGPU.moduleType[outerOuterLowerModuleIndex] == TwoS; + bool isEC_lastLayer = modules.subdets()[outerOuterLowerModuleIndex] == Endcap and + modules.moduleType()[outerOuterLowerModuleIndex] == TwoS; float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; alpha_OutUp = phi_mpi_pi(acc, phi(acc, - mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorPhi[fourthMDIndex]); + mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorPhi()[fourthMDIndex]); alpha_OutUp_highEdge = alpha_OutUp; alpha_OutUp_lowEdge = alpha_OutUp; - float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float tl_axis_x = mds.anchorX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; float 
tl_axis_highEdge_x = tl_axis_x; float tl_axis_highEdge_y = tl_axis_y; float tl_axis_lowEdge_x = tl_axis_x; float tl_axis_lowEdge_y = tl_axis_y; - float betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + float betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); float betaInRHmin = betaIn; float betaInRHmax = betaIn; - float betaOut = -alpha_OutUp + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); + float betaOut = -alpha_OutUp + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[fourthMDIndex]); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; @@ -1467,26 +1326,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (isEC_lastLayer) { alpha_OutUp_highEdge = phi_mpi_pi(acc, phi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); + mds.anchorHighEdgeX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorHighEdgePhi()[fourthMDIndex]); alpha_OutUp_lowEdge = phi_mpi_pi(acc, phi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); + mds.anchorLowEdgeX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorLowEdgePhi()[fourthMDIndex]); - tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - tl_axis_lowEdge_y = 
mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + tl_axis_highEdge_x = mds.anchorHighEdgeX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + tl_axis_highEdge_y = mds.anchorHighEdgeY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; + tl_axis_lowEdge_x = mds.anchorLowEdgeX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + tl_axis_lowEdge_y = mds.anchorLowEdgeY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; betaOutRHmin = -alpha_OutUp_highEdge + - phi_mpi_pi(acc, phi(acc, tl_axis_highEdge_x, tl_axis_highEdge_y) - mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); + phi_mpi_pi(acc, phi(acc, tl_axis_highEdge_x, tl_axis_highEdge_y) - mds.anchorHighEdgePhi()[fourthMDIndex]); betaOutRHmax = -alpha_OutUp_lowEdge + - phi_mpi_pi(acc, phi(acc, tl_axis_lowEdge_x, tl_axis_lowEdge_y) - mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); + phi_mpi_pi(acc, phi(acc, tl_axis_lowEdge_x, tl_axis_lowEdge_y) - mds.anchorLowEdgePhi()[fourthMDIndex]); } //beta computation @@ -1494,12 +1353,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float corrF = 1.f; //innerOuterAnchor - innerInnerAnchor - const float rt_InSeg = - alpaka::math::sqrt(acc, - (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * - (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + const float rt_InSeg = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); float betaInCut = alpaka::math::asin( acc, alpaka::math::min(acc, (-rt_InSeg * corrF + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + @@ -1514,11 +1372,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { int lIn = 5; int lOut = isEC_lastLayer ? 
11 : 5; float sdOut_dr = alpaka::math::sqrt(acc, - (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) * - (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) + - (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) * - (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); - float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) * + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) + + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) * + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex])); + float sdOut_d = mds.anchorRt()[fourthMDIndex] - mds.anchorRt()[thirdMDIndex]; runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); @@ -1553,14 +1411,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float dBetaRIn2 = 0; // TODO-RH float dBetaROut = 0; if (isEC_lastLayer) { - dBetaROut = - (alpaka::math::sqrt(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + - mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) - - alpaka::math::sqrt(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] + - mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) * - sinDPhi / drt_tl_axis; + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / drt_tl_axis; } const float dBetaROut2 = dBetaROut * dBetaROut; @@ -1586,9 +1443,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC 
ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBEE(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1599,16 +1456,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int secondMDIndex, unsigned int thirdMDIndex, unsigned int fourthMDIndex) { - bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS); - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS); + bool isPS_InLo = (modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS); - float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; - float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; - float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; - float z_InLo = mdsInGPU.anchorZ[firstMDIndex]; - float z_InOut = mdsInGPU.anchorZ[secondMDIndex]; - float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; + float z_InLo = mds.anchorZ()[firstMDIndex]; + float z_InOut = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); @@ -1624,7 +1481,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS; + bool isOutSgInnerMDPS = modules.moduleType()[outerInnerLowerModuleIndex] == PS; float rtGeom1 = isOutSgInnerMDPS ? 
kPixelPSZpitch : kStrip2SZpitch; float zGeom1 = alpaka::math::copysign(acc, zGeom, z_InLo); float rtLo = rt_InLo * (1.f + (z_OutLo - z_InLo - zGeom1) / (z_InLo + zGeom1 + dLum) / dzDrtScale) - @@ -1670,53 +1527,52 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float pvOffset = 0.1f / rt_OutLo; float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); - float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); + float deltaPhiPos = phi_mpi_pi(acc, mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[secondMDIndex]); //Cut #4: deltaPhiPos can be tighter if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) return false; - float midPointX = 0.5f * (mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]); - float midPointY = 0.5f * (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]); - float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float midPointX = 0.5f * (mds.anchorX()[firstMDIndex] + mds.anchorX()[thirdMDIndex]); + float midPointY = 0.5f * (mds.anchorY()[firstMDIndex] + mds.anchorY()[thirdMDIndex]); + float diffX = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float diffY = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); // Cut #5: deltaPhiChange if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; - float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float sdIn_alpha_min = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); - float sdIn_alpha_max = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float sdIn_alpha_min = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alpha_max = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); float 
sdOut_alpha = sdIn_alpha; float sdOut_alphaOut = phi_mpi_pi(acc, phi(acc, - mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorPhi[fourthMDIndex]); + mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorPhi()[fourthMDIndex]); float sdOut_alphaOut_min = phi_mpi_pi( - acc, __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMins[outerSegmentIndex])); + acc, __H2F(segments.dPhiChangeMins()[outerSegmentIndex]) - __H2F(segments.dPhiMins()[outerSegmentIndex])); float sdOut_alphaOut_max = phi_mpi_pi( - acc, __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMaxs[outerSegmentIndex])); + acc, __H2F(segments.dPhiChangeMaxs()[outerSegmentIndex]) - __H2F(segments.dPhiMaxs()[outerSegmentIndex])); - float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float tl_axis_x = mds.anchorX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; - float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); float betaInRHmin = betaIn; float betaInRHmax = betaIn; - float betaOut = - -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); + float betaOut = -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[fourthMDIndex]); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; - bool isEC_secondLayer = (modulesInGPU.subdets[innerOuterLowerModuleIndex] == Endcap) and - (modulesInGPU.moduleType[innerOuterLowerModuleIndex] == 
TwoS); + bool isEC_secondLayer = (modules.subdets()[innerOuterLowerModuleIndex] == Endcap) and + (modules.moduleType()[innerOuterLowerModuleIndex] == TwoS); if (isEC_secondLayer) { betaInRHmin = betaIn - sdIn_alpha_min + sdIn_alpha; @@ -1740,10 +1596,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } float sdIn_dr = alpaka::math::sqrt(acc, - (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * - (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); float sdIn_d = rt_InOut - rt_InLo; float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); @@ -1763,11 +1619,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float lOut = 11; float sdOut_dr = alpaka::math::sqrt(acc, - (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) * - (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) + - (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) * - (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); - float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) * + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) + + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) * + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex])); + float sdOut_d = mds.anchorRt()[fourthMDIndex] - mds.anchorRt()[thirdMDIndex]; runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); @@ -1801,15 +1657,14 @@ namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst { const float dBetaRIn2 = 0; // TODO-RH float dBetaROut = 0; - if (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == TwoS) { - dBetaROut = - (alpaka::math::sqrt(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + - mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) - - alpaka::math::sqrt(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] + - mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) * - sinDPhi / dr; + if (modules.moduleType()[outerOuterLowerModuleIndex] == TwoS) { + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / dr; } const float dBetaROut2 = dBetaROut * dBetaROut; @@ -1833,9 +1688,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoEEEE(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1846,13 +1701,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int secondMDIndex, unsigned int thirdMDIndex, unsigned int fourthMDIndex) { - float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; - float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; - float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = 
mds.anchorRt()[thirdMDIndex]; - float z_InLo = mdsInGPU.anchorZ[firstMDIndex]; - float z_InOut = mdsInGPU.anchorZ[secondMDIndex]; - float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; + float z_InLo = mds.anchorZ()[firstMDIndex]; + float z_InOut = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); @@ -1865,8 +1720,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS; - bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS; + bool isOutSgInnerMDPS = modules.moduleType()[outerInnerLowerModuleIndex] == PS; + bool isInSgInnerMDPS = modules.moduleType()[innerInnerLowerModuleIndex] == PS; float rtGeom = (isInSgInnerMDPS and isOutSgInnerMDPS) ? 2.f * kPixelPSZpitch : (isInSgInnerMDPS or isOutSgInnerMDPS) ? 
kPixelPSZpitch + kStrip2SZpitch @@ -1884,7 +1739,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if ((rtOut < rtLo) || (rtOut > rtHi)) return false; - bool isInSgOuterMDPS = modulesInGPU.moduleType[innerOuterLowerModuleIndex] == PS; + bool isInSgOuterMDPS = modules.moduleType()[innerOuterLowerModuleIndex] == PS; const float drtSDIn = rt_InOut - rt_InLo; const float dzSDIn = z_InOut - z_InLo; @@ -1921,15 +1776,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float pvOffset = 0.1f / rtOut; float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); - float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); + float deltaPhiPos = phi_mpi_pi(acc, mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[secondMDIndex]); if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) return false; - float midPointX = 0.5f * (mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]); - float midPointY = 0.5f * (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]); - float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float midPointX = 0.5f * (mds.anchorX()[firstMDIndex] + mds.anchorX()[thirdMDIndex]); + float midPointY = 0.5f * (mds.anchorY()[firstMDIndex] + mds.anchorY()[thirdMDIndex]); + float diffX = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float diffY = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); @@ -1937,30 +1792,29 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; - float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); float sdOut_alpha = sdIn_alpha; //weird - float sdOut_dPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - 
mdsInGPU.anchorPhi[thirdMDIndex]); + float sdOut_dPhiPos = phi_mpi_pi(acc, mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[thirdMDIndex]); - float sdOut_dPhiChange = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); - float sdOut_dPhiChange_min = __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]); - float sdOut_dPhiChange_max = __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]); + float sdOut_dPhiChange = __H2F(segments.dPhiChanges()[outerSegmentIndex]); + float sdOut_dPhiChange_min = __H2F(segments.dPhiChangeMins()[outerSegmentIndex]); + float sdOut_dPhiChange_max = __H2F(segments.dPhiChangeMaxs()[outerSegmentIndex]); float sdOut_alphaOutRHmin = phi_mpi_pi(acc, sdOut_dPhiChange_min - sdOut_dPhiPos); float sdOut_alphaOutRHmax = phi_mpi_pi(acc, sdOut_dPhiChange_max - sdOut_dPhiPos); float sdOut_alphaOut = phi_mpi_pi(acc, sdOut_dPhiChange - sdOut_dPhiPos); - float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float tl_axis_x = mds.anchorX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; - float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); - float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); - float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); + float sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; - float betaOut = - -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); 
+ float betaOut = -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[fourthMDIndex]); float betaOutRHmin = betaOut - sdOut_alphaOutRHmin + sdOut_alphaOut; float betaOutRHmax = betaOut - sdOut_alphaOutRHmax + sdOut_alphaOut; @@ -1978,10 +1832,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { betaInRHmax = swapTemp; } float sdIn_dr = alpaka::math::sqrt(acc, - (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * - (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); float sdIn_d = rt_InOut - rt_InLo; float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); @@ -2001,11 +1855,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { int lOut = 13; //endcap float sdOut_dr = alpaka::math::sqrt(acc, - (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) * - (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) + - (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) * - (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); - float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) * + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) + + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) * + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex])); + float sdOut_d = mds.anchorRt()[fourthMDIndex] - mds.anchorRt()[thirdMDIndex]; runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); @@ 
-2059,9 +1913,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletAlgoSelector(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -2072,17 +1926,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int secondMDIndex, unsigned int thirdMDIndex, unsigned int fourthMDIndex) { - short innerInnerLowerModuleSubdet = modulesInGPU.subdets[innerInnerLowerModuleIndex]; - short innerOuterLowerModuleSubdet = modulesInGPU.subdets[innerOuterLowerModuleIndex]; - short outerInnerLowerModuleSubdet = modulesInGPU.subdets[outerInnerLowerModuleIndex]; - short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; + short innerInnerLowerModuleSubdet = modules.subdets()[innerInnerLowerModuleIndex]; + short innerOuterLowerModuleSubdet = modules.subdets()[innerOuterLowerModuleIndex]; + short outerInnerLowerModuleSubdet = modules.subdets()[outerInnerLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and outerInnerLowerModuleSubdet == Barrel and outerOuterLowerModuleSubdet == Barrel) { return runQuintupletDefaultAlgoBBBB(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, innerInnerLowerModuleIndex, innerOuterLowerModuleIndex, outerInnerLowerModuleIndex, @@ -2096,9 +1950,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { return runQuintupletDefaultAlgoBBEE(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + 
mds, + segments, innerInnerLowerModuleIndex, innerOuterLowerModuleIndex, outerInnerLowerModuleIndex, @@ -2112,9 +1966,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and outerInnerLowerModuleSubdet == Barrel and outerOuterLowerModuleSubdet == Endcap) { return runQuintupletDefaultAlgoBBBB(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, innerInnerLowerModuleIndex, innerOuterLowerModuleIndex, outerInnerLowerModuleIndex, @@ -2128,9 +1982,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Endcap and outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { return runQuintupletDefaultAlgoBBEE(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, innerInnerLowerModuleIndex, innerOuterLowerModuleIndex, outerInnerLowerModuleIndex, @@ -2144,9 +1998,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } else if (innerInnerLowerModuleSubdet == Endcap and innerOuterLowerModuleSubdet == Endcap and outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { return runQuintupletDefaultAlgoEEEE(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, innerInnerLowerModuleIndex, innerOuterLowerModuleIndex, outerInnerLowerModuleIndex, @@ -2164,10 +2018,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgo(TAcc const& acc, - Modules& modulesInGPU, - MiniDoublets& mdsInGPU, - Segments& segmentsInGPU, - Triplets& tripletsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + TripletsConst triplets, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -2185,30 +2039,30 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& chiSquared, float& 
nonAnchorChiSquared, bool& TightCutFlag) { - unsigned int firstSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex]; - unsigned int secondSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; - unsigned int thirdSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex]; - unsigned int fourthSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex + 1]; + unsigned int firstSegmentIndex = triplets.segmentIndices()[innerTripletIndex][0]; + unsigned int secondSegmentIndex = triplets.segmentIndices()[innerTripletIndex][1]; + unsigned int thirdSegmentIndex = triplets.segmentIndices()[outerTripletIndex][0]; + unsigned int fourthSegmentIndex = triplets.segmentIndices()[outerTripletIndex][1]; unsigned int innerOuterOuterMiniDoubletIndex = - segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; //inner triplet outer segment outer MD index + segments.mdIndices()[secondSegmentIndex][1]; //inner triplet outer segment outer MD index unsigned int outerInnerInnerMiniDoubletIndex = - segmentsInGPU.mdIndices[2 * thirdSegmentIndex]; //outer triplet inner segment inner MD index + segments.mdIndices()[thirdSegmentIndex][0]; //outer triplet inner segment inner MD index //this cut reduces the number of candidates by a factor of 3, i.e., 2 out of 3 warps can end right here! 
if (innerOuterOuterMiniDoubletIndex != outerInnerInnerMiniDoubletIndex) return false; - unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * firstSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; - unsigned int fourthMDIndex = segmentsInGPU.mdIndices[2 * thirdSegmentIndex + 1]; - unsigned int fifthMDIndex = segmentsInGPU.mdIndices[2 * fourthSegmentIndex + 1]; + unsigned int firstMDIndex = segments.mdIndices()[firstSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[secondSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[secondSegmentIndex][1]; + unsigned int fourthMDIndex = segments.mdIndices()[thirdSegmentIndex][1]; + unsigned int fifthMDIndex = segments.mdIndices()[fourthSegmentIndex][1]; if (not runQuintupletAlgoSelector(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, @@ -2222,9 +2076,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; if (not runQuintupletAlgoSelector(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex4, @@ -2237,17 +2091,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { fifthMDIndex)) return false; - float x1 = mdsInGPU.anchorX[firstMDIndex]; - float x2 = mdsInGPU.anchorX[secondMDIndex]; - float x3 = mdsInGPU.anchorX[thirdMDIndex]; - float x4 = mdsInGPU.anchorX[fourthMDIndex]; - float x5 = mdsInGPU.anchorX[fifthMDIndex]; + float x1 = mds.anchorX()[firstMDIndex]; + float x2 = mds.anchorX()[secondMDIndex]; + float x3 = mds.anchorX()[thirdMDIndex]; + float x4 = mds.anchorX()[fourthMDIndex]; + float x5 = mds.anchorX()[fifthMDIndex]; - float y1 = mdsInGPU.anchorY[firstMDIndex]; - float y2 = mdsInGPU.anchorY[secondMDIndex]; - float y3 = mdsInGPU.anchorY[thirdMDIndex]; - float y4 = 
mdsInGPU.anchorY[fourthMDIndex]; - float y5 = mdsInGPU.anchorY[fifthMDIndex]; + float y1 = mds.anchorY()[firstMDIndex]; + float y2 = mds.anchorY()[secondMDIndex]; + float y3 = mds.anchorY()[thirdMDIndex]; + float y4 = mds.anchorY()[fourthMDIndex]; + float y5 = mds.anchorY()[fifthMDIndex]; //construct the arrays float x1Vec[] = {x1, x1, x1}; @@ -2257,26 +2111,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float x3Vec[] = {x3, x3, x3}; float y3Vec[] = {y3, y3, y3}; - if (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS) { - x1Vec[1] = mdsInGPU.anchorLowEdgeX[firstMDIndex]; - x1Vec[2] = mdsInGPU.anchorHighEdgeX[firstMDIndex]; + if (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS) { + x1Vec[1] = mds.anchorLowEdgeX()[firstMDIndex]; + x1Vec[2] = mds.anchorHighEdgeX()[firstMDIndex]; - y1Vec[1] = mdsInGPU.anchorLowEdgeY[firstMDIndex]; - y1Vec[2] = mdsInGPU.anchorHighEdgeY[firstMDIndex]; + y1Vec[1] = mds.anchorLowEdgeY()[firstMDIndex]; + y1Vec[2] = mds.anchorHighEdgeY()[firstMDIndex]; } - if (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS) { - x2Vec[1] = mdsInGPU.anchorLowEdgeX[secondMDIndex]; - x2Vec[2] = mdsInGPU.anchorHighEdgeX[secondMDIndex]; + if (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS) { + x2Vec[1] = mds.anchorLowEdgeX()[secondMDIndex]; + x2Vec[2] = mds.anchorHighEdgeX()[secondMDIndex]; - y2Vec[1] = mdsInGPU.anchorLowEdgeY[secondMDIndex]; - y2Vec[2] = mdsInGPU.anchorHighEdgeY[secondMDIndex]; + y2Vec[1] = mds.anchorLowEdgeY()[secondMDIndex]; + y2Vec[2] = mds.anchorHighEdgeY()[secondMDIndex]; } - if (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS) { - x3Vec[1] = mdsInGPU.anchorLowEdgeX[thirdMDIndex]; - x3Vec[2] = mdsInGPU.anchorHighEdgeX[thirdMDIndex]; + if 
(modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS) { + x3Vec[1] = mds.anchorLowEdgeX()[thirdMDIndex]; + x3Vec[2] = mds.anchorHighEdgeX()[thirdMDIndex]; - y3Vec[1] = mdsInGPU.anchorLowEdgeY[thirdMDIndex]; - y3Vec[2] = mdsInGPU.anchorHighEdgeY[thirdMDIndex]; + y3Vec[1] = mds.anchorLowEdgeY()[thirdMDIndex]; + y3Vec[2] = mds.anchorHighEdgeY()[thirdMDIndex]; } float innerRadiusMin2S, innerRadiusMax2S; @@ -2286,12 +2140,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { x1Vec[i] = x4; y1Vec[i] = y4; } - if (modulesInGPU.subdets[lowerModuleIndex4] == Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == TwoS) { - x1Vec[1] = mdsInGPU.anchorLowEdgeX[fourthMDIndex]; - x1Vec[2] = mdsInGPU.anchorHighEdgeX[fourthMDIndex]; + if (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS) { + x1Vec[1] = mds.anchorLowEdgeX()[fourthMDIndex]; + x1Vec[2] = mds.anchorHighEdgeX()[fourthMDIndex]; - y1Vec[1] = mdsInGPU.anchorLowEdgeY[fourthMDIndex]; - y1Vec[2] = mdsInGPU.anchorHighEdgeY[fourthMDIndex]; + y1Vec[1] = mds.anchorLowEdgeY()[fourthMDIndex]; + y1Vec[2] = mds.anchorHighEdgeY()[fourthMDIndex]; } float bridgeRadiusMin2S, bridgeRadiusMax2S; @@ -2301,30 +2155,30 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { x2Vec[i] = x5; y2Vec[i] = y5; } - if (modulesInGPU.subdets[lowerModuleIndex5] == Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == TwoS) { - x2Vec[1] = mdsInGPU.anchorLowEdgeX[fifthMDIndex]; - x2Vec[2] = mdsInGPU.anchorHighEdgeX[fifthMDIndex]; + if (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS) { + x2Vec[1] = mds.anchorLowEdgeX()[fifthMDIndex]; + x2Vec[2] = mds.anchorHighEdgeX()[fifthMDIndex]; - y2Vec[1] = mdsInGPU.anchorLowEdgeY[fifthMDIndex]; - y2Vec[2] = mdsInGPU.anchorHighEdgeY[fifthMDIndex]; + y2Vec[1] = mds.anchorLowEdgeY()[fifthMDIndex]; + y2Vec[2] = mds.anchorHighEdgeY()[fifthMDIndex]; } float outerRadiusMin2S, 
outerRadiusMax2S; computeErrorInRadius(acc, x3Vec, y3Vec, x1Vec, y1Vec, x2Vec, y2Vec, outerRadiusMin2S, outerRadiusMax2S); float g, f; - outerRadius = tripletsInGPU.circleRadius[outerTripletIndex]; + outerRadius = triplets.radius()[outerTripletIndex]; bridgeRadius = computeRadiusFromThreeAnchorHits(acc, x2, y2, x3, y3, x4, y4, g, f); - innerRadius = tripletsInGPU.circleRadius[innerTripletIndex]; - g = tripletsInGPU.circleCenterX[innerTripletIndex]; - f = tripletsInGPU.circleCenterY[innerTripletIndex]; + innerRadius = triplets.radius()[innerTripletIndex]; + g = triplets.centerX()[innerTripletIndex]; + f = triplets.centerY()[innerTripletIndex]; #ifdef USE_RZCHI2 float inner_pt = 2 * k2Rinv1GeVf * innerRadius; if (not passT5RZConstraint(acc, - modulesInGPU, - mdsInGPU, + modules, + mds, firstMDIndex, secondMDIndex, thirdMDIndex, @@ -2350,25 +2204,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //split by category bool matchedRadii; - if (modulesInGPU.subdets[lowerModuleIndex1] == Barrel and modulesInGPU.subdets[lowerModuleIndex2] == Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == Barrel and modulesInGPU.subdets[lowerModuleIndex4] == Barrel and - modulesInGPU.subdets[lowerModuleIndex5] == Barrel) { + if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + modules.subdets()[lowerModuleIndex3] == Barrel and modules.subdets()[lowerModuleIndex4] == Barrel and + modules.subdets()[lowerModuleIndex5] == Barrel) { matchedRadii = matchRadiiBBBBB(acc, innerRadius, bridgeRadius, outerRadius); - } else if (modulesInGPU.subdets[lowerModuleIndex1] == Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == Barrel and - modulesInGPU.subdets[lowerModuleIndex4] == Barrel and - modulesInGPU.subdets[lowerModuleIndex5] == Endcap) { + } else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + 
modules.subdets()[lowerModuleIndex3] == Barrel and modules.subdets()[lowerModuleIndex4] == Barrel and + modules.subdets()[lowerModuleIndex5] == Endcap) { matchedRadii = matchRadiiBBBBE(acc, innerRadius, bridgeRadius, outerRadius); - } else if (modulesInGPU.subdets[lowerModuleIndex1] == Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == Barrel and - modulesInGPU.subdets[lowerModuleIndex4] == Endcap and - modulesInGPU.subdets[lowerModuleIndex5] == Endcap) { - if (modulesInGPU.layers[lowerModuleIndex1] == 1) { + } else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + modules.subdets()[lowerModuleIndex3] == Barrel and modules.subdets()[lowerModuleIndex4] == Endcap and + modules.subdets()[lowerModuleIndex5] == Endcap) { + if (modules.layers()[lowerModuleIndex1] == 1) { matchedRadii = matchRadiiBBBEE12378(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); - } else if (modulesInGPU.layers[lowerModuleIndex1] == 2) { + } else if (modules.layers()[lowerModuleIndex1] == 2) { matchedRadii = matchRadiiBBBEE23478(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); } else { @@ -2377,15 +2227,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } } - else if (modulesInGPU.subdets[lowerModuleIndex1] == Barrel and modulesInGPU.subdets[lowerModuleIndex2] == Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.subdets[lowerModuleIndex4] == Endcap and - modulesInGPU.subdets[lowerModuleIndex5] == Endcap) { + else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + modules.subdets()[lowerModuleIndex3] == Endcap and modules.subdets()[lowerModuleIndex4] == Endcap and + modules.subdets()[lowerModuleIndex5] == Endcap) { matchedRadii = matchRadiiBBEEE(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, 
bridgeRadiusMax2S); - } else if (modulesInGPU.subdets[lowerModuleIndex1] == Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == Endcap and - modulesInGPU.subdets[lowerModuleIndex3] == Endcap and - modulesInGPU.subdets[lowerModuleIndex4] == Endcap and - modulesInGPU.subdets[lowerModuleIndex5] == Endcap) { + } else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Endcap and + modules.subdets()[lowerModuleIndex3] == Endcap and modules.subdets()[lowerModuleIndex4] == Endcap and + modules.subdets()[lowerModuleIndex5] == Endcap) { matchedRadii = matchRadiiBEEEE(acc, innerRadius, bridgeRadius, @@ -2418,7 +2266,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float sigmas2[5], delta1[5], delta2[5], slopes[5]; bool isFlat[5]; - computeSigmasForRegression(acc, modulesInGPU, lowerModuleIndices, delta1, delta2, slopes, isFlat); + computeSigmasForRegression(acc, modules, lowerModuleIndices, delta1, delta2, slopes, isFlat); regressionRadius = computeRadiusUsingRegression(acc, Params_T5::kLayers, xVec, @@ -2435,10 +2283,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { #ifdef USE_T5_DNN unsigned int mdIndices[] = {firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, fifthMDIndex}; float inference = t5dnn::runInference(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, - tripletsInGPU, + modules, + mds, + segments, + triplets, xVec, yVec, mdIndices, @@ -2456,7 +2304,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { #ifdef USE_RPHICHI2 // extra chi squared cuts! if (regressionRadius < 5.0f / (2.f * k2Rinv1GeVf)) { - if (not passChiSquaredConstraint(modulesInGPU, + if (not passChiSquaredConstraint(modules, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, @@ -2470,19 +2318,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //compute the other chisquared //non anchor is always shifted for tilted and endcap! 
float nonAnchorDelta1[Params_T5::kLayers], nonAnchorDelta2[Params_T5::kLayers], nonAnchorSlopes[Params_T5::kLayers]; - float nonAnchorxs[] = {mdsInGPU.outerX[firstMDIndex], - mdsInGPU.outerX[secondMDIndex], - mdsInGPU.outerX[thirdMDIndex], - mdsInGPU.outerX[fourthMDIndex], - mdsInGPU.outerX[fifthMDIndex]}; - float nonAnchorys[] = {mdsInGPU.outerY[firstMDIndex], - mdsInGPU.outerY[secondMDIndex], - mdsInGPU.outerY[thirdMDIndex], - mdsInGPU.outerY[fourthMDIndex], - mdsInGPU.outerY[fifthMDIndex]}; + float nonAnchorxs[] = {mds.outerX()[firstMDIndex], + mds.outerX()[secondMDIndex], + mds.outerX()[thirdMDIndex], + mds.outerX()[fourthMDIndex], + mds.outerX()[fifthMDIndex]}; + float nonAnchorys[] = {mds.outerY()[firstMDIndex], + mds.outerY()[secondMDIndex], + mds.outerY()[thirdMDIndex], + mds.outerY()[fourthMDIndex], + mds.outerY()[fifthMDIndex]}; computeSigmasForRegression(acc, - modulesInGPU, + modules, lowerModuleIndices, nonAnchorDelta1, nonAnchorDelta2, @@ -2504,23 +2352,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return true; } - struct CreateQuintupletsInGPUv2 { + struct CreateQuintuplets { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - MiniDoublets mdsInGPU, - Segments segmentsInGPU, - Triplets tripletsInGPU, - Quintuplets quintupletsInGPU, - ObjectRanges rangesInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + Triplets triplets, + TripletsOccupancyConst tripletsOccupancy, + Quintuplets quintuplets, + QuintupletsOccupancy quintupletsOccupancy, + ObjectRangesConst ranges, uint16_t nEligibleT5Modules) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); for (int iter = globalThreadIdx[0]; iter < nEligibleT5Modules; iter += gridThreadExtent[0]) { - uint16_t lowerModule1 = rangesInGPU.indicesOfEligibleT5Modules[iter]; + uint16_t lowerModule1 = ranges.indicesOfEligibleT5Modules()[iter]; short layer2_adjustment; - int layer = 
modulesInGPU.layers[lowerModule1]; + int layer = modules.layers()[lowerModule1]; if (layer == 1) { layer2_adjustment = 1; } // get upper segment to be in second layer @@ -2530,28 +2380,28 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { else { continue; } - unsigned int nInnerTriplets = tripletsInGPU.nTriplets[lowerModule1]; + unsigned int nInnerTriplets = tripletsOccupancy.nTriplets()[lowerModule1]; for (unsigned int innerTripletArrayIndex = globalThreadIdx[1]; innerTripletArrayIndex < nInnerTriplets; innerTripletArrayIndex += gridThreadExtent[1]) { - unsigned int innerTripletIndex = rangesInGPU.tripletModuleIndices[lowerModule1] + innerTripletArrayIndex; - uint16_t lowerModule2 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * innerTripletIndex + 1]; - uint16_t lowerModule3 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * innerTripletIndex + 2]; - unsigned int nOuterTriplets = tripletsInGPU.nTriplets[lowerModule3]; + unsigned int innerTripletIndex = ranges.tripletModuleIndices()[lowerModule1] + innerTripletArrayIndex; + uint16_t lowerModule2 = triplets.lowerModuleIndices()[innerTripletIndex][1]; + uint16_t lowerModule3 = triplets.lowerModuleIndices()[innerTripletIndex][2]; + unsigned int nOuterTriplets = tripletsOccupancy.nTriplets()[lowerModule3]; for (unsigned int outerTripletArrayIndex = globalThreadIdx[2]; outerTripletArrayIndex < nOuterTriplets; outerTripletArrayIndex += gridThreadExtent[2]) { - unsigned int outerTripletIndex = rangesInGPU.tripletModuleIndices[lowerModule3] + outerTripletArrayIndex; - uint16_t lowerModule4 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * outerTripletIndex + 1]; - uint16_t lowerModule5 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * outerTripletIndex + 2]; + unsigned int outerTripletIndex = ranges.tripletModuleIndices()[lowerModule3] + outerTripletArrayIndex; + uint16_t lowerModule4 = triplets.lowerModuleIndices()[outerTripletIndex][1]; + uint16_t lowerModule5 = 
triplets.lowerModuleIndices()[outerTripletIndex][2]; float innerRadius, outerRadius, bridgeRadius, regressionG, regressionF, regressionRadius, rzChiSquared, chiSquared, nonAnchorChiSquared; //required for making distributions bool TightCutFlag = false; bool success = runQuintupletDefaultAlgo(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, - tripletsInGPU, + modules, + mds, + segments, + triplets, lowerModule1, lowerModule2, lowerModule3, @@ -2572,32 +2422,29 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (success) { int totOccupancyQuintuplets = alpaka::atomicAdd( - acc, &quintupletsInGPU.totOccupancyQuintuplets[lowerModule1], 1u, alpaka::hierarchy::Threads{}); - if (totOccupancyQuintuplets >= rangesInGPU.quintupletModuleOccupancy[lowerModule1]) { + acc, &quintupletsOccupancy.totOccupancyQuintuplets()[lowerModule1], 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyQuintuplets >= ranges.quintupletModuleOccupancy()[lowerModule1]) { #ifdef WARNINGS printf("Quintuplet excess alert! Module index = %d\n", lowerModule1); #endif } else { int quintupletModuleIndex = alpaka::atomicAdd( - acc, &quintupletsInGPU.nQuintuplets[lowerModule1], 1u, alpaka::hierarchy::Threads{}); + acc, &quintupletsOccupancy.nQuintuplets()[lowerModule1], 1u, alpaka::hierarchy::Threads{}); //this if statement should never get executed! 
- if (rangesInGPU.quintupletModuleIndices[lowerModule1] == -1) { + if (ranges.quintupletModuleIndices()[lowerModule1] == -1) { #ifdef WARNINGS printf("Quintuplets : no memory for module at module index = %d\n", lowerModule1); #endif } else { - unsigned int quintupletIndex = - rangesInGPU.quintupletModuleIndices[lowerModule1] + quintupletModuleIndex; - float phi = - mdsInGPU.anchorPhi[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + - layer2_adjustment]]]; - float eta = - mdsInGPU.anchorEta[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + - layer2_adjustment]]]; + unsigned int quintupletIndex = ranges.quintupletModuleIndices()[lowerModule1] + quintupletModuleIndex; + float phi = mds.anchorPhi()[segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]] + [layer2_adjustment]]; + float eta = mds.anchorEta()[segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]] + [layer2_adjustment]]; float pt = (innerRadius + outerRadius) * k2Rinv1GeVf; float scores = chiSquared + nonAnchorChiSquared; - addQuintupletToMemory(tripletsInGPU, - quintupletsInGPU, + addQuintupletToMemory(triplets, + quintuplets, innerTripletIndex, outerTripletIndex, lowerModule1, @@ -2622,8 +2469,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { quintupletIndex, TightCutFlag); - tripletsInGPU.partOfT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex]] = true; - tripletsInGPU.partOfT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1]] = true; + triplets.partOfT5()[quintuplets.tripletIndices()[quintupletIndex][0]] = true; + triplets.partOfT5()[quintuplets.tripletIndices()[quintupletIndex][1]] = true; } } } @@ -2633,12 +2480,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct CreateEligibleModulesListForQuintupletsGPU { + struct CreateEligibleModulesListForQuintuplets { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - Triplets tripletsInGPU, - ObjectRanges 
rangesInGPU) const { + ModulesConst modules, + TripletsOccupancyConst tripletsOccupancy, + ObjectRanges ranges) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -2658,15 +2505,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // Create variables outside of the for loop. int occupancy, category_number, eta_number; - for (int i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { + for (int i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { // Condition for a quintuple to exist for a module // TCs don't exist for layers 5 and 6 barrel, and layers 2,3,4,5 endcap - short module_rings = modulesInGPU.rings[i]; - short module_layers = modulesInGPU.layers[i]; - short module_subdets = modulesInGPU.subdets[i]; - float module_eta = alpaka::math::abs(acc, modulesInGPU.eta[i]); + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); - if (tripletsInGPU.nTriplets[i] == 0) + if (tripletsOccupancy.nTriplets()[i] == 0) continue; if (module_subdets == Barrel and module_layers >= 3) continue; @@ -2723,16 +2570,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } int nTotQ = alpaka::atomicAdd(acc, &nTotalQuintupletsx, occupancy, alpaka::hierarchy::Threads{}); - rangesInGPU.quintupletModuleIndices[i] = nTotQ; - rangesInGPU.indicesOfEligibleT5Modules[nEligibleT5Modules] = i; - rangesInGPU.quintupletModuleOccupancy[i] = occupancy; + ranges.quintupletModuleIndices()[i] = nTotQ; + ranges.indicesOfEligibleT5Modules()[nEligibleT5Modules] = i; + ranges.quintupletModuleOccupancy()[i] = occupancy; } // Wait for all threads to finish before reporting final values alpaka::syncBlockThreads(acc); if (cms::alpakatools::once_per_block(acc)) { - *rangesInGPU.nEligibleT5Modules = 
static_cast(nEligibleT5Modulesx); - *rangesInGPU.device_nTotalQuints = static_cast(nTotalQuintupletsx); + ranges.nEligibleT5Modules() = static_cast(nEligibleT5Modulesx); + ranges.nTotalQuints() = static_cast(nTotalQuintupletsx); } } }; @@ -2740,9 +2587,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct AddQuintupletRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - Quintuplets quintupletsInGPU, - ObjectRanges rangesInGPU) const { + ModulesConst modules, + QuintupletsOccupancyConst quintupletsOccupancy, + ObjectRanges ranges) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -2750,14 +2597,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { - if (quintupletsInGPU.nQuintuplets[i] == 0 or rangesInGPU.quintupletModuleIndices[i] == -1) { - rangesInGPU.quintupletRanges[i * 2] = -1; - rangesInGPU.quintupletRanges[i * 2 + 1] = -1; + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (quintupletsOccupancy.nQuintuplets()[i] == 0 or ranges.quintupletModuleIndices()[i] == -1) { + ranges.quintupletRanges()[i][0] = -1; + ranges.quintupletRanges()[i][1] = -1; } else { - rangesInGPU.quintupletRanges[i * 2] = rangesInGPU.quintupletModuleIndices[i]; - rangesInGPU.quintupletRanges[i * 2 + 1] = - rangesInGPU.quintupletModuleIndices[i] + quintupletsInGPU.nQuintuplets[i] - 1; + ranges.quintupletRanges()[i][0] = ranges.quintupletModuleIndices()[i]; + ranges.quintupletRanges()[i][1] = + ranges.quintupletModuleIndices()[i] + quintupletsOccupancy.nQuintuplets()[i] - 1; } } } diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h 
index bc2d1d82a5fc9..b1c2c6d815dde 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -4,187 +4,25 @@ #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" -#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" #include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" #include "MiniDoublet.h" #include "Hit.h" -#include "ObjectRanges.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct Segments { - FPX* dPhis; - FPX* dPhiMins; - FPX* dPhiMaxs; - FPX* dPhiChanges; - FPX* dPhiChangeMins; - FPX* dPhiChangeMaxs; - uint16_t* innerLowerModuleIndices; - uint16_t* outerLowerModuleIndices; - unsigned int* seedIdx; - unsigned int* mdIndices; - unsigned int* nMemoryLocations; - unsigned int* innerMiniDoubletAnchorHitIndices; - unsigned int* outerMiniDoubletAnchorHitIndices; - int* charge; - int* superbin; - unsigned int* nSegments; //number of segments per inner lower module - unsigned int* totOccupancySegments; //number of segments per inner lower module - uint4* pLSHitsIdxs; - PixelType* pixelType; - char* isQuad; - char* isDup; - bool* partOfPT5; - float* ptIn; - float* ptErr; - float* px; - float* py; - float* pz; - float* etaErr; - float* eta; - float* phi; - float* score; - float* circleCenterX; - float* circleCenterY; - float* circleRadius; - - template - void setData(TBuff& buf) { - dPhis = buf.dPhis_buf.data(); - dPhiMins = buf.dPhiMins_buf.data(); - dPhiMaxs = buf.dPhiMaxs_buf.data(); - dPhiChanges = buf.dPhiChanges_buf.data(); - dPhiChangeMins = buf.dPhiChangeMins_buf.data(); - dPhiChangeMaxs = buf.dPhiChangeMaxs_buf.data(); - innerLowerModuleIndices = buf.innerLowerModuleIndices_buf.data(); - 
outerLowerModuleIndices = buf.outerLowerModuleIndices_buf.data(); - seedIdx = buf.seedIdx_buf.data(); - mdIndices = buf.mdIndices_buf.data(); - nMemoryLocations = buf.nMemoryLocations_buf.data(); - innerMiniDoubletAnchorHitIndices = buf.innerMiniDoubletAnchorHitIndices_buf.data(); - outerMiniDoubletAnchorHitIndices = buf.outerMiniDoubletAnchorHitIndices_buf.data(); - charge = buf.charge_buf.data(); - superbin = buf.superbin_buf.data(); - nSegments = buf.nSegments_buf.data(); - totOccupancySegments = buf.totOccupancySegments_buf.data(); - pLSHitsIdxs = buf.pLSHitsIdxs_buf.data(); - pixelType = buf.pixelType_buf.data(); - isQuad = buf.isQuad_buf.data(); - isDup = buf.isDup_buf.data(); - partOfPT5 = buf.partOfPT5_buf.data(); - ptIn = buf.ptIn_buf.data(); - ptErr = buf.ptErr_buf.data(); - px = buf.px_buf.data(); - py = buf.py_buf.data(); - pz = buf.pz_buf.data(); - etaErr = buf.etaErr_buf.data(); - eta = buf.eta_buf.data(); - phi = buf.phi_buf.data(); - score = buf.score_buf.data(); - circleCenterX = buf.circleCenterX_buf.data(); - circleCenterY = buf.circleCenterY_buf.data(); - circleRadius = buf.circleRadius_buf.data(); - } - }; - - template - struct SegmentsBuffer { - Buf dPhis_buf; - Buf dPhiMins_buf; - Buf dPhiMaxs_buf; - Buf dPhiChanges_buf; - Buf dPhiChangeMins_buf; - Buf dPhiChangeMaxs_buf; - Buf innerLowerModuleIndices_buf; - Buf outerLowerModuleIndices_buf; - Buf seedIdx_buf; - Buf mdIndices_buf; - Buf nMemoryLocations_buf; - Buf innerMiniDoubletAnchorHitIndices_buf; - Buf outerMiniDoubletAnchorHitIndices_buf; - Buf charge_buf; - Buf superbin_buf; - Buf nSegments_buf; - Buf totOccupancySegments_buf; - Buf pLSHitsIdxs_buf; - Buf pixelType_buf; - Buf isQuad_buf; - Buf isDup_buf; - Buf partOfPT5_buf; - Buf ptIn_buf; - Buf ptErr_buf; - Buf px_buf; - Buf py_buf; - Buf pz_buf; - Buf etaErr_buf; - Buf eta_buf; - Buf phi_buf; - Buf score_buf; - Buf circleCenterX_buf; - Buf circleCenterY_buf; - Buf circleRadius_buf; - - Segments data_; - - template - 
SegmentsBuffer(unsigned int nMemoryLocationsIn, - uint16_t nLowerModules, - unsigned int maxPixelSegments, - TDevAcc const& devAccIn, - TQueue& queue) - : dPhis_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - dPhiMins_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - dPhiMaxs_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - dPhiChanges_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - dPhiChangeMins_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - dPhiChangeMaxs_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - innerLowerModuleIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - outerLowerModuleIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - seedIdx_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - mdIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn * 2, queue)), - nMemoryLocations_buf(allocBufWrapper(devAccIn, 1, queue)), - innerMiniDoubletAnchorHitIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - outerMiniDoubletAnchorHitIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - charge_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - superbin_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - nSegments_buf(allocBufWrapper(devAccIn, nLowerModules + 1, queue)), - totOccupancySegments_buf(allocBufWrapper(devAccIn, nLowerModules + 1, queue)), - pLSHitsIdxs_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - pixelType_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - isQuad_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - isDup_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - partOfPT5_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - ptIn_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - ptErr_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - px_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - py_buf(allocBufWrapper(devAccIn, 
maxPixelSegments, queue)), - pz_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - etaErr_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - eta_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - phi_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - score_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - circleCenterX_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - circleCenterY_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - circleRadius_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)) { - alpaka::memset(queue, nSegments_buf, 0u); - alpaka::memset(queue, totOccupancySegments_buf, 0u); - alpaka::memset(queue, partOfPT5_buf, false); - alpaka::memset(queue, pLSHitsIdxs_buf, 0u); - } - - inline Segments const* data() const { return &data_; } - inline void setData(SegmentsBuffer& buf) { data_.setData(buf); } - }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(Modules const& modulesInGPU, - unsigned int moduleIndex) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(ModulesConst modules, unsigned int moduleIndex) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing // This is the same as what was previously considered as"isNormalTiltedModules" // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf - short subdet = modulesInGPU.subdets[moduleIndex]; - short layer = modulesInGPU.layers[moduleIndex]; - short side = modulesInGPU.sides[moduleIndex]; - short rod = modulesInGPU.rods[moduleIndex]; + short subdet = modules.subdets()[moduleIndex]; + short layer = modules.layers()[moduleIndex]; + short side = modules.sides()[moduleIndex]; + short rod = modules.rods()[moduleIndex]; return (subdet == Barrel) && (((side != Center) && (layer == 3)) || ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || @@ -230,7 +68,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return 
moduleSeparation; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(Modules const& modulesInGPU, unsigned int moduleIndex) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(ModulesConst modules, unsigned int moduleIndex) { static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; static constexpr float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; static constexpr float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; @@ -241,16 +79,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}}; - unsigned int iL = modulesInGPU.layers[moduleIndex] - 1; - unsigned int iR = modulesInGPU.rings[moduleIndex] - 1; - short subdet = modulesInGPU.subdets[moduleIndex]; - short side = modulesInGPU.sides[moduleIndex]; + unsigned int iL = modules.layers()[moduleIndex] - 1; + unsigned int iR = modules.rings()[moduleIndex] - 1; + short subdet = modules.subdets()[moduleIndex]; + short side = modules.sides()[moduleIndex]; float moduleSeparation = 0; if (subdet == Barrel and side == Center) { moduleSeparation = miniDeltaFlat[iL]; - } else if (isTighterTiltedModules_seg(modulesInGPU, moduleIndex)) { + } else if (isTighterTiltedModules_seg(modules, moduleIndex)) { moduleSeparation = miniDeltaTilted[iL]; } else if (subdet == Endcap) { moduleSeparation = miniDeltaEndcap[iL][iR]; @@ -265,8 +103,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void dAlphaThreshold(TAcc const& acc, float* dAlphaThresholdValues, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, float xIn, float yIn, float zIn, @@ -279,9 +117,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, unsigned int outerMDIndex) { - float 
sdMuls = (modulesInGPU.subdets[innerLowerModuleIndex] == Barrel) - ? kMiniMulsPtScaleBarrel[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut - : kMiniMulsPtScaleEndcap[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut; + float sdMuls = (modules.subdets()[innerLowerModuleIndex] == Barrel) + ? kMiniMulsPtScaleBarrel[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut; //more accurate then outer rt - inner rt float segmentDr = alpaka::math::sqrt(acc, (yOut - yIn) * (yOut - yIn) + (xOut - xIn) * (xOut - xIn)); @@ -290,14 +128,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { alpaka::math::asin(acc, alpaka::math::min(acc, segmentDr * k2Rinv1GeVf / ptCut, kSinAlphaMax)); bool isInnerTilted = - modulesInGPU.subdets[innerLowerModuleIndex] == Barrel and modulesInGPU.sides[innerLowerModuleIndex] != Center; + modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] != Center; bool isOuterTilted = - modulesInGPU.subdets[outerLowerModuleIndex] == Barrel and modulesInGPU.sides[outerLowerModuleIndex] != Center; + modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] != Center; - float drdzInner = modulesInGPU.drdzs[innerLowerModuleIndex]; - float drdzOuter = modulesInGPU.drdzs[outerLowerModuleIndex]; - float innerModuleGapSize = moduleGapSize_seg(modulesInGPU, innerLowerModuleIndex); - float outerModuleGapSize = moduleGapSize_seg(modulesInGPU, outerLowerModuleIndex); + float drdzInner = modules.drdzs()[innerLowerModuleIndex]; + float drdzOuter = modules.drdzs()[outerLowerModuleIndex]; + float innerModuleGapSize = moduleGapSize_seg(modules, innerLowerModuleIndex); + float outerModuleGapSize = moduleGapSize_seg(modules, outerLowerModuleIndex); const float innerminiTilt2 = isInnerTilted ? 
((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzInner * drdzInner) / (1.f + drdzInner * drdzInner) / (innerModuleGapSize * innerModuleGapSize)) @@ -313,38 +151,38 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float sdLumForInnerMini2; float sdLumForOuterMini2; - if (modulesInGPU.subdets[innerLowerModuleIndex] == Barrel) { + if (modules.subdets()[innerLowerModuleIndex] == Barrel) { sdLumForInnerMini2 = innerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); } else { - sdLumForInnerMini2 = (mdsInGPU.dphis[innerMDIndex] * mdsInGPU.dphis[innerMDIndex]) * (kDeltaZLum * kDeltaZLum) / - (mdsInGPU.dzs[innerMDIndex] * mdsInGPU.dzs[innerMDIndex]); + sdLumForInnerMini2 = (mds.dphis()[innerMDIndex] * mds.dphis()[innerMDIndex]) * (kDeltaZLum * kDeltaZLum) / + (mds.dzs()[innerMDIndex] * mds.dzs()[innerMDIndex]); } - if (modulesInGPU.subdets[outerLowerModuleIndex] == Barrel) { + if (modules.subdets()[outerLowerModuleIndex] == Barrel) { sdLumForOuterMini2 = outerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); } else { - sdLumForOuterMini2 = (mdsInGPU.dphis[outerMDIndex] * mdsInGPU.dphis[outerMDIndex]) * (kDeltaZLum * kDeltaZLum) / - (mdsInGPU.dzs[outerMDIndex] * mdsInGPU.dzs[outerMDIndex]); + sdLumForOuterMini2 = (mds.dphis()[outerMDIndex] * mds.dphis()[outerMDIndex]) * (kDeltaZLum * kDeltaZLum) / + (mds.dzs()[outerMDIndex] * mds.dzs()[outerMDIndex]); } // Unique stuff for the segment dudes alone float dAlpha_res_inner = 0.02f / miniDelta * - (modulesInGPU.subdets[innerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); + (modules.subdets()[innerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); float dAlpha_res_outer = 0.02f / miniDelta * - (modulesInGPU.subdets[outerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zOut) / rtOut); + (modules.subdets()[outerLowerModuleIndex] == Barrel ? 
1.0f : alpaka::math::abs(acc, zOut) / rtOut); float dAlpha_res = dAlpha_res_inner + dAlpha_res_outer; - if (modulesInGPU.subdets[innerLowerModuleIndex] == Barrel and modulesInGPU.sides[innerLowerModuleIndex] == Center) { + if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] == Center) { dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } else { dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForInnerMini2); } - if (modulesInGPU.subdets[outerLowerModuleIndex] == Barrel and modulesInGPU.sides[outerLowerModuleIndex] == Center) { + if (modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] == Center) { dAlphaThresholdValues[1] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } else { dAlphaThresholdValues[1] = @@ -355,7 +193,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { dAlphaThresholdValues[2] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(Segments& segmentsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(Segments segments, unsigned int lowerMDIndex, unsigned int upperMDIndex, uint16_t innerLowerModuleIndex, @@ -369,25 +207,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dPhiChangeMin, float dPhiChangeMax, unsigned int idx) { - segmentsInGPU.mdIndices[idx * 2] = lowerMDIndex; - segmentsInGPU.mdIndices[idx * 2 + 1] = upperMDIndex; - segmentsInGPU.innerLowerModuleIndices[idx] = innerLowerModuleIndex; - segmentsInGPU.outerLowerModuleIndices[idx] = outerLowerModuleIndex; - segmentsInGPU.innerMiniDoubletAnchorHitIndices[idx] = innerMDAnchorHitIndex; - segmentsInGPU.outerMiniDoubletAnchorHitIndices[idx] = outerMDAnchorHitIndex; - - segmentsInGPU.dPhis[idx] = __F2H(dPhi); - segmentsInGPU.dPhiMins[idx] = 
__F2H(dPhiMin); - segmentsInGPU.dPhiMaxs[idx] = __F2H(dPhiMax); - segmentsInGPU.dPhiChanges[idx] = __F2H(dPhiChange); - segmentsInGPU.dPhiChangeMins[idx] = __F2H(dPhiChangeMin); - segmentsInGPU.dPhiChangeMaxs[idx] = __F2H(dPhiChangeMax); + segments.mdIndices()[idx][0] = lowerMDIndex; + segments.mdIndices()[idx][1] = upperMDIndex; + segments.innerLowerModuleIndices()[idx] = innerLowerModuleIndex; + segments.outerLowerModuleIndices()[idx] = outerLowerModuleIndex; + segments.innerMiniDoubletAnchorHitIndices()[idx] = innerMDAnchorHitIndex; + segments.outerMiniDoubletAnchorHitIndices()[idx] = outerMDAnchorHitIndex; + + segments.dPhis()[idx] = __F2H(dPhi); + segments.dPhiMins()[idx] = __F2H(dPhiMin); + segments.dPhiMaxs()[idx] = __F2H(dPhiMax); + segments.dPhiChanges()[idx] = __F2H(dPhiChange); + segments.dPhiChangeMins()[idx] = __F2H(dPhiChangeMin); + segments.dPhiChangeMaxs()[idx] = __F2H(dPhiChangeMax); } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelSegmentToMemory(TAcc const& acc, - Segments& segmentsInGPU, - MiniDoublets const& mdsInGPU, + Segments segments, + SegmentsPixel segmentsPixel, + MiniDoubletsConst mds, unsigned int innerMDIndex, unsigned int outerMDIndex, uint16_t pixelModuleIndex, @@ -398,59 +237,59 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int idx, unsigned int pixelSegmentArrayIndex, float score) { - segmentsInGPU.mdIndices[idx * 2] = innerMDIndex; - segmentsInGPU.mdIndices[idx * 2 + 1] = outerMDIndex; - segmentsInGPU.innerLowerModuleIndices[idx] = pixelModuleIndex; - segmentsInGPU.outerLowerModuleIndices[idx] = pixelModuleIndex; - segmentsInGPU.innerMiniDoubletAnchorHitIndices[idx] = innerAnchorHitIndex; - segmentsInGPU.outerMiniDoubletAnchorHitIndices[idx] = outerAnchorHitIndex; - segmentsInGPU.dPhiChanges[idx] = __F2H(dPhiChange); - segmentsInGPU.isDup[pixelSegmentArrayIndex] = false; - segmentsInGPU.score[pixelSegmentArrayIndex] = score; - - segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].x = hitIdxs[0]; - 
segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].y = hitIdxs[1]; - segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].z = hitIdxs[2]; - segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].w = hitIdxs[3]; + segments.mdIndices()[idx][0] = innerMDIndex; + segments.mdIndices()[idx][1] = outerMDIndex; + segments.innerLowerModuleIndices()[idx] = pixelModuleIndex; + segments.outerLowerModuleIndices()[idx] = pixelModuleIndex; + segments.innerMiniDoubletAnchorHitIndices()[idx] = innerAnchorHitIndex; + segments.outerMiniDoubletAnchorHitIndices()[idx] = outerAnchorHitIndex; + segments.dPhiChanges()[idx] = __F2H(dPhiChange); + + segmentsPixel.isDup()[pixelSegmentArrayIndex] = false; + segmentsPixel.partOfPT5()[pixelSegmentArrayIndex] = false; + segmentsPixel.score()[pixelSegmentArrayIndex] = score; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].x = hitIdxs[0]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].y = hitIdxs[1]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].z = hitIdxs[2]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].w = hitIdxs[3]; //computing circle parameters /* The two anchor hits are r3PCA and r3LH. 
p3PCA pt, eta, phi is hitIndex1 x, y, z */ - float circleRadius = mdsInGPU.outerX[innerMDIndex] / (2 * k2Rinv1GeVf); - float circlePhi = mdsInGPU.outerZ[innerMDIndex]; - float candidateCenterXs[] = {mdsInGPU.anchorX[innerMDIndex] + circleRadius * alpaka::math::sin(acc, circlePhi), - mdsInGPU.anchorX[innerMDIndex] - circleRadius * alpaka::math::sin(acc, circlePhi)}; - float candidateCenterYs[] = {mdsInGPU.anchorY[innerMDIndex] - circleRadius * alpaka::math::cos(acc, circlePhi), - mdsInGPU.anchorY[innerMDIndex] + circleRadius * alpaka::math::cos(acc, circlePhi)}; + float circleRadius = mds.outerX()[innerMDIndex] / (2 * k2Rinv1GeVf); + float circlePhi = mds.outerZ()[innerMDIndex]; + float candidateCenterXs[] = {mds.anchorX()[innerMDIndex] + circleRadius * alpaka::math::sin(acc, circlePhi), + mds.anchorX()[innerMDIndex] - circleRadius * alpaka::math::sin(acc, circlePhi)}; + float candidateCenterYs[] = {mds.anchorY()[innerMDIndex] - circleRadius * alpaka::math::cos(acc, circlePhi), + mds.anchorY()[innerMDIndex] + circleRadius * alpaka::math::cos(acc, circlePhi)}; //check which of the circles can accommodate r3LH better (we won't get perfect agreement) float bestChiSquared = lst_INF; float chiSquared; size_t bestIndex; for (size_t i = 0; i < 2; i++) { - chiSquared = - alpaka::math::abs(acc, - alpaka::math::sqrt(acc, - (mdsInGPU.anchorX[outerMDIndex] - candidateCenterXs[i]) * - (mdsInGPU.anchorX[outerMDIndex] - candidateCenterXs[i]) + - (mdsInGPU.anchorY[outerMDIndex] - candidateCenterYs[i]) * - (mdsInGPU.anchorY[outerMDIndex] - candidateCenterYs[i])) - - circleRadius); + chiSquared = alpaka::math::abs(acc, + alpaka::math::sqrt(acc, + (mds.anchorX()[outerMDIndex] - candidateCenterXs[i]) * + (mds.anchorX()[outerMDIndex] - candidateCenterXs[i]) + + (mds.anchorY()[outerMDIndex] - candidateCenterYs[i]) * + (mds.anchorY()[outerMDIndex] - candidateCenterYs[i])) - + circleRadius); if (chiSquared < bestChiSquared) { bestChiSquared = chiSquared; bestIndex = i; } } - 
segmentsInGPU.circleCenterX[pixelSegmentArrayIndex] = candidateCenterXs[bestIndex]; - segmentsInGPU.circleCenterY[pixelSegmentArrayIndex] = candidateCenterYs[bestIndex]; - segmentsInGPU.circleRadius[pixelSegmentArrayIndex] = circleRadius; + segmentsPixel.circleCenterX()[pixelSegmentArrayIndex] = candidateCenterXs[bestIndex]; + segmentsPixel.circleCenterY()[pixelSegmentArrayIndex] = candidateCenterYs[bestIndex]; + segmentsPixel.circleRadius()[pixelSegmentArrayIndex] = circleRadius; } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoBarrel(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, uint16_t innerLowerModuleIndex, uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, @@ -461,27 +300,27 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& dPhiChange, float& dPhiChangeMin, float& dPhiChangeMax) { - float sdMuls = (modulesInGPU.subdets[innerLowerModuleIndex] == Barrel) - ? kMiniMulsPtScaleBarrel[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut - : kMiniMulsPtScaleEndcap[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut; + float sdMuls = (modules.subdets()[innerLowerModuleIndex] == Barrel) + ? 
kMiniMulsPtScaleBarrel[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut; float xIn, yIn, zIn, rtIn, xOut, yOut, zOut, rtOut; - xIn = mdsInGPU.anchorX[innerMDIndex]; - yIn = mdsInGPU.anchorY[innerMDIndex]; - zIn = mdsInGPU.anchorZ[innerMDIndex]; - rtIn = mdsInGPU.anchorRt[innerMDIndex]; + xIn = mds.anchorX()[innerMDIndex]; + yIn = mds.anchorY()[innerMDIndex]; + zIn = mds.anchorZ()[innerMDIndex]; + rtIn = mds.anchorRt()[innerMDIndex]; - xOut = mdsInGPU.anchorX[outerMDIndex]; - yOut = mdsInGPU.anchorY[outerMDIndex]; - zOut = mdsInGPU.anchorZ[outerMDIndex]; - rtOut = mdsInGPU.anchorRt[outerMDIndex]; + xOut = mds.anchorX()[outerMDIndex]; + yOut = mds.anchorY()[outerMDIndex]; + zOut = mds.anchorZ()[outerMDIndex]; + rtOut = mds.anchorRt()[outerMDIndex]; float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float sdPVoff = 0.1f / rtOut; float dzDrtScale = alpaka::math::tan(acc, sdSlope) / sdSlope; //FIXME: need appropriate value - const float zGeom = modulesInGPU.layers[innerLowerModuleIndex] <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch; + const float zGeom = modules.layers()[innerLowerModuleIndex] <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch; float zLo = zIn + (zIn - kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn > 0.f ? 
1.f : dzDrtScale) - zGeom; //slope-correction only on outer end @@ -492,12 +331,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float sdCut = sdSlope + alpaka::math::sqrt(acc, sdMuls * sdMuls + sdPVoff * sdPVoff); - dPhi = phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + dPhi = phi_mpi_pi(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); if (alpaka::math::abs(acc, dPhi) > sdCut) return false; - dPhiChange = phi_mpi_pi(acc, phi(acc, xOut - xIn, yOut - yIn) - mdsInGPU.anchorPhi[innerMDIndex]); + dPhiChange = phi_mpi_pi(acc, phi(acc, xOut - xIn, yOut - yIn) - mds.anchorPhi()[innerMDIndex]); if (alpaka::math::abs(acc, dPhiChange) > sdCut) return false; @@ -505,8 +344,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dAlphaThresholdValues[3]; dAlphaThreshold(acc, dAlphaThresholdValues, - modulesInGPU, - mdsInGPU, + modules, + mds, xIn, yIn, zIn, @@ -520,8 +359,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { innerMDIndex, outerMDIndex); - float innerMDAlpha = mdsInGPU.dphichanges[innerMDIndex]; - float outerMDAlpha = mdsInGPU.dphichanges[outerMDIndex]; + float innerMDAlpha = mds.dphichanges()[innerMDIndex]; + float outerMDAlpha = mds.dphichanges()[outerMDIndex]; float dAlphaInnerMDSegment = innerMDAlpha - dPhiChange; float dAlphaOuterMDSegment = outerMDAlpha - dPhiChange; float dAlphaInnerMDOuterMD = innerMDAlpha - outerMDAlpha; @@ -539,8 +378,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoEndcap(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, uint16_t innerLowerModuleIndex, uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, @@ -553,18 +392,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& dPhiChangeMax) { float xIn, yIn, zIn, rtIn, xOut, yOut, zOut, rtOut; - xIn = mdsInGPU.anchorX[innerMDIndex]; - yIn = mdsInGPU.anchorY[innerMDIndex]; 
- zIn = mdsInGPU.anchorZ[innerMDIndex]; - rtIn = mdsInGPU.anchorRt[innerMDIndex]; + xIn = mds.anchorX()[innerMDIndex]; + yIn = mds.anchorY()[innerMDIndex]; + zIn = mds.anchorZ()[innerMDIndex]; + rtIn = mds.anchorRt()[innerMDIndex]; - xOut = mdsInGPU.anchorX[outerMDIndex]; - yOut = mdsInGPU.anchorY[outerMDIndex]; - zOut = mdsInGPU.anchorZ[outerMDIndex]; - rtOut = mdsInGPU.anchorRt[outerMDIndex]; + xOut = mds.anchorX()[outerMDIndex]; + yOut = mds.anchorY()[outerMDIndex]; + zOut = mds.anchorZ()[outerMDIndex]; + rtOut = mds.anchorRt()[outerMDIndex]; - bool outerLayerEndcapTwoS = (modulesInGPU.subdets[outerLowerModuleIndex] == Endcap) && - (modulesInGPU.moduleType[outerLowerModuleIndex] == TwoS); + bool outerLayerEndcapTwoS = + (modules.subdets()[outerLowerModuleIndex] == Endcap) && (modules.moduleType()[outerLowerModuleIndex] == TwoS); float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float disks2SMinRadius = 60.f; @@ -592,12 +431,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if ((rtOut < rtLo) || (rtOut > rtHi)) return false; - dPhi = phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + dPhi = phi_mpi_pi(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); float sdCut = sdSlope; if (outerLayerEndcapTwoS) { - float dPhiPos_high = phi_mpi_pi(acc, mdsInGPU.anchorHighEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); - float dPhiPos_low = phi_mpi_pi(acc, mdsInGPU.anchorLowEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + float dPhiPos_high = phi_mpi_pi(acc, mds.anchorHighEdgePhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); + float dPhiPos_low = phi_mpi_pi(acc, mds.anchorLowEdgePhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); dPhiMax = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? dPhiPos_high : dPhiPos_low; dPhiMin = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? 
dPhiPos_low : dPhiPos_high; @@ -619,8 +458,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dAlphaThresholdValues[3]; dAlphaThreshold(acc, dAlphaThresholdValues, - modulesInGPU, - mdsInGPU, + modules, + mds, xIn, yIn, zIn, @@ -634,8 +473,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { innerMDIndex, outerMDIndex); - float innerMDAlpha = mdsInGPU.dphichanges[innerMDIndex]; - float outerMDAlpha = mdsInGPU.dphichanges[outerMDIndex]; + float innerMDAlpha = mds.dphichanges()[innerMDIndex]; + float outerMDAlpha = mds.dphichanges()[outerMDIndex]; float dAlphaInnerMDSegment = innerMDAlpha - dPhiChange; float dAlphaOuterMDSegment = outerMDAlpha - dPhiChange; float dAlphaInnerMDOuterMD = innerMDAlpha - outerMDAlpha; @@ -653,8 +492,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgo(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, uint16_t innerLowerModuleIndex, uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, @@ -665,11 +504,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& dPhiChange, float& dPhiChangeMin, float& dPhiChangeMax) { - if (modulesInGPU.subdets[innerLowerModuleIndex] == Barrel and - modulesInGPU.subdets[outerLowerModuleIndex] == Barrel) { + if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.subdets()[outerLowerModuleIndex] == Barrel) { return runSegmentDefaultAlgoBarrel(acc, - modulesInGPU, - mdsInGPU, + modules, + mds, innerLowerModuleIndex, outerLowerModuleIndex, innerMDIndex, @@ -682,8 +520,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { dPhiChangeMax); } else { return runSegmentDefaultAlgoEndcap(acc, - modulesInGPU, - mdsInGPU, + modules, + mds, innerLowerModuleIndex, outerLowerModuleIndex, innerMDIndex, @@ -697,32 +535,33 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } } - struct CreateSegmentsInGPUv2 { + struct CreateSegments { template ALPAKA_FN_ACC void 
operator()(TAcc const& acc, - Modules modulesInGPU, - MiniDoublets mdsInGPU, - Segments segmentsInGPU, - ObjectRanges rangesInGPU) const { + ModulesConst modules, + MiniDoubletsConst mds, + MiniDoubletsOccupancyConst mdsOccupancy, + Segments segments, + SegmentsOccupancy segmentsOccupancy, + ObjectRangesConst ranges) const { auto const globalBlockIdx = alpaka::getIdx(acc); auto const blockThreadIdx = alpaka::getIdx(acc); auto const gridBlockExtent = alpaka::getWorkDiv(acc); auto const blockThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t innerLowerModuleIndex = globalBlockIdx[2]; innerLowerModuleIndex < (*modulesInGPU.nLowerModules); + for (uint16_t innerLowerModuleIndex = globalBlockIdx[2]; innerLowerModuleIndex < modules.nLowerModules(); innerLowerModuleIndex += gridBlockExtent[2]) { - unsigned int nInnerMDs = mdsInGPU.nMDs[innerLowerModuleIndex]; + unsigned int nInnerMDs = mdsOccupancy.nMDs()[innerLowerModuleIndex]; if (nInnerMDs == 0) continue; - unsigned int nConnectedModules = modulesInGPU.nConnectedModules[innerLowerModuleIndex]; + unsigned int nConnectedModules = modules.nConnectedModules()[innerLowerModuleIndex]; for (uint16_t outerLowerModuleArrayIdx = blockThreadIdx[1]; outerLowerModuleArrayIdx < nConnectedModules; outerLowerModuleArrayIdx += blockThreadExtent[1]) { - uint16_t outerLowerModuleIndex = - modulesInGPU.moduleMap[innerLowerModuleIndex * max_connected_modules + outerLowerModuleArrayIdx]; + uint16_t outerLowerModuleIndex = modules.moduleMap()[innerLowerModuleIndex][outerLowerModuleArrayIdx]; - unsigned int nOuterMDs = mdsInGPU.nMDs[outerLowerModuleIndex]; + unsigned int nOuterMDs = mdsOccupancy.nMDs()[outerLowerModuleIndex]; unsigned int limit = nInnerMDs * nOuterMDs; @@ -734,20 +573,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (outerMDArrayIdx >= nOuterMDs) continue; - unsigned int innerMDIndex = rangesInGPU.mdRanges[innerLowerModuleIndex * 2] + innerMDArrayIdx; - unsigned int outerMDIndex = 
rangesInGPU.mdRanges[outerLowerModuleIndex * 2] + outerMDArrayIdx; + unsigned int innerMDIndex = ranges.mdRanges()[innerLowerModuleIndex][0] + innerMDArrayIdx; + unsigned int outerMDIndex = ranges.mdRanges()[outerLowerModuleIndex][0] + outerMDArrayIdx; float dPhi, dPhiMin, dPhiMax, dPhiChange, dPhiChangeMin, dPhiChangeMax; - unsigned int innerMiniDoubletAnchorHitIndex = mdsInGPU.anchorHitIndices[innerMDIndex]; - unsigned int outerMiniDoubletAnchorHitIndex = mdsInGPU.anchorHitIndices[outerMDIndex]; + unsigned int innerMiniDoubletAnchorHitIndex = mds.anchorHitIndices()[innerMDIndex]; + unsigned int outerMiniDoubletAnchorHitIndex = mds.anchorHitIndices()[outerMDIndex]; dPhiMin = 0; dPhiMax = 0; dPhiChangeMin = 0; dPhiChangeMax = 0; if (runSegmentDefaultAlgo(acc, - modulesInGPU, - mdsInGPU, + modules, + mds, innerLowerModuleIndex, outerLowerModuleIndex, innerMDIndex, @@ -758,18 +597,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { dPhiChange, dPhiChangeMin, dPhiChangeMax)) { - unsigned int totOccupancySegments = alpaka::atomicAdd( - acc, &segmentsInGPU.totOccupancySegments[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); - if (static_cast(totOccupancySegments) >= rangesInGPU.segmentModuleOccupancy[innerLowerModuleIndex]) { + unsigned int totOccupancySegments = + alpaka::atomicAdd(acc, + &segmentsOccupancy.totOccupancySegments()[innerLowerModuleIndex], + 1u, + alpaka::hierarchy::Threads{}); + if (static_cast(totOccupancySegments) >= ranges.segmentModuleOccupancy()[innerLowerModuleIndex]) { #ifdef WARNINGS printf("Segment excess alert! 
Module index = %d\n", innerLowerModuleIndex); #endif } else { unsigned int segmentModuleIdx = alpaka::atomicAdd( - acc, &segmentsInGPU.nSegments[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); - unsigned int segmentIdx = rangesInGPU.segmentModuleIndices[innerLowerModuleIndex] + segmentModuleIdx; + acc, &segmentsOccupancy.nSegments()[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int segmentIdx = ranges.segmentModuleIndices()[innerLowerModuleIndex] + segmentModuleIdx; - addSegmentToMemory(segmentsInGPU, + addSegmentToMemory(segments, innerMDIndex, outerMDIndex, innerLowerModuleIndex, @@ -794,9 +636,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct CreateSegmentArrayRanges { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - ObjectRanges rangesInGPU, - MiniDoublets mdsInGPU) const { + ModulesConst modules, + ObjectRanges ranges, + MiniDoubletsConst mds) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -814,17 +656,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // Create variables outside of the for loop. 
int occupancy, category_number, eta_number; - for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { - if (modulesInGPU.nConnectedModules[i] == 0) { - rangesInGPU.segmentModuleIndices[i] = nTotalSegments; - rangesInGPU.segmentModuleOccupancy[i] = 0; + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (modules.nConnectedModules()[i] == 0) { + ranges.segmentModuleIndices()[i] = nTotalSegments; + ranges.segmentModuleOccupancy()[i] = 0; continue; } - short module_rings = modulesInGPU.rings[i]; - short module_layers = modulesInGPU.layers[i]; - short module_subdets = modulesInGPU.subdets[i]; - float module_eta = alpaka::math::abs(acc, modulesInGPU.eta[i]); + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); if (module_layers <= 3 && module_subdets == 5) category_number = 0; @@ -882,15 +724,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } int nTotSegs = alpaka::atomicAdd(acc, &nTotalSegments, occupancy, alpaka::hierarchy::Threads{}); - rangesInGPU.segmentModuleIndices[i] = nTotSegs; - rangesInGPU.segmentModuleOccupancy[i] = occupancy; + ranges.segmentModuleIndices()[i] = nTotSegs; + ranges.segmentModuleOccupancy()[i] = occupancy; } // Wait for all threads to finish before reporting final values alpaka::syncBlockThreads(acc); if (cms::alpakatools::once_per_block(acc)) { - rangesInGPU.segmentModuleIndices[*modulesInGPU.nLowerModules] = nTotalSegments; - *rangesInGPU.device_nTotalSegs = nTotalSegments; + ranges.segmentModuleIndices()[modules.nLowerModules()] = nTotalSegments; + ranges.nTotalSegs() = nTotalSegments; } } }; @@ -898,9 +740,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct AddSegmentRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - Segments segmentsInGPU, - 
ObjectRanges rangesInGPU) const { + ModulesConst modules, + SegmentsOccupancyConst segmentsOccupancy, + ObjectRanges ranges) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -908,13 +750,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { - if (segmentsInGPU.nSegments[i] == 0) { - rangesInGPU.segmentRanges[i * 2] = -1; - rangesInGPU.segmentRanges[i * 2 + 1] = -1; + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (segmentsOccupancy.nSegments()[i] == 0) { + ranges.segmentRanges()[i][0] = -1; + ranges.segmentRanges()[i][1] = -1; } else { - rangesInGPU.segmentRanges[i * 2] = rangesInGPU.segmentModuleIndices[i]; - rangesInGPU.segmentRanges[i * 2 + 1] = rangesInGPU.segmentModuleIndices[i] + segmentsInGPU.nSegments[i] - 1; + ranges.segmentRanges()[i][0] = ranges.segmentModuleIndices()[i]; + ranges.segmentRanges()[i][1] = ranges.segmentModuleIndices()[i] + segmentsOccupancy.nSegments()[i] - 1; } } } @@ -923,11 +765,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct AddPixelSegmentToEventKernel { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - ObjectRanges rangesInGPU, - Hits hitsInGPU, - MiniDoublets mdsInGPU, - Segments segmentsInGPU, + ModulesConst modules, + ObjectRangesConst ranges, + HitsConst hits, + MiniDoublets mds, + Segments segments, + SegmentsPixel segmentsPixel, unsigned int* hitIndices0, unsigned int* hitIndices1, unsigned int* hitIndices2, @@ -939,14 +782,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const gridThreadExtent = alpaka::getWorkDiv(acc); for (int tid = globalThreadIdx[2]; tid < size; tid += gridThreadExtent[2]) { - unsigned int 
innerMDIndex = rangesInGPU.miniDoubletModuleIndices[pixelModuleIndex] + 2 * (tid); - unsigned int outerMDIndex = rangesInGPU.miniDoubletModuleIndices[pixelModuleIndex] + 2 * (tid) + 1; - unsigned int pixelSegmentIndex = rangesInGPU.segmentModuleIndices[pixelModuleIndex] + tid; + unsigned int innerMDIndex = ranges.miniDoubletModuleIndices()[pixelModuleIndex] + 2 * (tid); + unsigned int outerMDIndex = ranges.miniDoubletModuleIndices()[pixelModuleIndex] + 2 * (tid) + 1; + unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + tid; addMDToMemory(acc, - mdsInGPU, - hitsInGPU, - modulesInGPU, + mds, + hits, + modules, hitIndices0[tid], hitIndices1[tid], pixelModuleIndex, @@ -960,9 +803,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { 0, innerMDIndex); addMDToMemory(acc, - mdsInGPU, - hitsInGPU, - modulesInGPU, + mds, + hits, + modules, hitIndices2[tid], hitIndices3[tid], pixelModuleIndex, @@ -977,21 +820,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { outerMDIndex); //in outer hits - pt, eta, phi - float slope = alpaka::math::sinh(acc, hitsInGPU.ys[mdsInGPU.outerHitIndices[innerMDIndex]]); - float intercept = hitsInGPU.zs[mdsInGPU.anchorHitIndices[innerMDIndex]] - - slope * hitsInGPU.rts[mdsInGPU.anchorHitIndices[innerMDIndex]]; - float score_lsq = (hitsInGPU.rts[mdsInGPU.anchorHitIndices[outerMDIndex]] * slope + intercept) - - (hitsInGPU.zs[mdsInGPU.anchorHitIndices[outerMDIndex]]); + float slope = alpaka::math::sinh(acc, hits.ys()[mds.outerHitIndices()[innerMDIndex]]); + float intercept = + hits.zs()[mds.anchorHitIndices()[innerMDIndex]] - slope * hits.rts()[mds.anchorHitIndices()[innerMDIndex]]; + float score_lsq = (hits.rts()[mds.anchorHitIndices()[outerMDIndex]] * slope + intercept) - + (hits.zs()[mds.anchorHitIndices()[outerMDIndex]]); score_lsq = score_lsq * score_lsq; unsigned int hits1[Params_pLS::kHits]; - hits1[0] = hitsInGPU.idxs[mdsInGPU.anchorHitIndices[innerMDIndex]]; - hits1[1] = 
hitsInGPU.idxs[mdsInGPU.anchorHitIndices[outerMDIndex]]; - hits1[2] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[innerMDIndex]]; - hits1[3] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[outerMDIndex]]; + hits1[0] = hits.idxs()[mds.anchorHitIndices()[innerMDIndex]]; + hits1[1] = hits.idxs()[mds.anchorHitIndices()[outerMDIndex]]; + hits1[2] = hits.idxs()[mds.outerHitIndices()[innerMDIndex]]; + hits1[3] = hits.idxs()[mds.outerHitIndices()[outerMDIndex]]; addPixelSegmentToMemory(acc, - segmentsInGPU, - mdsInGPU, + segments, + segmentsPixel, + mds, innerMDIndex, outerMDIndex, pixelModuleIndex, diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 16f36df3257cd..9bbf2de9c2cee 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -2,150 +2,56 @@ #define RecoTracker_LSTCore_src_alpaka_TrackCandidate_h #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" -#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" -#include "Triplet.h" -#include "Segment.h" -#include "MiniDoublet.h" -#include "PixelTriplet.h" -#include "Quintuplet.h" #include "Hit.h" -#include "ObjectRanges.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct TrackCandidates { - short* trackCandidateType; // 4-T5 5-pT3 7-pT5 8-pLS - unsigned int* directObjectIndices; // Will hold direct indices to each type containers - unsigned int* objectIndices; // Will hold tracklet and triplet indices - check the type!! 
- unsigned int* nTrackCandidates; - unsigned int* nTrackCandidatespT3; - unsigned int* nTrackCandidatespT5; - unsigned int* nTrackCandidatespLS; - unsigned int* nTrackCandidatesT5; - - uint8_t* logicalLayers; - unsigned int* hitIndices; - int* pixelSeedIndex; - uint16_t* lowerModuleIndices; - - FPX* centerX; - FPX* centerY; - FPX* radius; - - template - void setData(TBuff& buf) { - trackCandidateType = buf.trackCandidateType_buf.data(); - directObjectIndices = buf.directObjectIndices_buf.data(); - objectIndices = buf.objectIndices_buf.data(); - nTrackCandidates = buf.nTrackCandidates_buf.data(); - nTrackCandidatespT3 = buf.nTrackCandidatespT3_buf.data(); - nTrackCandidatespT5 = buf.nTrackCandidatespT5_buf.data(); - nTrackCandidatespLS = buf.nTrackCandidatespLS_buf.data(); - nTrackCandidatesT5 = buf.nTrackCandidatesT5_buf.data(); - - logicalLayers = buf.logicalLayers_buf.data(); - hitIndices = buf.hitIndices_buf.data(); - pixelSeedIndex = buf.pixelSeedIndex_buf.data(); - lowerModuleIndices = buf.lowerModuleIndices_buf.data(); - - centerX = buf.centerX_buf.data(); - centerY = buf.centerY_buf.data(); - radius = buf.radius_buf.data(); - } - }; - - template - struct TrackCandidatesBuffer { - Buf trackCandidateType_buf; - Buf directObjectIndices_buf; - Buf objectIndices_buf; - Buf nTrackCandidates_buf; - Buf nTrackCandidatespT3_buf; - Buf nTrackCandidatespT5_buf; - Buf nTrackCandidatespLS_buf; - Buf nTrackCandidatesT5_buf; - - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf pixelSeedIndex_buf; - Buf lowerModuleIndices_buf; - - Buf centerX_buf; - Buf centerY_buf; - Buf radius_buf; - - TrackCandidates data_; - - template - TrackCandidatesBuffer(unsigned int maxTrackCandidates, TDevAcc const& devAccIn, TQueue& queue) - : trackCandidateType_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - directObjectIndices_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - objectIndices_buf(allocBufWrapper(devAccIn, 2 * maxTrackCandidates, queue)), - 
nTrackCandidates_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatespT3_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatespT5_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatespLS_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatesT5_buf(allocBufWrapper(devAccIn, 1, queue)), - logicalLayers_buf(allocBufWrapper(devAccIn, Params_pT5::kLayers * maxTrackCandidates, queue)), - hitIndices_buf(allocBufWrapper(devAccIn, Params_pT5::kHits * maxTrackCandidates, queue)), - pixelSeedIndex_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - lowerModuleIndices_buf(allocBufWrapper(devAccIn, Params_pT5::kLayers * maxTrackCandidates, queue)), - centerX_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - centerY_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - radius_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)) { - alpaka::memset(queue, nTrackCandidates_buf, 0u); - alpaka::memset(queue, nTrackCandidatesT5_buf, 0u); - alpaka::memset(queue, nTrackCandidatespT3_buf, 0u); - alpaka::memset(queue, nTrackCandidatespT5_buf, 0u); - alpaka::memset(queue, nTrackCandidatespLS_buf, 0u); - alpaka::memset(queue, logicalLayers_buf, 0u); - alpaka::memset(queue, lowerModuleIndices_buf, 0u); - alpaka::memset(queue, hitIndices_buf, 0u); - alpaka::memset(queue, pixelSeedIndex_buf, 0); - } - - inline TrackCandidates const* data() const { return &data_; } - inline void setData(TrackCandidatesBuffer& buf) { data_.setData(buf); } - }; - - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& cands, unsigned int trackletIndex, unsigned int trackCandidateIndex, uint4 hitIndices, int pixelSeedIndex) { - trackCandidatesInGPU.trackCandidateType[trackCandidateIndex] = 8; // type for pLS - trackCandidatesInGPU.directObjectIndices[trackCandidateIndex] = trackletIndex; - 
trackCandidatesInGPU.pixelSeedIndex[trackCandidateIndex] = pixelSeedIndex; + cands.trackCandidateType()[trackCandidateIndex] = 8; // type for pLS + cands.directObjectIndices()[trackCandidateIndex] = trackletIndex; + cands.pixelSeedIndex()[trackCandidateIndex] = pixelSeedIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex] = trackletIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex + 1] = trackletIndex; + cands.objectIndices()[trackCandidateIndex][0] = trackletIndex; + cands.objectIndices()[trackCandidateIndex][1] = trackletIndex; - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 0] = + cands.hitIndices()[trackCandidateIndex][0] = hitIndices.x; // Order explanation in https://github.com/SegmentLinking/TrackLooper/issues/267 - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 1] = hitIndices.z; - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 2] = hitIndices.y; - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 3] = hitIndices.w; + cands.hitIndices()[trackCandidateIndex][1] = hitIndices.z; + cands.hitIndices()[trackCandidateIndex][2] = hitIndices.y; + cands.hitIndices()[trackCandidateIndex][3] = hitIndices.w; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(TrackCandidates& cands, short trackCandidateType, unsigned int innerTrackletIndex, unsigned int outerTrackletIndex, - uint8_t* logicalLayerIndices, - uint16_t* lowerModuleIndices, - unsigned int* hitIndices, + const uint8_t* logicalLayerIndices, + const uint16_t* lowerModuleIndices, + const unsigned int* hitIndices, int pixelSeedIndex, float centerX, float centerY, float radius, unsigned int trackCandidateIndex, unsigned int directObjectIndex) { - trackCandidatesInGPU.trackCandidateType[trackCandidateIndex] = trackCandidateType; - 
trackCandidatesInGPU.directObjectIndices[trackCandidateIndex] = directObjectIndex; - trackCandidatesInGPU.pixelSeedIndex[trackCandidateIndex] = pixelSeedIndex; + cands.trackCandidateType()[trackCandidateIndex] = trackCandidateType; + cands.directObjectIndices()[trackCandidateIndex] = directObjectIndex; + cands.pixelSeedIndex()[trackCandidateIndex] = pixelSeedIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex] = innerTrackletIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex + 1] = outerTrackletIndex; + cands.objectIndices()[trackCandidateIndex][0] = innerTrackletIndex; + cands.objectIndices()[trackCandidateIndex][1] = outerTrackletIndex; size_t limits = trackCandidateType == 7 ? Params_pT5::kLayers @@ -153,34 +59,31 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //send the starting pointer to the logicalLayer and hitIndices for (size_t i = 0; i < limits; i++) { - trackCandidatesInGPU.logicalLayers[Params_pT5::kLayers * trackCandidateIndex + i] = logicalLayerIndices[i]; - trackCandidatesInGPU.lowerModuleIndices[Params_pT5::kLayers * trackCandidateIndex + i] = lowerModuleIndices[i]; + cands.logicalLayers()[trackCandidateIndex][i] = logicalLayerIndices[i]; + cands.lowerModuleIndices()[trackCandidateIndex][i] = lowerModuleIndices[i]; } for (size_t i = 0; i < 2 * limits; i++) { - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + i] = hitIndices[i]; + cands.hitIndices()[trackCandidateIndex][i] = hitIndices[i]; } - trackCandidatesInGPU.centerX[trackCandidateIndex] = __F2H(centerX); - trackCandidatesInGPU.centerY[trackCandidateIndex] = __F2H(centerY); - trackCandidatesInGPU.radius[trackCandidateIndex] = __F2H(radius); + cands.centerX()[trackCandidateIndex] = __F2H(centerX); + cands.centerY()[trackCandidateIndex] = __F2H(centerY); + cands.radius()[trackCandidateIndex] = __F2H(radius); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits(unsigned int ix, - unsigned int jx, - MiniDoublets const& mdsInGPU, - Segments 
const& segmentsInGPU, - Hits const& hitsInGPU) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits( + unsigned int ix, unsigned int jx, MiniDoubletsConst mds, SegmentsConst segments, HitsConst hits) { int phits1[Params_pLS::kHits]; int phits2[Params_pLS::kHits]; - phits1[0] = hitsInGPU.idxs[mdsInGPU.anchorHitIndices[segmentsInGPU.mdIndices[2 * ix]]]; - phits1[1] = hitsInGPU.idxs[mdsInGPU.anchorHitIndices[segmentsInGPU.mdIndices[2 * ix + 1]]]; - phits1[2] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[segmentsInGPU.mdIndices[2 * ix]]]; - phits1[3] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[segmentsInGPU.mdIndices[2 * ix + 1]]]; + phits1[0] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[ix][0]]]; + phits1[1] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[ix][1]]]; + phits1[2] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[ix][0]]]; + phits1[3] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[ix][1]]]; - phits2[0] = hitsInGPU.idxs[mdsInGPU.anchorHitIndices[segmentsInGPU.mdIndices[2 * jx]]]; - phits2[1] = hitsInGPU.idxs[mdsInGPU.anchorHitIndices[segmentsInGPU.mdIndices[2 * jx + 1]]]; - phits2[2] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[segmentsInGPU.mdIndices[2 * jx]]]; - phits2[3] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[segmentsInGPU.mdIndices[2 * jx + 1]]]; + phits2[0] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[jx][0]]]; + phits2[1] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[jx][1]]]; + phits2[2] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[jx][0]]]; + phits2[3] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[jx][1]]]; int npMatched = 0; @@ -207,39 +110,39 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct CrossCleanpT3 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - ObjectRanges rangesInGPU, - PixelTriplets pixelTripletsInGPU, - Segments segmentsInGPU, - PixelQuintuplets pixelQuintupletsInGPU) const { + ModulesConst modules, + 
ObjectRangesConst ranges, + PixelTriplets pixelTriplets, + SegmentsPixelConst segmentsPixel, + PixelQuintupletsConst pixelQuintuplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - unsigned int nPixelTriplets = *pixelTripletsInGPU.nPixelTriplets; + unsigned int nPixelTriplets = pixelTriplets.nPixelTriplets(); for (unsigned int pixelTripletIndex = globalThreadIdx[2]; pixelTripletIndex < nPixelTriplets; pixelTripletIndex += gridThreadExtent[2]) { - if (pixelTripletsInGPU.isDup[pixelTripletIndex]) + if (pixelTriplets.isDup()[pixelTripletIndex]) continue; // Cross cleaning step - float eta1 = __H2F(pixelTripletsInGPU.eta_pix[pixelTripletIndex]); - float phi1 = __H2F(pixelTripletsInGPU.phi_pix[pixelTripletIndex]); + float eta1 = __H2F(pixelTriplets.eta_pix()[pixelTripletIndex]); + float phi1 = __H2F(pixelTriplets.phi_pix()[pixelTripletIndex]); - int pixelModuleIndex = *modulesInGPU.nLowerModules; - unsigned int prefix = rangesInGPU.segmentModuleIndices[pixelModuleIndex]; + int pixelModuleIndex = modules.nLowerModules(); + unsigned int prefix = ranges.segmentModuleIndices()[pixelModuleIndex]; - unsigned int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets; + unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); for (unsigned int pixelQuintupletIndex = globalThreadIdx[1]; pixelQuintupletIndex < nPixelQuintuplets; pixelQuintupletIndex += gridThreadExtent[1]) { - unsigned int pLS_jx = pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex]; - float eta2 = segmentsInGPU.eta[pLS_jx - prefix]; - float phi2 = segmentsInGPU.phi[pLS_jx - prefix]; + unsigned int pLS_jx = pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex]; + float eta2 = segmentsPixel.eta()[pLS_jx - prefix]; + float phi2 = segmentsPixel.phi()[pLS_jx - prefix]; float dEta = alpaka::math::abs(acc, (eta1 - eta2)); float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-5f) - 
pixelTripletsInGPU.isDup[pixelTripletIndex] = true; + pixelTriplets.isDup()[pixelTripletIndex] = true; } } } @@ -248,43 +151,44 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct CrossCleanT5 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - Quintuplets quintupletsInGPU, - PixelQuintuplets pixelQuintupletsInGPU, - PixelTriplets pixelTripletsInGPU, - ObjectRanges rangesInGPU) const { + ModulesConst modules, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + PixelQuintupletsConst pixelQuintuplets, + PixelTripletsConst pixelTriplets, + ObjectRangesConst ranges) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); for (int innerInnerInnerLowerModuleArrayIndex = globalThreadIdx[0]; - innerInnerInnerLowerModuleArrayIndex < *(modulesInGPU.nLowerModules); + innerInnerInnerLowerModuleArrayIndex < modules.nLowerModules(); innerInnerInnerLowerModuleArrayIndex += gridThreadExtent[0]) { - if (rangesInGPU.quintupletModuleIndices[innerInnerInnerLowerModuleArrayIndex] == -1) + if (ranges.quintupletModuleIndices()[innerInnerInnerLowerModuleArrayIndex] == -1) continue; - unsigned int nQuints = quintupletsInGPU.nQuintuplets[innerInnerInnerLowerModuleArrayIndex]; + unsigned int nQuints = quintupletsOccupancy.nQuintuplets()[innerInnerInnerLowerModuleArrayIndex]; for (unsigned int innerObjectArrayIndex = globalThreadIdx[1]; innerObjectArrayIndex < nQuints; innerObjectArrayIndex += gridThreadExtent[1]) { unsigned int quintupletIndex = - rangesInGPU.quintupletModuleIndices[innerInnerInnerLowerModuleArrayIndex] + innerObjectArrayIndex; + ranges.quintupletModuleIndices()[innerInnerInnerLowerModuleArrayIndex] + innerObjectArrayIndex; // Don't add duplicate T5s or T5s that are accounted in pT5s - if (quintupletsInGPU.isDup[quintupletIndex] or quintupletsInGPU.partOfPT5[quintupletIndex]) + if (quintuplets.isDup()[quintupletIndex] or 
quintuplets.partOfPT5()[quintupletIndex]) continue; #ifdef Crossclean_T5 - unsigned int loop_bound = *pixelQuintupletsInGPU.nPixelQuintuplets + *pixelTripletsInGPU.nPixelTriplets; + unsigned int loop_bound = pixelQuintuplets.nPixelQuintuplets() + pixelTriplets.nPixelTriplets(); // Cross cleaning step - float eta1 = __H2F(quintupletsInGPU.eta[quintupletIndex]); - float phi1 = __H2F(quintupletsInGPU.phi[quintupletIndex]); + float eta1 = __H2F(quintuplets.eta()[quintupletIndex]); + float phi1 = __H2F(quintuplets.phi()[quintupletIndex]); for (unsigned int jx = globalThreadIdx[2]; jx < loop_bound; jx += gridThreadExtent[2]) { float eta2, phi2; - if (jx < *pixelQuintupletsInGPU.nPixelQuintuplets) { - eta2 = __H2F(pixelQuintupletsInGPU.eta[jx]); - phi2 = __H2F(pixelQuintupletsInGPU.phi[jx]); + if (jx < pixelQuintuplets.nPixelQuintuplets()) { + eta2 = __H2F(pixelQuintuplets.eta()[jx]); + phi2 = __H2F(pixelQuintuplets.phi()[jx]); } else { - eta2 = __H2F(pixelTripletsInGPU.eta[jx - *pixelQuintupletsInGPU.nPixelQuintuplets]); - phi2 = __H2F(pixelTripletsInGPU.phi[jx - *pixelQuintupletsInGPU.nPixelQuintuplets]); + eta2 = __H2F(pixelTriplets.eta()[jx - pixelQuintuplets.nPixelQuintuplets()]); + phi2 = __H2F(pixelTriplets.phi()[jx - pixelQuintuplets.nPixelQuintuplets()]); } float dEta = alpaka::math::abs(acc, eta1 - eta2); @@ -292,7 +196,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-3f) - quintupletsInGPU.isDup[quintupletIndex] = true; + quintuplets.isDup()[quintupletIndex] = true; } #endif } @@ -303,92 +207,94 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct CrossCleanpLS { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - ObjectRanges rangesInGPU, - PixelTriplets pixelTripletsInGPU, - TrackCandidates trackCandidatesInGPU, - Segments segmentsInGPU, - MiniDoublets mdsInGPU, - Hits hitsInGPU, - Quintuplets quintupletsInGPU) const { + ModulesConst modules, + ObjectRangesConst ranges, + 
PixelTripletsConst pixelTriplets, + TrackCandidates cands, + SegmentsConst segments, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixel segmentsPixel, + MiniDoubletsConst mds, + HitsConst hits, + QuintupletsConst quintuplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - int pixelModuleIndex = *modulesInGPU.nLowerModules; - unsigned int nPixels = segmentsInGPU.nSegments[pixelModuleIndex]; + int pixelModuleIndex = modules.nLowerModules(); + unsigned int nPixels = segmentsOccupancy.nSegments()[pixelModuleIndex]; for (unsigned int pixelArrayIndex = globalThreadIdx[2]; pixelArrayIndex < nPixels; pixelArrayIndex += gridThreadExtent[2]) { - if (!segmentsInGPU.isQuad[pixelArrayIndex] || segmentsInGPU.isDup[pixelArrayIndex]) + if (!segmentsPixel.isQuad()[pixelArrayIndex] || segmentsPixel.isDup()[pixelArrayIndex]) continue; - float eta1 = segmentsInGPU.eta[pixelArrayIndex]; - float phi1 = segmentsInGPU.phi[pixelArrayIndex]; - unsigned int prefix = rangesInGPU.segmentModuleIndices[pixelModuleIndex]; + float eta1 = segmentsPixel.eta()[pixelArrayIndex]; + float phi1 = segmentsPixel.phi()[pixelArrayIndex]; + unsigned int prefix = ranges.segmentModuleIndices()[pixelModuleIndex]; - unsigned int nTrackCandidates = *(trackCandidatesInGPU.nTrackCandidates); + unsigned int nTrackCandidates = cands.nTrackCandidates(); for (unsigned int trackCandidateIndex = globalThreadIdx[1]; trackCandidateIndex < nTrackCandidates; trackCandidateIndex += gridThreadExtent[1]) { - short type = trackCandidatesInGPU.trackCandidateType[trackCandidateIndex]; - unsigned int innerTrackletIdx = trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex]; + short type = cands.trackCandidateType()[trackCandidateIndex]; + unsigned int innerTrackletIdx = cands.objectIndices()[trackCandidateIndex][0]; if (type == 4) // T5 { unsigned int quintupletIndex = innerTrackletIdx; // T5 index - float eta2 = 
__H2F(quintupletsInGPU.eta[quintupletIndex]); - float phi2 = __H2F(quintupletsInGPU.phi[quintupletIndex]); + float eta2 = __H2F(quintuplets.eta()[quintupletIndex]); + float phi2 = __H2F(quintuplets.phi()[quintupletIndex]); float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-3f) - segmentsInGPU.isDup[pixelArrayIndex] = true; + segmentsPixel.isDup()[pixelArrayIndex] = true; } if (type == 5) // pT3 { - int pLSIndex = pixelTripletsInGPU.pixelSegmentIndices[innerTrackletIdx]; - int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mdsInGPU, segmentsInGPU, hitsInGPU); + int pLSIndex = pixelTriplets.pixelSegmentIndices()[innerTrackletIdx]; + int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hits); if (npMatched > 0) - segmentsInGPU.isDup[pixelArrayIndex] = true; + segmentsPixel.isDup()[pixelArrayIndex] = true; int pT3Index = innerTrackletIdx; - float eta2 = __H2F(pixelTripletsInGPU.eta_pix[pT3Index]); - float phi2 = __H2F(pixelTripletsInGPU.phi_pix[pT3Index]); + float eta2 = __H2F(pixelTriplets.eta_pix()[pT3Index]); + float phi2 = __H2F(pixelTriplets.phi_pix()[pT3Index]); float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 0.000001f) - segmentsInGPU.isDup[pixelArrayIndex] = true; + segmentsPixel.isDup()[pixelArrayIndex] = true; } if (type == 7) // pT5 { unsigned int pLSIndex = innerTrackletIdx; - int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mdsInGPU, segmentsInGPU, hitsInGPU); + int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hits); if (npMatched > 0) { - segmentsInGPU.isDup[pixelArrayIndex] = true; + segmentsPixel.isDup()[pixelArrayIndex] = true; } - float eta2 = segmentsInGPU.eta[pLSIndex - prefix]; - float phi2 = segmentsInGPU.phi[pLSIndex - prefix]; + float eta2 = segmentsPixel.eta()[pLSIndex 
- prefix]; + float phi2 = segmentsPixel.phi()[pLSIndex - prefix]; float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 0.000001f) - segmentsInGPU.isDup[pixelArrayIndex] = true; + segmentsPixel.isDup()[pixelArrayIndex] = true; } } } } }; - struct AddpT3asTrackCandidatesInGPU { + struct AddpT3asTrackCandidates { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - PixelTriplets pixelTripletsInGPU, - TrackCandidates trackCandidatesInGPU, - Segments segmentsInGPU, - ObjectRanges rangesInGPU) const { + PixelTripletsConst pixelTriplets, + TrackCandidates cands, + SegmentsPixelConst segmentsPixel, + ObjectRangesConst ranges) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -396,39 +302,39 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - unsigned int nPixelTriplets = *pixelTripletsInGPU.nPixelTriplets; - unsigned int pLS_offset = rangesInGPU.segmentModuleIndices[nLowerModules]; + unsigned int nPixelTriplets = pixelTriplets.nPixelTriplets(); + unsigned int pLS_offset = ranges.segmentModuleIndices()[nLowerModules]; for (unsigned int pixelTripletIndex = globalThreadIdx[0]; pixelTripletIndex < nPixelTriplets; pixelTripletIndex += gridThreadExtent[0]) { - if ((pixelTripletsInGPU.isDup[pixelTripletIndex])) + if ((pixelTriplets.isDup()[pixelTripletIndex])) continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); if (trackCandidateIdx >= n_max_pixel_track_candidates) // This is done before any non-pixel TCs are added { #ifdef WARNINGS printf("Track Candidate excess alert! 
Type = pT3"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespT3, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &cands.nTrackCandidatespT3(), 1u, alpaka::hierarchy::Threads{}); - float radius = 0.5f * (__H2F(pixelTripletsInGPU.pixelRadius[pixelTripletIndex]) + - __H2F(pixelTripletsInGPU.tripletRadius[pixelTripletIndex])); - unsigned int pT3PixelIndex = pixelTripletsInGPU.pixelSegmentIndices[pixelTripletIndex]; - addTrackCandidateToMemory(trackCandidatesInGPU, + float radius = 0.5f * (__H2F(pixelTriplets.pixelRadius()[pixelTripletIndex]) + + __H2F(pixelTriplets.tripletRadius()[pixelTripletIndex])); + unsigned int pT3PixelIndex = pixelTriplets.pixelSegmentIndices()[pixelTripletIndex]; + addTrackCandidateToMemory(cands, 5 /*track candidate type pT3=5*/, pixelTripletIndex, pixelTripletIndex, - &pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex], - &pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex], - &pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex], - segmentsInGPU.seedIdx[pT3PixelIndex - pLS_offset], - __H2F(pixelTripletsInGPU.centerX[pixelTripletIndex]), - __H2F(pixelTripletsInGPU.centerY[pixelTripletIndex]), + pixelTriplets.logicalLayers()[pixelTripletIndex].data(), + pixelTriplets.lowerModuleIndices()[pixelTripletIndex].data(), + pixelTriplets.hitIndices()[pixelTripletIndex].data(), + segmentsPixel.seedIdx()[pT3PixelIndex - pLS_offset], + __H2F(pixelTriplets.centerX()[pixelTripletIndex]), + __H2F(pixelTriplets.centerY()[pixelTripletIndex]), radius, trackCandidateIdx, pixelTripletIndex); @@ -437,52 +343,52 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct AddT5asTrackCandidateInGPU { + struct AddT5asTrackCandidate { template ALPAKA_FN_ACC void 
operator()(TAcc const& acc, uint16_t nLowerModules, - Quintuplets quintupletsInGPU, - TrackCandidates trackCandidatesInGPU, - ObjectRanges rangesInGPU) const { + QuintupletsConst quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + TrackCandidates cands, + ObjectRangesConst ranges) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); for (int idx = globalThreadIdx[1]; idx < nLowerModules; idx += gridThreadExtent[1]) { - if (rangesInGPU.quintupletModuleIndices[idx] == -1) + if (ranges.quintupletModuleIndices()[idx] == -1) continue; - unsigned int nQuints = quintupletsInGPU.nQuintuplets[idx]; + unsigned int nQuints = quintupletsOccupancy.nQuintuplets()[idx]; for (unsigned int jdx = globalThreadIdx[2]; jdx < nQuints; jdx += gridThreadExtent[2]) { - unsigned int quintupletIndex = rangesInGPU.quintupletModuleIndices[idx] + jdx; - if (quintupletsInGPU.isDup[quintupletIndex] or quintupletsInGPU.partOfPT5[quintupletIndex]) + unsigned int quintupletIndex = ranges.quintupletModuleIndices()[idx] + jdx; + if (quintuplets.isDup()[quintupletIndex] or quintuplets.partOfPT5()[quintupletIndex]) continue; - if (!(quintupletsInGPU.TightCutFlag[quintupletIndex])) + if (!(quintuplets.tightCutFlag()[quintupletIndex])) continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); - if (trackCandidateIdx - *trackCandidatesInGPU.nTrackCandidatespT5 - - *trackCandidatesInGPU.nTrackCandidatespT3 >= + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - cands.nTrackCandidatespT5() - cands.nTrackCandidatespT3() >= n_max_nonpixel_track_candidates) // pT5 and pT3 TCs have been added, but not pLS TCs { #ifdef WARNINGS printf("Track Candidate excess alert! 
Type = T5"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatesT5, 1u, alpaka::hierarchy::Threads{}); - addTrackCandidateToMemory(trackCandidatesInGPU, + alpaka::atomicAdd(acc, &cands.nTrackCandidatesT5(), 1u, alpaka::hierarchy::Threads{}); + addTrackCandidateToMemory(cands, 4 /*track candidate type T5=4*/, quintupletIndex, quintupletIndex, - &quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex], - &quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex], - &quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex], + quintuplets.logicalLayers()[quintupletIndex].data(), + quintuplets.lowerModuleIndices()[quintupletIndex].data(), + quintuplets.hitIndices()[quintupletIndex].data(), -1 /*no pixel seed index for T5s*/, - quintupletsInGPU.regressionG[quintupletIndex], - quintupletsInGPU.regressionF[quintupletIndex], - quintupletsInGPU.regressionRadius[quintupletIndex], + quintuplets.regressionG()[quintupletIndex], + quintuplets.regressionF()[quintupletIndex], + quintuplets.regressionRadius()[quintupletIndex], trackCandidateIdx, quintupletIndex); } @@ -491,53 +397,55 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct AddpLSasTrackCandidateInGPU { + struct AddpLSasTrackCandidate { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - TrackCandidates trackCandidatesInGPU, - Segments segmentsInGPU, + TrackCandidates cands, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixelConst segmentsPixel, bool tc_pls_triplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - unsigned int nPixels = segmentsInGPU.nSegments[nLowerModules]; + unsigned int nPixels = segmentsOccupancy.nSegments()[nLowerModules]; for 
(unsigned int pixelArrayIndex = globalThreadIdx[2]; pixelArrayIndex < nPixels; pixelArrayIndex += gridThreadExtent[2]) { - if ((tc_pls_triplets ? 0 : !segmentsInGPU.isQuad[pixelArrayIndex]) || (segmentsInGPU.isDup[pixelArrayIndex])) + if ((tc_pls_triplets ? 0 : !segmentsPixel.isQuad()[pixelArrayIndex]) || + (segmentsPixel.isDup()[pixelArrayIndex])) continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); - if (trackCandidateIdx - *trackCandidatesInGPU.nTrackCandidatesT5 >= + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - cands.nTrackCandidatesT5() >= n_max_pixel_track_candidates) // T5 TCs have already been added { #ifdef WARNINGS printf("Track Candidate excess alert! Type = pLS"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespLS, 1u, alpaka::hierarchy::Threads{}); - addpLSTrackCandidateToMemory(trackCandidatesInGPU, + alpaka::atomicAdd(acc, &cands.nTrackCandidatespLS(), 1u, alpaka::hierarchy::Threads{}); + addpLSTrackCandidateToMemory(cands, pixelArrayIndex, trackCandidateIdx, - segmentsInGPU.pLSHitsIdxs[pixelArrayIndex], - segmentsInGPU.seedIdx[pixelArrayIndex]); + segmentsPixel.pLSHitsIdxs()[pixelArrayIndex], + segmentsPixel.seedIdx()[pixelArrayIndex]); } } } }; - struct AddpT5asTrackCandidateInGPU { + struct AddpT5asTrackCandidate { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - PixelQuintuplets pixelQuintupletsInGPU, - TrackCandidates trackCandidatesInGPU, - Segments segmentsInGPU, - ObjectRanges rangesInGPU) const { + PixelQuintupletsConst pixelQuintuplets, + TrackCandidates cands, + SegmentsPixelConst segmentsPixel, + ObjectRangesConst ranges) const { // 
implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -545,46 +453,48 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets; - unsigned int pLS_offset = rangesInGPU.segmentModuleIndices[nLowerModules]; + int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); + unsigned int pLS_offset = ranges.segmentModuleIndices()[nLowerModules]; for (int pixelQuintupletIndex = globalThreadIdx[0]; pixelQuintupletIndex < nPixelQuintuplets; pixelQuintupletIndex += gridThreadExtent[0]) { - if (pixelQuintupletsInGPU.isDup[pixelQuintupletIndex]) + if (pixelQuintuplets.isDup()[pixelQuintupletIndex]) continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); if (trackCandidateIdx >= n_max_pixel_track_candidates) // No other TCs have been added yet { #ifdef WARNINGS printf("Track Candidate excess alert! 
Type = pT5"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespT5, 1u, alpaka::hierarchy::Threads{}); - - float radius = 0.5f * (__H2F(pixelQuintupletsInGPU.pixelRadius[pixelQuintupletIndex]) + - __H2F(pixelQuintupletsInGPU.quintupletRadius[pixelQuintupletIndex])); - unsigned int pT5PixelIndex = pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex]; - addTrackCandidateToMemory( - trackCandidatesInGPU, - 7 /*track candidate type pT5=7*/, - pT5PixelIndex, - pixelQuintupletsInGPU.T5Indices[pixelQuintupletIndex], - &pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex], - &pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex], - &pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex], - segmentsInGPU.seedIdx[pT5PixelIndex - pLS_offset], - __H2F(pixelQuintupletsInGPU.centerX[pixelQuintupletIndex]), - __H2F(pixelQuintupletsInGPU.centerY[pixelQuintupletIndex]), - radius, - trackCandidateIdx, - pixelQuintupletIndex); + alpaka::atomicAdd(acc, &cands.nTrackCandidatespT5(), 1u, alpaka::hierarchy::Threads{}); + + float radius = 0.5f * (__H2F(pixelQuintuplets.pixelRadius()[pixelQuintupletIndex]) + + __H2F(pixelQuintuplets.quintupletRadius()[pixelQuintupletIndex])); + unsigned int pT5PixelIndex = pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex]; + addTrackCandidateToMemory(cands, + 7 /*track candidate type pT5=7*/, + pT5PixelIndex, + pixelQuintuplets.quintupletIndices()[pixelQuintupletIndex], + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex].data(), + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex].data(), + pixelQuintuplets.hitIndices()[pixelQuintupletIndex].data(), + segmentsPixel.seedIdx()[pT5PixelIndex - pLS_offset], + 
__H2F(pixelQuintuplets.centerX()[pixelQuintupletIndex]), + __H2F(pixelQuintuplets.centerY()[pixelQuintupletIndex]), + radius, + trackCandidateIdx, + pixelQuintupletIndex); } } } }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(lst::TrackCandidatesDeviceCollection, lst::TrackCandidatesHostCollection); + #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 5e1b352748573..d01641f75c22c 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -4,211 +4,73 @@ #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" -#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" #include "Segment.h" #include "MiniDoublet.h" #include "Hit.h" -#include "ObjectRanges.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct Triplets { - unsigned int* segmentIndices; - uint16_t* lowerModuleIndices; //3 of them - unsigned int* nTriplets; - unsigned int* totOccupancyTriplets; - unsigned int* nMemoryLocations; - uint8_t* logicalLayers; - unsigned int* hitIndices; - FPX* betaIn; - float* circleRadius; - float* circleCenterX; - float* circleCenterY; - bool* partOfPT5; - bool* partOfT5; - bool* partOfPT3; -#ifdef CUT_VALUE_DEBUG - //debug variables - float* zOut; - float* rtOut; - float* betaInCut; -#endif - template - void setData(TBuff& buf) { - segmentIndices = buf.segmentIndices_buf.data(); - lowerModuleIndices = buf.lowerModuleIndices_buf.data(); - nTriplets = buf.nTriplets_buf.data(); - totOccupancyTriplets = buf.totOccupancyTriplets_buf.data(); - nMemoryLocations = buf.nMemoryLocations_buf.data(); - logicalLayers = buf.logicalLayers_buf.data(); - hitIndices = buf.hitIndices_buf.data(); - betaIn = 
buf.betaIn_buf.data(); - circleRadius = buf.circleRadius_buf.data(); - circleCenterX = buf.circleCenterX_buf.data(); - circleCenterY = buf.circleCenterY_buf.data(); - partOfPT5 = buf.partOfPT5_buf.data(); - partOfT5 = buf.partOfT5_buf.data(); - partOfPT3 = buf.partOfPT3_buf.data(); -#ifdef CUT_VALUE_DEBUG - zOut = buf.zOut_buf.data(); - rtOut = buf.rtOut_buf.data(); - betaInCut = buf.betaInCut_buf.data(); -#endif - } - }; - - template - struct TripletsBuffer { - Buf segmentIndices_buf; - Buf lowerModuleIndices_buf; - Buf nTriplets_buf; - Buf totOccupancyTriplets_buf; - Buf nMemoryLocations_buf; - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf betaIn_buf; - Buf circleRadius_buf; - Buf circleCenterX_buf; - Buf circleCenterY_buf; - Buf partOfPT5_buf; - Buf partOfT5_buf; - Buf partOfPT3_buf; - -#ifdef CUT_VALUE_DEBUG - Buf zOut_buf; - Buf rtOut_buf; - Buf deltaPhiPos_buf; - Buf deltaPhi_buf; - Buf zLo_buf; - Buf zHi_buf; - Buf zLoPointed_buf; - Buf zHiPointed_buf; - Buf dPhiCut_buf; - Buf betaInCut_buf; - Buf rtLo_buf; - Buf rtHi_buf; -#endif - - Triplets data_; - - template - TripletsBuffer(unsigned int maxTriplets, unsigned int nLowerModules, TDevAcc const& devAccIn, TQueue& queue) - : segmentIndices_buf(allocBufWrapper(devAccIn, 2 * maxTriplets, queue)), - lowerModuleIndices_buf(allocBufWrapper(devAccIn, Params_T3::kLayers * maxTriplets, queue)), - nTriplets_buf(allocBufWrapper(devAccIn, nLowerModules, queue)), - totOccupancyTriplets_buf(allocBufWrapper(devAccIn, nLowerModules, queue)), - nMemoryLocations_buf(allocBufWrapper(devAccIn, 1, queue)), - logicalLayers_buf(allocBufWrapper(devAccIn, maxTriplets * Params_T3::kLayers, queue)), - hitIndices_buf(allocBufWrapper(devAccIn, maxTriplets * Params_T3::kHits, queue)), - betaIn_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - circleRadius_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - circleCenterX_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - circleCenterY_buf(allocBufWrapper(devAccIn, 
maxTriplets, queue)), - partOfPT5_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - partOfT5_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - partOfPT3_buf(allocBufWrapper(devAccIn, maxTriplets, queue)) -#ifdef CUT_VALUE_DEBUG - , - zOut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - rtOut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - deltaPhiPos_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - deltaPhi_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - zLo_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - zHi_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - zLoPointed_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - zHiPointed_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - dPhiCut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - betaInCut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - rtLo_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - rtHi_buf(allocBufWrapper(devAccIn, maxTriplets, queue)) -#endif - { - alpaka::memset(queue, nTriplets_buf, 0u); - alpaka::memset(queue, totOccupancyTriplets_buf, 0u); - alpaka::memset(queue, partOfPT5_buf, false); - alpaka::memset(queue, partOfT5_buf, false); - alpaka::memset(queue, partOfPT3_buf, false); - } - - inline Triplets const* data() const { return &data_; } - inline void setData(TripletsBuffer& buf) { data_.setData(buf); } - }; - -#ifdef CUT_VALUE_DEBUG - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, - Triplets& tripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + Triplets& triplets, unsigned int innerSegmentIndex, unsigned int outerSegmentIndex, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, +#ifdef CUT_VALUE_DEBUG float zOut, float rtOut, +#endif float betaIn, float betaInCut, float circleRadius, 
float circleCenterX, float circleCenterY, - unsigned int tripletIndex) -#else - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, - Triplets& tripletsInGPU, - unsigned int innerSegmentIndex, - unsigned int outerSegmentIndex, - uint16_t innerInnerLowerModuleIndex, - uint16_t middleLowerModuleIndex, - uint16_t outerOuterLowerModuleIndex, - float betaIn, - float circleRadius, - float circleCenterX, - float circleCenterY, - unsigned int tripletIndex) -#endif - { - tripletsInGPU.segmentIndices[tripletIndex * 2] = innerSegmentIndex; - tripletsInGPU.segmentIndices[tripletIndex * 2 + 1] = outerSegmentIndex; - tripletsInGPU.lowerModuleIndices[tripletIndex * Params_T3::kLayers] = innerInnerLowerModuleIndex; - tripletsInGPU.lowerModuleIndices[tripletIndex * Params_T3::kLayers + 1] = middleLowerModuleIndex; - tripletsInGPU.lowerModuleIndices[tripletIndex * Params_T3::kLayers + 2] = outerOuterLowerModuleIndex; - - tripletsInGPU.betaIn[tripletIndex] = __F2H(betaIn); - tripletsInGPU.circleRadius[tripletIndex] = circleRadius; - tripletsInGPU.circleCenterX[tripletIndex] = circleCenterX; - tripletsInGPU.circleCenterY[tripletIndex] = circleCenterY; - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers] = - modulesInGPU.layers[innerInnerLowerModuleIndex] + (modulesInGPU.subdets[innerInnerLowerModuleIndex] == 4) * 6; - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 1] = - modulesInGPU.layers[middleLowerModuleIndex] + (modulesInGPU.subdets[middleLowerModuleIndex] == 4) * 6; - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 2] = - modulesInGPU.layers[outerOuterLowerModuleIndex] + (modulesInGPU.subdets[outerOuterLowerModuleIndex] == 4) * 6; + unsigned int tripletIndex) { + triplets.segmentIndices()[tripletIndex][0] = innerSegmentIndex; + triplets.segmentIndices()[tripletIndex][1] = outerSegmentIndex; + triplets.lowerModuleIndices()[tripletIndex][0] 
= innerInnerLowerModuleIndex; + triplets.lowerModuleIndices()[tripletIndex][1] = middleLowerModuleIndex; + triplets.lowerModuleIndices()[tripletIndex][2] = outerOuterLowerModuleIndex; + + triplets.betaIn()[tripletIndex] = __F2H(betaIn); + triplets.radius()[tripletIndex] = circleRadius; + triplets.centerX()[tripletIndex] = circleCenterX; + triplets.centerY()[tripletIndex] = circleCenterY; + triplets.logicalLayers()[tripletIndex][0] = + modules.layers()[innerInnerLowerModuleIndex] + (modules.subdets()[innerInnerLowerModuleIndex] == 4) * 6; + triplets.logicalLayers()[tripletIndex][1] = + modules.layers()[middleLowerModuleIndex] + (modules.subdets()[middleLowerModuleIndex] == 4) * 6; + triplets.logicalLayers()[tripletIndex][2] = + modules.layers()[outerOuterLowerModuleIndex] + (modules.subdets()[outerOuterLowerModuleIndex] == 4) * 6; //get the hits - unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * innerSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * innerSegmentIndex + 1]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * outerSegmentIndex + 1]; - - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits] = mdsInGPU.anchorHitIndices[firstMDIndex]; - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 1] = mdsInGPU.outerHitIndices[firstMDIndex]; - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 2] = mdsInGPU.anchorHitIndices[secondMDIndex]; - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 3] = mdsInGPU.outerHitIndices[secondMDIndex]; - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 4] = mdsInGPU.anchorHitIndices[thirdMDIndex]; - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 5] = mdsInGPU.outerHitIndices[thirdMDIndex]; + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; + + 
triplets.hitIndices()[tripletIndex][0] = mds.anchorHitIndices()[firstMDIndex]; + triplets.hitIndices()[tripletIndex][1] = mds.outerHitIndices()[firstMDIndex]; + triplets.hitIndices()[tripletIndex][2] = mds.anchorHitIndices()[secondMDIndex]; + triplets.hitIndices()[tripletIndex][3] = mds.outerHitIndices()[secondMDIndex]; + triplets.hitIndices()[tripletIndex][4] = mds.anchorHitIndices()[thirdMDIndex]; + triplets.hitIndices()[tripletIndex][5] = mds.outerHitIndices()[thirdMDIndex]; #ifdef CUT_VALUE_DEBUG - tripletsInGPU.zOut[tripletIndex] = zOut; - tripletsInGPU.rtOut[tripletIndex] = rtOut; - tripletsInGPU.betaInCut[tripletIndex] = betaInCut; + triplets.zOut()[tripletIndex] = zOut; + triplets.rtOut()[tripletIndex] = rtOut; + triplets.betaInCut()[tripletIndex] = betaInCut; #endif } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -216,18 +78,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int secondMDIndex, unsigned int thirdMDIndex) { //get the rt and z - const float& r1 = mdsInGPU.anchorRt[firstMDIndex]; - const float& r2 = mdsInGPU.anchorRt[secondMDIndex]; - const float& r3 = mdsInGPU.anchorRt[thirdMDIndex]; + const float& r1 = mds.anchorRt()[firstMDIndex]; + const float& r2 = mds.anchorRt()[secondMDIndex]; + const float& r3 = mds.anchorRt()[thirdMDIndex]; - const float& z1 = mdsInGPU.anchorZ[firstMDIndex]; - const float& z2 = mdsInGPU.anchorZ[secondMDIndex]; - const float& z3 = mdsInGPU.anchorZ[thirdMDIndex]; + const float& z1 = mds.anchorZ()[firstMDIndex]; + const float& z2 = mds.anchorZ()[secondMDIndex]; + const float& z3 = mds.anchorZ()[thirdMDIndex]; // Using lst_layer numbering convention defined in ModuleMethods.h - const int layer1 = 
modulesInGPU.lstLayers[innerInnerLowerModuleIndex]; - const int layer2 = modulesInGPU.lstLayers[middleLowerModuleIndex]; - const int layer3 = modulesInGPU.lstLayers[outerOuterLowerModuleIndex]; + const int layer1 = modules.lstLayers()[innerInnerLowerModuleIndex]; + const int layer2 = modules.lstLayers()[middleLowerModuleIndex]; + const int layer3 = modules.lstLayers()[outerOuterLowerModuleIndex]; const float residual = z2 - ((z3 - z1) / (r3 - r1) * (r2 - r1) + z1); @@ -266,9 +128,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBB(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -280,16 +142,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int innerSegmentIndex, float& betaIn, float& betaInCut) { - bool isPSIn = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS); - bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == PS); + bool isPSIn = (modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPSOut = (modules.moduleType()[outerOuterLowerModuleIndex] == PS); - float rtIn = mdsInGPU.anchorRt[firstMDIndex]; - float rtMid = mdsInGPU.anchorRt[secondMDIndex]; - rtOut = mdsInGPU.anchorRt[thirdMDIndex]; + float rtIn = mds.anchorRt()[firstMDIndex]; + float rtMid = mds.anchorRt()[secondMDIndex]; + rtOut = mds.anchorRt()[thirdMDIndex]; - float zIn = mdsInGPU.anchorZ[firstMDIndex]; - float zMid = mdsInGPU.anchorZ[secondMDIndex]; - zOut = mdsInGPU.anchorZ[thirdMDIndex]; + float zIn = mds.anchorZ()[firstMDIndex]; + float zMid = mds.anchorZ()[secondMDIndex]; + zOut = mds.anchorZ()[thirdMDIndex]; float alpha1GeVOut = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); @@ -337,21 
+199,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; // raw betaIn value without any correction, based on the mini-doublet hit positions - float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); //beta computation float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); //innerOuterAnchor - innerInnerAnchor - const float rt_InSeg = - alpaka::math::sqrt(acc, - (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * - (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + const float rt_InSeg = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / drt_InSeg); @@ -362,9 +223,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBE(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - 
Segments const& segmentsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -378,16 +239,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int outerSegmentIndex, float& betaIn, float& betaInCut) { - bool isPSIn = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS); - bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == PS); + bool isPSIn = (modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPSOut = (modules.moduleType()[outerOuterLowerModuleIndex] == PS); - float rtIn = mdsInGPU.anchorRt[firstMDIndex]; - float rtMid = mdsInGPU.anchorRt[secondMDIndex]; - rtOut = mdsInGPU.anchorRt[thirdMDIndex]; + float rtIn = mds.anchorRt()[firstMDIndex]; + float rtMid = mds.anchorRt()[secondMDIndex]; + rtOut = mds.anchorRt()[thirdMDIndex]; - float zIn = mdsInGPU.anchorZ[firstMDIndex]; - float zMid = mdsInGPU.anchorZ[secondMDIndex]; - zOut = mdsInGPU.anchorZ[thirdMDIndex]; + float zIn = mds.anchorZ()[firstMDIndex]; + float zMid = mds.anchorZ()[secondMDIndex]; + zOut = mds.anchorZ()[thirdMDIndex]; float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); @@ -402,7 +263,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == PS; + bool isOutSgInnerMDPS = modules.moduleType()[outerOuterLowerModuleIndex] == PS; float rtGeom1 = isOutSgInnerMDPS ? 
kPixelPSZpitch : kStrip2SZpitch; float zGeom1 = alpaka::math::copysign(acc, zGeom, zIn); float rtLo = rtIn * (1.f + (zOut - zIn - zGeom1) / (zIn + zGeom1 + dLum) / dzDrtScale) - @@ -443,15 +304,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if ((kZ < 0) || (rtOut < rtLo) || (rtOut > rtHi)) return false; - float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; - float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; - float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); - float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; - betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); float betaInRHmin = betaIn; float betaInRHmax = betaIn; @@ -465,10 +326,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } float sdIn_dr = alpaka::math::sqrt(acc, - (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * - (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); float sdIn_d = rt_InOut - rt_InLo; float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x 
+ tl_axis_y * tl_axis_y); @@ -481,9 +342,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintEEE(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -496,13 +357,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int outerSegmentIndex, float& betaIn, float& betaInCut) { - float rtIn = mdsInGPU.anchorRt[firstMDIndex]; - float rtMid = mdsInGPU.anchorRt[secondMDIndex]; - rtOut = mdsInGPU.anchorRt[thirdMDIndex]; + float rtIn = mds.anchorRt()[firstMDIndex]; + float rtMid = mds.anchorRt()[secondMDIndex]; + rtOut = mds.anchorRt()[thirdMDIndex]; - float zIn = mdsInGPU.anchorZ[firstMDIndex]; - float zMid = mdsInGPU.anchorZ[secondMDIndex]; - zOut = mdsInGPU.anchorZ[thirdMDIndex]; + float zIn = mds.anchorZ()[firstMDIndex]; + float zMid = mds.anchorZ()[secondMDIndex]; + zOut = mds.anchorZ()[thirdMDIndex]; float alpha1GeV_Out = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); @@ -514,8 +375,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); - bool isOutSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == PS; - bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS; + bool isOutSgOuterMDPS = modules.moduleType()[outerOuterLowerModuleIndex] == PS; + bool isInSgInnerMDPS = modules.moduleType()[innerInnerLowerModuleIndex] == PS; float rtGeom = (isInSgInnerMDPS and isOutSgOuterMDPS) ? 2.f * kPixelPSZpitch : (isInSgInnerMDPS or isOutSgOuterMDPS) ? 
kPixelPSZpitch + kStrip2SZpitch @@ -529,7 +390,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if ((rtOut < rtLo) || (rtOut > rtHi)) return false; - bool isInSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == PS; + bool isInSgOuterMDPS = modules.moduleType()[outerOuterLowerModuleIndex] == PS; float drtSDIn = rtMid - rtIn; float dzSDIn = zMid - zIn; @@ -564,17 +425,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; } - float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; - float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; - float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); - float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; - betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); - float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); - float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); + float sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; @@ -586,10 +447,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { betaInRHmax = swapTemp; } float sdIn_dr = alpaka::math::sqrt(acc, - (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * - 
(mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * - (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); float sdIn_d = rt_InOut - rt_InLo; float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); @@ -602,9 +463,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -618,16 +479,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int outerSegmentIndex, float& betaIn, float& betaInCut) { - short innerInnerLowerModuleSubdet = modulesInGPU.subdets[innerInnerLowerModuleIndex]; - short middleLowerModuleSubdet = modulesInGPU.subdets[middleLowerModuleIndex]; - short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; + short innerInnerLowerModuleSubdet = modules.subdets()[innerInnerLowerModuleIndex]; + short middleLowerModuleSubdet = modules.subdets()[middleLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Barrel and outerOuterLowerModuleSubdet == Barrel) { return passPointingConstraintBBB(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -642,9 +503,9 @@ 
namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } else if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Barrel and outerOuterLowerModuleSubdet == Endcap) { return passPointingConstraintBBE(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -661,9 +522,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } else if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { return passPointingConstraintBBE(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -683,9 +544,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { else if (innerInnerLowerModuleSubdet == Endcap and middleLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { return passPointingConstraintEEE(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -737,9 +598,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletConstraintsAndAlgo(TAcc const& acc, - Modules const& modulesInGPU, - MiniDoublets const& mdsInGPU, - Segments const& segmentsInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -753,17 +614,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& circleCenterX, float& circleCenterY) { //this cut reduces the number of candidates by a factor of 4, i.e., 3 out of 4 warps can end right here! 
- if (segmentsInGPU.mdIndices[2 * innerSegmentIndex + 1] != segmentsInGPU.mdIndices[2 * outerSegmentIndex]) + if (segments.mdIndices()[innerSegmentIndex][1] != segments.mdIndices()[outerSegmentIndex][0]) return false; - unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * innerSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * outerSegmentIndex]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * outerSegmentIndex + 1]; + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[outerSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; if (not(passRZConstraint(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -772,9 +632,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { thirdMDIndex))) return false; if (not(passPointingConstraint(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -790,25 +650,27 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { betaInCut))) return false; - float x1 = mdsInGPU.anchorX[firstMDIndex]; - float x2 = mdsInGPU.anchorX[secondMDIndex]; - float x3 = mdsInGPU.anchorX[thirdMDIndex]; - float y1 = mdsInGPU.anchorY[firstMDIndex]; - float y2 = mdsInGPU.anchorY[secondMDIndex]; - float y3 = mdsInGPU.anchorY[thirdMDIndex]; + float x1 = mds.anchorX()[firstMDIndex]; + float x2 = mds.anchorX()[secondMDIndex]; + float x3 = mds.anchorX()[thirdMDIndex]; + float y1 = mds.anchorY()[firstMDIndex]; + float y2 = mds.anchorY()[secondMDIndex]; + float y3 = mds.anchorY()[thirdMDIndex]; circleRadius = computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3, circleCenterX, circleCenterY); return true; } - struct CreateTripletsInGPUv2 { + struct CreateTriplets { template ALPAKA_FN_ACC void operator()(TAcc const& acc, 
- Modules modulesInGPU, - MiniDoublets mdsInGPU, - Segments segmentsInGPU, - Triplets tripletsInGPU, - ObjectRanges rangesInGPU, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsOccupancyConst segmentsOccupancy, + Triplets triplets, + TripletsOccupancy tripletsOccupancy, + ObjectRangesConst ranges, uint16_t* index_gpu, uint16_t nonZeroModules) const { auto const globalThreadIdx = alpaka::getIdx(acc); @@ -817,36 +679,35 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { for (uint16_t innerLowerModuleArrayIdx = globalThreadIdx[0]; innerLowerModuleArrayIdx < nonZeroModules; innerLowerModuleArrayIdx += gridThreadExtent[0]) { uint16_t innerInnerLowerModuleIndex = index_gpu[innerLowerModuleArrayIdx]; - if (innerInnerLowerModuleIndex >= *modulesInGPU.nLowerModules) + if (innerInnerLowerModuleIndex >= modules.nLowerModules()) continue; - uint16_t nConnectedModules = modulesInGPU.nConnectedModules[innerInnerLowerModuleIndex]; + uint16_t nConnectedModules = modules.nConnectedModules()[innerInnerLowerModuleIndex]; if (nConnectedModules == 0) continue; - unsigned int nInnerSegments = segmentsInGPU.nSegments[innerInnerLowerModuleIndex]; + unsigned int nInnerSegments = segmentsOccupancy.nSegments()[innerInnerLowerModuleIndex]; for (unsigned int innerSegmentArrayIndex = globalThreadIdx[1]; innerSegmentArrayIndex < nInnerSegments; innerSegmentArrayIndex += gridThreadExtent[1]) { unsigned int innerSegmentIndex = - rangesInGPU.segmentRanges[innerInnerLowerModuleIndex * 2] + innerSegmentArrayIndex; + ranges.segmentRanges()[innerInnerLowerModuleIndex][0] + innerSegmentArrayIndex; // middle lower module - outer lower module of inner segment - uint16_t middleLowerModuleIndex = segmentsInGPU.outerLowerModuleIndices[innerSegmentIndex]; + uint16_t middleLowerModuleIndex = segments.outerLowerModuleIndices()[innerSegmentIndex]; - unsigned int nOuterSegments = segmentsInGPU.nSegments[middleLowerModuleIndex]; + unsigned int nOuterSegments = 
segmentsOccupancy.nSegments()[middleLowerModuleIndex]; for (unsigned int outerSegmentArrayIndex = globalThreadIdx[2]; outerSegmentArrayIndex < nOuterSegments; outerSegmentArrayIndex += gridThreadExtent[2]) { - unsigned int outerSegmentIndex = - rangesInGPU.segmentRanges[2 * middleLowerModuleIndex] + outerSegmentArrayIndex; + unsigned int outerSegmentIndex = ranges.segmentRanges()[middleLowerModuleIndex][0] + outerSegmentArrayIndex; - uint16_t outerOuterLowerModuleIndex = segmentsInGPU.outerLowerModuleIndices[outerSegmentIndex]; + uint16_t outerOuterLowerModuleIndex = segments.outerLowerModuleIndices()[outerSegmentIndex]; float zOut, rtOut, betaIn, betaInCut, circleRadius, circleCenterX, circleCenterY; bool success = runTripletConstraintsAndAlgo(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, + modules, + mds, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -863,53 +724,38 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (success) { unsigned int totOccupancyTriplets = alpaka::atomicAdd(acc, - &tripletsInGPU.totOccupancyTriplets[innerInnerLowerModuleIndex], + &tripletsOccupancy.totOccupancyTriplets()[innerInnerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); if (static_cast(totOccupancyTriplets) >= - rangesInGPU.tripletModuleOccupancy[innerInnerLowerModuleIndex]) { + ranges.tripletModuleOccupancy()[innerInnerLowerModuleIndex]) { #ifdef WARNINGS printf("Triplet excess alert! 
Module index = %d\n", innerInnerLowerModuleIndex); #endif } else { unsigned int tripletModuleIndex = alpaka::atomicAdd( - acc, &tripletsInGPU.nTriplets[innerInnerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + acc, &tripletsOccupancy.nTriplets()[innerInnerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); unsigned int tripletIndex = - rangesInGPU.tripletModuleIndices[innerInnerLowerModuleIndex] + tripletModuleIndex; -#ifdef CUT_VALUE_DEBUG - addTripletToMemory(modulesInGPU, - mdsInGPU, - segmentsInGPU, - tripletsInGPU, + ranges.tripletModuleIndices()[innerInnerLowerModuleIndex] + tripletModuleIndex; + addTripletToMemory(modules, + mds, + segments, + triplets, innerSegmentIndex, outerSegmentIndex, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, +#ifdef CUT_VALUE_DEBUG zOut, rtOut, +#endif betaIn, betaInCut, circleRadius, circleCenterX, circleCenterY, tripletIndex); -#else - addTripletToMemory(modulesInGPU, - mdsInGPU, - segmentsInGPU, - tripletsInGPU, - innerSegmentIndex, - outerSegmentIndex, - innerInnerLowerModuleIndex, - middleLowerModuleIndex, - outerOuterLowerModuleIndex, - betaIn, - circleRadius, - circleCenterX, - circleCenterY, - tripletIndex); -#endif } } } @@ -921,9 +767,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct CreateTripletArrayRanges { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - ObjectRanges rangesInGPU, - Segments segmentsInGPU) const { + ModulesConst modules, + ObjectRanges ranges, + SegmentsOccupancyConst segmentsOccupancy) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -941,17 +787,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // Create variables outside of the for loop. 
int occupancy, category_number, eta_number; - for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { - if (segmentsInGPU.nSegments[i] == 0) { - rangesInGPU.tripletModuleIndices[i] = nTotalTriplets; - rangesInGPU.tripletModuleOccupancy[i] = 0; + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (segmentsOccupancy.nSegments()[i] == 0) { + ranges.tripletModuleIndices()[i] = nTotalTriplets; + ranges.tripletModuleOccupancy()[i] = 0; continue; } - short module_rings = modulesInGPU.rings[i]; - short module_layers = modulesInGPU.layers[i]; - short module_subdets = modulesInGPU.subdets[i]; - float module_eta = alpaka::math::abs(acc, modulesInGPU.eta[i]); + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); if (module_layers <= 3 && module_subdets == 5) category_number = 0; @@ -1008,15 +854,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { #endif } - rangesInGPU.tripletModuleOccupancy[i] = occupancy; + ranges.tripletModuleOccupancy()[i] = occupancy; unsigned int nTotT = alpaka::atomicAdd(acc, &nTotalTriplets, occupancy, alpaka::hierarchy::Threads{}); - rangesInGPU.tripletModuleIndices[i] = nTotT; + ranges.tripletModuleIndices()[i] = nTotT; } // Wait for all threads to finish before reporting final values alpaka::syncBlockThreads(acc); if (cms::alpakatools::once_per_block(acc)) { - *rangesInGPU.device_nTotalTrips = nTotalTriplets; + ranges.nTotalTrips() = nTotalTriplets; } } }; @@ -1024,9 +870,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct AddTripletRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - Modules modulesInGPU, - Triplets tripletsInGPU, - ObjectRanges rangesInGPU) const { + ModulesConst modules, + TripletsOccupancyConst tripletsOccupancy, + ObjectRanges ranges) const { // implementation 
is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -1034,13 +880,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { - if (tripletsInGPU.nTriplets[i] == 0) { - rangesInGPU.tripletRanges[i * 2] = -1; - rangesInGPU.tripletRanges[i * 2 + 1] = -1; + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (tripletsOccupancy.nTriplets()[i] == 0) { + ranges.tripletRanges()[i][0] = -1; + ranges.tripletRanges()[i][1] = -1; } else { - rangesInGPU.tripletRanges[i * 2] = rangesInGPU.tripletModuleIndices[i]; - rangesInGPU.tripletRanges[i * 2 + 1] = rangesInGPU.tripletModuleIndices[i] + tripletsInGPU.nTriplets[i] - 1; + ranges.tripletRanges()[i][0] = ranges.tripletModuleIndices()[i]; + ranges.tripletRanges()[i][1] = ranges.tripletModuleIndices()[i] + tripletsOccupancy.nTriplets()[i] - 1; } } } diff --git a/RecoTracker/LSTCore/standalone/LST/Makefile b/RecoTracker/LSTCore/standalone/LST/Makefile index ba5e19e6a2779..3dd0483edfeb6 100644 --- a/RecoTracker/LSTCore/standalone/LST/Makefile +++ b/RecoTracker/LSTCore/standalone/LST/Makefile @@ -44,17 +44,17 @@ LIBS=$(LIB_CPU) $(LIB_CUDA) $(LIB_ROCM) GENCODE_CUDA := -gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_89,code=[sm_89,compute_89] CXX = g++ -CXXFLAGS_CPU = -march=native -mtune=native -Ofast -fno-reciprocal-math -fopenmp-simd -g -Wall -Wshadow -Woverloaded-virtual -fPIC -fopenmp -I.. -CXXFLAGS_CUDA = -O3 -g --compiler-options -Wall --compiler-options -Wshadow --compiler-options -Woverloaded-virtual --compiler-options -fPIC --compiler-options -fopenmp -dc -lineinfo --ptxas-options=-v --cudart shared $(GENCODE_CUDA) --use_fast_math --default-stream per-thread -I.. 
-CXXFLAGS_ROCM = -O3 -g -Wall -Wshadow -Woverloaded-virtual -fPIC -I${ROCM_ROOT}/include -I.. +CXXFLAGS_CPU = -march=native -mtune=native -Ofast -fno-reciprocal-math -fopenmp-simd -g -Wall -Woverloaded-virtual -fPIC -fopenmp -I.. +CXXFLAGS_CUDA = -O3 -g --compiler-options -Wall --compiler-options -Woverloaded-virtual --compiler-options -fPIC --compiler-options -fopenmp -dc -lineinfo --ptxas-options=-v --cudart shared $(GENCODE_CUDA) --use_fast_math --default-stream per-thread -I.. +CXXFLAGS_ROCM = -O3 -g -Wall -Woverloaded-virtual -fPIC -I${ROCM_ROOT}/include -I.. CMSSWINCLUDE := -I${TRACKLOOPERDIR}/../../../ -I${CMSSW_BASE}/src ifdef CMSSW_RELEASE_BASE CMSSWINCLUDE := ${CMSSWINCLUDE} -I${CMSSW_RELEASE_BASE}/src endif ALPAKAINCLUDE = -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include -std=c++17 ${CMSSWINCLUDE} -ALPAKASERIAL = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -ALPAKACUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_ACC_GPU_CUDA_ONLY --expt-relaxed-constexpr -ALPAKAROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_ACC_GPU_HIP_ONLY -DALPAKA_DISABLE_VENDOR_RNG +ALPAKASERIAL = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKACUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_ACC_GPU_CUDA_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 --expt-relaxed-constexpr +ALPAKAROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_ACC_GPU_HIP_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 ROOTINCLUDE = -I$(ROOT_ROOT)/include ROOTCFLAGS = -pthread -m64 $(ROOTINCLUDE) PRINTFLAG = -DT4FromT3 diff --git a/RecoTracker/LSTCore/standalone/Makefile b/RecoTracker/LSTCore/standalone/Makefile index efcd2483c5eba..8eb677611513e 100644 --- a/RecoTracker/LSTCore/standalone/Makefile +++ b/RecoTracker/LSTCore/standalone/Makefile @@ -9,7 +9,7 @@ OBJECTS_ROCM=$(SOURCES:.cc=_rocm.o) OBJECTS=$(OBJECTS_CPU) $(OBJECTS_CUDA) $(OBJECTS_ROCM) CXX = g++ -CXXFLAGS = -g -O2 -Wall -fPIC -Wshadow 
-Woverloaded-virtual -Wno-unused-function -fno-var-tracking -std=c++17 +CXXFLAGS = -g -O2 -Wall -fPIC -Woverloaded-virtual -Wno-unused-function -fno-var-tracking -std=c++17 INCLUDEFLAGS= -ILST -I$(shell pwd) -Icode -Icode/core -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include $(shell rooutil-config --include) -I$(shell root-config --incdir) -I${TRACKLOOPERDIR}/../../../ -I${CMSSW_BASE}/src -I../interface/ -I../interface/alpaka/ -I../src/ -I../src/alpaka/ ifdef CMSSW_RELEASE_BASE INCLUDEFLAGS:= ${INCLUDEFLAGS} -I${CMSSW_RELEASE_BASE}/src @@ -20,9 +20,9 @@ LDFLAGS_ROCM= -L${ROCM_ROOT}/lib -lamdhip64 ALPAKAFLAGS = -DALPAKA_DEBUG=0 CUDAINCLUDE = -I${CUDA_HOME}/include ROCMINCLUDE = -I${ROCM_ROOT}/include -ALPAKA_CPU = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -ALPAKA_CUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_HOST_ONLY -ALPAKA_ROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_HOST_ONLY -DALPAKA_DISABLE_VENDOR_RNG -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ +ALPAKA_CPU = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKA_CUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_HOST_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKA_ROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_HOST_ONLY -DALPAKA_DISABLE_VENDOR_RNG -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 EXTRAFLAGS = -ITMultiDrawTreePlayer -Wunused-variable -lTMVA -lEG -lGenVector -lXMLIO -lMLP -lTreePlayer -fopenmp DOQUINTUPLET = PTCUTFLAG = diff --git a/RecoTracker/LSTCore/standalone/bin/lst.cc b/RecoTracker/LSTCore/standalone/bin/lst.cc index c0e52d0a0d194..ca8126c8ae4c4 100644 --- a/RecoTracker/LSTCore/standalone/bin/lst.cc +++ b/RecoTracker/LSTCore/standalone/bin/lst.cc @@ -460,7 +460,9 @@ void run_lst() { if (ana.verbose == 5) { #pragma omp critical - { debugPrintOutlierMultiplicities(events.at(omp_get_thread_num())); } + { + // TODO: debugPrintOutlierMultiplicities + } } if 
(ana.do_write_ntuple) { diff --git a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc index 426a74babc4d1..634f0ed753b71 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc +++ b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc @@ -9,12 +9,12 @@ using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; //____________________________________________________________________________________________ std::tuple, std::vector> convertHitsToHitIdxsAndHitTypes( Event* event, std::vector hits) { - Hits const* hitsEvt = event->getHits().data(); + auto hitsEvt = event->getHits(); std::vector hitidxs; std::vector hittypes; for (auto& hit : hits) { - hitidxs.push_back(hitsEvt->idxs[hit]); - if (hitsEvt->detid[hit] == 1) + hitidxs.push_back(hitsEvt.idxs()[hit]); + if (hitsEvt.detid()[hit] == 1) hittypes.push_back(0); else hittypes.push_back(4); @@ -28,17 +28,17 @@ std::tuple, std::vector> convertHitsToHi //____________________________________________________________________________________________ std::vector getPixelHitsFrompLS(Event* event, unsigned int pLS) { - Segments const* segments = event->getSegments().data(); - MiniDoublets const* miniDoublets = event->getMiniDoublets().data(); - ObjectRanges const* rangesEvt = event->getRanges().data(); - Modules const* modulesEvt = event->getModules().data(); - const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; - unsigned int MD_1 = segments->mdIndices[2 * (pLS + pLS_offset)]; - unsigned int MD_2 = segments->mdIndices[2 * (pLS + pLS_offset) + 1]; - unsigned int hit_1 = miniDoublets->anchorHitIndices[MD_1]; - unsigned int hit_2 = miniDoublets->outerHitIndices[MD_1]; - unsigned int hit_3 = miniDoublets->anchorHitIndices[MD_2]; - unsigned int hit_4 = miniDoublets->outerHitIndices[MD_2]; + SegmentsConst segments = event->getSegments(); + MiniDoubletsConst miniDoublets = event->getMiniDoublets(); + auto ranges = 
event->getRanges(); + auto modulesEvt = event->getModules(); + const unsigned int pLS_offset = ranges.segmentModuleIndices()[modulesEvt.nLowerModules()]; + unsigned int MD_1 = segments.mdIndices()[pLS + pLS_offset][0]; + unsigned int MD_2 = segments.mdIndices()[pLS + pLS_offset][1]; + unsigned int hit_1 = miniDoublets.anchorHitIndices()[MD_1]; + unsigned int hit_2 = miniDoublets.outerHitIndices()[MD_1]; + unsigned int hit_3 = miniDoublets.anchorHitIndices()[MD_2]; + unsigned int hit_4 = miniDoublets.outerHitIndices()[MD_2]; if (hit_3 == hit_4) return {hit_1, hit_2, hit_3}; else @@ -47,11 +47,11 @@ std::vector getPixelHitsFrompLS(Event* event, unsigned int pLS) { //____________________________________________________________________________________________ std::vector getPixelHitIdxsFrompLS(Event* event, unsigned int pLS) { - Hits const* hitsEvt = event->getHits().data(); + auto hitsEvt = event->getHits(); std::vector hits = getPixelHitsFrompLS(event, pLS); std::vector hitidxs; for (auto& hit : hits) - hitidxs.push_back(hitsEvt->idxs[hit]); + hitidxs.push_back(hitsEvt.idxs()[hit]); return hitidxs; } @@ -74,9 +74,9 @@ std::tuple, std::vector> getHitIdxsAndHi //____________________________________________________________________________________________ std::vector getHitsFromMD(Event* event, unsigned int MD) { - MiniDoublets const* miniDoublets = event->getMiniDoublets().data(); - unsigned int hit_1 = miniDoublets->anchorHitIndices[MD]; - unsigned int hit_2 = miniDoublets->outerHitIndices[MD]; + MiniDoubletsConst miniDoublets = event->getMiniDoublets(); + unsigned int hit_1 = miniDoublets.anchorHitIndices()[MD]; + unsigned int hit_2 = miniDoublets.outerHitIndices()[MD]; return {hit_1, hit_2}; } @@ -92,9 +92,9 @@ std::tuple, std::vector> getHitIdxsAndHi //____________________________________________________________________________________________ std::vector getMDsFromLS(Event* event, unsigned int LS) { - Segments const* segments = event->getSegments().data(); - 
unsigned int MD_1 = segments->mdIndices[2 * LS]; - unsigned int MD_2 = segments->mdIndices[2 * LS + 1]; + SegmentsConst segments = event->getSegments(); + unsigned int MD_1 = segments.mdIndices()[LS][0]; + unsigned int MD_2 = segments.mdIndices()[LS][1]; return {MD_1, MD_2}; } @@ -117,11 +117,11 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromT3(Event* event, unsigned int T3) { - Triplets const* triplets = event->getTriplets().data(); - unsigned int LS_1 = triplets->segmentIndices[2 * T3]; - unsigned int LS_2 = triplets->segmentIndices[2 * T3 + 1]; - return {LS_1, LS_2}; +std::vector getLSsFromT3(Event* event, unsigned int t3) { + auto const triplets = event->getTriplets(); + unsigned int ls_1 = triplets.segmentIndices()[t3][0]; + unsigned int ls_2 = triplets.segmentIndices()[t3][1]; + return {ls_1, ls_2}; } //____________________________________________________________________________________________ @@ -152,11 +152,11 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getT3sFromT5(Event* event, unsigned int T5) { - Quintuplets const* quintuplets = event->getQuintuplets().data(); - unsigned int T3_1 = quintuplets->tripletIndices[2 * T5]; - unsigned int T3_2 = quintuplets->tripletIndices[2 * T5 + 1]; - return {T3_1, T3_2}; +std::vector getT3sFromT5(Event* event, unsigned int t5) { + auto const quintuplets = event->getQuintuplets(); + unsigned int t3_1 = quintuplets.tripletIndices()[t5][0]; + unsigned int t3_2 = quintuplets.tripletIndices()[t5][1]; + return {t3_1, t3_2}; } //____________________________________________________________________________________________ @@ -190,20 +190,20 @@ std::vector getHitsFromT5(Event* event, unsigned int T5) { 
//____________________________________________________________________________________________ std::vector getHitIdxsFromT5(Event* event, unsigned int T5) { - Hits const* hitsEvt = event->getHits().data(); + auto hitsEvt = event->getHits(); std::vector hits = getHitsFromT5(event, T5); std::vector hitidxs; for (auto& hit : hits) - hitidxs.push_back(hitsEvt->idxs[hit]); + hitidxs.push_back(hitsEvt.idxs()[hit]); return hitidxs; } //____________________________________________________________________________________________ std::vector getModuleIdxsFromT5(Event* event, unsigned int T5) { std::vector hits = getHitsFromT5(event, T5); std::vector module_idxs; - Hits const* hitsEvt = event->getHits().data(); + auto hitsEvt = event->getHits(); for (auto& hitIdx : hits) { - module_idxs.push_back(hitsEvt->moduleIndices[hitIdx]); + module_idxs.push_back(hitsEvt.moduleIndices()[hitIdx]); } return module_idxs; } @@ -225,17 +225,17 @@ std::tuple, std::vector> getHitIdxsAndHi //____________________________________________________________________________________________ unsigned int getPixelLSFrompT3(Event* event, unsigned int pT3) { - PixelTriplets const* pixelTriplets = event->getPixelTriplets().data(); - ObjectRanges const* rangesEvt = event->getRanges().data(); - Modules const* modulesEvt = event->getModules().data(); - const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; - return pixelTriplets->pixelSegmentIndices[pT3] - pLS_offset; + auto const pixelTriplets = event->getPixelTriplets(); + auto ranges = event->getRanges(); + auto modulesEvt = event->getModules(); + const unsigned int pLS_offset = ranges.segmentModuleIndices()[modulesEvt.nLowerModules()]; + return pixelTriplets.pixelSegmentIndices()[pT3] - pLS_offset; } //____________________________________________________________________________________________ unsigned int getT3FrompT3(Event* event, unsigned int pT3) { - PixelTriplets const* pixelTriplets = 
event->getPixelTriplets().data(); - return pixelTriplets->tripletIndices[pT3]; + auto const pixelTriplets = event->getPixelTriplets(); + return pixelTriplets.tripletIndices()[pT3]; } //____________________________________________________________________________________________ @@ -274,20 +274,20 @@ std::vector getHitsFrompT3(Event* event, unsigned int pT3) { //____________________________________________________________________________________________ std::vector getHitIdxsFrompT3(Event* event, unsigned int pT3) { - Hits const* hitsEvt = event->getHits().data(); + auto hitsEvt = event->getHits(); std::vector hits = getHitsFrompT3(event, pT3); std::vector hitidxs; for (auto& hit : hits) - hitidxs.push_back(hitsEvt->idxs[hit]); + hitidxs.push_back(hitsEvt.idxs()[hit]); return hitidxs; } //____________________________________________________________________________________________ std::vector getModuleIdxsFrompT3(Event* event, unsigned int pT3) { std::vector hits = getOuterTrackerHitsFrompT3(event, pT3); std::vector module_idxs; - Hits const* hitsEvt = event->getHits().data(); + auto hitsEvt = event->getHits(); for (auto& hitIdx : hits) { - module_idxs.push_back(hitsEvt->moduleIndices[hitIdx]); + module_idxs.push_back(hitsEvt.moduleIndices()[hitIdx]); } return module_idxs; } @@ -314,17 +314,17 @@ std::tuple, std::vector> getHitIdxsAndHi //____________________________________________________________________________________________ unsigned int getPixelLSFrompT5(Event* event, unsigned int pT5) { - PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets().data(); - ObjectRanges const* rangesEvt = event->getRanges().data(); - Modules const* modulesEvt = event->getModules().data(); - const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; - return pixelQuintuplets->pixelIndices[pT5] - pLS_offset; + auto const pixelQuintuplets = event->getPixelQuintuplets(); + auto ranges = event->getRanges(); + auto modulesEvt = 
event->getModules(); + const unsigned int pLS_offset = ranges.segmentModuleIndices()[modulesEvt.nLowerModules()]; + return pixelQuintuplets.pixelSegmentIndices()[pT5] - pLS_offset; } //____________________________________________________________________________________________ unsigned int getT5FrompT5(Event* event, unsigned int pT5) { - PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets().data(); - return pixelQuintuplets->T5Indices[pT5]; + auto const pixelQuintuplets = event->getPixelQuintuplets(); + return pixelQuintuplets.quintupletIndices()[pT5]; } //____________________________________________________________________________________________ @@ -369,11 +369,11 @@ std::vector getHitsFrompT5(Event* event, unsigned int pT5) { //____________________________________________________________________________________________ std::vector getHitIdxsFrompT5(Event* event, unsigned int pT5) { - Hits const* hitsEvt = event->getHits().data(); + auto hitsEvt = event->getHits(); std::vector hits = getHitsFrompT5(event, pT5); std::vector hitidxs; for (auto& hit : hits) - hitidxs.push_back(hitsEvt->idxs[hit]); + hitidxs.push_back(hitsEvt.idxs()[hit]); return hitidxs; } @@ -381,9 +381,9 @@ std::vector getHitIdxsFrompT5(Event* event, unsigned int pT5) { std::vector getModuleIdxsFrompT5(Event* event, unsigned int pT5) { std::vector hits = getOuterTrackerHitsFrompT5(event, pT5); std::vector module_idxs; - Hits const* hitsEvt = event->getHits().data(); + auto hitsEvt = event->getHits(); for (auto& hitIdx : hits) { - module_idxs.push_back(hitsEvt->moduleIndices[hitIdx]); + module_idxs.push_back(hitsEvt.moduleIndices()[hitIdx]); } return module_idxs; } @@ -410,11 +410,11 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromTC(Event* event, unsigned int TC) { +std::vector getLSsFromTC(Event* event, unsigned int iTC) { // Get the type of the 
track candidate - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - short type = trackCandidates->trackCandidateType[TC]; - unsigned int objidx = trackCandidates->directObjectIndices[TC]; + auto const& trackCandidates = event->getTrackCandidates(); + short type = trackCandidates.trackCandidateType()[iTC]; + unsigned int objidx = trackCandidates.directObjectIndices()[iTC]; switch (type) { case kpT5: return getLSsFrompT5(event, objidx); @@ -433,11 +433,11 @@ std::vector getLSsFromTC(Event* event, unsigned int TC) { //____________________________________________________________________________________________ std::tuple, std::vector> getHitIdxsAndHitTypesFromTC(Event* event, - unsigned TC) { + unsigned iTC) { // Get the type of the track candidate - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - short type = trackCandidates->trackCandidateType[TC]; - unsigned int objidx = trackCandidates->directObjectIndices[TC]; + auto const& trackCandidates = event->getTrackCandidates(); + short type = trackCandidates.trackCandidateType()[iTC]; + unsigned int objidx = trackCandidates.directObjectIndices()[iTC]; switch (type) { case kpT5: return getHitIdxsAndHitTypesFrompT5(event, objidx); diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc index e12512f5c5c7d..f1269840c9170 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc @@ -226,8 +226,8 @@ void setOutputBranches(Event* event) { std::vector> tc_matched_simIdx; // ============ Track candidates ============= - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + auto const& trackCandidates = event->getTrackCandidates(); + unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); for (unsigned int 
idx = 0; idx < nTrackCandidates; idx++) { // Compute reco quantities of track candidate based on final object int type, isFake; @@ -291,23 +291,22 @@ void setOptionalOutputBranches(Event* event) { //________________________________________________________________________________________________________________________________ void setPixelQuintupletOutputBranches(Event* event) { // ============ pT5 ============= - PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets().data(); - Quintuplets const* quintuplets = event->getQuintuplets().data(); - Segments const* segments = event->getSegments().data(); - Modules const* modules = event->getModules().data(); + auto const pixelQuintuplets = event->getPixelQuintuplets(); + auto const quintuplets = event->getQuintuplets(); + auto const segmentsPixel = event->getSegments(); + auto modules = event->getModules(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); - unsigned int nPixelQuintuplets = - *pixelQuintuplets->nPixelQuintuplets; // size of this nPixelTriplets array is 1 (NOTE: parallelism lost here.) 
+ unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); std::vector sim_pT5_matched(n_accepted_simtrk); std::vector> pT5_matched_simIdx; for (unsigned int pT5 = 0; pT5 < nPixelQuintuplets; pT5++) { unsigned int T5Index = getT5FrompT5(event, pT5); unsigned int pLSIndex = getPixelLSFrompT5(event, pT5); - float pt = (__H2F(quintuplets->innerRadius[T5Index]) * k2Rinv1GeVf * 2 + segments->ptIn[pLSIndex]) / 2; - float eta = segments->eta[pLSIndex]; - float phi = segments->phi[pLSIndex]; + float pt = (__H2F(quintuplets.innerRadius()[T5Index]) * k2Rinv1GeVf * 2 + segmentsPixel.ptIn()[pLSIndex]) / 2; + float eta = segmentsPixel.eta()[pLSIndex]; + float phi = segmentsPixel.phi()[pLSIndex]; std::vector hit_idx = getHitIdxsFrompT5(event, pT5); std::vector module_idx = getModuleIdxsFrompT5(event, pT5); @@ -316,8 +315,8 @@ void setPixelQuintupletOutputBranches(Event* event) { int layer_binary = 1; int moduleType_binary = 0; for (size_t i = 0; i < module_idx.size(); i += 2) { - layer_binary |= (1 << (modules->layers[module_idx[i]] + 6 * (modules->subdets[module_idx[i]] == 4))); - moduleType_binary |= (modules->moduleType[module_idx[i]] << i); + layer_binary |= (1 << (modules.layers()[module_idx[i]] + 6 * (modules.subdets()[module_idx[i]] == 4))); + moduleType_binary |= (modules.moduleType()[module_idx[i]] << i); } std::vector simidx = matchedSimTrkIdxs(hit_idx, hit_type); ana.tx->pushbackToBranch("pT5_isFake", static_cast(simidx.size() == 0)); @@ -366,21 +365,22 @@ void setPixelQuintupletOutputBranches(Event* event) { //________________________________________________________________________________________________________________________________ void setQuintupletOutputBranches(Event* event) { - Quintuplets const* quintuplets = event->getQuintuplets().data(); - ObjectRanges const* ranges = event->getRanges().data(); - Modules const* modules = event->getModules().data(); + auto const quintuplets = event->getQuintuplets(); + auto const quintupletsOccupancy = 
event->getQuintuplets(); + auto ranges = event->getRanges(); + auto modules = event->getModules(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); std::vector sim_t5_matched(n_accepted_simtrk); std::vector> t5_matched_simIdx; - for (unsigned int lowerModuleIdx = 0; lowerModuleIdx < *(modules->nLowerModules); ++lowerModuleIdx) { - int nQuintuplets = quintuplets->nQuintuplets[lowerModuleIdx]; + for (unsigned int lowerModuleIdx = 0; lowerModuleIdx < modules.nLowerModules(); ++lowerModuleIdx) { + int nQuintuplets = quintupletsOccupancy.nQuintuplets()[lowerModuleIdx]; for (unsigned int idx = 0; idx < nQuintuplets; idx++) { - unsigned int quintupletIndex = ranges->quintupletModuleIndices[lowerModuleIdx] + idx; - float pt = __H2F(quintuplets->innerRadius[quintupletIndex]) * k2Rinv1GeVf * 2; - float eta = __H2F(quintuplets->eta[quintupletIndex]); - float phi = __H2F(quintuplets->phi[quintupletIndex]); + unsigned int quintupletIndex = ranges.quintupletModuleIndices()[lowerModuleIdx] + idx; + float pt = __H2F(quintuplets.innerRadius()[quintupletIndex]) * k2Rinv1GeVf * 2; + float eta = __H2F(quintuplets.eta()[quintupletIndex]); + float phi = __H2F(quintuplets.phi()[quintupletIndex]); std::vector hit_idx = getHitIdxsFromT5(event, quintupletIndex); std::vector hit_type = getHitTypesFromT5(event, quintupletIndex); @@ -389,8 +389,8 @@ void setQuintupletOutputBranches(Event* event) { int layer_binary = 0; int moduleType_binary = 0; for (size_t i = 0; i < module_idx.size(); i += 2) { - layer_binary |= (1 << (modules->layers[module_idx[i]] + 6 * (modules->subdets[module_idx[i]] == 4))); - moduleType_binary |= (modules->moduleType[module_idx[i]] << i); + layer_binary |= (1 << (modules.layers()[module_idx[i]] + 6 * (modules.subdets()[module_idx[i]] == 4))); + moduleType_binary |= (modules.moduleType()[module_idx[i]] << i); } std::vector simidx = matchedSimTrkIdxs(hit_idx, hit_type); @@ -399,11 +399,11 @@ void setQuintupletOutputBranches(Event* event) { 
ana.tx->pushbackToBranch("t5_pt", pt); ana.tx->pushbackToBranch("t5_eta", eta); ana.tx->pushbackToBranch("t5_phi", phi); - ana.tx->pushbackToBranch("t5_innerRadius", __H2F(quintuplets->innerRadius[quintupletIndex])); - ana.tx->pushbackToBranch("t5_bridgeRadius", __H2F(quintuplets->bridgeRadius[quintupletIndex])); - ana.tx->pushbackToBranch("t5_outerRadius", __H2F(quintuplets->outerRadius[quintupletIndex])); - ana.tx->pushbackToBranch("t5_chiSquared", quintuplets->chiSquared[quintupletIndex]); - ana.tx->pushbackToBranch("t5_rzChiSquared", quintuplets->rzChiSquared[quintupletIndex]); + ana.tx->pushbackToBranch("t5_innerRadius", __H2F(quintuplets.innerRadius()[quintupletIndex])); + ana.tx->pushbackToBranch("t5_bridgeRadius", __H2F(quintuplets.bridgeRadius()[quintupletIndex])); + ana.tx->pushbackToBranch("t5_outerRadius", __H2F(quintuplets.outerRadius()[quintupletIndex])); + ana.tx->pushbackToBranch("t5_chiSquared", quintuplets.chiSquared()[quintupletIndex]); + ana.tx->pushbackToBranch("t5_rzChiSquared", quintuplets.rzChiSquared()[quintupletIndex]); ana.tx->pushbackToBranch("t5_layer_binary", layer_binary); ana.tx->pushbackToBranch("t5_moduleType_binary", moduleType_binary); @@ -437,22 +437,22 @@ void setQuintupletOutputBranches(Event* event) { //________________________________________________________________________________________________________________________________ void setPixelTripletOutputBranches(Event* event) { - PixelTriplets const* pixelTriplets = event->getPixelTriplets().data(); - Modules const* modules = event->getModules().data(); - Segments const* segments = event->getSegments().data(); + auto const pixelTriplets = event->getPixelTriplets(); + auto modules = event->getModules(); + SegmentsPixelConst segmentsPixel = event->getSegments(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); - unsigned int nPixelTriplets = *pixelTriplets->nPixelTriplets; + unsigned int nPixelTriplets = pixelTriplets.nPixelTriplets(); std::vector 
sim_pT3_matched(n_accepted_simtrk); std::vector> pT3_matched_simIdx; for (unsigned int pT3 = 0; pT3 < nPixelTriplets; pT3++) { unsigned int T3Index = getT3FrompT3(event, pT3); unsigned int pLSIndex = getPixelLSFrompT3(event, pT3); - const float pt = segments->ptIn[pLSIndex]; + const float pt = segmentsPixel.ptIn()[pLSIndex]; - float eta = segments->eta[pLSIndex]; - float phi = segments->phi[pLSIndex]; + float eta = segmentsPixel.eta()[pLSIndex]; + float phi = segmentsPixel.phi()[pLSIndex]; std::vector hit_idx = getHitIdxsFrompT3(event, pT3); std::vector hit_type = getHitTypesFrompT3(event, pT3); @@ -461,8 +461,8 @@ void setPixelTripletOutputBranches(Event* event) { int layer_binary = 1; int moduleType_binary = 0; for (size_t i = 0; i < module_idx.size(); i += 2) { - layer_binary |= (1 << (modules->layers[module_idx[i]] + 6 * (modules->subdets[module_idx[i]] == 4))); - moduleType_binary |= (modules->moduleType[module_idx[i]] << i); + layer_binary |= (1 << (modules.layers()[module_idx[i]] + 6 * (modules.subdets()[module_idx[i]] == 4))); + moduleType_binary |= (modules.moduleType()[module_idx[i]] << i); } ana.tx->pushbackToBranch("pT3_isFake", static_cast(simidx.size() == 0)); ana.tx->pushbackToBranch("pT3_pt", pt); @@ -501,12 +501,12 @@ void setPixelTripletOutputBranches(Event* event) { //________________________________________________________________________________________________________________________________ void setGnnNtupleBranches(Event* event) { // Get relevant information - Segments const* segments = event->getSegments().data(); - MiniDoublets const* miniDoublets = event->getMiniDoublets().data(); - Hits const* hitsEvt = event->getHits().data(); - Modules const* modules = event->getModules().data(); - ObjectRanges const* ranges = event->getRanges().data(); - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); + SegmentsOccupancyConst segmentsOccupancy = event->getSegments(); + MiniDoubletsOccupancyConst miniDoublets = 
event->getMiniDoublets(); + auto hitsEvt = event->getHits(); + auto modules = event->getModules(); + auto ranges = event->getRanges(); + auto const& trackCandidates = event->getTrackCandidates(); std::set mds_used_in_sg; std::map md_index_map; @@ -515,13 +515,13 @@ void setGnnNtupleBranches(Event* event) { // Loop over modules (lower ones where the MDs are saved) unsigned int nTotalMD = 0; unsigned int nTotalLS = 0; - for (unsigned int idx = 0; idx < *(modules->nLowerModules); ++idx) { - nTotalMD += miniDoublets->nMDs[idx]; - nTotalLS += segments->nSegments[idx]; + for (unsigned int idx = 0; idx < modules.nLowerModules(); ++idx) { + nTotalMD += miniDoublets.nMDs()[idx]; + nTotalLS += segmentsOccupancy.nSegments()[idx]; } std::set lss_used_in_true_tc; - unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { // Only consider true track candidates std::vector hitidxs; @@ -545,7 +545,7 @@ void setGnnNtupleBranches(Event* event) { // std::cout << " nTotalLS: " << nTotalLS << std::endl; // Loop over modules (lower ones where the MDs are saved) - for (unsigned int idx = 0; idx < *(modules->nLowerModules); ++idx) { + for (unsigned int idx = 0; idx < modules.nLowerModules(); ++idx) { // // Loop over minidoublets // for (unsigned int jdx = 0; jdx < miniDoublets->nMDs[idx]; jdx++) // { @@ -556,9 +556,9 @@ void setGnnNtupleBranches(Event* event) { // } // Loop over segments - for (unsigned int jdx = 0; jdx < segments->nSegments[idx]; jdx++) { + for (unsigned int jdx = 0; jdx < segmentsOccupancy.nSegments()[idx]; jdx++) { // Get the actual index to the segments using ranges - unsigned int sgIdx = ranges->segmentModuleIndices[idx] + jdx; + unsigned int sgIdx = ranges.segmentModuleIndices()[idx] + jdx; // Get the hit indices std::vector MDs = getMDsFromLS(event, sgIdx); @@ -582,8 +582,8 @@ void setGnnNtupleBranches(Event* event) { // 
Computing line segment pt estimate (assuming beam spot is at zero) lst_math::Hit hitA(0, 0, 0); - lst_math::Hit hitB(hitsEvt->xs[hits[0]], hitsEvt->ys[hits[0]], hitsEvt->zs[hits[0]]); - lst_math::Hit hitC(hitsEvt->xs[hits[2]], hitsEvt->ys[hits[2]], hitsEvt->zs[hits[2]]); + lst_math::Hit hitB(hitsEvt.xs()[hits[0]], hitsEvt.ys()[hits[0]], hitsEvt.zs()[hits[0]]); + lst_math::Hit hitC(hitsEvt.xs()[hits[2]], hitsEvt.ys()[hits[2]], hitsEvt.zs()[hits[2]]); lst_math::Hit center = lst_math::getCenterFromThreePoints(hitA, hitB, hitC); float pt = lst_math::ptEstimateFromRadius(center.rt()); float eta = hitC.eta(); @@ -642,25 +642,25 @@ void setGnnNtupleBranches(Event* event) { //________________________________________________________________________________________________________________________________ void setGnnNtupleMiniDoublet(Event* event, unsigned int MD) { // Get relevant information - MiniDoublets const* miniDoublets = event->getMiniDoublets().data(); - Hits const* hitsEvt = event->getHits().data(); + MiniDoubletsConst miniDoublets = event->getMiniDoublets(); + auto hitsEvt = event->getHits(); // Get the hit indices - unsigned int hit0 = miniDoublets->anchorHitIndices[MD]; - unsigned int hit1 = miniDoublets->outerHitIndices[MD]; + unsigned int hit0 = miniDoublets.anchorHitIndices()[MD]; + unsigned int hit1 = miniDoublets.outerHitIndices()[MD]; // Get the hit infos - const float hit0_x = hitsEvt->xs[hit0]; - const float hit0_y = hitsEvt->ys[hit0]; - const float hit0_z = hitsEvt->zs[hit0]; + const float hit0_x = hitsEvt.xs()[hit0]; + const float hit0_y = hitsEvt.ys()[hit0]; + const float hit0_z = hitsEvt.zs()[hit0]; const float hit0_r = sqrt(hit0_x * hit0_x + hit0_y * hit0_y); - const float hit1_x = hitsEvt->xs[hit1]; - const float hit1_y = hitsEvt->ys[hit1]; - const float hit1_z = hitsEvt->zs[hit1]; + const float hit1_x = hitsEvt.xs()[hit1]; + const float hit1_y = hitsEvt.ys()[hit1]; + const float hit1_z = hitsEvt.zs()[hit1]; const float hit1_r = sqrt(hit1_x * 
hit1_x + hit1_y * hit1_y); // Do sim matching - std::vector hit_idx = {hitsEvt->idxs[hit0], hitsEvt->idxs[hit1]}; + std::vector hit_idx = {hitsEvt.idxs()[hit0], hitsEvt.idxs()[hit1]}; std::vector hit_type = {4, 4}; std::vector simidxs = matchedSimTrkIdxs(hit_idx, hit_type); @@ -668,8 +668,8 @@ void setGnnNtupleMiniDoublet(Event* event, unsigned int MD) { int tp_type = getDenomSimTrkType(simidxs); // Obtain where the actual hit is located in terms of their layer, module, rod, and ring number - unsigned int anchitidx = hitsEvt->idxs[hit0]; - int subdet = trk.ph2_subdet()[hitsEvt->idxs[anchitidx]]; + unsigned int anchitidx = hitsEvt.idxs()[hit0]; + int subdet = trk.ph2_subdet()[hitsEvt.idxs()[anchitidx]]; int is_endcap = subdet == 4; int layer = trk.ph2_layer()[anchitidx] + @@ -677,7 +677,7 @@ void setGnnNtupleMiniDoublet(Event* event, unsigned int MD) { int detId = trk.ph2_detId()[anchitidx]; // Obtaining dPhiChange - float dphichange = miniDoublets->dphichanges[MD]; + float dphichange = miniDoublets.dphichanges()[MD]; // Computing pt float pt = hit0_r * k2Rinv1GeVf / sin(dphichange); @@ -710,8 +710,8 @@ void setGnnNtupleMiniDoublet(Event* event, unsigned int MD) { //________________________________________________________________________________________________________________________________ std::tuple> parseTrackCandidate(Event* event, unsigned int idx) { // Get the type of the track candidate - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - short type = trackCandidates->trackCandidateType[idx]; + auto const& trackCandidates = event->getTrackCandidates(); + short type = trackCandidates.trackCandidateType()[idx]; enum { pT5 = 7, pT3 = 5, T5 = 4, pLS = 8 }; @@ -744,9 +744,9 @@ std::tuple> parseTrackCandidate( std::tuple, std::vector> parsepT5(Event* event, unsigned int idx) { // Get relevant information - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - Quintuplets const* quintuplets = 
event->getQuintuplets().data(); - Segments const* segments = event->getSegments().data(); + auto const trackCandidates = event->getTrackCandidates(); + auto const quintuplets = event->getQuintuplets(); + auto const segmentsPixel = event->getSegments(); // // pictorial representation of a pT5 @@ -757,7 +757,7 @@ std::tuple, std::vectordirectObjectIndices[idx]; + unsigned int pT5 = trackCandidates.directObjectIndices()[idx]; unsigned int pLS = getPixelLSFrompT5(event, pT5); unsigned int T5Index = getT5FrompT5(event, pT5); @@ -839,10 +839,10 @@ std::tuple, std::vectorptIn[pLS]; - const float eta_pLS = segments->eta[pLS]; - const float phi_pLS = segments->phi[pLS]; - float pt_T5 = __H2F(quintuplets->innerRadius[T5Index]) * 2 * k2Rinv1GeVf; + const float pt_pLS = segmentsPixel.ptIn()[pLS]; + const float eta_pLS = segmentsPixel.eta()[pLS]; + const float phi_pLS = segmentsPixel.phi()[pLS]; + float pt_T5 = __H2F(quintuplets.innerRadius()[T5Index]) * 2 * k2Rinv1GeVf; const float pt = (pt_T5 + pt_pLS) / 2; // Form the hit idx/type std::vector @@ -856,9 +856,9 @@ std::tuple, std::vector, std::vector> parsepT3(Event* event, unsigned int idx) { // Get relevant information - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - Triplets const* triplets = event->getTriplets().data(); - Segments const* segments = event->getSegments().data(); + auto const trackCandidates = event->getTrackCandidates(); + auto const triplets = event->getTriplets(); + auto const segmentsPixel = event->getSegments(); // // pictorial representation of a pT3 @@ -867,15 +867,15 @@ std::tuple, std::vectordirectObjectIndices[idx]; + unsigned int pT3 = trackCandidates.directObjectIndices()[idx]; unsigned int pLS = getPixelLSFrompT3(event, pT3); unsigned int T3 = getT3FrompT3(event, pT3); // pixel pt - const float pt_pLS = segments->ptIn[pLS]; - const float eta_pLS = segments->eta[pLS]; - const float phi_pLS = segments->phi[pLS]; - float pt_T3 = triplets->circleRadius[T3] * 2 * 
k2Rinv1GeVf; + const float pt_pLS = segmentsPixel.ptIn()[pLS]; + const float eta_pLS = segmentsPixel.eta()[pLS]; + const float phi_pLS = segmentsPixel.phi()[pLS]; + float pt_T3 = triplets.radius()[T3] * 2 * k2Rinv1GeVf; // average pt const float pt = (pt_pLS + pt_T3) / 2; @@ -890,9 +890,9 @@ std::tuple, std::vector, std::vector> parseT5(Event* event, unsigned int idx) { - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - Quintuplets const* quintuplets = event->getQuintuplets().data(); - unsigned int T5 = trackCandidates->directObjectIndices[idx]; + auto const trackCandidates = event->getTrackCandidates(); + auto const quintuplets = event->getQuintuplets(); + unsigned int T5 = trackCandidates.directObjectIndices()[idx]; std::vector hits = getHitsFromT5(event, T5); // @@ -907,7 +907,7 @@ std::tuple, std::vectorinnerRadius[T5] * k2Rinv1GeVf * 2; + const float pt = quintuplets.innerRadius()[T5] * k2Rinv1GeVf * 2; // T5 eta and phi are computed using outer and innermost hits lst_math::Hit hitA(trk.ph2_x()[Hit_0], trk.ph2_y()[Hit_0], trk.ph2_z()[Hit_0]); @@ -924,16 +924,16 @@ std::tuple, std::vector, std::vector> parsepLS(Event* event, unsigned int idx) { - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - Segments const* segments = event->getSegments().data(); + auto const& trackCandidates = event->getTrackCandidates(); + SegmentsPixelConst segmentsPixel = event->getSegments(); // Getting pLS index - unsigned int pLS = trackCandidates->directObjectIndices[idx]; + unsigned int pLS = trackCandidates.directObjectIndices()[idx]; // Getting pt eta and phi - float pt = segments->ptIn[pLS]; - float eta = segments->eta[pLS]; - float phi = segments->phi[pLS]; + float pt = segmentsPixel.ptIn()[pLS]; + float eta = segmentsPixel.eta()[pLS]; + float phi = segmentsPixel.phi()[pLS]; // Getting hit indices and types std::vector hit_idx = getPixelHitIdxsFrompLS(event, pLS); @@ -944,32 +944,32 @@ std::tuple, 
std::vectorgetModules().data(); - ObjectRanges const* ranges = event->getRanges().data(); + auto modules = event->getModules(); + auto hitRanges = event->getHits(); int nHits = 0; - for (unsigned int idx = 0; idx <= *(modules->nLowerModules); + for (unsigned int idx = 0; idx <= modules.nLowerModules(); idx++) // "<=" because cheating to include pixel track candidate lower module { - nHits += ranges->hitRanges[4 * idx + 1] - ranges->hitRanges[4 * idx] + 1; - nHits += ranges->hitRanges[4 * idx + 3] - ranges->hitRanges[4 * idx + 2] + 1; + nHits += hitRanges.hitRanges()[2 * idx][1] - hitRanges.hitRanges()[2 * idx][0] + 1; + nHits += hitRanges.hitRanges()[2 * idx + 1][1] - hitRanges.hitRanges()[2 * idx + 1][0] + 1; } std::cout << " nHits: " << nHits << std::endl; } //________________________________________________________________________________________________________________________________ void printMiniDoubletMultiplicities(Event* event) { - MiniDoublets const* miniDoublets = event->getMiniDoublets().data(); - Modules const* modules = event->getModules().data(); + MiniDoubletsOccupancyConst miniDoublets = event->getMiniDoublets(); + auto modules = event->getModules(); int nMiniDoublets = 0; int totOccupancyMiniDoublets = 0; - for (unsigned int idx = 0; idx <= *(modules->nModules); + for (unsigned int idx = 0; idx <= modules.nModules(); idx++) // "<=" because cheating to include pixel track candidate lower module { - if (modules->isLower[idx]) { - nMiniDoublets += miniDoublets->nMDs[idx]; - totOccupancyMiniDoublets += miniDoublets->totOccupancyMDs[idx]; + if (modules.isLower()[idx]) { + nMiniDoublets += miniDoublets.nMDs()[idx]; + totOccupancyMiniDoublets += miniDoublets.totOccupancyMDs()[idx]; } } std::cout << " nMiniDoublets: " << nMiniDoublets << std::endl; @@ -986,19 +986,20 @@ void printAllObjects(Event* event) { //________________________________________________________________________________________________________________________________ void 
printMDs(Event* event) { - MiniDoublets const* miniDoublets = event->getMiniDoublets().data(); - Hits const* hitsEvt = event->getHits().data(); - Modules const* modules = event->getModules().data(); - ObjectRanges const* ranges = event->getRanges().data(); + MiniDoubletsConst miniDoublets = event->getMiniDoublets(); + MiniDoubletsOccupancyConst miniDoubletsOccupancy = event->getMiniDoublets(); + auto hitsEvt = event->getHits(); + auto modules = event->getModules(); + auto ranges = event->getRanges(); // Then obtain the lower module index - for (unsigned int idx = 0; idx <= *(modules->nLowerModules); ++idx) { - for (unsigned int iMD = 0; iMD < miniDoublets->nMDs[idx]; iMD++) { - unsigned int mdIdx = ranges->miniDoubletModuleIndices[idx] + iMD; - unsigned int LowerHitIndex = miniDoublets->anchorHitIndices[mdIdx]; - unsigned int UpperHitIndex = miniDoublets->outerHitIndices[mdIdx]; - unsigned int hit0 = hitsEvt->idxs[LowerHitIndex]; - unsigned int hit1 = hitsEvt->idxs[UpperHitIndex]; + for (unsigned int idx = 0; idx <= modules.nLowerModules(); ++idx) { + for (unsigned int iMD = 0; iMD < miniDoubletsOccupancy.nMDs()[idx]; iMD++) { + unsigned int mdIdx = ranges.miniDoubletModuleIndices()[idx] + iMD; + unsigned int LowerHitIndex = miniDoublets.anchorHitIndices()[mdIdx]; + unsigned int UpperHitIndex = miniDoublets.outerHitIndices()[mdIdx]; + unsigned int hit0 = hitsEvt.idxs()[LowerHitIndex]; + unsigned int hit1 = hitsEvt.idxs()[UpperHitIndex]; std::cout << "VALIDATION 'MD': " << "MD" << " hit0: " << hit0 << " hit1: " << hit1 << std::endl; @@ -1008,28 +1009,29 @@ void printMDs(Event* event) { //________________________________________________________________________________________________________________________________ void printLSs(Event* event) { - Segments const* segments = event->getSegments().data(); - MiniDoublets const* miniDoublets = event->getMiniDoublets().data(); - Hits const* hitsEvt = event->getHits().data(); - Modules const* modules = 
event->getModules().data(); - ObjectRanges const* ranges = event->getRanges().data(); + SegmentsConst segments = event->getSegments(); + SegmentsOccupancyConst segmentsOccupancy = event->getSegments(); + MiniDoubletsConst miniDoublets = event->getMiniDoublets(); + auto hitsEvt = event->getHits(); + auto modules = event->getModules(); + auto ranges = event->getRanges(); int nSegments = 0; - for (unsigned int i = 0; i < *(modules->nLowerModules); ++i) { + for (unsigned int i = 0; i < modules.nLowerModules(); ++i) { unsigned int idx = i; //modules->lowerModuleIndices[i]; - nSegments += segments->nSegments[idx]; - for (unsigned int jdx = 0; jdx < segments->nSegments[idx]; jdx++) { - unsigned int sgIdx = ranges->segmentModuleIndices[idx] + jdx; - unsigned int InnerMiniDoubletIndex = segments->mdIndices[2 * sgIdx]; - unsigned int OuterMiniDoubletIndex = segments->mdIndices[2 * sgIdx + 1]; - unsigned int InnerMiniDoubletLowerHitIndex = miniDoublets->anchorHitIndices[InnerMiniDoubletIndex]; - unsigned int InnerMiniDoubletUpperHitIndex = miniDoublets->outerHitIndices[InnerMiniDoubletIndex]; - unsigned int OuterMiniDoubletLowerHitIndex = miniDoublets->anchorHitIndices[OuterMiniDoubletIndex]; - unsigned int OuterMiniDoubletUpperHitIndex = miniDoublets->outerHitIndices[OuterMiniDoubletIndex]; - unsigned int hit0 = hitsEvt->idxs[InnerMiniDoubletLowerHitIndex]; - unsigned int hit1 = hitsEvt->idxs[InnerMiniDoubletUpperHitIndex]; - unsigned int hit2 = hitsEvt->idxs[OuterMiniDoubletLowerHitIndex]; - unsigned int hit3 = hitsEvt->idxs[OuterMiniDoubletUpperHitIndex]; + nSegments += segmentsOccupancy.nSegments()[idx]; + for (unsigned int jdx = 0; jdx < segmentsOccupancy.nSegments()[idx]; jdx++) { + unsigned int sgIdx = ranges.segmentModuleIndices()[idx] + jdx; + unsigned int InnerMiniDoubletIndex = segments.mdIndices()[sgIdx][0]; + unsigned int OuterMiniDoubletIndex = segments.mdIndices()[sgIdx][1]; + unsigned int InnerMiniDoubletLowerHitIndex = 
miniDoublets.anchorHitIndices()[InnerMiniDoubletIndex]; + unsigned int InnerMiniDoubletUpperHitIndex = miniDoublets.outerHitIndices()[InnerMiniDoubletIndex]; + unsigned int OuterMiniDoubletLowerHitIndex = miniDoublets.anchorHitIndices()[OuterMiniDoubletIndex]; + unsigned int OuterMiniDoubletUpperHitIndex = miniDoublets.outerHitIndices()[OuterMiniDoubletIndex]; + unsigned int hit0 = hitsEvt.idxs()[InnerMiniDoubletLowerHitIndex]; + unsigned int hit1 = hitsEvt.idxs()[InnerMiniDoubletUpperHitIndex]; + unsigned int hit2 = hitsEvt.idxs()[OuterMiniDoubletLowerHitIndex]; + unsigned int hit3 = hitsEvt.idxs()[OuterMiniDoubletUpperHitIndex]; std::cout << "VALIDATION 'LS': " << "LS" << " hit0: " << hit0 << " hit1: " << hit1 << " hit2: " << hit2 << " hit3: " << hit3 << std::endl; @@ -1040,27 +1042,28 @@ void printLSs(Event* event) { //________________________________________________________________________________________________________________________________ void printpLSs(Event* event) { - Segments const* segments = event->getSegments().data(); - MiniDoublets const* miniDoublets = event->getMiniDoublets().data(); - Hits const* hitsEvt = event->getHits().data(); - Modules const* modules = event->getModules().data(); - ObjectRanges const* ranges = event->getRanges().data(); - - unsigned int i = *(modules->nLowerModules); + SegmentsConst segments = event->getSegments(); + SegmentsOccupancyConst segmentsOccupancy = event->getSegments(); + MiniDoubletsConst miniDoublets = event->getMiniDoublets(); + auto hitsEvt = event->getHits(); + auto modules = event->getModules(); + auto ranges = event->getRanges(); + + unsigned int i = modules.nLowerModules(); unsigned int idx = i; //modules->lowerModuleIndices[i]; - int npLS = segments->nSegments[idx]; - for (unsigned int jdx = 0; jdx < segments->nSegments[idx]; jdx++) { - unsigned int sgIdx = ranges->segmentModuleIndices[idx] + jdx; - unsigned int InnerMiniDoubletIndex = segments->mdIndices[2 * sgIdx]; - unsigned int 
OuterMiniDoubletIndex = segments->mdIndices[2 * sgIdx + 1]; - unsigned int InnerMiniDoubletLowerHitIndex = miniDoublets->anchorHitIndices[InnerMiniDoubletIndex]; - unsigned int InnerMiniDoubletUpperHitIndex = miniDoublets->outerHitIndices[InnerMiniDoubletIndex]; - unsigned int OuterMiniDoubletLowerHitIndex = miniDoublets->anchorHitIndices[OuterMiniDoubletIndex]; - unsigned int OuterMiniDoubletUpperHitIndex = miniDoublets->outerHitIndices[OuterMiniDoubletIndex]; - unsigned int hit0 = hitsEvt->idxs[InnerMiniDoubletLowerHitIndex]; - unsigned int hit1 = hitsEvt->idxs[InnerMiniDoubletUpperHitIndex]; - unsigned int hit2 = hitsEvt->idxs[OuterMiniDoubletLowerHitIndex]; - unsigned int hit3 = hitsEvt->idxs[OuterMiniDoubletUpperHitIndex]; + int npLS = segmentsOccupancy.nSegments()[idx]; + for (unsigned int jdx = 0; jdx < segmentsOccupancy.nSegments()[idx]; jdx++) { + unsigned int sgIdx = ranges.segmentModuleIndices()[idx] + jdx; + unsigned int InnerMiniDoubletIndex = segments.mdIndices()[sgIdx][0]; + unsigned int OuterMiniDoubletIndex = segments.mdIndices()[sgIdx][1]; + unsigned int InnerMiniDoubletLowerHitIndex = miniDoublets.anchorHitIndices()[InnerMiniDoubletIndex]; + unsigned int InnerMiniDoubletUpperHitIndex = miniDoublets.outerHitIndices()[InnerMiniDoubletIndex]; + unsigned int OuterMiniDoubletLowerHitIndex = miniDoublets.anchorHitIndices()[OuterMiniDoubletIndex]; + unsigned int OuterMiniDoubletUpperHitIndex = miniDoublets.outerHitIndices()[OuterMiniDoubletIndex]; + unsigned int hit0 = hitsEvt.idxs()[InnerMiniDoubletLowerHitIndex]; + unsigned int hit1 = hitsEvt.idxs()[InnerMiniDoubletUpperHitIndex]; + unsigned int hit2 = hitsEvt.idxs()[OuterMiniDoubletLowerHitIndex]; + unsigned int hit3 = hitsEvt.idxs()[OuterMiniDoubletUpperHitIndex]; std::cout << "VALIDATION 'pLS': " << "pLS" << " hit0: " << hit0 << " hit1: " << hit1 << " hit2: " << hit2 << " hit3: " << hit3 << std::endl; @@ -1070,37 +1073,38 @@ void printpLSs(Event* event) { 
//________________________________________________________________________________________________________________________________ void printT3s(Event* event) { - Triplets const* triplets = event->getTriplets().data(); - Segments const* segments = event->getSegments().data(); - MiniDoublets const* miniDoublets = event->getMiniDoublets().data(); - Hits const* hitsEvt = event->getHits().data(); - Modules const* modules = event->getModules().data(); + auto const triplets = event->getTriplets(); + auto const tripletsOccupancy = event->getTriplets(); + SegmentsConst segments = event->getSegments(); + MiniDoubletsConst miniDoublets = event->getMiniDoublets(); + auto hitsEvt = event->getHits(); + auto modules = event->getModules(); int nTriplets = 0; - for (unsigned int i = 0; i < *(modules->nLowerModules); ++i) { + for (unsigned int i = 0; i < modules.nLowerModules(); ++i) { // unsigned int idx = modules->lowerModuleIndices[i]; - nTriplets += triplets->nTriplets[i]; + nTriplets += tripletsOccupancy.nTriplets()[i]; unsigned int idx = i; - for (unsigned int jdx = 0; jdx < triplets->nTriplets[idx]; jdx++) { + for (unsigned int jdx = 0; jdx < tripletsOccupancy.nTriplets()[idx]; jdx++) { unsigned int tpIdx = idx * 5000 + jdx; - unsigned int InnerSegmentIndex = triplets->segmentIndices[2 * tpIdx]; - unsigned int OuterSegmentIndex = triplets->segmentIndices[2 * tpIdx + 1]; - unsigned int InnerSegmentInnerMiniDoubletIndex = segments->mdIndices[2 * InnerSegmentIndex]; - unsigned int InnerSegmentOuterMiniDoubletIndex = segments->mdIndices[2 * InnerSegmentIndex + 1]; - unsigned int OuterSegmentOuterMiniDoubletIndex = segments->mdIndices[2 * OuterSegmentIndex + 1]; - - unsigned int hit_idx0 = miniDoublets->anchorHitIndices[InnerSegmentInnerMiniDoubletIndex]; - unsigned int hit_idx1 = miniDoublets->outerHitIndices[InnerSegmentInnerMiniDoubletIndex]; - unsigned int hit_idx2 = miniDoublets->anchorHitIndices[InnerSegmentOuterMiniDoubletIndex]; - unsigned int hit_idx3 = 
miniDoublets->outerHitIndices[InnerSegmentOuterMiniDoubletIndex]; - unsigned int hit_idx4 = miniDoublets->anchorHitIndices[OuterSegmentOuterMiniDoubletIndex]; - unsigned int hit_idx5 = miniDoublets->outerHitIndices[OuterSegmentOuterMiniDoubletIndex]; - - unsigned int hit0 = hitsEvt->idxs[hit_idx0]; - unsigned int hit1 = hitsEvt->idxs[hit_idx1]; - unsigned int hit2 = hitsEvt->idxs[hit_idx2]; - unsigned int hit3 = hitsEvt->idxs[hit_idx3]; - unsigned int hit4 = hitsEvt->idxs[hit_idx4]; - unsigned int hit5 = hitsEvt->idxs[hit_idx5]; + unsigned int InnerSegmentIndex = triplets.segmentIndices()[tpIdx][0]; + unsigned int OuterSegmentIndex = triplets.segmentIndices()[tpIdx][1]; + unsigned int InnerSegmentInnerMiniDoubletIndex = segments.mdIndices()[InnerSegmentIndex][0]; + unsigned int InnerSegmentOuterMiniDoubletIndex = segments.mdIndices()[InnerSegmentIndex][1]; + unsigned int OuterSegmentOuterMiniDoubletIndex = segments.mdIndices()[OuterSegmentIndex][1]; + + unsigned int hit_idx0 = miniDoublets.anchorHitIndices()[InnerSegmentInnerMiniDoubletIndex]; + unsigned int hit_idx1 = miniDoublets.outerHitIndices()[InnerSegmentInnerMiniDoubletIndex]; + unsigned int hit_idx2 = miniDoublets.anchorHitIndices()[InnerSegmentOuterMiniDoubletIndex]; + unsigned int hit_idx3 = miniDoublets.outerHitIndices()[InnerSegmentOuterMiniDoubletIndex]; + unsigned int hit_idx4 = miniDoublets.anchorHitIndices()[OuterSegmentOuterMiniDoubletIndex]; + unsigned int hit_idx5 = miniDoublets.outerHitIndices()[OuterSegmentOuterMiniDoubletIndex]; + + unsigned int hit0 = hitsEvt.idxs()[hit_idx0]; + unsigned int hit1 = hitsEvt.idxs()[hit_idx1]; + unsigned int hit2 = hitsEvt.idxs()[hit_idx2]; + unsigned int hit3 = hitsEvt.idxs()[hit_idx3]; + unsigned int hit4 = hitsEvt.idxs()[hit_idx4]; + unsigned int hit5 = hitsEvt.idxs()[hit_idx5]; std::cout << "VALIDATION 'T3': " << "T3" << " hit0: " << hit0 << " hit1: " << hit1 << " hit2: " << hit2 << " hit3: " << hit3 << " hit4: " << hit4 @@ -1109,30 +1113,3 @@ void 
printT3s(Event* event) { } std::cout << "VALIDATION nTriplets: " << nTriplets << std::endl; } - -//________________________________________________________________________________________________________________________________ -void debugPrintOutlierMultiplicities(Event* event) { - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - Triplets const* triplets = event->getTriplets().data(); - Segments const* segments = event->getSegments().data(); - MiniDoublets const* miniDoublets = event->getMiniDoublets().data(); - Modules const* modules = event->getModules().data(); - ObjectRanges const* ranges = event->getRanges().data(); - //int nTrackCandidates = 0; - for (unsigned int idx = 0; idx <= *(modules->nLowerModules); ++idx) { - if (trackCandidates->nTrackCandidates[idx] > 50000) { - std::cout << " modules->detIds[modules->lowerModuleIndices[idx]]: " << modules->detIds[idx] << std::endl; - std::cout << " idx: " << idx - << " trackCandidates->nTrackCandidates[idx]: " << trackCandidates->nTrackCandidates[idx] << std::endl; - std::cout << " idx: " << idx << " triplets->nTriplets[idx]: " << triplets->nTriplets[idx] << std::endl; - unsigned int i = idx; //modules->lowerModuleIndices[idx]; - std::cout << " idx: " << idx << " i: " << i << " segments->nSegments[i]: " << segments->nSegments[i] << std::endl; - int nMD = miniDoublets->nMDs[2 * idx] + miniDoublets->nMDs[2 * idx + 1]; - std::cout << " idx: " << idx << " nMD: " << nMD << std::endl; - int nHits = 0; - nHits += ranges->hitRanges[4 * idx + 1] - ranges->hitRanges[4 * idx] + 1; - nHits += ranges->hitRanges[4 * idx + 3] - ranges->hitRanges[4 * idx + 2] + 1; - std::cout << " idx: " << idx << " nHits: " << nHits << std::endl; - } - } -} diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h index 7a25c0d3cbcc6..3f04ec59ad554 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h +++ 
b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h @@ -52,7 +52,4 @@ void printT3s(LSTEvent* event); void printT4s(LSTEvent* event); void printTCs(LSTEvent* event); -// Print anomalous multiplicities -void debugPrintOutlierMultiplicities(LSTEvent* event); - #endif diff --git a/RecoTracker/LSTCore/standalone/code/rooutil/cxxopts.h b/RecoTracker/LSTCore/standalone/code/rooutil/cxxopts.h index b799708f0f39f..9510778fab3ef 100644 --- a/RecoTracker/LSTCore/standalone/code/rooutil/cxxopts.h +++ b/RecoTracker/LSTCore/standalone/code/rooutil/cxxopts.h @@ -226,63 +226,62 @@ namespace cxxopts { class Option_exists_error : public OptionSpecException { public: Option_exists_error(const std::string& option) - : OptionSpecException(u8"Option " + LQUOTE + option + RQUOTE + u8" already exists") {} + : OptionSpecException("Option " + LQUOTE + option + RQUOTE + " already exists") {} }; class Invalid_option_format_error : public OptionSpecException { public: Invalid_option_format_error(const std::string& format) - : OptionSpecException(u8"Invalid option format " + LQUOTE + format + RQUOTE) {} + : OptionSpecException("Invalid option format " + LQUOTE + format + RQUOTE) {} }; class Option_syntax_exception : public OptionParseException { public: Option_syntax_exception(const std::string& text) - : OptionParseException(u8"Argument " + LQUOTE + text + RQUOTE + u8" starts with a - but has incorrect syntax") { - } + : OptionParseException("Argument " + LQUOTE + text + RQUOTE + " starts with a - but has incorrect syntax") {} }; class Option_not_exists_exception : public OptionParseException { public: Option_not_exists_exception(const std::string& option) - : OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + u8" does not exist") {} + : OptionParseException("Option " + LQUOTE + option + RQUOTE + " does not exist") {} }; class Missing_argument_exception : public OptionParseException { public: Missing_argument_exception(const std::string& option) - : 
OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + u8" is missing an argument") {} + : OptionParseException("Option " + LQUOTE + option + RQUOTE + " is missing an argument") {} }; class Option_requires_argument_exception : public OptionParseException { public: Option_requires_argument_exception(const std::string& option) - : OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + u8" requires an argument") {} + : OptionParseException("Option " + LQUOTE + option + RQUOTE + " requires an argument") {} }; class Option_not_has_argument_exception : public OptionParseException { public: Option_not_has_argument_exception(const std::string& option, const std::string& arg) - : OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + u8" does not take an argument, but argument " + + : OptionParseException("Option " + LQUOTE + option + RQUOTE + " does not take an argument, but argument " + LQUOTE + arg + RQUOTE + " given") {} }; class Option_not_present_exception : public OptionParseException { public: Option_not_present_exception(const std::string& option) - : OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + u8" not present") {} + : OptionParseException("Option " + LQUOTE + option + RQUOTE + " not present") {} }; class Argument_incorrect_type : public OptionParseException { public: Argument_incorrect_type(const std::string& arg) - : OptionParseException(u8"Argument " + LQUOTE + arg + RQUOTE + u8" failed to parse") {} + : OptionParseException("Argument " + LQUOTE + arg + RQUOTE + " failed to parse") {} }; class Option_required_exception : public OptionParseException { public: Option_required_exception(const std::string& option) - : OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + u8" is required but not present") {} + : OptionParseException("Option " + LQUOTE + option + RQUOTE + " is required but not present") {} }; namespace values {