diff --git a/SimTracker/TrackerHitAssociation/BuildFile.xml b/SimTracker/TrackerHitAssociation/BuildFile.xml
index 48e91f5cbee07..62017bf5c8dd1 100644
--- a/SimTracker/TrackerHitAssociation/BuildFile.xml
+++ b/SimTracker/TrackerHitAssociation/BuildFile.xml
@@ -8,9 +8,11 @@
+
+
diff --git a/SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h b/SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h
new file mode 100644
index 0000000000000..89e969313e4b6
--- /dev/null
+++ b/SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h
@@ -0,0 +1,69 @@
+#ifndef SimTracker_TrackerHitAssociation_plugins_trackerHitAssociationHeterogeneousProduct_h
+#define SimTracker_TrackerHitAssociation_plugins_trackerHitAssociationHeterogeneousProduct_h
+
+#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+
+namespace trackerHitAssociationHeterogeneous {
+
+ struct ClusterSLView {
+ using Clus2TP = std::array;
+
+ Clus2TP* links_d;
+ uint32_t* tkId_d;
+ uint32_t* tkId2_d;
+ uint32_t* n1_d;
+ uint32_t* n2_d;
+ };
+
+ template
+ class Product {
+ public:
+ template
+ using unique_ptr = typename Traits::template unique_ptr;
+
+ Product() = default;
+ ~Product() = default;
+ Product(Product const&) = delete;
+ Product(Product&&) = default;
+
+ Product(int nlinks, int nhits, cudaStream_t stream);
+
+ ClusterSLView& view() { return m_view; }
+ ClusterSLView const& view() const { return m_view; }
+
+ int nLinks() const { return m_nLinks; }
+ int nHits() const { return m_nHits; }
+
+ private:
+ static constexpr uint32_t n32 = 4;
+
+ unique_ptr m_storeTP; //!
+ unique_ptr m_store32; //!
+
+ ClusterSLView m_view; //!
+
+ int m_nLinks;
+ int m_nHits;
+ };
+
+ template
+ Product::Product(int nlinks, int nhits, cudaStream_t stream) : m_nLinks(nlinks), m_nHits(nhits) {
+ m_storeTP = Traits::template make_device_unique(m_nLinks * 7, stream);
+ m_store32 = Traits::template make_device_unique(m_nHits * n32, stream);
+
+ auto get32 = [&](int i) { return m_store32.get() + i * m_nHits; };
+
+ m_view.links_d = (ClusterSLView::Clus2TP*)(m_storeTP.get());
+ m_view.tkId_d = get32(0);
+ m_view.tkId2_d = get32(1);
+ m_view.n1_d = get32(2);
+ m_view.n2_d = get32(3);
+ }
+
+ using ProductCUDA = Product;
+
+} // namespace trackerHitAssociationHeterogeneous
+
+#endif // SimTracker_TrackerHitAssociation_plugins_trackerHitAssociationHeterogeneousProduct_h
diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu
index 767e3b814e6c1..00b03eb86bb34 100644
--- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu
+++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu
@@ -9,23 +9,20 @@
#include "ClusterSLOnGPU.h"
-using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU;
-using Clus2TP = ClusterSLGPU::Clus2TP;
+using ClusterSLView = trackerHitAssociationHeterogeneous::ClusterSLView;
+using Clus2TP = ClusterSLView::Clus2TP;
// #define DUMP_TK2
__global__ void simLink(const SiPixelDigisCUDA::DeviceConstView* dd,
uint32_t ndigis,
- clusterSLOnGPU::HitsOnGPU const* hhp,
- ClusterSLGPU const* slp,
+ TrackingRecHit2DSOAView const* hhp,
+ ClusterSLView sl,
uint32_t n) {
- assert(slp == slp->me_d);
-
- constexpr int32_t invTK = 0; // std::numeric_limits::max();
+ constexpr uint32_t invTK = 0; // std::numeric_limits::max();
constexpr uint16_t InvId = 9999; // must be > MaxNumModules
auto const& hh = *hhp;
- auto const& sl = *slp;
auto i = blockIdx.x * blockDim.x + threadIdx.x;
if (i >= ndigis)
@@ -41,7 +38,7 @@ __global__ void simLink(const SiPixelDigisCUDA::DeviceConstView* dd,
auto cl = first + dd->clus(i);
assert(cl < 2000 * blockDim.x);
- const Clus2TP me{{id, ch, 0, 0, 0, 0}};
+ const Clus2TP me{{id, ch, 0, 0, 0, 0, 0}};
auto less = [] __host__ __device__(Clus2TP const& a, Clus2TP const& b) -> bool {
// in this context we do not care of [2]
@@ -80,38 +77,32 @@ __global__ void simLink(const SiPixelDigisCUDA::DeviceConstView* dd,
}
}
-__global__ void verifyZero(uint32_t nhits, ClusterSLGPU const* slp) {
+__global__ void doZero(uint32_t nhits, ClusterSLView sl) {
auto i = blockIdx.x * blockDim.x + threadIdx.x;
if (i > nhits)
return;
- auto const& sl = *slp;
-
- assert(sl.tkId_d[i] == 0);
- auto const& tk = sl.links_d[0];
- assert(tk[0] == 0);
- assert(tk[1] == 0);
- assert(tk[2] == 0);
- assert(tk[3] == 0);
+ sl.tkId_d[i] = 0;
+ sl.n1_d[i] = 0;
+ sl.tkId2_d[i] = 0;
+ sl.n2_d[i] = 0;
}
-__global__ void dumpLink(
- int first, int ev, clusterSLOnGPU::HitsOnGPU const* hhp, uint32_t nhits, ClusterSLGPU const* slp) {
+__global__ void dumpLink(int first, int ev, TrackingRecHit2DSOAView const* hhp, uint32_t nhits, ClusterSLView sl) {
auto i = first + blockIdx.x * blockDim.x + threadIdx.x;
if (i > nhits)
return;
auto const& hh = *hhp;
- auto const& sl = *slp;
auto const& tk1 = sl.links_d[sl.tkId_d[i]];
#ifdef DUMP_TK2
auto const& tk2 = sl.links_d[sl.tkId2_d[i]];
- printf("HIT: %d %d %d %d %.4f %.4f %.4f %.4f %d %d %d %d %d %d %d %d %d %d %d %d %d\n",
+ printf("HIT: %d %d %d %d %.4f %.4f %.4f %.4f %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n",
#else
- printf("HIT: %d %d %d %d %.4f %.4f %.4f %.4f %d %d %d %d %d %d %d %d\n",
+ printf("HIT: %d %d %d %d %.4f %.4f %.4f %.4f %d %d %d %d %d %d %d %d %d\n",
#endif
ev,
i,
@@ -128,6 +119,7 @@ __global__ void dumpLink(
tk1[3],
tk1[4],
tk1[5],
+ tk1[6],
sl.n1_d[i]
#ifdef DUMP_TK2
,
@@ -135,6 +127,7 @@ __global__ void dumpLink(
tk2[3],
tk2[4],
tk2[5],
+ tk2[6],
sl.n2_d[i]
#endif
);
@@ -142,13 +135,11 @@ __global__ void dumpLink(
namespace clusterSLOnGPU {
- constexpr uint32_t invTK = 0; // std::numeric_limits::max();
-
void printCSVHeader() {
#ifdef DUMP_TK2
- printf("HIT: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
+ printf("HIT: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
#else
- printf("HIT: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
+ printf("HIT: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
#endif
"ev",
"ind",
@@ -163,6 +154,7 @@ namespace clusterSLOnGPU {
"ysize",
"tkId",
"pt",
+ "eta",
"z0",
"r0",
"n1"
@@ -170,6 +162,7 @@ namespace clusterSLOnGPU {
,
"tkId2",
"pt2",
+ "eta",
"z02",
"r02",
"n2"
@@ -180,74 +173,50 @@ namespace clusterSLOnGPU {
std::atomic evId(0);
std::once_flag doneCSVHeader;
- Kernel::Kernel(cudaStream_t stream, bool dump) : doDump(dump) {
+ Kernel::Kernel(bool dump) : doDump(dump) {
if (doDump)
std::call_once(doneCSVHeader, printCSVHeader);
- alloc(stream);
- }
-
- void Kernel::alloc(cudaStream_t stream) {
- cudaCheck(cudaMalloc((void**)&slgpu.links_d, (ClusterSLGPU::MAX_DIGIS) * sizeof(Clus2TP)));
- cudaCheck(cudaMalloc((void**)&slgpu.tkId_d, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t)));
- cudaCheck(cudaMalloc((void**)&slgpu.tkId2_d, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t)));
- cudaCheck(cudaMalloc((void**)&slgpu.n1_d, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t)));
- cudaCheck(cudaMalloc((void**)&slgpu.n2_d, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t)));
- cudaCheck(cudaMalloc((void**)&slgpu.me_d, sizeof(ClusterSLGPU)));
- cudaCheck(cudaMemcpyAsync(slgpu.me_d, &slgpu, sizeof(ClusterSLGPU), cudaMemcpyDefault, stream));
}
- void Kernel::deAlloc() {
- cudaCheck(cudaFree(slgpu.links_d));
- cudaCheck(cudaFree(slgpu.tkId_d));
- cudaCheck(cudaFree(slgpu.tkId2_d));
- cudaCheck(cudaFree(slgpu.n1_d));
- cudaCheck(cudaFree(slgpu.n2_d));
- cudaCheck(cudaFree(slgpu.me_d));
- }
-
- void Kernel::zero(cudaStream_t stream) {
- cudaCheck(cudaMemsetAsync(slgpu.tkId_d, invTK, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t), stream));
- cudaCheck(cudaMemsetAsync(slgpu.tkId2_d, invTK, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t), stream));
- cudaCheck(cudaMemsetAsync(slgpu.n1_d, 0, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t), stream));
- cudaCheck(cudaMemsetAsync(slgpu.n2_d, 0, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t), stream));
- }
+ trackerHitAssociationHeterogeneous::ProductCUDA Kernel::makeAsync(SiPixelDigisCUDA const& dd,
+ uint32_t ndigis,
+ HitsOnCPU const& hh,
+ Clus2TP const* digi2tp,
+ uint32_t nhits,
+ uint32_t nlinks,
+ cudaStream_t stream) const {
+ trackerHitAssociationHeterogeneous::ProductCUDA product(nlinks, nhits, stream);
+ auto& csl = product.view();
- void Kernel::algo(SiPixelDigisCUDA const& dd,
- uint32_t ndigis,
- HitsOnCPU const& hh,
- uint32_t nhits,
- uint32_t n,
- cudaStream_t stream) {
- zero(stream);
+ cudaCheck(cudaMemcpyAsync(csl.links_d, digi2tp, sizeof(Clus2TP) * nlinks, cudaMemcpyDefault, stream));
if (0 == nhits)
- return;
- ClusterSLGPU const& sl = slgpu;
+ return product;
int ev = ++evId;
int threadsPerBlock = 256;
int blocks = (nhits + threadsPerBlock - 1) / threadsPerBlock;
- verifyZero<<>>(nhits, sl.me_d);
+ doZero<<>>(nhits, csl);
cudaCheck(cudaGetLastError());
blocks = (ndigis + threadsPerBlock - 1) / threadsPerBlock;
-
- assert(sl.me_d);
- simLink<<>>(dd.view(), ndigis, hh.view(), sl.me_d, n);
+ simLink<<>>(dd.view(), ndigis, hh.view(), csl, nlinks);
cudaCheck(cudaGetLastError());
if (doDump) {
cudaStreamSynchronize(stream); // flush previous printf
// one line == 200B so each kernel can print only 5K lines....
- blocks = 16; // (nhits + threadsPerBlock - 1) / threadsPerBlock;
+ blocks = 16;
for (int first = 0; first < int(nhits); first += blocks * threadsPerBlock) {
- dumpLink<<>>(first, ev, hh.view(), nhits, sl.me_d);
+ dumpLink<<>>(first, ev, hh.view(), nhits, csl);
cudaCheck(cudaGetLastError());
cudaStreamSynchronize(stream);
}
}
cudaCheck(cudaGetLastError());
+
+ return product;
}
} // namespace clusterSLOnGPU
diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h
index be7eaba7230d7..d8879f2154df4 100644
--- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h
+++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h
@@ -4,40 +4,31 @@
#include
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
-
-#include "trackerHitAssociationHeterogeneousProduct.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#include "SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h"
namespace clusterSLOnGPU {
- using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU;
- using GPUProduct = trackerHitAssociationHeterogeneousProduct::GPUProduct;
-
+ using ClusterSLView = trackerHitAssociationHeterogeneous::ClusterSLView;
+ using Clus2TP = ClusterSLView::Clus2TP;
using HitsOnGPU = TrackingRecHit2DSOAView;
using HitsOnCPU = TrackingRecHit2DCUDA;
- using Clus2TP = ClusterSLGPU::Clus2TP;
-
class Kernel {
public:
- Kernel(cudaStream_t stream, bool dump);
- ~Kernel() { deAlloc(); }
- void algo(SiPixelDigisCUDA const& dd,
- uint32_t ndigis,
- HitsOnCPU const& hh,
- uint32_t nhits,
- uint32_t n,
- cudaStream_t stream);
- GPUProduct getProduct() { return GPUProduct{slgpu.me_d}; }
+ explicit Kernel(bool dump);
+ ~Kernel() {}
+ trackerHitAssociationHeterogeneous::ProductCUDA makeAsync(SiPixelDigisCUDA const& dd,
+ uint32_t ndigis,
+ HitsOnCPU const& hh,
+ Clus2TP const* digi2tp,
+ uint32_t nhits,
+ uint32_t nlinks,
+ cudaStream_t stream) const;
private:
- void alloc(cudaStream_t stream);
- void deAlloc();
- void zero(cudaStream_t stream);
-
public:
- ClusterSLGPU slgpu;
bool doDump;
};
} // namespace clusterSLOnGPU
diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationProducerCUDA.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationProducerCUDA.cc
new file mode 100644
index 0000000000000..b8a4e97e84d10
--- /dev/null
+++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationProducerCUDA.cc
@@ -0,0 +1,227 @@
+#include
+#include
+#include
+
+#include
+
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
+#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
+#include "DataFormats/Common/interface/DetSetVector.h"
+#include "DataFormats/Common/interface/DetSetVectorNew.h"
+#include "DataFormats/Common/interface/Handle.h"
+#include "DataFormats/DetId/interface/DetId.h"
+#include "DataFormats/Phase2TrackerCluster/interface/Phase2TrackerCluster1D.h"
+#include "DataFormats/Phase2TrackerDigi/interface/Phase2TrackerDigi.h"
+#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h"
+#include "DataFormats/SiPixelDetId/interface/PixelChannelIdentifier.h"
+#include "DataFormats/SiStripCluster/interface/SiStripCluster.h"
+#include "DataFormats/TrackerRecHit2D/interface/OmniClusterRef.h"
+#include "FWCore/Framework/interface/ESHandle.h"
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/EventSetup.h"
+#include "FWCore/Framework/interface/Frameworkfwd.h"
+#include "FWCore/Framework/interface/global/EDProducer.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/Utilities/interface/EDGetToken.h"
+#include "FWCore/Utilities/interface/InputTag.h"
+#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h"
+#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h"
+#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#include "SimDataFormats/Track/interface/SimTrackContainer.h"
+#include "SimDataFormats/TrackerDigiSimLink/interface/PixelDigiSimLink.h"
+#include "SimDataFormats/TrackerDigiSimLink/interface/StripDigiSimLink.h"
+#include "SimDataFormats/TrackingAnalysis/interface/TrackingParticle.h"
+#include "SimDataFormats/TrackingAnalysis/interface/TrackingParticleFwd.h"
+#include "SimTracker/TrackerHitAssociation/interface/ClusterTPAssociation.h"
+
+#include "ClusterSLOnGPU.h"
+
+class ClusterTPAssociationProducerCUDA : public edm::global::EDProducer<> {
+public:
+ typedef std::vector OmniClusterCollection;
+
+ using ClusterSLGPU = trackerHitAssociationHeterogeneous::ClusterSLView;
+ using Clus2TP = ClusterSLGPU::Clus2TP;
+ using ProductCUDA = trackerHitAssociationHeterogeneous::ProductCUDA;
+
+ explicit ClusterTPAssociationProducerCUDA(const edm::ParameterSet &);
+ ~ClusterTPAssociationProducerCUDA() override = default;
+
+ static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);
+
+private:
+ void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override;
+
+ std::map, TrackingParticleRef> makeMap(const edm::Event &iEvent) const;
+
+ template
+ std::vector> getSimTrackId(const edm::Handle> &simLinks,
+ const DetId &detId,
+ uint32_t channel) const;
+
+ edm::EDGetTokenT> sipixelSimLinksToken_;
+ edm::EDGetTokenT> sistripSimLinksToken_;
+ edm::EDGetTokenT> siphase2OTSimLinksToken_;
+ edm::EDGetTokenT> pixelClustersToken_;
+ edm::EDGetTokenT> stripClustersToken_;
+ edm::EDGetTokenT> phase2OTClustersToken_;
+ edm::EDGetTokenT trackingParticleToken_;
+
+ edm::EDGetTokenT> tGpuDigis;
+ edm::EDGetTokenT> tGpuHits;
+
+ edm::EDPutTokenT> tokenGPUProd_;
+
+ clusterSLOnGPU::Kernel m_gpuAlgo;
+};
+
+ClusterTPAssociationProducerCUDA::ClusterTPAssociationProducerCUDA(const edm::ParameterSet &cfg)
+ : sipixelSimLinksToken_(
+ consumes>(cfg.getParameter("pixelSimLinkSrc"))),
+ sistripSimLinksToken_(
+ consumes>(cfg.getParameter("stripSimLinkSrc"))),
+ siphase2OTSimLinksToken_(
+ consumes>(cfg.getParameter("phase2OTSimLinkSrc"))),
+ pixelClustersToken_(
+ consumes>(cfg.getParameter("pixelClusterSrc"))),
+ stripClustersToken_(
+ consumes>(cfg.getParameter("stripClusterSrc"))),
+ phase2OTClustersToken_(consumes>(
+ cfg.getParameter("phase2OTClusterSrc"))),
+ trackingParticleToken_(
+ consumes(cfg.getParameter("trackingParticleSrc"))),
+ tGpuDigis(
+ consumes>(cfg.getParameter("heterogeneousPixelDigiClusterSrc"))),
+ tGpuHits(
+ consumes>(cfg.getParameter("heterogeneousPixelRecHitSrc"))),
+ m_gpuAlgo(cfg.getParameter("dumpCSV")) {
+ tokenGPUProd_ = produces>();
+}
+
+void ClusterTPAssociationProducerCUDA::fillDescriptions(edm::ConfigurationDescriptions &descriptions) {
+ edm::ParameterSetDescription desc;
+ desc.add("simTrackSrc", edm::InputTag("g4SimHits"));
+ desc.add("pixelSimLinkSrc", edm::InputTag("simSiPixelDigis"));
+ desc.add("stripSimLinkSrc", edm::InputTag("simSiStripDigis"));
+ desc.add("phase2OTSimLinkSrc", edm::InputTag("simSiPixelDigis", "Tracker"));
+ desc.add("pixelClusterSrc", edm::InputTag("siPixelClusters"));
+ desc.add("stripClusterSrc", edm::InputTag("siStripClusters"));
+ desc.add("phase2OTClusterSrc", edm::InputTag("siPhase2Clusters"));
+ desc.add("trackingParticleSrc", edm::InputTag("mix", "MergedTrackTruth"));
+ desc.add("heterogeneousPixelDigiClusterSrc", edm::InputTag("siPixelClustersCUDAPreSplitting"));
+ desc.add("heterogeneousPixelRecHitSrc", edm::InputTag("siPixelRecHitsCUDAPreSplitting"));
+
+ desc.add("dumpCSV", false);
+
+ descriptions.add("tpClusterProducerCUDADefault", desc);
+}
+
+std::map, TrackingParticleRef> ClusterTPAssociationProducerCUDA::makeMap(
+ const edm::Event &iEvent) const {
+ // TrackingParticle
+ edm::Handle TPCollectionH;
+ iEvent.getByToken(trackingParticleToken_, TPCollectionH);
+
+ // prepare temporary map between SimTrackId and TrackingParticle index
+ std::map, TrackingParticleRef> mapping;
+ for (TrackingParticleCollection::size_type itp = 0; itp < TPCollectionH.product()->size(); ++itp) {
+ TrackingParticleRef trackingParticle(TPCollectionH, itp);
+
+ // SimTracks inside TrackingParticle
+ EncodedEventId eid(trackingParticle->eventId());
+ for (auto itrk = trackingParticle->g4Track_begin(); itrk != trackingParticle->g4Track_end(); ++itrk) {
+ std::pair trkid(itrk->trackId(), eid);
+ //std::cout << "creating map for id: " << trkid.first << " with tp: " << trackingParticle.key() << std::endl;
+ mapping.insert(std::make_pair(trkid, trackingParticle));
+ }
+ }
+ return mapping;
+}
+
+void ClusterTPAssociationProducerCUDA::produce(edm::StreamID streamID,
+ edm::Event &iEvent,
+ const edm::EventSetup &iSetup) const {
+ edm::ESHandle geom;
+ iSetup.get().get(geom);
+
+ // Pixel DigiSimLink
+ edm::Handle> sipixelSimLinks;
+ // iEvent.getByLabel(_pixelSimLinkSrc, sipixelSimLinks);
+ iEvent.getByToken(sipixelSimLinksToken_, sipixelSimLinks);
+
+ // TrackingParticle
+ edm::Handle TPCollectionH;
+ iEvent.getByToken(trackingParticleToken_, TPCollectionH);
+
+ auto mapping = makeMap(iEvent);
+
+ edm::Handle> gd;
+ iEvent.getByToken(tGpuDigis, gd);
+ edm::Handle> gh;
+ iEvent.getByToken(tGpuHits, gh);
+
+ CUDAScopedContextProduce ctx{*gd};
+ auto const &gDigis = ctx.get(*gd);
+ auto const &gHits = ctx.get(*gh);
+ auto ndigis = gDigis.nDigis();
+ auto nhits = gHits.nHits();
+
+ std::vector digi2tp;
+ digi2tp.push_back({{0, 0, 0, 0, 0, 0, 0}}); // put at 0 0
+ for (auto const &links : *sipixelSimLinks) {
+ DetId detId(links.detId());
+ const GeomDetUnit *genericDet = geom->idToDetUnit(detId);
+ uint32_t gind = genericDet->index();
+ for (auto const &link : links) {
+ if (link.fraction() < 0.5f) {
+ continue;
+ }
+ auto tkid = std::make_pair(link.SimTrackId(), link.eventId());
+ auto ipos = mapping.find(tkid);
+ if (ipos != mapping.end()) {
+ uint32_t pt = 1000 * (*ipos).second->pt();
+ uint32_t eta = 10000 * (*ipos).second->eta();
+ uint32_t z0 = 10000 * (*ipos).second->vz(); // in um
+ uint32_t r0 = 10000 * std::sqrt((*ipos).second->vx() * (*ipos).second->vx() +
+ (*ipos).second->vy() * (*ipos).second->vy()); // in um
+ digi2tp.push_back({{gind, uint32_t(link.channel()), (*ipos).second.key(), pt, eta, z0, r0}});
+ }
+ }
+ }
+
+ std::sort(digi2tp.begin(), digi2tp.end());
+
+ ctx.emplace(iEvent,
+ tokenGPUProd_,
+ m_gpuAlgo.makeAsync(gDigis, ndigis, gHits, digi2tp.data(), nhits, digi2tp.size(), ctx.stream()));
+}
+
+template
+std::vector>
+//std::pair
+ClusterTPAssociationProducerCUDA::getSimTrackId(const edm::Handle> &simLinks,
+ const DetId &detId,
+ uint32_t channel) const {
+ //std::pair simTrkId;
+ std::vector> simTrkId;
+ auto isearch = simLinks->find(detId);
+ if (isearch != simLinks->end()) {
+ // Loop over DigiSimLink in this det unit
+ edm::DetSet link_detset = (*isearch);
+ for (typename edm::DetSet::const_iterator it = link_detset.data.begin(); it != link_detset.data.end(); ++it) {
+ if (channel == it->channel()) {
+ simTrkId.push_back(std::make_pair(it->SimTrackId(), it->eventId()));
+ }
+ }
+ }
+ return simTrkId;
+}
+
+#include "FWCore/PluginManager/interface/ModuleDef.h"
+#include "FWCore/Framework/interface/MakerMacros.h"
+
+DEFINE_FWK_MODULE(ClusterTPAssociationProducerCUDA);
diff --git a/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py b/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py
index a8de0a96e4678..ae8893e2431a3 100644
--- a/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py
+++ b/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py
@@ -20,9 +20,6 @@
)
-from SimTracker.TrackerHitAssociation.tpClusterProducerHeterogeneousDefault_cfi import tpClusterProducerHeterogeneousDefault as _tpClusterProducerHeterogeneous
-tpClusterProducerHeterogeneous = _tpClusterProducerHeterogeneous.clone()
-
-from SimTracker.TrackerHitAssociation.tpClusterHeterogeneousConverter_cfi import tpClusterHeterogeneousConverter as _tpHeterogeneousConverter
-tpClusterProducerConverter = _tpHeterogeneousConverter.clone()
+from SimTracker.TrackerHitAssociation.tpClusterProducerCUDADefault_cfi import tpClusterProducerCUDADefault as _tpClusterProducerCUDA
+tpClusterProducerCUDA = _tpClusterProducerCUDA.clone()
diff --git a/SimTracker/TrackerHitAssociation/src/classes.h b/SimTracker/TrackerHitAssociation/src/classes.h
index 457b6683d5cea..3c143de04fe0a 100644
--- a/SimTracker/TrackerHitAssociation/src/classes.h
+++ b/SimTracker/TrackerHitAssociation/src/classes.h
@@ -5,6 +5,8 @@
#include "DataFormats/Common/interface/AssociationMap.h"
#include "DataFormats/TrackerRecHit2D/interface/OmniClusterRef.h"
#include "SimTracker/TrackerHitAssociation/interface/ClusterTPAssociation.h"
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h"
#include "DataFormats/Common/interface/AssociationMap.h"
namespace SimTracker_TrackerHitAssociation {
diff --git a/SimTracker/TrackerHitAssociation/src/classes_def.xml b/SimTracker/TrackerHitAssociation/src/classes_def.xml
index f801d25b176e0..fc157eacc310f 100644
--- a/SimTracker/TrackerHitAssociation/src/classes_def.xml
+++ b/SimTracker/TrackerHitAssociation/src/classes_def.xml
@@ -20,4 +20,9 @@
+
+
+
+
+
diff --git a/SimTracker/TrackerHitAssociation/test/BuildFile.xml b/SimTracker/TrackerHitAssociation/test/BuildFile.xml
index a0dc6b61844a0..960d1457bda46 100644
--- a/SimTracker/TrackerHitAssociation/test/BuildFile.xml
+++ b/SimTracker/TrackerHitAssociation/test/BuildFile.xml
@@ -1,14 +1,19 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/SimTracker/TrackerHitAssociation/test/ClusterTPCUDAdump.cc b/SimTracker/TrackerHitAssociation/test/ClusterTPCUDAdump.cc
new file mode 100644
index 0000000000000..2c87ad818e185
--- /dev/null
+++ b/SimTracker/TrackerHitAssociation/test/ClusterTPCUDAdump.cc
@@ -0,0 +1,68 @@
+#include
+
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "DataFormats/Common/interface/Handle.h"
+#include "FWCore/Framework/interface/ESHandle.h"
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/EventSetup.h"
+#include "FWCore/Framework/interface/MakerMacros.h"
+#include "FWCore/Framework/interface/global/EDAnalyzer.h"
+#include "FWCore/Framework/interface/ConsumesCollector.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/Utilities/interface/InputTag.h"
+#include "FWCore/PluginManager/interface/ModuleDef.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "FWCore/Utilities/interface/EDGetToken.h"
+#include "FWCore/Utilities/interface/RunningAverage.h"
+#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h"
+#include "HeterogeneousCore/CUDACore/interface/GPUCuda.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+
+#include "SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h"
+
+class ClusterTPCUDAdump : public edm::global::EDAnalyzer<> {
+public:
+ using ClusterSLGPU = trackerHitAssociationHeterogeneous::ClusterSLView;
+ using Clus2TP = ClusterSLGPU::Clus2TP;
+ using ProductCUDA = trackerHitAssociationHeterogeneous::ProductCUDA;
+
+ explicit ClusterTPCUDAdump(const edm::ParameterSet& iConfig);
+ ~ClusterTPCUDAdump() override = default;
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+private:
+ void analyze(edm::StreamID streamID, edm::Event const& iEvent, const edm::EventSetup& iSetup) const override;
+ const bool m_onGPU;
+ edm::EDGetTokenT> tokenGPU_;
+};
+
+ClusterTPCUDAdump::ClusterTPCUDAdump(const edm::ParameterSet& iConfig) : m_onGPU(iConfig.getParameter("onGPU")) {
+ if (m_onGPU) {
+ tokenGPU_ = consumes>(iConfig.getParameter("clusterTP"));
+ } else {
+ }
+}
+
+void ClusterTPCUDAdump::analyze(edm::StreamID streamID, edm::Event const& iEvent, const edm::EventSetup& iSetup) const {
+ if (m_onGPU) {
+ auto const& hctp = iEvent.get(tokenGPU_);
+ CUDAScopedContextProduce ctx{hctp};
+
+ auto const& ctp = ctx.get(hctp);
+ auto const& soa = ctp.view();
+ assert(soa.links_d);
+ } else {
+ }
+}
+
+void ClusterTPCUDAdump::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+ edm::ParameterSetDescription desc;
+ desc.add("onGPU", true);
+ desc.add("clusterTP", edm::InputTag("tpClusterProducerCUDAPreSplitting"));
+ descriptions.add("clusterTPCUDAdump", desc);
+}
+
+DEFINE_FWK_MODULE(ClusterTPCUDAdump);
diff --git a/Validation/RecoTrack/python/TrackValidation_cff.py b/Validation/RecoTrack/python/TrackValidation_cff.py
index ed460d0d8c3f7..c6bdad88b5e50 100644
--- a/Validation/RecoTrack/python/TrackValidation_cff.py
+++ b/Validation/RecoTrack/python/TrackValidation_cff.py
@@ -485,13 +485,11 @@ def _getMVASelectors(postfix):
# Built tracks, in the standard sequence mainly for monitoring the track selection MVA
tpClusterProducerPreSplitting = tpClusterProducer.clone(pixelClusterSrc = "siPixelClustersPreSplitting")
quickTrackAssociatorByHitsPreSplitting = quickTrackAssociatorByHits.clone(cluster2TPSrc = "tpClusterProducerPreSplitting")
-tpClusterProducerHeterogeneousPreSplitting = tpClusterProducerHeterogeneous.clone(
+
+tpClusterProducerCUDAPreSplitting = tpClusterProducerCUDA.clone(
pixelClusterSrc = "siPixelClustersPreSplitting"
)
-from Configuration.ProcessModifiers.gpu_cff import gpu
-gpu.toReplaceWith(tpClusterProducerPreSplitting, tpClusterProducerConverter.clone(
- src = "tpClusterProducerHeterogeneousPreSplitting"
-))
+
_trackValidatorSeedingBuilding = trackValidator.clone( # common for built tracks and seeds (in trackingOnly)
associators = ["quickTrackAssociatorByHits"],
UseAssociators = True,
@@ -595,7 +593,6 @@ def _uniqueFirstLayers(layerList):
)
tracksValidationTruth = cms.Task(
tpClusterProducer,
- tpClusterProducerHeterogeneousPreSplitting,
tpClusterProducerPreSplitting,
quickTrackAssociatorByHits,
quickTrackAssociatorByHitsPreSplitting,
@@ -603,6 +600,16 @@ def _uniqueFirstLayers(layerList):
VertexAssociatorByPositionAndTracks,
trackingParticleNumberOfLayersProducer
)
+
+#gpu tp ???
+from Configuration.ProcessModifiers.gpu_cff import gpu
+tpClusterProducerPreSplittingCUDA = cms.Task(
+ tpClusterProducerCUDAPreSplitting
+)
+_tracksValidationTruth_gpu = tracksValidationTruth.copy()
+_tracksValidationTruth_gpu.add(tpClusterProducerPreSplittingCUDA)
+gpu.toReplaceWith(tracksValidationTruth,_tracksValidationTruth_gpu)
+
fastSim.toModify(tracksValidationTruth, lambda x: x.remove(tpClusterProducer))
tracksPreValidation = cms.Task(