diff --git a/SimTracker/TrackerHitAssociation/BuildFile.xml b/SimTracker/TrackerHitAssociation/BuildFile.xml index 48e91f5cbee07..62017bf5c8dd1 100644 --- a/SimTracker/TrackerHitAssociation/BuildFile.xml +++ b/SimTracker/TrackerHitAssociation/BuildFile.xml @@ -8,9 +8,11 @@ + + diff --git a/SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h b/SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h new file mode 100644 index 0000000000000..89e969313e4b6 --- /dev/null +++ b/SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h @@ -0,0 +1,69 @@ +#ifndef SimTracker_TrackerHitAssociation_plugins_trackerHitAssociationHeterogeneousProduct_h +#define SimTracker_TrackerHitAssociation_plugins_trackerHitAssociationHeterogeneousProduct_h + +#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +namespace trackerHitAssociationHeterogeneous { + + struct ClusterSLView { + using Clus2TP = std::array; + + Clus2TP* links_d; + uint32_t* tkId_d; + uint32_t* tkId2_d; + uint32_t* n1_d; + uint32_t* n2_d; + }; + + template + class Product { + public: + template + using unique_ptr = typename Traits::template unique_ptr; + + Product() = default; + ~Product() = default; + Product(Product const&) = delete; + Product(Product&&) = default; + + Product(int nlinks, int nhits, cudaStream_t stream); + + ClusterSLView& view() { return m_view; } + ClusterSLView const& view() const { return m_view; } + + int nLinks() const { return m_nLinks; } + int nHits() const { return m_nHits; } + + private: + static constexpr uint32_t n32 = 4; + + unique_ptr m_storeTP; //! + unique_ptr m_store32; //! + + ClusterSLView m_view; //! + + int m_nLinks; + int m_nHits; + }; + + template + Product::Product(int nlinks, int nhits, cudaStream_t stream) : m_nLinks(nlinks), m_nHits(nhits) { + m_storeTP = Traits::template make_device_unique(m_nLinks * 7, stream); + m_store32 = Traits::template make_device_unique(m_nHits * n32, stream); + + auto get32 = [&](int i) { return m_store32.get() + i * m_nHits; }; + + m_view.links_d = (ClusterSLView::Clus2TP*)(m_storeTP.get()); + m_view.tkId_d = get32(0); + m_view.tkId2_d = get32(1); + m_view.n1_d = get32(2); + m_view.n2_d = get32(3); + } + + using ProductCUDA = Product; + +} // namespace trackerHitAssociationHeterogeneous + +#endif // SimTracker_TrackerHitAssociation_plugins_trackerHitAssociationHeterogeneousProduct_h diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu index 767e3b814e6c1..00b03eb86bb34 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu @@ -9,23 +9,20 @@ #include "ClusterSLOnGPU.h" -using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU; -using Clus2TP = ClusterSLGPU::Clus2TP; +using ClusterSLView = trackerHitAssociationHeterogeneous::ClusterSLView; +using Clus2TP = ClusterSLView::Clus2TP; // #define DUMP_TK2 __global__ void simLink(const SiPixelDigisCUDA::DeviceConstView* dd, uint32_t ndigis, - clusterSLOnGPU::HitsOnGPU const* hhp, - ClusterSLGPU const* slp, + TrackingRecHit2DSOAView const* hhp, + ClusterSLView sl, uint32_t n) { - assert(slp == slp->me_d); - - constexpr int32_t invTK = 0; // std::numeric_limits::max(); + constexpr uint32_t invTK = 0; // std::numeric_limits::max(); constexpr uint16_t InvId = 9999; // must be > MaxNumModules auto const& hh = *hhp; - auto const& sl = *slp; auto i = blockIdx.x * blockDim.x + threadIdx.x; if (i >= ndigis) @@ -41,7 +38,7 @@ __global__ void simLink(const SiPixelDigisCUDA::DeviceConstView* dd, auto cl = first + dd->clus(i); assert(cl < 2000 * blockDim.x); - const Clus2TP me{{id, ch, 0, 0, 0, 0}}; + const Clus2TP me{{id, ch, 0, 0, 0, 0, 0}}; auto less = [] __host__ __device__(Clus2TP const& a, Clus2TP const& b) -> bool { // in this context we do not care of [2] @@ -80,38 +77,32 @@ __global__ void simLink(const SiPixelDigisCUDA::DeviceConstView* dd, } } -__global__ void verifyZero(uint32_t nhits, ClusterSLGPU const* slp) { +__global__ void doZero(uint32_t nhits, ClusterSLView sl) { auto i = blockIdx.x * blockDim.x + threadIdx.x; if (i > nhits) return; - auto const& sl = *slp; - - assert(sl.tkId_d[i] == 0); - auto const& tk = sl.links_d[0]; - assert(tk[0] == 0); - assert(tk[1] == 0); - assert(tk[2] == 0); - assert(tk[3] == 0); + sl.tkId_d[i] = 0; + sl.n1_d[i] = 0; + sl.tkId2_d[i] = 0; + sl.n2_d[i] = 0; } -__global__ void dumpLink( - int first, int ev, clusterSLOnGPU::HitsOnGPU const* hhp, uint32_t nhits, ClusterSLGPU const* slp) { +__global__ void dumpLink(int first, int ev, TrackingRecHit2DSOAView const* hhp, uint32_t nhits, ClusterSLView sl) { auto i = first + blockIdx.x * blockDim.x + threadIdx.x; if (i > nhits) return; auto const& hh = *hhp; - auto const& sl = *slp; auto const& tk1 = sl.links_d[sl.tkId_d[i]]; #ifdef DUMP_TK2 auto const& tk2 = sl.links_d[sl.tkId2_d[i]]; - printf("HIT: %d %d %d %d %.4f %.4f %.4f %.4f %d %d %d %d %d %d %d %d %d %d %d %d %d\n", + printf("HIT: %d %d %d %d %.4f %.4f %.4f %.4f %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", #else - printf("HIT: %d %d %d %d %.4f %.4f %.4f %.4f %d %d %d %d %d %d %d %d\n", + printf("HIT: %d %d %d %d %.4f %.4f %.4f %.4f %d %d %d %d %d %d %d %d %d\n", #endif ev, i, @@ -128,6 +119,7 @@ __global__ void dumpLink( tk1[3], tk1[4], tk1[5], + tk1[6], sl.n1_d[i] #ifdef DUMP_TK2 , @@ -135,6 +127,7 @@ __global__ void dumpLink( tk2[3], tk2[4], tk2[5], + tk2[6], sl.n2_d[i] #endif ); @@ -142,13 +135,11 @@ __global__ void dumpLink( namespace clusterSLOnGPU { - constexpr uint32_t invTK = 0; // std::numeric_limits::max(); - void printCSVHeader() { #ifdef DUMP_TK2 - printf("HIT: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", + printf("HIT: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", #else - printf("HIT: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", + printf("HIT: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", #endif "ev", "ind", @@ -163,6 +154,7 @@ namespace clusterSLOnGPU { "ysize", "tkId", "pt", + "eta", "z0", "r0", "n1" @@ -170,6 +162,7 @@ namespace clusterSLOnGPU { , "tkId2", "pt2", + "eta", "z02", "r02", "n2" @@ -180,74 +173,50 @@ namespace clusterSLOnGPU { std::atomic evId(0); std::once_flag doneCSVHeader; - Kernel::Kernel(cudaStream_t stream, bool dump) : doDump(dump) { + Kernel::Kernel(bool dump) : doDump(dump) { if (doDump) std::call_once(doneCSVHeader, printCSVHeader); - alloc(stream); - } - - void Kernel::alloc(cudaStream_t stream) { - cudaCheck(cudaMalloc((void**)&slgpu.links_d, (ClusterSLGPU::MAX_DIGIS) * sizeof(Clus2TP))); - cudaCheck(cudaMalloc((void**)&slgpu.tkId_d, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**)&slgpu.tkId2_d, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**)&slgpu.n1_d, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**)&slgpu.n2_d, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**)&slgpu.me_d, sizeof(ClusterSLGPU))); - cudaCheck(cudaMemcpyAsync(slgpu.me_d, &slgpu, sizeof(ClusterSLGPU), cudaMemcpyDefault, stream)); } - void Kernel::deAlloc() { - cudaCheck(cudaFree(slgpu.links_d)); - cudaCheck(cudaFree(slgpu.tkId_d)); - cudaCheck(cudaFree(slgpu.tkId2_d)); - cudaCheck(cudaFree(slgpu.n1_d)); - cudaCheck(cudaFree(slgpu.n2_d)); - cudaCheck(cudaFree(slgpu.me_d)); - } - - void Kernel::zero(cudaStream_t stream) { - cudaCheck(cudaMemsetAsync(slgpu.tkId_d, invTK, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t), stream)); - cudaCheck(cudaMemsetAsync(slgpu.tkId2_d, invTK, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t), stream)); - cudaCheck(cudaMemsetAsync(slgpu.n1_d, 0, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t), stream)); - cudaCheck(cudaMemsetAsync(slgpu.n2_d, 0, (ClusterSLGPU::MaxNumModules * 256) * sizeof(uint32_t), stream)); - } + trackerHitAssociationHeterogeneous::ProductCUDA Kernel::makeAsync(SiPixelDigisCUDA const& dd, + uint32_t ndigis, + HitsOnCPU const& hh, + Clus2TP const* digi2tp, + uint32_t nhits, + uint32_t nlinks, + cudaStream_t stream) const { + trackerHitAssociationHeterogeneous::ProductCUDA product(nlinks, nhits, stream); + auto& csl = product.view(); - void Kernel::algo(SiPixelDigisCUDA const& dd, - uint32_t ndigis, - HitsOnCPU const& hh, - uint32_t nhits, - uint32_t n, - cudaStream_t stream) { - zero(stream); + cudaCheck(cudaMemcpyAsync(csl.links_d, digi2tp, sizeof(Clus2TP) * nlinks, cudaMemcpyDefault, stream)); if (0 == nhits) - return; - ClusterSLGPU const& sl = slgpu; + return product; int ev = ++evId; int threadsPerBlock = 256; int blocks = (nhits + threadsPerBlock - 1) / threadsPerBlock; - verifyZero<<>>(nhits, sl.me_d); + doZero<<>>(nhits, csl); cudaCheck(cudaGetLastError()); blocks = (ndigis + threadsPerBlock - 1) / threadsPerBlock; - - assert(sl.me_d); - simLink<<>>(dd.view(), ndigis, hh.view(), sl.me_d, n); + simLink<<>>(dd.view(), ndigis, hh.view(), csl, nlinks); cudaCheck(cudaGetLastError()); if (doDump) { cudaStreamSynchronize(stream); // flush previous printf // one line == 200B so each kernel can print only 5K lines.... - blocks = 16; // (nhits + threadsPerBlock - 1) / threadsPerBlock; + blocks = 16; for (int first = 0; first < int(nhits); first += blocks * threadsPerBlock) { - dumpLink<<>>(first, ev, hh.view(), nhits, sl.me_d); + dumpLink<<>>(first, ev, hh.view(), nhits, csl); cudaCheck(cudaGetLastError()); cudaStreamSynchronize(stream); } } cudaCheck(cudaGetLastError()); + + return product; } } // namespace clusterSLOnGPU diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h index be7eaba7230d7..d8879f2154df4 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h @@ -4,40 +4,31 @@ #include #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" - -#include "trackerHitAssociationHeterogeneousProduct.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h" namespace clusterSLOnGPU { - using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU; - using GPUProduct = trackerHitAssociationHeterogeneousProduct::GPUProduct; - + using ClusterSLView = trackerHitAssociationHeterogeneous::ClusterSLView; + using Clus2TP = ClusterSLView::Clus2TP; using HitsOnGPU = TrackingRecHit2DSOAView; using HitsOnCPU = TrackingRecHit2DCUDA; - using Clus2TP = ClusterSLGPU::Clus2TP; - class Kernel { public: - Kernel(cudaStream_t stream, bool dump); - ~Kernel() { deAlloc(); } - void algo(SiPixelDigisCUDA const& dd, - uint32_t ndigis, - HitsOnCPU const& hh, - uint32_t nhits, - uint32_t n, - cudaStream_t stream); - GPUProduct getProduct() { return GPUProduct{slgpu.me_d}; } + explicit Kernel(bool dump); + ~Kernel() {} + trackerHitAssociationHeterogeneous::ProductCUDA makeAsync(SiPixelDigisCUDA const& dd, + uint32_t ndigis, + HitsOnCPU const& hh, + Clus2TP const* digi2tp, + uint32_t nhits, + uint32_t nlinks, + cudaStream_t stream) const; private: - void alloc(cudaStream_t stream); - void deAlloc(); - void zero(cudaStream_t stream); - public: - ClusterSLGPU slgpu; bool doDump; }; } // namespace clusterSLOnGPU diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationProducerCUDA.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationProducerCUDA.cc new file mode 100644 index 0000000000000..b8a4e97e84d10 --- /dev/null +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationProducerCUDA.cc @@ -0,0 +1,227 @@ +#include +#include +#include + +#include + +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/DetId/interface/DetId.h" +#include "DataFormats/Phase2TrackerCluster/interface/Phase2TrackerCluster1D.h" +#include "DataFormats/Phase2TrackerDigi/interface/Phase2TrackerDigi.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/SiPixelDetId/interface/PixelChannelIdentifier.h" +#include "DataFormats/SiStripCluster/interface/SiStripCluster.h" +#include "DataFormats/TrackerRecHit2D/interface/OmniClusterRef.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "SimDataFormats/Track/interface/SimTrackContainer.h" +#include "SimDataFormats/TrackerDigiSimLink/interface/PixelDigiSimLink.h" +#include "SimDataFormats/TrackerDigiSimLink/interface/StripDigiSimLink.h" +#include "SimDataFormats/TrackingAnalysis/interface/TrackingParticle.h" +#include "SimDataFormats/TrackingAnalysis/interface/TrackingParticleFwd.h" +#include "SimTracker/TrackerHitAssociation/interface/ClusterTPAssociation.h" + +#include "ClusterSLOnGPU.h" + +class ClusterTPAssociationProducerCUDA : public edm::global::EDProducer<> { +public: + typedef std::vector OmniClusterCollection; + + using ClusterSLGPU = trackerHitAssociationHeterogeneous::ClusterSLView; + using Clus2TP = ClusterSLGPU::Clus2TP; + using ProductCUDA = trackerHitAssociationHeterogeneous::ProductCUDA; + + explicit ClusterTPAssociationProducerCUDA(const edm::ParameterSet &); + ~ClusterTPAssociationProducerCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions &descriptions); + +private: + void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override; + + std::map, TrackingParticleRef> makeMap(const edm::Event &iEvent) const; + + template + std::vector> getSimTrackId(const edm::Handle> &simLinks, + const DetId &detId, + uint32_t channel) const; + + edm::EDGetTokenT> sipixelSimLinksToken_; + edm::EDGetTokenT> sistripSimLinksToken_; + edm::EDGetTokenT> siphase2OTSimLinksToken_; + edm::EDGetTokenT> pixelClustersToken_; + edm::EDGetTokenT> stripClustersToken_; + edm::EDGetTokenT> phase2OTClustersToken_; + edm::EDGetTokenT trackingParticleToken_; + + edm::EDGetTokenT> tGpuDigis; + edm::EDGetTokenT> tGpuHits; + + edm::EDPutTokenT> tokenGPUProd_; + + clusterSLOnGPU::Kernel m_gpuAlgo; +}; + +ClusterTPAssociationProducerCUDA::ClusterTPAssociationProducerCUDA(const edm::ParameterSet &cfg) + : sipixelSimLinksToken_( + consumes>(cfg.getParameter("pixelSimLinkSrc"))), + sistripSimLinksToken_( + consumes>(cfg.getParameter("stripSimLinkSrc"))), + siphase2OTSimLinksToken_( + consumes>(cfg.getParameter("phase2OTSimLinkSrc"))), + pixelClustersToken_( + consumes>(cfg.getParameter("pixelClusterSrc"))), + stripClustersToken_( + consumes>(cfg.getParameter("stripClusterSrc"))), + phase2OTClustersToken_(consumes>( + cfg.getParameter("phase2OTClusterSrc"))), + trackingParticleToken_( + consumes(cfg.getParameter("trackingParticleSrc"))), + tGpuDigis( + consumes>(cfg.getParameter("heterogeneousPixelDigiClusterSrc"))), + tGpuHits( + consumes>(cfg.getParameter("heterogeneousPixelRecHitSrc"))), + m_gpuAlgo(cfg.getParameter("dumpCSV")) { + tokenGPUProd_ = produces>(); +} + +void ClusterTPAssociationProducerCUDA::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { + edm::ParameterSetDescription desc; + desc.add("simTrackSrc", edm::InputTag("g4SimHits")); + desc.add("pixelSimLinkSrc", edm::InputTag("simSiPixelDigis")); + desc.add("stripSimLinkSrc", edm::InputTag("simSiStripDigis")); + desc.add("phase2OTSimLinkSrc", edm::InputTag("simSiPixelDigis", "Tracker")); + desc.add("pixelClusterSrc", edm::InputTag("siPixelClusters")); + desc.add("stripClusterSrc", edm::InputTag("siStripClusters")); + desc.add("phase2OTClusterSrc", edm::InputTag("siPhase2Clusters")); + desc.add("trackingParticleSrc", edm::InputTag("mix", "MergedTrackTruth")); + desc.add("heterogeneousPixelDigiClusterSrc", edm::InputTag("siPixelClustersCUDAPreSplitting")); + desc.add("heterogeneousPixelRecHitSrc", edm::InputTag("siPixelRecHitsCUDAPreSplitting")); + + desc.add("dumpCSV", false); + + descriptions.add("tpClusterProducerCUDADefault", desc); +} + +std::map, TrackingParticleRef> ClusterTPAssociationProducerCUDA::makeMap( + const edm::Event &iEvent) const { + // TrackingParticle + edm::Handle TPCollectionH; + iEvent.getByToken(trackingParticleToken_, TPCollectionH); + + // prepare temporary map between SimTrackId and TrackingParticle index + std::map, TrackingParticleRef> mapping; + for (TrackingParticleCollection::size_type itp = 0; itp < TPCollectionH.product()->size(); ++itp) { + TrackingParticleRef trackingParticle(TPCollectionH, itp); + + // SimTracks inside TrackingParticle + EncodedEventId eid(trackingParticle->eventId()); + for (auto itrk = trackingParticle->g4Track_begin(); itrk != trackingParticle->g4Track_end(); ++itrk) { + std::pair trkid(itrk->trackId(), eid); + //std::cout << "creating map for id: " << trkid.first << " with tp: " << trackingParticle.key() << std::endl; + mapping.insert(std::make_pair(trkid, trackingParticle)); + } + } + return mapping; +} + +void ClusterTPAssociationProducerCUDA::produce(edm::StreamID streamID, + edm::Event &iEvent, + const edm::EventSetup &iSetup) const { + edm::ESHandle geom; + iSetup.get().get(geom); + + // Pixel DigiSimLink + edm::Handle> sipixelSimLinks; + // iEvent.getByLabel(_pixelSimLinkSrc, sipixelSimLinks); + iEvent.getByToken(sipixelSimLinksToken_, sipixelSimLinks); + + // TrackingParticle + edm::Handle TPCollectionH; + iEvent.getByToken(trackingParticleToken_, TPCollectionH); + + auto mapping = makeMap(iEvent); + + edm::Handle> gd; + iEvent.getByToken(tGpuDigis, gd); + edm::Handle> gh; + iEvent.getByToken(tGpuHits, gh); + + CUDAScopedContextProduce ctx{*gd}; + auto const &gDigis = ctx.get(*gd); + auto const &gHits = ctx.get(*gh); + auto ndigis = gDigis.nDigis(); + auto nhits = gHits.nHits(); + + std::vector digi2tp; + digi2tp.push_back({{0, 0, 0, 0, 0, 0, 0}}); // put at 0 0 + for (auto const &links : *sipixelSimLinks) { + DetId detId(links.detId()); + const GeomDetUnit *genericDet = geom->idToDetUnit(detId); + uint32_t gind = genericDet->index(); + for (auto const &link : links) { + if (link.fraction() < 0.5f) { + continue; + } + auto tkid = std::make_pair(link.SimTrackId(), link.eventId()); + auto ipos = mapping.find(tkid); + if (ipos != mapping.end()) { + uint32_t pt = 1000 * (*ipos).second->pt(); + uint32_t eta = 10000 * (*ipos).second->eta(); + uint32_t z0 = 10000 * (*ipos).second->vz(); // in um + uint32_t r0 = 10000 * std::sqrt((*ipos).second->vx() * (*ipos).second->vx() + + (*ipos).second->vy() * (*ipos).second->vy()); // in um + digi2tp.push_back({{gind, uint32_t(link.channel()), (*ipos).second.key(), pt, eta, z0, r0}}); + } + } + } + + std::sort(digi2tp.begin(), digi2tp.end()); + + ctx.emplace(iEvent, + tokenGPUProd_, + m_gpuAlgo.makeAsync(gDigis, ndigis, gHits, digi2tp.data(), nhits, digi2tp.size(), ctx.stream())); +} + +template +std::vector> +//std::pair +ClusterTPAssociationProducerCUDA::getSimTrackId(const edm::Handle> &simLinks, + const DetId &detId, + uint32_t channel) const { + //std::pair simTrkId; + std::vector> simTrkId; + auto isearch = simLinks->find(detId); + if (isearch != simLinks->end()) { + // Loop over DigiSimLink in this det unit + edm::DetSet link_detset = (*isearch); + for (typename edm::DetSet::const_iterator it = link_detset.data.begin(); it != link_detset.data.end(); ++it) { + if (channel == it->channel()) { + simTrkId.push_back(std::make_pair(it->SimTrackId(), it->eventId())); + } + } + } + return simTrkId; +} + +#include "FWCore/PluginManager/interface/ModuleDef.h" +#include "FWCore/Framework/interface/MakerMacros.h" + +DEFINE_FWK_MODULE(ClusterTPAssociationProducerCUDA); diff --git a/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py b/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py index a8de0a96e4678..ae8893e2431a3 100644 --- a/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py +++ b/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py @@ -20,9 +20,6 @@ ) -from SimTracker.TrackerHitAssociation.tpClusterProducerHeterogeneousDefault_cfi import tpClusterProducerHeterogeneousDefault as _tpClusterProducerHeterogeneous -tpClusterProducerHeterogeneous = _tpClusterProducerHeterogeneous.clone() - -from SimTracker.TrackerHitAssociation.tpClusterHeterogeneousConverter_cfi import tpClusterHeterogeneousConverter as _tpHeterogeneousConverter -tpClusterProducerConverter = _tpHeterogeneousConverter.clone() +from SimTracker.TrackerHitAssociation.tpClusterProducerCUDADefault_cfi import tpClusterProducerCUDADefault as _tpClusterProducerCUDA +tpClusterProducerCUDA = _tpClusterProducerCUDA.clone() diff --git a/SimTracker/TrackerHitAssociation/src/classes.h b/SimTracker/TrackerHitAssociation/src/classes.h index 457b6683d5cea..3c143de04fe0a 100644 --- a/SimTracker/TrackerHitAssociation/src/classes.h +++ b/SimTracker/TrackerHitAssociation/src/classes.h @@ -5,6 +5,8 @@ #include "DataFormats/Common/interface/AssociationMap.h" #include "DataFormats/TrackerRecHit2D/interface/OmniClusterRef.h" #include "SimTracker/TrackerHitAssociation/interface/ClusterTPAssociation.h" +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h" #include "DataFormats/Common/interface/AssociationMap.h" namespace SimTracker_TrackerHitAssociation { diff --git a/SimTracker/TrackerHitAssociation/src/classes_def.xml b/SimTracker/TrackerHitAssociation/src/classes_def.xml index f801d25b176e0..fc157eacc310f 100644 --- a/SimTracker/TrackerHitAssociation/src/classes_def.xml +++ b/SimTracker/TrackerHitAssociation/src/classes_def.xml @@ -20,4 +20,9 @@ + + + + + diff --git a/SimTracker/TrackerHitAssociation/test/BuildFile.xml b/SimTracker/TrackerHitAssociation/test/BuildFile.xml index a0dc6b61844a0..960d1457bda46 100644 --- a/SimTracker/TrackerHitAssociation/test/BuildFile.xml +++ b/SimTracker/TrackerHitAssociation/test/BuildFile.xml @@ -1,14 +1,19 @@ - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + diff --git a/SimTracker/TrackerHitAssociation/test/ClusterTPCUDAdump.cc b/SimTracker/TrackerHitAssociation/test/ClusterTPCUDAdump.cc new file mode 100644 index 0000000000000..2c87ad818e185 --- /dev/null +++ b/SimTracker/TrackerHitAssociation/test/ClusterTPCUDAdump.cc @@ -0,0 +1,68 @@ +#include + +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDAnalyzer.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/PluginManager/interface/ModuleDef.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/RunningAverage.h" +#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDACore/interface/GPUCuda.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" + +#include "SimTracker/TrackerHitAssociation/interface/trackerHitAssociationHeterogeneous.h" + +class ClusterTPCUDAdump : public edm::global::EDAnalyzer<> { +public: + using ClusterSLGPU = trackerHitAssociationHeterogeneous::ClusterSLView; + using Clus2TP = ClusterSLGPU::Clus2TP; + using ProductCUDA = trackerHitAssociationHeterogeneous::ProductCUDA; + + explicit ClusterTPCUDAdump(const edm::ParameterSet& iConfig); + ~ClusterTPCUDAdump() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void analyze(edm::StreamID streamID, edm::Event const& iEvent, const edm::EventSetup& iSetup) const override; + const bool m_onGPU; + edm::EDGetTokenT> tokenGPU_; +}; + +ClusterTPCUDAdump::ClusterTPCUDAdump(const edm::ParameterSet& iConfig) : m_onGPU(iConfig.getParameter("onGPU")) { + if (m_onGPU) { + tokenGPU_ = consumes>(iConfig.getParameter("clusterTP")); + } else { + } +} + +void ClusterTPCUDAdump::analyze(edm::StreamID streamID, edm::Event const& iEvent, const edm::EventSetup& iSetup) const { + if (m_onGPU) { + auto const& hctp = iEvent.get(tokenGPU_); + CUDAScopedContextProduce ctx{hctp}; + + auto const& ctp = ctx.get(hctp); + auto const& soa = ctp.view(); + assert(soa.links_d); + } else { + } +} + +void ClusterTPCUDAdump::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("onGPU", true); + desc.add("clusterTP", edm::InputTag("tpClusterProducerCUDAPreSplitting")); + descriptions.add("clusterTPCUDAdump", desc); +} + +DEFINE_FWK_MODULE(ClusterTPCUDAdump); diff --git a/Validation/RecoTrack/python/TrackValidation_cff.py b/Validation/RecoTrack/python/TrackValidation_cff.py index ed460d0d8c3f7..c6bdad88b5e50 100644 --- a/Validation/RecoTrack/python/TrackValidation_cff.py +++ b/Validation/RecoTrack/python/TrackValidation_cff.py @@ -485,13 +485,11 @@ def _getMVASelectors(postfix): # Built tracks, in the standard sequence mainly for monitoring the track selection MVA tpClusterProducerPreSplitting = tpClusterProducer.clone(pixelClusterSrc = "siPixelClustersPreSplitting") quickTrackAssociatorByHitsPreSplitting = quickTrackAssociatorByHits.clone(cluster2TPSrc = "tpClusterProducerPreSplitting") -tpClusterProducerHeterogeneousPreSplitting = tpClusterProducerHeterogeneous.clone( + +tpClusterProducerCUDAPreSplitting = tpClusterProducerCUDA.clone( pixelClusterSrc = "siPixelClustersPreSplitting" ) -from Configuration.ProcessModifiers.gpu_cff import gpu -gpu.toReplaceWith(tpClusterProducerPreSplitting, tpClusterProducerConverter.clone( - src = "tpClusterProducerHeterogeneousPreSplitting" -)) + _trackValidatorSeedingBuilding = trackValidator.clone( # common for built tracks and seeds (in trackingOnly) associators = ["quickTrackAssociatorByHits"], UseAssociators = True, @@ -595,7 +593,6 @@ def _uniqueFirstLayers(layerList): ) tracksValidationTruth = cms.Task( tpClusterProducer, - tpClusterProducerHeterogeneousPreSplitting, tpClusterProducerPreSplitting, quickTrackAssociatorByHits, quickTrackAssociatorByHitsPreSplitting, @@ -603,6 +600,16 @@ def _uniqueFirstLayers(layerList): VertexAssociatorByPositionAndTracks, trackingParticleNumberOfLayersProducer ) + +#gpu tp ??? +from Configuration.ProcessModifiers.gpu_cff import gpu +tpClusterProducerPreSplittingCUDA = cms.Task( + tpClusterProducerCUDAPreSplitting +) +_tracksValidationTruth_gpu = tracksValidationTruth.copy() +_tracksValidationTruth_gpu.add(tpClusterProducerPreSplittingCUDA) +gpu.toReplaceWith(tracksValidationTruth,_tracksValidationTruth_gpu) + fastSim.toModify(tracksValidationTruth, lambda x: x.remove(tpClusterProducer)) tracksPreValidation = cms.Task(