From 25c7697e4b011f1399361b2d78fbfcc8ea9252c4 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Sat, 7 Jul 2018 12:22:01 +0200 Subject: [PATCH 01/19] enable sorting hits in layers by histogramming --- RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu | 6 +++--- .../plugins/siPixelRecHitsHeterogeneousProduct.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu index 999fdcd6eff19..1e85afd0443e9 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu @@ -36,7 +36,7 @@ namespace pixelgpudetails { cudaCheck(cudaMalloc((void**) & gpu_.sortIndex_d,(gpuClustering::MaxNumModules*256)*sizeof(uint16_t))); cudaCheck(cudaMalloc((void**) & gpu_.mr_d,(gpuClustering::MaxNumModules*256)*sizeof(uint16_t))); cudaCheck(cudaMalloc((void**) & gpu_.mc_d,(gpuClustering::MaxNumModules*256)*sizeof(uint16_t))); -// cudaCheck(cudaMalloc((void**) & gpu_.hist_d, 10*sizeof(HitsOnGPU::Hist))); + cudaCheck(cudaMalloc((void**) & gpu_.hist_d, 10*sizeof(HitsOnGPU::Hist))); cudaCheck(cudaMalloc((void**) & gpu_d, sizeof(HitsOnGPU))); cudaCheck(cudaMemcpyAsync(gpu_d, &gpu_, sizeof(HitsOnGPU), cudaMemcpyDefault,cudaStream.id())); @@ -59,7 +59,7 @@ namespace pixelgpudetails { cudaCheck(cudaFree(gpu_.sortIndex_d)); cudaCheck(cudaFree(gpu_.mr_d)); cudaCheck(cudaFree(gpu_.mc_d)); - // cudaCheck(cudaFree(gpu_.hist_d)); + cudaCheck(cudaFree(gpu_.hist_d)); cudaCheck(cudaFree(gpu_d)); } @@ -119,7 +119,7 @@ namespace pixelgpudetails { // for timing test // radixSortMultiWrapper<<<10, 256, 0, c.stream>>>(gpu_.iphi_d,gpu_.sortIndex_d,gpu_.hitsLayerStart_d); - // fillManyFromVector(gpu_.hist_d,10,gpu_.iphi_d, gpu_.hitsLayerStart_d, nhits,256,c.stream); + cudautils::fillManyFromVector(gpu_.hist_d,10,gpu_.iphi_d, gpu_.hitsLayerStart_d, nhits,256,stream.id()); } diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h b/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h index 6ccd435faa7ba..dfe352f1ed8d5 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h @@ -8,7 +8,7 @@ #include #include -// #include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" namespace siPixelRecHitsHeterogeneousProduct { @@ -31,8 +31,8 @@ namespace siPixelRecHitsHeterogeneousProduct { uint16_t * mr_d; uint16_t * mc_d; - // using Hist = HistoContainer; - // Hist * hist_d; + using Hist = HistoContainer; + Hist * hist_d; }; From 6160220e6e927cd718d70bfe4f5dc7a775a10cbe Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Sat, 7 Jul 2018 16:44:54 +0200 Subject: [PATCH 02/19] clusterSimLink on gpu --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 12 +- .../plugins/SiPixelRawToClusterGPUKernel.h | 18 +- .../SiPixelRawToClusterHeterogeneous.cc | 3 +- .../siPixelRawToClusterHeterogeneousProduct.h | 3 + .../SiPixelRecHits/plugins/PixelRecHits.cu | 1 + .../siPixelRecHitsHeterogeneousProduct.h | 2 + .../plugins/BuildFile.xml | 4 +- .../plugins/ClusterSLOnGPU.cu | 163 ++++++++++++++++++ .../plugins/ClusterSLOnGPU.h | 33 ++++ 9 files changed, 235 insertions(+), 4 deletions(-) create mode 100644 SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu create mode 100644 SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 29e5e82049b5c..92ea71ae5ac05 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -38,7 +38,7 @@ namespace pixelgpudetails { - SiPixelRawToClusterGPUKernel::SiPixelRawToClusterGPUKernel() { + SiPixelRawToClusterGPUKernel::SiPixelRawToClusterGPUKernel(cuda::stream_t<>& cudaStream) { int WSIZE = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; cudaMallocHost(&word, sizeof(unsigned int)*WSIZE); cudaMallocHost(&fedId_h, sizeof(unsigned char)*WSIZE); @@ -90,6 +90,12 @@ namespace pixelgpudetails { cudaCheck(cudaMalloc((void**) & moduleStart_d, (MaxNumModules+1)*sizeof(uint32_t) )); cudaCheck(cudaMalloc((void**) & clusInModule_d,(MaxNumModules)*sizeof(uint32_t) )); cudaCheck(cudaMalloc((void**) & moduleId_d, (MaxNumModules)*sizeof(uint32_t) )); + + cudaCheck(cudaMalloc((void**) & gpuProduct_d, sizeof(GPUProduct))); + gpuProduct = getProduct(); + + cudaCheck(cudaMemcpyAsync(gpuProduct_d, &gpuProduct, sizeof(GPUProduct), cudaMemcpyDefault,cudaStream.id())); + } SiPixelRawToClusterGPUKernel::~SiPixelRawToClusterGPUKernel() { @@ -111,6 +117,10 @@ namespace pixelgpudetails { cudaCheck(cudaFree(clus_d)); cudaCheck(cudaFree(clusInModule_d)); cudaCheck(cudaFree(moduleId_d)); + + cudaCheck(cudaFree(gpuProduct_d)); + + } void SiPixelRawToClusterGPUKernel::initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length) { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 2b0b205c9f536..2552f72584ee2 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -144,12 +144,22 @@ namespace pixelgpudetails { (adc << thePacking.adc_shift); } + constexpr + uint32_t pixelToChannel( int row, int col) { + constexpr Packing thePacking = packing(); + return (row << thePacking.column_width) | col; + } + + using error_obj = siPixelRawToClusterHeterogeneousProduct::error_obj; class SiPixelRawToClusterGPUKernel { public: - SiPixelRawToClusterGPUKernel(); + + using GPUProduct = siPixelRawToClusterHeterogeneousProduct::GPUProduct; + + SiPixelRawToClusterGPUKernel(cuda::stream_t<>& cudaStream); ~SiPixelRawToClusterGPUKernel(); @@ -170,6 +180,7 @@ namespace pixelgpudetails { auto getProduct() const { return siPixelRawToClusterHeterogeneousProduct::GPUProduct{ pdigi_h, rawIdArr_h, clus_h, adc_h, error_h, + gpuProduct_d, nDigis, nModulesActive, xx_d, yy_d, adc_d, moduleInd_d, moduleStart_d,clus_d, clusInModule_d, moduleId_d }; @@ -181,6 +192,11 @@ namespace pixelgpudetails { unsigned char *fedId_h = nullptr; // to hold fed index for each word // output + GPUProduct gpuProduct; + GPUProduct * gpuProduct_d; + + // FIXME cleanup all these are in the gpuProduct above... + uint32_t *pdigi_h = nullptr, *rawIdArr_h = nullptr; // host copy of output uint16_t *adc_h = nullptr; int32_t *clus_h = nullptr; // host copy of calib&clus output pixelgpudetails::error_obj *data_h = nullptr; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterHeterogeneous.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterHeterogeneous.cc index 196a08b85d861..e2c2dcf828685 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterHeterogeneous.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterHeterogeneous.cc @@ -425,7 +425,7 @@ void SiPixelRawToClusterHeterogeneous::produceCPU(edm::HeterogeneousEvent& ev, c // ----------------------------------------------------------------------------- void SiPixelRawToClusterHeterogeneous::beginStreamGPUCuda(edm::StreamID streamId, cuda::stream_t<>& cudaStream) { // Allocate GPU resources here - gpuAlgo_ = std::make_unique(); + gpuAlgo_ = std::make_unique(cudaStream); gpuModulesToUnpack_ = std::make_unique(); } @@ -523,6 +523,7 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv void SiPixelRawToClusterHeterogeneous::produceGPUCuda(edm::HeterogeneousEvent& ev, const edm::EventSetup& es, cuda::stream_t<>& cudaStream) { auto output = std::make_unique(gpuAlgo_->getProduct()); + assert(output->me_d); ev.put(std::move(output), [this](const GPUProduct& gpu, CPUProduct& cpu) { this->convertGPUtoCPU(gpu, cpu); }); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h b/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h index 6d6da10934532..bafe6fb2fed58 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h @@ -36,6 +36,7 @@ namespace siPixelRawToClusterHeterogeneousProduct { { } }; + // FIXME split in two struct GPUProduct { // Needed for digi and cluster CPU output uint32_t const * pdigi_h = nullptr; @@ -44,6 +45,8 @@ namespace siPixelRawToClusterHeterogeneousProduct { uint16_t const * adc_h = nullptr; GPU::SimpleVector const * error_h = nullptr; + GPUProduct const * me_d = nullptr; + // Needed for GPU rechits uint32_t nDigis; uint32_t nModules; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu index 1e85afd0443e9..1bf3c7dce3d94 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu @@ -39,6 +39,7 @@ namespace pixelgpudetails { cudaCheck(cudaMalloc((void**) & gpu_.hist_d, 10*sizeof(HitsOnGPU::Hist))); cudaCheck(cudaMalloc((void**) & gpu_d, sizeof(HitsOnGPU))); + gpu_.me_d = gpu_d; cudaCheck(cudaMemcpyAsync(gpu_d, &gpu_, sizeof(HitsOnGPU), cudaMemcpyDefault,cudaStream.id())); } diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h b/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h index dfe352f1ed8d5..b48ba2352625c 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h @@ -33,6 +33,8 @@ namespace siPixelRecHitsHeterogeneousProduct { using Hist = HistoContainer; Hist * hist_d; + + HitsOnGPU const * me_d=nullptr; }; diff --git a/SimTracker/TrackerHitAssociation/plugins/BuildFile.xml b/SimTracker/TrackerHitAssociation/plugins/BuildFile.xml index 7cc02d9313e26..3ef0459183c1d 100644 --- a/SimTracker/TrackerHitAssociation/plugins/BuildFile.xml +++ b/SimTracker/TrackerHitAssociation/plugins/BuildFile.xml @@ -1,5 +1,7 @@ - + + + diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu new file mode 100644 index 0000000000000..e773887b484d3 --- /dev/null +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu @@ -0,0 +1,163 @@ +#include "ClusterSLOnGPU.h" + +// for the "packing" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h" +#include +#include + +/* +struct ClusterSLGPU { + ClusterSLGPU(){alloc();} + void alloc(); + + ClusterSLGPU * me_d; + std::array * links_d; + uint32_t * tkId_d; + uint32_t * tkId2_d; + uint32_t * n1_d; + uint32_t * n2_d; + + static constexpr uint32_t MAX_DIGIS = 2000*150; + static constexpr uint32_t MaxNumModules = 2000; + +}; +*/ + + +template +__device__ +ForwardIt lowerBound(ForwardIt first, ForwardIt last, const T& value, Compare comp) +{ + ForwardIt it; + auto count = last-first; + + while (count > 0) { + it = first; + auto step = count / 2; + it+=step; + if (comp(*it, value)) { + first = ++it; + count -= step + 1; + } + else + count = step; + } + return first; +} + + + +__global__ +void simLink(clusterSLOnGPU::DigisOnGPU const * ddp, uint32_t ndigis, clusterSLOnGPU::HitsOnGPU const * hhp, ClusterSLGPU const * slp, uint32_t n) { + + constexpr uint16_t InvId=9999; // must be > MaxNumModules + + auto const & dd = *ddp; + auto const & hh = *hhp; + auto const & sl = *slp; + auto i = blockIdx.x*blockDim.x + threadIdx.x; + + if (i>ndigis) return; + + auto id = dd.moduleInd_d[i]; + if (InvId==id) return; + assert(id<2000); + + auto ch = pixelgpudetails::pixelToChannel(dd.xx_d[i], dd.yy_d[i]); + auto first = hh.hitsModuleStart_d[id]; + auto cl = first + dd.clus_d[i]; + assert(cl<256*2000); + + const std::array me{{id,ch,0}}; + + auto less = [](std::array const & a, std::array const & b)->bool { + return a[0] const & a, std::array const & b)->bool { + return a[0]==b[0] && a[1]==b[1]; // in this context we do not care of [2] + }; + + auto const * b = sl.links_d; + auto const * e = b+n; + + // auto p = cuda_std::lower_bound(b,e,me,less); + auto p = lowerBound(b,e,me,less); + auto j = p-sl.links_d; + assert(j>=0); + j = std::min(int(j),int(n-1)); + if (equal(me,sl.links_d[j])) { + //auto const & l = sl.links_d[j]; + auto const tk = j; // l[2]; + auto old = atomicCAS(&sl.tkId_d[cl],0,tk); + if (0==old ||tk==old) atomicAdd(&sl.n1_d[cl],1); + else { + auto old = atomicCAS(&sl.tkId2_d[cl],0,tk); + if (0==old ||tk==old) atomicAdd(&sl.n2_d[cl],1); + } + } + /* + else { + auto const & k=sl.links_d[j]; + auto const & kk = j+1nhits) return; + + auto const & hh = *hhp; + auto const & sl = *slp; + + auto const & tk1 = sl.links_d[sl.tkId_d[i]]; + auto const & tk2 = sl.links_d[sl.tkId2_d[i]]; + + printf("HIT: %d %d %d %d %f %f %f %f %d %d %d %d %d %d %d\n",ev, i, + hh.detInd_d[i], hh.charge_d[i], + hh.xg_d[i],hh.yg_d[i],hh.zg_d[i],hh.rg_d[i],hh.iphi_d[i], + tk1[2],tk1[3],sl.n1_d[i], + tk2[2],tk2[3],sl.n2_d[i] + ); + +} + + + +namespace clusterSLOnGPU { + + struct CSVHeader { + CSVHeader() { + printf("HIT: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", "ev", "ind", + "det", "charge", + "xg","yg","zg","rg","iphi", + "tkId","pt","n1","tkId2","pt2","n2" + ); + } + + }; + CSVHeader csvHeader; + + std::atomic evId(0); + + void wrapper(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, ClusterSLGPU const & sl, uint32_t n, cuda::stream_t<>& stream) { + + int ev = ++evId; + int threadsPerBlock = 256; + int blocks = (ndigis + threadsPerBlock - 1) / threadsPerBlock; + + assert(sl.me_d); + simLink<<>>(dd.me_d,ndigis, hh.gpu_d, sl.me_d,n); + blocks = (nhits + threadsPerBlock - 1) / threadsPerBlock; + dumpLink<<>>(ev, hh.gpu_d, nhits, sl.me_d); + cudaCheck(cudaGetLastError()); + + } + +} diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h new file mode 100644 index 0000000000000..8fa8d6cba0c7b --- /dev/null +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h @@ -0,0 +1,33 @@ +// gpu +#include +#include +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +#include "RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h" +#include "RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h" + +struct ClusterSLGPU { + ClusterSLGPU(){alloc();} + void alloc(); + void zero(cudaStream_t stream); + + ClusterSLGPU * me_d; + std::array * links_d; + uint32_t * tkId_d; + uint32_t * tkId2_d; + uint32_t * n1_d; + uint32_t * n2_d; + + static constexpr uint32_t MAX_DIGIS = 2000*150; + static constexpr uint32_t MaxNumModules = 2000; + +}; + +namespace clusterSLOnGPU { + + using DigisOnGPU = siPixelRawToClusterHeterogeneousProduct::GPUProduct; + using HitsOnGPU = siPixelRecHitsHeterogeneousProduct::HitsOnGPU; + using HitsOnCPU = siPixelRecHitsHeterogeneousProduct::HitsOnCPU; + void wrapper(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, ClusterSLGPU const & sl, uint32_t n, cuda::stream_t<>& stream); + +} From c26ca645a1ca26c4a191e4b37c0cbba271e50b80 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Sun, 8 Jul 2018 16:43:15 +0200 Subject: [PATCH 03/19] Heterogeneous clTP --- .../ClusterTPAssociationHeterogeneous.cc | 416 ++++++++++++++++++ 1 file changed, 416 insertions(+) create mode 100644 SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc new file mode 100644 index 0000000000000..984c3e5ddd90d --- /dev/null +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -0,0 +1,416 @@ +#include +#include +#include + +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" + +#include "HeterogeneousCore/CUDACore/interface/GPUCuda.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/Producer/interface/HeterogeneousEDProducer.h" + + +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/DetId/interface/DetId.h" +#include "DataFormats/SiPixelDetId/interface/PixelChannelIdentifier.h" +#include "DataFormats/TrackerRecHit2D/interface/OmniClusterRef.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/SiStripCluster/interface/SiStripCluster.h" +#include "DataFormats/Phase2TrackerCluster/interface/Phase2TrackerCluster1D.h" + +#include "SimDataFormats/Track/interface/SimTrackContainer.h" +#include "SimDataFormats/TrackerDigiSimLink/interface/StripDigiSimLink.h" +#include "SimDataFormats/TrackerDigiSimLink/interface/PixelDigiSimLink.h" +#include "DataFormats/Phase2TrackerDigi/interface/Phase2TrackerDigi.h" +#include "SimDataFormats/TrackingAnalysis/interface/TrackingParticle.h" +#include "SimDataFormats/TrackingAnalysis/interface/TrackingParticleFwd.h" +#include "SimTracker/TrackerHitAssociation/interface/ClusterTPAssociation.h" + +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" + + +// gpu +#include "RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h" +#include "RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h" + +#include +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "ClusterSLOnGPU.h" + +void +ClusterSLGPU::alloc() { + cudaCheck(cudaMalloc((void**) & links_d,(MAX_DIGIS)*sizeof(std::array))); + + cudaCheck(cudaMalloc((void**) & tkId_d,(MaxNumModules*256)*sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**) & tkId2_d,(MaxNumModules*256)*sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**) & n1_d,(MaxNumModules*256)*sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**) & n2_d,(MaxNumModules*256)*sizeof(uint32_t))); + + + cudaCheck(cudaMalloc((void**) & me_d, sizeof(ClusterSLGPU))); + cudaCheck(cudaMemcpy(me_d, this, sizeof(ClusterSLGPU), cudaMemcpyDefault)); + cudaCheck(cudaDeviceSynchronize()); + +} + +void +ClusterSLGPU::zero(cudaStream_t stream) { + cudaCheck(cudaMemsetAsync(tkId_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(tkId2_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(n1_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(n2_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); +} + +class ClusterTPAssociationHeterogeneous : : public HeterogeneousEDProducer> +{ +public: + typedef std::vector OmniClusterCollection; + + using PixelDigiClustersH = siPixelRawToClusterHeterogeneousProduct::HeterogeneousDigiCluster; + using PixelRecHitsH = siPixelRecHitsHeterogeneousProduct::HeterogeneousPixelRecHit; + + explicit ClusterTPAssociationHeterogeneous(const edm::ParameterSet&); + ~ClusterTPAssociationHeterogeneous() override; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + + void beginStreamGPUCuda(edm::StreamID streamId, + cuda::stream_t<> &cudaStream) override; + + void acquireGPUCuda(const edm::HeterogeneousEvent &iEvent, + const edm::EventSetup &iSetup, + cuda::stream_t<> &cudaStream) override; + void produceGPUCuda(edm::HeterogeneousEvent &iEvent, + const edm::EventSetup &iSetup, + cuda::stream_t<> &cudaStream) override; + void produceCPU(edm::HeterogeneousEvent &iEvent, + const edm::EventSetup &iSetup) override; + + + template + std::vector > + getSimTrackId(const edm::Handle >& simLinks, const DetId& detId, uint32_t channel) const; + + edm::EDGetTokenT > sipixelSimLinksToken_; + edm::EDGetTokenT > sistripSimLinksToken_; + edm::EDGetTokenT > siphase2OTSimLinksToken_; + edm::EDGetTokenT > pixelClustersToken_; + edm::EDGetTokenT > stripClustersToken_; + edm::EDGetTokenT > phase2OTClustersToken_; + edm::EDGetTokenT trackingParticleToken_; + + edm::EDGetTokenT tGpuDigis; + edm::EDGetTokenT tGpuHits; + + ClusterSLGPU slGPU; + +}; + +ClusterTPAssociationHeterogeneous::ClusterTPAssociationHeterogeneous(const edm::ParameterSet & cfg) + : HeterogeneousEDProducer(cfg), + sipixelSimLinksToken_(consumes >(cfg.getParameter("pixelSimLinkSrc"))), + sistripSimLinksToken_(consumes >(cfg.getParameter("stripSimLinkSrc"))), + siphase2OTSimLinksToken_(consumes >(cfg.getParameter("phase2OTSimLinkSrc"))), + pixelClustersToken_(consumes >(cfg.getParameter("pixelClusterSrc"))), + stripClustersToken_(consumes >(cfg.getParameter("stripClusterSrc"))), + phase2OTClustersToken_(consumes >(cfg.getParameter("phase2OTClusterSrc"))), + trackingParticleToken_(consumes(cfg.getParameter("trackingParticleSrc"))), + tGpuDigis(consumesHeterogeneous(iConfig.getParameter("heterogeneousPixelDigiClusterSrc"))), + tGpuHits(consumesHeterogeneous(iConfig.getParameter("heterogeneousPixelRecHitSrc"))), +{ + produces(); +} + +ClusterTPAssociationHeterogeneous::~ClusterTPAssociationHeterogeneous() { +} + +void ClusterTPAssociationHeterogeneous::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("simTrackSrc", edm::InputTag("g4SimHits")); + desc.add("pixelSimLinkSrc", edm::InputTag("simSiPixelDigis")); + desc.add("stripSimLinkSrc", edm::InputTag("simSiStripDigis")); + desc.add("phase2OTSimLinkSrc", edm::InputTag("simSiPixelDigis","Tracker")); + desc.add("pixelClusterSrc", edm::InputTag("siPixelClusters")); + desc.add("stripClusterSrc", edm::InputTag("siStripClusters")); + desc.add("phase2OTClusterSrc", edm::InputTag("siPhase2Clusters")); + desc.add("trackingParticleSrc", edm::InputTag("mix", "MergedTrackTruth")); + desc.add("heterogeneousPixelDigiClusterSrc", edm::InputTag("siPixelClustersHeterogeneous")); + desc.add("heterogeneousPixelRecHitSrc", edm::InputTag("siPixelRecHitHeterogeneous")); + + HeterogeneousEDProducer::fillPSetDescription(desc); + + descriptions.add("tpClusterProducerDefault", desc); +} + + +void ClusterTPAssociationHeterogeneous::beginStreamGPUCuda(edm::StreamID streamId, + cuda::stream_t<> &cudaStream) { + +} + +void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEvent &iEvent, + const edm::EventSetup &iSetup, + cuda::stream_t<> &cudaStream) { + +} + +void ClusterTPAssociationHeterogeneous::produceGPUCuda(edm::HeterogeneousEvent &iEvent, + const edm::EventSetup &iSetup, + cuda::stream_t<> &cudaStream) { + +} + + +void ClusterTPAssociationHeterogeneous::produceCPU(edm::HeterogeneousEvent &iEvent, const edm::EventSetup& es) { + + edm::ESHandle geom; + es.get().get( geom ); + + + + // Pixel DigiSimLink + edm::Handle > sipixelSimLinks; + // iEvent.getByLabel(_pixelSimLinkSrc, sipixelSimLinks); + iEvent.getByToken(sipixelSimLinksToken_,sipixelSimLinks); + + // SiStrip DigiSimLink + edm::Handle > sistripSimLinks; + iEvent.getByToken(sistripSimLinksToken_,sistripSimLinks); + + // Phase2 OT DigiSimLink + edm::Handle > siphase2OTSimLinks; + iEvent.getByToken(siphase2OTSimLinksToken_, siphase2OTSimLinks); + + // Pixel Cluster + edm::Handle > pixelClusters; + bool foundPixelClusters = iEvent.getByToken(pixelClustersToken_,pixelClusters); + + // Strip Cluster + edm::Handle > stripClusters; + bool foundStripClusters = iEvent.getByToken(stripClustersToken_,stripClusters); + + // Phase2 Cluster + edm::Handle > phase2OTClusters; + bool foundPhase2OTClusters = iEvent.getByToken(phase2OTClustersToken_, phase2OTClusters); + + // TrackingParticle + edm::Handle TPCollectionH; + iEvent.getByToken(trackingParticleToken_,TPCollectionH); + + auto clusterTPList = std::make_unique(TPCollectionH); + + // prepare temporary map between SimTrackId and TrackingParticle index + std::map, TrackingParticleRef> mapping; + for (TrackingParticleCollection::size_type itp = 0; + itp < TPCollectionH.product()->size(); ++itp) { + TrackingParticleRef trackingParticle(TPCollectionH, itp); + + // SimTracks inside TrackingParticle + EncodedEventId eid(trackingParticle->eventId()); + //size_t index = 0; + for (std::vector::const_iterator itrk = trackingParticle->g4Track_begin(); + itrk != trackingParticle->g4Track_end(); ++itrk) { + std::pair trkid(itrk->trackId(), eid); + //std::cout << "creating map for id: " << trkid.first << " with tp: " << trackingParticle.key() << std::endl; + mapping.insert(std::make_pair(trkid, trackingParticle)); + } + } + + // gpu stuff ------------------------ + + std::cout << "In tpsimlink " << mapping.size() << std::endl; + + edm::Handle gd; + edm::Handle gd; + iEvent.getByToken(tGpuDigis, gd); + iEvent.getByToken(tGpuHits, gh); + auto const & gDigis = *gd; + auto const & gHits = *gh; + auto const & dcont = * gDigis.gpu_d; + auto const & hh = *gHits.gpu_d; + auto ndigis = gDigis.ndigis; + auto nhits = gHits.nhits; + + uint32_t nn=0, ng=0, ng10=0; + std::vector> digi2tp; + {std::array a{{0,0,0,0}}; digi2tp.push_back(a);} // put at 0 0 + for (auto const & links : *sipixelSimLinks) { + DetId detId(links.detId()); + const GeomDetUnit * genericDet = geom->idToDetUnit(detId); + uint32_t gind = genericDet->index(); + for (auto const & link : links) { + ++ng; + if (link.fraction() > 0.3f) ++ng10; + if (link.fraction() < 0.5f) continue; + auto tkid = std::make_pair(link.SimTrackId(), link.eventId()); + auto ipos = mapping.find(tkid); + if (ipos != mapping.end()) { + uint32_t pt = 1000*(*ipos).second->pt(); + ++nn; + std::array a{{gind,uint32_t(link.channel()),(*ipos).second.key(),pt}}; + digi2tp.push_back(a); + } + } + } + std::sort(digi2tp.begin(),digi2tp.end()); + + std::cout << "In tpsimlink found " << nn << " valid link out of " << ng << '/' << ng10 << ' ' << digi2tp.size() << std::endl; + + cudaCheck(cudaMemcpyAsync(slGPU.links_d, digi2tp.data(), sizeof(std::array)*digi2tp.size(), cudaMemcpyDefault, dcont.stream)); + slGPU.zero(dcont.stream); + clusterSLOnGPU::wrapper(dcont, ndigis, hh, nhits, slGPU, digi2tp.size()); + + // end gpu stuff --------------------- + + + + + if ( foundPixelClusters ) { + // Pixel Clusters + for (edmNew::DetSetVector::const_iterator iter = pixelClusters->begin(); + iter != pixelClusters->end(); ++iter) { + uint32_t detid = iter->id(); + DetId detId(detid); + edmNew::DetSet link_pixel = (*iter); + for (edmNew::DetSet::const_iterator di = link_pixel.begin(); + di != link_pixel.end(); ++di) { + const SiPixelCluster& cluster = (*di); + edm::Ref, SiPixelCluster> c_ref = + edmNew::makeRefTo(pixelClusters, di); + + std::set > simTkIds; + for (int irow = cluster.minPixelRow(); irow <= cluster.maxPixelRow(); ++irow) { + for (int icol = cluster.minPixelCol(); icol <= cluster.maxPixelCol(); ++icol) { + uint32_t channel = PixelChannelIdentifier::pixelToChannel(irow, icol); + std::vector > trkid(getSimTrackId(sipixelSimLinks, detId, channel)); + if (trkid.empty()) continue; + simTkIds.insert(trkid.begin(),trkid.end()); + } + } + for (std::set >::const_iterator iset = simTkIds.begin(); + iset != simTkIds.end(); iset++) { + auto ipos = mapping.find(*iset); + if (ipos != mapping.end()) { + //std::cout << "cluster in detid: " << detid << " from tp: " << ipos->second.key() << " " << iset->first << std::endl; + clusterTPList->emplace_back(OmniClusterRef(c_ref), ipos->second); + } + } + } + } + } + + if ( foundStripClusters ) { + // Strip Clusters + for (edmNew::DetSetVector::const_iterator iter = stripClusters->begin(false), eter = stripClusters->end(false); + iter != eter; ++iter) { + if (!(*iter).isValid()) continue; + uint32_t detid = iter->id(); + DetId detId(detid); + edmNew::DetSet link_strip = (*iter); + for (edmNew::DetSet::const_iterator di = link_strip.begin(); + di != link_strip.end(); di++) { + const SiStripCluster& cluster = (*di); + edm::Ref, SiStripCluster> c_ref = + edmNew::makeRefTo(stripClusters, di); + + std::set > simTkIds; + int first = cluster.firstStrip(); + int last = first + cluster.amplitudes().size(); + + for (int istr = first; istr < last; ++istr) { + std::vector > trkid(getSimTrackId(sistripSimLinks, detId, istr)); + if (trkid.empty()) continue; + simTkIds.insert(trkid.begin(),trkid.end()); + } + for (std::set >::const_iterator iset = simTkIds.begin(); + iset != simTkIds.end(); iset++) { + auto ipos = mapping.find(*iset); + if (ipos != mapping.end()) { + //std::cout << "cluster in detid: " << detid << " from tp: " << ipos->second.key() << " " << iset->first << std::endl; + clusterTPList->emplace_back(OmniClusterRef(c_ref), ipos->second); + } + } + } + } + } + + if ( foundPhase2OTClusters ) { + + // Phase2 Clusters + if(phase2OTClusters.isValid()){ + for (edmNew::DetSetVector::const_iterator iter = phase2OTClusters->begin(false), eter = phase2OTClusters->end(false); + iter != eter; ++iter) { + if (!(*iter).isValid()) continue; + uint32_t detid = iter->id(); + DetId detId(detid); + edmNew::DetSet link_phase2 = (*iter); + for (edmNew::DetSet::const_iterator di = link_phase2.begin(); + di != link_phase2.end(); di++) { + const Phase2TrackerCluster1D& cluster = (*di); + edm::Ref, Phase2TrackerCluster1D> c_ref = + edmNew::makeRefTo(phase2OTClusters, di); + + std::set > simTkIds; + + for (unsigned int istr(0); istr < cluster.size(); ++istr) { + uint32_t channel = Phase2TrackerDigi::pixelToChannel(cluster.firstRow() + istr, cluster.column()); + std::vector > trkid(getSimTrackId(siphase2OTSimLinks, detId, channel)); + if (trkid.empty()) continue; + simTkIds.insert(trkid.begin(),trkid.end()); + } + + for (std::set >::const_iterator iset = simTkIds.begin(); + iset != simTkIds.end(); iset++) { + auto ipos = mapping.find(*iset); + if (ipos != mapping.end()) { + clusterTPList->emplace_back(OmniClusterRef(c_ref), ipos->second); + } + } + } + } + } + + } + clusterTPList->sortAndUnique(); + iEvent.put(std::move(clusterTPList)); +} + +template +std::vector > +//std::pair +ClusterTPAssociationHeterogeneous::getSimTrackId(const edm::Handle >& simLinks, + const DetId& detId, uint32_t channel) const +{ + //std::pair simTrkId; + std::vector > simTrkId; + auto isearch = simLinks->find(detId); + if (isearch != simLinks->end()) { + // Loop over DigiSimLink in this det unit + edm::DetSet link_detset = (*isearch); + for (typename edm::DetSet::const_iterator it = link_detset.data.begin(); + it != link_detset.data.end(); ++it) { + if (channel == it->channel()) { + simTrkId.push_back(std::make_pair(it->SimTrackId(), it->eventId())); + } + } + } + return simTrkId; +} +#include "FWCore/PluginManager/interface/ModuleDef.h" +#include "FWCore/Framework/interface/MakerMacros.h" + +DEFINE_FWK_MODULE(ClusterTPAssociationHeterogeneous); From 3e2f557b8e84631d626ac1cde4946ec78eaa8650 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Sun, 8 Jul 2018 17:00:53 +0200 Subject: [PATCH 04/19] Heterogeneous clTP --- .../ClusterTPAssociationHeterogeneous.cc | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc index 984c3e5ddd90d..e35935ef9fef2 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -72,7 +72,7 @@ ClusterSLGPU::zero(cudaStream_t stream) { cudaCheck(cudaMemsetAsync(n2_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); } -class ClusterTPAssociationHeterogeneous : : public HeterogeneousEDProducer> { public: @@ -82,7 +82,7 @@ class ClusterTPAssociationHeterogeneous : : public HeterogeneousEDProducer >(cfg.getParameter("stripClusterSrc"))), phase2OTClustersToken_(consumes >(cfg.getParameter("phase2OTClusterSrc"))), trackingParticleToken_(consumes(cfg.getParameter("trackingParticleSrc"))), - tGpuDigis(consumesHeterogeneous(iConfig.getParameter("heterogeneousPixelDigiClusterSrc"))), - tGpuHits(consumesHeterogeneous(iConfig.getParameter("heterogeneousPixelRecHitSrc"))), + tGpuDigis(consumesHeterogeneous(cfg.getParameter("heterogeneousPixelDigiClusterSrc"))), + tGpuHits(consumesHeterogeneous(cfg.getParameter("heterogeneousPixelRecHitSrc"))) { produces(); } -ClusterTPAssociationHeterogeneous::~ClusterTPAssociationHeterogeneous() { -} - void ClusterTPAssociationHeterogeneous::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; desc.add("simTrackSrc", edm::InputTag("g4SimHits")); @@ -235,14 +232,14 @@ void ClusterTPAssociationHeterogeneous::produceCPU(edm::HeterogeneousEvent &iEve std::cout << "In tpsimlink " << mapping.size() << std::endl; edm::Handle gd; - edm::Handle gd; + edm::Handle gh; iEvent.getByToken(tGpuDigis, gd); iEvent.getByToken(tGpuHits, gh); auto const & gDigis = *gd; auto const & gHits = *gh; - auto const & dcont = * gDigis.gpu_d; + auto const & dcont = * gDigis.me_d; auto const & hh = *gHits.gpu_d; - auto ndigis = gDigis.ndigis; + auto ndigis = gDigis.nDigis; auto nhits = gHits.nhits; uint32_t nn=0, ng=0, ng10=0; From c1890f8a51457febe59dd97363bc313b5f6c9ab4 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Tue, 10 Jul 2018 11:54:16 +0200 Subject: [PATCH 05/19] moving on --- .../siPixelRecHitsHeterogeneousProduct.h | 6 +- .../plugins/BuildFile.xml | 7 + .../ClusterTPAssociationHeterogeneous.cc | 160 ++++++++++-------- 3 files changed, 103 insertions(+), 70 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h b/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h index b48ba2352625c..79608c13713d7 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h @@ -41,14 +41,16 @@ namespace siPixelRecHitsHeterogeneousProduct { struct HitsOnCPU { HitsOnCPU() = default; explicit HitsOnCPU(uint32_t nhits) : - charge(nhits),xl(nhits),yl(nhits),xe(nhits),ye(nhits), mr(nhits), mc(nhits){} + charge(nhits),xl(nhits),yl(nhits),xe(nhits),ye(nhits), mr(nhits), mc(nhits), + nHits(nhits){} uint32_t hitsModuleStart[2001]; std::vector charge; std::vector xl, yl; std::vector xe, ye; std::vector mr; std::vector mc; - + + uint32_t nHits; HitsOnGPU const * gpu_d=nullptr; }; diff --git a/SimTracker/TrackerHitAssociation/plugins/BuildFile.xml b/SimTracker/TrackerHitAssociation/plugins/BuildFile.xml index 3ef0459183c1d..c767b1e68936a 100644 --- a/SimTracker/TrackerHitAssociation/plugins/BuildFile.xml +++ b/SimTracker/TrackerHitAssociation/plugins/BuildFile.xml @@ -1,5 +1,12 @@ + + + + + + + diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc index e35935ef9fef2..50d03c41b414e 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -100,6 +100,8 @@ class ClusterTPAssociationHeterogeneous : public HeterogeneousEDProducer std::vector > @@ -118,6 +120,8 @@ class ClusterTPAssociationHeterogeneous : public HeterogeneousEDProducer, TrackingParticleRef> mapping; + }; ClusterTPAssociationHeterogeneous::ClusterTPAssociationHeterogeneous(const edm::ParameterSet & cfg) @@ -150,7 +154,7 @@ void ClusterTPAssociationHeterogeneous::fillDescriptions(edm::ConfigurationDescr HeterogeneousEDProducer::fillPSetDescription(desc); - descriptions.add("tpClusterProducerDefault", desc); + descriptions.add("tpClusterProducerHeterogeneousDefault", desc); } @@ -159,88 +163,62 @@ void ClusterTPAssociationHeterogeneous::beginStreamGPUCuda(edm::StreamID streamI } -void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEvent &iEvent, - const edm::EventSetup &iSetup, - cuda::stream_t<> &cudaStream) { +void ClusterTPAssociationHeterogeneous::makeMap(const edm::HeterogeneousEvent &iEvent) { + // TrackingParticle + edm::Handle TPCollectionH; + iEvent.getByToken(trackingParticleToken_,TPCollectionH); -} - -void ClusterTPAssociationHeterogeneous::produceGPUCuda(edm::HeterogeneousEvent &iEvent, - const edm::EventSetup &iSetup, - cuda::stream_t<> &cudaStream) { - -} - - -void ClusterTPAssociationHeterogeneous::produceCPU(edm::HeterogeneousEvent &iEvent, const edm::EventSetup& es) { - - edm::ESHandle geom; - es.get().get( geom ); - - - - // Pixel DigiSimLink - edm::Handle > sipixelSimLinks; - // iEvent.getByLabel(_pixelSimLinkSrc, sipixelSimLinks); - iEvent.getByToken(sipixelSimLinksToken_,sipixelSimLinks); - // SiStrip DigiSimLink - edm::Handle > sistripSimLinks; - iEvent.getByToken(sistripSimLinksToken_,sistripSimLinks); + // prepare temporary map between SimTrackId and TrackingParticle index + mapping.clear(); + for (TrackingParticleCollection::size_type itp = 0; + itp < TPCollectionH.product()->size(); ++itp) { + TrackingParticleRef trackingParticle(TPCollectionH, itp); + + // SimTracks inside TrackingParticle + EncodedEventId eid(trackingParticle->eventId()); + for (auto itrk = trackingParticle->g4Track_begin(); + itrk != trackingParticle->g4Track_end(); ++itrk) { + std::pair trkid(itrk->trackId(), eid); + //std::cout << "creating map for id: " << trkid.first << " with tp: " << trackingParticle.key() << std::endl; + mapping.insert(std::make_pair(trkid, trackingParticle)); + } + } - // Phase2 OT DigiSimLink - edm::Handle > siphase2OTSimLinks; - iEvent.getByToken(siphase2OTSimLinksToken_, siphase2OTSimLinks); - // Pixel Cluster - edm::Handle > pixelClusters; - bool foundPixelClusters = iEvent.getByToken(pixelClustersToken_,pixelClusters); +} - // Strip Cluster - edm::Handle > stripClusters; - bool foundStripClusters = iEvent.getByToken(stripClustersToken_,stripClusters); - // Phase2 Cluster - edm::Handle > phase2OTClusters; - bool foundPhase2OTClusters = iEvent.getByToken(phase2OTClustersToken_, phase2OTClusters); +void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEvent &iEvent, + const edm::EventSetup &iSetup, + cuda::stream_t<> &cudaStream) { - // TrackingParticle - edm::Handle TPCollectionH; - iEvent.getByToken(trackingParticleToken_,TPCollectionH); + edm::ESHandle geom; + iSetup.get().get( geom ); - auto clusterTPList = std::make_unique(TPCollectionH); + // Pixel DigiSimLink + edm::Handle > sipixelSimLinks; + // iEvent.getByLabel(_pixelSimLinkSrc, sipixelSimLinks); + iEvent.getByToken(sipixelSimLinksToken_,sipixelSimLinks); - // prepare temporary map between SimTrackId and TrackingParticle index - std::map, TrackingParticleRef> mapping; - for (TrackingParticleCollection::size_type itp = 0; - itp < TPCollectionH.product()->size(); ++itp) { - TrackingParticleRef trackingParticle(TPCollectionH, itp); - - // SimTracks inside TrackingParticle - EncodedEventId eid(trackingParticle->eventId()); - //size_t index = 0; - for (std::vector::const_iterator itrk = trackingParticle->g4Track_begin(); - itrk != trackingParticle->g4Track_end(); ++itrk) { - std::pair trkid(itrk->trackId(), eid); - //std::cout << "creating map for id: " << trkid.first << " with tp: " << trackingParticle.key() << std::endl; - mapping.insert(std::make_pair(trkid, trackingParticle)); - } - } + // TrackingParticle + edm::Handle TPCollectionH; + iEvent.getByToken(trackingParticleToken_,TPCollectionH); - // gpu stuff ------------------------ + makeMap(iEvent); + + // gpu stuff ------------------------ std::cout << "In tpsimlink " << mapping.size() << std::endl; edm::Handle gd; edm::Handle gh; - iEvent.getByToken(tGpuDigis, gd); + iEvent.getByToken(tGpuDigis, gd); iEvent.getByToken(tGpuHits, gh); auto const & gDigis = *gd; auto const & gHits = *gh; - auto const & dcont = * gDigis.me_d; - auto const & hh = *gHits.gpu_d; auto ndigis = gDigis.nDigis; - auto nhits = gHits.nhits; + auto nhits = gHits.nHits; uint32_t nn=0, ng=0, ng10=0; std::vector> digi2tp; @@ -251,7 +229,7 @@ void ClusterTPAssociationHeterogeneous::produceCPU(edm::HeterogeneousEvent &iEve uint32_t gind = genericDet->index(); for (auto const & link : links) { ++ng; - if (link.fraction() > 0.3f) ++ng10; + if (link.fraction() > 0.3f) ++ng10; if (link.fraction() < 0.5f) continue; auto tkid = std::make_pair(link.SimTrackId(), link.eventId()); auto ipos = mapping.find(tkid); @@ -267,14 +245,57 @@ void ClusterTPAssociationHeterogeneous::produceCPU(edm::HeterogeneousEvent &iEve std::cout << "In tpsimlink found " << nn << " valid link out of " << ng << '/' << ng10 << ' ' << digi2tp.size() << std::endl; - cudaCheck(cudaMemcpyAsync(slGPU.links_d, digi2tp.data(), sizeof(std::array)*digi2tp.size(), cudaMemcpyDefault, dcont.stream)); - slGPU.zero(dcont.stream); - clusterSLOnGPU::wrapper(dcont, ndigis, hh, nhits, slGPU, digi2tp.size()); + cudaCheck(cudaMemcpyAsync(slGPU.links_d, digi2tp.data(), sizeof(std::array)*digi2tp.size(), cudaMemcpyDefault, cudaStream.id())); + slGPU.zero(cudaStream.id()); + clusterSLOnGPU::wrapper(gDigis, ndigis, gHits, nhits, slGPU, digi2tp.size(),cudaStream); // end gpu stuff --------------------- +} + +void ClusterTPAssociationHeterogeneous::produceGPUCuda(edm::HeterogeneousEvent &iEvent, + const edm::EventSetup &iSetup, + cuda::stream_t<> &cudaStream) { + +} + + +void ClusterTPAssociationHeterogeneous::produceCPU(edm::HeterogeneousEvent &iEvent, const edm::EventSetup& es) { + + + makeMap(iEvent); + + // Pixel DigiSimLink + edm::Handle > sipixelSimLinks; + // iEvent.getByLabel(_pixelSimLinkSrc, sipixelSimLinks); + iEvent.getByToken(sipixelSimLinksToken_,sipixelSimLinks); + + // SiStrip DigiSimLink + edm::Handle > sistripSimLinks; + iEvent.getByToken(sistripSimLinksToken_,sistripSimLinks); + + // Phase2 OT DigiSimLink + edm::Handle > siphase2OTSimLinks; + iEvent.getByToken(siphase2OTSimLinksToken_, siphase2OTSimLinks); + // Pixel Cluster + edm::Handle > pixelClusters; + bool foundPixelClusters = iEvent.getByToken(pixelClustersToken_,pixelClusters); + + // Strip Cluster + edm::Handle > stripClusters; + bool foundStripClusters = iEvent.getByToken(stripClustersToken_,stripClusters); + + // Phase2 Cluster + edm::Handle > phase2OTClusters; + bool foundPhase2OTClusters = iEvent.getByToken(phase2OTClustersToken_, phase2OTClusters); + + // TrackingParticle + edm::Handle TPCollectionH; + iEvent.getByToken(trackingParticleToken_,TPCollectionH); + + auto clusterTPList = std::make_unique(TPCollectionH); if ( foundPixelClusters ) { // Pixel Clusters @@ -382,8 +403,11 @@ void ClusterTPAssociationHeterogeneous::produceCPU(edm::HeterogeneousEvent &iEve } } + + clusterTPList->sortAndUnique(); iEvent.put(std::move(clusterTPList)); + mapping.clear(); } template From 365897511eab59412070c7fbb756e6165f8dc302 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Tue, 10 Jul 2018 14:17:01 +0200 Subject: [PATCH 06/19] progressing?? --- .../plugins/ClusterSLOnGPU.cu | 33 +++++++++++- .../plugins/ClusterSLOnGPU.h | 40 ++++++++------ .../ClusterTPAssociationHeterogeneous.cc | 52 ++++++++----------- ...rackerHitAssociationHeterogeneousProduct.h | 34 ++++++++++++ 4 files changed, 111 insertions(+), 48 deletions(-) create mode 100644 SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu index e773887b484d3..31695c8ea9768 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu @@ -146,8 +146,37 @@ namespace clusterSLOnGPU { std::atomic evId(0); - void wrapper(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, ClusterSLGPU const & sl, uint32_t n, cuda::stream_t<>& stream) { - + + void + Kernel::alloc(cuda::stream_t<>& stream) { + cudaCheck(cudaMalloc((void**) & slgpu.links_d,(MAX_DIGIS)*sizeof(std::array))); + + cudaCheck(cudaMalloc((void**) & slgpu.tkId_d,(MaxNumModules*256)*sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**) & slgpu.tkId2_d,(MaxNumModules*256)*sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**) & slgpu.n1_d,(MaxNumModules*256)*sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**) & slgpu.n2_d,(MaxNumModules*256)*sizeof(uint32_t))); + + + cudaCheck(cudaMalloc((void**) & slgpu.me_d, sizeof(ClusterSLGPU))); + cudaCheck(cudaMemcpyAsync(slgpu.me_d, slgpu, sizeof(ClusterSLGPU), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaDeviceSynchronize()); + + } + + void + Kernel::zero(cudaStream_t stream) { + cudaCheck(cudaMemsetAsync(slgpu.tkId_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(slgpu.tkId2_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(slgpu.n1_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(slgpu.n2_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); + } + + + void + Kernel::algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream) { + + ClusterSLGPU const & sl = slgpu; + int ev = ++evId; int threadsPerBlock = 256; int blocks = (ndigis + threadsPerBlock - 1) / threadsPerBlock; diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h index 8fa8d6cba0c7b..9c695560a80fc 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h @@ -1,33 +1,41 @@ -// gpu +#ifndef SimTrackerTrackerHitAssociationClusterSLOnGPU_H +#define SimTrackerTrackerHitAssociationClusterSLOnGPU_H + #include #include #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +#include "trackerHitAssociationHeterogeneousProduct.h" + #include "RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h" #include "RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h" -struct ClusterSLGPU { - ClusterSLGPU(){alloc();} - void alloc(); - void zero(cudaStream_t stream); - - ClusterSLGPU * me_d; - std::array * links_d; - uint32_t * tkId_d; - uint32_t * tkId2_d; - uint32_t * n1_d; - uint32_t * n2_d; - static constexpr uint32_t MAX_DIGIS = 2000*150; - static constexpr uint32_t MaxNumModules = 2000; -}; namespace clusterSLOnGPU { + using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU; + using GPUProduct = trackerHitAssociationHeterogeneousProduct::GPUProduct; + using DigisOnGPU = siPixelRawToClusterHeterogeneousProduct::GPUProduct; using HitsOnGPU = siPixelRecHitsHeterogeneousProduct::HitsOnGPU; using HitsOnCPU = siPixelRecHitsHeterogeneousProduct::HitsOnCPU; - void wrapper(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, ClusterSLGPU const & sl, uint32_t n, cuda::stream_t<>& stream); + + class Kernel { + public: + void algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream); + GPUProduct getProduct() { retun GPUProduct{slgpu.me_d};} + + private: + void alloc(cuda::stream_t<>& stream); + void zero(cudaStream_t stream); + private: + ClusterSLGPU slgpu; + + }; } + +#endif diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc index 50d03c41b414e..d750750cf7daf 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -48,41 +48,22 @@ #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "ClusterSLOnGPU.h" -void -ClusterSLGPU::alloc() { - cudaCheck(cudaMalloc((void**) & links_d,(MAX_DIGIS)*sizeof(std::array))); - - cudaCheck(cudaMalloc((void**) & tkId_d,(MaxNumModules*256)*sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**) & tkId2_d,(MaxNumModules*256)*sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**) & n1_d,(MaxNumModules*256)*sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**) & n2_d,(MaxNumModules*256)*sizeof(uint32_t))); - - - cudaCheck(cudaMalloc((void**) & me_d, sizeof(ClusterSLGPU))); - cudaCheck(cudaMemcpy(me_d, this, sizeof(ClusterSLGPU), cudaMemcpyDefault)); - cudaCheck(cudaDeviceSynchronize()); - -} - -void -ClusterSLGPU::zero(cudaStream_t stream) { - cudaCheck(cudaMemsetAsync(tkId_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); - cudaCheck(cudaMemsetAsync(tkId2_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); - cudaCheck(cudaMemsetAsync(n1_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); - cudaCheck(cudaMemsetAsync(n2_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); -} - class ClusterTPAssociationHeterogeneous : public HeterogeneousEDProducer> { public: typedef std::vector OmniClusterCollection; + using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU; + using GPUProduct = trackerHitAssociationHeterogeneousProduct::GPUProduct; + using CPUProduct = trackerHitAssociationHeterogeneousProduct::CPUProduct; + using Output = trackerHitAssociationHeterogeneousProduct::ClusterTPAHeterogeneousProduct; + using PixelDigiClustersH = siPixelRawToClusterHeterogeneousProduct::HeterogeneousDigiCluster; using PixelRecHitsH = siPixelRecHitsHeterogeneousProduct::HeterogeneousPixelRecHit; explicit ClusterTPAssociationHeterogeneous(const edm::ParameterSet&); - ~ClusterTPAssociationHeterogeneous() = default;; + ~ClusterTPAssociationHeterogeneous() = default; static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); @@ -118,7 +99,7 @@ class ClusterTPAssociationHeterogeneous : public HeterogeneousEDProducer tGpuDigis; edm::EDGetTokenT tGpuHits; - ClusterSLGPU slGPU; + std::unique_ptr gpuAlgo; std::map, TrackingParticleRef> mapping; @@ -161,6 +142,8 @@ void ClusterTPAssociationHeterogeneous::fillDescriptions(edm::ConfigurationDescr void ClusterTPAssociationHeterogeneous::beginStreamGPUCuda(edm::StreamID streamId, cuda::stream_t<> &cudaStream) { + gpuAlgo = std::make_unique(cudaStream); + } void ClusterTPAssociationHeterogeneous::makeMap(const edm::HeterogeneousEvent &iEvent) { @@ -246,8 +229,7 @@ void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousE std::cout << "In tpsimlink found " << nn << " valid link out of " << ng << '/' << ng10 << ' ' << digi2tp.size() << std::endl; cudaCheck(cudaMemcpyAsync(slGPU.links_d, digi2tp.data(), sizeof(std::array)*digi2tp.size(), cudaMemcpyDefault, cudaStream.id())); - slGPU.zero(cudaStream.id()); - clusterSLOnGPU::wrapper(gDigis, ndigis, gHits, nhits, slGPU, digi2tp.size(),cudaStream); + gpuAlgo->algo(gDigis, ndigis, gHits, nhits, digi2tp.size(),cudaStream); // end gpu stuff --------------------- @@ -257,6 +239,8 @@ void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousE void ClusterTPAssociationHeterogeneous::produceGPUCuda(edm::HeterogeneousEvent &iEvent, const edm::EventSetup &iSetup, cuda::stream_t<> &cudaStream) { + auto output = std::make_unique(gpuAlgo->getProduct()); + } @@ -265,6 +249,12 @@ void ClusterTPAssociationHeterogeneous::produceCPU(edm::HeterogeneousEvent &iEve makeMap(iEvent); + iEvent.put(std::move(produceLegacy(iEvent,es))); + +} + +std::unique_ptr ClusterTPAssociationHeterogeneous::produceLegacy(edm::HeterogeneousEvent &iEvent, const edm::EventSetup& es) { + // Pixel DigiSimLink edm::Handle > sipixelSimLinks; @@ -295,7 +285,8 @@ void ClusterTPAssociationHeterogeneous::produceCPU(edm::HeterogeneousEvent &iEve edm::Handle TPCollectionH; iEvent.getByToken(trackingParticleToken_,TPCollectionH); - auto clusterTPList = std::make_unique(TPCollectionH); + auto output = std::make_unique(TPCollectionH); + auto & clusterTPList = output.collection; if ( foundPixelClusters ) { // Pixel Clusters @@ -406,7 +397,8 @@ void ClusterTPAssociationHeterogeneous::produceCPU(edm::HeterogeneousEvent &iEve clusterTPList->sortAndUnique(); - iEvent.put(std::move(clusterTPList)); + + return output; mapping.clear(); } diff --git a/SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h b/SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h new file mode 100644 index 0000000000000..a84d6bb93efbb --- /dev/null +++ b/SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h @@ -0,0 +1,34 @@ +#ifndef SimTrackerTrackerHitAssociationClusterHeterogeneousProduct_H +#define SimTrackerTrackerHitAssociationClusterHeterogeneousProduct_H + + +namespace trackerHitAssociationHeterogeneousProduct { + + struct CPUProduct { + ClusterTPAssociation collection; + } + + struct ClusterSLGPU { + + ClusterSLGPU * me_d=nullptr; + std::array * links_d; + uint32_t * tkId_d; + uint32_t * tkId2_d; + uint32_t * n1_d; + uint32_t * n2_d; + + static constexpr uint32_t MAX_DIGIS = 2000*150; + static constexpr uint32_t MaxNumModules = 2000; + + }; + + struct GPUProduct { + ClusterSLGPU * gpu_d=nullptr; + }; + + using ClusterTPAHeterogeneousProduct = HeterogeneousProductImpl, + heterogeneous::GPUCudaProduct >; + +} + +#endif From 5aa57d570ba549ad0e683652238ee7668dcbdb63 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Thu, 12 Jul 2018 18:52:05 +0200 Subject: [PATCH 07/19] compiles --- .../plugins/ClusterSLOnGPU.cu | 21 +++++++++--------- .../plugins/ClusterSLOnGPU.h | 5 +++-- .../ClusterTPAssociationHeterogeneous.cc | 22 +++++++++---------- ...rackerHitAssociationHeterogeneousProduct.h | 15 ++++++++++++- 4 files changed, 39 insertions(+), 24 deletions(-) diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu index 31695c8ea9768..30db1a292bc26 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu @@ -47,6 +47,7 @@ ForwardIt lowerBound(ForwardIt first, ForwardIt last, const T& value, Compare co } +using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU; __global__ void simLink(clusterSLOnGPU::DigisOnGPU const * ddp, uint32_t ndigis, clusterSLOnGPU::HitsOnGPU const * hhp, ClusterSLGPU const * slp, uint32_t n) { @@ -149,26 +150,26 @@ namespace clusterSLOnGPU { void Kernel::alloc(cuda::stream_t<>& stream) { - cudaCheck(cudaMalloc((void**) & slgpu.links_d,(MAX_DIGIS)*sizeof(std::array))); + cudaCheck(cudaMalloc((void**) & slgpu.links_d,(ClusterSLGPU::MAX_DIGIS)*sizeof(std::array))); - cudaCheck(cudaMalloc((void**) & slgpu.tkId_d,(MaxNumModules*256)*sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**) & slgpu.tkId2_d,(MaxNumModules*256)*sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**) & slgpu.n1_d,(MaxNumModules*256)*sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**) & slgpu.n2_d,(MaxNumModules*256)*sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**) & slgpu.tkId_d,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**) & slgpu.tkId2_d,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**) & slgpu.n1_d,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**) & slgpu.n2_d,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t))); cudaCheck(cudaMalloc((void**) & slgpu.me_d, sizeof(ClusterSLGPU))); - cudaCheck(cudaMemcpyAsync(slgpu.me_d, slgpu, sizeof(ClusterSLGPU), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(slgpu.me_d, &slgpu, sizeof(ClusterSLGPU), cudaMemcpyDefault, stream.id())); cudaCheck(cudaDeviceSynchronize()); } void Kernel::zero(cudaStream_t stream) { - cudaCheck(cudaMemsetAsync(slgpu.tkId_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); - cudaCheck(cudaMemsetAsync(slgpu.tkId2_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); - cudaCheck(cudaMemsetAsync(slgpu.n1_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); - cudaCheck(cudaMemsetAsync(slgpu.n2_d,0,(MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(slgpu.tkId_d,0,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(slgpu.tkId2_d,0,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(slgpu.n1_d,0,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(slgpu.n2_d,0,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); } diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h index 9c695560a80fc..27b78000aec56 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h @@ -26,13 +26,14 @@ namespace clusterSLOnGPU { class Kernel { public: + Kernel(cuda::stream_t<>& stream) {alloc(stream);} void algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream); - GPUProduct getProduct() { retun GPUProduct{slgpu.me_d};} + GPUProduct getProduct() { return GPUProduct{slgpu.me_d};} private: void alloc(cuda::stream_t<>& stream); void zero(cudaStream_t stream); - private: + public: ClusterSLGPU slgpu; }; diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc index d750750cf7daf..fbff2a0d354e7 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -82,7 +82,7 @@ class ClusterTPAssociationHeterogeneous : public HeterogeneousEDProducer produceLegacy(edm::HeterogeneousEvent &iEvent, const edm::EventSetup& es); template std::vector > @@ -99,7 +99,7 @@ class ClusterTPAssociationHeterogeneous : public HeterogeneousEDProducer tGpuDigis; edm::EDGetTokenT tGpuHits; - std::unique_ptr gpuAlgo; + std::unique_ptr gpuAlgo; std::map, TrackingParticleRef> mapping; @@ -142,7 +142,7 @@ void ClusterTPAssociationHeterogeneous::fillDescriptions(edm::ConfigurationDescr void ClusterTPAssociationHeterogeneous::beginStreamGPUCuda(edm::StreamID streamId, cuda::stream_t<> &cudaStream) { - gpuAlgo = std::make_unique(cudaStream); + gpuAlgo = std::make_unique(cudaStream); } @@ -228,7 +228,7 @@ void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousE std::cout << "In tpsimlink found " << nn << " valid link out of " << ng << '/' << ng10 << ' ' << digi2tp.size() << std::endl; - cudaCheck(cudaMemcpyAsync(slGPU.links_d, digi2tp.data(), sizeof(std::array)*digi2tp.size(), cudaMemcpyDefault, cudaStream.id())); + cudaCheck(cudaMemcpyAsync(gpuAlgo->slgpu.links_d, digi2tp.data(), sizeof(std::array)*digi2tp.size(), cudaMemcpyDefault, cudaStream.id())); gpuAlgo->algo(gDigis, ndigis, gHits, nhits, digi2tp.size(),cudaStream); // end gpu stuff --------------------- @@ -247,13 +247,13 @@ void ClusterTPAssociationHeterogeneous::produceGPUCuda(edm::HeterogeneousEvent & void ClusterTPAssociationHeterogeneous::produceCPU(edm::HeterogeneousEvent &iEvent, const edm::EventSetup& es) { - makeMap(iEvent); iEvent.put(std::move(produceLegacy(iEvent,es))); } -std::unique_ptr ClusterTPAssociationHeterogeneous::produceLegacy(edm::HeterogeneousEvent &iEvent, const edm::EventSetup& es) { +std::unique_ptr +ClusterTPAssociationHeterogeneous::produceLegacy(edm::HeterogeneousEvent &iEvent, const edm::EventSetup& es) { // Pixel DigiSimLink @@ -286,7 +286,7 @@ std::unique_ptr ClusterTPAssociationHeterogeneous::produceLegacy(edm iEvent.getByToken(trackingParticleToken_,TPCollectionH); auto output = std::make_unique(TPCollectionH); - auto & clusterTPList = output.collection; + auto & clusterTPList = output->collection; if ( foundPixelClusters ) { // Pixel Clusters @@ -315,7 +315,7 @@ std::unique_ptr ClusterTPAssociationHeterogeneous::produceLegacy(edm auto ipos = mapping.find(*iset); if (ipos != mapping.end()) { //std::cout << "cluster in detid: " << detid << " from tp: " << ipos->second.key() << " " << iset->first << std::endl; - clusterTPList->emplace_back(OmniClusterRef(c_ref), ipos->second); + clusterTPList.emplace_back(OmniClusterRef(c_ref), ipos->second); } } } @@ -350,7 +350,7 @@ std::unique_ptr ClusterTPAssociationHeterogeneous::produceLegacy(edm auto ipos = mapping.find(*iset); if (ipos != mapping.end()) { //std::cout << "cluster in detid: " << detid << " from tp: " << ipos->second.key() << " " << iset->first << std::endl; - clusterTPList->emplace_back(OmniClusterRef(c_ref), ipos->second); + clusterTPList.emplace_back(OmniClusterRef(c_ref), ipos->second); } } } @@ -386,7 +386,7 @@ std::unique_ptr ClusterTPAssociationHeterogeneous::produceLegacy(edm iset != simTkIds.end(); iset++) { auto ipos = mapping.find(*iset); if (ipos != mapping.end()) { - clusterTPList->emplace_back(OmniClusterRef(c_ref), ipos->second); + clusterTPList.emplace_back(OmniClusterRef(c_ref), ipos->second); } } } @@ -396,7 +396,7 @@ std::unique_ptr ClusterTPAssociationHeterogeneous::produceLegacy(edm } - clusterTPList->sortAndUnique(); + clusterTPList.sortAndUnique(); return output; mapping.clear(); diff --git a/SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h b/SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h index a84d6bb93efbb..4db318c5348f6 100644 --- a/SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h +++ b/SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h @@ -1,12 +1,23 @@ #ifndef SimTrackerTrackerHitAssociationClusterHeterogeneousProduct_H #define SimTrackerTrackerHitAssociationClusterHeterogeneousProduct_H +#ifndef __NVCC__ +#include "SimTracker/TrackerHitAssociation/interface/ClusterTPAssociation.h" +#endif + +#include "HeterogeneousCore/Product/interface/HeterogeneousProduct.h" + namespace trackerHitAssociationHeterogeneousProduct { +#ifndef __NVCC__ struct CPUProduct { + CPUProduct() = default; + template + explicit CPUProduct(T const & t) : collection(t){} ClusterTPAssociation collection; - } + }; +#endif struct ClusterSLGPU { @@ -26,8 +37,10 @@ namespace trackerHitAssociationHeterogeneousProduct { ClusterSLGPU * gpu_d=nullptr; }; +#ifndef __NVCC__ using ClusterTPAHeterogeneousProduct = HeterogeneousProductImpl, heterogeneous::GPUCudaProduct >; +#endif } From 4d3eea0ea2d5ee74ed73cebf20b880e5d1a1bbee Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Thu, 12 Jul 2018 19:08:13 +0200 Subject: [PATCH 08/19] compiles converter as well --- .../ClusterTPAssociationHeterogeneous.cc | 6 +- ...sterTPAssociationHeterogeneousConverter.cc | 59 +++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneousConverter.cc diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc index fbff2a0d354e7..949654073bd38 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -239,8 +239,12 @@ void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousE void ClusterTPAssociationHeterogeneous::produceGPUCuda(edm::HeterogeneousEvent &iEvent, const edm::EventSetup &iSetup, cuda::stream_t<> &cudaStream) { - auto output = std::make_unique(gpuAlgo->getProduct()); + auto output = std::make_unique(gpuAlgo->getProduct()); + + iEvent.put(std::move(output), [this, &iEvent, &iSetup](const GPUProduct& hits, CPUProduct& cpu) { + cpu = *(this->produceLegacy(iEvent,iSetup)); + }); } diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneousConverter.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneousConverter.cc new file mode 100644 index 0000000000000..84cd7fef424ea --- /dev/null +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneousConverter.cc @@ -0,0 +1,59 @@ +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "HeterogeneousCore/Product/interface/HeterogeneousProduct.h" + +#include "trackerHitAssociationHeterogeneousProduct.h" + + +class ClusterTPAssociationHeterogeneousConverter: public edm::global::EDProducer<> { +public: + + using Input = trackerHitAssociationHeterogeneousProduct::ClusterTPAHeterogeneousProduct; + using Product = ClusterTPAssociation; + + explicit ClusterTPAssociationHeterogeneousConverter(edm::ParameterSet const& iConfig); + ~ClusterTPAssociationHeterogeneousConverter() = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + edm::EDGetTokenT token_; +}; + +ClusterTPAssociationHeterogeneousConverter::ClusterTPAssociationHeterogeneousConverter(edm::ParameterSet const& iConfig): + token_(consumes(iConfig.getParameter("src"))) +{ + produces(); +} + +void ClusterTPAssociationHeterogeneousConverter::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("tpClusterProducerHeterogeneos")); + + descriptions.addWithDefaultLabel(desc); +} + +namespace { + template + auto copy_unique(const T& t) { + return std::make_unique(t); + } +} + +void ClusterTPAssociationHeterogeneousConverter::produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + edm::Handle hinput; + iEvent.getByToken(token_, hinput); + + const auto& input = hinput->get().getProduct(); + + iEvent.put(copy_unique(input.collection)); +} + + +DEFINE_FWK_MODULE(ClusterTPAssociationHeterogeneousConverter); From 731b0626c5706dc846ce6e2c8cc9b67b809759c9 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Thu, 12 Jul 2018 19:25:49 +0200 Subject: [PATCH 09/19] missing only sequence --- .../ClusterTPAssociationHeterogeneousConverter.cc | 2 +- .../python/tpClusterProducer_cfi.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneousConverter.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneousConverter.cc index 84cd7fef424ea..d8b9e099c3474 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneousConverter.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneousConverter.cc @@ -36,7 +36,7 @@ void ClusterTPAssociationHeterogeneousConverter::fillDescriptions(edm::Configura edm::ParameterSetDescription desc; desc.add("src", edm::InputTag("tpClusterProducerHeterogeneos")); - descriptions.addWithDefaultLabel(desc); + descriptions.add("tpClusterHeterogeneousConverter",desc); } namespace { diff --git a/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py b/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py index 8757a67226fb8..e8330c074dfac 100644 --- a/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py +++ b/SimTracker/TrackerHitAssociation/python/tpClusterProducer_cfi.py @@ -1,4 +1,5 @@ import FWCore.ParameterSet.Config as cms +from Configuration.ProcessModifiers.gpu_cff import gpu from SimTracker.TrackerHitAssociation.tpClusterProducerDefault_cfi import tpClusterProducerDefault as _tpClusterProducerDefault @@ -18,3 +19,12 @@ stripSimLinkSrc = "mixData:StripDigiSimLink", phase2OTSimLinkSrc = "mixData:Phase2OTDigiSimLink", ) + + +from SimTracker.TrackerHitAssociation.tpClusterProducerHeterogeneousDefault_cfi import tpClusterProducerHeterogeneousDefault as _tpClusterProducerHeterogeneous +tpClusterProducerHeterogeneous = _tpClusterProducerHeterogeneous.clone() + +from SimTracker.TrackerHitAssociation.tpClusterHeterogeneousConverter_cfi import tpClusterHeterogeneousConverter as _tpHeterogeneousConverter + +gpu.toReplaceWith(tpClusterProducer, _tpHeterogeneousConverter.clone()) + From b65075bc96e838b7258f3f5a7672065be2f4d92c Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Wed, 18 Jul 2018 15:52:11 +0200 Subject: [PATCH 10/19] runs up to some point --- .../plugins/ClusterTPAssociationHeterogeneous.cc | 2 +- .../RecoTrack/python/TrackValidation_cff.py | 15 +++++++++++++-- Validation/RecoTrack/python/associators_cff.py | 3 ++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc index 949654073bd38..c0bc3b66f0c7f 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -117,7 +117,7 @@ ClusterTPAssociationHeterogeneous::ClusterTPAssociationHeterogeneous(const edm:: tGpuDigis(consumesHeterogeneous(cfg.getParameter("heterogeneousPixelDigiClusterSrc"))), tGpuHits(consumesHeterogeneous(cfg.getParameter("heterogeneousPixelRecHitSrc"))) { - produces(); + produces(); } void ClusterTPAssociationHeterogeneous::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { diff --git a/Validation/RecoTrack/python/TrackValidation_cff.py b/Validation/RecoTrack/python/TrackValidation_cff.py index b6251473fcab4..dff571862bb46 100644 --- a/Validation/RecoTrack/python/TrackValidation_cff.py +++ b/Validation/RecoTrack/python/TrackValidation_cff.py @@ -709,9 +709,14 @@ def _uniqueFirstLayers(layerList): ### Pixel tracking only mode (placeholder for now) -tpClusterProducerPixelTrackingOnly = tpClusterProducer.clone( - pixelClusterSrc = "siPixelClustersPreSplitting" + +tpClusterProducerHeterogeneousPixelTrackingOnly = tpClusterProducerHeterogeneous.clone( + pixelClusterSrc = "siPixelClustersPreSplitting" ) +tpClusterProducerPixelTrackingOnly = tpClusterProducer.clone() +# Need to use the modifier to customize because the exact EDProducer type depends on the modifier +gpu.toModify(tpClusterProducerPixelTrackingOnly, src = "tpClusterProducerHeterogeneousPixelTrackingOnly") + quickTrackAssociatorByHitsPixelTrackingOnly = quickTrackAssociatorByHits.clone( cluster2TPSrc = "tpClusterProducerPixelTrackingOnly" ) @@ -739,12 +744,18 @@ def _uniqueFirstLayers(layerList): tracksValidationTruthPixelTrackingOnly.replace(quickTrackAssociatorByHits, quickTrackAssociatorByHitsPixelTrackingOnly) tracksValidationTruthPixelTrackingOnly.replace(trackingParticleRecoTrackAsssociation, trackingParticlePixelTrackAsssociation) tracksValidationTruthPixelTrackingOnly.replace(VertexAssociatorByPositionAndTracks, PixelVertexAssociatorByPositionAndTracks) + +_tracksValidationTruthPixelTrackingOnlyGPU = tracksValidationTruthPixelTrackingOnly.copy() +_tracksValidationTruthPixelTrackingOnlyGPU.insert(0, tpClusterProducerHeterogeneousPixelTrackingOnly) +gpu.toReplaceWith(tracksValidationTruthPixelTrackingOnly, _tracksValidationTruthPixelTrackingOnlyGPU) + tracksValidationPixelTrackingOnly = cms.Sequence( tracksValidationTruthPixelTrackingOnly + trackValidatorPixelTrackingOnly ) + ### Lite mode (only generalTracks and HP) trackValidatorLite = trackValidator.clone( label = ["generalTracks", "cutsRecoTracksHp"] diff --git a/Validation/RecoTrack/python/associators_cff.py b/Validation/RecoTrack/python/associators_cff.py index 73c6f416e509c..8c2c8b7c7c85e 100644 --- a/Validation/RecoTrack/python/associators_cff.py +++ b/Validation/RecoTrack/python/associators_cff.py @@ -3,7 +3,8 @@ #### TrackAssociation import SimTracker.TrackAssociatorProducers.quickTrackAssociatorByHits_cfi import SimTracker.TrackAssociatorProducers.trackAssociatorByPosition_cfi -from SimTracker.TrackerHitAssociation.tpClusterProducer_cfi import tpClusterProducer as _tpClusterProducer +# from SimTracker.TrackerHitAssociation.tpClusterProducer_cfi import tpClusterProducer as _tpClusterProducer +from SimTracker.TrackerHitAssociation.tpClusterProducer_cfi import tpClusterProducerHeterogeneous as _tpClusterProducer from SimTracker.TrackAssociation.trackingParticleRecoTrackAsssociation_cfi import trackingParticleRecoTrackAsssociation as _trackingParticleRecoTrackAsssociation hltTPClusterProducer = _tpClusterProducer.clone( From ebe74fcbea745c2567e3c93d5ba088dbf16e02de Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Wed, 18 Jul 2018 16:53:39 +0200 Subject: [PATCH 11/19] works, dumps the hits --- .../plugins/ClusterTPAssociationHeterogeneous.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc index c0bc3b66f0c7f..0ae0986428a5f 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -196,8 +196,8 @@ void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousE edm::Handle gd; edm::Handle gh; - iEvent.getByToken(tGpuDigis, gd); - iEvent.getByToken(tGpuHits, gh); + iEvent.getByToken(tGpuDigis, gd); + iEvent.getByToken(tGpuHits, gh); auto const & gDigis = *gd; auto const & gHits = *gh; auto ndigis = gDigis.nDigis; @@ -242,8 +242,10 @@ void ClusterTPAssociationHeterogeneous::produceGPUCuda(edm::HeterogeneousEvent & auto output = std::make_unique(gpuAlgo->getProduct()); - iEvent.put(std::move(output), [this, &iEvent, &iSetup](const GPUProduct& hits, CPUProduct& cpu) { - cpu = *(this->produceLegacy(iEvent,iSetup)); + auto legacy = produceLegacy(iEvent,iSetup).release(); + + iEvent.put(std::move(output), [legacy](const GPUProduct& hits, CPUProduct& cpu) { + cpu = *legacy; delete legacy; }); } From 17f1e4c9d0a1c0dfa9f5f149a3136896ee0e3af5 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Fri, 20 Jul 2018 11:37:51 +0200 Subject: [PATCH 12/19] fixed --- .../plugins/ClusterSLOnGPU.cu | 54 +++++++++---------- .../ClusterTPAssociationHeterogeneous.cc | 6 +-- 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu index 30db1a292bc26..0610aa339ef87 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu @@ -3,28 +3,10 @@ // for the "packing" #include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h" +#include #include #include -/* -struct ClusterSLGPU { - ClusterSLGPU(){alloc();} - void alloc(); - - ClusterSLGPU * me_d; - std::array * links_d; - uint32_t * tkId_d; - uint32_t * tkId2_d; - uint32_t * n1_d; - uint32_t * n2_d; - - static constexpr uint32_t MAX_DIGIS = 2000*150; - static constexpr uint32_t MaxNumModules = 2000; - -}; -*/ - - template __device__ ForwardIt lowerBound(ForwardIt first, ForwardIt last, const T& value, Compare comp) @@ -52,6 +34,9 @@ using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU; __global__ void simLink(clusterSLOnGPU::DigisOnGPU const * ddp, uint32_t ndigis, clusterSLOnGPU::HitsOnGPU const * hhp, ClusterSLGPU const * slp, uint32_t n) { + + constexpr int32_t invTK = 0; // std::numeric_limits::max(); + constexpr uint16_t InvId=9999; // must be > MaxNumModules auto const & dd = *ddp; @@ -70,7 +55,7 @@ void simLink(clusterSLOnGPU::DigisOnGPU const * ddp, uint32_t ndigis, clusterSLO auto cl = first + dd.clus_d[i]; assert(cl<256*2000); - const std::array me{{id,ch,0}}; + const std::array me{{id,ch,0,0}}; auto less = [](std::array const & a, std::array const & b)->bool { return a[0]=0); + + auto getTK = [&](int i) { auto const & l = sl.links_d[i]; return l[2];}; + j = std::min(int(j),int(n-1)); if (equal(me,sl.links_d[j])) { - //auto const & l = sl.links_d[j]; - auto const tk = j; // l[2]; - auto old = atomicCAS(&sl.tkId_d[cl],0,tk); - if (0==old ||tk==old) atomicAdd(&sl.n1_d[cl],1); - else { - auto old = atomicCAS(&sl.tkId2_d[cl],0,tk); - if (0==old ||tk==old) atomicAdd(&sl.n2_d[cl],1); - } + auto const itk = j; + auto const tk = getTK(j); + auto old = atomicCAS(&sl.tkId_d[cl],invTK,itk); + if (invTK==old || tk==getTK(old)) { + atomicAdd(&sl.n1_d[cl],1); + } else { + auto old = atomicCAS(&sl.tkId2_d[cl],invTK,itk); + if (invTK==old || tk==getTK(old)) atomicAdd(&sl.n2_d[cl],1); + } + // if (3==tk) printf("TK3: %d %d %d %d: %d,%d\n",j,cl,id, i, dd.xx_d[i], dd.yy_d[i]); } /* else { @@ -133,6 +123,8 @@ void dumpLink(int ev, clusterSLOnGPU::HitsOnGPU const * hhp, uint32_t nhits, Clu namespace clusterSLOnGPU { + constexpr uint32_t invTK = 0; // std::numeric_limits::max(); + struct CSVHeader { CSVHeader() { printf("HIT: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", "ev", "ind", @@ -166,8 +158,8 @@ namespace clusterSLOnGPU { void Kernel::zero(cudaStream_t stream) { - cudaCheck(cudaMemsetAsync(slgpu.tkId_d,0,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); - cudaCheck(cudaMemsetAsync(slgpu.tkId2_d,0,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(slgpu.tkId_d,invTK,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); + cudaCheck(cudaMemsetAsync(slgpu.tkId2_d,invTK,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); cudaCheck(cudaMemsetAsync(slgpu.n1_d,0,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); cudaCheck(cudaMemsetAsync(slgpu.n2_d,0,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); } @@ -176,6 +168,8 @@ namespace clusterSLOnGPU { void Kernel::algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream) { + zero(stream.id()); + ClusterSLGPU const & sl = slgpu; int ev = ++evId; diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc index 0ae0986428a5f..64c379e0f106f 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -212,16 +212,16 @@ void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousE uint32_t gind = genericDet->index(); for (auto const & link : links) { ++ng; - if (link.fraction() > 0.3f) ++ng10; + if (link.fraction() > 0.3f) ++ng10; if (link.fraction() < 0.5f) continue; auto tkid = std::make_pair(link.SimTrackId(), link.eventId()); auto ipos = mapping.find(tkid); - if (ipos != mapping.end()) { + if (ipos != mapping.end()) { uint32_t pt = 1000*(*ipos).second->pt(); ++nn; std::array a{{gind,uint32_t(link.channel()),(*ipos).second.key(),pt}}; digi2tp.push_back(a); - } + } } } std::sort(digi2tp.begin(),digi2tp.end()); From 56846273bc7a00f4df1e10e23a4eef61b51c44bd Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Sat, 21 Jul 2018 16:15:12 +0200 Subject: [PATCH 13/19] keep in limit of buffer --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 1 + .../plugins/SiPixelRawToClusterGPUKernel.h | 4 +- .../siPixelRawToClusterHeterogeneousProduct.h | 6 ++- .../plugins/ClusterSLOnGPU.cu | 52 ++++++++++++++++--- .../ClusterTPAssociationHeterogeneous.cc | 4 +- 5 files changed, 55 insertions(+), 12 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 92ea71ae5ac05..932df3d2ee016 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -96,6 +96,7 @@ namespace pixelgpudetails { cudaCheck(cudaMemcpyAsync(gpuProduct_d, &gpuProduct, sizeof(GPUProduct), cudaMemcpyDefault,cudaStream.id())); + std::cout << "xx_d " << xx_d <<' ' << gpuProduct.xx_d << std::endl; } SiPixelRawToClusterGPUKernel::~SiPixelRawToClusterGPUKernel() { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 2552f72584ee2..bb7768ebf5a76 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -181,8 +181,8 @@ namespace pixelgpudetails { return siPixelRawToClusterHeterogeneousProduct::GPUProduct{ pdigi_h, rawIdArr_h, clus_h, adc_h, error_h, gpuProduct_d, - nDigis, nModulesActive, - xx_d, yy_d, adc_d, moduleInd_d, moduleStart_d,clus_d, clusInModule_d, moduleId_d + xx_d, yy_d, adc_d, moduleInd_d, moduleStart_d,clus_d, clusInModule_d, moduleId_d, + nDigis, nModulesActive }; } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h b/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h index bafe6fb2fed58..5c0d027fe57b5 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h @@ -48,8 +48,6 @@ namespace siPixelRawToClusterHeterogeneousProduct { GPUProduct const * me_d = nullptr; // Needed for GPU rechits - uint32_t nDigis; - uint32_t nModules; uint16_t const * xx_d; uint16_t const * yy_d; uint16_t const * adc_d; @@ -58,6 +56,10 @@ namespace siPixelRawToClusterHeterogeneousProduct { int32_t const * clus_d; uint32_t const * clusInModule_d; uint32_t const * moduleId_d; + + uint32_t nDigis; + uint32_t nModules; + }; using HeterogeneousDigiCluster = HeterogeneousProductImpl, diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu index 0610aa339ef87..b247f4045a8f4 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu @@ -34,6 +34,7 @@ using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU; __global__ void simLink(clusterSLOnGPU::DigisOnGPU const * ddp, uint32_t ndigis, clusterSLOnGPU::HitsOnGPU const * hhp, ClusterSLGPU const * slp, uint32_t n) { + assert(slp==slp->me_d); constexpr int32_t invTK = 0; // std::numeric_limits::max(); @@ -70,7 +71,7 @@ void simLink(clusterSLOnGPU::DigisOnGPU const * ddp, uint32_t ndigis, clusterSLO // auto p = cuda_std::lower_bound(b,e,me,less); auto p = lowerBound(b,e,me,less); - auto j = p-sl.links_d; + int32_t j = p-sl.links_d; assert(j>=0); auto getTK = [&](int i) { auto const & l = sl.links_d[i]; return l[2];}; @@ -82,11 +83,12 @@ void simLink(clusterSLOnGPU::DigisOnGPU const * ddp, uint32_t ndigis, clusterSLO auto old = atomicCAS(&sl.tkId_d[cl],invTK,itk); if (invTK==old || tk==getTK(old)) { atomicAdd(&sl.n1_d[cl],1); +// sl.n1_d[cl] = tk; } else { auto old = atomicCAS(&sl.tkId2_d[cl],invTK,itk); if (invTK==old || tk==getTK(old)) atomicAdd(&sl.n2_d[cl],1); } - // if (3==tk) printf("TK3: %d %d %d %d: %d,%d\n",j,cl,id, i, dd.xx_d[i], dd.yy_d[i]); +// if (92==tk) printf("TK3: %d %d %d %d: %d,%d ?%d?%d\n", j, cl, id, i, dd.xx_d[i], dd.yy_d[i], hh.mr_d[cl], hh.mc_d[cl]); } /* else { @@ -98,12 +100,32 @@ void simLink(clusterSLOnGPU::DigisOnGPU const * ddp, uint32_t ndigis, clusterSLO } - __global__ -void dumpLink(int ev, clusterSLOnGPU::HitsOnGPU const * hhp, uint32_t nhits, ClusterSLGPU const * slp) { +void verifyZero(int ev, clusterSLOnGPU::DigisOnGPU const * ddp, clusterSLOnGPU::HitsOnGPU const * hhp, uint32_t nhits, ClusterSLGPU const * slp) { auto i = blockIdx.x*blockDim.x + threadIdx.x; if (i>nhits) return; + auto const & dd = *ddp; + auto const & hh = *hhp; + auto const & sl = *slp; + + assert(sl.tkId_d[i]==0); + auto const & tk = sl.links_d[0]; + assert(tk[0]==0); + assert(tk[1]==0); + assert(tk[2]==0); + assert(tk[3]==0); + + if (i==0) printf("xx_d gpu %x\n",dd.xx_d); + +} + + +__global__ +void dumpLink(int first, int ev, clusterSLOnGPU::HitsOnGPU const * hhp, uint32_t nhits, ClusterSLGPU const * slp) { + auto i = first + blockIdx.x*blockDim.x + threadIdx.x; + if (i>nhits) return; + auto const & hh = *hhp; auto const & sl = *slp; @@ -167,6 +189,11 @@ namespace clusterSLOnGPU { void Kernel::algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream) { + + size_t pfs = 16*1024*1024; + // cudaDeviceSetLimit(cudaLimitPrintfFifoSize,pfs); + cudaDeviceGetLimit(&pfs,cudaLimitPrintfFifoSize); + std::cout << "cudaLimitPrintfFifoSize " << pfs << std::endl; zero(stream.id()); @@ -174,12 +201,23 @@ namespace clusterSLOnGPU { int ev = ++evId; int threadsPerBlock = 256; - int blocks = (ndigis + threadsPerBlock - 1) / threadsPerBlock; + + int blocks = (nhits + threadsPerBlock - 1) / threadsPerBlock; + verifyZero<<>>(ev, dd.me_d, hh.gpu_d, nhits, sl.me_d); + + + blocks = (ndigis + threadsPerBlock - 1) / threadsPerBlock; assert(sl.me_d); simLink<<>>(dd.me_d,ndigis, hh.gpu_d, sl.me_d,n); - blocks = (nhits + threadsPerBlock - 1) / threadsPerBlock; - dumpLink<<>>(ev, hh.gpu_d, nhits, sl.me_d); + cudaStreamSynchronize(stream.id()); + + // one line == 200B so each kernel can print only 5K lines.... + blocks = 16; // (nhits + threadsPerBlock - 1) / threadsPerBlock; + for (int first=0; first>>(first, ev, hh.gpu_d, nhits, sl.me_d); + cudaStreamSynchronize(stream.id()); + } cudaCheck(cudaGetLastError()); } diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc index 64c379e0f106f..3fc56610080b4 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -103,6 +103,8 @@ class ClusterTPAssociationHeterogeneous : public HeterogeneousEDProducer, TrackingParticleRef> mapping; + std::vector> digi2tp; + }; ClusterTPAssociationHeterogeneous::ClusterTPAssociationHeterogeneous(const edm::ParameterSet & cfg) @@ -204,7 +206,7 @@ void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousE auto nhits = gHits.nHits; uint32_t nn=0, ng=0, ng10=0; - std::vector> digi2tp; + digi2tp.clear(); {std::array a{{0,0,0,0}}; digi2tp.push_back(a);} // put at 0 0 for (auto const & links : *sipixelSimLinks) { DetId detId(links.detId()); From 4bfe9b7abf52c2aa33339ce6f98bf4325ec182c8 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Mon, 23 Jul 2018 12:29:50 +0200 Subject: [PATCH 14/19] remove debug, protect dump with flag --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 2 +- .../plugins/ClusterSLOnGPU.cu | 14 ++++++++------ .../TrackerHitAssociation/plugins/ClusterSLOnGPU.h | 4 ++-- .../plugins/ClusterTPAssociationHeterogeneous.cc | 9 +++++++-- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 932df3d2ee016..88004245dddae 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -93,10 +93,10 @@ namespace pixelgpudetails { cudaCheck(cudaMalloc((void**) & gpuProduct_d, sizeof(GPUProduct))); gpuProduct = getProduct(); + assert(xx_d==gpuProduct.xx_d); cudaCheck(cudaMemcpyAsync(gpuProduct_d, &gpuProduct, sizeof(GPUProduct), cudaMemcpyDefault,cudaStream.id())); - std::cout << "xx_d " << xx_d <<' ' << gpuProduct.xx_d << std::endl; } SiPixelRawToClusterGPUKernel::~SiPixelRawToClusterGPUKernel() { diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu index b247f4045a8f4..060a9056647ae 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu @@ -116,7 +116,7 @@ void verifyZero(int ev, clusterSLOnGPU::DigisOnGPU const * ddp, clusterSLOnGPU:: assert(tk[2]==0); assert(tk[3]==0); - if (i==0) printf("xx_d gpu %x\n",dd.xx_d); + // if (i==0) printf("xx_d gpu %x\n",dd.xx_d); } @@ -212,11 +212,13 @@ namespace clusterSLOnGPU { simLink<<>>(dd.me_d,ndigis, hh.gpu_d, sl.me_d,n); cudaStreamSynchronize(stream.id()); - // one line == 200B so each kernel can print only 5K lines.... - blocks = 16; // (nhits + threadsPerBlock - 1) / threadsPerBlock; - for (int first=0; first>>(first, ev, hh.gpu_d, nhits, sl.me_d); - cudaStreamSynchronize(stream.id()); + if (doDump) { + // one line == 200B so each kernel can print only 5K lines.... + blocks = 16; // (nhits + threadsPerBlock - 1) / threadsPerBlock; + for (int first=0; first>>(first, ev, hh.gpu_d, nhits, sl.me_d); + cudaStreamSynchronize(stream.id()); + } } cudaCheck(cudaGetLastError()); diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h index 27b78000aec56..529b0ce3715e2 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h @@ -26,7 +26,7 @@ namespace clusterSLOnGPU { class Kernel { public: - Kernel(cuda::stream_t<>& stream) {alloc(stream);} + Kernel(cuda::stream_t<>& stream, bool doDump) {alloc(stream);} void algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream); GPUProduct getProduct() { return GPUProduct{slgpu.me_d};} @@ -35,7 +35,7 @@ namespace clusterSLOnGPU { void zero(cudaStream_t stream); public: ClusterSLGPU slgpu; - + bool doDump; }; } diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc index 3fc56610080b4..e2408a05289a9 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -105,6 +105,8 @@ class ClusterTPAssociationHeterogeneous : public HeterogeneousEDProducer> digi2tp; + bool doDump; + }; ClusterTPAssociationHeterogeneous::ClusterTPAssociationHeterogeneous(const edm::ParameterSet & cfg) @@ -117,7 +119,8 @@ ClusterTPAssociationHeterogeneous::ClusterTPAssociationHeterogeneous(const edm:: phase2OTClustersToken_(consumes >(cfg.getParameter("phase2OTClusterSrc"))), trackingParticleToken_(consumes(cfg.getParameter("trackingParticleSrc"))), tGpuDigis(consumesHeterogeneous(cfg.getParameter("heterogeneousPixelDigiClusterSrc"))), - tGpuHits(consumesHeterogeneous(cfg.getParameter("heterogeneousPixelRecHitSrc"))) + tGpuHits(consumesHeterogeneous(cfg.getParameter("heterogeneousPixelRecHitSrc"))), + doDump(cfg.getParameter("dumpCSV")) { produces(); } @@ -135,6 +138,8 @@ void ClusterTPAssociationHeterogeneous::fillDescriptions(edm::ConfigurationDescr desc.add("heterogeneousPixelDigiClusterSrc", edm::InputTag("siPixelClustersHeterogeneous")); desc.add("heterogeneousPixelRecHitSrc", edm::InputTag("siPixelRecHitHeterogeneous")); + desc.add("dumpCSV",false); + HeterogeneousEDProducer::fillPSetDescription(desc); descriptions.add("tpClusterProducerHeterogeneousDefault", desc); @@ -144,7 +149,7 @@ void ClusterTPAssociationHeterogeneous::fillDescriptions(edm::ConfigurationDescr void ClusterTPAssociationHeterogeneous::beginStreamGPUCuda(edm::StreamID streamId, cuda::stream_t<> &cudaStream) { - gpuAlgo = std::make_unique(cudaStream); + gpuAlgo = std::make_unique(cudaStream,doDump); } From e13e054e0af91b338cbb4b640c93a36bf9ac5f4f Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Mon, 23 Jul 2018 15:37:58 +0200 Subject: [PATCH 15/19] consume hit on gpu --- .../CAHitNtupletHeterogeneousEDProducer.cc | 23 +++ .../plugins/CAHitQuadrupletGeneratorGPU.cu | 17 ++ .../plugins/CAHitQuadrupletGeneratorGPU.h | 11 ++ .../PixelTriplets/plugins/gpuPixelDoublets.h | 161 ++++++++++++++++++ 4 files changed, 212 insertions(+) create mode 100644 RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoublets.h diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletHeterogeneousEDProducer.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletHeterogeneousEDProducer.cc index 186a84617e8d3..e9873247a119f 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletHeterogeneousEDProducer.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletHeterogeneousEDProducer.cc @@ -18,6 +18,11 @@ #include "CAHitQuadrupletGeneratorGPU.h" +// gpu +#include "RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h" + + + namespace { void fillNtuplets(RegionsSeedingHitSets::RegionFiller &seedingHitSetsFiller, const OrderedHitSeeds &quadruplets) { @@ -31,6 +36,10 @@ class CAHitNtupletHeterogeneousEDProducer : public HeterogeneousEDProducer> { public: + + using PixelRecHitsH = siPixelRecHitsHeterogeneousProduct::HeterogeneousPixelRecHit; + + CAHitNtupletHeterogeneousEDProducer(const edm::ParameterSet &iConfig); ~CAHitNtupletHeterogeneousEDProducer() = default; @@ -50,6 +59,9 @@ class CAHitNtupletHeterogeneousEDProducer private: edm::EDGetTokenT doubletToken_; + edm::EDGetTokenT tGpuHits; + + edm::RunningAverage localRA_; CAHitQuadrupletGeneratorGPU GPUGenerator_; CAHitQuadrupletGenerator CPUGenerator_; @@ -63,6 +75,7 @@ CAHitNtupletHeterogeneousEDProducer::CAHitNtupletHeterogeneousEDProducer( : HeterogeneousEDProducer(iConfig), doubletToken_(consumes( iConfig.getParameter("doublets"))), + tGpuHits(consumesHeterogeneous(iConfig.getParameter("heterogeneousPixelRecHitSrc"))), GPUGenerator_(iConfig, consumesCollector()), CPUGenerator_(iConfig, consumesCollector()) { produces(); @@ -73,6 +86,9 @@ void CAHitNtupletHeterogeneousEDProducer::fillDescriptions( edm::ParameterSetDescription desc; desc.add("doublets", edm::InputTag("hitPairEDProducer")); + + desc.add("heterogeneousPixelRecHitSrc", edm::InputTag("siPixelRecHitHeterogeneous")); + CAHitQuadrupletGeneratorGPU::fillDescriptions(desc); HeterogeneousEDProducer::fillPSetDescription(desc); auto label = "caHitQuadrupletHeterogeneousEDProducer"; @@ -106,6 +122,13 @@ void CAHitNtupletHeterogeneousEDProducer::acquireGPUCuda( seedingHitSets_ = std::make_unique(); + edm::Handle gh; + iEvent.getByToken(tGpuHits, gh); + auto const & gHits = *gh; +// auto nhits = gHits.nHits; + + GPUGenerator_.buildDoublets(gHits,0.06f,cudaStream.id()); + if (regionDoublets.empty()) { emptyRegionDoublets = true; } else { diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cu b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cu index 4a7f3ce5aa5f1..b94aad94bf16b 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cu +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cu @@ -6,6 +6,8 @@ #include "GPUCACell.h" #include "CAHitQuadrupletGeneratorGPU.h" +#include "gpuPixelDoublets.h" + __global__ void kernel_debug(unsigned int numberOfLayerPairs_, unsigned int numberOfLayers_, const GPULayerDoublets *gpuDoublets, @@ -490,3 +492,18 @@ CAHitQuadrupletGeneratorGPU::fetchKernelResult(int regionIndex, cudaStream_t cud } return quadsInterface; } + + + + +void CAHitQuadrupletGeneratorGPU::buildDoublets(HitsOnCPU const & hh, float phicut, cudaStream_t stream) { + auto nhits = hh.nHits; + + float phiCut=0.06; + int threadsPerBlock = 256; + int blocks = (nhits + threadsPerBlock - 1) / threadsPerBlock; + + gpuPixelDoublets::getDoubletsFromHisto<<>>(hh.gpu_d,phiCut); + + +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.h index a6e9db46e68e7..717164c9cff62 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.h +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.h @@ -3,6 +3,10 @@ #include +#include "RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h" + + + #include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/Utilities/interface/EDGetToken.h" @@ -32,6 +36,11 @@ namespace edm { class CAHitQuadrupletGeneratorGPU { public: + + using HitsOnGPU = siPixelRecHitsHeterogeneousProduct::HitsOnGPU; + using HitsOnCPU = siPixelRecHitsHeterogeneousProduct::HitsOnCPU; + + typedef LayerHitMapCache LayerCacheType; static constexpr unsigned int minLayers = 4; @@ -49,6 +58,8 @@ class CAHitQuadrupletGeneratorGPU { void initEvent(const edm::Event& ev, const edm::EventSetup& es); + void buildDoublets(HitsOnCPU const & hh, float phicut, cudaStream_t stream); + void hitNtuplets(const IntermediateHitDoublets& regionDoublets, const edm::EventSetup& es, const SeedingLayerSetsHits& layers, cudaStream_t stream); diff --git a/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoublets.h b/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoublets.h new file mode 100644 index 0000000000000..019e775209f96 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoublets.h @@ -0,0 +1,161 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelDoublets_h +#define RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelDouplets_h + +#include +#include +#include +#include +#include +#include + +#include "DataFormats/Math/interface/approx_atan2.h" + +#include "RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h" + +namespace gpuPixelDoublets { + + + __device__ + std::pair + findPhiLimits(int16_t phiMe, int16_t * iphi, uint16_t * index, uint16_t size, int16_t iphicut) { + + assert(iphicut>0); + + // find extreemes in top + int16_t minPhi = phiMe-iphicut; + int16_t maxPhi = phiMe+iphicut; + + // std::cout << "\n phi min/max " << phiMe << ' ' << minPhi << ' ' << maxPhi << std::endl; + + // guess and adjust + auto findLimit = [&](int16_t mPhi) { + int jm = float(0.5f*size)*(1.f+float(mPhi)/float(std::numeric_limits::max())); + // std::cout << "jm for " << mPhi << ' ' << jm << std::endl; + jm = std::min(size-1,std::max(0,jm)); + bool notDone=true; + while(jm>0 && mPhiiphi[index[++jm]]){} + jm = std::min(size-1,std::max(0,jm)); + return jm; + }; + + auto jmin = findLimit(minPhi); + auto jmax = findLimit(maxPhi); + + + /* + std::cout << "j min/max " << jmin << ' ' << jmax << std::endl; + std::cout << "found min/max " << iphi[index[jmin]] << ' ' << iphi[index[jmax]] << std::endl; + std::cout << "found min/max +1 " << iphi[index[jmin+1]] << ' ' << iphi[index[jmax+1]] << std::endl; + std::cout << "found min/max -1 " << iphi[index[jmin-1]] << ' ' << iphi[index[jmax-1]] << std::endl; + */ + + return std::make_pair(jmin,jmax); + } + + + __global__ + void getDoubletsFromSorted(int16_t * iphi, uint16_t * index, uint32_t * offsets, float phiCut) { + + auto iphicut = phi2short(phiCut); + + auto i = blockIdx.x*blockDim.x + threadIdx.x; + if (i>=offsets[9]) return; // get rid of last layer + + assert(0==offsets[0]); + int top = (i>offsets[5]) ? 5: 0; + while (i>=offsets[++top]){}; + assert(top<10); + auto bottom = top-1; + if (bottom==3 || bottom==6) return; // do not have UP... (9 we got rid already) + assert(i>=offsets[bottom]); + assert(i= (offsets[top]-offsets[bottom])) { + printf("index problem: %d %d %d %d %d\n",i, offsets[top], offsets[bottom], offsets[top]-offsets[bottom], index[i]); + return; + } + + assert(index[i]::max()); + + auto jLimits = findPhiLimits(phiMe, iphi+offsets[top],index+offsets[top],size,iphicut); + + auto slidingWindow = [&](uint16_t mysize, uint16_t mymin,uint16_t mymax) { + auto topPhi = iphi+offsets[top]; + uint16_t imax = std::numeric_limits::max(); + uint16_t offset = (mymin>mymax) ? imax-(mysize-1) : 0; + int n=0; + for (uint16_t i = mymin+offset; i!=mymax; i++) { + assert(i<=imax); + uint16_t k = (i>mymax) ? i-offset : i; + assert(k=mymin || k2*iphicut && int16_t(phiMe-topPhi[k])>2*iphicut) + printf("deltaPhi problem: %d %d %d %d, deltas %d:%d cut %d\n",i,k,phiMe,topPhi[k],int16_t(topPhi[k]-phiMe),int16_t(phiMe-topPhi[k]),iphicut); + n++; + } + int tot = (mymin>mymax) ? (mysize-mymin)+mymax : mymax-mymin; + assert(n==tot); + }; + + slidingWindow(size,jLimits.first,jLimits.second); + + } + + template + __device__ + void doubletsFromHisto(int16_t const * iphi, Hist const * hist, uint32_t const * offsets, float phiCut) { + + auto iphicut = phi2short(phiCut); + + auto i = blockIdx.x*blockDim.x + threadIdx.x; + if (i>=offsets[9]) return; // get rid of last layer + + assert(0==offsets[0]); + int top = (i>offsets[5]) ? 5: 0; + while (i>=offsets[++top]){}; + assert(top<10); + auto bottom = top-1; + if (bottom==3 || bottom==6) return; // do not have UP... (9 we got rid already) + assert(i>=offsets[bottom]); + assert(i iphicut ) continue; + ++tot; + } + } + if (0==hist[top].nspills) assert(tot>=nmin); + // look in spill bin as well.... + + } + + void + __global__ + getDoubletsFromHisto(siPixelRecHitsHeterogeneousProduct::HitsOnGPU const * hhp, float phiCut) { + auto const & hh = *hhp; + doubletsFromHisto(hh.iphi_d,hh.hist_d,hh.hitsLayerStart_d,phiCut); + + } + +} // namespace end + +#endif From cd1088b0f8ae0e5cd10e1e6ae4320c56ec0885cd Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Wed, 25 Jul 2018 15:51:36 +0200 Subject: [PATCH 16/19] initialize --- SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h index 529b0ce3715e2..24ddd27450a11 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h @@ -26,7 +26,7 @@ namespace clusterSLOnGPU { class Kernel { public: - Kernel(cuda::stream_t<>& stream, bool doDump) {alloc(stream);} + Kernel(cuda::stream_t<>& stream, bool dump) : doDump(dump) {alloc(stream);} void algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream); GPUProduct getProduct() { return GPUProduct{slgpu.me_d};} From 46f5e22c55e4dde946bd32562b83601518a9ab4f Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Thu, 26 Jul 2018 11:46:57 +0200 Subject: [PATCH 17/19] remove debug --- .../plugins/ClusterSLOnGPU.cu | 18 ++++++++++++------ .../plugins/ClusterSLOnGPU.h | 2 +- .../ClusterTPAssociationHeterogeneous.cc | 4 ++-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu index 060a9056647ae..535388947fc84 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu @@ -6,6 +6,7 @@ #include #include #include +#include template __device__ @@ -105,8 +106,8 @@ void verifyZero(int ev, clusterSLOnGPU::DigisOnGPU const * ddp, clusterSLOnGPU:: auto i = blockIdx.x*blockDim.x + threadIdx.x; if (i>nhits) return; - auto const & dd = *ddp; - auto const & hh = *hhp; +// auto const & dd = *ddp; +// auto const & hh = *hhp; auto const & sl = *slp; assert(sl.tkId_d[i]==0); @@ -147,8 +148,7 @@ namespace clusterSLOnGPU { constexpr uint32_t invTK = 0; // std::numeric_limits::max(); - struct CSVHeader { - CSVHeader() { + void printCSVHeader() { printf("HIT: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", "ev", "ind", "det", "charge", "xg","yg","zg","rg","iphi", @@ -156,10 +156,14 @@ namespace clusterSLOnGPU { ); } - }; - CSVHeader csvHeader; std::atomic evId(0); + std::once_flag doneCSVHeader; + + Kernel::Kernel(cuda::stream_t<>& stream, bool dump) : doDump(dump) { + if (doDump) std::call_once(doneCSVHeader,printCSVHeader); + alloc(stream); + } void @@ -190,10 +194,12 @@ namespace clusterSLOnGPU { void Kernel::algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream) { + /* size_t pfs = 16*1024*1024; // cudaDeviceSetLimit(cudaLimitPrintfFifoSize,pfs); cudaDeviceGetLimit(&pfs,cudaLimitPrintfFifoSize); std::cout << "cudaLimitPrintfFifoSize " << pfs << std::endl; + */ zero(stream.id()); diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h index 24ddd27450a11..621bfbcb367bb 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h @@ -26,7 +26,7 @@ namespace clusterSLOnGPU { class Kernel { public: - Kernel(cuda::stream_t<>& stream, bool dump) : doDump(dump) {alloc(stream);} + Kernel(cuda::stream_t<>& stream, bool dump); void algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream); GPUProduct getProduct() { return GPUProduct{slgpu.me_d};} diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc index e2408a05289a9..6cffed8ad0cd5 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterTPAssociationHeterogeneous.cc @@ -199,7 +199,7 @@ void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousE // gpu stuff ------------------------ - std::cout << "In tpsimlink " << mapping.size() << std::endl; + // std::cout << "In tpsimlink " << mapping.size() << std::endl; edm::Handle gd; edm::Handle gh; @@ -233,7 +233,7 @@ void ClusterTPAssociationHeterogeneous::acquireGPUCuda(const edm::HeterogeneousE } std::sort(digi2tp.begin(),digi2tp.end()); - std::cout << "In tpsimlink found " << nn << " valid link out of " << ng << '/' << ng10 << ' ' << digi2tp.size() << std::endl; + // std::cout << "In tpsimlink found " << nn << " valid link out of " << ng << '/' << ng10 << ' ' << digi2tp.size() << std::endl; cudaCheck(cudaMemcpyAsync(gpuAlgo->slgpu.links_d, digi2tp.data(), sizeof(std::array)*digi2tp.size(), cudaMemcpyDefault, cudaStream.id())); gpuAlgo->algo(gDigis, ndigis, gHits, nhits, digi2tp.size(),cudaStream); From 007795f772efb3ab0689edd670d410922bc27c84 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Thu, 26 Jul 2018 16:17:40 +0200 Subject: [PATCH 18/19] free memory, remove sync, use cudastd --- .../CUDAUtilities/interface/HistoContainer.h | 8 ---- .../interface/cudastdAlgorithm.h | 14 ++++++ .../plugins/ClusterSLOnGPU.cu | 43 +++++++------------ .../plugins/ClusterSLOnGPU.h | 2 + 4 files changed, 31 insertions(+), 36 deletions(-) diff --git a/HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h b/HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h index baa2e7f64c613..c398b29a64c69 100644 --- a/HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h +++ b/HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h @@ -8,14 +8,6 @@ #include #endif // __CUDA_ARCH__ -#ifdef __CUDACC__ -#include -#else -#define __device__ -#define __global__ -#define __host__ -#endif // __CUDACC__ - #include "HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h" #ifdef __CUDACC__ diff --git a/HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h b/HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h index 4bfa4e694d9dc..c451ab7e4113d 100644 --- a/HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h +++ b/HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h @@ -1,6 +1,15 @@ #ifndef HeterogeneousCore_CUDAUtilities_cudastdAlgorithm_h #define HeterogeneousCore_CUDAUtilities_cudastdAlgorithm_h +#ifdef __CUDACC__ +#include +#else +#define __device__ +#define __global__ +#define __host__ +#endif // __CUDACC__ + + #include // reimplementation of std algorithms able to compile with CUDA and run on GPUs, @@ -10,6 +19,7 @@ namespace cuda_std { template struct less { + __device__ __host__ constexpr bool operator()(const T &lhs, const T &rhs) const { return lhs < rhs; } @@ -18,10 +28,12 @@ namespace cuda_std { template<> struct less { template + __device__ __host__ constexpr bool operator()(const T &lhs, const U &rhs ) const { return lhs < rhs;} }; template> + __device__ __host__ constexpr RandomIt lower_bound(RandomIt first, RandomIt last, const T& value, Compare comp={}) { @@ -43,6 +55,7 @@ namespace cuda_std { } template> + __device__ __host__ constexpr RandomIt upper_bound(RandomIt first, RandomIt last, const T& value, Compare comp={}) { @@ -64,6 +77,7 @@ namespace cuda_std { } template> + __device__ __host__ constexpr RandomIt binary_find(RandomIt first, RandomIt last, const T& value, Compare comp={}) { diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu index 535388947fc84..7a3e4eb83161d 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu @@ -8,27 +8,6 @@ #include #include -template -__device__ -ForwardIt lowerBound(ForwardIt first, ForwardIt last, const T& value, Compare comp) -{ - ForwardIt it; - auto count = last-first; - - while (count > 0) { - it = first; - auto step = count / 2; - it+=step; - if (comp(*it, value)) { - first = ++it; - count -= step + 1; - } - else - count = step; - } - return first; -} - using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU; @@ -59,19 +38,18 @@ void simLink(clusterSLOnGPU::DigisOnGPU const * ddp, uint32_t ndigis, clusterSLO const std::array me{{id,ch,0,0}}; - auto less = [](std::array const & a, std::array const & b)->bool { + auto less = [] __device__ __host__ (std::array const & a, std::array const & b)->bool { return a[0] const & a, std::array const & b)->bool { + auto equal = [] __device__ __host__ (std::array const & a, std::array const & b)->bool { return a[0]==b[0] && a[1]==b[1]; // in this context we do not care of [2] }; auto const * b = sl.links_d; auto const * e = b+n; - // auto p = cuda_std::lower_bound(b,e,me,less); - auto p = lowerBound(b,e,me,less); + auto p = cuda_std::lower_bound(b,e,me,less); int32_t j = p-sl.links_d; assert(j>=0); @@ -178,10 +156,19 @@ namespace clusterSLOnGPU { cudaCheck(cudaMalloc((void**) & slgpu.me_d, sizeof(ClusterSLGPU))); cudaCheck(cudaMemcpyAsync(slgpu.me_d, &slgpu, sizeof(ClusterSLGPU), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaDeviceSynchronize()); - } + void + Kernel::deAlloc() { + cudaCheck(cudaFree(slgpu.links_d)); + cudaCheck(cudaFree(slgpu.tkId_d)); + cudaCheck(cudaFree(slgpu.tkId2_d)); + cudaCheck(cudaFree(slgpu.n1_d)); + cudaCheck(cudaFree(slgpu.n2_d)); + cudaCheck(cudaFree(slgpu.me_d)); +} + + void Kernel::zero(cudaStream_t stream) { cudaCheck(cudaMemsetAsync(slgpu.tkId_d,invTK,(ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); @@ -216,9 +203,9 @@ namespace clusterSLOnGPU { assert(sl.me_d); simLink<<>>(dd.me_d,ndigis, hh.gpu_d, sl.me_d,n); - cudaStreamSynchronize(stream.id()); if (doDump) { + cudaStreamSynchronize(stream.id()); // flush previous printf // one line == 200B so each kernel can print only 5K lines.... blocks = 16; // (nhits + threadsPerBlock - 1) / threadsPerBlock; for (int first=0; first& stream, bool dump); + ~Kernel() {deAlloc();} void algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream); GPUProduct getProduct() { return GPUProduct{slgpu.me_d};} private: void alloc(cuda::stream_t<>& stream); + void deAlloc(); void zero(cudaStream_t stream); public: ClusterSLGPU slgpu; From 921e2979407e0561a917cda0536380c0792a53f0 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Tue, 31 Jul 2018 09:24:44 +0200 Subject: [PATCH 19/19] fix NVCC protections --- .../CUDAUtilities/interface/cudastdAlgorithm.h | 6 ------ .../CUDAUtilities/test/BuildFile.xml | 16 ++++++++-------- .../trackerHitAssociationHeterogeneousProduct.h | 6 +++--- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h b/HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h index c451ab7e4113d..c107f4b228539 100644 --- a/HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h +++ b/HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h @@ -1,13 +1,7 @@ #ifndef HeterogeneousCore_CUDAUtilities_cudastdAlgorithm_h #define HeterogeneousCore_CUDAUtilities_cudastdAlgorithm_h -#ifdef __CUDACC__ #include -#else -#define __device__ -#define __global__ -#define __host__ -#endif // __CUDACC__ #include diff --git a/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml b/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml index 1c1d28dbf3761..ab97c243c4385 100644 --- a/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml +++ b/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml @@ -1,31 +1,31 @@ + + - + + + - - - - + + + - - - diff --git a/SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h b/SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h index 4db318c5348f6..c3f86ddf855c8 100644 --- a/SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h +++ b/SimTracker/TrackerHitAssociation/plugins/trackerHitAssociationHeterogeneousProduct.h @@ -1,7 +1,7 @@ #ifndef SimTrackerTrackerHitAssociationClusterHeterogeneousProduct_H #define SimTrackerTrackerHitAssociationClusterHeterogeneousProduct_H -#ifndef __NVCC__ +#ifndef __CUDACC__ #include "SimTracker/TrackerHitAssociation/interface/ClusterTPAssociation.h" #endif @@ -10,7 +10,7 @@ namespace trackerHitAssociationHeterogeneousProduct { -#ifndef __NVCC__ +#ifndef __CUDACC__ struct CPUProduct { CPUProduct() = default; template @@ -37,7 +37,7 @@ namespace trackerHitAssociationHeterogeneousProduct { ClusterSLGPU * gpu_d=nullptr; }; -#ifndef __NVCC__ +#ifndef __CUDACC__ using ClusterTPAHeterogeneousProduct = HeterogeneousProductImpl, heterogeneous::GPUCudaProduct >; #endif