Skip to content

Commit

Permalink
Add optional flags to disable SOA->legacy conversion and GPU->CPU tra…
Browse files Browse the repository at this point in the history
…nsfer (#132)

Always produce the CPU cluster and rechit collections, since they are needed anyway.
Add transfer and conversion flags to clusterizer, rechits and CA.
Add a skeleton for the future pixel track producer.
Add customize functions to disable conversions to legacy formats, and to disable unnecessary GPU->CPU transfers.
  • Loading branch information
makortel authored and fwyzard committed Dec 29, 2020
1 parent adf1e92 commit 6b9ac38
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 37 deletions.
14 changes: 2 additions & 12 deletions RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,9 @@
striptrackerlocalreco = cms.Sequence(siStripZeroSuppression*siStripClusters*siStripMatchedRecHits)
trackerlocalreco = cms.Sequence(pixeltrackerlocalreco*striptrackerlocalreco*clusterSummaryProducer)

from RecoLocalTracker.SiPixelClusterizer.siPixelClustersHeterogeneous_cfi import *
from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import *
from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import *

from Configuration.ProcessModifiers.gpu_cff import gpu
from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import *
from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneousConverter_cfi import siPixelRecHitHeterogeneousConverter as _siPixelRecHitHeterogeneousConverter
gpu.toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitHeterogeneousConverter.clone())

_pixeltrackerlocalreco_gpu = pixeltrackerlocalreco.copy()
_pixeltrackerlocalreco_gpu.replace(siPixelClustersPreSplitting, siPixelClustersHeterogeneous+siPixelClustersPreSplitting)
_pixeltrackerlocalreco_gpu.replace(siPixelRecHitsPreSplitting, siPixelRecHitHeterogeneous+siPixelRecHitsPreSplitting)
gpu.toReplaceWith(pixeltrackerlocalreco, _pixeltrackerlocalreco_gpu)
from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import siPixelRecHitHeterogeneous as _siPixelRecHitHeterogeneous
gpu.toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitHeterogeneous)

from RecoLocalTracker.SiPhase2Clusterizer.phase2TrackerClusterizer_cfi import *
from RecoLocalTracker.Phase2TrackerRecHits.Phase2StripCPEGeometricESProducer_cfi import *
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ namespace pixelgpudetails {
const SiPixelGainForHLTonGPU *gains,
const uint32_t wordCounter, const uint32_t fedCounter,
bool convertADCtoElectrons,
bool useQualityInfo, bool includeErrors, bool debug,
bool useQualityInfo, bool includeErrors, bool transferToCPU, bool debug,
cuda::stream_t<>& stream)
{
nDigis = wordCounter;
Expand Down Expand Up @@ -646,25 +646,26 @@ namespace pixelgpudetails {
cudaCheck(cudaGetLastError());

// copy data to host variable

cudaCheck(cudaMemcpyAsync(pdigi_h, pdigi_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(rawIdArr_h, rawIdArr_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));

if (includeErrors) {
cudaCheck(cudaMemcpyAsync(error_h, error_d, vsize, cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(data_h, data_d, MAX_ERROR_SIZE, cudaMemcpyDefault, stream.id()));
// If we want to transfer only the minimal amount of data, we
// need a synchronization point. A single ExternalWork (of
// SiPixelRawToClusterHeterogeneous) does not help because it is
// already used to synchronize the data movement. So we'd need
// two ExternalWorks (or explicit use of TBB tasks). The
// prototype of #100 would allow this easily (as there would be
// two ExternalWorks).
//
//error_h->set_data(data_h);
//cudaCheck(cudaStreamSynchronize(stream.id()));
//int size = error_h->size();
//cudaCheck(cudaMemcpyAsync(data_h, data_d, size*esize, cudaMemcpyDefault, stream.id()));
if(transferToCPU) {
cudaCheck(cudaMemcpyAsync(pdigi_h, pdigi_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(rawIdArr_h, rawIdArr_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));

if (includeErrors) {
cudaCheck(cudaMemcpyAsync(error_h, error_d, vsize, cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(data_h, data_d, MAX_ERROR_SIZE, cudaMemcpyDefault, stream.id()));
// If we want to transfer only the minimal amount of data, we
// need a synchronization point. A single ExternalWork (of
// SiPixelRawToClusterHeterogeneous) does not help because it is
// already used to synchronize the data movement. So we'd need
// two ExternalWorks (or explicit use of TBB tasks). The
// prototype of #100 would allow this easily (as there would be
// two ExternalWorks).
//
//error_h->set_data(data_h);
//cudaCheck(cudaStreamSynchronize(stream.id()));
//int size = error_h->size();
//cudaCheck(cudaMemcpyAsync(data_h, data_d, size*esize, cudaMemcpyDefault, stream.id()));
}
}
// End of Raw2Digi and passing data for clusterisation

Expand All @@ -682,7 +683,9 @@ namespace pixelgpudetails {
cudaCheck(cudaGetLastError());

// calibrated adc
cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id()));
if(transferToCPU) {
cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id()));
}

/*
std::cout
Expand Down Expand Up @@ -730,7 +733,9 @@ namespace pixelgpudetails {


// clusters
cudaCheck(cudaMemcpyAsync(clus_h, clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
if(transferToCPU) {
cudaCheck(cudaMemcpyAsync(clus_h, clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
}
} // end clusterizer scope
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ namespace pixelgpudetails {
void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp,
const SiPixelGainForHLTonGPU *gains,
const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons,
bool useQualityInfo, bool includeErrors, bool debug,
bool useQualityInfo, bool includeErrors, bool transferToCPU_, bool debug,
cuda::stream_t<>& stream);

auto getProduct() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,8 @@
from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizer_cfi import siPixelClusters as _siPixelClusters
siPixelClustersPreSplitting = _siPixelClusters.clone()

# In principle we could remove `siPixelClustersPreSplitting` from the `pixeltrackerlocalreco`
# sequence when the `gpu` modifier is active; for the time being we keep it for simplicity.
from Configuration.ProcessModifiers.gpu_cff import gpu
from RecoLocalTracker.SiPixelClusterizer.siPixelClustersHeterogeneous_cfi import siPixelClustersHeterogeneous as _siPixelClustersHeterogeneous
from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import *
from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import *
gpu.toReplaceWith(siPixelClustersPreSplitting, _siPixelClustersHeterogeneous.clone())

0 comments on commit 6b9ac38

Please sign in to comment.