diff --git a/CUDADataFormats/BeamSpot/BuildFile.xml b/CUDADataFormats/BeamSpot/BuildFile.xml new file mode 100644 index 0000000000000..75f3d15738429 --- /dev/null +++ b/CUDADataFormats/BeamSpot/BuildFile.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h b/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h new file mode 100644 index 0000000000000..7b04fac67b9f1 --- /dev/null +++ b/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h @@ -0,0 +1,33 @@ +#ifndef CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h +#define CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h + +#include + +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" + +class BeamSpotCUDA { +public: + // default constructor, required by cms::cuda::Product + BeamSpotCUDA() = default; + + // constructor that allocates cached device memory on the given CUDA stream + BeamSpotCUDA(cudaStream_t stream) { data_d_ = cms::cuda::make_device_unique(stream); } + + // movable, non-copiable + BeamSpotCUDA(BeamSpotCUDA const&) = delete; + BeamSpotCUDA(BeamSpotCUDA&&) = default; + BeamSpotCUDA& operator=(BeamSpotCUDA const&) = delete; + BeamSpotCUDA& operator=(BeamSpotCUDA&&) = default; + + BeamSpotPOD* data() { return data_d_.get(); } + BeamSpotPOD const* data() const { return data_d_.get(); } + + cms::cuda::device::unique_ptr& ptr() { return data_d_; } + cms::cuda::device::unique_ptr const& ptr() const { return data_d_; } + +private: + cms::cuda::device::unique_ptr data_d_; +}; + +#endif // CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h diff --git a/CUDADataFormats/BeamSpot/src/classes.h b/CUDADataFormats/BeamSpot/src/classes.h new file mode 100644 index 0000000000000..5aebe536f8a01 --- /dev/null +++ b/CUDADataFormats/BeamSpot/src/classes.h @@ -0,0 +1,8 @@ +#ifndef CUDADataFormats_BeamSpot_classes_h +#define CUDADataFormats_BeamSpot_classes_h + +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif // CUDADataFormats_BeamSpot_classes_h diff --git a/CUDADataFormats/BeamSpot/src/classes_def.xml b/CUDADataFormats/BeamSpot/src/classes_def.xml new file mode 100644 index 0000000000000..198edeebe7c73 --- /dev/null +++ b/CUDADataFormats/BeamSpot/src/classes_def.xml @@ -0,0 +1,4 @@ + + + + diff --git a/Configuration/StandardSequences/python/Reconstruction_cff.py b/Configuration/StandardSequences/python/Reconstruction_cff.py index edf85287421b3..24776960bc877 100644 --- a/Configuration/StandardSequences/python/Reconstruction_cff.py +++ b/Configuration/StandardSequences/python/Reconstruction_cff.py @@ -103,7 +103,7 @@ from RecoLocalCalo.Castor.Castor_cff import * from RecoLocalCalo.Configuration.hcalGlobalReco_cff import * -globalreco_trackingTask = cms.Task(offlineBeamSpot, +globalreco_trackingTask = cms.Task(offlineBeamSpotTask, MeasurementTrackerEventPreSplitting, # unclear where to put this siPixelClusterShapeCachePreSplitting, # unclear where to put this standalonemuontrackingTask, @@ -117,7 +117,7 @@ ########################################## # offlineBeamSpot is reconstructed before mixing in fastSim ########################################## -_fastSim_globalreco_trackingTask = globalreco_trackingTask.copyAndExclude([offlineBeamSpot,MeasurementTrackerEventPreSplitting,siPixelClusterShapeCachePreSplitting]) +_fastSim_globalreco_trackingTask = globalreco_trackingTask.copyAndExclude([offlineBeamSpotTask,MeasurementTrackerEventPreSplitting,siPixelClusterShapeCachePreSplitting]) fastSim.toReplaceWith(globalreco_trackingTask,_fastSim_globalreco_trackingTask) _phase2_timing_layer_globalreco_trackingTask = globalreco_trackingTask.copy() @@ -212,7 +212,7 @@ reconstruction_trackingOnly = cms.Sequence(reconstruction_trackingOnlyTask) reconstruction_pixelTrackingOnlyTask = cms.Task( pixeltrackerlocalrecoTask, - offlineBeamSpot, + offlineBeamSpotTask, siPixelClusterShapeCachePreSplitting, recopixelvertexingTask ) diff --git a/DataFormats/BeamSpot/interface/BeamSpotPOD.h b/DataFormats/BeamSpot/interface/BeamSpotPOD.h new file mode 100644 index 0000000000000..5920db6a18930 --- /dev/null +++ b/DataFormats/BeamSpot/interface/BeamSpotPOD.h @@ -0,0 +1,20 @@ +#ifndef DataFormats_BeamSpot_interface_BeamSpotPOD_h +#define DataFormats_BeamSpot_interface_BeamSpotPOD_h + +// This struct is a transient-only, simplified representation of the beamspot +// data used as the underlying type for data transfers and operations in +// heterogeneous code (e.g. in CUDA code). + +// The covariance matrix is not used in that code, so is left out here. + +// align to the CUDA L1 cache line size +struct alignas(128) BeamSpotPOD { + float x, y, z; // position + float sigmaZ; + float beamWidthX, beamWidthY; + float dxdz, dydz; + float emittanceX, emittanceY; + float betaStar; +}; + +#endif // DataFormats_BeamSpot_interface_BeamSpotPOD_h diff --git a/DataFormats/BeamSpot/src/classes.h b/DataFormats/BeamSpot/src/classes.h index efe580a4cc8f5..2365c90fdc257 100644 --- a/DataFormats/BeamSpot/src/classes.h +++ b/DataFormats/BeamSpot/src/classes.h @@ -1,8 +1,3 @@ - -#include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/BeamSpot/interface/BeamSpot.h" -#include "Math/Cartesian3D.h" -#include "DataFormats/Math/interface/Vector3D.h" -#include "Math/Polar3D.h" -#include "Math/CylindricalEta3D.h" -#include "DataFormats/Math/interface/Vector.h" +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/Common/interface/Wrapper.h" diff --git a/DataFormats/BeamSpot/src/classes_def.xml b/DataFormats/BeamSpot/src/classes_def.xml index 9c99903ae8bd4..af3d4e4627c58 100644 --- a/DataFormats/BeamSpot/src/classes_def.xml +++ b/DataFormats/BeamSpot/src/classes_def.xml @@ -4,4 +4,7 @@ + + + diff --git a/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc b/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc new file mode 100644 index 0000000000000..8b0de1c739076 --- /dev/null +++ b/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc @@ -0,0 +1,101 @@ +#include + +#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" +#include "CUDADataFormats/Common/interface/Product.h" +#include "DataFormats/BeamSpot/interface/BeamSpot.h" +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" + +namespace { + + class BeamSpotHost { + public: + BeamSpotHost() : data_h_{cms::cuda::make_host_noncached_unique(cudaHostAllocWriteCombined)} {} + + BeamSpotHost(BeamSpotHost const&) = delete; + BeamSpotHost(BeamSpotHost&&) = default; + + BeamSpotHost& operator=(BeamSpotHost const&) = delete; + BeamSpotHost& operator=(BeamSpotHost&&) = default; + + BeamSpotPOD* data() { return data_h_.get(); } + BeamSpotPOD const* data() const { return data_h_.get(); } + + cms::cuda::host::noncached::unique_ptr& ptr() { return data_h_; } + cms::cuda::host::noncached::unique_ptr const& ptr() const { return data_h_; } + + private: + cms::cuda::host::noncached::unique_ptr data_h_; + }; + +} // namespace + +class BeamSpotToCUDA : public edm::global::EDProducer> { +public: + explicit BeamSpotToCUDA(const edm::ParameterSet& iConfig); + ~BeamSpotToCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + std::unique_ptr beginStream(edm::StreamID) const override { + edm::Service cs; + if (cs->enabled()) { + return std::make_unique(); + } else { + return nullptr; + } + } + void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + +private: + const edm::EDGetTokenT bsGetToken_; + const edm::EDPutTokenT> bsPutToken_; +}; + +BeamSpotToCUDA::BeamSpotToCUDA(const edm::ParameterSet& iConfig) + : bsGetToken_{consumes(iConfig.getParameter("src"))}, + bsPutToken_{produces>()} {} + +void BeamSpotToCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("offlineBeamSpot")); + descriptions.add("offlineBeamSpotToCUDA", desc); +} + +void BeamSpotToCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + cms::cuda::ScopedContextProduce ctx{streamID}; + + const reco::BeamSpot& bs = iEvent.get(bsGetToken_); + + auto& bsHost = streamCache(streamID)->ptr(); + + bsHost->x = bs.x0(); + bsHost->y = bs.y0(); + bsHost->z = bs.z0(); + + bsHost->sigmaZ = bs.sigmaZ(); + bsHost->beamWidthX = bs.BeamWidthX(); + bsHost->beamWidthY = bs.BeamWidthY(); + bsHost->dxdz = bs.dxdz(); + bsHost->dydz = bs.dydz(); + bsHost->emittanceX = bs.emittanceX(); + bsHost->emittanceY = bs.emittanceY(); + bsHost->betaStar = bs.betaStar(); + + BeamSpotCUDA bsDevice(ctx.stream()); + cms::cuda::copyAsync(bsDevice.ptr(), bsHost, ctx.stream()); + + ctx.emplace(iEvent, bsPutToken_, std::move(bsDevice)); +} + +DEFINE_FWK_MODULE(BeamSpotToCUDA); diff --git a/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml b/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml index c7498da5ae33f..97d6d07a27631 100644 --- a/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml +++ b/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml @@ -1,13 +1,12 @@ + + + + + - - - - - - - + @@ -40,4 +39,10 @@ - + + + + + + + diff --git a/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py b/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py index deb62255199e5..7cc651af22106 100644 --- a/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py +++ b/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py @@ -1,4 +1,11 @@ import FWCore.ParameterSet.Config as cms from RecoVertex.BeamSpotProducer.BeamSpot_cfi import * +from RecoVertex.BeamSpotProducer.offlineBeamSpotToCUDA_cfi import offlineBeamSpotToCUDA +offlineBeamSpotTask = cms.Task(offlineBeamSpot) + +from Configuration.ProcessModifiers.gpu_cff import gpu +_offlineBeamSpotTask_gpu = offlineBeamSpotTask.copy() +_offlineBeamSpotTask_gpu.add(offlineBeamSpotToCUDA) +gpu.toReplaceWith(offlineBeamSpotTask, _offlineBeamSpotTask_gpu)