diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h index 00386b06c2e6c..8bfefee5c3387 100644 --- a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h @@ -1,8 +1,6 @@ #ifndef CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h #define CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h -#include - #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" #include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc index 9758731f0bb8e..e4f278c28ec69 100644 --- a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc +++ b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc @@ -87,9 +87,7 @@ SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() { const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cudaStream_t cudaStream) const { const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { cudaCheck(cudaMalloc((void**)&data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU))); - cudaCheck( - cudaMalloc((void**)&data.gainDataOnGPU, - this->gains_->data().size())); // TODO: this could be changed to cuda::memory::device::unique_ptr<> + cudaCheck(cudaMalloc((void**)&data.gainDataOnGPU, this->gains_->data().size())); // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory cudaCheck(cudaMemcpyAsync( data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream)); diff --git a/DataFormats/GeometrySurface/test/gpuFrameTransformTest.cpp b/DataFormats/GeometrySurface/test/gpuFrameTransformTest.cpp index 6c4bc91ebeb3d..8c18054e0deb5 100644 --- a/DataFormats/GeometrySurface/test/gpuFrameTransformTest.cpp +++ b/DataFormats/GeometrySurface/test/gpuFrameTransformTest.cpp @@ -70,7 +70,6 @@ int main(void) { SFrame sf1(f1.position().x(), f1.position().y(), f1.position().z(), f1.rotation()); - // auto d_sf = cuda::memory::device::make_unique(current_device, 1); auto d_sf = cudautils::make_device_unique(sizeof(SFrame), nullptr); cudaCheck(cudaMemcpy(d_sf.get(), &sf1, sizeof(SFrame), cudaMemcpyHostToDevice)); diff --git a/DataFormats/Math/test/CholeskyInvert_t.cpp b/DataFormats/Math/test/CholeskyInvert_t.cpp index aefae8614d437..4c0b064da6ed1 100644 --- a/DataFormats/Math/test/CholeskyInvert_t.cpp +++ b/DataFormats/Math/test/CholeskyInvert_t.cpp @@ -1,4 +1,4 @@ -// nvcc -O3 CholeskyDecomp_t.cu -Icuda-api-wrappers/src/ --expt-relaxed-constexpr -gencode arch=compute_61,code=sm_61 --compiler-options="-Ofast -march=native" +// nvcc -O3 CholeskyDecomp_t.cu --expt-relaxed-constexpr -gencode arch=compute_61,code=sm_61 --compiler-options="-Ofast -march=native" // add -DDOPROF to run nvprof --metrics all #include diff --git a/DataFormats/Math/test/CholeskyInvert_t.cu b/DataFormats/Math/test/CholeskyInvert_t.cu index d44fda8220beb..f2e440d6009ff 100644 --- a/DataFormats/Math/test/CholeskyInvert_t.cu +++ b/DataFormats/Math/test/CholeskyInvert_t.cu @@ -1,4 +1,4 @@ -// nvcc -O3 CholeskyDecomp_t.cu -Icuda-api-wrappers/src/ --expt-relaxed-constexpr -gencode arch=compute_61,code=sm_61 --compiler-options="-Ofast -march=native" +// nvcc -O3 CholeskyDecomp_t.cu --expt-relaxed-constexpr -gencode arch=compute_61,code=sm_61 --compiler-options="-Ofast -march=native" // add -DDOPROF to run nvprof --metrics all #include diff --git a/HeterogeneousCore/CUDACore/interface/CUDAESProduct.h b/HeterogeneousCore/CUDACore/interface/CUDAESProduct.h index a456173a87374..b8b230e510fa3 100644 --- a/HeterogeneousCore/CUDACore/interface/CUDAESProduct.h +++ b/HeterogeneousCore/CUDACore/interface/CUDAESProduct.h @@ -3,17 +3,16 @@ #include #include +#include #include -#include - #include "FWCore/Concurrency/interface/hardware_pause.h" #include "FWCore/Utilities/interface/thread_safety_macros.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/eventIsOccurred.h" #include "HeterogeneousCore/CUDAUtilities/interface/CUDAEventCache.h" -#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaDeviceCount.h" +#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h" +#include "HeterogeneousCore/CUDAUtilities/interface/eventIsOccurred.h" template class CUDAESProduct { diff --git a/HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h b/HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h index f17fc37e4b949..758218bb958a2 100644 --- a/HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h +++ b/HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h @@ -1,19 +1,17 @@ #ifndef HeterogeneousCore_CUDACore_CUDAScopedContext_h #define HeterogeneousCore_CUDACore_CUDAScopedContext_h +#include + +#include "CUDADataFormats/Common/interface/CUDAProduct.h" #include "FWCore/Concurrency/interface/WaitingTaskWithArenaHolder.h" #include "FWCore/Framework/interface/Event.h" -#include "FWCore/Utilities/interface/StreamID.h" #include "FWCore/Utilities/interface/EDGetToken.h" #include "FWCore/Utilities/interface/EDPutToken.h" -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "FWCore/Utilities/interface/StreamID.h" #include "HeterogeneousCore/CUDACore/interface/CUDAContextState.h" -#include "HeterogeneousCore/CUDAUtilities/interface/SharedStreamPtr.h" #include "HeterogeneousCore/CUDAUtilities/interface/SharedEventPtr.h" - -#include - -#include +#include "HeterogeneousCore/CUDAUtilities/interface/SharedStreamPtr.h" namespace cudatest { class TestCUDAScopedContext; diff --git a/HeterogeneousCore/CUDACore/test/test_CUDAScopedContext.cc b/HeterogeneousCore/CUDACore/test/test_CUDAScopedContext.cc index 89e7e1c3d00af..507824f8bfdb7 100644 --- a/HeterogeneousCore/CUDACore/test/test_CUDAScopedContext.cc +++ b/HeterogeneousCore/CUDACore/test/test_CUDAScopedContext.cc @@ -87,7 +87,6 @@ TEST_CASE("Use of CUDAScopedContext", "[CUDACore]") { SECTION("Joining multiple CUDA streams") { cudautils::ScopedSetDevice setDeviceForThisScope(defaultDevice); - auto current_device = cuda::device::current::get(); // Mimick a producer on the first CUDA stream int h_a1 = 1; diff --git a/HeterogeneousCore/CUDAServices/src/CUDAService.cc b/HeterogeneousCore/CUDAServices/src/CUDAService.cc index 31602de8b71a8..4a18ed2dcf176 100644 --- a/HeterogeneousCore/CUDAServices/src/CUDAService.cc +++ b/HeterogeneousCore/CUDAServices/src/CUDAService.cc @@ -371,11 +371,6 @@ int CUDAService::deviceWithMostFreeMemory() const { size_t maxFreeMemory = 0; int device = -1; for (int i = 0; i < numberOfDevices_; ++i) { - /* - // TODO: understand why the api-wrappers version gives same value for all devices - auto device = cuda::device::get(i); - auto freeMemory = device.memory.amount_free(); - */ size_t freeMemory, totalMemory; cudaSetDevice(i); cudaMemGetInfo(&freeMemory, &totalMemory); diff --git a/HeterogeneousCore/CUDAUtilities/BuildFile.xml b/HeterogeneousCore/CUDAUtilities/BuildFile.xml index 019056960d32d..4528e0288d64f 100644 --- a/HeterogeneousCore/CUDAUtilities/BuildFile.xml +++ b/HeterogeneousCore/CUDAUtilities/BuildFile.xml @@ -1,6 +1,5 @@ - diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 575c72d33a69a..aedca75b90c17 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -3,8 +3,6 @@ #include -#include - #include "CalibTracker/SiPixelESProducers/interface/SiPixelCPEGenericDBErrorParametrization.h" #include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" #include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" diff --git a/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.h b/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.h index 3dc8beb65d9bb..05b399e870f58 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.h +++ b/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.h @@ -1,11 +1,10 @@ #ifndef RecoPixelVertexing_PixelTrackFitting_plugins_HelixFitOnGPU_h #define RecoPixelVertexing_PixelTrackFitting_plugins_HelixFitOnGPU_h -#include - -#include "RecoPixelVertexing/PixelTrackFitting/interface/FitResult.h" #include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/FitResult.h" + #include "CAConstants.h" namespace Rfit {