Remove last references to CUDA API Wrappers

cms-patatrack · Nov 27, 2019 · e4f89c0
1 parent 2e0a3c9
commit e4f89c0
Show file tree

Hide file tree

Showing 12 changed files with 14 additions and 32 deletions.
diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h
@@ -1,8 +1,6 @@
 #ifndef CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h
 #define CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h
 
-#include <cuda/api_wrappers.h>
-
 #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h"
 #include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h"
 

diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc
@@ -87,9 +87,7 @@ SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() {
 const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cudaStream_t cudaStream) const {
   const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) {
     cudaCheck(cudaMalloc((void**)&data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU)));
-    cudaCheck(
-        cudaMalloc((void**)&data.gainDataOnGPU,
-                   this->gains_->data().size()));  // TODO: this could be changed to cuda::memory::device::unique_ptr<>
+    cudaCheck(cudaMalloc((void**)&data.gainDataOnGPU, this->gains_->data().size()));
     // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory
     cudaCheck(cudaMemcpyAsync(
         data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream));

diff --git a/DataFormats/GeometrySurface/test/gpuFrameTransformTest.cpp b/DataFormats/GeometrySurface/test/gpuFrameTransformTest.cpp
@@ -70,7 +70,6 @@ int main(void) {
 
   SFrame sf1(f1.position().x(), f1.position().y(), f1.position().z(), f1.rotation());
 
-  // auto d_sf = cuda::memory::device::make_unique<SFrame[]>(current_device, 1);
   auto d_sf = cudautils::make_device_unique<char[]>(sizeof(SFrame), nullptr);
   cudaCheck(cudaMemcpy(d_sf.get(), &sf1, sizeof(SFrame), cudaMemcpyHostToDevice));
 

diff --git a/DataFormats/Math/test/CholeskyInvert_t.cpp b/DataFormats/Math/test/CholeskyInvert_t.cpp
@@ -1,4 +1,4 @@
-// nvcc -O3 CholeskyDecomp_t.cu -Icuda-api-wrappers/src/ --expt-relaxed-constexpr -gencode arch=compute_61,code=sm_61 --compiler-options="-Ofast -march=native"
+// nvcc -O3 CholeskyDecomp_t.cu --expt-relaxed-constexpr -gencode arch=compute_61,code=sm_61 --compiler-options="-Ofast -march=native"
 // add -DDOPROF to run  nvprof --metrics all
 
 #include <algorithm>

diff --git a/DataFormats/Math/test/CholeskyInvert_t.cu b/DataFormats/Math/test/CholeskyInvert_t.cu
@@ -1,4 +1,4 @@
-// nvcc -O3 CholeskyDecomp_t.cu -Icuda-api-wrappers/src/ --expt-relaxed-constexpr -gencode arch=compute_61,code=sm_61 --compiler-options="-Ofast -march=native"
+// nvcc -O3 CholeskyDecomp_t.cu --expt-relaxed-constexpr -gencode arch=compute_61,code=sm_61 --compiler-options="-Ofast -march=native"
 // add -DDOPROF to run  nvprof --metrics all
 
 #include <algorithm>

diff --git a/HeterogeneousCore/CUDACore/interface/CUDAESProduct.h b/HeterogeneousCore/CUDACore/interface/CUDAESProduct.h
@@ -3,17 +3,16 @@
 
 #include <atomic>
 #include <cassert>
+#include <mutex>
 #include <vector>
 
-#include <cuda/api_wrappers.h>
-
 #include "FWCore/Concurrency/interface/hardware_pause.h"
 #include "FWCore/Utilities/interface/thread_safety_macros.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/eventIsOccurred.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/CUDAEventCache.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cudaDeviceCount.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/eventIsOccurred.h"
 
 template <typename T>
 class CUDAESProduct {

diff --git a/HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h b/HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h
@@ -1,19 +1,17 @@
 #ifndef HeterogeneousCore_CUDACore_CUDAScopedContext_h
 #define HeterogeneousCore_CUDACore_CUDAScopedContext_h
 
+#include <optional>
+
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
 #include "FWCore/Concurrency/interface/WaitingTaskWithArenaHolder.h"
 #include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Utilities/interface/StreamID.h"
 #include "FWCore/Utilities/interface/EDGetToken.h"
 #include "FWCore/Utilities/interface/EDPutToken.h"
-#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "FWCore/Utilities/interface/StreamID.h"
 #include "HeterogeneousCore/CUDACore/interface/CUDAContextState.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/SharedStreamPtr.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/SharedEventPtr.h"
-
-#include <cuda/api_wrappers.h>
-
-#include <optional>
+#include "HeterogeneousCore/CUDAUtilities/interface/SharedStreamPtr.h"
 
 namespace cudatest {
   class TestCUDAScopedContext;

diff --git a/HeterogeneousCore/CUDACore/test/test_CUDAScopedContext.cc b/HeterogeneousCore/CUDACore/test/test_CUDAScopedContext.cc
@@ -87,7 +87,6 @@ TEST_CASE("Use of CUDAScopedContext", "[CUDACore]") {
 
     SECTION("Joining multiple CUDA streams") {
       cudautils::ScopedSetDevice setDeviceForThisScope(defaultDevice);
-      auto current_device = cuda::device::current::get();
 
       // Mimick a producer on the first CUDA stream
       int h_a1 = 1;

diff --git a/HeterogeneousCore/CUDAServices/src/CUDAService.cc b/HeterogeneousCore/CUDAServices/src/CUDAService.cc
@@ -371,11 +371,6 @@ int CUDAService::deviceWithMostFreeMemory() const {
   size_t maxFreeMemory = 0;
   int device = -1;
   for (int i = 0; i < numberOfDevices_; ++i) {
-    /*
-    // TODO: understand why the api-wrappers version gives same value for all devices
-    auto device = cuda::device::get(i);
-    auto freeMemory = device.memory.amount_free();
-    */
     size_t freeMemory, totalMemory;
     cudaSetDevice(i);
     cudaMemGetInfo(&freeMemory, &totalMemory);

diff --git a/HeterogeneousCore/CUDAUtilities/BuildFile.xml b/HeterogeneousCore/CUDAUtilities/BuildFile.xml
@@ -1,6 +1,5 @@
 <use name="cub"/>
 <use name="cuda"/>
-<use name="cuda-api-wrappers"/>
 <use name="FWCore/Utilities"/>
 <use name="FWCore/MessageLogger"/>
 

diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h
@@ -3,8 +3,6 @@
 
 #include <utility>
 
-#include <cuda/api_wrappers.h>
-
 #include "CalibTracker/SiPixelESProducers/interface/SiPixelCPEGenericDBErrorParametrization.h"
 #include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h"
 #include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h"

diff --git a/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.h b/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.h
@@ -1,11 +1,10 @@
 #ifndef RecoPixelVertexing_PixelTrackFitting_plugins_HelixFitOnGPU_h
 #define RecoPixelVertexing_PixelTrackFitting_plugins_HelixFitOnGPU_h
 
-#include <cuda/api_wrappers.h>
-
-#include "RecoPixelVertexing/PixelTrackFitting/interface/FitResult.h"
 #include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"
 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
+#include "RecoPixelVertexing/PixelTrackFitting/interface/FitResult.h"
+
 #include "CAConstants.h"
 
 namespace Rfit {