Skip to content

Commit

Permalink
Add assorted CUDA-related utilities (#345)
Browse files Browse the repository at this point in the history
  • Loading branch information
fwyzard committed May 13, 2019
1 parent 0e9830d commit 87a6dbf
Show file tree
Hide file tree
Showing 10 changed files with 983 additions and 673 deletions.
46 changes: 22 additions & 24 deletions HeterogeneousCore/CUDAUtilities/interface/AtomicPairCounter.h
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
#ifndef HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
#define HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h

#include <cuda_runtime.h>
#include <cstdint>

#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"

// A pair of 32-bit counters packed into a single 64-bit word, so that both
// can be read or updated together with one 64-bit (atomic) operation.
// Typically used while filling a "One to Many" association.
class AtomicPairCounter {
public:
  using c_type = unsigned long long int;

  AtomicPairCounter() {}
  AtomicPairCounter(c_type i) { counter.ac = i; }

  __device__ __host__ AtomicPairCounter& operator=(c_type i) {
    counter.ac = i;
    return *this;
  }

  struct Counters {
    uint32_t n;  // in a "One to Many" association is the number of "One"
    uint32_t m;  // in a "One to Many" association is the total number of associations
  };

  // Overlay the two 32-bit counters on the single 64-bit word used for the
  // atomic update.
  union Atomic2 {
    Counters counters;
    c_type ac;
  };

  // Adding `incr` to the packed 64-bit value increments its upper 32-bit
  // word by one (barring overflow of the lower word).
  static constexpr c_type incr = 1UL << 32;

  __device__ __host__ Counters get() const { return counter.counters; }

  // increment n by 1 and m by i. return previous value
  __host__ __device__ __forceinline__ Counters add(uint32_t i) {
    c_type c = i;
    c += incr;
    Atomic2 ret;
#ifdef __CUDA_ARCH__
    // on the device, update both counters with a single 64-bit atomic RMW
    ret.ac = atomicAdd(&counter.ac, c);
#else
    // on the host there is no device-side concurrency to protect against
    ret.ac = counter.ac;
    counter.ac += c;
#endif
    return ret.counters;
  }

private:
  Atomic2 counter;
};

#endif  // HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
179 changes: 99 additions & 80 deletions HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,97 +6,116 @@
#include <type_traits>
#include <utility>

#include <cuda.h>
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"

namespace GPU {
// A fixed-capacity, non-owning vector usable on the host and in CUDA kernels.
// The backing storage is supplied by the caller via construct()/set_data();
// ownership always stays with the caller.
template <class T>
struct SimpleVector {
  constexpr SimpleVector() = default;

  // ownership of m_data stays within the caller
  constexpr void construct(int capacity, T *data) {
    m_size = 0;
    m_capacity = capacity;
    m_data = data;
  }

  // single-threaded push_back: returns the index of the new element,
  // or -1 if the vector is full
  inline constexpr int push_back_unsafe(const T &element) {
    auto previousSize = m_size;
    m_size++;
    if (previousSize < m_capacity) {
      m_data[previousSize] = element;
      return previousSize;
    } else {
      --m_size;
      return -1;
    }
  }

  // single-threaded emplace_back: constructs T in place from args;
  // returns the index of the new element, or -1 if the vector is full
  template <class... Ts>
  constexpr int emplace_back_unsafe(Ts &&... args) {
    auto previousSize = m_size;
    m_size++;
    if (previousSize < m_capacity) {
      (new (&m_data[previousSize]) T(std::forward<Ts>(args)...));
      return previousSize;
    } else {
      --m_size;
      return -1;
    }
  }

  // undefined behaviour if the vector is empty
  __device__ inline T &back() { return m_data[m_size - 1]; }

  // undefined behaviour if the vector is empty
  // (NOTE: previously returned a temporary T() by const reference when
  // empty, which was itself undefined behaviour; the precondition is now
  // simply documented, matching the non-const overload)
  __device__ inline const T &back() const { return m_data[m_size - 1]; }

  // thread-safe version of the vector, when used in a CUDA kernel:
  // returns the index of the new element, or -1 if the vector is full
  __device__ int push_back(const T &element) {
    auto previousSize = atomicAdd(&m_size, 1);
    if (previousSize < m_capacity) {
      m_data[previousSize] = element;
      return previousSize;
    } else {
      atomicSub(&m_size, 1);
      return -1;
    }
  }

  // thread-safe emplace_back, when used in a CUDA kernel
  template <class... Ts>
  __device__ int emplace_back(Ts &&... args) {
    auto previousSize = atomicAdd(&m_size, 1);
    if (previousSize < m_capacity) {
      (new (&m_data[previousSize]) T(std::forward<Ts>(args)...));
      return previousSize;
    } else {
      atomicSub(&m_size, 1);
      return -1;
    }
  }

  // thread safe version of resize: grow by `size` elements;
  // returns the first index of the newly reserved range, or -1 on overflow
  __device__ int extend(int size = 1) {
    auto previousSize = atomicAdd(&m_size, size);
    if (previousSize < m_capacity) {
      return previousSize;
    } else {
      atomicSub(&m_size, size);
      return -1;
    }
  }

  // thread-safe shrink by `size` elements;
  // returns the new size, or -1 if fewer than `size` elements were held
  __device__ int shrink(int size = 1) {
    auto previousSize = atomicSub(&m_size, size);
    if (previousSize >= size) {
      return previousSize - size;
    } else {
      atomicAdd(&m_size, size);
      return -1;
    }
  }

  inline constexpr bool empty() const { return m_size <= 0; }
  inline constexpr bool full() const { return m_size >= m_capacity; }
  inline constexpr T &operator[](int i) { return m_data[i]; }
  inline constexpr const T &operator[](int i) const { return m_data[i]; }
  inline constexpr void reset() { m_size = 0; }
  inline constexpr int size() const { return m_size; }
  inline constexpr int capacity() const { return m_capacity; }
  inline constexpr T const *data() const { return m_data; }
  inline constexpr void resize(int size) { m_size = size; }
  inline constexpr void set_data(T *data) { m_data = data; }

private:
  int m_size;
  int m_capacity;

  T *m_data;
};

// ownership of m_data stays within the caller
template <class T>
Expand All @@ -109,11 +128,11 @@ template <class T> struct SimpleVector {
// Placement-construct a SimpleVector<T> in caller-provided memory `mem`,
// wrapping the caller-provided buffer `data` of the given capacity.
// ownership of m_data stays within the caller
template <class T>
SimpleVector<T> *make_SimpleVector(SimpleVector<T> *mem, int capacity, T *data) {
  auto ret = new (mem) SimpleVector<T>();
  ret->construct(capacity, data);
  return ret;
}

}  // namespace GPU

#endif  // HeterogeneousCore_CUDAUtilities_interface_GPUSimpleVector_h
Loading

0 comments on commit 87a6dbf

Please sign in to comment.