Skip to content

Commit

Permalink
Add assorted CUDA-related utilities (#345)
Browse files Browse the repository at this point in the history
  • Loading branch information
fwyzard committed May 13, 2019
1 parent 0e9830d commit 87a6dbf
Show file tree
Hide file tree
Showing 10 changed files with 983 additions and 673 deletions.
46 changes: 22 additions & 24 deletions HeterogeneousCore/CUDAUtilities/interface/AtomicPairCounter.h
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
#ifndef HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
#define HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h

#include <cuda_runtime.h>
#include <cstdint>

#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"

// A pair of 32-bit counters packed into a single 64-bit word, so that both
// can be read or updated together with one 64-bit (atomic) operation.
// Typically used while filling a "One to Many" association.
class AtomicPairCounter {
public:
  using c_type = unsigned long long int;

  AtomicPairCounter() {}
  AtomicPairCounter(c_type i) { counter.ac = i; }

  __device__ __host__ AtomicPairCounter& operator=(c_type i) {
    counter.ac = i;
    return *this;
  }

  struct Counters {
    uint32_t n;  // in a "One to Many" association is the number of "One"
    uint32_t m;  // in a "One to Many" association is the total number of associations
  };

  // Overlay the two 32-bit counters on the single 64-bit word used for the
  // atomic update.
  union Atomic2 {
    Counters counters;
    c_type ac;
  };

  // Adding `incr` to the packed 64-bit value increments its upper 32-bit
  // word by one (barring overflow of the lower word).
  static constexpr c_type incr = 1UL << 32;

  __device__ __host__ Counters get() const { return counter.counters; }

  // increment n by 1 and m by i. return previous value
  __host__ __device__ __forceinline__ Counters add(uint32_t i) {
    c_type c = i;
    c += incr;
    Atomic2 ret;
#ifdef __CUDA_ARCH__
    // on the device, update both counters with a single 64-bit atomic RMW
    ret.ac = atomicAdd(&counter.ac, c);
#else
    // on the host there is no device-side concurrency to protect against
    ret.ac = counter.ac;
    counter.ac += c;
#endif
    return ret.counters;
  }

private:
  Atomic2 counter;
};

#endif  // HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
179 changes: 99 additions & 80 deletions HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,97 +6,116 @@
#include <type_traits>
#include <utility>

#include <cuda.h>
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"

namespace GPU {
// A fixed-capacity, non-owning vector usable on the host and in CUDA kernels.
// The backing storage is supplied by the caller via construct()/set_data();
// ownership always stays with the caller.
template <class T>
struct SimpleVector {
  constexpr SimpleVector() = default;

  // ownership of m_data stays within the caller
  constexpr void construct(int capacity, T *data) {
    m_size = 0;
    m_capacity = capacity;
    m_data = data;
  }

  // single-threaded push_back: returns the index of the new element,
  // or -1 if the vector is full
  inline constexpr int push_back_unsafe(const T &element) {
    auto previousSize = m_size;
    m_size++;
    if (previousSize < m_capacity) {
      m_data[previousSize] = element;
      return previousSize;
    } else {
      --m_size;
      return -1;
    }
  }

  // single-threaded emplace_back: constructs T in place from args;
  // returns the index of the new element, or -1 if the vector is full
  template <class... Ts>
  constexpr int emplace_back_unsafe(Ts &&... args) {
    auto previousSize = m_size;
    m_size++;
    if (previousSize < m_capacity) {
      (new (&m_data[previousSize]) T(std::forward<Ts>(args)...));
      return previousSize;
    } else {
      --m_size;
      return -1;
    }
  }

  // undefined behaviour if the vector is empty
  __device__ inline T &back() { return m_data[m_size - 1]; }

  // undefined behaviour if the vector is empty
  // (NOTE: previously returned a temporary T() by const reference when
  // empty, which was itself undefined behaviour; the precondition is now
  // simply documented, matching the non-const overload)
  __device__ inline const T &back() const { return m_data[m_size - 1]; }

  // thread-safe version of the vector, when used in a CUDA kernel:
  // returns the index of the new element, or -1 if the vector is full
  __device__ int push_back(const T &element) {
    auto previousSize = atomicAdd(&m_size, 1);
    if (previousSize < m_capacity) {
      m_data[previousSize] = element;
      return previousSize;
    } else {
      atomicSub(&m_size, 1);
      return -1;
    }
  }

  // thread-safe emplace_back, when used in a CUDA kernel
  template <class... Ts>
  __device__ int emplace_back(Ts &&... args) {
    auto previousSize = atomicAdd(&m_size, 1);
    if (previousSize < m_capacity) {
      (new (&m_data[previousSize]) T(std::forward<Ts>(args)...));
      return previousSize;
    } else {
      atomicSub(&m_size, 1);
      return -1;
    }
  }

  // thread safe version of resize: grow by `size` elements;
  // returns the first index of the newly reserved range, or -1 on overflow
  __device__ int extend(int size = 1) {
    auto previousSize = atomicAdd(&m_size, size);
    if (previousSize < m_capacity) {
      return previousSize;
    } else {
      atomicSub(&m_size, size);
      return -1;
    }
  }

  // thread-safe shrink by `size` elements;
  // returns the new size, or -1 if fewer than `size` elements were held
  __device__ int shrink(int size = 1) {
    auto previousSize = atomicSub(&m_size, size);
    if (previousSize >= size) {
      return previousSize - size;
    } else {
      atomicAdd(&m_size, size);
      return -1;
    }
  }

  inline constexpr bool empty() const { return m_size <= 0; }
  inline constexpr bool full() const { return m_size >= m_capacity; }
  inline constexpr T &operator[](int i) { return m_data[i]; }
  inline constexpr const T &operator[](int i) const { return m_data[i]; }
  inline constexpr void reset() { m_size = 0; }
  inline constexpr int size() const { return m_size; }
  inline constexpr int capacity() const { return m_capacity; }
  inline constexpr T const *data() const { return m_data; }
  inline constexpr void resize(int size) { m_size = size; }
  inline constexpr void set_data(T *data) { m_data = data; }

private:
  int m_size;
  int m_capacity;

  T *m_data;
};

// ownership of m_data stays within the caller
template <class T>
Expand All @@ -109,11 +128,11 @@ template <class T> struct SimpleVector {
// Placement-construct a SimpleVector<T> in caller-provided memory `mem`,
// wrapping the caller-provided buffer `data` of the given capacity.
// ownership of m_data stays within the caller
template <class T>
SimpleVector<T> *make_SimpleVector(SimpleVector<T> *mem, int capacity, T *data) {
  auto ret = new (mem) SimpleVector<T>();
  ret->construct(capacity, data);
  return ret;
}

}  // namespace GPU

#endif  // HeterogeneousCore_CUDAUtilities_interface_GPUSimpleVector_h
Loading

0 comments on commit 87a6dbf

Please sign in to comment.