Skip to content

Commit

Permalink
fix broadcast kernel (#46158)
Browse files Browse the repository at this point in the history
  • Loading branch information
sneaxiy committed Sep 19, 2022
1 parent e468e93 commit 860f607
Show file tree
Hide file tree
Showing 12 changed files with 515 additions and 46 deletions.
11 changes: 6 additions & 5 deletions paddle/fluid/platform/device/gpu/cuda/cuda_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,12 @@ namespace platform {
*
*/

// Removed side of this diff hunk: the definition that follows directly below
// is its replacement.
//
// Expands to a counter declaration plus a `for` header; the statement written
// after the macro invocation becomes the loop body.
//   i          - loop variable, declared with type `index_type`.
//   num        - exclusive upper bound, compared against the int64_t counter.
//   index_type - type of `i`.
// `__index__` starts at blockIdx.x * blockDim.x + threadIdx.x (widened to
// int64_t through the cast on blockIdx.x) and `i` is re-assigned from it after
// every step.
// NOTE(review): the step `blockDim.x * gridDim.x` is multiplied without a
// widening cast, unlike the start index. Presumably it is evaluated in the
// operands' 32-bit type and can wrap for very large launches before being
// added to `__index__` -- confirm against the CUDA built-in variable types.
#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
int64_t __index__ = \
static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \
for (index_type i = __index__; __index__ < (num); \
__index__ += blockDim.x * gridDim.x, i = __index__)
// Grid-stride loop header for CUDA kernels.
//
// Expands to two declarations plus a `for` header; the statement written after
// the macro invocation becomes the loop body.
//   i          - loop variable, declared with type `index_type`.
//   num        - exclusive upper bound, compared against the int64_t counter.
//   index_type - type of `i`.
// `__index__` starts at blockIdx.x * blockDim.x + threadIdx.x and advances by
// `__stride__` = blockDim.x * gridDim.x. Both products are widened to int64_t
// by casting their first operand before multiplying. The loop condition tests
// `__index__`, not `i`, and `i` is re-assigned from `__index__` after each
// step.
// NOTE: `__index__` and `__stride__` are declared in the invoking scope, so
// the macro cannot be expanded twice in the same scope.
#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
int64_t __index__ = \
static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \
int64_t __stride__ = static_cast<int64_t>(blockDim.x) * gridDim.x; \
for (index_type i = __index__; __index__ < (num); \
__index__ += __stride__, i = __index__)

class CublasHandleHolder {
public:
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/platform/device/gpu/rocm/rocm_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ namespace platform {
// HIP variant of the grid-stride loop header (same macro name as the CUDA
// helper, built on the hipBlockIdx_x / hipBlockDim_x / hipGridDim_x /
// hipThreadIdx_x built-ins).
//
//   i          - loop variable, declared with type `index_type`.
//   num        - exclusive upper bound, compared against the int64_t counter.
//   index_type - type of `i`.
// `__index__` starts at hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x and
// `__stride__` is hipBlockDim_x * hipGridDim_x; both are widened to int64_t by
// casting the first operand before multiplying.
// NOTE(review): this excerpt is a rendered diff hunk ("2 additions & 1
// deletion"). The last two lines appear to be the old and the new form of the
// loop increment shown one after the other; only the final `__stride__` form
// belongs to the post-change macro -- confirm against the actual header.
#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
int64_t __index__ = \
static_cast<int64_t>(hipBlockIdx_x) * hipBlockDim_x + hipThreadIdx_x; \
int64_t __stride__ = static_cast<int64_t>(hipBlockDim_x) * hipGridDim_x; \
for (index_type i = __index__; __index__ < (num); \
__index__ += hipBlockDim_x * hipGridDim_x, i = __index__)
__index__ += __stride__, i = __index__)

class CublasHandleHolder {
public:
Expand Down
11 changes: 6 additions & 5 deletions paddle/phi/backends/gpu/cuda/cuda_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,12 @@ namespace gpu {
*
*/

// Removed side of this diff hunk: the definition that follows directly below
// is its replacement.
//
// Expands to a counter declaration plus a `for` header; the statement written
// after the macro invocation becomes the loop body.
//   i          - loop variable, declared with type `index_type`.
//   num        - exclusive upper bound, compared against the int64_t counter.
//   index_type - type of `i`.
// `__index__` starts at blockIdx.x * blockDim.x + threadIdx.x (widened to
// int64_t through the cast on blockIdx.x) and `i` is re-assigned from it after
// every step.
// NOTE(review): the step `blockDim.x * gridDim.x` is multiplied without a
// widening cast, unlike the start index. Presumably it is evaluated in the
// operands' 32-bit type and can wrap for very large launches before being
// added to `__index__` -- confirm against the CUDA built-in variable types.
#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
int64_t __index__ = \
static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \
for (index_type i = __index__; __index__ < (num); \
__index__ += blockDim.x * gridDim.x, i = __index__)
// Grid-stride loop header for CUDA kernels.
//
// Expands to two declarations plus a `for` header; the statement written after
// the macro invocation becomes the loop body.
//   i          - loop variable, declared with type `index_type`.
//   num        - exclusive upper bound, compared against the int64_t counter.
//   index_type - type of `i`.
// `__index__` starts at blockIdx.x * blockDim.x + threadIdx.x and advances by
// `__stride__` = blockDim.x * gridDim.x. Both products are widened to int64_t
// by casting their first operand before multiplying. The loop condition tests
// `__index__`, not `i`, and `i` is re-assigned from `__index__` after each
// step.
// NOTE: `__index__` and `__stride__` are declared in the invoking scope, so
// the macro cannot be expanded twice in the same scope.
#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
int64_t __index__ = \
static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \
int64_t __stride__ = static_cast<int64_t>(blockDim.x) * gridDim.x; \
for (index_type i = __index__; __index__ < (num); \
__index__ += __stride__, i = __index__)

} // namespace gpu
} // namespace backends
Expand Down
3 changes: 2 additions & 1 deletion paddle/phi/backends/gpu/rocm/rocm_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ namespace gpu {
// HIP variant of the grid-stride loop header (same macro name as the CUDA
// helper, built on the hipBlockIdx_x / hipBlockDim_x / hipGridDim_x /
// hipThreadIdx_x built-ins).
//
//   i          - loop variable, declared with type `index_type`.
//   num        - exclusive upper bound, compared against the int64_t counter.
//   index_type - type of `i`.
// `__index__` starts at hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x and
// `__stride__` is hipBlockDim_x * hipGridDim_x; both are widened to int64_t by
// casting the first operand before multiplying.
// NOTE(review): this excerpt is a rendered diff hunk ("2 additions & 1
// deletion"). The last two lines appear to be the old and the new form of the
// loop increment shown one after the other; only the final `__stride__` form
// belongs to the post-change macro -- confirm against the actual header.
#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
int64_t __index__ = \
static_cast<int64_t>(hipBlockIdx_x) * hipBlockDim_x + hipThreadIdx_x; \
int64_t __stride__ = static_cast<int64_t>(hipBlockDim_x) * hipGridDim_x; \
for (index_type i = __index__; __index__ < (num); \
__index__ += hipBlockDim_x * hipGridDim_x, i = __index__)
__index__ += __stride__, i = __index__)

} // namespace gpu
} // namespace backends
Expand Down
Loading

0 comments on commit 860f607

Please sign in to comment.