Skip to content

Commit

Permalink
Move array to shared memory as the content is the same for every thread
Browse files Browse the repository at this point in the history
  • Loading branch information
VinInn authored and fwyzard committed May 3, 2019
1 parent ccbde12 commit 422a363
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoubletsAlgos.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,16 @@ namespace gpuPixelDoubletsAlgos {
// e.g. see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
const int nPairsMax = 16;
assert(nPairs <= nPairsMax);
- uint32_t innerLayerCumulativeSize[nPairsMax];
- innerLayerCumulativeSize[0] = layerSize(layerPairs[0]);
- for (uint32_t i = 1; i < nPairs; ++i) {
- innerLayerCumulativeSize[i] = innerLayerCumulativeSize[i-1] + layerSize(layerPairs[2*i]);
+ __shared__ uint32_t innerLayerCumulativeSize[nPairsMax];
+ __shared__ uint32_t ntot;
+ if (threadIdx.y==0 && threadIdx.x==0) {
+ innerLayerCumulativeSize[0] = layerSize(layerPairs[0]);
+ for (uint32_t i = 1; i < nPairs; ++i) {
+ innerLayerCumulativeSize[i] = innerLayerCumulativeSize[i-1] + layerSize(layerPairs[2*i]);
+ }
+ ntot = innerLayerCumulativeSize[nPairs-1];
  }
- auto ntot = innerLayerCumulativeSize[nPairs-1];
+ __syncthreads();

// x runs faster
auto idy = blockIdx.y * blockDim.y + threadIdx.y;
Expand Down

0 comments on commit 422a363

Please sign in to comment.