Skip to content

Commit

Permalink
Apply code-format fixes (#427)
Browse files Browse the repository at this point in the history
  • Loading branch information
fwyzard committed Dec 3, 2019
1 parent 459524a commit d02f4be
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 156 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ namespace gpuVertexFinder {
// based on the Rodriguez & Laio algorithm
//
__device__ __forceinline__ void clusterTracksByDensity(gpuVertexFinder::ZVertices* pdata,
gpuVertexFinder::WorkSpace* pws,
int minT, // min number of neighbours to be "seed"
float eps, // max absolute distance to cluster
float errmax, // max error to be "seed"
float chi2max // max normalized distance to cluster
gpuVertexFinder::WorkSpace* pws,
int minT, // min number of neighbours to be "seed"
float eps, // max absolute distance to cluster
float errmax, // max error to be "seed"
float chi2max // max normalized distance to cluster
) {
using namespace gpuVertexFinder;
constexpr bool verbose = false; // in principle the compiler should optimize this out if false
Expand Down Expand Up @@ -220,14 +220,14 @@ namespace gpuVertexFinder {
}

__global__ void clusterTracksByDensityKernel(gpuVertexFinder::ZVertices* pdata,
gpuVertexFinder::WorkSpace* pws,
int minT, // min number of neighbours to be "seed"
float eps, // max absolute distance to cluster
float errmax, // max error to be "seed"
float chi2max // max normalized distance to cluster
gpuVertexFinder::WorkSpace* pws,
int minT, // min number of neighbours to be "seed"
float eps, // max absolute distance to cluster
float errmax, // max error to be "seed"
float chi2max // max normalized distance to cluster
) {
clusterTracksByDensity(pdata,pws,minT,eps,errmax,chi2max);
}
clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
}

} // namespace gpuVertexFinder

Expand Down
13 changes: 6 additions & 7 deletions RecoPixelVertexing/PixelVertexFinding/src/gpuFitVertices.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
namespace gpuVertexFinder {

__device__ __forceinline__ void fitVertices(ZVertices* pdata,
WorkSpace* pws,
float chi2Max // for outlier rejection
WorkSpace* pws,
float chi2Max // for outlier rejection
) {
constexpr bool verbose = false; // in principle the compiler should optimize this out if false

Expand Down Expand Up @@ -102,12 +102,11 @@ namespace gpuVertexFinder {
}

__global__ void fitVerticesKernel(ZVertices* pdata,
WorkSpace* pws,
float chi2Max // for outlier rejection
WorkSpace* pws,
float chi2Max // for outlier rejection
) {

fitVertices(pdata,pws,chi2Max);
}
fitVertices(pdata, pws, chi2Max);
}

} // namespace gpuVertexFinder

Expand Down
8 changes: 2 additions & 6 deletions RecoPixelVertexing/PixelVertexFinding/src/gpuSortByPt2.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@

namespace gpuVertexFinder {

__device__ __forceinline__
void sortByPt2(ZVertices* pdata, WorkSpace* pws) {
__device__ __forceinline__ void sortByPt2(ZVertices* pdata, WorkSpace* pws) {
auto& __restrict__ data = *pdata;
auto& __restrict__ ws = *pws;
auto nt = ws.ntrks;
Expand Down Expand Up @@ -67,10 +66,7 @@ namespace gpuVertexFinder {
#endif
}


__global__ void sortByPt2Kernel(ZVertices* pdata, WorkSpace* pws) {
sortByPt2(pdata,pws);
}
__global__ void sortByPt2Kernel(ZVertices* pdata, WorkSpace* pws) { sortByPt2(pdata, pws); }

} // namespace gpuVertexFinder

Expand Down
172 changes: 86 additions & 86 deletions RecoPixelVertexing/PixelVertexFinding/src/gpuSplitVertices.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,106 +32,106 @@ namespace gpuVertexFinder {
assert(zt);

// one vertex per block
for ( auto kv = blockIdx.x; kv<nvFinal; kv += gridDim.x) {

if (nn[kv] < 4)
continue;
if (chi2[kv] < maxChi2 * float(nn[kv]))
continue;

constexpr int MAXTK = 512;
assert(nn[kv] < MAXTK);
if (nn[kv] >= MAXTK) continue; // too bad FIXME
__shared__ uint32_t it[MAXTK]; // track index
__shared__ float zz[MAXTK]; // z pos
__shared__ uint8_t newV[MAXTK]; // 0 or 1
__shared__ float ww[MAXTK]; // z weight

__shared__ uint32_t nq; // number of track for this vertex
nq = 0;
__syncthreads();

// copy to local
for (auto k = threadIdx.x; k < nt; k += blockDim.x) {
if (iv[k] == int(kv)) {
auto old = atomicInc(&nq, MAXTK);
zz[old] = zt[k] - zv[kv];
newV[old] = zz[old] < 0 ? 0 : 1;
ww[old] = 1.f / ezt2[k];
it[old] = k;
}
}

__shared__ float znew[2], wnew[2]; // the new vertices

__syncthreads();
assert(int(nq) == nn[kv] + 1);

int maxiter = 20;
// kt-min....
bool more = true;
while (__syncthreads_or(more)) {
more = false;
if (0 == threadIdx.x) {
znew[0] = 0;
znew[1] = 0;
wnew[0] = 0;
wnew[1] = 0;
}
__syncthreads();
for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
auto i = newV[k];
atomicAdd(&znew[i], zz[k] * ww[k]);
atomicAdd(&wnew[i], ww[k]);
}
for (auto kv = blockIdx.x; kv < nvFinal; kv += gridDim.x) {
if (nn[kv] < 4)
continue;
if (chi2[kv] < maxChi2 * float(nn[kv]))
continue;

constexpr int MAXTK = 512;
assert(nn[kv] < MAXTK);
if (nn[kv] >= MAXTK)
continue; // too bad FIXME
__shared__ uint32_t it[MAXTK]; // track index
__shared__ float zz[MAXTK]; // z pos
__shared__ uint8_t newV[MAXTK]; // 0 or 1
__shared__ float ww[MAXTK]; // z weight

__shared__ uint32_t nq; // number of track for this vertex
nq = 0;
__syncthreads();
if (0 == threadIdx.x) {
znew[0] /= wnew[0];
znew[1] /= wnew[1];

// copy to local
for (auto k = threadIdx.x; k < nt; k += blockDim.x) {
if (iv[k] == int(kv)) {
auto old = atomicInc(&nq, MAXTK);
zz[old] = zt[k] - zv[kv];
newV[old] = zz[old] < 0 ? 0 : 1;
ww[old] = 1.f / ezt2[k];
it[old] = k;
}
}

__shared__ float znew[2], wnew[2]; // the new vertices

__syncthreads();
for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
auto d0 = fabs(zz[k] - znew[0]);
auto d1 = fabs(zz[k] - znew[1]);
auto newer = d0 < d1 ? 0 : 1;
more |= newer != newV[k];
newV[k] = newer;
}
--maxiter;
if (maxiter <= 0)
assert(int(nq) == nn[kv] + 1);

int maxiter = 20;
// kt-min....
bool more = true;
while (__syncthreads_or(more)) {
more = false;
}
if (0 == threadIdx.x) {
znew[0] = 0;
znew[1] = 0;
wnew[0] = 0;
wnew[1] = 0;
}
__syncthreads();
for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
auto i = newV[k];
atomicAdd(&znew[i], zz[k] * ww[k]);
atomicAdd(&wnew[i], ww[k]);
}
__syncthreads();
if (0 == threadIdx.x) {
znew[0] /= wnew[0];
znew[1] /= wnew[1];
}
__syncthreads();
for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
auto d0 = fabs(zz[k] - znew[0]);
auto d1 = fabs(zz[k] - znew[1]);
auto newer = d0 < d1 ? 0 : 1;
more |= newer != newV[k];
newV[k] = newer;
}
--maxiter;
if (maxiter <= 0)
more = false;
}

// avoid empty vertices
if (0 == wnew[0] || 0 == wnew[1])
continue;
// avoid empty vertices
if (0 == wnew[0] || 0 == wnew[1])
continue;

// quality cut
auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]);
// quality cut
auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]);

auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]);
auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]);

if (verbose && 0 == threadIdx.x)
printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]);
if (verbose && 0 == threadIdx.x)
printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]);

if (chi2Dist < 4)
continue;
if (chi2Dist < 4)
continue;

// get a new global vertex
__shared__ uint32_t igv;
if (0 == threadIdx.x)
igv = atomicAdd(&ws.nvIntermediate, 1);
__syncthreads();
for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
if (1 == newV[k])
iv[it[k]] = igv;
}
// get a new global vertex
__shared__ uint32_t igv;
if (0 == threadIdx.x)
igv = atomicAdd(&ws.nvIntermediate, 1);
__syncthreads();
for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
if (1 == newV[k])
iv[it[k]] = igv;
}

} // loop on vertices
} // loop on vertices
}

__global__ void splitVerticesKernel(ZVertices* pdata, WorkSpace* pws, float maxChi2) {
splitVertices(pdata, pws, maxChi2);
splitVertices(pdata, pws, maxChi2);
}

} // namespace gpuVertexFinder
Expand Down
5 changes: 2 additions & 3 deletions RecoPixelVertexing/PixelVertexFinding/src/gpuVertexFinder.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,14 @@ namespace gpuVertexFinder {
float ierrmax, // max error to be "seed"
float ichi2max // max normalized distance to cluster
)
: oneKernel_(oneKernel && !(useDBSCAN||useIterative)),
: oneKernel_(oneKernel && !(useDBSCAN || useIterative)),
useDensity_(useDensity),
useDBSCAN_(useDBSCAN),
useIterative_(useIterative),
minT(iminT),
eps(ieps),
errmax(ierrmax),
chi2max(ichi2max) {
}
chi2max(ichi2max) {}

~Producer() = default;

Expand Down
Loading

0 comments on commit d02f4be

Please sign in to comment.