From d02f4bee794a7c04d7710e6e019fb6f3a004ecc7 Mon Sep 17 00:00:00 2001
From: Andrea Bocci <andrea.bocci@cern.ch>
Date: Tue, 3 Dec 2019 18:13:21 +0100
Subject: [PATCH] Apply code-format fixes (#427)

---
 .../src/gpuClusterTracksByDensity.h           |  24 +--
 .../PixelVertexFinding/src/gpuFitVertices.h   |  13 +-
 .../PixelVertexFinding/src/gpuSortByPt2.h     |   8 +-
 .../PixelVertexFinding/src/gpuSplitVertices.h | 172 +++++++++---------
 .../PixelVertexFinding/src/gpuVertexFinder.h  |   5 +-
 .../src/gpuVertexFinderImpl.h                 |  46 +++--
 .../PixelVertexFinding/test/VertexFinder_t.h  |  35 ++--
 7 files changed, 147 insertions(+), 156 deletions(-)

diff --git a/RecoPixelVertexing/PixelVertexFinding/src/gpuClusterTracksByDensity.h b/RecoPixelVertexing/PixelVertexFinding/src/gpuClusterTracksByDensity.h
index d69156214bfe0..871b09599c903 100644
--- a/RecoPixelVertexing/PixelVertexFinding/src/gpuClusterTracksByDensity.h
+++ b/RecoPixelVertexing/PixelVertexFinding/src/gpuClusterTracksByDensity.h
@@ -18,11 +18,11 @@ namespace gpuVertexFinder {
   // based on Rodrighez&Laio algo
   //
   __device__ __forceinline__ void clusterTracksByDensity(gpuVertexFinder::ZVertices* pdata,
-                                         gpuVertexFinder::WorkSpace* pws,
-                                         int minT,      // min number of neighbours to be "seed"
-                                         float eps,     // max absolute distance to cluster
-                                         float errmax,  // max error to be "seed"
-                                         float chi2max  // max normalized distance to cluster
+                                                         gpuVertexFinder::WorkSpace* pws,
+                                                         int minT,      // min number of neighbours to be "seed"
+                                                         float eps,     // max absolute distance to cluster
+                                                         float errmax,  // max error to be "seed"
+                                                         float chi2max  // max normalized distance to cluster
   ) {
     using namespace gpuVertexFinder;
     constexpr bool verbose = false;  // in principle the compiler should optmize out if false
@@ -220,14 +220,14 @@ namespace gpuVertexFinder {
   }
 
   __global__ void clusterTracksByDensityKernel(gpuVertexFinder::ZVertices* pdata,
-                                         gpuVertexFinder::WorkSpace* pws,
-                                         int minT,    // min number of neighbours to be "seed"
-                                         float eps,     // max absolute distance to cluster
-                                         float errmax,  // max error to be "seed"
-                                         float chi2max  // max normalized distance to cluster
+                                               gpuVertexFinder::WorkSpace* pws,
+                                               int minT,      // min number of neighbours to be "seed"
+                                               float eps,     // max absolute distance to cluster
+                                               float errmax,  // max error to be "seed"
+                                               float chi2max  // max normalized distance to cluster
   ) {
-   clusterTracksByDensity(pdata,pws,minT,eps,errmax,chi2max);
- }
+    clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
+  }
 
 }  // namespace gpuVertexFinder
 
diff --git a/RecoPixelVertexing/PixelVertexFinding/src/gpuFitVertices.h b/RecoPixelVertexing/PixelVertexFinding/src/gpuFitVertices.h
index 1c3641df709ec..4487cb12ea17b 100644
--- a/RecoPixelVertexing/PixelVertexFinding/src/gpuFitVertices.h
+++ b/RecoPixelVertexing/PixelVertexFinding/src/gpuFitVertices.h
@@ -13,8 +13,8 @@
 namespace gpuVertexFinder {
 
   __device__ __forceinline__ void fitVertices(ZVertices* pdata,
-                              WorkSpace* pws,
-                              float chi2Max  // for outlier rejection
+                                              WorkSpace* pws,
+                                              float chi2Max  // for outlier rejection
   ) {
     constexpr bool verbose = false;  // in principle the compiler should optmize out if false
 
@@ -102,12 +102,11 @@ namespace gpuVertexFinder {
   }
 
   __global__ void fitVerticesKernel(ZVertices* pdata,
-                              WorkSpace* pws,
-                              float chi2Max  // for outlier rejection
+                                    WorkSpace* pws,
+                                    float chi2Max  // for outlier rejection
   ) {
-
-   fitVertices(pdata,pws,chi2Max);
- }
+    fitVertices(pdata, pws, chi2Max);
+  }
 
 }  // namespace gpuVertexFinder
 
diff --git a/RecoPixelVertexing/PixelVertexFinding/src/gpuSortByPt2.h b/RecoPixelVertexing/PixelVertexFinding/src/gpuSortByPt2.h
index 93a34c826bd1f..89cc9a3844f76 100644
--- a/RecoPixelVertexing/PixelVertexFinding/src/gpuSortByPt2.h
+++ b/RecoPixelVertexing/PixelVertexFinding/src/gpuSortByPt2.h
@@ -15,8 +15,7 @@
 
 namespace gpuVertexFinder {
 
-  __device__ __forceinline__ 
-  void sortByPt2(ZVertices* pdata, WorkSpace* pws) {
+  __device__ __forceinline__ void sortByPt2(ZVertices* pdata, WorkSpace* pws) {
     auto& __restrict__ data = *pdata;
     auto& __restrict__ ws = *pws;
     auto nt = ws.ntrks;
@@ -67,10 +66,7 @@ namespace gpuVertexFinder {
 #endif
   }
 
-
-  __global__ void sortByPt2Kernel(ZVertices* pdata, WorkSpace* pws) {
-     sortByPt2(pdata,pws);
-  }
+  __global__ void sortByPt2Kernel(ZVertices* pdata, WorkSpace* pws) { sortByPt2(pdata, pws); }
 
 }  // namespace gpuVertexFinder
 
diff --git a/RecoPixelVertexing/PixelVertexFinding/src/gpuSplitVertices.h b/RecoPixelVertexing/PixelVertexFinding/src/gpuSplitVertices.h
index 48ed9290ab06c..694915ab02157 100644
--- a/RecoPixelVertexing/PixelVertexFinding/src/gpuSplitVertices.h
+++ b/RecoPixelVertexing/PixelVertexFinding/src/gpuSplitVertices.h
@@ -32,106 +32,106 @@ namespace gpuVertexFinder {
     assert(zt);
 
     // one vertex per block
-    for ( auto kv = blockIdx.x; kv<nvFinal; kv += gridDim.x) {
-
-    if (nn[kv] < 4)
-      continue;
-    if (chi2[kv] < maxChi2 * float(nn[kv]))
-      continue;
-
-    constexpr int MAXTK = 512;
-    assert(nn[kv] < MAXTK);
-    if (nn[kv] >= MAXTK) continue; // too bad FIXME
-    __shared__ uint32_t it[MAXTK];   // track index
-    __shared__ float zz[MAXTK];      // z pos
-    __shared__ uint8_t newV[MAXTK];  // 0 or 1
-    __shared__ float ww[MAXTK];      // z weight
-
-    __shared__ uint32_t nq;  // number of track for this vertex
-    nq = 0;
-    __syncthreads();
-
-    // copy to local
-    for (auto k = threadIdx.x; k < nt; k += blockDim.x) {
-      if (iv[k] == int(kv)) {
-        auto old = atomicInc(&nq, MAXTK);
-        zz[old] = zt[k] - zv[kv];
-        newV[old] = zz[old] < 0 ? 0 : 1;
-        ww[old] = 1.f / ezt2[k];
-        it[old] = k;
-      }
-    }
-
-    __shared__ float znew[2], wnew[2];  // the new vertices
-
-    __syncthreads();
-    assert(int(nq) == nn[kv] + 1);
-
-    int maxiter = 20;
-    // kt-min....
-    bool more = true;
-    while (__syncthreads_or(more)) {
-      more = false;
-      if (0 == threadIdx.x) {
-        znew[0] = 0;
-        znew[1] = 0;
-        wnew[0] = 0;
-        wnew[1] = 0;
-      }
-      __syncthreads();
-      for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
-        auto i = newV[k];
-        atomicAdd(&znew[i], zz[k] * ww[k]);
-        atomicAdd(&wnew[i], ww[k]);
-      }
+    for (auto kv = blockIdx.x; kv < nvFinal; kv += gridDim.x) {
+      if (nn[kv] < 4)
+        continue;
+      if (chi2[kv] < maxChi2 * float(nn[kv]))
+        continue;
+
+      constexpr int MAXTK = 512;
+      assert(nn[kv] < MAXTK);
+      if (nn[kv] >= MAXTK)
+        continue;                      // too bad FIXME
+      __shared__ uint32_t it[MAXTK];   // track index
+      __shared__ float zz[MAXTK];      // z pos
+      __shared__ uint8_t newV[MAXTK];  // 0 or 1
+      __shared__ float ww[MAXTK];      // z weight
+
+      __shared__ uint32_t nq;  // number of track for this vertex
+      nq = 0;
       __syncthreads();
-      if (0 == threadIdx.x) {
-        znew[0] /= wnew[0];
-        znew[1] /= wnew[1];
+
+      // copy to local
+      for (auto k = threadIdx.x; k < nt; k += blockDim.x) {
+        if (iv[k] == int(kv)) {
+          auto old = atomicInc(&nq, MAXTK);
+          zz[old] = zt[k] - zv[kv];
+          newV[old] = zz[old] < 0 ? 0 : 1;
+          ww[old] = 1.f / ezt2[k];
+          it[old] = k;
+        }
       }
+
+      __shared__ float znew[2], wnew[2];  // the new vertices
+
       __syncthreads();
-      for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
-        auto d0 = fabs(zz[k] - znew[0]);
-        auto d1 = fabs(zz[k] - znew[1]);
-        auto newer = d0 < d1 ? 0 : 1;
-        more |= newer != newV[k];
-        newV[k] = newer;
-      }
-      --maxiter;
-      if (maxiter <= 0)
+      assert(int(nq) == nn[kv] + 1);
+
+      int maxiter = 20;
+      // kt-min....
+      bool more = true;
+      while (__syncthreads_or(more)) {
         more = false;
-    }
+        if (0 == threadIdx.x) {
+          znew[0] = 0;
+          znew[1] = 0;
+          wnew[0] = 0;
+          wnew[1] = 0;
+        }
+        __syncthreads();
+        for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
+          auto i = newV[k];
+          atomicAdd(&znew[i], zz[k] * ww[k]);
+          atomicAdd(&wnew[i], ww[k]);
+        }
+        __syncthreads();
+        if (0 == threadIdx.x) {
+          znew[0] /= wnew[0];
+          znew[1] /= wnew[1];
+        }
+        __syncthreads();
+        for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
+          auto d0 = fabs(zz[k] - znew[0]);
+          auto d1 = fabs(zz[k] - znew[1]);
+          auto newer = d0 < d1 ? 0 : 1;
+          more |= newer != newV[k];
+          newV[k] = newer;
+        }
+        --maxiter;
+        if (maxiter <= 0)
+          more = false;
+      }
 
-    // avoid empty vertices
-    if (0 == wnew[0] || 0 == wnew[1])
-      continue;
+      // avoid empty vertices
+      if (0 == wnew[0] || 0 == wnew[1])
+        continue;
 
-    // quality cut
-    auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]);
+      // quality cut
+      auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]);
 
-    auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]);
+      auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]);
 
-    if (verbose && 0 == threadIdx.x)
-      printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]);
+      if (verbose && 0 == threadIdx.x)
+        printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]);
 
-    if (chi2Dist < 4)
-      continue;
+      if (chi2Dist < 4)
+        continue;
 
-    // get a new global vertex
-    __shared__ uint32_t igv;
-    if (0 == threadIdx.x)
-      igv = atomicAdd(&ws.nvIntermediate, 1);
-    __syncthreads();
-    for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
-      if (1 == newV[k])
-        iv[it[k]] = igv;
-    }
+      // get a new global vertex
+      __shared__ uint32_t igv;
+      if (0 == threadIdx.x)
+        igv = atomicAdd(&ws.nvIntermediate, 1);
+      __syncthreads();
+      for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
+        if (1 == newV[k])
+          iv[it[k]] = igv;
+      }
 
-  } // loop on vertices
+    }  // loop on vertices
   }
 
   __global__ void splitVerticesKernel(ZVertices* pdata, WorkSpace* pws, float maxChi2) {
-     splitVertices(pdata, pws, maxChi2);
+    splitVertices(pdata, pws, maxChi2);
   }
 
 }  // namespace gpuVertexFinder
diff --git a/RecoPixelVertexing/PixelVertexFinding/src/gpuVertexFinder.h b/RecoPixelVertexing/PixelVertexFinding/src/gpuVertexFinder.h
index fec0308a1154d..6cd86c93a6737 100644
--- a/RecoPixelVertexing/PixelVertexFinding/src/gpuVertexFinder.h
+++ b/RecoPixelVertexing/PixelVertexFinding/src/gpuVertexFinder.h
@@ -52,15 +52,14 @@ namespace gpuVertexFinder {
              float ierrmax,  // max error to be "seed"
              float ichi2max  // max normalized distance to cluster
              )
-        : oneKernel_(oneKernel && !(useDBSCAN||useIterative)),
+        : oneKernel_(oneKernel && !(useDBSCAN || useIterative)),
           useDensity_(useDensity),
           useDBSCAN_(useDBSCAN),
           useIterative_(useIterative),
           minT(iminT),
           eps(ieps),
           errmax(ierrmax),
-          chi2max(ichi2max) {
-          }
+          chi2max(ichi2max) {}
 
     ~Producer() = default;
 
diff --git a/RecoPixelVertexing/PixelVertexFinding/src/gpuVertexFinderImpl.h b/RecoPixelVertexing/PixelVertexFinding/src/gpuVertexFinderImpl.h
index 4dd839adc5315..d6e63227ccf85 100644
--- a/RecoPixelVertexing/PixelVertexFinding/src/gpuVertexFinderImpl.h
+++ b/RecoPixelVertexing/PixelVertexFinding/src/gpuVertexFinderImpl.h
@@ -49,44 +49,41 @@ namespace gpuVertexFinder {
 #ifndef THREE_KERNELS
   __global__ void vertexFinderOneKernel(gpuVertexFinder::ZVertices* pdata,
                                         gpuVertexFinder::WorkSpace* pws,
-                                        int minT,    // min number of neighbours to be "seed"
+                                        int minT,      // min number of neighbours to be "seed"
                                         float eps,     // max absolute distance to cluster
                                         float errmax,  // max error to be "seed"
                                         float chi2max  // max normalized distance to cluster,
   ) {
-    clusterTracksByDensity(pdata,pws,minT,eps,errmax,chi2max);
+    clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
     __syncthreads();
-    fitVertices(pdata,pws, 50.);
+    fitVertices(pdata, pws, 50.);
     __syncthreads();
-    splitVertices(pdata,pws, 9.f);
+    splitVertices(pdata, pws, 9.f);
     __syncthreads();
-    fitVertices(pdata,pws, 5000.);
+    fitVertices(pdata, pws, 5000.);
     __syncthreads();
-    sortByPt2(pdata,pws);
+    sortByPt2(pdata, pws);
   }
 #else
   __global__ void vertexFinderKernel1(gpuVertexFinder::ZVertices* pdata,
-                                           gpuVertexFinder::WorkSpace* pws,
-                                        int minT,    // min number of neighbours to be "seed"
-                                           float eps,     // max absolute distance to cluster
-                                        float errmax,  // max error to be "seed"
-                                           float chi2max  // max normalized distance to cluster,
+                                      gpuVertexFinder::WorkSpace* pws,
+                                      int minT,      // min number of neighbours to be "seed"
+                                      float eps,     // max absolute distance to cluster
+                                      float errmax,  // max error to be "seed"
+                                      float chi2max  // max normalized distance to cluster,
   ) {
-    clusterTracksByDensity(pdata,pws,minT,eps,errmax,chi2max);
+    clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
     __syncthreads();
-    fitVertices(pdata,pws, 50.);
+    fitVertices(pdata, pws, 50.);
   }
 
-  __global__ void vertexFinderKernel2(gpuVertexFinder::ZVertices* pdata,
-                                    gpuVertexFinder::WorkSpace* pws)
-  {
-    fitVertices(pdata,pws, 5000.);
+  __global__ void vertexFinderKernel2(gpuVertexFinder::ZVertices* pdata, gpuVertexFinder::WorkSpace* pws) {
+    fitVertices(pdata, pws, 5000.);
     __syncthreads();
-    sortByPt2(pdata,pws);
+    sortByPt2(pdata, pws);
   }
 #endif
 
-
 #ifdef __CUDACC__
   ZVertexHeterogeneous Producer::makeAsync(cudaStream_t stream, TkSoA const* tksoa, float ptMin) const {
     // std::cout << "producing Vertices on GPU" << std::endl;
@@ -120,10 +117,10 @@ namespace gpuVertexFinder {
 
 #ifdef __CUDACC__
     if (oneKernel_) {
-      // implemented only for density clustesrs   
+      // implemented only for density clustesrs
 #ifndef THREE_KERNELS
       vertexFinderOneKernel<<<1, 1024 - 256, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
-#else 
+#else
       vertexFinderKernel1<<<1, 1024 - 256, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
       cudaCheck(cudaGetLastError());
       // one block per vertex...
@@ -131,7 +128,7 @@ namespace gpuVertexFinder {
       cudaCheck(cudaGetLastError());
       vertexFinderKernel2<<<1, 1024 - 256, 0, stream>>>(soa, ws_d.get());
 #endif
-    } else {   // five kernels
+    } else {  // five kernels
       if (useDensity_) {
         clusterTracksByDensityKernel<<<1, 1024 - 256, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
       } else if (useDBSCAN_) {
@@ -147,7 +144,7 @@ namespace gpuVertexFinder {
       cudaCheck(cudaGetLastError());
       fitVerticesKernel<<<1, 1024 - 256, 0, stream>>>(soa, ws_d.get(), 5000.);
       cudaCheck(cudaGetLastError());
-      sortByPt2Kernel<<<1, 1024-256, 0, stream>>>(soa, ws_d.get());
+      sortByPt2Kernel<<<1, 1024 - 256, 0, stream>>>(soa, ws_d.get());
     }
     cudaCheck(cudaGetLastError());
 #else  // __CUDACC__
@@ -161,7 +158,8 @@ namespace gpuVertexFinder {
     // std::cout << "found " << (*ws_d).nvIntermediate << " vertices " << std::endl;
     fitVertices(soa, ws_d.get(), 50.);
     // one block per vertex!
-    blockIdx.x = 0; gridDim.x=1;
+    blockIdx.x = 0;
+    gridDim.x = 1;
     splitVertices(soa, ws_d.get(), 9.f);
     resetGrid();
     fitVertices(soa, ws_d.get(), 5000.);
diff --git a/RecoPixelVertexing/PixelVertexFinding/test/VertexFinder_t.h b/RecoPixelVertexing/PixelVertexFinding/test/VertexFinder_t.h
index d32a611402e61..5261069a6b283 100644
--- a/RecoPixelVertexing/PixelVertexFinding/test/VertexFinder_t.h
+++ b/RecoPixelVertexing/PixelVertexFinding/test/VertexFinder_t.h
@@ -23,23 +23,23 @@
 
 #ifdef ONE_KERNEL
 #ifdef __CUDACC__
-  __global__ void vertexFinderOneKernel(gpuVertexFinder::ZVertices* pdata,
-                                           gpuVertexFinder::WorkSpace* pws,
-                                           int minT,    // min number of neighbours to be "seed"
-                                           float eps,     // max absolute distance to cluster
-                                        float errmax,  // max error to be "seed"
-                                           float chi2max  // max normalized distance to cluster,
-  ) {
-    clusterTracksByDensity(pdata,pws,minT,eps,errmax,chi2max);
-    __syncthreads();
-    fitVertices(pdata,pws, 50.);
-    __syncthreads();
-    splitVertices(pdata,pws, 9.f);    
-    __syncthreads();
-    fitVertices(pdata,pws, 5000.);
-    __syncthreads();
-    sortByPt2(pdata,pws);
-  }
+__global__ void vertexFinderOneKernel(gpuVertexFinder::ZVertices* pdata,
+                                      gpuVertexFinder::WorkSpace* pws,
+                                      int minT,      // min number of neighbours to be "seed"
+                                      float eps,     // max absolute distance to cluster
+                                      float errmax,  // max error to be "seed"
+                                      float chi2max  // max normalized distance to cluster,
+) {
+  clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
+  __syncthreads();
+  fitVertices(pdata, pws, 50.);
+  __syncthreads();
+  splitVertices(pdata, pws, 9.f);
+  __syncthreads();
+  fitVertices(pdata, pws, 5000.);
+  __syncthreads();
+  sortByPt2(pdata, pws);
+}
 #endif
 #endif
 
@@ -265,7 +265,6 @@ int main() {
 
 #ifdef __CUDACC__
       // one vertex per block!!!
- //     cudautils::launch(splitVerticesKernel, {1, 256}, onGPU_d.get(), ws_d.get(), 9.f);
       cudautils::launch(splitVerticesKernel, {1024, 64}, onGPU_d.get(), ws_d.get(), 9.f);
       cudaCheck(cudaMemcpy(&nv, LOC_WS(nvIntermediate), sizeof(uint32_t), cudaMemcpyDeviceToHost));
 #else