Move multifit/MAHI common code to DataFormats/CaloRecHit (#557)
Move multifit/MAHI common code to DataFormats/CaloRecHit/interface/MultifitComputations.h.
Improve naming and description of the fnnls parameters.
Use Eigen preprocessor symbols instead of explicit CUDA keywords, and CUDA preprocessor symbols to protect CUDA-only functions (illustrative sketches of these changes follow below).

Co-authored-by: Andrea Bocci <andrea.bocci@cern.ch>
mariadalfonso and fwyzard committed Oct 20, 2020
1 parent d07d030 commit 02d87a4
Showing 1 changed file with 27 additions and 23 deletions.
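As a reading aid (not part of the commit), here is a minimal sketch of the pattern the diff applies, assuming only Eigen and the CUDA toolchain; the helper name "read" is made up. EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC lets the same function compile as an ordinary inline host function with a host compiler and as a __host__ __device__ function under nvcc, while a __CUDA_ARCH__ guard keeps device-only intrinsics such as __ldg() out of the host compilation pass.

#include <Eigen/Core>  // provides EIGEN_ALWAYS_INLINE and EIGEN_DEVICE_FUNC

// Hypothetical helper (not from the commit): the Eigen macros expand to the
// appropriate always-inline attribute and, under nvcc, to __host__ __device__,
// so the header stays usable from both host and device code.
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC float read(float const* ptr) {
#ifdef __CUDA_ARCH__
  // device compilation pass: load through the read-only cache
  return __ldg(ptr);
#else
  // host compilation pass: ordinary load
  return *ptr;
#endif  // __CUDA_ARCH__
}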
@@ -1,5 +1,5 @@
-#ifndef DataFormats_Math_interface_EigenComputations_h
-#define DataFormats_Math_interface_EigenComputations_h
+#ifndef DataFormats_CaloRecHit_interface_MultifitComputations_h
+#define DataFormats_CaloRecHit_interface_MultifitComputations_h

#include <cmath>
#include <limits>
@@ -32,16 +32,16 @@ namespace calo {
static constexpr int stride = Stride;
T* data;

-__forceinline__ __device__ MapSymM(T* data) : data{data} {}
+EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC MapSymM(T* data) : data{data} {}

-__forceinline__ __device__ T const& operator()(int const row, int const col) const {
+EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC T const& operator()(int const row, int const col) const {
auto const tmp = (Stride - col) * (Stride - col + 1) / 2;
auto const index = total - tmp + row - col;
return data[index];
}

template <typename U = T>
-__forceinline__ __device__ typename std::enable_if<std::is_same<base_type, U>::value, base_type>::type&
+EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC typename std::enable_if<std::is_same<base_type, U>::value, base_type>::type&
operator()(int const row, int const col) {
auto const tmp = (Stride - col) * (Stride - col + 1) / 2;
auto const index = total - tmp + row - col;
@@ -58,17 +58,17 @@ namespace calo {
using base_type = typename std::remove_cv<type>::type;

type* data;
-__forceinline__ __device__ MapMForPM(type* data) : data{data} {}
+EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC MapMForPM(type* data) : data{data} {}

-__forceinline__ __device__ base_type operator()(int const row, int const col) const {
+EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC base_type operator()(int const row, int const col) const {
auto const index = 2 - col + row;
return index >= 0 ? data[index] : 0;
}
};

// simple/trivial cholesky decomposition impl
template <typename MatrixType1, typename MatrixType2>
-__forceinline__ __device__ void compute_decomposition_unrolled(MatrixType1& L, MatrixType2 const& M) {
+EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_unrolled(MatrixType1& L, MatrixType2 const& M) {
auto const sqrtm_0_0 = std::sqrt(M(0, 0));
L(0, 0) = sqrtm_0_0;
using T = typename MatrixType1::base_type;
@@ -94,7 +94,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2>
-__forceinline__ __device__ void compute_decomposition(MatrixType1& L, MatrixType2 const& M, int const N) {
+EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition(MatrixType1& L, MatrixType2 const& M, int const N) {
auto const sqrtm_0_0 = std::sqrt(M(0, 0));
L(0, 0) = sqrtm_0_0;
using T = typename MatrixType1::base_type;
@@ -119,7 +119,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2, typename VectorType>
-__forceinline__ __device__ void compute_decomposition_forwardsubst_with_offsets(
+EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_forwardsubst_with_offsets(
MatrixType1& L,
MatrixType2 const& M,
float b[MatrixType1::stride],
@@ -158,7 +158,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2, typename VectorType>
-__forceinline__ __device__ void update_decomposition_forwardsubst_with_offsets(
+EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void update_decomposition_forwardsubst_with_offsets(
MatrixType1& L,
MatrixType2 const& M,
float b[MatrixType1::stride],
@@ -190,7 +190,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2, typename MatrixType3>
-__device__ void solve_forward_subst_matrix(MatrixType1& A,
+EIGEN_DEVICE_FUNC void solve_forward_subst_matrix(MatrixType1& A,
MatrixType2 const& pulseMatrixView,
MatrixType3 const& matrixL) {
// FIXME: this assumes pulses are on columns and samples on rows
@@ -205,7 +205,12 @@ namespace calo {
// preload a column and load column 0 of cholesky
#pragma unroll
for (int i = 0; i < NSAMPLES; i++) {
+#ifdef __CUDA_ARCH__
+// load through the read-only cache
reg_b[i] = __ldg(&pulseMatrixView.coeffRef(i, icol));
+#else
+reg_b[i] = pulseMatrixView.coeffRef(i, icol);
+#endif // __CUDA_ARCH__
reg_L[i] = matrixL(i, 0);
}

@@ -236,7 +241,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2>
-__device__ void solve_forward_subst_vector(float reg_b[MatrixType1::RowsAtCompileTime],
+EIGEN_DEVICE_FUNC void solve_forward_subst_vector(float reg_b[MatrixType1::RowsAtCompileTime],
MatrixType1 inputAmplitudesView,
MatrixType2 matrixL) {
constexpr auto NSAMPLES = MatrixType1::RowsAtCompileTime;
@@ -276,24 +281,24 @@ namespace calo {
}
}

-/*
// TODO: add active bxs
template <typename MatrixType, typename VectorType>
-__device__ void fnnls(MatrixType const& AtA,
+EIGEN_DEVICE_FUNC void fnnls(MatrixType const& AtA,
VectorType const& Atb,
VectorType& solution,
int& npassive,
ColumnVector<VectorType::RowsAtCompileTime, int>& pulseOffsets,
MapSymM<float, VectorType::RowsAtCompileTime>& matrixL,
-double const eps,
-int const maxIterations) {
+double eps, // convergence condition
+const int maxIterations, // maximum number of iterations
+const int relaxationPeriod, // every "relaxationPeriod" iterations
+const int relaxationFactor) { // multiply "eps" by "relaxationFactor"
// constants
constexpr auto NPULSES = VectorType::RowsAtCompileTime;

// to keep track of where to terminate if converged
Eigen::Index w_max_idx_prev = 0;
float w_max_prev = 0;
-auto eps_to_use = eps;
bool recompute = false;

// used throughout
@@ -331,7 +336,7 @@ namespace calo {
}

// check for convergence
-if (w_max < eps_to_use || w_max_idx == w_max_idx_prev && w_max == w_max_prev)
+if (w_max < eps || w_max_idx == w_max_idx_prev && w_max == w_max_prev)
break;

if (iter >= maxIterations)
@@ -428,13 +433,12 @@ namespace calo {

// as in cpu
++iter;
-if (iter % 16 == 0)
-eps_to_use *= 2;
+if (iter % relaxationPeriod == 0)
+eps *= relaxationFactor;
}
}
-*/

} // namespace multifit
} // namespace calo

-#endif // DataFormats_Math_interface_EigenComputations_h
+#endif // DataFormats_CaloRecHit_interface_MultifitComputations_h
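For context on the renamed fnnls parameters, a standalone sketch of the stopping logic with made-up values (the function below is illustrative, not the committed code): instead of the hard-coded schedule that doubled eps_to_use every 16 iterations, the convergence threshold eps is now relaxed by relaxationFactor every relaxationPeriod iterations, so slow cases still stop well before maxIterations.

#include <cstdio>

// Illustrative stand-in for the FNNLS stopping logic; w_max here is a fake
// residual, whereas the real code recomputes it from the gradient each iteration.
bool converges(double eps,                  // convergence condition
               const int maxIterations,     // maximum number of iterations
               const int relaxationPeriod,  // every "relaxationPeriod" iterations...
               const int relaxationFactor,  // ...multiply "eps" by "relaxationFactor"
               double w_max) {
  int iter = 0;
  while (true) {
    if (w_max < eps)
      return true;              // converged
    if (iter >= maxIterations)
      return false;             // gave up
    w_max *= 0.9;               // placeholder for one real FNNLS iteration
    ++iter;
    if (iter % relaxationPeriod == 0)
      eps *= relaxationFactor;  // periodically loosen the threshold, as in the diff
  }
}

int main() { std::printf("converged: %d\n", converges(1e-11, 500, 16, 2, 1.0)); }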
