From 14bd8bf205c4711992038127a40632d8114f8e0b Mon Sep 17 00:00:00 2001
From: mariadalfonso <mariadalfonso@users.noreply.github.com>
Date: Tue, 20 Oct 2020 09:25:14 +0200
Subject: [PATCH] Move multifit/MAHI common code to DataFormats/CaloRecHit
 (#557)

Move multifit/MAHI common code to DataFormats/CaloRecHit/interface/MultifitComputations.h .
Improve naming and description of fnnls parameters.
Use Eigen preprocessor symbols instead of explicit CUDA keywords, and CUDA preprocessor symbols to protect CUDA-only functions.

Co-authored-by: Andrea Bocci <andrea.bocci@cern.ch>
---
 .../HcalRecProducers/src/KernelHelpers.h      |   4 +-
 RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu | 150 +-----------------
 2 files changed, 4 insertions(+), 150 deletions(-)
diff --git a/RecoLocalCalo/HcalRecProducers/src/KernelHelpers.h b/RecoLocalCalo/HcalRecProducers/src/KernelHelpers.h
index b0447b1600b9b..1fd5cb0fc387a 100644
--- a/RecoLocalCalo/HcalRecProducers/src/KernelHelpers.h
+++ b/RecoLocalCalo/HcalRecProducers/src/KernelHelpers.h
@@ -10,8 +10,6 @@
 namespace hcal {
   namespace reconstruction {
 
-    constexpr int32_t IPHI_MAX = 72;
-
     // this is from HcalTimeSlew.
     // HcalTimeSlew are values that come in from ESProducer that takes them
     // from a python config. see DeclsForKernels for more explanation
@@ -64,7 +62,7 @@ namespace hcal {
     // 2 funcs below are taken from HcalTopology (reimplemented here).
     // Inputs are constants that are also taken from HcalTopology
     // but passed to the kernel as arguments using the HclaTopology itself
-    //    constexpr int32_t IPHI_MAX = 72;
+    constexpr int32_t IPHI_MAX = 72;
 
     __forceinline__ __device__ uint32_t did2linearIndexHB(
         uint32_t const didraw, int const maxDepthHB, int const firstHBRing, int const lastHBRing, int const nEtaHB) {
diff --git a/RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu b/RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu
index f4f19511899c8..5b3ad6693f27e 100644
--- a/RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu
+++ b/RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu
@@ -1,7 +1,7 @@
 #include <Eigen/Dense>
 
 #include "DataFormats/HcalRecHit/interface/HcalSpecialTimes.h"
-#include "DataFormats/Math/interface/EigenComputations.h"
+#include "DataFormats/CaloRecHit/interface/MultifitComputations.h"
 
 // nvcc not able to parse this guy (whatever is inlcuded from it)....
 //#include "RecoLocalCalo/HcalRecAlgos/interface/PulseShapeFunctor.h"
@@ -620,151 +620,6 @@ namespace hcal {
       pulseMatrixP[ipulse * nsamples + sample] = value_t0p;
     }
 
-    // TODO: add active bxs
-    template <typename MatrixType, typename VectorType>
-    __device__ void fnnls(MatrixType const& AtA,
-                          VectorType const& Atb,
-                          VectorType& solution,
-                          int& npassive,
-                          calo::multifit::ColumnVector<VectorType::RowsAtCompileTime, int>& pulseOffsets,
-                          calo::multifit::MapSymM<float, VectorType::RowsAtCompileTime>& matrixL,
-                          double const eps,
-                          int const maxIterations) {
-      // constants
-      constexpr auto NPULSES = VectorType::RowsAtCompileTime;
-
-      // to keep track of where to terminate if converged
-      Eigen::Index w_max_idx_prev = 0;
-      float w_max_prev = 0;
-      auto eps_to_use = eps;
-      bool recompute = false;
-
-      // used throughout
-      VectorType s;
-      float reg_b[NPULSES];
-      //float matrixLStorage[MapSymM<float, NPULSES>::total];
-      //MapSymM<float, NPULSES> matrixL{matrixLStorage};
-
-      int iter = 0;
-      while (true) {
-        if (iter > 0 || npassive == 0) {
-          auto const nactive = NPULSES - npassive;
-          // exit if there are no more pulses to constrain
-          if (nactive == 0)
-            break;
-
-          // compute the gradient
-          //w.tail(nactive) = Atb.tail(nactive) - (AtA * solution).tail(nactive);
-          Eigen::Index w_max_idx;
-          float w_max = -std::numeric_limits<float>::max();
-          for (int icol = npassive; icol < NPULSES; icol++) {
-            auto const icol_real = pulseOffsets(icol);
-            auto const atb = Atb(icol_real);
-            float sum = 0;
-#pragma unroll
-            for (int counter = 0; counter < NPULSES; counter++)
-              sum += counter > icol_real ? AtA(counter, icol_real) * solution(counter)
-                                         : AtA(icol_real, counter) * solution(counter);
-
-            auto const w = atb - sum;
-            if (w > w_max) {
-              w_max = w;
-              w_max_idx = icol - npassive;
-            }
-          }
-
-          // check for convergence
-          if (w_max < eps_to_use || w_max_idx == w_max_idx_prev && w_max == w_max_prev)
-            break;
-
-          if (iter >= maxIterations)
-            break;
-
-          w_max_prev = w_max;
-          w_max_idx_prev = w_max_idx;
-
-          // move index to the right part of the vector
-          w_max_idx += npassive;
-
-          Eigen::numext::swap(pulseOffsets.coeffRef(npassive), pulseOffsets.coeffRef(w_max_idx));
-          ++npassive;
-        }
-
-        // inner loop
-        while (true) {
-          if (npassive == 0)
-            break;
-
-          //s.head(npassive)
-          //auto const& matrixL =
-          //    AtA.topLeftCorner(npassive, npassive)
-          //        .llt().matrixL();
-          //.solve(Atb.head(npassive));
-          if (recompute || iter == 0)
-            calo::multifit::compute_decomposition_forwardsubst_with_offsets(
-                matrixL, AtA, reg_b, Atb, npassive, pulseOffsets);
-          else
-            calo::multifit::update_decomposition_forwardsubst_with_offsets(
-                matrixL, AtA, reg_b, Atb, npassive, pulseOffsets);
-
-          // run backward substituion
-          s(npassive - 1) = reg_b[npassive - 1] / matrixL(npassive - 1, npassive - 1);
-          for (int i = npassive - 2; i >= 0; --i) {
-            float total = 0;
-            for (int j = i + 1; j < npassive; j++)
-              total += matrixL(j, i) * s(j);
-
-            s(i) = (reg_b[i] - total) / matrixL(i, i);
-          }
-
-          // done if solution values are all positive
-          if (s.head(npassive).minCoeff() > 0.f) {
-            for (int i = 0; i < npassive; i++) {
-              auto const i_real = pulseOffsets(i);
-              solution(i_real) = s(i);
-            }
-            //solution.head(npassive) = s.head(npassive);
-            recompute = false;
-            break;
-          }
-
-          // there were negative values -> have to recompute the whole decomp
-          recompute = true;
-
-          auto alpha = std::numeric_limits<float>::max();
-          Eigen::Index alpha_idx = 0, alpha_idx_real = 0;
-          for (int i = 0; i < npassive; i++) {
-            if (s[i] <= 0.) {
-              auto const i_real = pulseOffsets(i);
-              auto const ratio = solution[i_real] / (solution[i_real] - s[i]);
-              if (ratio < alpha) {
-                alpha = ratio;
-                alpha_idx = i;
-                alpha_idx_real = i_real;
-              }
-            }
-          }
-
-          // upadte solution
-          for (int i = 0; i < npassive; i++) {
-            auto const i_real = pulseOffsets(i);
-            solution(i_real) += alpha * (s(i) - solution(i_real));
-          }
-          //solution.head(npassive) += alpha *
-          //    (s.head(npassive) - solution.head(npassive));
-          solution[alpha_idx_real] = 0;
-          --npassive;
-
-          Eigen::numext::swap(pulseOffsets.coeffRef(npassive), pulseOffsets.coeffRef(alpha_idx));
-        }
-
-        // as in cpu
-        ++iter;
-        if (iter % 10 == 0)
-          eps_to_use *= 10;
-      }
-    }
-
     template <int NSAMPLES, int NPULSES>
     __forceinline__ __device__ void update_covariance(
         calo::multifit::ColumnVector<NPULSES> const& resultAmplitudesVector,
@@ -1086,7 +941,8 @@ namespace hcal {
 
         // run fast nnls
         // FIXME: provide values from config
-        fnnls(AtA, Atb, resultAmplitudesVector, npassive, pulseOffsets, matrixLForFnnls, 1e-11, 500);
+        calo::multifit::fnnls(
+            AtA, Atb, resultAmplitudesVector, npassive, pulseOffsets, matrixLForFnnls, 1e-11, 500, 10, 10);
 
 #ifdef HCAL_MAHI_GPUDEBUG
         printf("result Amplitudes\n");