From 15533316970948d88ff104a0d380313f1f97d608 Mon Sep 17 00:00:00 2001
From: grasseau <grasseau@subatech.in2p3.fr>
Date: Thu, 8 Dec 2022 10:10:05 +0100
Subject: [PATCH] [MCHClustering] Optimize St4-5 clustering - preparing PR -
 option TimingStat

---
 Detectors/MUON/MCH/Clustering/CMakeLists.txt  |    1 +
 .../include/MCHClustering/ClusterConfig.h     |   49 +-
 .../include/MCHClustering/ClusterFinderGEM.h  |    9 +-
 .../include/MCHClustering/ClusterPEM.h        |   34 +-
 .../include/MCHClustering/PadsPEM.h           |   50 +-
 .../include/MCHClustering/clusterProcessing.h |    2 +-
 .../MUON/MCH/Clustering/src/ClusterConfig.cxx |   42 +
 .../MCH/Clustering/src/ClusterFinderGEM.cxx   |   55 +-
 .../Clustering/src/ClusterFinderOriginal.cxx  |    2 +-
 .../MUON/MCH/Clustering/src/ClusterPEM.cxx    | 1911 ++++++++++++++---
 .../MUON/MCH/Clustering/src/InspectModel.cxx  |  128 +-
 .../MUON/MCH/Clustering/src/InspectModel.h    |   22 +-
 Detectors/MUON/MCH/Clustering/src/PadsPEM.cxx | 1487 +++++++++++--
 .../MCH/Clustering/src/clusterProcessing.cxx  |   89 +-
 .../MUON/MCH/Clustering/src/mathUtil.cxx      |   11 +-
 Detectors/MUON/MCH/Clustering/src/mathUtil.h  |   32 +
 .../MUON/MCH/Clustering/src/mathieson.cxx     |  803 ++++++-
 Detectors/MUON/MCH/Clustering/src/mathieson.h |   51 +-
 .../MUON/MCH/Clustering/src/mathiesonFit.cxx  |  800 ++++++-
 .../MUON/MCH/Clustering/src/mathiesonFit.h    |   18 +-
 .../MUON/MCH/Clustering/src/poissonEM.cxx     |  163 +-
 Detectors/MUON/MCH/Clustering/src/poissonEM.h |   12 +-
 .../MCH/Workflow/src/ClusterFinderGEMSpec.cxx |  117 +-
 23 files changed, 5007 insertions(+), 881 deletions(-)
 create mode 100644 Detectors/MUON/MCH/Clustering/src/ClusterConfig.cxx

diff --git a/Detectors/MUON/MCH/Clustering/CMakeLists.txt b/Detectors/MUON/MCH/Clustering/CMakeLists.txt
index 6ebbc3f7dec0f..7ca0dc06e6b9d 100644
--- a/Detectors/MUON/MCH/Clustering/CMakeLists.txt
+++ b/Detectors/MUON/MCH/Clustering/CMakeLists.txt
@@ -23,6 +23,7 @@ o2_target_root_dictionary(MCHClustering
 
 o2_add_library(MCHClusteringGEM
                SOURCES src/ClusterOriginal.cxx
+                       src/ClusterConfig.cxx
                        src/ClusterDump.cxx
                        src/ClusterFinderOriginal.cxx
                        src/ClusterFinderGEM.cxx
diff --git a/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterConfig.h b/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterConfig.h
index 1790f6566f6ac..b345966172430 100644
--- a/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterConfig.h
+++ b/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterConfig.h
@@ -29,14 +29,30 @@ struct ClusterConfig {
   //
   // Physical-Numerical parameters
   //
+  // Run2
+  // 4.f * 0.22875f;
+  double minChargeOfPads;              // Lowest Charge of a Pad
+  double minChargeOfClusterPerCathode; // Lowest Charge of a Group
   // static constexpr double minChargeOfClusterPerCathode = 1.1; // Lowest Charge of a Group
-  static constexpr double minChargeOfClusterPerCathode = 20.0; // Lowest Charge of a Group
-  //
-  // Algorithm limitations
+  // Run3
+  // static double minChargeOfPads = 16; // Lowest Charge of a Pad
+  // static double minChargeOfClusterPerCathode = 1.0 * minChargeOfPads; // Lowest Charge of a Group
   //
+  // ClusterResolution
+  float SDefaultClusterResolution; ///< default cluster resolution (cm)
+  float SBadClusterResolution;     ///< bad (e.g. mono-cathode) cluster resolution (cm)
+
+  // Large Clusters
+  int nbrPadLimit = 600;
+  double ratioStepForLargeCluster = 0.05; // increment to find nPads < nbrPadLimit
   // Limit of pad number  to perform the fitting
-  static constexpr int nbrOfPadsLimitForTheFitting = 100;
+  int nbrOfPadsLimitForTheFitting = 100;
+  // Stop the fitting if small xy shift
+  double minFittingXYStep = 0.1; // in cm
   //
+  // Algorithm choices
+  //
+  int useSpline = 0;
   // Logs
   //
   enum VerboseMode {
@@ -45,13 +61,13 @@ struct ClusterConfig {
     detail = 0x2, ///< Describes in detail
     debug = 0x3   ///< Ful details
   };
-  static constexpr VerboseMode fittingLog = no;
-  static constexpr VerboseMode processingLog = no; // Global
-  static constexpr VerboseMode padMappingLog = no;
-  static constexpr VerboseMode groupsLog = no;
-  static constexpr VerboseMode EMLocalMaxLog = no;
-  static constexpr VerboseMode inspectModelLog = no;
-  static constexpr VerboseMode laplacianLocalMaxLog = no;
+  VerboseMode fittingLog = no;
+  VerboseMode processingLog = no; // Global
+  VerboseMode padMappingLog = no;
+  VerboseMode groupsLog = no;
+  VerboseMode EMLocalMaxLog = no;
+  VerboseMode inspectModelLog = no;
+  VerboseMode laplacianLocalMaxLog = no;
   //
   // Checks
   //
@@ -60,14 +76,15 @@ struct ClusterConfig {
     active = 0x1,   ///< Describe default activation
   };
   // Activate/deactivate InspectModel
-  static constexpr ActivateMode inspectModel = active;
+  ActivateMode inspectModel = inactive;
   //
-  static constexpr bool groupsCheck = true;
-  static constexpr bool padMappingCheck = true;
-  // TODO ???
-  // Check, Stat
+  bool groupsCheck = true;
+  bool padMappingCheck = true;
+  bool mathiesonCheck = false;
 };
 
+void initClusterConfig();
+
 } // namespace mch
 } // end namespace o2
 
diff --git a/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterFinderGEM.h b/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterFinderGEM.h
index ab2bbf9fa3736..95872b0b7d617 100644
--- a/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterFinderGEM.h
+++ b/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterFinderGEM.h
@@ -61,7 +61,7 @@ class ClusterFinderGEM
   // GG method called by the process workflow ( ClusterFinderGEMSpec )
   //
 
-  void init(int mode);
+  void init(int mode, bool run2Config);
   void deinit();
   void reset();
   void fillGEMInputData(gsl::span<const Digit>& digits, uint16_t bunchCrossing, uint32_t orbit, uint32_t iPreCluster);
@@ -87,8 +87,11 @@ class ClusterFinderGEM
   static constexpr int SNFitClustersMax = 3;                     ///< maximum number of clusters fitted at the same time
   static constexpr int SNFitParamMax = 3 * SNFitClustersMax - 1; ///< maximum number of fit parameters
   static constexpr double SLowestCoupling = 1.e-2;               ///< minimum coupling between clusters of pixels and pads
-  static constexpr float SDefaultClusterResolution = 0.2f;       ///< default cluster resolution (cm)
-  static constexpr float SBadClusterResolution = 10.f;           ///< bad (e.g. mono-cathode) cluster resolution (cm)
+
+  // Invalid ???
+  // static constexpr char statFileName[] = "statistics.csv";
+  // std::fstream statStream;
+
   // GG Unused
   // void resetPreCluster(gsl::span<const Digit>& digits);
   // void simplifyPreCluster(std::vector<int>& removedDigits);
diff --git a/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterPEM.h b/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterPEM.h
index 43f96c20a5673..8dc2bd1bef6ce 100644
--- a/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterPEM.h
+++ b/Detectors/MUON/MCH/Clustering/include/MCHClustering/ClusterPEM.h
@@ -18,13 +18,16 @@
 #ifndef O2_MCH_CLUSTER_H_
 #define O2_MCH_CLUSTER_H_
 
+#include <gsl/gsl_blas.h>
+#include <gsl/gsl_linalg.h>
+
 #include "MCHClustering/PadsPEM.h"
 
 namespace o2
 {
 namespace mch
 {
-typedef std::pair<int, const double*> DataBlock_t;
+typedef std::pair<int, double*> DataBlock_t;
 
 typedef struct {
   PadIdx_t i;
@@ -45,20 +48,32 @@ class ClusterPEM
   ~ClusterPEM();
   inline int getNbrOfPads(int c)
   {
-    return (pads[c] == nullptr ? 0 : pads[c]->getNbrOfPads());
+    return ((pads[c] == nullptr) ? 0 : pads[c]->getNbrOfPads());
+  };
+  inline int getNbrOfObsPads(int c)
+  {
+    return (pads[c] != nullptr) ? pads[c]->getNbrOfObsPads() : 0; // 0 when cathode c has no Pads object
+  };
   inline int getNbrOfPads()
   {
     return getNbrOfPads(0) + getNbrOfPads(1);
   };
+  inline int getNbrOfObsPads()
+  {
+    return getNbrOfObsPads(1) + getNbrOfObsPads(0); // sum over the two cathodes
+  };
   inline double getTotalCharge(int c)
   {
     return (pads[c] == nullptr ? 0 : pads[c]->getTotalCharge());
   };
+
   inline const double* getCharges(int c)
   {
-    return (pads[c] == nullptr) ? nullptr : pads[c]->getCharges();
+    return (pads[c] == nullptr ? nullptr : pads[c]->getCharges());
   }
+
+  double getMaxCharge();
+
   inline const Pads* getPads(int c) { return pads[c]; };
   inline const Groups_t* getCathGroup(int c) { return cathGroup[c]; };
   inline Groups_t* getProjPadGroup() { return projPadToGrp; };
@@ -67,6 +82,9 @@ class ClusterPEM
   {
     return (projectedPads == nullptr ? -1 : projectedPads->getNbrOfPads());
   };
+  std::pair<double, double> computeChargeBarycenter(int plane);
+  //
+  std::pair<int, int> getNxNy(int c);
   // Unused - Old version
   // double *getProjPadsAsXYdXY( Groups_t group, const Mask_t* maskGrp, int
   // nbrProjPadsInTheGroup);
@@ -85,6 +103,8 @@ class ClusterPEM
   void addBoundaryPads();
   // Find local maximima with the PET algo
   int findLocalMaxWithPEM(double* thetaL, int nbrOfPadsInTheGroupCath);
+  int findLocalMaxWithPEMFullRefinement(double* thetaL, int nbrOfPadsInTheGroupCath);
+  int findLocalMaxWithPEM2Lev(double* thetaL, int nbrOfPadsInTheGroupCath);
   // Perform the fitting
   DataBlock_t fit(double* thetaInit, int K);
   // Not used in the Clustering/fitting
@@ -144,6 +164,9 @@ class ClusterPEM
   int renumberGroups(Groups_t* grpToGrp, int nGrp);
   // Remove low charged groups
   void removeLowChargedGroups(int nGroups);
+  // Remove smallCharged seeds
+  int filterFitModelOnSmallChargedSeeds(Pads& pads, double* theta, int K,
+                                        Mask_t* maskFilteredTheta);
   // Keep the seeds inside the cluster area
   // Some fitting cases provide seeds outside of the cluster area
   int filterFitModelOnClusterRegion(Pads& pads, double* theta, int K,
@@ -152,7 +175,8 @@ class ClusterPEM
   int filterFitModelOnSpaceVariations(const double* theta0, int K0,
                                       double* theta, int K,
                                       Mask_t* maskFilteredTheta);
-
+  Pads* findLocalMaxWithRefinement(double* thetaL, int nbrOfPadsInTheGroupCath);
+  Pads* findLocalMaxWithoutRefinement(double* thetaL, int nbrOfPadsInTheGroupCath);
   // ???
   int getIndexByRow(const char* matrix, PadIdx_t N, PadIdx_t M, PadIdx_t* IIdx);
   int getIndexByColumns(const char* matrix, PadIdx_t N, PadIdx_t M,
@@ -171,6 +195,8 @@ class ClusterPEM
                   PadIdx_t* sortedLocalMax, int kMax, double* smoothQ);
 };
 
+gsl_matrix* moore_penrose_pinv(gsl_matrix* A, double rcond);
+
 } // namespace mch
 } // namespace o2
 
diff --git a/Detectors/MUON/MCH/Clustering/include/MCHClustering/PadsPEM.h b/Detectors/MUON/MCH/Clustering/include/MCHClustering/PadsPEM.h
index 35dc0d3146a58..23426a939c196 100644
--- a/Detectors/MUON/MCH/Clustering/include/MCHClustering/PadsPEM.h
+++ b/Detectors/MUON/MCH/Clustering/include/MCHClustering/PadsPEM.h
@@ -17,6 +17,8 @@
 #ifndef O2_MCH_PADSPEM_H_
 #define O2_MCH_PADSPEM_H_
 
+#include <vector>
+
 #include "MCHClustering/ClusterConfig.h"
 
 namespace o2
@@ -45,14 +47,15 @@ inline static PadIdx_t getTheFirstNeighborOf(PadIdx_t* neigh, PadIdx_t i)
 class Pads
 {
  public:
-  enum padMode {
+  enum PadMode {
     xydxdyMode = 0x0,  ///< x, y, dx, dy pad coordinates
     xyInfSupMode = 0x1 ///< xInf=x, xSup=dx, yInf=y, ySup=dy pad coordinates
   };
   static constexpr double epsilonGeometry =
     1.0e-04; // Uncertainty on pad location (in cm)
   // Representation mode  (see padMode)
-  int mode = xydxdyMode;
+  // PadMode mode;
+  PadMode mode = xydxdyMode;
 
   // Utilities
   static void printNeighbors(const PadIdx_t* neigh, int N);
@@ -63,15 +66,17 @@ class Pads
   };
 
   // Allocation constructor
-  Pads(int N, int chId, int mode = xydxdyMode);
+  Pads(int N, int chId, PadMode mode = xydxdyMode);
   // Build a new set of pads with different coordinates
   // xydxdy mode or xyInfSup
-  Pads(const Pads& pads, int mode_);
+  Pads(const Pads& pads, PadMode mode_);
+  // Pad object with over allocated pads
+  Pads(const Pads* pads, int size);
   // Build a pads set from those selected by "mask"
   // Used to extract sub-clusters
   Pads(const Pads& pads, const Groups_t* mask);
   // Concatenate the 2 pads sets
-  Pads(const Pads* pads1, const Pads* pads2, int mode);
+  Pads(const Pads* pads0, const Pads* pads1, PadMode mode);
   // Main constructor
   Pads(const double* x_, const double* y_, const double* dx_, const double* dy_,
        const double* q_, const short* cathode, const Mask_t* saturate_,
@@ -82,6 +87,7 @@ class Pads
   // Take the ownership of coordinates (x, y, dx, dy)
   Pads(double* x_, double* y_, double* dx_, double* dy_, int chId, int nPads_);
   inline int getNbrOfPads() const { return nPads; };
+  inline int getNbrOfObsPads() const { return nObsPads; };
   inline const double* getX() const { return x; };
   inline const double* getY() const { return y; };
   inline const double* getDX() const { return dx; };
@@ -94,15 +100,33 @@ class Pads
   inline const Mask_t* getSaturates() const { return saturate; };
   inline const Mask_t* getCathodes() const { return cath; };
   inline double getTotalCharge() const { return totalCharge; };
+  double updateTotalCharge();
+  // Mean of the 2 cathodes total Charge
+  double getMeanTotalCharge();
   inline int getChamberId() const { return chamberId; };
   void setCharges(double c);
   void setCharges(double* q_, int n);
+  void setCathodes(Mask_t cath_);
+  void setSaturate(Mask_t val);
+  // Select/keep pads in the list index
+  Pads* selectPads(int* index, int k);
   // Remove pads whos charge is less than qCut
   int removePads(double qCut);
+  // pad coordinates transformations
+  // xydxdyMode <-> xyInfSupMode
+  void padBoundsToCenter(const Pads& pads);
+  void padCenterToBounds(const Pads& pads);
+  void padBoundsToCenter();
+  void padCenterToBounds();
   // Charges normalization
   void normalizeCharges();
   // Split each pads in 4 smaller pads with the same sizes
-  Pads* refinePads();
+  Pads* refineAll();
+  // refine only at the local max
+  void refineLocalMax(Pads& localMax, std::vector<PadIdx_t>& localMaxIdx);
+  // refine only at the local max
+  void refineLocalMaxAndUpdateCij(const Pads& pads,
+                                  std::vector<PadIdx_t>& localMaxIdx, double Cij[]);
   // Add zero-charged pads to the neighboring of the pads (cathode cluster)
   Pads* addBoundaryPads();
   // Building Neighbors
@@ -110,12 +134,17 @@ class Pads
   // Building K-Neighbors
   PadIdx_t* buildKFirstsNeighbors(int kernelSize);
   // Extract local maximima
-  Pads* extractLocalMax();
+  Pads* extractLocalMax(std::vector<PadIdx_t>& localMaxIdx, double dxMinPadSize, double dyMinPadSize);
+  Pads* extractLocalMaxOnCoarsePads(std::vector<PadIdx_t>& localMaxIdx);
+
+  Pads* extractLocalMaxOnCoarsePads_Remanent(std::vector<PadIdx_t>& localMaxIdx, double dxMinPadSize, double dyMinPadSize);
   // Extract local maximima, with of without a neighboring
   // Obsolete
   Pads* clipOnLocalMax(bool extractLocalMax);
   // Groups
   int addIsolatedPadInGroups(Mask_t* cathToGrp, Mask_t* grpToGrp, int nGroups);
+  //
+  // inv void print(const char* title);
   ~Pads();
 
  private:
@@ -132,16 +161,23 @@ class Pads
   double* q = nullptr;
   double totalCharge = 0;
   int nPads = 0;
+  // n Observable/ measurable pads
+  // Used to speed-up the fitting
+  int nObsPads = 0;
   int chamberId = -1;
   PadIdx_t* neighbors = nullptr;
   //
   // Memory allocation/deallocation
   void allocate();
+  void allocate(int size);
   void release();
+  void copyPads(const Pads* srcPads, int srcIdx, int destIdx, int N, Mask_t cathValue);
   // Utilities
   void removePad(int index);
   PadIdx_t* buildFirstNeighbors(double* X, double* Y, double* DX, double* DY,
                                 int N);
+  // Assess or not if xyCheck is a remanent local Max (can be removed)
+  bool assessRemanent(double xyCheck, double* xy, double precision, int N);
   void setToZero();
 };
 
diff --git a/Detectors/MUON/MCH/Clustering/include/MCHClustering/clusterProcessing.h b/Detectors/MUON/MCH/Clustering/include/MCHClustering/clusterProcessing.h
index ace8221daa5e5..dcf9a81dadc9b 100644
--- a/Detectors/MUON/MCH/Clustering/include/MCHClustering/clusterProcessing.h
+++ b/Detectors/MUON/MCH/Clustering/include/MCHClustering/clusterProcessing.h
@@ -14,7 +14,7 @@
 
 #include "MCHClustering/ClusterConfig.h"
 
-typedef std::pair<int, const double*> DataBlock_t;
+// ??? Inv typedef std::pair<int, double*> DataBlock_t;
 
 namespace o2
 {
diff --git a/Detectors/MUON/MCH/Clustering/src/ClusterConfig.cxx b/Detectors/MUON/MCH/Clustering/src/ClusterConfig.cxx
new file mode 100644
index 0000000000000..cdb91b5d0545e
--- /dev/null
+++ b/Detectors/MUON/MCH/Clustering/src/ClusterConfig.cxx
@@ -0,0 +1,42 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+/// \file ClusterConfig.cxx
+/// \brief Clustering and fitting parameters
+/// \author Gilles Grasseau, Subatech
+
+#include "MCHClustering/ClusterConfig.h"
+
+namespace o2
+{
+namespace mch
+{
+
+ClusterConfig clusterConfig;
+
+void initClusterConfig()
+{
+  clusterConfig.minChargeOfClusterPerCathode = -1.0; // placeholder; ClusterFinderGEM::init() sets the real threshold right after this call
+}
+
+} // namespace mch
+} // end namespace o2
diff --git a/Detectors/MUON/MCH/Clustering/src/ClusterFinderGEM.cxx b/Detectors/MUON/MCH/Clustering/src/ClusterFinderGEM.cxx
index d99e81b7e7ce3..1b500de937533 100644
--- a/Detectors/MUON/MCH/Clustering/src/ClusterFinderGEM.cxx
+++ b/Detectors/MUON/MCH/Clustering/src/ClusterFinderGEM.cxx
@@ -36,6 +36,7 @@
 // GG
 #include "PadOriginal.h"
 #include "ClusterOriginal.h"
+#include "MCHClustering/ClusterizerParam.h"
 // ??? <<<<<<< HEAD
 #include "MCHBase/MathiesonOriginal.h"
 // #include "mathiesonFit.h"
@@ -55,6 +56,8 @@ namespace o2
 namespace mch
 {
 
+extern ClusterConfig clusterConfig;
+
 //_________________________________________________________________________________________________
 ClusterFinderGEM::ClusterFinderGEM()
   : mMathiesons(std::make_unique<MathiesonOriginal[]>(2)), mPreCluster(std::make_unique<ClusterOriginal>())
@@ -72,7 +75,7 @@ ClusterFinderGEM::ClusterFinderGEM()
   mMathiesons[1].setSqrtKy3AndDeriveKy2Ky4(0.7642);
   // GG
   // init Mathieson
-  o2::mch::initMathieson();
+  o2::mch::initMathieson(clusterConfig.useSpline, 0);
   nPads = 0;
   xyDxy = nullptr;
   cathode = nullptr;
@@ -85,11 +88,30 @@ ClusterFinderGEM::ClusterFinderGEM()
 }
 
 //_________________________________________________________________________________________________
-void ClusterFinderGEM::init(int _mode)
+void ClusterFinderGEM::init(int _mode, bool run2Config)
 {
   /// initialize the clustering
   // ??? Not used
   mode = _mode;
+  // Run 2 case (default)
+  // 4.f * 0.22875f;
+  initClusterConfig();
+  clusterConfig.minChargeOfPads = 1.1;
+  clusterConfig.minChargeOfClusterPerCathode = 2 * clusterConfig.minChargeOfPads;
+  //
+  // ClusterResolution
+  clusterConfig.SDefaultClusterResolution = 0.2f;
+  clusterConfig.SBadClusterResolution = 10.f;
+  if (!run2Config) {
+    // Run 3 case
+    clusterConfig.minChargeOfPads = ClusterizerParam::Instance().lowestPadCharge;
+    clusterConfig.minChargeOfClusterPerCathode = 1.0 * clusterConfig.minChargeOfPads;
+    //
+    // Cluster resolution (for the tracking)
+    clusterConfig.SDefaultClusterResolution = ClusterizerParam::Instance().defaultClusterResolution;
+    clusterConfig.SBadClusterResolution = ClusterizerParam::Instance().badClusterResolution;
+  }
+  // Inv ???  LOG(info) << "Init lowestPadCharge = " << clusterConfig.minChargeOfPads ;
 }
 //_________________________________________________________________________________________________
 void ClusterFinderGEM::deinit()
@@ -175,8 +197,6 @@ void ClusterFinderGEM::dumpPreCluster(ClusterDump* dumpFile, gsl::span<const Dig
     double dx = mSegmentation->padSizeX(padID) / 2.;
     double dy = mSegmentation->padSizeY(padID) / 2.;
     uint32_t adc = digit.getADC();
-    // float charge(0.);
-    // std::memcpy(&charge, &adc, sizeof(adc));
     double charge = mADCToCharge(adc);
     bool isSaturated = digit.isSaturated();
     int plane = mSegmentation->isBendingPad(padID) ? 0 : 1;
@@ -264,6 +284,16 @@ void ClusterFinderGEM::dumpClusterResults(ClusterDump* dumpFile, const std::vect
   }
 }
 
+/* Invalid
+//_________________________________________________________________________________________________
+// void ClusterFinderGEM::saveStatistics(ClusterDump* dumpFile, gsl::span<const Digit> digits, const std::vector<Cluster>& clusters, size_t startIdx, uint16_t bunchCrossing, uint32_t orbit, uint32_t iPreCluster)
+void ClusterFinderGEM::saveStatistics(uint32_t orbit, uint16_t bunchCrossing, uint32_t iPreCluster, uint16_t nPads, uint16_t nbrClusters, uint16_t DEId, double duration)
+{
+  statStream << iPreCluster << " " << bunchCrossing << " " << orbit << " "
+             << nPads << " " << nbrClusters << " " << DEId << " " << duration << std::endl;
+}
+*/
+
 //_________________________________________________________________________________________________
 void ClusterFinderGEM::fillGEMInputData(gsl::span<const Digit>& digits, uint16_t bunchCrossing, uint32_t orbit, uint32_t iPreCluster)
 {
@@ -305,6 +335,7 @@ void ClusterFinderGEM::fillGEMInputData(gsl::span<const Digit>& digits, uint16_t
     if (charge <= 0.) {
       throw std::runtime_error("The precluster contains a digit with charge <= 0");
     }
+    // std::cout << x << ", " << y << ", " << dx << ", " << dy << ", " << charge  << ", " << isSaturated << std::endl;
     mPreCluster->addPad(x, y, dx, dy, charge, isSaturated, plane, iDigit, PadOriginal::kZero);
     // GG
     // Initialisation for GEM processing
@@ -328,9 +359,8 @@ void ClusterFinderGEM::setClusterResolution(Cluster& cluster) const
   if (cluster.getChamberId() < 4) {
 
     // do not consider mono-cathode clusters in stations 1 and 2
-    cluster.ex = SDefaultClusterResolution;
-    cluster.ey = SDefaultClusterResolution;
-
+    cluster.ex = clusterConfig.SDefaultClusterResolution;
+    cluster.ey = clusterConfig.SDefaultClusterResolution;
   } else {
 
     // find pads below the cluster
@@ -350,8 +380,10 @@ void ClusterFinderGEM::setClusterResolution(Cluster& cluster) const
     }
 
     // set the cluster resolution accordingly
-    cluster.ex = (itPadNB == mUsedDigits.end()) ? SBadClusterResolution : SDefaultClusterResolution;
-    cluster.ey = (itPadB == mUsedDigits.end()) ? SBadClusterResolution : SDefaultClusterResolution;
+    cluster.ex = (itPadNB == mUsedDigits.end()) ? clusterConfig.SBadClusterResolution
+                                                : clusterConfig.SDefaultClusterResolution;
+    cluster.ey = (itPadB == mUsedDigits.end()) ? clusterConfig.SBadClusterResolution      // same source as ex: the values
+                                               : clusterConfig.SDefaultClusterResolution; // chosen in init() (run2 or run3)
   }
 }
 
@@ -367,7 +399,7 @@ void ClusterFinderGEM::findClusters(gsl::span<const Digit> digits,
     return;
   }
   uint32_t nPreviousCluster = mClusters.size();
-  if (ClusterConfig::processingLog >= ClusterConfig::info) {
+  if (clusterConfig.processingLog >= clusterConfig.info) {
     printf("----------------------------------------\n");
     std::cout << "  [GEM] PreCluster BC=" << bunchCrossing
               << ", orbit = " << orbit
@@ -386,7 +418,7 @@ void ClusterFinderGEM::findClusters(gsl::span<const Digit> digits,
 
   // GG process clusters
   int chId = DEId / 100;
-  int nbrOfHits = clusterProcess(xyDxy, cathode, saturated, padCharge, chId, nPads);
+  int nbrOfHits = ::clusterProcess(xyDxy, cathode, saturated, padCharge, chId, nPads);
   double theta[nbrOfHits * 5];
   Groups_t thetaToGroup[nbrOfHits];
   /// collectTheta(theta, thetaToGroup, nbrOfHits);
@@ -460,7 +492,6 @@ void ClusterFinderGEM::findClusters(gsl::span<const Digit> digits,
         setClusterResolution(mClusters[mClusters.size() - 1]);
         // Debug
         int iNewCluster = mClusters.size() - 1;
-        // Debug ???
         /*
         std::cout << "iNewCluster=" << iNewCluster << ", DEId=" << digits[0].getDetID()
                  << ", x" <<  mClusters[iNewCluster].x << ", y" <<  mClusters[iNewCluster].y << ", z" <<  mClusters[iNewCluster].z
diff --git a/Detectors/MUON/MCH/Clustering/src/ClusterFinderOriginal.cxx b/Detectors/MUON/MCH/Clustering/src/ClusterFinderOriginal.cxx
index b2c16a1cffb71..679d0ad8fc53d 100644
--- a/Detectors/MUON/MCH/Clustering/src/ClusterFinderOriginal.cxx
+++ b/Detectors/MUON/MCH/Clustering/src/ClusterFinderOriginal.cxx
@@ -2091,4 +2091,4 @@ void ClusterFinderOriginal::setClusterResolution(Cluster& cluster) const
   }
 }
 
-} // namespace o2
+} // namespace o2::mch
diff --git a/Detectors/MUON/MCH/Clustering/src/ClusterPEM.cxx b/Detectors/MUON/MCH/Clustering/src/ClusterPEM.cxx
index 3184217b3692d..b16745bf9400f 100644
--- a/Detectors/MUON/MCH/Clustering/src/ClusterPEM.cxx
+++ b/Detectors/MUON/MCH/Clustering/src/ClusterPEM.cxx
@@ -17,11 +17,12 @@
 
 #include <cstdio>
 #include <stdexcept>
+#include <cmath>
 
-#include "InspectModel.h"
-#include "MCHClustering/ClusterPEM.h"
 #include "MCHClustering/ClusterConfig.h"
+#include "MCHClustering/ClusterPEM.h"
 #include "MCHClustering/PadsPEM.h"
+#include "InspectModel.h"
 #include "mathUtil.h"
 #include "mathieson.h"
 #include "mathiesonFit.h"
@@ -31,6 +32,9 @@ namespace o2
 {
 namespace mch
 {
+
+extern ClusterConfig clusterConfig;
+
 // Fit parameters
 // doProcess = verbose + (doJacobian << 2) + ( doKhi << 3) + (doStdErr << 4)
 static const int processFitVerbose = 1 + (0 << 2) + (1 << 3) + (1 << 4);
@@ -38,6 +42,169 @@ static const int processFit = 0 + (0 << 2) + (1 << 3) + (1 << 4);
 
 double epsilonGeometry = 1.0e-4;
 
+/**
+ * Compute the (Moore-Penrose) pseudo-inverse of a libgsl matrix in plain C.
+ *
+ * Compile using:
+ *
+ *     gcc moore_penrose_pseudoinverse.c -lgsl -lblas
+ *
+ * Dependencies:
+ * - libgsl (GNU Scientific Library)
+ * - libblas (Basic Linear Algebra Subprograms)
+ *
+ * Charl Linssen <charl@itfromb.it>
+ * Feb 2016
+ * PUBLIC DOMAIN
+ **/
+
+typedef double realtype;
+
+void print_matrix(const gsl_matrix* m)
+{
+  // Debug helper: writes the matrix to stdout, one text line per matrix row,
+  // every element formatted with "%f" and followed by a tab.
+  for (size_t row = 0; row < m->size1; ++row) {
+    for (size_t col = 0; col < m->size2; ++col) {
+      printf("%f\t", gsl_matrix_get(m, row, col));
+    }
+    printf("\n");
+  }
+}
+
+void printGSLVector(const char* str, const gsl_vector* v)
+{
+  const int dim = v->size;        // number of components, narrowed to int for the "%d" below
+  const int nRows = dim / 10 + 1; // output rows of at most 10 values (the last one may be empty)
+  printf("%s dim=%d nPackets=%d\n  ", str, dim, nRows);
+  for (int row = 0; row < nRows; ++row) {
+    for (int idx = 10 * row; (idx < 10 * row + 10) && (idx < dim); ++idx) {
+      printf("%f ", gsl_vector_get(v, idx));
+    }
+    printf("\n");
+  }
+  printf("\n");
+}
+
+/**
+ * Compute the (Moore-Penrose) pseudo-inverse of a matrix.
+ *
+ * If the singular value decomposition (SVD) of A = U Sigma V^T then the pseudoinverse A^+ = V Sigma^+ U^T, where ^T indicates transpose and Sigma^+ is obtained by taking the reciprocal of each nonzero element on the diagonal, leaving zeros in place. Elements on the diagonal not larger than ``rcond`` times the largest singular value are considered zero.
+ *
+ * @param A Input matrix. **WARNING**: the input matrix ``A`` is destroyed (overwritten by the SVD) unless it has more columns than rows, in which case a transposed copy is decomposed instead. In all cases it is still the responsibility of the caller to free it.
+ * @param rcond A real number specifying the singular value threshold for inclusion. NumPy default for ``rcond`` is 1E-15.
+ *
+ * @returns A_pinv Matrix containing the result. ``A_pinv`` is allocated in this function and it is the responsibility of the caller to free it.
+ **/
+gsl_matrix* moore_penrose_pinv(gsl_matrix* A, const realtype rcond)
+{
+
+  gsl_matrix *V, *Sigma_pinv, *U, *A_pinv;
+  gsl_matrix* _tmp_mat = nullptr;
+  gsl_vector* _tmp_vec;
+  gsl_vector* u;
+  realtype x, cutoff;
+  size_t i, j;
+  unsigned int n = A->size1; // rows of A
+  unsigned int m = A->size2; // columns of A
+  bool was_swapped = false;
+
+  if (m > n) {
+    /* libgsl SVD can only handle the case m <= n - transpose matrix */
+    was_swapped = true;
+    _tmp_mat = gsl_matrix_alloc(m, n);
+    gsl_matrix_transpose_memcpy(_tmp_mat, A);
+    A = _tmp_mat; // from here on A is the local transposed copy; the caller's matrix is untouched
+    i = m;        // swap m and n (i is only used as scratch here)
+    m = n;
+    n = i;
+  }
+
+  /* do SVD */
+  V = gsl_matrix_alloc(m, m);
+  u = gsl_vector_alloc(m);
+  _tmp_vec = gsl_vector_alloc(m);
+  gsl_linalg_SV_decomp(A, V, u, _tmp_vec); // in place: A now holds the thin U, u the singular values
+  gsl_vector_free(_tmp_vec);
+
+  /* compute Sigma^+: reciprocal of the singular values above the cutoff, 0 for the others */
+  Sigma_pinv = gsl_matrix_alloc(m, n);
+  gsl_matrix_set_zero(Sigma_pinv);
+  cutoff = rcond * gsl_vector_max(u);
+
+  for (i = 0; i < m; ++i) {
+    if (gsl_vector_get(u, i) > cutoff) {
+      x = 1. / gsl_vector_get(u, i);
+    } else {
+      x = 0.;
+    }
+    gsl_matrix_set(Sigma_pinv, i, i, x);
+  }
+
+  /* libgsl SVD yields "thin" SVD - pad to full matrix by adding zeros */
+  U = gsl_matrix_alloc(n, n);
+  gsl_matrix_set_zero(U);
+
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < m; ++j) {
+      gsl_matrix_set(U, i, j, gsl_matrix_get(A, i, j));
+    }
+  }
+
+  if (_tmp_mat != nullptr) {
+    gsl_matrix_free(_tmp_mat); // the transposed working copy is no longer needed once U is filled
+  }
+
+  /* two dot products to obtain pseudoinverse */
+  _tmp_mat = gsl_matrix_alloc(m, n);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1., V, Sigma_pinv, 0., _tmp_mat); // _tmp_mat = V * Sigma^+
+
+  if (was_swapped) {
+    A_pinv = gsl_matrix_alloc(n, m);
+    gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1., U, _tmp_mat, 0., A_pinv); // A_pinv = U * (V * Sigma^+)^T
+  } else {
+    A_pinv = gsl_matrix_alloc(m, n);
+    gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1., _tmp_mat, U, 0., A_pinv); // A_pinv = (V * Sigma^+) * U^T
+  }
+
+  gsl_matrix_free(_tmp_mat);
+  gsl_matrix_free(U);
+  gsl_matrix_free(Sigma_pinv);
+  gsl_vector_free(u);
+  gsl_matrix_free(V);
+
+  return A_pinv;
+}
+
+int main()
+{
+  // Demo kept from the stand-alone pseudo-inverse example: print a 2x3 matrix
+  // and its pseudo-inverse. NOTE(review): presumably dead code inside this
+  // library source (not the program entry point if nested in o2::mch) - confirm.
+  const unsigned int nRows = 2;
+  const unsigned int nCols = 3;
+  const realtype singularCut = 1E-15;
+
+  // A = [[1, 3, 5], [2, 4, 6]], filled column by column
+  gsl_matrix* input = gsl_matrix_alloc(nRows, nCols);
+  for (unsigned int col = 0; col < nCols; ++col) {
+    for (unsigned int row = 0; row < nRows; ++row) {
+      gsl_matrix_set(input, row, col, 1. + row + 2. * col);
+    }
+  }
+
+  printf("A matrix:\n");
+  print_matrix(input);
+  gsl_matrix* pseudoInverse = moore_penrose_pinv(input, singularCut);
+  printf("\nPseudoinverse of A:\n");
+  print_matrix(pseudoInverse);
+
+  gsl_matrix_free(input);
+  gsl_matrix_free(pseudoInverse);
+
+  return 0;
+}
+
 ClusterPEM::ClusterPEM() = default;
 
 ClusterPEM::ClusterPEM(Pads* pads0, Pads* pads1)
@@ -111,7 +278,7 @@ ClusterPEM::ClusterPEM(const double* x, const double* y, const double* dx,
   aloneKPads = nullptr;
 
   //
-  if (ClusterConfig::processingLog >= ClusterConfig::info) {
+  if (clusterConfig.processingLog >= clusterConfig.info) {
     printf("-----------------------------\n");
     printf("Starting CLUSTER PROCESSING\n");
     printf("# cath0=%2d, cath1=%2d\n", nbrCath0, nbrCath1);
@@ -199,6 +366,17 @@ ClusterPEM::~ClusterPEM()
   deleteInt(aloneKPads);
 }
 
+double ClusterPEM::getMaxCharge()
+{
+  // Highest pad charge over the cathode planes that exist; -1 if there is none.
+  double highest = -1;
+  for (int cath = 0; cath < 2; ++cath) {
+    auto* plane = pads[cath];
+    highest = (plane == nullptr) ? highest : std::fmax(highest, vectorMax(plane->getCharges(), getNbrOfPads(cath)));
+  }
+  return highest;
+}
+
 int ClusterPEM::getIndexByRow(const char* matrix, PadIdx_t N, PadIdx_t M,
                               PadIdx_t* IIdx)
 {
@@ -380,7 +558,7 @@ void ClusterPEM::computeProjectedPads(const Pads& pad0InfSup,
       mapKToIJ[k].j = j;
       mapIJToK[i * N1 + j] = k;
       // Debug
-      if (ClusterConfig::padMappingLog >= ClusterConfig::debug) {
+      if (clusterConfig.padMappingLog >= clusterConfig.debug) {
         printf("newpad %d %d %d %9.3g %9.3g %9.3g %9.3g\n", i, j, k, projX[k],
                projY[k], projDX[k], projDY[k]);
       }
@@ -437,7 +615,7 @@ void ClusterPEM::computeProjectedPads(const Pads& pad0InfSup,
       ij_ptr++;
     }
   }
-  if (ClusterConfig::padMappingLog >= ClusterConfig::detail) {
+  if (clusterConfig.padMappingLog >= clusterConfig.detail) {
     printf("builProjectPads mapIJToK=%p, N0=%d N1=%d\\n", mapIJToK, N0, N1);
     for (int i = 0; i < N0; i++) {
       for (int j = 0; j < N1; j++) {
@@ -504,7 +682,7 @@ int ClusterPEM::buildProjectedGeometry(int includeSingleCathodePads)
     }
   }
   //
-  if (ClusterConfig::padMappingLog >= ClusterConfig::detail) {
+  if (clusterConfig.padMappingLog >= clusterConfig.detail) {
     printMatrixChar("  Intersection Matrix", intersectionMatrix, N0, N1);
   }
   //
@@ -529,7 +707,7 @@ int ClusterPEM::buildProjectedGeometry(int includeSingleCathodePads)
   }
   // Add alone pas and row/column separators
   maxNbrOfProjPads += nbrOfSinglePads + fmax(N0, N1);
-  if (ClusterConfig::padMappingLog >= ClusterConfig::detail) {
+  if (clusterConfig.padMappingLog >= clusterConfig.detail) {
     printf("  maxNbrOfProjPads %d\n", maxNbrOfProjPads);
   }
   //
@@ -541,7 +719,7 @@ int ClusterPEM::buildProjectedGeometry(int includeSingleCathodePads)
   JInterI = new PadIdx_t[maxNbrOfProjPads];
   int checkr = getIndexByRow(intersectionMatrix, N0, N1, IInterJ);
   int checkc = getIndexByColumns(intersectionMatrix, N0, N1, JInterI);
-  if (ClusterConfig::padMappingCheck) {
+  if (clusterConfig.padMappingCheck) {
     if ((checkr > maxNbrOfProjPads) || (checkc > maxNbrOfProjPads)) {
       printf(
         "Allocation pb for  IInterJ or JInterI: allocated=%d, needed for "
@@ -550,7 +728,7 @@ int ClusterPEM::buildProjectedGeometry(int includeSingleCathodePads)
       throw std::overflow_error("Allocation pb for  IInterJ or JInterI");
     }
   }
-  if (ClusterConfig::padMappingLog >= ClusterConfig::detail) {
+  if (clusterConfig.padMappingLog >= clusterConfig.detail) {
     printInterMap("  IInterJ", IInterJ, N0);
     printInterMap("  JInterI", JInterI, N1);
   }
@@ -573,7 +751,7 @@ int ClusterPEM::buildProjectedGeometry(int includeSingleCathodePads)
   computeProjectedPads(padInfSup0, padInfSup1, maxNbrOfProjPads, aloneIPads,
                        aloneJPads, aloneKPads, includeSingleCathodePads);
 
-  if (ClusterConfig::padMappingCheck) {
+  if (clusterConfig.padMappingCheck) {
     checkConsistencyMapKToIJ(intersectionMatrix, mapKToIJ, mapIJToK, aloneIPads,
                              aloneJPads, N0, N1, projectedPads->getNbrOfPads());
   }
@@ -584,7 +762,7 @@ int ClusterPEM::buildProjectedGeometry(int includeSingleCathodePads)
   int thereAreIsolatedPads = 0;
   projNeighbors = projectedPads->buildFirstNeighbors();
   // Pads::printPads("Projected Pads:", *projectedPads);
-  if (ClusterConfig::padMappingLog >= ClusterConfig::detail) {
+  if (clusterConfig.padMappingLog >= clusterConfig.detail) {
     printf("  Neighbors of the projected geometry\n");
     Pads::printNeighbors(projNeighbors, projectedPads->getNbrOfPads());
   }
@@ -596,7 +774,7 @@ int ClusterPEM::buildProjectedGeometry(int includeSingleCathodePads)
       thereAreIsolatedPads = 1;
       ij = mapKToIJ[k];
       if ((ij.i >= 0) && (ij.j >= 0)) {
-        if (ClusterConfig::padMappingLog >= ClusterConfig::detail) {
+        if (clusterConfig.padMappingLog >= clusterConfig.detail) {
           printf(" Isolated pad: nul intersection i,j = %d %d\n", ij.i, ij.j);
         }
         intersectionMatrix[ij.i * N1 + ij.j] = 0;
@@ -605,7 +783,7 @@ int ClusterPEM::buildProjectedGeometry(int includeSingleCathodePads)
       }
     }
   }
-  if ((ClusterConfig::padMappingLog >= ClusterConfig::detail) && thereAreIsolatedPads) {
+  if ((clusterConfig.padMappingLog >= clusterConfig.detail) && thereAreIsolatedPads) {
     printf("There are isolated pads %d\n", thereAreIsolatedPads);
   }
   //
@@ -721,7 +899,7 @@ double* ClusterPEM::projectChargeOnProjGeometry(int includeAlonePads)
     } else if (includeAlonePads) {
       // Alone j-pad
       k = aloneJPads[j];
-      if (ClusterConfig::padMappingCheck && (k < 0)) {
+      if (clusterConfig.padMappingCheck && (k < 0)) {
         printf("ERROR: Alone j-pad with negative index j=%d\n", j);
         // printf("Alone i-pad  i=%d, k=%d\n", i, k);
       }
@@ -767,7 +945,7 @@ int ClusterPEM::buildGroupOfPads()
   int nbrCath0 = (pads[0]) ? pads[0]->getNbrOfPads() : 0;
   int nbrCath1 = (pads[1]) ? pads[1]->getNbrOfPads() : 0;
 
-  if (ClusterConfig::groupsLog >= ClusterConfig::info || ClusterConfig::processingLog >= ClusterConfig::info) {
+  if (clusterConfig.groupsLog >= clusterConfig.info || clusterConfig.processingLog >= clusterConfig.info) {
     printf("\n");
     printf("[buildGroupOfPads] Group processing\n");
     printf("----------------\n");
@@ -778,7 +956,7 @@ int ClusterPEM::buildGroupOfPads()
   // are not considered. They are named 'single-pads'
   nbrOfProjGroups = getConnectedComponentsOfProjPadsWOSinglePads();
 
-  if (ClusterConfig::inspectModel >= ClusterConfig::active) {
+  if (clusterConfig.inspectModel >= clusterConfig.active) {
     saveProjPadToGroups(projPadToGrp, projectedPads->getNbrOfPads());
   }
 
@@ -817,7 +995,7 @@ int ClusterPEM::buildGroupOfPads()
     cathGroup[1] = new Groups_t[nCath1];
     vectorSetZeroShort(cathGroup[0], nCath0);
     vectorSetZeroShort(cathGroup[1], nCath1);
-    if (ClusterConfig::groupsLog >= ClusterConfig::info) {
+    if (clusterConfig.groupsLog >= clusterConfig.info) {
       printf("> Projected Groups nbrOfProjGroups=%d\n", nbrOfProjGroups);
       vectorPrintShort("  projPadToGrp", projPadToGrp, nProjPads);
     }
@@ -828,7 +1006,7 @@ int ClusterPEM::buildGroupOfPads()
     // Propagate proj-groups on the cathode pads
     nGroups = assignPadsToGroupFromProj(nbrOfProjGroups);
     // nGroups = assignGroupToCathPads( );
-    if (ClusterConfig::groupsLog >= ClusterConfig::info) {
+    if (clusterConfig.groupsLog >= clusterConfig.info) {
       printf("> Groups after cathodes propagation nCathGroups=%d\n", nGroups);
     }
 
@@ -866,7 +1044,7 @@ int ClusterPEM::buildGroupOfPads()
     for (int p = 0; p < nProjPads; p++) {
       projPadToGrp[p] = mapGrpToGrp[projPadToGrp[p]];
     }
-    if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+    if (clusterConfig.groupsLog >= clusterConfig.detail) {
       printf("> addIsolatedPadInGroups in cath-0 nNewGroups =%d\n", nGroups);
       vectorPrintShort("  mapGrpToGrp", mapGrpToGrp, nGroups + 1);
     }
@@ -886,7 +1064,7 @@ int ClusterPEM::buildGroupOfPads()
     for (int p = 0; p < nProjPads; p++) {
       projPadToGrp[p] = mapGrpToGrp[projPadToGrp[p]];
     }
-    if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+    if (clusterConfig.groupsLog >= clusterConfig.detail) {
       printf("> addIsolatedPadInGroups in cath-1 nNewGroups =%d\n", nGroups);
       vectorPrintShort("  mapGrpToGrp", mapGrpToGrp, nGroups + 1);
     }
@@ -896,7 +1074,7 @@ int ClusterPEM::buildGroupOfPads()
     // Some groups may be merged, others groups may diseappear
     // So the final groups must be renumbered
     int nNewGroups = renumberGroups(mapGrpToGrp, nGroups);
-    if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+    if (clusterConfig.groupsLog >= clusterConfig.detail) {
       printf("> Groups after renumbering nGroups=%d\n", nGroups);
       vectorPrintShort("  projPadToGrp", projPadToGrp, nProjPads);
       printf("  nNewGrpCath0=%d, nNewGrpCath1=%d, nGroups=%d\n", nNewGrpCath0,
@@ -915,7 +1093,7 @@ int ClusterPEM::buildGroupOfPads()
     updateProjectionGroups();
   }
 
-  if (ClusterConfig::groupsLog >= ClusterConfig::info || ClusterConfig::processingLog >= ClusterConfig::info) {
+  if (clusterConfig.groupsLog >= clusterConfig.info || clusterConfig.processingLog >= clusterConfig.info) {
     printf("  > Final Groups %d\n", nGroups);
     vectorPrintShort("  cathToGrp[0]", cathGroup[0], nbrCath0);
     vectorPrintShort("  cathToGrp[1]", cathGroup[1], nbrCath1);
@@ -942,7 +1120,7 @@ int ClusterPEM::getConnectedComponentsOfProjPadsWOSinglePads()
   int i, j, k;
   // printNeighbors();
 
-  if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+  if (clusterConfig.groupsLog >= clusterConfig.detail) {
     printf(
       "> Extract connected components "
       "[getConnectedComponentsOfProjPadsWOIsolatedPads]\n");
@@ -953,7 +1131,7 @@ int ClusterPEM::getConnectedComponentsOfProjPadsWOSinglePads()
       curPadGrp++;
     }
     k = curPadGrp - projPadToGrp;
-    if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+    if (clusterConfig.groupsLog >= clusterConfig.detail) {
       printf("    k=%d, nbrOfPadSetInGrp g=%d: n=%d\n", k, currentGrpId,
              nbrOfPadSetInGrp);
     }
@@ -962,7 +1140,7 @@ int ClusterPEM::getConnectedComponentsOfProjPadsWOSinglePads()
     // aloneKPads = 0 if only one cathode
     if (aloneKPads && (aloneKPads[k] != -1)) {
       // Alone Pad no group at the moment
-      if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+      if (clusterConfig.groupsLog >= clusterConfig.detail) {
         printf("    isolated pad %d\n", k);
       }
       projPadToGrp[k] = -1;
@@ -970,7 +1148,7 @@ int ClusterPEM::getConnectedComponentsOfProjPadsWOSinglePads()
       continue;
     }
     currentGrpId++;
-    if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+    if (clusterConfig.groupsLog >= clusterConfig.detail) {
       printf("    New Grp, pad k=%d in new grp=%d\n", k, currentGrpId);
     }
     projPadToGrp[k] = currentGrpId;
@@ -981,7 +1159,7 @@ int ClusterPEM::getConnectedComponentsOfProjPadsWOSinglePads()
     // Propagation of the group in all neighbour list
     for (; startIdx < endIdx; startIdx++) {
       i = neighToDo[startIdx];
-      if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+      if (clusterConfig.groupsLog >= clusterConfig.detail) {
         printf("    propagate grp to neighbours of i=%d ", i);
       }
       //
@@ -995,14 +1173,14 @@ int ClusterPEM::getConnectedComponentsOfProjPadsWOSinglePads()
           //
           // aloneKPads = 0 if only one cathode
           if (aloneKPads && (aloneKPads[j] != -1)) {
-            if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+            if (clusterConfig.groupsLog >= clusterConfig.detail) {
               printf("    isolated pad %d, ", j);
             }
             projPadToGrp[j] = -1;
             nbrOfPadSetInGrp++;
             continue;
           }
-          if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+          if (clusterConfig.groupsLog >= clusterConfig.detail) {
             printf("%d, ", j);
           }
           projPadToGrp[j] = currentGrpId;
@@ -1012,7 +1190,7 @@ int ClusterPEM::getConnectedComponentsOfProjPadsWOSinglePads()
           endIdx++;
         }
       }
-      if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+      if (clusterConfig.groupsLog >= clusterConfig.detail) {
         printf("\n");
       }
     }
@@ -1053,8 +1231,8 @@ int ClusterPEM::assignPadsToGroupFromProj(int nGrp)
   //
   PadIdx_t i, j;
   short g, prevGroup;
-  if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
-    printf("> Assign cath-grp from proj-grp [AssignPadsToGroupFromProj]\n");
+  if (clusterConfig.groupsLog >= clusterConfig.detail) {
+    printf("[AssignPadsToGroupFromProj] Assign cath-grp from proj-grp \n");
   }
   // Expand the projected Groups
   // 'projPadToGrp' to the pad groups 'padToGrp'
@@ -1086,7 +1264,8 @@ int ClusterPEM::assignPadsToGroupFromProj(int nGrp)
         // Already a grp (Conflict)
         // Group to fuse
         // Store the grp into grp matrix
-        cathGroup[0][i] = g;
+        // ??? to suppress cathGroup[0][i] = g;
+        cathGroup[0][i] = std::min(g, prevGroup);
         matGrpGrp[g * (nGrp + 1) + prevGroup] = 1;
         matGrpGrp[prevGroup * (nGrp + 1) + g] = 1;
       }
@@ -1106,13 +1285,14 @@ int ClusterPEM::assignPadsToGroupFromProj(int nGrp)
         matGrpGrp[g * (nGrp + 1) + g] = 1;
       } else {
         // Already a Group (Conflict)
-        cathGroup[1][j] = g;
+        // cathGroup[1][j] = g;
+        cathGroup[1][j] = std::min(g, prevGroup);
         matGrpGrp[g * (nGrp + 1) + prevGroup] = 1;
         matGrpGrp[prevGroup * (nGrp + 1) + g] = 1;
       }
     }
   }
-  if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+  if (clusterConfig.groupsLog >= clusterConfig.detail) {
     printMatrixShort("  Group/Group matrix", matGrpGrp, nGrp + 1, nGrp + 1);
     vectorPrintShort("  cathToGrp[0]", cathGroup[0], pads[0]->getNbrOfPads());
     vectorPrintShort("  cathToGrp[1]", cathGroup[1], pads[1]->getNbrOfPads());
@@ -1155,6 +1335,7 @@ int ClusterPEM::assignPadsToGroupFromProj(int nGrp)
               grpToMergedGrp[g] = grpToMergedGrp[j];
             }
           }
+          curGroup = grpToMergedGrp[j];
         }
       }
     }
@@ -1162,7 +1343,7 @@ int ClusterPEM::assignPadsToGroupFromProj(int nGrp)
   }
 
   // Perform the mapping group -> mergedGroups
-  if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+  if (clusterConfig.groupsLog >= clusterConfig.detail) {
     vectorPrintShort("  Mapping grpToMergedGrp", grpToMergedGrp, nGrp + 1);
   }
   //
@@ -1184,7 +1365,7 @@ int ClusterPEM::assignPadsToGroupFromProj(int nGrp)
   }
 
   // Perform the mapping grpToMergedGrp to the cath-groups
-  if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+  if (clusterConfig.groupsLog >= clusterConfig.detail) {
     vectorPrintShort("  Mapping renumbered grpToMergedGrp", grpToMergedGrp,
                      nGrp + 1);
   }
@@ -1195,10 +1376,10 @@ int ClusterPEM::assignPadsToGroupFromProj(int nGrp)
     }
   }
 
-  if (ClusterConfig::groupsCheck) {
+  if (clusterConfig.groupsCheck) {
     for (int c = 0; c < 2; c++) {
       for (int p = 0; p < pads[c]->getNbrOfPads(); p++) {
-        if (ClusterConfig::groupsLog >= ClusterConfig::info && cathGroup[c][p] == 0) {
+        if (clusterConfig.groupsLog >= clusterConfig.info && cathGroup[c][p] == 0) {
           printf("  [assignPadsToGroupFromProj] pad %d with no group\n", p);
         }
       }
@@ -1215,6 +1396,12 @@ int ClusterPEM::assignPadsToGroupFromProj(int nGrp)
 // Add boundary pads with q charge equal 0
 void ClusterPEM::addBoundaryPads()
 {
+  int nbrOfPads = getNbrOfPads();
+  // Simple case : no adding boundary pads
+  if (nbrOfPads == 1) {
+    return;
+  }
+  //
   for (int c = 0; c < 2; c++) {
     if (pads[c]) {
       Pads* bPads = pads[c]->addBoundaryPads();
@@ -1249,7 +1436,7 @@ int ClusterPEM::assignGroupToCathPads()
   vectorSetZeroShort(projGrpToCathGrp, nGrp + 1);
   int nCathGrp = 0;
   //
-  if (ClusterConfig::groupsLog >= ClusterConfig::info) {
+  if (clusterConfig.groupsLog >= clusterConfig.info) {
     printf("  [assignGroupToCathPads]\n");
   }
   //
@@ -1268,7 +1455,7 @@ int ClusterPEM::assignGroupToCathPads()
     // Intersection indexes of the 2 cath
     i = mapKToIJ[k].i;
     j = mapKToIJ[k].j;
-    if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+    if (clusterConfig.groupsLog >= clusterConfig.detail) {
       printf("map k=%d g=%d to i=%d/%d, j=%d/%d\n", k, g, i, nCath0, j, nCath1);
     }
     //
@@ -1324,7 +1511,7 @@ int ClusterPEM::assignGroupToCathPads()
     }
   }
 
-  if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+  if (clusterConfig.groupsLog >= clusterConfig.detail) {
     printf("  [assignGroupToCathPads] before renumbering nCathGrp=%d\n", nCathGrp);
     vectorPrintShort("    cath0ToGrpFromProj", cath0ToGrpFromProj, nCath0);
     vectorPrintShort("    cath1ToGrpFromProj", cath1ToGrpFromProj, nCath1);
@@ -1352,7 +1539,7 @@ int ClusterPEM::assignGroupToCathPads()
     projPadToGrp[i] = projGrpToCathGrp[projPadToGrp[i]];
   }
 
-  if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+  if (clusterConfig.groupsLog >= clusterConfig.detail) {
     vectorPrintShort("  projPadToGrp", projPadToGrp, nProjPads);
     vectorPrintShort("  cath0ToGrp", cathGroup[0], nCath0);
     vectorPrintShort("  cath1ToGrp", cathGroup[1], nCath1);
@@ -1375,6 +1562,79 @@ int ClusterPEM::getNbrOfPadsInGroup(int g)
   return nbrOfPads;
 }
 
+std::pair<double, double> ClusterPEM::computeChargeBarycenter(int plane)
+{
+  // Charge-weighted mean of the pad (x, y) coordinates.
+  //   plane == -1 : both cathode planes
+  //   plane ==  0 : cathode plane 0 only
+  //   otherwise   : cathode plane 1 only
+  // The total charge is not checked: a zero sum yields a non-finite result.
+  const int firstCath = ((plane == -1) || (plane == 0)) ? 0 : 1;
+  const int endCath = (plane == 0) ? 1 : 2;
+
+  // Accumulate sum(q * x), sum(q * y) and sum(q) over the selected planes
+  double sumQX = 0.0;
+  double sumQY = 0.0;
+  double sumQ = 0.0;
+  for (int cath = firstCath; cath < endCath; ++cath) {
+    const int nPads = getNbrOfPads(cath);
+    if (nPads <= 0) {
+      continue;
+    }
+    const double* q = getCharges(cath);
+    const double* padX = getPads(cath)->getX();
+    const double* padY = getPads(cath)->getY();
+    for (int i = 0; i < nPads; ++i) {
+      sumQX += q[i] * padX[i];
+      sumQY += q[i] * padY[i];
+      sumQ += q[i];
+    }
+  }
+  const double xBary = sumQX / sumQ;
+  const double yBary = sumQY / sumQ;
+  return std::make_pair(xBary, yBary);
+}
+
+std::pair<int, int> ClusterPEM::getNxNy(int c)
+{
+  // Counts the distinct x and y sampling positions used by the observable
+  // pads of cathode plane c. Coordinates are binned from the plane minimum
+  // with a step of twice the smallest dx (resp. dy) found in the plane.
+  auto* plane = pads[c];
+  const int nObs = plane->getNbrOfObsPads();
+  const double* padX = plane->getX();
+  const double* padY = plane->getY();
+  const double xOrigin = vectorMin(padX, nObs);
+  const double yOrigin = vectorMin(padY, nObs);
+  const double xStep = 2 * vectorMin(plane->getDX(), nObs);
+  const double yStep = 2 * vectorMin(plane->getDY(), nObs);
+
+  // Bin index of a coordinate: offset from the minimum, rounded to nearest
+  auto xBin = [xOrigin, xStep](double v) { return static_cast<int>((v - xOrigin) / xStep + 0.5); };
+  auto yBin = [yOrigin, yStep](double v) { return static_cast<int>((v - yOrigin) / yStep + 0.5); };
+
+  // One flag per bin; the largest coordinate fixes the number of bins
+  const int nXBins = xBin(vectorMax(padX, nObs)) + 1;
+  const int nYBins = yBin(vectorMax(padY, nObs)) + 1;
+  Mask_t xSeen[nXBins];
+  Mask_t ySeen[nYBins];
+  vectorSetShort(xSeen, 0, nXBins);
+  vectorSetShort(ySeen, 0, nYBins);
+
+  int nX = 0;
+  int nY = 0;
+  for (int i = 0; i < nObs; ++i) {
+    Mask_t& xFlag = xSeen[xBin(padX[i])];
+    Mask_t& yFlag = ySeen[yBin(padY[i])];
+    // A bin is counted only the first time a pad falls into it
+    nX += (xFlag == 0) ? 1 : 0;
+    nY += (yFlag == 0) ? 1 : 0;
+    xFlag = 1;
+    yFlag = 1;
+  }
+  return std::make_pair(nX, nY);
+}
+
 void ClusterPEM::removeLowChargedGroups(int nGroups)
 {
   int nbrPadsInGroup[2][nGroups + 1];
@@ -1398,11 +1658,13 @@ void ClusterPEM::removeLowChargedGroups(int nGroups)
 
   char str[256];
   for (Groups_t g = 1; g < nGroups + 1; g++) {
-    double charge = chargeInGroup[0][g] + chargeInGroup[1][g];
-    charge = charge / nbrCath;
+    // Better to use max charge of the two cath-planes
+    double chargePerCath = chargeInGroup[0][g] + chargeInGroup[1][g];
+    chargePerCath = chargePerCath / 2;
+    double maxCharge = std::fmax(chargeInGroup[0][g], chargeInGroup[1][g]);
     int nbrPads = nbrPadsInGroup[0][g] + nbrPadsInGroup[1][g];
-    if ((charge < ClusterConfig::minChargeOfClusterPerCathode) &&
-        (nbrPads > 0)) {
+    if ((maxCharge < clusterConfig.minChargeOfClusterPerCathode) && (nbrPads > 0)) {
+      // if ((chargePerCath < clusterConfig.minChargeOfClusterPerCathode) && (nbrPads > 0)) {
       // Remove groups
       // printf("  Remove group %d, charge=%f\n", g, charge);
       // scanf("%s", str);
@@ -1415,18 +1677,55 @@ void ClusterPEM::removeLowChargedGroups(int nGroups)
           }
         }
       }
-      if (ClusterConfig::groupsLog >= ClusterConfig::detail || ClusterConfig::processingLog >= ClusterConfig::info) {
+      if (clusterConfig.groupsLog >= clusterConfig.detail || clusterConfig.processingLog >= clusterConfig.info) {
         int nbrPads = chargeInGroup[0][g] + chargeInGroup[1][g];
-        printf("> [removeLowChargedGroups] Remove low charge group g=%d, # pads=%d\n", g, nbrPads);
+        printf("> [removeLowChargedGroups] Remove low charge group g=%d, charge per cath= %f, #pads=%d \n", g, maxCharge, nbrPads);
       }
     }
   }
-  if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+  if (clusterConfig.groupsLog >= clusterConfig.detail) {
     vectorPrintShort("  cathToGrp[0]", cathGroup[0], pads[0]->getNbrOfPads());
     vectorPrintShort("  cathToGrp[1]", cathGroup[1], pads[1]->getNbrOfPads());
   }
 }
 
+int ClusterPEM::filterFitModelOnSmallChargedSeeds(Pads& pads, double* theta, int K,
+                                                  Mask_t* maskFilteredTheta)
+{
+  // Charge filter: clears maskFilteredTheta[k] for every still-selected seed
+  // whose charge does not exceed clusterConfig.minChargeOfClusterPerCathode.
+  // A seed charge is its weight, renormalized over the selected seeds,
+  // multiplied by pads.getMeanTotalCharge().
+  // Returns the number of seeds left selected.
+  const double* seedWeights = getW(theta, K);
+  const int nSelectedBefore = vectorSumShort(maskFilteredTheta, K);
+  const double meanCharge = pads.getMeanTotalCharge();
+  const double minSeedCharge = clusterConfig.minChargeOfClusterPerCathode;
+  const bool verbose = (clusterConfig.processingLog >= clusterConfig.info);
+  // Sum of the weights of the seeds still selected
+  double selectedWeightSum = 0.0;
+  for (int k = 0; k < K; k++) {
+    selectedWeightSum += (maskFilteredTheta[k] * seedWeights[k]);
+  }
+  // Factor converting a raw weight into a charge
+  const double weightToCharge = meanCharge / selectedWeightSum;
+  int nKept = 0;
+  for (int k = 0; k < K; k++) {
+    const double seedCharge = maskFilteredTheta[k] * seedWeights[k] * weightToCharge;
+    const bool aboveCut = (seedCharge > minSeedCharge);
+    if (verbose && (maskFilteredTheta[k] && (seedCharge <= minSeedCharge))) {
+      printf("[filterFitModelOnSmallCharge] remove the %dth seeds, low charge=%f \n", k, seedCharge);
+    }
+    maskFilteredTheta[k] = maskFilteredTheta[k] && aboveCut;
+    nKept += maskFilteredTheta[k];
+  }
+  if (verbose && (nSelectedBefore > nKept)) {
+    printf("[filterFitModelOnSmallCharge] remove %d seeds (cutOff=%5.2f)\n",
+           nSelectedBefore - nKept, minSeedCharge);
+  }
+  return nKept;
+}
+
 // Remove the seeds outside of the frame delimiting the cluster.
 int ClusterPEM::filterFitModelOnClusterRegion(Pads& pads, double* theta, int K,
                                               Mask_t* maskFilteredTheta)
@@ -1439,7 +1738,7 @@ int ClusterPEM::filterFitModelOnClusterRegion(Pads& pads, double* theta, int K,
   const double* dx = pads.getDX();
   const double* dy = pads.getDY();
   int N = pads.getNbrOfPads();
-  // compute the frame enclosing the pads Min/Max x/y
+  // Compute the frame enclosing the pads Min/Max x/y
   double xyTmp[N];
   int kSpacialFilter = 0;
   vectorAddVector(x, -1.0, dx, N, xyTmp);
@@ -1462,8 +1761,8 @@ int ClusterPEM::filterFitModelOnClusterRegion(Pads& pads, double* theta, int K,
     }
   }
 
-  if ((ClusterConfig::fittingLog >= ClusterConfig::info) && (kSpacialFilter != K)) {
-    printf("[filterFitModelOnClusterRegion] ---> Spacial Filter; removing %d hit\n", K - kSpacialFilter);
+  if ((clusterConfig.processingLog >= clusterConfig.info) && (kSpacialFilter != K)) {
+    printf("[filterFitModelOnClusterRegion] ---> Out of the frame; removing %d hit\n", K - kSpacialFilter);
   }
   //
   // W filter
@@ -1484,7 +1783,7 @@ int ClusterPEM::filterFitModelOnClusterRegion(Pads& pads, double* theta, int K,
     maskFilteredTheta[k] = maskFilteredTheta[k] && (w[k] > cutOff);
     kWFilter += (maskFilteredTheta[k] && (w[k] > cutOff));
   }
-  if ((ClusterConfig::fittingLog >= ClusterConfig::detail) && (kSpacialFilter > kWFilter)) {
+  if ((clusterConfig.processingLog >= clusterConfig.detail) && (kSpacialFilter > kWFilter)) {
     printf(
       "[filterFitModelOnClusterRegion] At least one hit such as w[k] < "
       "(0.05 / K) = %8.4f) -> removing %d hit\n",
@@ -1500,7 +1799,7 @@ int ClusterPEM::filterFitModelOnSpaceVariations(const double* thetaEM, int kEM,
                                                 Mask_t* maskFilteredTheta)
 {
   // Rq: kFit is the same for thetaEM & theta
-  vectorSetShort(maskFilteredTheta, 0, kFit);
+
   int kSpacialFilter = 0;
   //
   // Spatial filter on the theta deplacements
@@ -1536,18 +1835,22 @@ int ClusterPEM::filterFitModelOnSpaceVariations(const double* thetaEM, int kEM,
     // Select Seeds which didn't move with the fitting
     if (((muX[k] > xMin) && (muX[k] < xMax)) &&
         ((muY[k] > yMin) && (muY[k] < yMax))) {
-      maskFilteredTheta[k] = 1;
+      // maskFilteredTheta[k] = 1;
       kSpacialFilter++;
     } else {
-      if (ClusterConfig::processingLog >= ClusterConfig::info) {
+      if (clusterConfig.processingLog >= clusterConfig.info) {
         printf("[filterFitModelOnSpaceVariations] ---> too much drift; deltaX/Y=(%6.2f,%6.2f) ---> k=%3d removed\n",
                muEMX[k] - muX[k], muEMY[k] - muY[k], k);
-        printf("     ??? muEMDx[kMin], muEMDy[kMin] = %f, %f\n", muEMDx[kMin], muEMDy[kMin]);
+        printf("[filterFitModelOnSpaceVariations] ---> too much drift; EM=(%6.2f,%6.2f) dxyEM=(%6.2f,%6.2f) Fit=(%6.2f,%6.2f)\n",
+               muEMX[k], muEMY[k], muEMDx[k], muEMDy[k], muX[k], muY[k]);
+        // printf("     ??? muEMDx[kMin], muEMDy[kMin] = %f, %f\n", muEMDx[kMin], muEMDy[kMin]);
       }
+      // Disable this seeds
+      maskFilteredTheta[k] = 0;
     }
   }
-  if ((ClusterConfig::processingLog >= ClusterConfig::info) && (kSpacialFilter != kFit)) {
-    printf("[filterFitModelOnSpaceVariations] ---> %d hit(s) removed\n", kFit - kSpacialFilter);
+  if ((clusterConfig.processingLog >= clusterConfig.info) && (kSpacialFilter != kFit)) {
+    printf("[filterFitModelOnSpaceVariations] ---> Final filter: %d hit(s) removed\n", kFit - kSpacialFilter);
   }
   //
   // Suppress close seeds ~< 0.5 pad size
@@ -1563,8 +1866,8 @@ int ClusterPEM::filterFitModelOnSpaceVariations(const double* thetaEM, int kEM,
           double maxErrorY = 2.0 * std::fmin(muEMDy[k], muEMDy[l]);
           bool xClose = std::fabs(muX[k] - muX[l]) < maxErrorX;
           bool yClose = std::fabs(muY[k] - muY[l]) < maxErrorY;
-          // printf(" ??? muX k/l= %f, %f, muDX K/l= %f, %f\n",  muX[k], muX[l], muEMDx[k], muEMDx[l]);
-          // printf(" ??? muY k/l= %f, %f, muDY K/l= %f, %f\n",  muY[k], muY[l], muEMDy[k], muEMDy[l]);
+          // printf(" ??? Close seeds muX k/l= %f, %f, muDX K/l= %f, %f\n",  muX[k], muX[l], muEMDx[k], muEMDx[l]);
+          // printf(" ??? Close seeds muY k/l= %f, %f, muDY K/l= %f, %f\n",  muY[k], muY[l], muEMDy[k], muEMDy[l]);
           if (xClose && yClose) {
             // Supress the weakest weight
             if (w[k] > w[l]) {
@@ -1580,9 +1883,9 @@ int ClusterPEM::filterFitModelOnSpaceVariations(const double* thetaEM, int kEM,
     }
   }
   int kCloseFilter = vectorSumShort(maskFilteredTheta, kFit);
-  if (ClusterConfig::processingLog >= ClusterConfig::info && (kSpacialFilter > kCloseFilter)) {
+  if (clusterConfig.processingLog >= clusterConfig.info && (kSpacialFilter > kCloseFilter)) {
     printf(
-      "[filterFitModelOnSpaceVariations] ---> removing %d close seeds\n",
+      "[filterFitModelOnSpaceVariations] ---> Close seeds: removed %d close seeds\n",
       kSpacialFilter - kCloseFilter);
   }
   return kCloseFilter;
@@ -1592,23 +1895,8 @@ DataBlock_t ClusterPEM::fit(double* thetaInit, int kInit)
 {
   int nbrCath0 = getNbrOfPads(0);
   int nbrCath1 = getNbrOfPads(1);
-  /*
-  // ??? (111) Invalid fiting
-  // Build the mask to handle pads with the g-group
-  Mask_t maskFit0[nbrCath0];
-  Mask_t maskFit1[nbrCath1];
-  Mask_t *maskFit[2] = {maskFit0, maskFit1};
-  // printf(" ???? nbrCath0=%d, nbrCath1=%d\n", nbrCath0, nbrCath1);
-  // Ne laplacian ??? getMaskCathToGrpFromProj( g, maskFit0, maskFit1, nbrCath0,
-  nbrCath1); vectorBuildMaskEqualShort( pads[0]->cath, g, nbrCath0, maskFit0);
-  vectorBuildMaskEqualShort( pads[1]->cath, g, nbrCath1, maskFit1);
-  // vectorPrintShort("maskFit0", maskFit0, nbrCath0);
-  // vectorPrintShort("maskFit1", maskFit1, nbrCath1);
-  int nFits[2];
-  nFits[1] = vectorSumShort( maskFit1, nbrCath1);
-  nFits[0] = vectorSumShort( maskFit0, nbrCath0);
-  */
   int nFit = nbrCath0 + nbrCath1;
+  int nObsFit = getNbrOfObsPads();
   // double *xyDxyFit;
   // double *qFit;
   int filteredK = 0;
@@ -1616,7 +1904,82 @@ DataBlock_t ClusterPEM::fit(double* thetaInit, int kInit)
   // ThetaFit (output)
   double* thetaFit = new double[kInit * 5];
   vectorSet(thetaFit, 0, kInit * 5);
-  if (nFit < ClusterConfig::nbrOfPadsLimitForTheFitting) {
+  int nX(0), nY(0);
+  if (nbrOfCathodePlanes == 1) {
+    std::pair<int, int> nXY = getNxNy(singleCathPlaneID);
+    nX = nXY.first;
+    nY = nXY.second;
+  }
+  /*
+    else if( getNbrOfObsPads(0) + getNbrOfObsPads(1) < 5 ) {
+    // ??? maybe to perform before LocalMax
+    std::pair<int,int> nXY0 = getNxNy(0);
+    std::pair<int,int> nXY1 = getNxNy(1);
+    int n
+    if( (nXY0.second == 1) && (nXY1.first == 1) ) {
+
+    }
+  }
+  */
+  // Parameters dimensionality - Default (w,x, y)
+  int dimOfParameters = 3;
+  // Which axe to perform the fitting x(axe=0) or y(axe=1) or both (axe=-1)
+  int axe = -1;
+
+  Pads* fitPads = nullptr;
+
+  if ((kInit == 1) && (nbrOfCathodePlanes == 1)) {
+    // Get the Charge centroid to go closer to the seed
+    std::pair<double, double> bary = computeChargeBarycenter(singleCathPlaneID);
+    double* muX = getMuX(thetaInit, kInit);
+    double* muY = getMuY(thetaInit, kInit);
+    // double *w = getW(thetaInit, kInit);
+    muX[0] = bary.first;
+    muY[0] = bary.second;
+  }
+  if (clusterConfig.processingLog >= clusterConfig.info) {
+    printf("fit nbrCath=%d nbrPads=(%d, %d) nbrObsPads=(%d, %d) nX/Y=(%d, %d)\n",
+           nbrOfCathodePlanes, getNbrOfPads(0), getNbrOfPads(1),
+           getNbrOfObsPads(0), getNbrOfObsPads(1), nX, nY);
+  }
+  // Simple cases
+  if ((nbrOfCathodePlanes == 1) && ((nX == 1) || (nY == 1))) {
+    dimOfParameters = 2;
+    // axe to fit
+    axe = (nX == 1) ? 1 : 0;
+    fitPads = pads[singleCathPlaneID];
+    pads[singleCathPlaneID]->setCathodes(singleCathPlaneID);
+
+  } else {
+    // Concatenate the 2 planes of the subCluster For the fitting
+    fitPads = new Pads(pads[0], pads[1], Pads::xydxdyMode);
+  }
+  // Compute the barycenter to speed
+  /*
+  double xBary(0), yBary(0), wCharges(0);
+  for(int c=0; c <2; c++) {
+    if ( getNbrOfPads(c) > 0) {
+      const double *charges = getCharges(c);
+      const double *X = getPads(c)->getX();
+      const double *Y = getPads(c)->getY();
+      for (int p=0; p < getNbrOfPads(c); p++) {
+        xBary +=  charges[p] * X[p];
+        yBary +=  charges[p] * Y[p];
+        wCharges += charges[p];
+      }
+    }
+  }
+  xBary = xBary / wCharges;
+  yBary = yBary / wCharges;
+  double *muX = getMuX(thetaFit, kInit);
+  double *muY = getMuY(thetaFit, kInit);
+  double *w = getW(thetaFit, kInit);
+  muX[0] = xBary;
+  muY[0] = yBary;
+  w[0] = 1.0;
+  finalK = 1;
+  */
+  if ((nObsFit > 1) && (nObsFit < clusterConfig.nbrOfPadsLimitForTheFitting)) {
     //
     // Preparing the fitting
     //
@@ -1628,46 +1991,55 @@ DataBlock_t ClusterPEM::fit(double* thetaInit, int kInit)
     */
     //
 
-    // Concatenate the 2 planes of the subCluster For the fitting
-    Pads* fitPads = new Pads(pads[0], pads[1], Pads::xydxdyMode);
+    // ??? Pads::printPads("Pads for fitting", *fitPads);
     // khi2 (output)
     double khi2[1];
     // pError (output)
-    double pError[3 * kInit * 3 * kInit];
-    if (ClusterConfig::fittingLog >= ClusterConfig::detail) {
+    // double pError[3 * kInit * 3 * kInit];
+    double pError[dimOfParameters * kInit * dimOfParameters * kInit];
+    if (clusterConfig.fittingLog >= clusterConfig.detail) {
       printf("Starting the fitting\n");
       printf("- # cath0, cath1 for fitting: %2d %2d\n", getNbrOfPads(0),
              getNbrOfPads(1));
       printTheta("- thetaInit", 1.0, thetaInit, kInit);
     }
     // Fit
-    if ((kInit * 3 - 1) <= nFit) {
+    if ((kInit * dimOfParameters - 1) <= nFit) {
+      // if ((kInit * 3 - 1) <= nFit) {
       /*
       fitMathieson( thetaInit, xyDxyFit, qFit, cathFit, notSaturatedFit,
       zCathTotalCharge, K, nFit, chamberId, processFitVerbose, thetaFit, khi2,
       pError
                 );
       */
-      fitMathieson(*fitPads, thetaInit, kInit, processFitVerbose, thetaFit,
+      fitMathieson(*fitPads, thetaInit, kInit, dimOfParameters, axe, processFitVerbose, thetaFit,
                    khi2, pError);
     } else {
-      printf("---> Fitting parameters to large : k=%d, 3k-1=%d, nFit=%d\n",
-             kInit, kInit * 3 - 1, nFit);
+      printf("---> Fitting parameters to large : k=%d, (3 or 2)*k-1=%d, nFit=%d\n",
+             kInit, kInit * dimOfParameters - 1, nFit);
       printf("     keep the EM solution\n");
       vectorCopy(thetaInit, kInit * 5, thetaFit);
     }
-    if (ClusterConfig::fittingLog >= ClusterConfig::info) {
+    if (clusterConfig.fittingLog >= clusterConfig.info) {
       printTheta("- thetaFit", 1.0, thetaFit, kInit);
     }
     // Filter Fitting solution
     Mask_t maskFilterFit[kInit];
+    // select all
+    vectorSetShort(maskFilterFit, 1, kInit);
+    int filteredK(0);
     // filteredK =
     //   filterFitModelOnClusterRegion(*fitPads, thetaFit, kInit, maskFilterFit);
-    int filteredK = filterFitModelOnSpaceVariations(thetaInit, kInit,
-                                                    thetaFit, kInit, maskFilterFit);
+
+    // WARNING: can't used because of the fitting permutation
+    // filteredK = filterFitModelOnSpaceVariations( thetaInit, kInit,
+    //                                              thetaFit, kInit, maskFilterFit);
+    // Remove small Cluster Charge
+    filteredK = filterFitModelOnSmallChargedSeeds(*fitPads, thetaFit, kInit,
+                                                  maskFilterFit);
     double filteredTheta[5 * filteredK];
     if ((filteredK != kInit) && (nFit >= filteredK)) {
-      if (ClusterConfig::fittingLog >= ClusterConfig::info) {
+      if (clusterConfig.fittingLog >= clusterConfig.info) {
         printf("Filtering the fitting K=%d >= K=%d\n", nFit, filteredK);
         // ??? Inv printTheta("- filteredTheta", filteredTheta, filteredK);
       }
@@ -1675,22 +2047,17 @@ DataBlock_t ClusterPEM::fit(double* thetaInit, int kInit)
         maskedCopyTheta(thetaFit, kInit, maskFilterFit, kInit, filteredTheta,
                         filteredK);
         /*
-        fitMathieson( filteredTheta, xyDxyFit, qFit, cathFit, notSaturatedFit,
-                    zCathTotalCharge, filteredK, nFit,
-                    chamberId, processFit,
-                    filteredTheta, khi2, pError
-                  );
-        */
-        fitMathieson(*fitPads, filteredTheta, filteredK, processFitVerbose,
+        fitMathieson(*fitPads, filteredTheta, filteredK, dimOfParameters, axe, processFitVerbose,
                      filteredTheta, khi2, pError);
         delete[] thetaFit;
         thetaFit = new double[filteredK * 5];
+         */
         copyTheta(filteredTheta, filteredK, thetaFit, filteredK, filteredK);
         finalK = filteredK;
       } else {
         // No hit with the fitting
-        vectorCopy(thetaInit, kInit * 5, thetaFit);
-        finalK = kInit;
+        // ???? vectorCopy(thetaInit, kInit * 5, thetaFit);
+        finalK = 0;
       }
     } else {
       // ??? InvvectorCopy( thetaFit, K*5, thetaFitFinal);
@@ -1698,16 +2065,18 @@ DataBlock_t ClusterPEM::fit(double* thetaInit, int kInit)
       finalK = kInit;
     }
   } else {
-    // Keep "thetaInit
-    if (ClusterConfig::fittingLog >= ClusterConfig::info) {
-      printf(
-        "[Cluster.fit] Keep the EM Result: nFit=%d >= "
-        "nbrOfPadsLimitForTheFitting=%d\n",
-        nFit, ClusterConfig::nbrOfPadsLimitForTheFitting);
+    // Keep "thetaInit (not enough pads)
+    // or only one pad
+    if (clusterConfig.processingLog >= clusterConfig.info) {
+      printf("[Cluster.fit] nbrOfPadsLimit reach. Keep the EM Result: nFit=%d >= nbrOfPadsLimitForTheFitting=%d\n",
+             nFit, clusterConfig.nbrOfPadsLimitForTheFitting);
     }
     vectorCopy(thetaInit, kInit * 5, thetaFit);
     finalK = kInit;
   }
+  if (axe == -1) {
+    delete fitPads;
+  }
   return std::make_pair(finalK, thetaFit);
 }
 
@@ -1768,7 +2137,7 @@ int ClusterPEM::renumberGroups(Mask_t* grpToGrp, int nGrp)
     }
   }
   int newNbrGroups = currentGrp;
-  if (ClusterConfig::groupsLog >= ClusterConfig::info) {
+  if (clusterConfig.groupsLog >= clusterConfig.info) {
     printf("> Groups renumbering [renumberGroups] newNbrGroups=%d\n",
            newNbrGroups);
     vectorPrintShort("  cath0ToGrp", cathGroup[0], pads[0]->getNbrOfPads());
@@ -1777,7 +2146,7 @@ int ClusterPEM::renumberGroups(Mask_t* grpToGrp, int nGrp)
   return newNbrGroups;
 }
 
-int ClusterPEM::findLocalMaxWithPEM(double* thetaL, int nbrOfPadsInTheGroupCath)
+Pads* ClusterPEM::findLocalMaxWithRefinement(double* thetaL, int nbrOfPadsInTheGroupCath)
 {
 
   /// ??? Verify if not already done
@@ -1791,192 +2160,1200 @@ int ClusterPEM::findLocalMaxWithPEM(double* thetaL, int nbrOfPadsInTheGroupCath)
   double preClusterCharge = cWeight0 + cWeight1;
   cWeight0 /= preClusterCharge;
   cWeight1 /= preClusterCharge;
+  int nMaxPads = std::fmax(getNbrOfPads(0), getNbrOfPads(1));
+  double dxMinPadSize = 1000.0;
+  double dyMinPadSize = 1000.0;
+  double minDY[2] = {1000.0, 1000.0};
+  int n0 = getNbrOfObsPads(0);
+  int n1 = getNbrOfObsPads(1);
+  if (n0) {
+    dxMinPadSize = vectorMin(cath0->getDX(), n0);
+  }
+  if (n1) {
+    dyMinPadSize = vectorMin(cath1->getDY(), n1);
+  }
   //
   int chId = chamberId;
-  if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::info || ClusterConfig::processingLog >= ClusterConfig::info) {
-    printf("  - [findLocalMaxWithPEM]\n");
-  }
-  // Trivial cluster : only 1 pads
-  if (projPads->getNbrOfPads() == 1) {
-    // Return the unique local maximum : the center of the pads
-    double* w = getW(thetaL, nbrOfPadsInTheGroupCath);
-    double* muX = getMuX(thetaL, nbrOfPadsInTheGroupCath);
-    double* muY = getMuY(thetaL, nbrOfPadsInTheGroupCath);
-    double* muDX = getVarX(thetaL, nbrOfPadsInTheGroupCath);
-    double* muDY = getVarY(thetaL, nbrOfPadsInTheGroupCath);
-    w[0] = 1.0;
-    muX[0] = projPads->getX()[0];
-    muY[0] = projPads->getY()[0];
-    muDX[0] = projPads->getDX()[0] * 0.5;
-    muDY[0] = projPads->getDY()[0] * 0.5;
-    return 1;
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.info || clusterConfig.processingLog >= clusterConfig.info) {
+    printf("  - [findLocalMaxWithRefinement]\n");
   }
-  int nMaxPads = std::fmax(getNbrOfPads(0), getNbrOfPads(1));
-  Pads* pixels = projPads->refinePads();
+
+  // Over allocate pixel for the refinement
+  int maxNbrOfPixels = 4 * projPads->getNbrOfPads();
+  // Call constructor with maxNbrOfPixels over allocation
+  Pads* pixels = new Pads(projPads, maxNbrOfPixels);
   int nPixels = pixels->getNbrOfPads();
   // Merge pads of the 2 cathodes
   // TODO ??? : see if it can be once with Fitting (see fitPads)
-  Pads* mPads = new Pads(pads[0], pads[1], Pads::xyInfSupMode);
-  int nPads = mPads->getNbrOfPads();
+  Pads* mergedPads = new Pads(pads[0], pads[1], Pads::xyInfSupMode);
+  int nPads = mergedPads->getNbrOfPads();
+  // Local maximum locations
   Pads* localMax = nullptr;
   Pads* saveLocalMax = nullptr;
   std::pair<double, double> chi2;
   int dof, nParameters;
-
   // Pixel initilization
   // Rq: the charge projection is not used
   pixels->setCharges(1.0);
+  // The field saturate is use to tag pixels as already refined
+  pixels->setSaturate(0);
   // Init Cij
-  double* Cij = new double[nPads * nPixels];
-  // Compute pad charge xyInfSup induiced by a set of charge (the pixels)
-  computeFastCij(*mPads, *pixels, Cij);
-  //
-  // Debug computeFastCij
-  /*
-  double *CijTmp = new double[nPads*nPixels];
-  computeCij( *mPads, *pixels, CijTmp);
-  vectorAddVector( Cij, -1, CijTmp, nPads*nPixels, CijTmp);
-  vectorAbs( CijTmp, nPads*nPixels, CijTmp);
-  double minDiff = vectorMin(CijTmp, nPads*nPixels);
-  double maxDiff = vectorMax(CijTmp, nPads*nPixels);
-  int argMax = vectorArgMax(CijTmp, nPads*nPixels);
-  printf("\n\n nPads, nPixels %d %d\n", nPads, nPixels);
-  printf("\n\n min/max(FastCij-Cij)=%f %f nPads*i+j %d %d\n", minDiff, maxDiff,
-  argMax / nPads, argMax % nPads); delete [] CijTmp;
-  */
+  double* Cij = new double[nPads * maxNbrOfPixels];
+  // ??? to be removed : MaskCij Not used
   // MaskCij: Used to disable Cij contribution (disable pixels)
-  Mask_t* maskCij = new Mask_t[nPads * nPixels];
-  // Init loop
+  Mask_t* maskCij = new Mask_t[nPads * maxNbrOfPixels];
+  // Compute pad charge (xyInfSup mode) induced with a set of charge (the pixels)
+  // computeCij(*mergedPads, *pixels, Cij);
+  computeFastCij(*mergedPads, *pixels, Cij);
 
-  double previousCriteriom = DBL_MAX;
-  double criteriom = DBL_MAX;
+  //
+  // Check computeFastCij
+  if (clusterConfig.mathiesonCheck) {
+    // Mode abort (-1)
+    checkCij(*mergedPads, *pixels, Cij, -1);
+  }
+
+  // Init loop
+  int nbrLocalMax = 0;
+  int nbrPrevLocalMax = 0;
+  double previousCriterion = DBL_MAX;
+  double criterion = DBL_MAX;
   bool goon = true;
   int macroIt = 0;
-  if (ClusterConfig::processingLog >= ClusterConfig::info) {
+  if (clusterConfig.processingLog >= clusterConfig.info) {
     printf(
-      "    Macro  nParam    chi2 chi2/ndof chi2/ndof chi2/ndof chi2/ndof  ch2/ndof\n"
-      "     It.      -       -       -       cath-0    cath-1   sum 0/1  weight-sum\n");
+      "    Macro  nParam    ndof   ndof   chi2 chi2/ndof chi2/ndof chi2/ndof chi2/ndof  ch2/ndof\n"
+      "     It.      -     cath0   cath1    -       -       cath-0    cath-1   sum 0/1  weight-sum\n");
   }
+
   while (goon) {
+    // Save previous local maxima and the criterion
     if (localMax != nullptr) {
       saveLocalMax = new Pads(*localMax, o2::mch::Pads::xydxdyMode);
     }
-    previousCriteriom = criteriom;
-    if (0) {
-      vectorSet(Cij, 0.0, nPads * nPixels);
-      for (int j = 0; j < nPads; j++) {
-        Cij[nPads * j + j] = 1.0;
-        // for (int i = 0; i < nPixels; i++) {
-        //   qPadPrediction[j] += Cij[nPads * i + j] * qPixels[i];
-        // }
-      }
-    }
+    previousCriterion = criterion;
+
     chi2 =
-      PoissonEMLoop(*mPads, *pixels, Cij, maskCij, 0, minPadResidues[macroIt],
-                    nIterations[macroIt], getNbrOfPads(0));
+      PoissonEMLoop(*mergedPads, *pixels, Cij, maskCij, 0, minPadResidues[macroIt],
+                    nIterations[macroIt]);
     // Obsolete
     // localMax = pixels->clipOnLocalMax(true);
-    localMax = pixels->extractLocalMax();
+
+    // Find local maxima and set the pixel to be refined in newPixelIdx
+    std::vector<PadIdx_t> newPixelIdx;
+    localMax = pixels->extractLocalMax(newPixelIdx, dxMinPadSize, dyMinPadSize);
+    nbrLocalMax = newPixelIdx.size();
+    // Debug
+    if (0) {
+      for (int t = 0; t < newPixelIdx.size(); t++) {
+        int idx = newPixelIdx[t];
+        printf("  localMax idx=%d, xy(%f, %f), q[idx]=%f, localMax.q[t]=%f max(pixels)=%f \n",
+               idx, pixels->getX()[idx], pixels->getY()[idx], pixels->getCharges()[idx], localMax->getCharges()[t], vectorMax(pixels->getCharges(), pixels->getNbrOfPads()));
+      }
+    }
     nParameters = localMax->getNbrOfPads();
     dof = nMaxPads - 3 * nParameters + 1;
+    if (dof == 0) {
+      dof = 1;
+    }
     double chi20 = chi2.first;
     double chi21 = chi2.second;
-    int ndof0 = getNbrOfPads(0) - 3 * nParameters + 1;
+    int ndof0, ndof1;
+    if (1) {
+      ndof0 = getNbrOfPads(0) - 3 * nParameters + 1;
+      ndof1 = getNbrOfPads(1) - 3 * nParameters + 1;
+    } else {
+      ndof0 = getNbrOfObsPads(0) - 3 * nParameters + 1;
+      ndof1 = getNbrOfObsPads(1) - 3 * nParameters + 1;
+    }
+    // printf("??? ndof0/1=%d %d \n", ndof0, ndof1);
+    if ((ndof0 <= 0) && (ndof1 <= 0)) {
+      // No good discriminant
+      // Force ndofx = 1
+      ndof0 = 1;
+      ndof1 = 1;
+    }
+    // ndofx <=0, deseable the cathode contribution
     if (ndof0 <= 0) {
       ndof0 = 1;
+      cWeight0 = 0.0;
+      cWeight1 = 1.0;
     }
-    int ndof1 = getNbrOfPads(1) - 3 * nParameters + 1;
     if (ndof1 <= 0.) {
       ndof1 = 1;
+      cWeight0 = 1.0;
+      cWeight1 = 0.0;
     }
-    if (dof == 0) {
-      dof = 1;
-    }
+
     // Model selection criteriom (nbre of parameters/seeds)
     // criteriom0 = fabs( (chi20+chi21 / dof));
     // criteriom1 = fabs(sqrt(chi20 / ndof0) + sqrt(chi21 / ndof1));
     // printf( "??? cWeight0=%f, cWeight1=%f\n", cWeight0, cWeight1);
-    criteriom = cWeight0 * sqrt(chi20 / ndof0) + cWeight1 * sqrt(chi21 / ndof1);
+    criterion = cWeight0 * sqrt(chi20 / ndof0) + cWeight1 * sqrt(chi21 / ndof1);
+    // Inv ??? 2       2   5272.16     14.24     12.73     10.86     23.59     11.93
+    //         3       3   4389.29     13.81     12.25     12.52     24.76     12.36
+
+    // printf( " ??? cWeight0=%f, sqrt(chi20 / ndof0)=%f, cWeight1=%f, sqrt(chi21 / ndof1)=%f\n", cWeight0, sqrt(chi20 / ndof0), cWeight1, sqrt(chi21 / ndof1));
 
-    if (ClusterConfig::processingLog >= ClusterConfig::info) {
+    if (clusterConfig.processingLog >= clusterConfig.info) {
       printf(
-        "     %2d     %3d   %7.2f   %7.2f   %7.2f   %7.2f   %7.2f   %7.2f\n",
-        macroIt, nParameters, chi20 + chi21, sqrt((chi20 + chi21) / dof),
+        "     %2d    %3d   %3d    %3d   %7.2f   %7.2f   %7.2f   %7.2f   %7.2f   %7.2f\n",
+        macroIt, nParameters, ndof0, ndof1, chi20 + chi21, sqrt((chi20 + chi21) / dof),
         sqrt(chi20 / ndof0), sqrt(chi21 / ndof1),
         sqrt(chi20 / ndof0) + sqrt(chi21 / ndof1),
-        cWeight0 * sqrt(chi20 / ndof0) + cWeight1 * sqrt(chi21 / ndof1));
+        criterion);
     }
-    if (ClusterConfig::inspectModel >= ClusterConfig::active) {
+    if (clusterConfig.inspectModel >= clusterConfig.active) {
       inspectSavePixels(macroIt, *pixels);
     }
+
+    // printf("Before refinement pixel.nPads=%d\n", pixels->getNbrOfPads());
+    pixels->refineLocalMaxAndUpdateCij(*mergedPads, newPixelIdx, Cij);
+    // printf("After refinement pixel.nPads=%d\n", pixels->getNbrOfPads());
+    if (clusterConfig.mathiesonCheck) {
+      checkCij(*mergedPads, *pixels, Cij, -1);
+    }
+
     macroIt++;
     goon =
-      (criteriom < 1.01 * previousCriteriom) && (macroIt < nMacroIterations);
-  }
-  delete pixels;
-  if (criteriom < 1.01 * previousCriteriom) {
+      ((criterion < 1.0 * previousCriterion) || (macroIt < 3)) && (macroIt < nMacroIterations);
+    // (criterion < 1.0 * previousCriterion) && (macroIt < nMacroIterations);
+    // ((criteriom < 1.0 * previousCriteriom) || ( nbrLocalMax > nbrPrevLocalMax)) && (macroIt < nMacroIterations);
+    nbrPrevLocalMax = nbrLocalMax;
+  }
+  /// with refinement ???
+  // delete pixels;
+  if (criterion < 1.0 * previousCriterion) {
     delete saveLocalMax;
   } else {
     delete localMax;
     localMax = saveLocalMax;
   }
 
-  //
-  // Select local Max
-  // Remove local Max < 0.01 * max(LocalMax)
-  //
-  double cutRatio = 0.01;
-  double qCut =
-    cutRatio * vectorMax(localMax->getCharges(), localMax->getNbrOfPads());
-  int k = 0;
-  double qSum = 0.0;
-  // Remove the last hits if > (nMaxPads +1) / 3
-  int nMaxSolutions =
-    int((std::max(getNbrOfPads(0), getNbrOfPads(1)) + 1.0) / 3.0);
-  // if (nMaxSolutions < 1) {
-  //     nMaxSolutions = 1;
-  //}
-  // To avoid 0 possibility and give more inputs to the fitting
-  nMaxSolutions += 1;
+  delete[] Cij;
+  delete[] maskCij;
+  return localMax;
+}
 
-  int removedLocMax = localMax->getNbrOfPads();
+Pads* ClusterPEM::findLocalMaxWithoutRefinement(double* thetaL, int nbrOfPadsInTheGroupCath)
+{
 
-  if (localMax->getNbrOfPads() > nMaxSolutions) {
-    if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::info) {
-      printf("seed selection: nbr Max parameters =%d, nLocMax=%d\n",
-             nMaxSolutions, localMax->getNbrOfPads());
-      printf(
-        "seed selection: Reduce the nbr of solutions to fit: Take %d/%d "
-        "solutions\n",
-        nMaxSolutions, localMax->getNbrOfPads());
-    }
-    int index[localMax->getNbrOfPads()];
-    for (int k = 0; k < localMax->getNbrOfPads(); k++) {
-      index[k] = k;
-    }
-    const double* qLocalMax = localMax->getCharges();
-    std::sort(index, &index[localMax->getNbrOfPads()],
-              [=](int a, int b) { return (qLocalMax[a] > qLocalMax[b]); });
-    // Reoder
-    qCut = qLocalMax[index[nMaxSolutions - 1]] - 1.e-03;
+  // Local-max search on the projected pads without pixel refinement; returns the kept localMax
+  // (thetaL and nbrOfPadsInTheGroupCath are unused here). Open question from the author: already done if 1 group?
+  Pads* cath0 = pads[0];
+  Pads* cath1 = pads[1];
+  Pads* projPads = projectedPads;
+  // Compute the charge weight of each cathode
+  double cWeight0 = getTotalCharge(0);
+  double cWeight1 = getTotalCharge(1);
+  double preClusterCharge = cWeight0 + cWeight1;
+  cWeight0 /= preClusterCharge;
+  cWeight1 /= preClusterCharge;
+  double dxMinPadSize, dyMinPadSize;
+  int n0 = getNbrOfObsPads(0);
+  int n1 = getNbrOfObsPads(1);
+  if (n0) {
+    dxMinPadSize = 0.5 * vectorMin(cath0->getDX(), n0);
   }
-  k = localMax->removePads(qCut);
-  localMax->normalizeCharges();
-  removedLocMax -= k;
-
-  if (ClusterConfig::processingLog >= ClusterConfig::info && removedLocMax != 0) {
-    printf(
-      "    > seed selection: Final cut -> %d percent (qcut=%8.2f), number of "
-      "local max removed = %d\n",
-      int(cutRatio * 100), qCut, removedLocMax);
+  if (n1) {
+    dyMinPadSize = 0.5 * vectorMin(cath1->getDY(), n1);
+  }
+  // NOTE(review): the pad-size based values computed above are overwritten here - which choice is intended?
+  dxMinPadSize = 1.0e-4;
+  dyMinPadSize = 1.0e-4;
+  //
+  int chId = chamberId;
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.info || clusterConfig.processingLog >= clusterConfig.info) {
+    printf("  - [findLocalMaxWithoutRefinement]\n");
   }
 
-  // Store the
-  int K0 = localMax->getNbrOfPads();
-  int K = std::min(K0, nbrOfPadsInTheGroupCath);
-  double* w = getW(thetaL, nbrOfPadsInTheGroupCath);
-  double* muX = getMuX(thetaL, nbrOfPadsInTheGroupCath);
-  double* muY = getMuY(thetaL, nbrOfPadsInTheGroupCath);
+  int nMaxPads = std::fmax(getNbrOfPads(0), getNbrOfPads(1));
+
+  // ??? To Optimize/debug
+  // Pads* pixels = projPads->refinePads();
+  // Pads* pixels = projPads;
+
+  // maxNbrOfPixels equals the number of projected pads here (no over-allocation in this variant)
+  int maxNbrOfPixels = projPads->getNbrOfPads();
+  if (maxNbrOfPixels == 0) {
+    throw std::out_of_range("[findLocalMaxWithoutRefinement] No projected pads");
+  }
+  // Call the constructor with maxNbrOfPixels as the allocation size
+  Pads* pixels = new Pads(projPads, maxNbrOfPixels);
+  int nPixels = pixels->getNbrOfPads();
+  // Merge pads of the 2 cathodes
+  // TODO ??? : see if it can be done once with the fitting (see fitPads)
+  Pads* mergedPads = new Pads(pads[0], pads[1], Pads::xyInfSupMode);
+  int nPads = mergedPads->getNbrOfPads();
+  // ??? printf("    nbr merged pads = %d\n", nPads);
+  // Local maximum locations
+  Pads* localMax = nullptr;
+  Pads* saveLocalMax = nullptr;
+  std::pair<double, double> chi2;
+  int dof, nParameters;
+  // Pixel initialization
+  // Rq: the charge projection is not used
+  pixels->setCharges(1.0);
+  // The field saturate is used to tag pixels as already refined
+  pixels->setSaturate(0);
+  // Init Cij
+  double* Cij = new double[nPads * maxNbrOfPixels];
+  // ??? to be removed : MaskCij Not used
+  // MaskCij: Used to disable Cij contribution (disable pixels)
+  Mask_t* maskCij = new Mask_t[nPads * maxNbrOfPixels];
+  // Compute pad charge (xyInfSup mode) induced with a set of charge (the pixels)
+  // computeCij(*mergedPads, *pixels, Cij);
+  computeFastCij(*mergedPads, *pixels, Cij);
+
+  //
+  // Check computeFastCij
+  if (clusterConfig.mathiesonCheck) {
+    // Mode abort (-1)
+    checkCij(*mergedPads, *pixels, Cij, -1);
+  }
+
+  // Init loop
+  int nbrLocalMax = 0;
+  int nbrPrevLocalMax = 0;
+  double previousCriterion = DBL_MAX;
+  double criterion = DBL_MAX;
+  bool goon = true;
+  int macroIt = 0;
+  if (clusterConfig.processingLog >= clusterConfig.info) {
+    printf(
+      "    Macro  nParam    ndof   ndof   chi2 chi2/ndof chi2/ndof chi2/ndof chi2/ndof  ch2/ndof\n"
+      "     It.      -     cath0   cath1    -       -       cath-0    cath-1   sum 0/1  weight-sum\n");
+  }
+  // NOTE(review): nIterations[] is rewritten below, but the PoissonEMLoop calls in this function pass the constant 10
+  int nTotalIterations = nPads / 10;
+  int chunk = ceil(float(nTotalIterations) / (nMacroIterations + 1));
+  chunk = std::min(chunk, 5);
+  for (int it = 0; it < nMacroIterations; it++) {
+    nIterations[it] = (it + 1) * chunk;
+  }
+  nIterations[nMacroIterations - 1] += chunk;
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+    printf("    Macro Iterations: nIterations[0, 1, ..,nMacroIterations-1] = [%d, %d, ..., %d] \n",
+           nIterations[0], nIterations[1], nIterations[nMacroIterations - 1]);
+  }
+  chi2 = PoissonEMLoop(*mergedPads, *pixels, Cij, maskCij, 0, 0,
+                       10);
+  while (goon) {
+    // Save previous local maxima and the criterion (NOTE(review): earlier saveLocalMax/localMax objects are overwritten without delete - looks like a leak, confirm)
+    if (localMax != nullptr) {
+      saveLocalMax = new Pads(*localMax, o2::mch::Pads::xydxdyMode);
+    }
+    previousCriterion = criterion;
+
+    chi2 =
+      // PoissonEMLoop(*mergedPads, *pixels, Cij, maskCij, 0, minPadResidues[macroIt],
+      //              nIterations[macroIt]);
+      PoissonEMLoop(*mergedPads, *pixels, Cij, maskCij, 0, 0,
+                    10);
+    // Obsolete
+    // localMax = pixels->clipOnLocalMax(true);
+
+    // Find local maxima; newPixelIdx is filled by the call (only its size is used in this variant, besides the debug print)
+    std::vector<PadIdx_t> newPixelIdx;
+    localMax = pixels->extractLocalMaxOnCoarsePads_Remanent(newPixelIdx, dxMinPadSize, dyMinPadSize);
+    // localMax = pixels->extractLocalMaxOnCoarsePads_Remanent( newPixelIdx, -1., -1.);
+    // localMax = pixels->extractLocalMaxOnCoarsePads( newPixelIdx);
+    nbrLocalMax = newPixelIdx.size();
+    // Debug
+    if (0) {
+      for (int t = 0; t < newPixelIdx.size(); t++) {
+        int idx = newPixelIdx[t];
+        printf("  localMax idx=%d, xy(%f, %f), q[idx]=%f, localMax.q[t]=%f max(pixels)=%f \n",
+               idx, pixels->getX()[idx], pixels->getY()[idx], pixels->getCharges()[idx], localMax->getCharges()[t], vectorMax(pixels->getCharges(), pixels->getNbrOfPads()));
+      }
+    }
+    nParameters = localMax->getNbrOfPads();
+    dof = nMaxPads - 3 * nParameters + 1;
+    if (dof == 0) {
+      dof = 1;
+    }
+    double chi20 = chi2.first;
+    double chi21 = chi2.second;
+    int ndof0, ndof1;
+    if (1) {
+      ndof0 = getNbrOfPads(0) - 3 * nParameters + 1;
+      ndof1 = getNbrOfPads(1) - 3 * nParameters + 1;
+    } else {
+      ndof0 = getNbrOfObsPads(0) - 3 * nParameters + 1;
+      ndof1 = getNbrOfObsPads(1) - 3 * nParameters + 1;
+    }
+    // printf("??? ndof0/1=%d %d \n", ndof0, ndof1);
+    if ((ndof0 <= 0) && (ndof1 <= 0)) {
+      // No good discriminant
+      // Force ndofx = 1
+      ndof0 = 1;
+      ndof1 = 1;
+    }
+    // ndofx <= 0: disable the contribution of that cathode (its weight is set to 0)
+    if (ndof0 <= 0) {
+      ndof0 = 1;
+      cWeight0 = 0.0;
+      cWeight1 = 1.0;
+    }
+    if (ndof1 <= 0.) {
+      ndof1 = 1;
+      cWeight0 = 1.0;
+      cWeight1 = 0.0;
+    }
+
+    // Model selection criterion (number of parameters/seeds)
+    // criteriom0 = fabs( (chi20+chi21 / dof));
+    // criteriom1 = fabs(sqrt(chi20 / ndof0) + sqrt(chi21 / ndof1));
+    // printf( "??? cWeight0=%f, cWeight1=%f\n", cWeight0, cWeight1);
+    criterion = cWeight0 * sqrt(chi20 / ndof0) + cWeight1 * sqrt(chi21 / ndof1);
+    // Inv ??? 2       2   5272.16     14.24     12.73     10.86     23.59     11.93
+    //         3       3   4389.29     13.81     12.25     12.52     24.76     12.36
+
+    // printf( " ??? cWeight0=%f, sqrt(chi20 / ndof0)=%f, cWeight1=%f, sqrt(chi21 / ndof1)=%f\n", cWeight0, sqrt(chi20 / ndof0), cWeight1, sqrt(chi21 / ndof1));
+
+    if (clusterConfig.processingLog >= clusterConfig.info) {
+      printf(
+        "     %2d    %3d   %3d    %3d   %7.2f   %7.2f   %7.2f   %7.2f   %7.2f   %7.2f\n",
+        macroIt, nParameters, ndof0, ndof1, chi20 + chi21, sqrt((chi20 + chi21) / dof),
+        sqrt(chi20 / ndof0), sqrt(chi21 / ndof1),
+        sqrt(chi20 / ndof0) + sqrt(chi21 / ndof1),
+        criterion);
+    }
+    if (clusterConfig.inspectModel >= clusterConfig.active) {
+      inspectSavePixels(macroIt, *pixels);
+    }
+
+    macroIt++;
+    goon =
+      ((criterion < 1.0 * previousCriterion) && (macroIt < nMacroIterations));
+    //((criterion < 1.0 * previousCriterion) || (macroIt  < 3)) && (macroIt < nMacroIterations) ;
+    // (criterion < 1.0 * previousCriterion) && (macroIt < nMacroIterations);
+    // ((criteriom < 1.0 * previousCriteriom) || ( nbrLocalMax > nbrPrevLocalMax)) && (macroIt < nMacroIterations);
+    nbrPrevLocalMax = nbrLocalMax;
+  }
+  // NOTE(review): pixels and mergedPads (both allocated with new above) are never deleted in this function;
+  // the "delete pixels;" was disabled with the remark "with refinement ???" - looks like a leak, confirm.
+  if (criterion < 1.0 * previousCriterion) {
+    delete saveLocalMax;
+  } else {
+    delete localMax;
+    localMax = saveLocalMax;
+  }
+
+  delete[] Cij;
+  delete[] maskCij;
+  return localMax;
+}
+
+int ClusterPEM::findLocalMaxWithPEM(double* thetaL, int nbrOfPadsInTheGroupCath)
+{
+  // Seed search: stores at most nbrOfPadsInTheGroupCath local maxima in thetaL
+  // (w, muX, muY, varX, varY) and returns the number of seeds stored.
+  // Open question from the author: verify this is not already done (already done if 1 group).
+  Pads* cath0 = pads[0];
+  Pads* cath1 = pads[1];
+  Pads* projPads = projectedPads;
+
+  // Compute the charge weight of each cathode
+  double cWeight0 = getTotalCharge(0);
+  double cWeight1 = getTotalCharge(1);
+  double clusterCharge = cWeight0 + cWeight1;
+  cWeight0 /= clusterCharge;
+  cWeight1 /= clusterCharge;
+  // NOTE(review): cWeight0, cWeight1 and chId are not read again in this function
+  int chId = chamberId;
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.info || clusterConfig.processingLog >= clusterConfig.info) {
+    printf("  - [findLocalMaxWithPEM]\n");
+  }
+  //
+  // Trivial cluster: only 1 pad
+  //
+  // ??? if (projPads->getNbrOfPads() == 1) {
+  if (getNbrOfPads() == 1) {
+    if (clusterConfig.processingLog >= clusterConfig.info) {
+      printf("    Trivial case: only one pad\n");
+    }
+    // Unique local maximum: first projected pad center, with half of its DX/DY stored in the var slots
+    double* w = getW(thetaL, nbrOfPadsInTheGroupCath);
+    double* muX = getMuX(thetaL, nbrOfPadsInTheGroupCath);
+    double* muY = getMuY(thetaL, nbrOfPadsInTheGroupCath);
+    double* muDX = getVarX(thetaL, nbrOfPadsInTheGroupCath);
+    double* muDY = getVarY(thetaL, nbrOfPadsInTheGroupCath);
+    w[0] = 1.0;
+    muX[0] = projPads->getX()[0];
+    muY[0] = projPads->getY()[0];
+    muDX[0] = projPads->getDX()[0] * 0.5;
+    muDY[0] = projPads->getDY()[0] * 0.5;
+    // Return 0 seed if the total cluster charge is below clusterConfig.minChargeOfClusterPerCathode
+    return (clusterCharge < clusterConfig.minChargeOfClusterPerCathode) ? 0 : 1;
+  }
+  Pads* localMax = nullptr;
+
+  // int nMaxPads = std::fmax(getNbrOfPads(0), getNbrOfPads(1));
+
+  // vectorMin of DX/DY over the first getNbrOfObsPads(c) pads of each cathode (stays 1000.0 when that count is 0)
+  double minDX[2] = {1000.0, 1000.0};
+  double minDY[2] = {1000.0, 1000.0};
+  int n0 = getNbrOfObsPads(0);
+  int n1 = getNbrOfObsPads(1);
+  if (n0) {
+    minDX[0] = vectorMin(cath0->getDX(), n0);
+    minDY[0] = vectorMin(cath0->getDY(), n0);
+  }
+  if (n1) {
+    minDX[1] = vectorMin(cath1->getDX(), n1);
+    minDY[1] = vectorMin(cath1->getDY(), n1);
+  }
+
+  // Large pads: minDX of cathode 0 or minDY of cathode 1 above 3.5 (also true when that cathode kept the 1000.0 default)
+  bool largePads = (minDX[0] > 3.5) || (minDY[1] > 3.5);
+  if (largePads) {
+    // printf("??? minDXY %f %f without refinement \n", minDX, minDY);
+    localMax = findLocalMaxWithoutRefinement(thetaL, nbrOfPadsInTheGroupCath);
+  } else {
+    // printf("??? minDXY %f %f with refinement\n", minDX, minDY);
+    localMax = findLocalMaxWithRefinement(thetaL, nbrOfPadsInTheGroupCath);
+  }
+  // Debug ???
+  /*
+  for (int k = 0; k < localMax->getNbrOfPads(); k++) {
+    printf("findLocalMax ??? k=%d q=%f,  XY=%f,%f \n", k,
+            localMax->getCharges()[k], localMax->getX()[k],localMax->getY()[k]);
+  }
+  */
+  //
+  // Select local Max (former selection, kept for reference)
+  // Remove local Max < 0.01 * max(LocalMax)
+  //
+  //
+  // NOT USED: the "if (0)" block below is never executed
+  //
+  if (0) {
+    double cutRatio = 0.01;
+    double qCut =
+      cutRatio * vectorMax(localMax->getCharges(), localMax->getNbrOfPads());
+    int k = 0;
+    double qSum = 0.0;
+    // Remove the last hits if > (nMaxPads +1) / 3
+    int nMaxSolutions =
+      int((std::max(getNbrOfPads(0), getNbrOfPads(1)) + 1.0) / 3.0);
+    // if (nMaxSolutions < 1) {
+    //     nMaxSolutions = 1;
+    //}
+    // To avoid 0 possibility and give more inputs to the fitting
+    nMaxSolutions += 1;
+    int removedLocMax = localMax->getNbrOfPads();
+
+    if (localMax->getNbrOfPads() > nMaxSolutions) {
+      if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+        printf("seed selection: nbr Max parameters =%d, nLocMax=%d\n",
+               nMaxSolutions, localMax->getNbrOfPads());
+        printf(
+          "seed selection: Reduce the nbr of solutions to fit: Take %d/%d "
+          "solutions\n",
+          nMaxSolutions, localMax->getNbrOfPads());
+      }
+      int index[localMax->getNbrOfPads()];
+      for (int k = 0; k < localMax->getNbrOfPads(); k++) {
+        index[k] = k;
+      }
+      const double* qLocalMax = localMax->getCharges();
+      std::sort(index, &index[localMax->getNbrOfPads()],
+                [=](int a, int b) { return (qLocalMax[a] > qLocalMax[b]); });
+      // Reorder
+      qCut = qLocalMax[index[nMaxSolutions - 1]] - 1.e-03;
+    } else {
+      qCut = 0.70 * clusterConfig.minChargeOfClusterPerCathode;
+    }
+  }
+
+  // Charge cut on the local max: 70 % of minChargeOfClusterPerCathode rescaled by qPixMax / qPadMax (presumably removePads drops charges below qCut - confirm)
+  double cutRatio = 0.7;
+  cutRatio = largePads ? 0.0 : cutRatio;
+  // Local max Charge normalization
+  // double coef = ( (getNbrOfPads(0) == 0) || (getNbrOfPads(0) == 0) ) ? 1.0 : 0.5
+  // double meanCharge = coef * (getTotalCharge(0) + getTotalCharge(1));
+  double qPadMax = getMaxCharge();
+  double qPixMax = vectorMax(localMax->getCharges(), localMax->getNbrOfPads());
+  double qCut = cutRatio * clusterConfig.minChargeOfClusterPerCathode * qPixMax / qPadMax;
+  int k0 = localMax->getNbrOfPads();
+  int k = localMax->removePads(qCut);
+  localMax->normalizeCharges();
+  int removedLocMax = k0 - k;
+  // printf("k0, k %d %d qCut=%f qPixMax=%f qPadMax=%f\n", k0, k, qCut, qPixMax, qPadMax);
+  if (clusterConfig.processingLog >= clusterConfig.info && removedLocMax != 0) {
+    printf(
+      "    > seed selection: Final cut -> %d percent (qcut=%8.2f), number of "
+      "local max removed = %d\n",
+      int(cutRatio * 100), qCut, removedLocMax);
+  }
+
+  // Store the local max in thetaL, truncated to nbrOfPadsInTheGroupCath entries
+  int K0 = localMax->getNbrOfPads();
+  int K = std::min(K0, nbrOfPadsInTheGroupCath);
+  double* w = getW(thetaL, nbrOfPadsInTheGroupCath);
+  double* muX = getMuX(thetaL, nbrOfPadsInTheGroupCath);
+  double* muY = getMuY(thetaL, nbrOfPadsInTheGroupCath);
+  double* varX = getVarX(thetaL, nbrOfPadsInTheGroupCath);
+  double* varY = getVarY(thetaL, nbrOfPadsInTheGroupCath);
+  const double* ql = localMax->getCharges();
+  const double* xl = localMax->getX();
+  const double* yl = localMax->getY();
+  const double* dxl = localMax->getDX();
+  const double* dyl = localMax->getDY();
+  for (int k = 0; k < K; k++) {
+    w[k] = ql[k];
+    muX[k] = xl[k];
+    muY[k] = yl[k];
+    varX[k] = dxl[k];
+    varY[k] = dyl[k];
+    if (clusterConfig.processingLog >= clusterConfig.info && removedLocMax != 0) {
+      printf("    k=%d w=%f,  XY=%f,%f varXY=%f,%f\n", k, w[k], muX[k], muY[k], varX[k], varY[k]);
+    }
+  }
+  // printf("K0, K nbrOfPadsInTheGroupCath %d %d %d\n", K0, K, nbrOfPadsInTheGroupCath);
+  delete localMax;
+  return K;
+}
+
+/// Seed search with the Poisson EM (PEM) algorithm on a fully refined pixel grid.
+///
+/// The projected pads are refined once (Pads::refineAll) and the pixel grid is
+/// not adjusted afterwards. The local maxima of the pixel charges obtained
+/// after the EM iterations are the seeds; they are filtered by charge, then
+/// stored in thetaL (w, muX, muY, varX, varY).
+/// @param thetaL output parameter array, accessed through getW/getMuX/...
+/// @param nbrOfPadsInTheGroupCath size used to index thetaL; upper bound of the seed count
+/// @return the number of seeds written in thetaL
+int ClusterPEM::findLocalMaxWithPEMFullRefinement(double* thetaL, int nbrOfPadsInTheGroupCath)
+{
+  /// ??? Verify if not already done
+  // Already done if 1 group
+  Pads* projPads = projectedPads;
+  // Compute the charge weight of each cathode
+  double cWeight0 = getTotalCharge(0);
+  double cWeight1 = getTotalCharge(1);
+  double preClusterCharge = cWeight0 + cWeight1;
+  cWeight0 /= preClusterCharge;
+  cWeight1 /= preClusterCharge;
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.info || clusterConfig.processingLog >= clusterConfig.info) {
+    printf("  - [findLocalMaxWithPEM]\n");
+  }
+  // Trivial cluster: only 1 pad
+  if (projPads->getNbrOfPads() == 1) {
+    // Return the unique local maximum: the center of the pad
+    double* w = getW(thetaL, nbrOfPadsInTheGroupCath);
+    double* muX = getMuX(thetaL, nbrOfPadsInTheGroupCath);
+    double* muY = getMuY(thetaL, nbrOfPadsInTheGroupCath);
+    double* muDX = getVarX(thetaL, nbrOfPadsInTheGroupCath);
+    double* muDY = getVarY(thetaL, nbrOfPadsInTheGroupCath);
+    w[0] = 1.0;
+    muX[0] = projPads->getX()[0];
+    muY[0] = projPads->getY()[0];
+    muDX[0] = projPads->getDX()[0] * 0.5;
+    muDY[0] = projPads->getDY()[0] * 0.5;
+    return 1;
+  }
+  int nMaxPads = std::max(getNbrOfPads(0), getNbrOfPads(1));
+
+  // ??? To optimize/debug: the whole projection is refined
+  Pads* pixels = projPads->refineAll();
+  // Reserve place for refinement
+  /*
+  int maxNbrOfPixels = 4*projPads->getNbrOfPads();
+  Pads* pixels = new Pads(projPads, maxNbrOfPixels);
+  printf("pixel allocation %d\n", maxNbrOfPixels);
+  */
+  int nPixels = pixels->getNbrOfPads();
+  // Merge pads of the 2 cathodes
+  // TODO ??? : see if it can be once with Fitting (see fitPads)
+  Pads* mergedPads = new Pads(pads[0], pads[1], Pads::xyInfSupMode);
+  int nPads = mergedPads->getNbrOfPads();
+  Pads* localMax = nullptr;
+  Pads* saveLocalMax = nullptr;
+  std::pair<double, double> chi2;
+  int dof, nParameters;
+  if (clusterConfig.processingLog >= clusterConfig.info) {
+    Pads::printPads("???????? mergedPads", *mergedPads);
+  }
+  // Pixel initialization
+  // Rq: the charge projection is not used
+  pixels->setCharges(1.0);
+  // The field saturate is used to tag pixels already refined
+  // no refinement
+  // pixels->setSaturate(0);
+  // Init Cij
+  double* Cij = new double[nPads * nPixels];
+  // MaskCij: Used to disable Cij contribution (disable pixels)
+  Mask_t* maskCij = new Mask_t[nPads * nPixels];
+  // Compute pad charge xyInfSup induced by a set of charges (the pixels)
+  computeFastCij(*mergedPads, *pixels, Cij);
+  // computeCij(*mergedPads, *pixels, Cij);
+
+  //
+  // Check computeFastCij
+  /*
+  if (clusterConfig.mathiesonCheck) {
+    double *CijTmp = new double[nPads*nPixels];
+    double *diffCij = new double[nPads*nPixels];
+    computeCij( *mergedPads, *pixels, CijTmp);
+    vectorAddVector( Cij, -1, CijTmp, nPads*nPixels, diffCij);
+    vectorAbs( diffCij, nPads*nPixels, diffCij);
+    double minDiff = vectorMin(diffCij, nPads*nPixels);
+    double maxDiff = vectorMax(diffCij, nPads*nPixels);
+    int argMax = vectorArgMax(diffCij, nPads*nPixels);
+    printf("\n\n nPads, nPixels %d %d\n", nPads, nPixels);
+    int iIdx = argMax / nPads;
+    int jIdx = argMax % nPads;
+    printf("\n\n min/max(FastCij-Cij)=%f %f nPads*i+j %d %d\n", minDiff, maxDiff,
+    iIdx, jIdx);
+    printf("\n FastCij=%f differ from  Cij=%f\n", Cij[iIdx*nPads+jIdx], CijTmp[iIdx*nPads+jIdx]);
+    if ( maxDiff > 1.0e-5) {
+      for( int k=0; k< nPixels; k++) {
+        for( int l=0; l< nPads; l++) {
+          if (diffCij[k*nPads+l] >1.0e-5) {
+            printf("pad=%d pixel=%d FastCij=%f Cij=%f diff=%f\n", l, k, Cij[k*nPads+l], CijTmp[k*nPads+l], diffCij[k*nPads+l]);
+          }
+        }
+      }
+      printf("findLocalMaxWithPEM: WARNING maxDiff(Cij)=%f\n", maxDiff);
+      // throw std::out_of_range(
+      //    "[findLocalMaxWithPEM] bad Cij value");
+    }
+    delete [] CijTmp;
+  }
+  */
+
+  // Init loop
+
+  double previousCriteriom = DBL_MAX;
+  double criteriom = DBL_MAX;
+  bool goon = true;
+  int macroIt = 0;
+  if (clusterConfig.processingLog >= clusterConfig.info) {
+    printf(
+      "    Macro  nParam    chi2 chi2/ndof chi2/ndof chi2/ndof chi2/ndof  ch2/ndof\n"
+      "     It.      -       -       -       cath-0    cath-1   sum 0/1  weight-sum\n");
+  }
+  while (goon) {
+    if (localMax != nullptr) {
+      // Keep a single copy of the previous solution (release the older one)
+      delete saveLocalMax;
+      saveLocalMax = new Pads(*localMax, o2::mch::Pads::xydxdyMode);
+    }
+    previousCriteriom = criteriom;
+    /*
+    if (0) {
+    vectorSet( Cij, 0.0, nPads*nPixels);
+    for (int j = 0; j < nPads; j++) {
+      Cij[nPads * j + j] = 1.0;
+      // for (int i = 0; i < nPixels; i++) {
+      //   qPadPrediction[j] += Cij[nPads * i + j] * qPixels[i];
+      // }
+    }
+    }
+    */
+    int qCutMode = 0;
+    chi2 =
+      PoissonEMLoop(*mergedPads, *pixels, Cij, maskCij, qCutMode, minPadResidues[macroIt],
+                    nIterations[macroIt]);
+    // The previous solution was copied to saveLocalMax: release it before reassigning
+    delete localMax;
+    std::vector<PadIdx_t> newPixelIdx;
+    localMax = pixels->extractLocalMax(newPixelIdx, 0.0, 0.0);
+    // Debug
+    /*
+    for (int t=0; t < newPixelIdx.size(); t++) {
+      int idx = newPixelIdx[t];
+      printf("localMax idx=%d, xy(%f, %f), q[idx]=%f, localMax.q[t]=%f max(pixels)=%f \n",
+              idx, pixels->getX()[idx], pixels->getY()[idx], pixels->getCharges()[idx], localMax->getCharges()[t],  vectorMax(pixels->getCharges(),  pixels->getNbrOfPads()));
+    }
+    */
+    nParameters = localMax->getNbrOfPads();
+    dof = nMaxPads - 3 * nParameters + 1;
+    double chi20 = chi2.first;
+    double chi21 = chi2.second;
+    int ndof0 = getNbrOfPads(0) - 3 * nParameters + 1;
+    if (ndof0 <= 0) {
+      ndof0 = 1;
+    }
+    int ndof1 = getNbrOfPads(1) - 3 * nParameters + 1;
+    if (ndof1 <= 0) {
+      ndof1 = 1;
+    }
+    if (dof <= 0) {
+      dof = 1;
+    }
+    // Model selection criterion (number of parameters/seeds)
+    // criteriom0 = fabs( (chi20+chi21 / dof));
+    // criteriom1 = fabs(sqrt(chi20 / ndof0) + sqrt(chi21 / ndof1));
+    // printf( "??? cWeight0=%f, cWeight1=%f\n", cWeight0, cWeight1);
+    criteriom = cWeight0 * sqrt(chi20 / ndof0) + cWeight1 * sqrt(chi21 / ndof1);
+    // Inv ??? 2       2   5272.16     14.24     12.73     10.86     23.59     11.93
+    //         3       3   4389.29     13.81     12.25     12.52     24.76     12.36
+
+    if (clusterConfig.processingLog >= clusterConfig.info) {
+      printf(
+        "     %2d     %3d   %7.2f   %7.2f   %7.2f   %7.2f   %7.2f   %7.2f\n",
+        macroIt, nParameters, chi20 + chi21, sqrt((chi20 + chi21) / dof),
+        sqrt(chi20 / ndof0), sqrt(chi21 / ndof1),
+        sqrt(chi20 / ndof0) + sqrt(chi21 / ndof1),
+        cWeight0 * sqrt(chi20 / ndof0) + cWeight1 * sqrt(chi21 / ndof1));
+    }
+    if (clusterConfig.inspectModel >= clusterConfig.active) {
+      inspectSavePixels(macroIt, *pixels);
+    }
+    /*
+    printf("pixels.size=%d\n",  pixels->getNbrOfPads());
+    printf("mergedPads.mode %d, mergedPads.size=%d\n", mergedPads->mode, mergedPads->getNbrOfPads());
+    printf("nexPixelIdx[0;N-1]=(%d, %d) localMax->nPads=%d\n",
+            newPixelIdx[0], newPixelIdx[newPixelIdx.size()-1],
+            localMax->getNbrOfPads()
+            );
+    // pixels->refinePads( *localMax, newPixelIdx);
+    printf("pixels.size=%d\n",  pixels->getNbrOfPads());
+    printf("mergedPads.mode %d, mergedPads.size=%d\n", mergedPads->mode, mergedPads->getNbrOfPads());
+    printf("nexPixelIdx[0;N-1]=(%d, %d) localMax->nPads=%d\n",
+            newPixelIdx[0], newPixelIdx[newPixelIdx.size()-1],
+            localMax->getNbrOfPads()
+            );
+    */
+    macroIt++;
+    if (clusterConfig.processingLog >= clusterConfig.info) {
+      printf(" min/max, %g, %g \n", vectorMin(pixels->getCharges(), nPixels), vectorMax(pixels->getCharges(), nPixels));
+    }
+    goon =
+      (criteriom < 1.0 * previousCriteriom) && (macroIt < nMacroIterations);
+  }
+  /// with refinement ???
+  delete pixels;
+  if (saveLocalMax == nullptr || criteriom < 1.01 * previousCriteriom) {
+    delete saveLocalMax;
+  } else {
+    delete localMax;
+    localMax = saveLocalMax;
+  }
+
+  //
+  // Select local Max
+  // Remove local Max < 0.01 * max(LocalMax)
+  //
+  double cutRatio = 0.01;
+  double qCut =
+    cutRatio * vectorMax(localMax->getCharges(), localMax->getNbrOfPads());
+  int k = 0;
+  // Remove the last hits if > (nMaxPads +1) / 3
+  int nMaxSolutions =
+    int((std::max(getNbrOfPads(0), getNbrOfPads(1)) + 1.0) / 3.0);
+  // To avoid 0 possibility and give more inputs to the fitting
+  nMaxSolutions += 1;
+
+  int removedLocMax = localMax->getNbrOfPads();
+
+  if (localMax->getNbrOfPads() > nMaxSolutions) {
+    if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+      printf("seed selection: nbr Max parameters =%d, nLocMax=%d\n",
+             nMaxSolutions, localMax->getNbrOfPads());
+      printf(
+        "seed selection: Reduce the nbr of solutions to fit: Take %d/%d "
+        "solutions\n",
+        nMaxSolutions, localMax->getNbrOfPads());
+    }
+    std::vector<int> index(localMax->getNbrOfPads());
+    for (int k = 0; k < localMax->getNbrOfPads(); k++) {
+      index[k] = k;
+    }
+    const double* qLocalMax = localMax->getCharges();
+    std::sort(index.begin(), index.end(),
+              [=](int a, int b) { return (qLocalMax[a] > qLocalMax[b]); });
+    // Cut just below the charge of the nMaxSolutions-th largest local max
+    qCut = qLocalMax[index[nMaxSolutions - 1]] - 1.e-03;
+  }
+  k = localMax->removePads(qCut);
+  localMax->normalizeCharges();
+  removedLocMax -= k;
+
+  if (clusterConfig.processingLog >= clusterConfig.info && removedLocMax != 0) {
+    printf(
+      "    > seed selection: Final cut -> %d percent (qcut=%8.2f), number of "
+      "local max removed = %d\n",
+      int(cutRatio * 100), qCut, removedLocMax);
+  }
+
+  // Store the selected local max (seeds) in thetaL
+  int K0 = localMax->getNbrOfPads();
+  int K = std::min(K0, nbrOfPadsInTheGroupCath);
+  double* w = getW(thetaL, nbrOfPadsInTheGroupCath);
+  double* muX = getMuX(thetaL, nbrOfPadsInTheGroupCath);
+  double* muY = getMuY(thetaL, nbrOfPadsInTheGroupCath);
+  double* varX = getVarX(thetaL, nbrOfPadsInTheGroupCath);
+  double* varY = getVarY(thetaL, nbrOfPadsInTheGroupCath);
+  const double* ql = localMax->getCharges();
+  const double* xl = localMax->getX();
+  const double* yl = localMax->getY();
+  const double* dxl = localMax->getDX();
+  const double* dyl = localMax->getDY();
+  for (int k = 0; k < K; k++) {
+    w[k] = ql[k];
+    muX[k] = xl[k];
+    muY[k] = yl[k];
+    varX[k] = dxl[k];
+    varY[k] = dyl[k];
+    if (clusterConfig.processingLog >= clusterConfig.info) {
+      printf("k=%d XY=%f,%f varXY=%f,%f\n", k, muX[k], muY[k], varX[k], varY[k]);
+    }
+  }
+  // SVD (disabled experiment)
+  // NOTE(review): the block reads `pixels`, deleted above; fix before enabling
+  if (0) {
+    double rcond = 1.e-2;
+    gsl_matrix_view Cij_gsl = gsl_matrix_view_array(Cij, nPixels, nPads);
+    double* qPixelsStar = new double[nPixels];
+    gsl_vector_view qPixelsStar_gsl = gsl_vector_view_array(qPixelsStar, nPixels);
+    /*
+    double *Tji = new double[nPads*nPixels];
+    gsl_matrix_view Tji_gsl = gsl_matrix_view_array(Tji, nPads, nPixels);
+    gsl_matrix_transpose_memcpy(&Tji_gsl.matrix, &Cij_gsl.matrix);
+    gsl_matrix* pInv = moore_penrose_pinv(&Tji_gsl.matrix, rcond);
+
+    // qPads
+    gsl_vector_const_view qPads_gsl = gsl_vector_const_view_array(mergedPads->getCharges(), nPads);
+    // qPixels solution
+
+    gsl_blas_dgemv(CblasNoTrans, 1.0, pInv, &qPads_gsl.vector, 0.0, &qPixelsStar_gsl.vector);
+    */
+
+    // Cij . Cji
+    gsl_matrix* CCii = gsl_matrix_alloc(nPixels, nPixels);
+    gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1., &Cij_gsl.matrix, &Cij_gsl.matrix, 0., CCii);
+    gsl_matrix* pInv = moore_penrose_pinv(CCii, rcond);
+    double* pix = new double[nPixels];
+    vectorSet(pix, 0.0, nPixels);
+    pix[0] = 1.0;
+    gsl_vector_view pix_gsl = gsl_vector_view_array(pix, nPixels);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, pInv, &pix_gsl.vector, 0.0, &qPixelsStar_gsl.vector);
+    vectorAddScalar(pix, -1, nPixels, pix);
+    //
+    printGSLVector("qPixelStar", &qPixelsStar_gsl.vector);
+    vectorPrint("qPixels", pixels->getCharges(), nPixels);
+
+    if (clusterConfig.inspectModel >= clusterConfig.active) {
+      inspectOverWriteQ(macroIt - 1, qPixelsStar);
+      // pixels->setCharges( qPixelsStar, nPixels );
+      //  inspectSavePixels(macroIt-1, *pixels);
+    }
+
+    /*
+    double *Tji = new double[nPads*nPixels];
+    gsl_matrix_view Tji_gsl = gsl_matrix_view_array(Tji, nPads, nPixels);
+    gsl_matrix_transpose_memcpy(&Tji_gsl.matrix, &Cij_gsl.matrix);
+    double *V = new double[nPads*nPixels];
+    double *S = new double[nPixels];
+    double *work = new double[nPixels];
+    // gsl_matrix_view V_gsl = gsl_matrix_view_array(V, nPads, nPixels);
+    // gsl_vector_view S_gsl = gsl_vector_view_array(S, nPixels);
+    gsl_matrix_view V_gsl = gsl_matrix_view_array(V, nPads, nPads);
+    gsl_vector_view S_gsl = gsl_vector_view_array(S, nPads);
+    gsl_vector_view work_gsl = gsl_vector_view_array(work, nPads);
+    // A[M,N] = t(Cij), M=nPads, N=nPixels
+    gsl_linalg_SV_decomp (&Cij_gsl.matrix , &V_gsl.matrix, &S_gsl.vector, &work_gsl.vector);
+    printf("Matrix S:");
+    for (int j = 0; j < nPads; j++) {
+        double Sjj = gsl_vector_get(&S_gsl.vector,j);
+        printf("%6.2f ", gsl_vector_get(&S_gsl.vector,j));
+        if (Sjj > 1.0e-2) {
+          Sjj = 1.0/Sjj;
+        } else {
+          Sjj = 0;
+        }
+        gsl_vector_set(&S_gsl.vector,j, Sjj);
+    }
+    printf("\n");
+
+    gsl_matrix *PInv = gsl_matrix_alloc (nPads, nPixels);
+    gsl_matrix *Ut = gsl_matrix_alloc (nPads, nPixels);
+    gsl_matrix_transpose_memcpy (Ut, &Cij_gsl.matrix);
+    //gsl_matrix * SIpVT = gsl_matrix_alloc (n_row, n_row);
+    for (int i = 0; i < nPads; i++) {
+      for (int j = 0; j < nPads; j++) {
+        // Vij = Vij*Sjj
+        gsl_matrix_set(&V_gsl.matrix, i, j, gsl_matrix_get(&V_gsl.matrix, i, j) * gsl_vector_get(&S_gsl.vector,j));
+      }
+    }
+    gsl_blas_dgemm (CblasNoTrans, CblasNoTrans,          // Calculating  inv(S).trans(V)
+                    1.0, &V_gsl.matrix, Ut,
+                    0.0, PInv);
+
+    gsl_matrix *Id = gsl_matrix_alloc (nPixels, nPixels);
+    // Test if 1 matrix
+    gsl_blas_dgemm (CblasNoTrans, CblasNoTrans,
+                    1.0, &Cij_gsl.matrix, PInv,
+                    0.0, Id);
+
+
+    printf("Matrix Id:");
+    for (int j = 0; j < nPads; j++) {
+      for (int i = 0; i < nPixels; i++) {
+        printf("%6.2f ", gsl_matrix_get(Id, j, i));
+      }
+      printf("\n");
+    }
+    */
+  }
+  delete mergedPads;
+  /*
+  delete [] V;
+  delete [] S;
+  delete [] work;
+  */
+  delete localMax;
+  delete[] Cij;
+  delete[] maskCij;
+  return K;
+}
+// Without adjusted refinement
+int ClusterPEM::findLocalMaxWithPEM2Lev(double* thetaL, int nbrOfPadsInTheGroupCath)
+{
+
+  /// ??? Verify if not already done
+  // Already done if 1 group
+  Pads* cath0 = pads[0];
+  Pads* cath1 = pads[1];
+  Pads* projPads = projectedPads;
+  // Compute the charge weight of each cathode
+  double cWeight0 = getTotalCharge(0);
+  double cWeight1 = getTotalCharge(1);
+  double preClusterCharge = cWeight0 + cWeight1;
+  cWeight0 /= preClusterCharge;
+  cWeight1 /= preClusterCharge;
+  //
+  int chId = chamberId;
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.info || clusterConfig.processingLog >= clusterConfig.info) {
+    printf("  - [findLocalMaxWithPEM]\n");
+  }
+  // Trivial cluster : only 1 pads
+  if (projPads->getNbrOfPads() == 1) {
+    // Return the unique local maximum : the center of the pads
+    double* w = getW(thetaL, nbrOfPadsInTheGroupCath);
+    double* muX = getMuX(thetaL, nbrOfPadsInTheGroupCath);
+    double* muY = getMuY(thetaL, nbrOfPadsInTheGroupCath);
+    double* muDX = getVarX(thetaL, nbrOfPadsInTheGroupCath);
+    double* muDY = getVarY(thetaL, nbrOfPadsInTheGroupCath);
+    w[0] = 1.0;
+    muX[0] = projPads->getX()[0];
+    muY[0] = projPads->getY()[0];
+    muDX[0] = projPads->getDX()[0] * 0.5;
+    muDY[0] = projPads->getDY()[0] * 0.5;
+    return 1;
+  }
+  int nMaxPads = std::fmax(getNbrOfPads(0), getNbrOfPads(1));
+
+  // ??? To Optimize/debug
+  // Pads* pixels = projPads->refinePads();
+  Pads* pixels = projPads;
+  // Reserve place for refinment
+  /*
+  int maxNbrOfPixels = 4*projPads->getNbrOfPads();
+  Pads* pixels = new Pads(projPads, maxNbrOfPixels);
+  printf("pixel allocation %d\n", maxNbrOfPixels);
+  */
+  int nPixels = pixels->getNbrOfPads();
+  // Merge pads of the 2 cathodes
+  // TODO ??? : see if it can be once with Fitting (see fitPads)
+  Pads* mergedPads = new Pads(pads[0], pads[1], Pads::xyInfSupMode);
+  int nPads = mergedPads->getNbrOfPads();
+  Pads* localMax = nullptr;
+  Pads* saveLocalMax = nullptr;
+  std::pair<double, double> chi2;
+  int dof, nParameters;
+
+  // Pixel initilization
+  // Rq: the charge projection is not used
+  pixels->setCharges(1.0);
+  // The field saturate is use to tag pixels already refined
+  // no refinment
+  // pixels->setSaturate(0);
+  // Init Cij
+  double* Cij = new double[nPads * nPixels];
+  /// double* Cij = new double[nPads * maxNbrOfPixels];
+  // MaskCij: Used to disable Cij contribution (disable pixels)
+  Mask_t* maskCij = new Mask_t[nPads * nPixels];
+  // Mask_t* maskCij = new Mask_t[nPads * maxNbrOfPixels];
+  // Compute pad charge xyInfSup induiced by a set of charge (the pixels)
+  computeFastCij(*mergedPads, *pixels, Cij);
+  // computeCij(*mergedPads, *pixels, Cij);
+
+  // Init loop
+
+  double previousCriteriom = DBL_MAX;
+  double criteriom = DBL_MAX;
+  bool goon = true;
+  int macroIt = 0;
+  if (clusterConfig.processingLog >= clusterConfig.info) {
+    printf(
+      "    Macro  nParam    chi2 chi2/ndof chi2/ndof chi2/ndof chi2/ndof  ch2/ndof\n"
+      "     It.      -       -       -       cath-0    cath-1   sum 0/1  weight-sum\n");
+  }
+  while (goon) {
+    if (localMax != nullptr) {
+      saveLocalMax = new Pads(*localMax, o2::mch::Pads::xydxdyMode);
+    }
+    previousCriteriom = criteriom;
+    /*
+    if (0) {
+    vectorSet( Cij, 0.0, nPads*nPixels);
+    for (int j = 0; j < nPads; j++) {
+      Cij[nPads * j + j] = 1.0;
+      // for (int i = 0; i < nPixels; i++) {
+      //   qPadPrediction[j] += Cij[nPads * i + j] * qPixels[i];
+      // }
+    }
+    }
+    */
+    int qCutMode = 0;
+    // int qCutMode = -1;
+    chi2 =
+      PoissonEMLoop(*mergedPads, *pixels, Cij, maskCij, qCutMode, minPadResidues[macroIt],
+                    nIterations[macroIt]);
+    // Obsolete
+    // localMax = pixels->clipOnLocalMax(true);
+    std::vector<PadIdx_t> newPixelIdx;
+    localMax = pixels->extractLocalMax(newPixelIdx, 0.0, 0.0);
+    for (int t = 0; t < newPixelIdx.size(); t++) {
+      int idx = newPixelIdx[t];
+      printf("localMax idx=%d, xy(%f, %f), q[idx]=%f, localMax.q[t]=%f max(pixels)=%f \n",
+             idx, pixels->getX()[idx], pixels->getY()[idx], pixels->getCharges()[idx], localMax->getCharges()[t], vectorMax(pixels->getCharges(), pixels->getNbrOfPads()));
+    }
+    nParameters = localMax->getNbrOfPads();
+    dof = nMaxPads - 3 * nParameters + 1;
+    double chi20 = chi2.first;
+    double chi21 = chi2.second;
+    int ndof0 = getNbrOfPads(0) - 3 * nParameters + 1;
+    if (ndof0 <= 0) {
+      ndof0 = 1;
+    }
+    int ndof1 = getNbrOfPads(1) - 3 * nParameters + 1;
+    if (ndof1 <= 0.) {
+      ndof1 = 1;
+    }
+    if (dof == 0) {
+      dof = 1;
+    }
+    // Model selection criteriom (nbre of parameters/seeds)
+    // criteriom0 = fabs( (chi20+chi21 / dof));
+    // criteriom1 = fabs(sqrt(chi20 / ndof0) + sqrt(chi21 / ndof1));
+    // printf( "??? cWeight0=%f, cWeight1=%f\n", cWeight0, cWeight1);
+    criteriom = cWeight0 * sqrt(chi20 / ndof0) + cWeight1 * sqrt(chi21 / ndof1);
+    // Inv ??? 2       2   5272.16     14.24     12.73     10.86     23.59     11.93
+    //         3       3   4389.29     13.81     12.25     12.52     24.76     12.36
+
+    if (clusterConfig.processingLog >= clusterConfig.info) {
+      printf(
+        "     %2d     %3d   %7.2f   %7.2f   %7.2f   %7.2f   %7.2f   %7.2f\n",
+        macroIt, nParameters, chi20 + chi21, sqrt((chi20 + chi21) / dof),
+        sqrt(chi20 / ndof0), sqrt(chi21 / ndof1),
+        sqrt(chi20 / ndof0) + sqrt(chi21 / ndof1),
+        cWeight0 * sqrt(chi20 / ndof0) + cWeight1 * sqrt(chi21 / ndof1));
+    }
+    if (clusterConfig.inspectModel >= clusterConfig.active) {
+      inspectSavePixels(macroIt, *pixels);
+    }
+    /*
+    printf("pixels.size=%d\n",  pixels->getNbrOfPads());
+    printf("mergedPads.mode %d, mergedPads.size=%d\n", mergedPads->mode, mergedPads->getNbrOfPads());
+    printf("nexPixelIdx[0;N-1]=(%d, %d) localMax->nPads=%d\n",
+            newPixelIdx[0], newPixelIdx[newPixelIdx.size()-1],
+            localMax->getNbrOfPads()
+            );
+    // pixels->refinePads( *localMax, newPixelIdx);
+    printf("pixels.size=%d\n",  pixels->getNbrOfPads());
+    printf("mergedPads.mode %d, mergedPads.size=%d\n", mergedPads->mode, mergedPads->getNbrOfPads());
+    printf("nexPixelIdx[0;N-1]=(%d, %d) localMax->nPads=%d\n",
+            newPixelIdx[0], newPixelIdx[newPixelIdx.size()-1],
+            localMax->getNbrOfPads()
+            );
+    */
+    // pixel->padCenterToBounds();
+    // computeFastCij(*mergedPads, *pixels, Cij);
+    macroIt++;
+    if (macroIt == 4) {
+      pixels = projPads->refineAll();
+      nPixels = pixels->getNbrOfPads();
+      delete[] Cij;
+      Cij = new double[nPads * nPixels];
+      delete[] maskCij;
+      maskCij = new Mask_t[nPads * nPixels];
+      computeFastCij(*mergedPads, *pixels, Cij);
+    }
+    printf(" min/max, %g, %g \n", vectorMin(pixels->getCharges(), nPixels), vectorMax(pixels->getCharges(), nPixels));
+    goon =
+      (criteriom < 1.0 * previousCriteriom) && (macroIt < nMacroIterations);
+  }
+  /// with refinement ???
+  delete pixels;
+  if (criteriom < 1.01 * previousCriteriom) {
+    delete saveLocalMax;
+  } else {
+    delete localMax;
+    localMax = saveLocalMax;
+  }
+
+  //
+  // Select local Max
+  // Remove local Max < 0.01 * max(LocalMax)
+  //
+  double cutRatio = 0.01;
+  double qCut =
+    cutRatio * vectorMax(localMax->getCharges(), localMax->getNbrOfPads());
+  int k = 0;
+  double qSum = 0.0;
+  // Remove the last hits if > (nMaxPads +1) / 3
+  int nMaxSolutions =
+    int((std::max(getNbrOfPads(0), getNbrOfPads(1)) + 1.0) / 3.0);
+  // if (nMaxSolutions < 1) {
+  //     nMaxSolutions = 1;
+  //}
+  // To avoid 0 possibility and give more inputs to the fitting
+  nMaxSolutions += 1;
+
+  int removedLocMax = localMax->getNbrOfPads();
+
+  if (localMax->getNbrOfPads() > nMaxSolutions) {
+    if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+      printf("seed selection: nbr Max parameters =%d, nLocMax=%d\n",
+             nMaxSolutions, localMax->getNbrOfPads());
+      printf(
+        "seed selection: Reduce the nbr of solutions to fit: Take %d/%d "
+        "solutions\n",
+        nMaxSolutions, localMax->getNbrOfPads());
+    }
+    int index[localMax->getNbrOfPads()];
+    for (int k = 0; k < localMax->getNbrOfPads(); k++) {
+      index[k] = k;
+    }
+    const double* qLocalMax = localMax->getCharges();
+    std::sort(index, &index[localMax->getNbrOfPads()],
+              [=](int a, int b) { return (qLocalMax[a] > qLocalMax[b]); });
+    // Reoder
+    qCut = qLocalMax[index[nMaxSolutions - 1]] - 1.e-03;
+  }
+  k = localMax->removePads(qCut);
+  localMax->normalizeCharges();
+  removedLocMax -= k;
+
+  if (clusterConfig.processingLog >= clusterConfig.info && removedLocMax != 0) {
+    printf(
+      "    > seed selection: Final cut -> %d percent (qcut=%8.2f), number of "
+      "local max removed = %d\n",
+      int(cutRatio * 100), qCut, removedLocMax);
+  }
+
+  // Store the
+  int K0 = localMax->getNbrOfPads();
+  int K = std::min(K0, nbrOfPadsInTheGroupCath);
+  double* w = getW(thetaL, nbrOfPadsInTheGroupCath);
+  double* muX = getMuX(thetaL, nbrOfPadsInTheGroupCath);
+  double* muY = getMuY(thetaL, nbrOfPadsInTheGroupCath);
   double* varX = getVarX(thetaL, nbrOfPadsInTheGroupCath);
   double* varY = getVarY(thetaL, nbrOfPadsInTheGroupCath);
   const double* ql = localMax->getCharges();
@@ -1990,7 +3367,15 @@ int ClusterPEM::findLocalMaxWithPEM(double* thetaL, int nbrOfPadsInTheGroupCath)
     muY[k] = yl[k];
     varX[k] = dxl[k];
     varY[k] = dyl[k];
+    printf("k=%d XY=%f,%f varXY=%f,%f\n", k, muX[k], muY[k], varX[k], varY[k]);
   }
+
+  delete mergedPads;
+  /*
+  delete [] V;
+  delete [] S;
+  delete [] work;
+  */
   delete localMax;
   delete[] Cij;
   delete[] maskCij;
@@ -2000,7 +3385,7 @@ int ClusterPEM::findLocalMaxWithPEM(double* thetaL, int nbrOfPadsInTheGroupCath)
 // Propagate back cath-group to projection pads
 void ClusterPEM::updateProjectionGroups()
 {
-  if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+  if (clusterConfig.groupsLog >= clusterConfig.detail) {
     printf("> Update projected Groups [updateProjectionGroups]\n");
   }
   int nProjPads = projectedPads->getNbrOfPads();
@@ -2009,7 +3394,7 @@ void ClusterPEM::updateProjectionGroups()
 
   // Save projPadToGrp to Check
   Groups_t savePadGrp[nProjPads];
-  if (ClusterConfig::groupsCheck) {
+  if (clusterConfig.groupsCheck) {
     vectorCopyShort(projPadToGrp, nProjPads, savePadGrp);
   }
   for (int k = 0; k < nProjPads; k++) {
@@ -2031,7 +3416,7 @@ void ClusterPEM::updateProjectionGroups()
     } else if ((i > -1) && (j > -1)) {
       // projPadToGrp[k] = grpToGrp[ projPadToGrp[k] ];
       projPadToGrp[k] = cath0ToGrp[i];
-      // ??? if (ClusterConfig::groupsCheck && (cath0ToGrp[i] != cath1ToGrp[j])) {
+      // ??? if (clusterConfig.groupsCheck && (cath0ToGrp[i] != cath1ToGrp[j])) {
       if (0) {
         printf(
           "  [updateProjectionGroups] i, cath0ToGrp[i]=(%d, %d); j, "
@@ -2046,7 +3431,7 @@ void ClusterPEM::updateProjectionGroups()
       throw std::overflow_error("updateProjectionGroups i,j=-1");
     }
   }
-  if (ClusterConfig::groupsLog >= ClusterConfig::detail) {
+  if (clusterConfig.groupsLog >= clusterConfig.detail) {
     vectorPrintShort("  updated projGrp", projPadToGrp, nProjPads);
   }
   if (0) {
@@ -2111,7 +3496,7 @@ int ClusterPEM::laplacian2D(const Pads& pads_, PadIdx_t* neigh, int chId,
     }
     unselected[i] = (lapl[i] != 1.0);
     smoothQ[i] = sumNeigh / nNeigh;
-    if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::detail) {
+    if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
       printf(
         "Laplacian i=%d, x[i]=%6.3f, y[i]=%6.3f, z[i]=%6.3f, "
         "smoothQ[i]=%6.3f, lapl[i]=%6.3f\n",
@@ -2133,7 +3518,7 @@ int ClusterPEM::laplacian2D(const Pads& pads_, PadIdx_t* neigh, int chId,
   // return smoothQ[a] > smoothQ[b]; });
   std::sort(sortedLocalMax, &sortedLocalMax[nSortedIdx],
             [=](int a, int b) { return q[a] > q[b]; });
-  if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::detail) {
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
     vectorPrint("  sort w", q, N);
     vectorPrintInt("  sorted q-indexes", sortedLocalMax, nSortedIdx);
   }
@@ -2142,13 +3527,13 @@ int ClusterPEM::laplacian2D(const Pads& pads_, PadIdx_t* neigh, int chId,
   // Filtering local max
   ////
 
-  if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::info) {
-    printf("  filtering Local Max\n");
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+    printf("  [laplacian2D] (InspectModel) filtering Local Max\n");
   }
   // At Least one locMax
   if ((nSortedIdx == 0) && (N != 0)) {
     // Take the first pad
-    if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::info) {
+    if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
       printf("-> No local Max, take the highest value < 1\n");
     }
     sortedLocalMax[0] = 0;
@@ -2178,7 +3563,7 @@ int ClusterPEM::laplacian2D(const Pads& pads_, PadIdx_t* neigh, int chId,
     if (aspectRatio > 0.6) {
       // Take the max
       nSortedIdx = 1;
-      if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::info) {
+      if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
         printf(
           "  -> Limit to one local Max, nPads=%d, chId=%d, aspect ratio=%6.3f\n",
           N, chId, aspectRatio);
@@ -2196,7 +3581,7 @@ int ClusterPEM::laplacian2D(const Pads& pads_, PadIdx_t* neigh, int chId,
       }
     }
     nSortedIdx = std::max(trunkIdx, 1);
-    if ((ClusterConfig::EMLocalMaxLog >= ClusterConfig::info) && (trunkIdx != nSortedIdx)) {
+    if ((clusterConfig.EMLocalMaxLog >= clusterConfig.info) && (trunkIdx != nSortedIdx)) {
       printf("-> Suppress %d local Max. too noisy (q < %6.3f),\n",
              nSortedIdx - trunkIdx, 2 * noise);
     }
@@ -2242,7 +3627,7 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
   double smoothQ1[N1];
   // Local Maximum for each cathodes
   // There are sorted with the lissed q[O/1] values
-  if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+  if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
     printf("> [findLocalMaxWithBothCathodes] N0=%d N1=%d\n", N0, N1);
   }
   PadIdx_t* grpNeighborsCath0 = nullptr;
@@ -2317,7 +3702,7 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
   // Debug
   // vectorPrintInt( "mapIToGrpIdx", mapIToGrpIdx, N0);
   // vectorPrintInt( "mapJToGrpIdx", mapJToGrpIdx, N1);
-  if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+  if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
     vectorPrint("findLocalMax q0", q0, N0);
     vectorPrint("findLocalMax q1", q1, N1);
     vectorPrintInt("findLocalMax localMax0", localMax0, K0);
@@ -2328,7 +3713,7 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
   // Make the combinatorics between the 2 cathodes
   // - Take the maxOf( N0,N1) for the external loop
   //
-  if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+  if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
     printf("  Local max per cathode K0=%d, K1=%d\n", K0, K1);
   }
   bool K0GreaterThanK1 = (K0 >= K1);
@@ -2414,7 +3799,7 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
   PadIdx_t* UInterV;
   //
   // Cathodes combinatorics
-  if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+  if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
     printf("  Local max combinatorics: KU=%d KV=%d\n", KU, KV);
     // printXYdXY("Projection", xyDxyProj, NProj, NProj, 0, 0);
     // printf("  mapIJToK=%p, N0=%d N1=%d\n", mapIJToK, N0, N1);
@@ -2447,7 +3832,7 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
     // to checck the intersection
     // VPads int ug = mapGrpIdxToU[uPadIdx];
     int ug = uPadIdx;
-    if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+    if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
       printf("  Cathode u=%d localMaxU[u]=%d, x,y= %6.3f,  %6.3f, q=%6.3f\n", u,
              localMaxU[u], xu[localMaxU[u]], yu[localMaxU[u]],
              qU[localMaxU[u]]);
@@ -2497,7 +3882,7 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
       localQMax[k] = qU[uPadIdx];
       // Cannot be selected again as a seed
       qvAvailable[maxCathVIdx] = 0;
-      if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+      if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
         printf(
           "    found intersection of u with v: u,v=(%d,%d) , x=%f, y=%f, "
           "w=%f\n",
@@ -2517,7 +3902,7 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
       // Search v pads intersepting u
       PadIdx_t* uInterV;
       PadIdx_t uPad = 0;
-      if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+      if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
         printf(
           "  No intersection between u=%d and v-set of , approximate the "
           "location\n",
@@ -2543,7 +3928,7 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
           // Find the y range intercepting pad u
           for (; *uInterV != -1; uInterV++) {
             PadIdx_t idx = mapVToGrpIdx[*uInterV];
-            if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+            if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
               printf("  Global upad=%d intersect global vpad=%d grpIdx=%d\n",
                      uPad, *uInterV, idx);
             }
@@ -2556,7 +3941,7 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
           localYMax[k] = 0.5 * (vMin + vMax);
           localQMax[k] = qU[uPadIdx];
           if (localYMax[k] == 0 &&
-              (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info)) {
+              (clusterConfig.laplacianLocalMaxLog > clusterConfig.info)) {
             printf("WARNING localYMax[k] == 0, meaning no intersection");
           }
         } else {
@@ -2564,12 +3949,12 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
           // Find the x range intercepting pad u
           for (; *uInterV != -1; uInterV++) {
             PadIdx_t idx = mapVToGrpIdx[*uInterV];
-            if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+            if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
               printf(" Global upad=%d intersect global vpad=%d  grpIdx=%d \n",
                      uPad, *uInterV, idx);
             }
             if (idx != -1) {
-              if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+              if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
                 printf(
                   "xv[idx], yv[idx], dxv[idx], dyv[idx]: %6.3f %6.3f "
                   "%6.3f %6.3f\n",
@@ -2584,11 +3969,11 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
           localQMax[k] = qU[uPadIdx];
           // printf(" uPadIdx = %d/%d\n", uPadIdx, KU);
           if (localXMax[k] == 0 &&
-              (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info)) {
+              (clusterConfig.laplacianLocalMaxLog > clusterConfig.info)) {
             printf("WARNING localXMax[k] == 0, meaning no intersection");
           }
         }
-        if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::no) {
+        if (clusterConfig.laplacianLocalMaxLog > clusterConfig.no) {
           printf(
             "  solution found with all intersection of u=%d with all v, x "
             "more precise %d, position=(%f,%f), qU=%f\n",
@@ -2606,7 +3991,7 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
         localXMax[k] = xu[uPadIdx];
         localYMax[k] = yu[uPadIdx];
         localQMax[k] = qU[uPadIdx];
-        if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::no) {
+        if (clusterConfig.laplacianLocalMaxLog > clusterConfig.no) {
           printf(
             "  No intersection with u, u added in local Max: k=%d u=%d, "
             "position=(%f,%f), qU=%f\n",
@@ -2623,7 +4008,7 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
       localXMax[k] = xv[l];
       localYMax[k] = yv[l];
       localQMax[k] = qV[l];
-      if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+      if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
         printf(
           "  Remaining VMax, v added in local Max:  v=%d, "
           "position=(%f,%f), qU=%f\n",
@@ -2644,14 +4029,14 @@ int ClusterPEM::findLocalMaxWithBothCathodes(double* thetaOut, int kMax)
     wRatio += localQMax[k_];
   }
   wRatio = 1.0 / wRatio;
-  if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+  if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
     printf("Local max found k=%d kmax=%d\n", k, kMax);
   }
   for (int k_ = 0; k_ < k; k_++) {
     muX[k_] = localXMax[k_];
     muY[k_] = localYMax[k_];
     w[k_] = localQMax[k_] * wRatio;
-    if (ClusterConfig::laplacianLocalMaxLog > ClusterConfig::info) {
+    if (clusterConfig.laplacianLocalMaxLog > clusterConfig.info) {
       printf("  w=%6.3f, mux=%7.3f, muy=%7.3f\n", w[k_], muX[k_], muY[k_]);
     }
   }
diff --git a/Detectors/MUON/MCH/Clustering/src/InspectModel.cxx b/Detectors/MUON/MCH/Clustering/src/InspectModel.cxx
index bccc621d886bb..aaf0b5f6e6aac 100644
--- a/Detectors/MUON/MCH/Clustering/src/InspectModel.cxx
+++ b/Detectors/MUON/MCH/Clustering/src/InspectModel.cxx
@@ -31,12 +31,20 @@ static InspectModel inspectModel={.nbrOfProjPads=0, .projectedPads=0,
 .projGroups=0, .thetaInit=0, .kThetaInit=0, .totalNbrOfSubClusterPads=0,
 .totalNbrOfSubClusterThetaEMFinal=0, .nCathGroups=0, .padToCathGrp=0};
 */
-static InspectModel inspectModel;
 
+namespace o2
+{
+namespace mch
+{
+extern ClusterConfig clusterConfig;
+}
+} // namespace o2
+
+static InspectModel inspectModel;
 // Used when several sub-cluster occur in the precluster
 // Append the new hits/clusters in the thetaList of the pre-cluster
 void copyInGroupList(const double* values, int N, int item_size,
-                     std::vector<DataBlock_t>& groupList)
+                     std::vector<o2::mch::DataBlock_t>& groupList)
 {
   double* ptr = new double[N * item_size];
   // memcpy( (void *) ptr, (const void*) values, N*item_size*sizeof(double));
@@ -78,12 +86,12 @@ void collectTheta(double* theta, o2::mch::Groups_t* thetaToGroup, int K)
 {
   int sumK = 0;
 
-  if (o2::mch::ClusterConfig::inspectModelLog >= o2::mch::ClusterConfig::info) {
+  if (o2::mch::clusterConfig.inspectModelLog >= o2::mch::ClusterConfig::info) {
     printf("collectTheta : nbrOfGroups with clusters = %lu\n", inspectModel.subClusterThetaFitList.size());
   }
   for (int h = 0; h < inspectModel.subClusterThetaFitList.size(); h++) {
     int k = inspectModel.subClusterThetaFitList[h].first;
-    if (o2::mch::ClusterConfig::inspectModelLog >= o2::mch::ClusterConfig::info) {
+    if (o2::mch::clusterConfig.inspectModelLog >= o2::mch::ClusterConfig::info) {
       o2::mch::printTheta("  ", 1.0,
                           inspectModel.subClusterThetaFitList[h].second,
                           inspectModel.subClusterThetaFitList[h].first);
@@ -94,7 +102,7 @@ void collectTheta(double* theta, o2::mch::Groups_t* thetaToGroup, int K)
       o2::mch::vectorSetShort(&thetaToGroup[sumK], h + 1, k);
     }
     sumK += k;
-    if (o2::mch::ClusterConfig::inspectModelLog >= o2::mch::ClusterConfig::info) {
+    if (o2::mch::clusterConfig.inspectModelLog >= o2::mch::ClusterConfig::info) {
       printf("collect theta grp=%d,  grpSize=%d, adress=%p\n", h, k,
              inspectModel.subClusterThetaFitList[h].second);
     }
@@ -170,6 +178,11 @@ void cleanInspectModel()
   delete[] inspectModel.padToCathGrp;
   inspectModel.padToCathGrp = nullptr;
   inspectModel.nCathGroups = 0;
+
+  // Timing
+  for (int i = 0; i < 4; i++) {
+    inspectModel.duration[i] = 0;
+  }
 }
 
 void finalizeInspectModel()
@@ -279,7 +292,7 @@ void savePadToCathGroup(const o2::mch::Groups_t* cath0Grp,
 
 void collectPadToCathGroup(o2::mch::Mask_t* padToMGrp, int nPads)
 {
-  if (o2::mch::ClusterConfig::inspectModelLog >= o2::mch::ClusterConfig::info) {
+  if (o2::mch::clusterConfig.inspectModelLog >= o2::mch::ClusterConfig::info) {
     printf("collectPadToCathGroup nPads=%d\n", nPads);
   }
   o2::mch::vectorCopyShort(inspectModel.padToCathGrp, nPads, padToMGrp);
@@ -449,6 +462,19 @@ int collectPixels(int which, int N, double* xyDxy, double* q)
   return nSrc;
 }
 
+void inspectOverWriteQ(int which, const double* qPixels)
+{ // Overwrite the charges of the last pixel group saved in channel 'which'
+  auto& groups = inspectPadProcess.xyDxyQPixels[which];
+  // Guard: no group saved yet (back() would be invalid) or empty last group
+  if (groups.empty() || groups.back().first == 0) {
+    return;
+  }
+  int N = groups.back().first;
+  double* q = &groups.back().second[4 * N]; // charges follow the x, y, dx, dy sub-blocks
+  o2::mch::vectorCopy(qPixels, N, q);
+  o2::mch::vectorPrint("inspectOverWriteQ ???", q, N);
+}
+
 void inspectSavePixels(int which, o2::mch::Pads& pixels)
 {
   int N = pixels.getNbrOfPads();
@@ -460,7 +486,7 @@ void inspectSavePixels(int which, o2::mch::Pads& pixels)
   o2::mch::vectorCopy(pixels.getDX(), N, &xyDxy[2 * N]);
   o2::mch::vectorCopy(pixels.getDY(), N, &xyDxy[3 * N]);
   o2::mch::vectorCopy(pixels.getCharges(), N, q);
-  DataBlock_t db = {N, xyDxyQ};
+  o2::mch::DataBlock_t db = {N, xyDxyQ};
   inspectPadProcess.xyDxyQPixels[which].push_back(db);
   // printf("[inspectPadProcess], chanel=%d, nbrGrp=%ld\n", which,
   // inspectPadProcess.xyDxyQPixels[which].size() );
@@ -474,6 +500,24 @@ void setNbrProjectedPads(int n)
   // inspectModel.maxNbrOfProjPads= n;
 };
 
+void InspectModelChrono(int type, bool end)
+{ // Start (end == false) or stop-and-accumulate (end == true) the chronometer 'type'
+  constexpr int nChronos = sizeof(inspectModel.startTime) / sizeof(inspectModel.startTime[0]);
+  if (type < 0 || type >= nChronos) {
+    // type == -1 is the (disabled) report request; other values would index startTime out of bounds
+    return;
+  }
+  if (!end) {
+    inspectModel.startTime[type] = std::chrono::high_resolution_clock::now();
+  } else {
+    // Stop: add the elapsed time, in seconds, to the accumulated duration
+    const auto tEnd = std::chrono::high_resolution_clock::now();
+    std::chrono::duration<double> duration_ = tEnd - inspectModel.startTime[type];
+    inspectModel.duration[type] += duration_.count();
+  }
+}
+
+/*
 int f_ChargeIntegralMag(const gsl_vector* gslParams, void* dataFit,
                         gsl_vector* residuals)
 {
@@ -587,25 +631,6 @@ int f_ChargeIntegralMag(const gsl_vector* gslParams, void* dataFit,
     // gsl_vector_set(residuals, i, (zObs[i] - z[i]) * (1.0 + cathPenal) +
     // wPenal);
   }
-  /*
-  if (o2::mch::ClusterConfig::fittingLog >= o2::mch::ClusterConfig::debug) {
-    printf("    Observed sumCath0=%15.8f, sumCath1=%15.8f,\n",
-           zCathTotalCharge[0], zCathTotalCharge[1]);
-    // printf("  fitted   sumCath0=%15.8f, sumCath1=%15.8f,\n", chargePerCath,
-    // chargePerCath);
-    printf("    Penalties cathPenal=%5.4g wPenal=%5.4g \n", 1.0 + cathPenal,
-           wPenal);
-    printf("    Residues\n");
-    printf("  %15s  %15s  %15s %15s %15s %15s\n", "zObs", "z", "cathWeight",
-           "norm. factor", "notSaturated", "residual");
-    for (int i = 0; i < N; i++) {
-      printf("  %15.8f  %15.8f  %15.8f  %15.8f         %d  %15.8f\n", zObs[i],
-             z[i], cathWeights[i], sumNormalizedZ[cath[i]] * cathWeights[i],
-             notSaturated[i], gsl_vector_get(residuals, i));
-    }
-    printf("\n");
-  }
-  */
   return GSL_SUCCESS;
 }
 
@@ -678,36 +703,6 @@ void fitMathiesonMag(const double* xyDxDy, const double* q,
   mathiesonData.notSaturated_ptr = notSaturated;
 
   // Total Charge per cathode plane
-  /*
-  double zCathTotalCharge[2];
-  o2::mch::Mask_t mask[N];
-  // Cath 1
-  o2::mch::vectorCopyShort(mathiesonData.cath_ptr, N, mask);
-  // Logic And operation
-  o2::mch::vectorMultVectorShort(mathiesonData.notSaturated_ptr, mask, N, mask);
-  zCathTotalCharge[0] = o2::mch::vectorMaskedSum(mathiesonData.zObs_ptr, mask, N);
-  // cath 0
-  o2::mch::vectorCopyShort(mathiesonData.cath_ptr, N, mask);
-  o2::mch::vectorNotShort(mask, N, mask);
-  // Logic And operation
-  o2::mch::vectorMultVectorShort(mathiesonData.notSaturated_ptr, mask, N, mask);
-  zCathTotalCharge[1] = o2::mch::vectorMaskedSum(mathiesonData.zObs_ptr, mask, N);
-  // Init the weights
-  cathWeights = new double[N];
-  for (int i = 0; i < N; i++) {
-    cathWeights[i] = (mathiesonData.cath_ptr[i] == 0) ? zCathTotalCharge[0]
-                                                      : zCathTotalCharge[1];
-    cathMax[mathiesonData.cath_ptr[i]] = std::fmax(
-      cathMax[mathiesonData.cath_ptr[i]],
-      mathiesonData.notSaturated_ptr[i] * mathiesonData.zObs_ptr[i]);
-  }
-  if (o2::mch::ClusterConfig::fittingLog >= o2::mch::ClusterConfig::detail) {
-    vectorPrintShort("mathiesonData.cath_ptr", mathiesonData.cath_ptr, N);
-    vectorPrintShort("mathiesonData.notSaturated_ptr",
-                     mathiesonData.notSaturated_ptr, N);
-    vectorPrint("mathiesonData.zObs_ptr", mathiesonData.zObs_ptr, N);
-  }
-  */
   mathiesonData.cathWeights_ptr = nullptr;
   mathiesonData.cathMax_ptr = nullptr;
   mathiesonData.chamberId = chId;
@@ -830,11 +825,6 @@ void fitMathiesonMag(const double* xyDxDy, const double* q,
       o2::mch::copyTheta(thetaInit, K, thetaFinal, K, K);
     } else {
       // Fitted parameters
-      /* Invalid ???
-      for (int k = 0; k < (3 * K - 1); k++) {
-        muAndWf[k] = gsl_vector_get(s->x, k);
-      }
-      */
 
       // Mu part
       for (int k = 0; k < K; k++) {
@@ -850,19 +840,6 @@ void fitMathiesonMag(const double* xyDxDy, const double* q,
       }
       // Last w : 1.0 - sumW
       muAndWf[3 * K - 1] = 1.0 - sumW;
-
-      // Parameter error
-      /* Pb Mac compilation
-      if (computeStdDev && (pError != nullptr)) { //
-        // Covariance matrix an error
-        gsl_matrix* covar = gsl_matrix_alloc(3 * K - 1, 3 * K - 1);
-        gsl_multifit_covar(s->J, 0.0, covar);
-        for (int k = 0; k < (3 * K - 1); k++) {
-          pError[k] = sqrt(gsl_matrix_get(covar, k, k));
-        }
-        gsl_matrix_free(covar);
-      }
-      */
     }
     if (o2::mch::ClusterConfig::fittingLog >= o2::mch::ClusterConfig::detail) {
       printf("  status parameter error = %s\n", gsl_strerror(status));
@@ -875,4 +852,5 @@ void fitMathiesonMag(const double* xyDxDy, const double* q,
   // Release memory
   //
   return;
-}
\ No newline at end of file
+}
+*/
\ No newline at end of file
diff --git a/Detectors/MUON/MCH/Clustering/src/InspectModel.h b/Detectors/MUON/MCH/Clustering/src/InspectModel.h
index 0aaf0f5b8a603..6019c81bacd62 100644
--- a/Detectors/MUON/MCH/Clustering/src/InspectModel.h
+++ b/Detectors/MUON/MCH/Clustering/src/InspectModel.h
@@ -19,6 +19,7 @@
 #define O2_MCH_INSPECTMODEL_H_
 
 #include <vector>
+#include <chrono>
 
 #include <gsl/gsl_blas.h>
 #include <gsl/gsl_multifit_nlin.h>
@@ -43,15 +44,20 @@ typedef struct dummy_t {
   int totalNbrOfSubClusterPads = 0;
   int totalNbrOfSubClusterThetaEMFinal = 0;
   int totalNbrOfSubClusterThetaExtra = 0;
-  std::vector<DataBlock_t> subClusterPadList;
-  std::vector<DataBlock_t> subClusterChargeList;
-  std::vector<DataBlock_t> subClusterThetaEMFinal;
-  std::vector<DataBlock_t> subClusterThetaFitList;
-  std::vector<DataBlock_t> subClusterThetaExtra;
+  std::vector<o2::mch::DataBlock_t> subClusterPadList;
+  std::vector<o2::mch::DataBlock_t> subClusterChargeList;
+  std::vector<o2::mch::DataBlock_t> subClusterThetaEMFinal;
+  std::vector<o2::mch::DataBlock_t> subClusterThetaFitList;
+  std::vector<o2::mch::DataBlock_t> subClusterThetaExtra;
 
   // Cath groups
   int nCathGroups = 0;
   short* padToCathGrp = nullptr;
+
+  // Timing
+  std::chrono::time_point<std::chrono::high_resolution_clock> startTime[3];
+  double duration[4];
+
 } InspectModel;
 //
 
@@ -59,7 +65,7 @@ typedef struct dummy_t {
 typedef struct dummyPad_t {
   // Data on Pixels
   const static int nPixelStorage = 8;
-  std::vector<DataBlock_t> xyDxyQPixels[nPixelStorage];
+  std::vector<o2::mch::DataBlock_t> xyDxyQPixels[nPixelStorage];
 } InspectPadProcessing_t;
 
 extern "C" {
@@ -99,6 +105,7 @@ void collectThetaEMFinal(double* thetaEM, int K);
 void collectThetaExtra(double* thetaExtra, int K);
 void cleanPixels();
 int collectPixels(int which, int N, double* xyDxy, double* q);
+void inspectOverWriteQ(int which, const double* qPixels);
 void inspectSavePixels(int which, o2::mch::Pads& pixels);
 int getNbrProjectedPads();
 void setNbrProjectedPads(int n);
@@ -112,4 +119,7 @@ void fitMathiesonMag(const double* xyDxDy, const double* q,
                      double* thetaInit, int K, int N,
                      double* thetaFinal, double* khi2);
 }
+
+void InspectModelChrono(int type, bool end);
+
 #endif // O2_MCH_INSPECTMODEL_H_
\ No newline at end of file
diff --git a/Detectors/MUON/MCH/Clustering/src/PadsPEM.cxx b/Detectors/MUON/MCH/Clustering/src/PadsPEM.cxx
index 9dfd2d3c3a419..b13e7d54e1dc5 100644
--- a/Detectors/MUON/MCH/Clustering/src/PadsPEM.cxx
+++ b/Detectors/MUON/MCH/Clustering/src/PadsPEM.cxx
@@ -19,6 +19,7 @@
 #include <vector>
 
 #include "MCHClustering/PadsPEM.h"
+#include "mathieson.h"
 #include "mathUtil.h"
 
 #define VERBOSE 1
@@ -29,6 +30,79 @@ namespace o2
 namespace mch
 {
 
+extern ClusterConfig clusterConfig;
+
+void Pads::padBoundsToCenter(const Pads& pads)
+{ // Fill this (x, y, dx, dy) with the centers/half-sizes computed from the bounds stored in pads
+  if (pads.mode == xyInfSupMode) { // test the source: this->mode may already be the target mode
+    const double* xInf = pads.x;
+    const double* yInf = pads.y;
+    const double* xSup = pads.dx;
+    const double* ySup = pads.dy;
+    for (int i = 0; i < nPads; i++) {
+      dx[i] = 0.5 * (xSup[i] - xInf[i]);
+      dy[i] = 0.5 * (ySup[i] - yInf[i]);
+      x[i] = xInf[i] + dx[i];
+      y[i] = yInf[i] + dy[i];
+    }
+    mode = xydxdyMode;
+  }
+}
+
+void Pads::padCenterToBounds(const Pads& pads)
+{ // Fill this with the bounds (xInf, yInf, xSup, ySup) of the pads given as centers/half-sizes
+  if (pads.mode == xydxdyMode) { // test the source representation, not the target one
+    double* xInf = x;
+    double* yInf = y;
+    double* xSup = dx;
+    double* ySup = dy;
+    for (int i = 0; i < nPads; i++) {
+      xInf[i] = pads.x[i] - pads.dx[i];
+      xSup[i] = pads.x[i] + pads.dx[i];
+      yInf[i] = pads.y[i] - pads.dy[i];
+      ySup[i] = pads.y[i] + pads.dy[i];
+    }
+    mode = xyInfSupMode; // the four arrays now hold bounds
+  }
+}
+
+void Pads::padCenterToBounds()
+{ // In-place conversion: (x, y, dx, dy) centers/half-sizes -> (xInf, yInf, xSup, ySup)
+  if (mode != xydxdyMode) {
+    return; // nothing to convert
+  }
+  for (int p = 0; p < nPads; p++) {
+    // Read the center and half-size before overwriting the shared storage
+    const double xc = x[p];
+    const double hx = dx[p];
+    x[p] = xc - hx;  // xInf
+    dx[p] = xc + hx; // xSup
+    const double yc = y[p];
+    const double hy = dy[p];
+    y[p] = yc - hy;  // yInf
+    dy[p] = yc + hy; // ySup
+  }
+  // The four arrays now hold the pad bounds
+  mode = xyInfSupMode;
+}
+
+void Pads::padBoundsToCenter()
+{ // In-place conversion: (xInf, yInf, xSup, ySup) bounds -> (x, y, dx, dy)
+  if (mode != xyInfSupMode) {
+    return; // nothing to convert
+  }
+  for (int p = 0; p < nPads; p++) {
+    // Half-sizes first: they overwrite the sup bounds, the inf bounds are still intact
+    const double hx = 0.5 * (dx[p] - x[p]);
+    const double hy = 0.5 * (dy[p] - y[p]);
+    dx[p] = hx;
+    dy[p] = hy;
+    // Center = inf bound + half-size
+    x[p] = x[p] + hx;
+    y[p] = y[p] + hy;
+  }
+  mode = xydxdyMode;
+}
 PadIdx_t* Pads::buildFirstNeighbors(double* X, double* Y, double* DX,
                                     double* DY, int N)
 {
@@ -109,7 +183,7 @@ PadIdx_t* Pads::buildKFirstsNeighbors(int kernelSize)
     }
   }
 
-  if (ClusterConfig::padMappingLog >= ClusterConfig::detail) {
+  if (clusterConfig.padMappingLog >= clusterConfig.detail) {
     Pads::printNeighbors(neighbors_, N);
   }
 
@@ -125,13 +199,14 @@ Pads* Pads::addBoundaryPads()
   std::vector<double> bdX;
   std::vector<double> bdY;
   int N = nPads;
-  // Build neigbours if required
+  // Build neighbors if required
   PadIdx_t* neigh = buildFirstNeighbors();
   for (int i = 0; i < N; i++) {
     bool east = true, west = true, north = true, south = true;
     for (const PadIdx_t* neigh_ptr = getTheFirtsNeighborOf(neigh, i);
          *neigh_ptr != -1; neigh_ptr++) {
       PadIdx_t v = *neigh_ptr;
+      // If neighbours then no boundary pads to add
       double xDelta = (x[v] - x[i]);
       if (fabs(xDelta) > eps) {
         if (xDelta > 0) {
@@ -175,9 +250,76 @@ Pads* Pads::addBoundaryPads()
       bdY.push_back(dy[i]);
     }
   }
+  // Suppress the new pads which overlap
   int nPadToAdd = bX.size();
+  double error = epsilonGeometry;
+  int K = bX.size();
+  Mask_t toKeep[K];
+  vectorSetShort(toKeep, 1, K);
+  double xInf[K], xSup[K], yInf[K], ySup[K];
+  double maxInf[K], minSup[K];
+  double xOverlap[K], yOverlap[K];
+  double overlap;
+  // Compute x/y inf/sup
+  for (int k = 0; k < K; k++) {
+    xInf[k] = bX[k] - bdX[k];
+    xSup[k] = bX[k] + bdX[k];
+    yInf[k] = bY[k] - bdY[k];
+    ySup[k] = bY[k] + bdY[k];
+  }
+  // printf("[addBoundary] n=%d boundary pads added\n", K);
+
+  for (int k = 0; k < (K - 1); k++) {
+    if (toKeep[k]) {
+      // X overlap
+      vectorMaxScalar(&xInf[k + 1], xInf[k], K - k - 1, &maxInf[k + 1]);
+      vectorMinScalar(&xSup[k + 1], xSup[k], K - k - 1, &minSup[k + 1]);
+      vectorAddVector(&minSup[k + 1], -1.0, &maxInf[k + 1], K - k - 1, &xOverlap[k + 1]);
+      // Y overlap
+      vectorMaxScalar(&yInf[k + 1], yInf[k], K - k - 1, &maxInf[k + 1]);
+      vectorMinScalar(&ySup[k + 1], ySup[k], K - k - 1, &minSup[k + 1]);
+      vectorAddVector(&minSup[k + 1], -1.0, &maxInf[k + 1], K - k - 1, &yOverlap[k + 1]);
+
+      for (int l = k + 1; l < K; l++) {
+        // printf("             xOverlap[l]=%f, yOverlap[l]=%f\n", xOverlap[l], yOverlap[l]);
+        overlap = (xOverlap[l] < error) ? 0.0 : 1.0;
+        overlap = (yOverlap[l] < error) ? 0.0 * overlap : overlap * 1;
+        if (toKeep[l] && (overlap > 0.0)) {
+          toKeep[l] = 0;
+          nPadToAdd--;
+          // printf("[addBoundary] overlapping k=%d l=%d \n", k, l);
+          // printf("              pad k x=%f, dx=%f, y=%f, dy=%f\n", bX[k], bdX[k], bY[k], bdY[k]);
+          // printf("              pad l x=%f, dx=%f, y=%f, dy=%f\n", bX[l], bdX[l], bY[l], bdY[l]);
+          //
+          // Update boundary Pads
+          double infxy_ = bX[k] - bdX[k];
+          double supxy_ = bX[k] + bdX[k];
+          infxy_ = std::fmax(infxy_, xInf[l]);
+          supxy_ = std::fmin(supxy_, xSup[l]);
+          double dxy_ = 0.5 * (supxy_ - infxy_);
+          // pad center : xInf + dxy_ (dxy_ is the half-size)
+          bX[k] = infxy_ + dxy_;
+          bdX[k] = dxy_;
+          //
+          // The same for Y
+          infxy_ = bY[k] - bdY[k];
+          supxy_ = bY[k] + bdY[k];
+          infxy_ = std::fmax(infxy_, yInf[l]);
+          supxy_ = std::fmin(supxy_, ySup[l]);
+          dxy_ = 0.5 * (supxy_ - infxy_);
+          bY[k] = infxy_ + dxy_;
+          bdY[k] = dxy_;
+          // printf("              new pad k x=%f, dx=%f, y=%f, dy=%f\n", bX[k], bdX[k], bY[k], bdY[k]);
+        }
+      }
+    } // if (toKeep[k])
+  }
+  if (clusterConfig.processingLog >= clusterConfig.info) {
+    printf("[addBoundary] n=%d final boundary pads added, %d removed overlapping pads\n", nPadToAdd, K - nPadToAdd);
+  }
+
   int nTotalPads = N + nPadToAdd;
-  if (ClusterConfig::padMappingLog >= ClusterConfig::detail) {
+  if (clusterConfig.padMappingLog >= clusterConfig.detail) {
     printf("nTotalPads=%d, nPads=%d,  nPadToAdd=%d\n", nTotalPads, N,
            nPadToAdd);
   }
@@ -191,22 +333,28 @@ Pads* Pads::addBoundaryPads()
     newPads->q[i] = q[i];
     newPads->saturate[i] = saturate[i];
   }
-  for (int i = N, k = 0; i < nTotalPads; i++, k++) {
-    newPads->x[i] = bX[k];
-    newPads->y[i] = bY[k];
-    newPads->dx[i] = bdX[k];
-    newPads->dy[i] = bdY[k];
-    newPads->q[i] = 0.0;
-    newPads->saturate[i] = 0;
+  newPads->nObsPads = N;
+  for (int i = N, k = 0; i < nTotalPads; k++) {
+    if (toKeep[k]) {
+      newPads->x[i] = bX[k];
+      newPads->y[i] = bY[k];
+      newPads->dx[i] = bdX[k];
+      newPads->dy[i] = bdY[k];
+      newPads->q[i] = 0.0;
+      newPads->saturate[i] = 0;
+      i++;
+    }
   }
   newPads->totalCharge = totalCharge;
   //
+  // printPads( "[addBoundary] pads", *newPads );
   return padsWithBoundaries;
 }
 
-Pads::Pads(int N, int chId, int mode_)
+Pads::Pads(int N, int chId, PadMode mode_)
 {
   nPads = N;
+  nObsPads = N;
   mode = mode_;
   chamberId = chId;
   allocate();
@@ -258,9 +406,32 @@ Pads::Pads( const Pads *pads0, const Pads *pads1) {
 }
 */
 
-Pads::Pads(const Pads& pads, int mode_)
+// Over allocation
+Pads::Pads(const Pads* pads, int size)
+{ // Copy of pads with storage reserved for 'size' pads (over-allocation)
+  nPads = pads->nPads;
+  nObsPads = pads->nObsPads;
+  mode = pads->mode;
+  chamberId = pads->chamberId;
+  totalCharge = pads->totalCharge;
+  allocate((size > nPads) ? size : nPads); // never less than the nPads items copied below
+  memcpy(x, pads->x, sizeof(double) * nPads);
+  memcpy(y, pads->y, sizeof(double) * nPads);
+  memcpy(dx, pads->dx, sizeof(double) * nPads);
+  memcpy(dy, pads->dy, sizeof(double) * nPads);
+  memcpy(q, pads->q, sizeof(double) * nPads);
+  if (pads->saturate != nullptr) {
+    memcpy(saturate, pads->saturate, sizeof(Mask_t) * nPads);
+  }
+  if (pads->cath != nullptr) {
+    memcpy(cath, pads->cath, sizeof(Mask_t) * nPads);
+  }
+}
+
+Pads::Pads(const Pads& pads, PadMode mode_)
 {
   nPads = pads.nPads;
+  nObsPads = pads.nObsPads;
   mode = mode_;
   chamberId = pads.chamberId;
   totalCharge = pads.totalCharge;
@@ -273,32 +444,19 @@ Pads::Pads(const Pads& pads, int mode_)
     memcpy(q, pads.q, sizeof(double) * nPads);
   } else if (mode == xydxdyMode) {
     //  xyInfSupMode ->  xydxdyMode
-    double* xInf = pads.x;
-    double* yInf = pads.y;
-    double* xSup = pads.dx;
-    double* ySup = pads.dy;
-    for (int i = 0; i < nPads; i++) {
-      dx[i] = 0.5 * (xSup[i] - xInf[i]);
-      dy[i] = 0.5 * (ySup[i] - yInf[i]);
-      x[i] = xInf[i] + dx[i];
-      y[i] = yInf[i] + dy[i];
-    }
+    padBoundsToCenter(pads);
     memcpy(q, pads.q, sizeof(double) * nPads);
   } else {
     // xydxdyMode -> xyInfSupMode
-    double* xInf = x;
-    double* yInf = y;
-    double* xSup = dx;
-    double* ySup = dy;
-    for (int i = 0; i < nPads; i++) {
-      xInf[i] = pads.x[i] - pads.dx[i];
-      xSup[i] = pads.x[i] + pads.dx[i];
-      yInf[i] = pads.y[i] - pads.dy[i];
-      ySup[i] = pads.y[i] + pads.dy[i];
-    }
+    padCenterToBounds(pads);
     memcpy(q, pads.q, sizeof(double) * nPads);
   }
-  memcpy(saturate, pads.saturate, sizeof(Mask_t) * nPads);
+  if (pads.saturate) {
+    memcpy(saturate, pads.saturate, sizeof(Mask_t) * nPads);
+  }
+  if (pads.cath) {
+    memcpy(cath, pads.cath, sizeof(Mask_t) * nPads);
+  }
 }
 
 Pads::Pads(const Pads& pads, const Mask_t* mask)
@@ -316,7 +474,11 @@ Pads::Pads(const Pads& pads, const Mask_t* mask)
   if (pads.saturate) {
     vectorGatherShort(pads.saturate, mask, pads.nPads, saturate);
   }
+  if (pads.cath) {
+    vectorGatherShort(pads.cath, mask, pads.nPads, cath);
+  }
   totalCharge = vectorSum(q, nPads);
+  nObsPads = nPads;
 }
 
 /* Old version: Unused
@@ -341,6 +503,7 @@ Pads::Pads(double* x_, double* y_, double* dx_, double* dy_, int chId,
 {
   mode = xydxdyMode;
   nPads = nPads_;
+  nObsPads = nPads;
   chamberId = chId;
   x = x_;
   y = y_;
@@ -349,6 +512,9 @@ Pads::Pads(double* x_, double* y_, double* dx_, double* dy_, int chId,
   q = new double[nPads];
   // Set null Charge
   vectorSetZero(q, nPads);
+  // others
+  saturate = nullptr;
+  cath = nullptr;
   neighbors = nullptr;
   totalCharge = 0;
 }
@@ -364,6 +530,7 @@ Pads::Pads(const double* x_, const double* y_, const double* dx_,
   if (selectedCath == 0) {
     nPads = nAllPads - nCathode1;
   }
+  nObsPads = nPads;
   chamberId = chId;
   allocate();
   double qSum = 0;
@@ -392,6 +559,7 @@ Pads::Pads(const double* x_, const double* y_, const double* dx_,
   mode = xydxdyMode;
   // int nCathode1 = vectorSumShort(cathode, nAllPads);
   nPads = nAllPads;
+  nObsPads = nPads;
   /*
   if (selectedCath == 0) {
     nPads = nAllPads - nCathode1;
@@ -412,65 +580,105 @@ Pads::Pads(const double* x_, const double* y_, const double* dx_,
 }
 
 // Concatenate pads
-Pads::Pads(const Pads* pads1, const Pads* pads2, int mode_)
+Pads::Pads(const Pads* pads0, const Pads* pads1, PadMode mode_)
 {
-  // Take Care: pads1 and pads2 must be in xydxdyMode
+
+  // Take Care: pads0 and pads1 must be in xydxdyMode
+  bool padsMode = (pads0 == nullptr) ? true : (pads0->mode == xydxdyMode);
+  padsMode = (pads1 == nullptr) ? padsMode : (pads1->mode == xydxdyMode);
+  if (!padsMode) {
+    throw std::out_of_range("Pads:: bad mode (xydxdyMode required) for pad merging");
+  }
+
+  int N0 = (pads0 == nullptr) ? 0 : pads0->nPads;
   int N1 = (pads1 == nullptr) ? 0 : pads1->nPads;
-  int N2 = (pads2 == nullptr) ? 0 : pads2->nPads;
-  nPads = N1 + N2;
-  chamberId = (N1) ? pads1->chamberId : pads2->chamberId;
-  mode = mode_;
+  int nObs0 = (pads0 == nullptr) ? 0 : pads0->nObsPads;
+  int nObs1 = (pads1 == nullptr) ? 0 : pads1->nObsPads;
+  nPads = N0 + N1;
+  chamberId = (N0) ? pads0->chamberId : pads1->chamberId;
   allocate();
-  if (mode == xydxdyMode) {
-    // Copy pads1
-    if (N1) {
-      memcpy(x, pads1->x, sizeof(double) * N1);
-      memcpy(y, pads1->y, sizeof(double) * N1);
-      memcpy(dx, pads1->dx, sizeof(double) * N1);
-      memcpy(dy, pads1->dy, sizeof(double) * N1);
-      memcpy(q, pads1->q, sizeof(double) * N1);
-      memcpy(saturate, pads1->saturate, sizeof(Mask_t) * N1);
-      vectorSetShort(cath, 0, N1);
-    }
-    if (N2) {
-      // Copy pads2
-      memcpy(&x[N1], pads2->x, sizeof(double) * N2);
-      memcpy(&y[N1], pads2->y, sizeof(double) * N2);
-      memcpy(&dx[N1], pads2->dx, sizeof(double) * N2);
-      memcpy(&dy[N1], pads2->dy, sizeof(double) * N2);
-      memcpy(&q[N1], pads2->q, sizeof(double) * N2);
-      memcpy(&saturate[N1], pads2->saturate, sizeof(Mask_t) * N2);
-      vectorSetShort(&cath[N1], 1, N2);
-    }
-  } else {
-    double* xInf = x;
-    double* yInf = y;
-    double* xSup = dx;
-    double* ySup = dy;
-    for (int i = 0; i < N1; i++) {
-      xInf[i] = pads1->x[i] - pads1->dx[i];
-      xSup[i] = pads1->x[i] + pads1->dx[i];
-      yInf[i] = pads1->y[i] - pads1->dy[i];
-      ySup[i] = pads1->y[i] + pads1->dy[i];
-      q[i] = pads1->q[i];
-      saturate[i] = pads1->saturate[i];
-      cath[i] = 0;
-    }
-    for (int i = 0; i < N2; i++) {
-      xInf[i + N1] = pads2->x[i] - pads2->dx[i];
-      xSup[i + N1] = pads2->x[i] + pads2->dx[i];
-      yInf[i + N1] = pads2->y[i] - pads2->dy[i];
-      ySup[i + N1] = pads2->y[i] + pads2->dy[i];
-      q[i + N1] = pads2->q[i];
-      saturate[i + N1] = pads2->saturate[i];
-      cath[i + N1] = 1;
-    }
+  // Copy observable pads0
+  int destIdx = 0;
+  copyPads(pads0, 0, destIdx, nObs0, 0);
+  destIdx += nObs0;
+  // Copy observable pads1
+  copyPads(pads1, 0, destIdx, nObs1, 1);
+  destIdx += nObs1;
+
+  // Boundary pads0
+  int n = N0 - nObs0;
+  copyPads(pads0, nObs0, destIdx, n, 0);
+  destIdx += n;
+  n = N1 - nObs1;
+  copyPads(pads1, nObs1, destIdx, n, 1);
+  destIdx += n;
+
+  /*
+  if (N1) {
+    memcpy(x, pads1->x, sizeof(double) * N1);
+    memcpy(y, pads1->y, sizeof(double) * N1);
+    memcpy(dx, pads1->dx, sizeof(double) * N1);
+    memcpy(dy, pads1->dy, sizeof(double) * N1);
+    memcpy(q, pads1->q, sizeof(double) * N1);
+    memcpy(saturate, pads1->saturate, sizeof(Mask_t) * N1);
+    vectorSetShort(cath, 0, N1);
+  }
+  if (N2) {
+    // Copy pads2
+    memcpy(&x[N1], pads2->x, sizeof(double) * N2);
+    memcpy(&y[N1], pads2->y, sizeof(double) * N2);
+    memcpy(&dx[N1], pads2->dx, sizeof(double) * N2);
+    memcpy(&dy[N1], pads2->dy, sizeof(double) * N2);
+    memcpy(&q[N1], pads2->q, sizeof(double) * N2);
+    memcpy(&saturate[N1], pads2->saturate, sizeof(Mask_t) * N2);
+    vectorSetShort(&cath[N1], 1, N2);
+  }
+  */
+  // ??? printPads(" Before InfSup", *this);
+  if (mode_ == xyInfSupMode) {
+    padCenterToBounds();
   }
   totalCharge = vectorSum(q, nPads);
+  nObsPads = nObs0 + nObs1;
+  // ??? printPads(" after InfSup", *this);
+}
+/*
+void Pads::print(const char *title)
+{
+  printf("%s\n", title);
+  printf("print pads nPads=%4d nObsPads=%4d mode=%1d\n", nPads, nObsPads, mode);
+  printf("idx      x       y       dx        dy cath  sat  charge \n");
+  for (int i=0; i < nPads; i++) {
+    printf("%2d %7.3f %7.3f %7.3f %7.3f    %1d     %1d %7.3f \n", i, x[i], y[i], dx[i], dy[i], cath[i], saturate[i], q[i]);
+  }
+}
+*/
+
+Pads* Pads::selectPads(int* index, int K)
+{ // Build a new Pads (allocated with new) holding the K pads index[0..K-1] of this, in that order
+  Pads* selected = new Pads(K, chamberId, mode);
+  // Gather geometry, charge and saturation flag of each requested pad
+  for (int dst = 0; dst < K; dst++) {
+    const int src = index[dst];
+    selected->x[dst] = x[src];
+    selected->y[dst] = y[src];
+    selected->dx[dst] = dx[src];
+    selected->dy[dst] = dy[src];
+    selected->q[dst] = q[src];
+    selected->saturate[dst] = saturate[src];
+  }
+  // NOTE(review): cath and totalCharge are not copied here - confirm callers do not need them
+  selected->nPads = K;
+  selected->nObsPads = K;
+  return selected;
+}
 
+// ??? removePad can be suppressed ????
 void Pads::removePad(int index)
 {
+  if (nObsPads != nPads) {
+    throw std::out_of_range("Pads::removePad: bad usage");
+  }
   if ((index < 0) || (index >= nPads)) {
     return;
   }
@@ -488,6 +696,7 @@ void Pads::removePad(int index)
   vectorCopyShort(&saturate[index + 1], nItems, &saturate[index]);
   //
   nPads = nPads - 1;
+  nObsPads = nObsPads - 1;
 }
 
 void Pads::allocate()
@@ -510,6 +719,61 @@ void Pads::allocate()
   q = new double[N];
 }
 
+// Over-allocation of pads: every array gets `size` entries, whatever nPads is
+void Pads::allocate(int size)
+{
+  // Note: Must be deallocated/released beforehand if required: the previous
+  // pointers are overwritten here without being deleted.
+  x = nullptr;
+  y = nullptr;
+  dx = nullptr;
+  dy = nullptr;
+  saturate = nullptr;
+  cath = nullptr;
+  q = nullptr;
+  neighbors = nullptr;
+  // size allocation (nPads is not modified)
+  x = new double[size];
+  y = new double[size];
+  dx = new double[size];
+  dy = new double[size];
+  saturate = new Mask_t[size];
+  cath = new Mask_t[size];
+  q = new double[size];
+}
+
+void Pads::copyPads(const Pads* srcPads, int srcIdx, int dstIdx, int N, Mask_t cathValue)
+{
+  if (N > 0) { // N <= 0: nothing to copy (a negative N must never reach memcpy as a size)
+    memcpy(&x[dstIdx], &srcPads->x[srcIdx], sizeof(double) * N); // srcPads[srcIdx, srcIdx+N) -> this[dstIdx, dstIdx+N)
+    memcpy(&y[dstIdx], &srcPads->y[srcIdx], sizeof(double) * N);
+    memcpy(&dx[dstIdx], &srcPads->dx[srcIdx], sizeof(double) * N);
+    memcpy(&dy[dstIdx], &srcPads->dy[srcIdx], sizeof(double) * N);
+    memcpy(&q[dstIdx], &srcPads->q[srcIdx], sizeof(double) * N);
+    memcpy(&saturate[dstIdx], &srcPads->saturate[srcIdx], sizeof(Mask_t) * N);
+    vectorSetShort(&cath[dstIdx], cathValue, N); // cath is not copied: the N pads are tagged with cathValue
+  }
+}
+double Pads::updateTotalCharge()
+{
+  // Recompute the stored total charge from the nPads first charges and return it
+  return (totalCharge = vectorSum(q, nPads));
+}
+//
+double Pads::getMeanTotalCharge()
+{
+  // Total charge divided by the number of cathode planes that hold pads
+  // (assumes cath[] holds 0/1 flags, so that its sum counts the cathode-1 pads).
+  if (cath == nullptr) {
+    return totalCharge;
+  }
+  const int nCath1 = vectorSumShort(cath, nPads);
+  const int nCath0 = nPads - nCath1;
+  const int nCath = (nCath0 > 0) + (nCath1 > 0);
+  // No pad at all: avoid the division by zero and return the total charge as is
+  return (nCath > 0) ? totalCharge / nCath : totalCharge;
+}
+
 void Pads::setCharges(double c)
 {
   vectorSet(q, c, nPads);
@@ -522,6 +786,10 @@ void Pads::setCharges(double* q_, int n)
   totalCharge = vectorSum(q_, n);
 }
 
+void Pads::setCathodes(Mask_t cath_) { vectorSetShort(cath, cath_, nPads); } // same cathode value cath_ passed to vectorSetShort for the nPads pads
+
+void Pads::setSaturate(Mask_t val) { vectorSetShort(saturate, val, nPads); } // same saturate flag val passed to vectorSetShort for the nPads pads
+
 void Pads::setToZero()
 {
   for (int i = 0; i < nPads; i++) {
@@ -535,10 +803,14 @@ void Pads::setToZero()
 
 int Pads::removePads(double qCut)
 {
+  if (nObsPads != nPads) {
+    throw std::out_of_range("Pads::removePad: bad usage");
+  }
   double qSum = 0.0;
   int k = 0;
   for (int i = 0; i < nPads; i++) {
-    if (q[i] > qCut) {
+    // printf("q %f\n", q[i]);
+    if (q[i] >= qCut) {
       qSum += q[i];
       q[k] = q[i];
       x[k] = x[i];
@@ -550,6 +822,7 @@ int Pads::removePads(double qCut)
   }
   totalCharge = qSum;
   nPads = k;
+  nObsPads = k;
   return k;
 }
 
@@ -578,7 +851,7 @@ int Pads::addIsolatedPadInGroups(Mask_t* cathToGrp, Mask_t* grpToGrp,
     return nGroups;
   }
 
-  if (ClusterConfig::padMappingLog >= ClusterConfig::detail) {
+  if (clusterConfig.padMappingLog >= clusterConfig.detail) {
     printf("[addIsolatedPadInGroups]  nGroups=%d\n", nGroups);
     vectorPrintShort("  cathToGrp input", cathToGrp, nPads);
   }
@@ -635,7 +908,7 @@ int Pads::addIsolatedPadInGroups(Mask_t* cathToGrp, Mask_t* grpToGrp,
     grpToGrp[g] = gBar;
   }
 
-  if (ClusterConfig::padMappingLog >= ClusterConfig::debug) {
+  if (clusterConfig.padMappingLog >= clusterConfig.debug) {
     printf("  grpToGrp\n");
     for (int g = 0; g < (nGroups + 1); g++) {
       printf("  %d -> %d\n", g, grpToGrp[g]);
@@ -684,9 +957,216 @@ void Pads::release()
   }
   deleteInt(neighbors);
   nPads = 0;
+  nObsPads = 0;
+}
+
+// Refine on/around localMax.
+// Each pixel listed in pixToRefineIdx and not yet refined (saturate == 0) is
+// split into four quarter pixels: NW replaces it in place, NE/SW/SE are
+// appended after the current last pixel. The Cij row of each of the four
+// (N values starting at Cij[N * pixelIndex]) is recomputed.
+// - pads: the pads, read through their xInf/xSup/yInf/ySup bounds
+// - pixToRefineIdx: indexes of the pixels to split
+// NOTE(review): no capacity check is done here - the pixel arrays and Cij are
+// presumably over-allocated by the caller (3 extra pixels per refinement).
+void Pads::refineLocalMaxAndUpdateCij(const Pads& pads,
+                                      std::vector<PadIdx_t>& pixToRefineIdx, double Cij[])
+{
+
+  // Take care : here all pads data describe the pixels
+  // Number of Pixels
+  int K = nPads;
+  // Number of Pads
+  int N = pads.getNbrOfPads();
+
+  const double* xInf = pads.getXInf();
+  const double* yInf = pads.getYInf();
+  const double* xSup = pads.getXSup();
+  const double* ySup = pads.getYSup();
+  int chId = pads.getChamberId();
+
+  if (clusterConfig.padMappingLog >= clusterConfig.detail) {
+    // q holds the K pixel charges (it used to be printed with the pad count N)
+    vectorPrint("Pads::refinePads", q, K);
+    printf("Pads::refinePads count(new nPads)=%d\n", N);
+  }
+
+  // 1D integrals (one value per pad), held by vectors so that they are
+  // released on every exit path
+  std::vector<double> xWestIntegrals(N);
+  std::vector<double> xEastIntegrals(N);
+  std::vector<double> yNorthIntegrals(N);
+  std::vector<double> ySouthIntegrals(N);
+  int axe;
+  double totalChargeInc = 0.0;
+  // Index of the next appended pixel
+  int k = K;
+  for (const int pixelMaxIdx : pixToRefineIdx) {
+    // saturate is used to tag pixels already refined
+    if (saturate[pixelMaxIdx] == 0) {
+
+      saturate[pixelMaxIdx] = 1;
+      double xOld = x[pixelMaxIdx];
+      double yOld = y[pixelMaxIdx];
+      double dxOld = dx[pixelMaxIdx];
+      double dyOld = dy[pixelMaxIdx];
+      double qOld = q[pixelMaxIdx];
+      // Each of the 4 quarter pixels keeps the charge qOld (not 0.25 * qOld)
+      totalChargeInc += (3 * qOld);
+      // NW
+      // Done in place (same pixel index)
+      x[pixelMaxIdx] = xOld - 0.5 * dxOld;
+      y[pixelMaxIdx] = yOld + 0.5 * dyOld;
+      dx[pixelMaxIdx] = 0.5 * dxOld;
+      dy[pixelMaxIdx] = 0.5 * dyOld;
+      q[pixelMaxIdx] = qOld;
+      // Update Cij
+      axe = 0;
+      compute1DPadIntegrals(xInf, xSup, N, x[pixelMaxIdx], axe, chId, xWestIntegrals.data());
+      axe = 1;
+      compute1DPadIntegrals(yInf, ySup, N, y[pixelMaxIdx], axe, chId, yNorthIntegrals.data());
+      // 2D Integral
+      vectorMultVector(xWestIntegrals.data(), yNorthIntegrals.data(), N, &Cij[N * pixelMaxIdx]);
+
+      // NE
+      x[k] = xOld + 0.5 * dxOld;
+      y[k] = yOld + 0.5 * dyOld;
+      dx[k] = 0.5 * dxOld;
+      dy[k] = 0.5 * dyOld;
+      q[k] = qOld;
+      saturate[k] = 1;
+      // Update Cij (same y as NW: yNorthIntegrals is reused)
+      axe = 0;
+      compute1DPadIntegrals(xInf, xSup, N, x[k], axe, chId, xEastIntegrals.data());
+      vectorMultVector(xEastIntegrals.data(), yNorthIntegrals.data(), N, &Cij[N * k]);
+      k++;
+
+      // SW
+      x[k] = xOld - 0.5 * dxOld;
+      y[k] = yOld - 0.5 * dyOld;
+      dx[k] = 0.5 * dxOld;
+      dy[k] = 0.5 * dyOld;
+      q[k] = qOld;
+      saturate[k] = 1;
+      // Update Cij (same x as NW: xWestIntegrals is reused)
+      axe = 1;
+      compute1DPadIntegrals(yInf, ySup, N, y[k], axe, chId, ySouthIntegrals.data());
+      vectorMultVector(xWestIntegrals.data(), ySouthIntegrals.data(), N, &Cij[N * k]);
+      k++;
+
+      // SE
+      x[k] = xOld + 0.5 * dxOld;
+      y[k] = yOld - 0.5 * dyOld;
+      dx[k] = 0.5 * dxOld;
+      dy[k] = 0.5 * dyOld;
+      q[k] = qOld;
+      saturate[k] = 1;
+      // Update Cij (x integrals of NE, y integrals of SW)
+      vectorMultVector(xEastIntegrals.data(), ySouthIntegrals.data(), N, &Cij[N * k]);
+      k++;
+      // 3 pixels were appended (NW reused the slot of the refined pixel)
+      nPads += 3;
+    }
+  }
+  totalCharge += totalChargeInc;
+  // Keep nObsPads equal to nPads
+  nObsPads = nPads;
+}
+
+// Refinement on local maxima - used for pixels.
+// Old version (without Cij update)
+// To keep ???
+// Each pixel localMaxIdx[i] that passes the charge cut-off and is not yet
+// refined (saturate == 0) is split into four quarter pixels: NW replaces it in
+// place, NE/SW/SE are appended after the current last pixel.
+// - localMax: local maxima; only their charges q[i] are read (cut-off test)
+// - localMaxIdx: pixel index of each local maximum
+// NOTE(review): no capacity check is done here - the pixel arrays are
+// presumably over-allocated by the caller (3 extra pixels per refinement).
+void Pads::refineLocalMax(Pads& localMax, std::vector<PadIdx_t>& localMaxIdx)
+{
+  // ??? LocalMax not used except for the cutoff
+  // NOTE(review): localMax.q[i] is read for every i < localMaxIdx.size(), so
+  // localMax presumably holds at least that many pads - confirm with callers.
+
+  // Take care : here all pads data describe the pixels
+  int N = nPads;
+  // With cut = -1 the cut-off below never rejects a charge >= 0
+  double cut = -1;
+  // Debug traces are only emitted at the 'detail' log level
+  const bool verbose = (clusterConfig.padMappingLog >= clusterConfig.detail);
+  if (verbose) {
+    vectorPrint("Pads::refinePads", q, N);
+    printf("Pads::refinePads count(new nPads)=%d\n", N);
+  }
+
+  double totalChargeInc = 0.0;
+  // Index of the next appended pixel
+  int k = N;
+  const int nToRefine = static_cast<int>(localMaxIdx.size());
+  for (int i = 0; i < nToRefine; i++) {
+    int pixelMaxIdx = localMaxIdx[i];
+    // saturate is used to tag pixels already refined
+    if (verbose) {
+      printf("Refinement i=%d, localMax.q[i]=%f saturate[pixelMaxIdx]=%d\n", i, localMax.q[i], saturate[pixelMaxIdx]);
+    }
+    if ((localMax.q[i] > cut) && (saturate[pixelMaxIdx] == 0)) {
+      saturate[pixelMaxIdx] = 1;
+      double xOld = x[pixelMaxIdx];
+      double yOld = y[pixelMaxIdx];
+      double dxOld = dx[pixelMaxIdx];
+      double dyOld = dy[pixelMaxIdx];
+      double qOld = q[pixelMaxIdx];
+      if (verbose) {
+        printf("refine on pixel %d\n", pixelMaxIdx);
+      }
+      // Each of the 4 quarter pixels keeps the charge qOld (not 0.25 * qOld)
+      totalChargeInc += (3 * qOld);
+      // NW
+      // Done in place (same pixel index)
+      x[pixelMaxIdx] = xOld - 0.5 * dxOld;
+      y[pixelMaxIdx] = yOld + 0.5 * dyOld;
+      dx[pixelMaxIdx] = 0.5 * dxOld;
+      dy[pixelMaxIdx] = 0.5 * dyOld;
+      q[pixelMaxIdx] = qOld;
+
+      // NE
+      x[k] = xOld + 0.5 * dxOld;
+      y[k] = yOld + 0.5 * dyOld;
+      dx[k] = 0.5 * dxOld;
+      dy[k] = 0.5 * dyOld;
+      q[k] = qOld;
+      saturate[k] = 1;
+      k++;
+
+      // SW
+      x[k] = xOld - 0.5 * dxOld;
+      y[k] = yOld - 0.5 * dyOld;
+      dx[k] = 0.5 * dxOld;
+      dy[k] = 0.5 * dyOld;
+      q[k] = qOld;
+      saturate[k] = 1;
+      k++;
+
+      // SE
+      x[k] = xOld + 0.5 * dxOld;
+      y[k] = yOld - 0.5 * dyOld;
+      dx[k] = 0.5 * dxOld;
+      dy[k] = 0.5 * dyOld;
+      q[k] = qOld;
+      saturate[k] = 1;
+      k++;
+      // 3 pixels were appended (NW reused the slot of the refined pixel)
+      nPads += 3;
+    }
+  }
+  // Account for the 3 extra copies of qOld per refined pixel
+  totalCharge += totalChargeInc;
+  // Keep nObsPads equal to nPads
+  nObsPads = nPads;
 }
 
-Pads* Pads::refinePads()
+Pads* Pads::refineAll()
 {
   int N = nPads;
   /* qCut : not used
@@ -702,7 +1182,7 @@ Pads* Pads::refinePads()
   double cut = -1;
   int count = N;
   //
-  if (ClusterConfig::padMappingLog >= ClusterConfig::detail) {
+  if (clusterConfig.padMappingLog >= clusterConfig.detail) {
     vectorPrint("Pads::refinePads", q, N);
     printf("Pads::refinePads count(new nPads)=%d\n", count);
   }
@@ -751,26 +1231,30 @@ Pads* Pads::refinePads()
   return rPads;
 }
 
-Pads* Pads::extractLocalMax()
+Pads* Pads::extractLocalMaxOnCoarsePads(std::vector<PadIdx_t>& localMaxIdx)
 {
-  if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::detail) {
-    printf("  - Pads::extractLocalMax (extractLocalMax nPads=%d)\n",
-           nPads);
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
+    printf("  - Pads::extractLocalMax on Coarse Pads(extractLocalMax nPads=%d)\n", nPads);
   }
   double qMax = vectorMax(q, nPads);
   //
-  // Compute the neighbors once
-  if (neighbors == nullptr) {
-    // Kernel size of 1
-    neighbors = buildKFirstsNeighbors(1);
+  // TO DO ??? Compute the neighbors once
+  // between to refinements
+  if (neighbors != nullptr) {
+    delete[] neighbors;
   }
+  // 4(5) neighbors
+  neighbors = buildFirstNeighbors();
   PadIdx_t* neigh = neighbors;
+  // printNeighbors( neigh, nPads);
+  //
+  // Part I - Morphologic Laplacian operator
   //
-  // Result of the Laplacian-like operator
   double morphLaplacian[nPads];
   double laplacian[nPads];
   double weight[nPads];
   vectorSet(morphLaplacian, -1.0, nPads);
+  // Invalid the neighbors of a local max
   Mask_t alreadyDone[nPads];
   vectorSetZeroShort(alreadyDone, nPads);
   std::vector<PadIdx_t> newPixelIdx;
@@ -792,7 +1276,7 @@ Pads* Pads::extractLocalMax()
           nLess++;
           // Laplacian
           double cst;
-          cst = (i == v) ? 1.0 : -0.125;
+          cst = (i == v) ? 1.0 : -0.25;
           laplacian[i] += cst * q[v];
           weight[i] += q[v];
         }
@@ -800,11 +1284,11 @@ Pads* Pads::extractLocalMax()
       // Invalid ?? morphLaplacian[i] = double(nLess) / (count - 1);
       morphLaplacian[i] = double(nLess) / count;
       //
-      if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::detail) {
+      if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
         printf(
-          "    Laplacian i=%d, x[i]=%6.3f, y[i]=%6.3f, z[i]=%6.3f, count=%d, "
-          "morphLapl[i]=%6.3f, lapl[i]=%6.3f, weight[i]=%6.3f\n",
-          i, x[i], y[i], q[i], count, morphLaplacian[i], laplacian[i],
+          "    Laplacian i=%d, x=%6.3f, y=%6.3f, dx=%6.3f,dy=%6.3f, q=%6.3f, "
+          "count=%d, morphLapl[i]=%6.3f, lapl[i]=%6.3f, weight=%6.3f",
+          i, x[i], y[i], dx[i], dy[i], q[i], count, morphLaplacian[i], laplacian[i],
           weight[i]);
       }
       if (morphLaplacian[i] >= 1.0) {
@@ -813,11 +1297,12 @@ Pads* Pads::extractLocalMax()
         // Inv ??? if ((q[i] > 0.015 * qMax) || (fabs(laplacian[i]) > (0.5 * q[i]))) {
         if (q[i] > 0.015 * qMax) {
           newPixelIdx.push_back(i);
-          if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::info) {
+          // if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+          if (0) {
             printf(
-              "    Laplacian i=%d, x[i]=%6.3f, y[i]=%6.3f, z[i]=%6.3f, "
-              "count=%d, morphLapl[i]=%6.3f, lapl[i]=%6.3f, weight[i]=%6.3f",
-              i, x[i], y[i], q[i], count, morphLaplacian[i], laplacian[i],
+              "    Laplacian i=%d, x=%6.3f, y=%6.3f, dx=%6.3f,dy=%6.3f, q=%6.3f, "
+              "count=%d, morphLapl[i]=%6.3f, lapl[i]=%6.3f, weight=%6.3f",
+              i, x[i], y[i], dx[i], dy[i], q[i], count, morphLaplacian[i], laplacian[i],
               weight[i]);
             printf("  Selected %d\n", i);
           }
@@ -827,13 +1312,24 @@ Pads* Pads::extractLocalMax()
         for (PadIdx_t* neigh_ptr = getNeighborListOf(neigh, i); *neigh_ptr != -1;
              neigh_ptr++) {
           PadIdx_t v = *neigh_ptr;
-          alreadyDone[v] = 1;
+          alreadyDone[v] += 1;
+          /*
+          if (q[v] > 0.5 * q[i] ) {
+            // Tag to be refined
+            newPixelIdx.push_back(v);
+
+          }
+          */
         }
       }
     }
   }
+  //
+  // Part II - Extract the local max
+  //
   // Extract the new selected pixels
   int nNewPixels = newPixelIdx.size();
+  // int indexInThePixel[nNewPixels];
   Pads* newPixels = new Pads(nNewPixels, chamberId);
   for (int i = 0; i < nNewPixels; i++) {
     newPixels->x[i] = x[newPixelIdx[i]];
@@ -847,55 +1343,717 @@ Pads* Pads::extractLocalMax()
   // of the max charge of local Max
   double cutRatio = 0.01;
   double qCut = cutRatio * vectorMax(newPixels->q, newPixels->nPads);
-  //
-  // Refine the charge and coordinates of the local max.
-  //
-  // ??? TODO:  suppress te refinment to optimize
+
   localMax = new Pads(nNewPixels, chamberId);
   localMax->setToZero();
-  // Sort local max by charge value
-  int index[nNewPixels];
+
+  int k0 = 0;
+  printf(" q Cut-Off %f\n", qCut);
   for (int k = 0; k < nNewPixels; k++) {
-    index[k] = k;
+    if (newPixels->q[k] > qCut) {
+      localMax->q[k0] = newPixels->q[k];
+      localMax->x[k0] = newPixels->x[k];
+      localMax->y[k0] = newPixels->y[k];
+      localMax->dx[k0] = newPixels->dx[k];
+      localMax->dy[k0] = newPixels->dy[k];
+      printf("    seed selected q=%8.2f, (x,y) = (%8.3f, %8.3f)\n",
+             localMax->q[k0], localMax->x[k0], localMax->y[k0]);
+      k0++;
+    }
   }
-  std::sort(index, &index[nNewPixels], [=](int a, int b) {
-    return (newPixels->q[a] > newPixels->q[b]);
-  });
+  localMax->nPads = k0;
+  localMax->nObsPads = k0;
   /// ???? delete[] neigh;
-  neigh = newPixels->buildKFirstsNeighbors(1);
+  //
+  // Part IV - Refine the charge and coordinates of the local max.
+  //
   // Avoid taking the same charge for 2 different localMax
-  Mask_t mask[nNewPixels];
-  vectorSetShort(mask, 1, nNewPixels);
-  int kSelected = 0;
-  for (int k = 0; k < nNewPixels; k++) {
-    if (mask[k] == 1) {
-      // Compute the barycenter
-      for (PadIdx_t* neigh_ptr = getNeighborListOf(neigh, k);
-           *neigh_ptr != -1; neigh_ptr++) {
-        PadIdx_t v = *neigh_ptr;
-        localMax->q[k] += newPixels->q[v] * mask[v];
-        localMax->x[k] += newPixels->x[v] * newPixels->q[v] * mask[v];
-        localMax->y[k] += newPixels->y[v] * newPixels->q[v] * mask[v];
-        mask[v] = 0;
-      }
-      // Select (or not) the local Max
-      if (localMax->q[k] > qCut) {
-        localMax->q[kSelected] = localMax->q[k];
-        localMax->x[kSelected] = localMax->x[k] / localMax->q[k];
-        localMax->y[kSelected] = localMax->y[k] / localMax->q[k];
-        localMax->dx[kSelected] = newPixels->dx[k];
-        localMax->dy[kSelected] = newPixels->dy[k];
-        if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::info) {
-          printf("    seed selected q=%8.2f, (x,y) = (%8.3f, %8.3f)\n",
-                 localMax->q[k], localMax->x[k], localMax->q[k]);
-        }
-        kSelected++;
-      }
-    }
-  }
-  localMax->nPads = kSelected;
+  // neigh = newPixels->buildFirstNeighbors();
+  // printNeighbors( neigh, newPixels->getNbrOfPads());
+  if (0) {
+    Mask_t mask[nNewPixels];
+    vectorSetShort(mask, 1, nNewPixels);
+    int kSelected = 0;
+    // ???
+    qCut = 0.0;
 
-  delete[] neigh;
+    for (int k = 0; k < nNewPixels; k++) {
+      if (mask[k] == 1) {
+        // Compute the charge barycenter
+        for (PadIdx_t* neigh_ptr = getNeighborListOf(neigh, k);
+             *neigh_ptr != -1; neigh_ptr++) {
+          PadIdx_t v = *neigh_ptr;
+          localMax->q[k] += newPixels->q[v] * mask[v];
+          localMax->x[k] += newPixels->x[v] * newPixels->q[v] * mask[v];
+          localMax->y[k] += newPixels->y[v] * newPixels->q[v] * mask[v];
+          mask[v] = 0;
+        }
+        // Select (or not) the local Max
+        if (localMax->q[k] > qCut) {
+          localMax->q[kSelected] = localMax->q[k];
+          localMax->x[kSelected] = localMax->x[k] / localMax->q[k];
+          localMax->y[kSelected] = localMax->y[k] / localMax->q[k];
+          localMax->dx[kSelected] = newPixels->dx[k];
+          localMax->dy[kSelected] = newPixels->dy[k];
+          if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+            printf("    seed selected q=%8.2f, (x,y) = (%8.3f, %8.3f)\n",
+                   localMax->q[k], localMax->x[k], localMax->y[k]);
+          }
+          localMaxIdx.push_back(newPixelIdx[k]);
+          kSelected++;
+        }
+      }
+    }
+
+    for (int k = 0; k < nNewPixels; k++) {
+      localMax->q[k] = newPixels->q[k];
+      localMax->x[k] = newPixels->x[k];
+      localMax->y[k] = newPixels->y[k];
+      localMax->dx[k] = newPixels->dx[k];
+      localMax->dy[k] = newPixels->dy[k];
+      printf("    seed selected q=%8.2f, (x,y) = (%8.3f, %8.3f)\n",
+             localMax->q[k], localMax->x[k], localMax->y[k]);
+    }
+    kSelected = nNewPixels;
+    localMax->nPads = kSelected;
+    localMax->nObsPads = kSelected;
+  }
+
+  delete[] neighbors;
+  neighbors = nullptr;
+
+  delete newPixels;
+
+  return localMax;
+}
+
+// Assess or not if xyCheck is a remanent local Max (can be removed).
+// xy[0..N-1] are the coordinates (along one axis) of all the candidates,
+// xyCheck included; returns true when vectorBuildMaskLess selects more than
+// one of the distances |xy[i] - xyCheck| against `precision`.
+bool Pads::assessRemanent(double xyCheck, double* xy, double precision, int N)
+{
+  // Heap buffers: variable-length arrays are not standard C++
+  std::vector<double> xyDiff(N);
+  std::vector<Mask_t> mask(N);
+  vectorAddScalar(xy, -xyCheck, N, xyDiff.data());
+  vectorAbs(xyDiff.data(), N, xyDiff.data());
+  vectorBuildMaskLess(xyDiff.data(), precision, N, mask.data());
+  const int nRemanents = vectorSumShort(mask.data(), N);
+  // One xyDiff is zero => nRemanents >= 1
+  // (xyCheck itself belongs to xy[]), hence the "> 1" test
+  return (nRemanents > 1);
+}
+
+Pads* Pads::extractLocalMaxOnCoarsePads_Remanent(std::vector<PadIdx_t>& localMaxIdx, double dxMinPadSize, double dyMinPadSize)
+{
+  // Extract the local maxima of the coarse pads/pixels and drop the "remanent"
+  // ones: a candidate aligned in x (resp. y) with a higher-charge local max is
+  // removed when assessRemanent also finds another candidate at its y (resp. x).
+  // Returns a newly allocated Pads holding the kept local maxima.
+  // NOTE(review): localMaxIdx is only filled inside the disabled `if (0)` block of Part IV.
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
+    printf("  - Pads::extractLocalMax on Coarse Pads(extractLocalMax nPads=%d)\n", nPads);
+  }
+  double qMax = vectorMax(q, nPads);
+  // TO DO ??? Compute the neighbors once between two refinements
+  if (neighbors != nullptr) {
+    delete[] neighbors;
+  }
+  // 4(5) neighbors
+  neighbors = buildFirstNeighbors();
+  PadIdx_t* neigh = neighbors;
+  //
+  // Part I - Morphologic Laplacian operator
+  //
+  double morphLaplacian[nPads];
+  double laplacian[nPads];
+  double weight[nPads];
+  vectorSet(morphLaplacian, -1.0, nPads);
+  // Invalidate the neighbors of a local max
+  Mask_t alreadyDone[nPads];
+  vectorSetZeroShort(alreadyDone, nPads);
+  std::vector<PadIdx_t> newPixelIdx;
+  bool less;
+  for (int i = 0; i < nPads; i++) {
+    if (alreadyDone[i] == 0) {
+      int nLess = 0;
+      int count = 0;
+      laplacian[i] = 0.0;
+      weight[i] = 0.0;
+      for (PadIdx_t* neigh_ptr = getNeighborListOf(neigh, i); *neigh_ptr != -1;
+           neigh_ptr++) {
+        PadIdx_t v = *neigh_ptr;
+        // Morphologic Laplacian
+        // (non-strict comparison: equal charges count as "less")
+        less = (q[v] <= q[i]);
+        count++;
+        if (less) {
+          nLess++;
+          // Laplacian
+          double cst;
+          cst = (i == v) ? 1.0 : -0.25;
+          laplacian[i] += cst * q[v];
+          weight[i] += q[v];
+        }
+      }
+      // Fraction of the listed neighbors whose charge is <= q[i]
+      morphLaplacian[i] = double(nLess) / count;
+      //
+      if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
+        printf(
+          "    Laplacian i=%d, x=%6.3f, y=%6.3f, dx=%6.3f,dy=%6.3f, q=%6.3f, "
+          "count=%d, morphLapl[i]=%6.3f, lapl[i]=%6.3f, weight=%6.3f",
+          i, x[i], y[i], dx[i], dy[i], q[i], count, morphLaplacian[i], laplacian[i],
+          weight[i]);
+      }
+      if (morphLaplacian[i] >= 1.0) {
+        //  Local max charge must be higher than 1.5 % of the max
+        //  (the additional curvature test below is disabled)
+        // Inv ??? if ((q[i] > 0.015 * qMax) || (fabs(laplacian[i]) > (0.5 * q[i]))) {
+        if (q[i] > 0.015 * qMax) {
+          newPixelIdx.push_back(i);
+          // if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+          if (0) {
+            printf(
+              "    Laplacian i=%d, x=%6.3f, y=%6.3f, dx=%6.3f,dy=%6.3f, q=%6.3f, "
+              "count=%d, morphLapl[i]=%6.3f, lapl[i]=%6.3f, weight=%6.3f",
+              i, x[i], y[i], dx[i], dy[i], q[i], count, morphLaplacian[i], laplacian[i],
+              weight[i]);
+            printf("  Selected %d\n", i);
+          }
+        }
+        // Invalidate the neighbors
+        // they can't be a maximum
+        for (PadIdx_t* neigh_ptr = getNeighborListOf(neigh, i); *neigh_ptr != -1;
+             neigh_ptr++) {
+          PadIdx_t v = *neigh_ptr;
+          alreadyDone[v] += 1;
+          /*
+          if (q[v] > 0.5 * q[i] ) {
+            // Tag to be refined
+            newPixelIdx.push_back(v);
+
+          }
+          */
+        }
+      }
+    }
+  }
+  //
+  // Part II - Extract the local max
+  //
+  // Extract the new selected pixels
+  int nNewPixels = newPixelIdx.size();
+  // int indexInThePixel[nNewPixels];
+  Pads* newPixels = new Pads(nNewPixels, chamberId);
+  for (int i = 0; i < nNewPixels; i++) {
+    newPixels->x[i] = x[newPixelIdx[i]];
+    newPixels->y[i] = y[newPixelIdx[i]];
+    newPixels->dx[i] = dx[newPixelIdx[i]];
+    newPixels->dy[i] = dy[newPixelIdx[i]];
+    newPixels->q[i] = q[newPixelIdx[i]];
+  }
+  Pads* localMax = nullptr;
+  // Suppress local max. whose charge is less of 1%
+  // of the max charge of local Max
+  double cutRatio = 0.01;
+  double qCut = cutRatio * vectorMax(newPixels->q, newPixels->nPads);
+
+  // Add pads / pixel to be refined.
+  // They are neigbous of 2 or more local max
+  /*
+  for (int i = 0; i < nPads; i++) {
+    if (alreadyDone[i] > 1) {
+      newPixelIdx.push_back(i);
+      printf("Other pad/pixel to be refined: i=%d x,y=(%7.2f,%7.2f) q=%8.1f \n", i, x[i], y[i], q[i]);
+    }
+  }
+  */
+
+  //
+  // Part III - suppress the remanent local max
+  //
+  if (clusterConfig.processingLog >= clusterConfig.info) {
+    printf("  [extractLocalMaxOnCoarsePads] Start suppressing remanent localMax: nbr Local Max [nNewPixels]=%d\n", nNewPixels);
+  }
+  int k0;
+  if (nNewPixels > 3) {
+    // localMax is built at the end of this branch by selectPads: no preliminary allocation (it was leaked)
+    // Sort local max by decreasing charge value
+    // (index[] is then compacted in place: index[0..k0-1] are the kept local max)
+    int index[nNewPixels];
+    for (int k = 0; k < nNewPixels; k++) {
+      index[k] = k;
+    }
+    std::sort(index, &index[nNewPixels], [=](int a, int b) {
+      return (newPixels->q[a] > newPixels->q[b]);
+    });
+    // k0 describe the list of true local max (local max - remanent local max)
+    // k0 number of true local max
+    k0 = 0;
+    for (int k = 0; k < nNewPixels; k++) {
+      if (index[k] > -1) {
+        // Store the true local max
+        index[k0] = index[k];
+        int idx0 = index[k0];
+
+        // Remove horizontal/vertical remanent local max
+        double x0 = newPixels->x[idx0];
+        double y0 = newPixels->y[idx0];
+        if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+          printf("    Check remanent from loc.max k=%d, (x,y,q)= %f %f %f\n", k, x0, y0, newPixels->q[idx0]);
+        }
+        for (int l = k + 1; l < nNewPixels; l++) {
+          if (index[l] > -1) {
+            if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+              printf("      Case l=%d, (x,y,q)= %f %f %f\n", l, newPixels->x[index[l]], newPixels->y[index[l]], newPixels->q[index[l]]);
+            }
+
+            bool sameX = (std::abs(newPixels->x[index[l]] - x0) < dxMinPadSize);
+            bool sameY = (std::abs(newPixels->y[index[l]] - y0) < dyMinPadSize);
+            if (sameX) {
+              // Check in Y axe
+              // Check other remanent loc max in y direction)
+              // If founded : true remanent loc Max
+              // if not a real remanent loc max (must be kept)
+              bool realRemanent = assessRemanent(newPixels->y[index[l]], newPixels->y, dyMinPadSize, nNewPixels);
+              if (realRemanent) {
+                // Remanent local max: remove it
+                // NOTE(review): unlike the sameY case below, its charge is NOT added to newPixels->q[idx0] here (that statement is commented out) - confirm
+                // Remove the remanent
+                if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+                  printf("      XY-Remanent: remove l=%d, (x,y,q)= %f %f %f\n", l, newPixels->x[index[l]], newPixels->y[index[l]], newPixels->q[index[l]]);
+                }
+                index[l] = -1;
+              }
+            }
+            if (sameY && (index[l] > -1)) {
+              // Check in X axe (skipped when l has just been removed above: index[l] is -1)
+              // Check other remanent loc max in x direction)
+              // If founded : true remanent loc Max
+              // if not a real remanent loc max (must be kept)
+              bool realRemanent = assessRemanent(newPixels->x[index[l]], newPixels->x, dxMinPadSize, nNewPixels);
+              if (realRemanent) {
+                // Remanent local max: remove it
+                // The local max absorb the charge of the remanent loc max
+                newPixels->q[idx0] += newPixels->q[index[l]];
+                // Remove the remanent
+                if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+                  printf("      YX-Remanent: remove l=%d, (x,y,q)= %f %f %f\n", l, newPixels->x[index[l]], newPixels->y[index[l]], newPixels->q[index[l]]);
+                }
+                index[l] = -1;
+              }
+            }
+            if (!sameX && !sameY && (clusterConfig.EMLocalMaxLog >= clusterConfig.info)) {
+              printf("      Keep l=%d, (x,y,q)= %f %f %f\n", l, newPixels->x[index[l]], newPixels->y[index[l]], newPixels->q[index[l]]);
+            }
+          }
+        }
+        k0++;
+      }
+    }
+    if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+      for (int l = 0; l < k0; l++) {
+        printf("    l=%d index[l]=%d (x, y, q)= %f %f %f\n", l, index[l], newPixels->getX()[index[l]], newPixels->getY()[index[l]], newPixels->getCharges()[index[l]]);
+      }
+    }
+    // Clean the local Max - Remove definitely remanent local max
+    localMax = newPixels->selectPads(index, k0);
+    nNewPixels = k0;
+  } else {
+    localMax = new Pads(newPixels, PadMode::xydxdyMode);
+    k0 = nNewPixels;
+  }
+
+  if (0) {
+    localMax = new Pads(nNewPixels, chamberId);
+    localMax->setToZero();
+  }
+  /// ???? delete[] neigh;
+  //
+  // Part IV - Refine the charge and coordinates of the local max.
+  //
+  // Avoid taking the same charge for 2 different localMax
+  // neigh = newPixels->buildFirstNeighbors();
+  // printNeighbors( neigh, newPixels->getNbrOfPads());
+  if (0) {
+    Mask_t mask[nNewPixels];
+    vectorSetShort(mask, 1, nNewPixels);
+    int kSelected = 0;
+    // ???
+    qCut = 0.0;
+
+    for (int k = 0; k < nNewPixels; k++) {
+      if (mask[k] == 1) {
+        // Compute the charge barycenter
+        for (PadIdx_t* neigh_ptr = getNeighborListOf(neigh, k);
+             *neigh_ptr != -1; neigh_ptr++) {
+          PadIdx_t v = *neigh_ptr;
+          localMax->q[k] += newPixels->q[v] * mask[v];
+          localMax->x[k] += newPixels->x[v] * newPixels->q[v] * mask[v];
+          localMax->y[k] += newPixels->y[v] * newPixels->q[v] * mask[v];
+          mask[v] = 0;
+        }
+        // Select (or not) the local Max
+        if (localMax->q[k] > qCut) {
+          localMax->q[kSelected] = localMax->q[k];
+          localMax->x[kSelected] = localMax->x[k] / localMax->q[k];
+          localMax->y[kSelected] = localMax->y[k] / localMax->q[k];
+          localMax->dx[kSelected] = newPixels->dx[k];
+          localMax->dy[kSelected] = newPixels->dy[k];
+          if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+            printf("    seed selected q=%8.2f, (x,y) = (%8.3f, %8.3f)\n",
+                   localMax->q[k], localMax->x[k], localMax->y[k]);
+          }
+          localMaxIdx.push_back(newPixelIdx[k]);
+          kSelected++;
+        }
+      }
+    }
+
+    for (int k = 0; k < nNewPixels; k++) {
+      localMax->q[k] = newPixels->q[k];
+      localMax->x[k] = newPixels->x[k];
+      localMax->y[k] = newPixels->y[k];
+      localMax->dx[k] = newPixels->dx[k];
+      localMax->dy[k] = newPixels->dy[k];
+      printf("    seed selected q=%8.2f, (x,y) = (%8.3f, %8.3f)\n",
+             localMax->q[k], localMax->x[k], localMax->y[k]);
+    }
+    kSelected = nNewPixels;
+    localMax->nPads = kSelected;
+    localMax->nObsPads = kSelected;
+  }
+
+  delete[] neighbors;
+  neighbors = nullptr;
+
+  delete newPixels;
+
+  return localMax;
+}
+
+Pads* Pads::extractLocalMax(std::vector<PadIdx_t>& localMaxIdx, double dxMinPadSize, double dyMinPadSize)
+{
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
+    printf("  - Pads::extractLocalMax (extractLocalMax nPads=%d)\n", nPads);
+  }
+  double qMax = vectorMax(q, nPads);
+  // Extract the local maxima of the charges q: pad i is a candidate when every pad v of
+  // its neighbor list (buildKFirstsNeighbors(1)) has q[v] <= q[i] and q[i] > 1.5 % of qMax.
+  // TODO: compute the neighbors once between two refinements
+  if (neighbors != nullptr) {
+    delete[] neighbors;
+  }
+  // Kernel size of 1
+  neighbors = buildKFirstsNeighbors(1);
+  PadIdx_t* neigh = neighbors;
+  // localMaxIdx receives the pad index of each candidate; dxMinPadSize/dyMinPadSize enable
+  // the remanent suppression (Part III). Returns a Pads allocated with new holding the seeds.
+  // Result of the Laplacian-like operator
+  double morphLaplacian[nPads];
+  double laplacian[nPads];
+  double weight[nPads];
+  vectorSet(morphLaplacian, -1.0, nPads);
+  Mask_t alreadyDone[nPads];
+  vectorSetZeroShort(alreadyDone, nPads);
+  std::vector<PadIdx_t> newPixelIdx;
+  bool less;
+  for (int i = 0; i < nPads; i++) {
+    if (alreadyDone[i] == 0) {
+      int nLess = 0;
+      int count = 0;
+      laplacian[i] = 0.0;
+      weight[i] = 0.0;
+      for (PadIdx_t* neigh_ptr = getNeighborListOf(neigh, i); *neigh_ptr != -1;
+           neigh_ptr++) {
+        PadIdx_t v = *neigh_ptr;
+        // Morphologic Laplacian
+        // nLess += (q[v] < q[i]);
+        less = (q[v] <= q[i]);
+        count++;
+        if (less) {
+          nLess++;
+          // Laplacian
+          double cst;
+          cst = (i == v) ? 1.0 : -0.125;
+          laplacian[i] += cst * q[v];
+          weight[i] += q[v];
+        }
+      }
+      // Invalid ?? morphLaplacian[i] = double(nLess) / (count - 1);
+      morphLaplacian[i] = double(nLess) / count;
+      //
+      if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
+        printf(
+          "    Laplacian i=%d, x=%6.3f, y=%6.3f, dx=%6.3f,dy=%6.3f, q=%6.3f, "
+          "count=%d, morphLapl[i]=%6.3f, lapl[i]=%6.3f, weight=%6.3f\n",
+          i, x[i], y[i], dx[i], dy[i], q[i], count, morphLaplacian[i], laplacian[i],
+          weight[i]);
+      }
+      if (morphLaplacian[i] >= 1.0) {
+        //  Local max charge must be higher than 1.5 % of the max
+        //  (the curvature criterion kept below for reference is disabled)
+        // Inv ??? if ((q[i] > 0.015 * qMax) || (fabs(laplacian[i]) > (0.5 * q[i]))) {
+        if (q[i] > 0.015 * qMax) {
+          newPixelIdx.push_back(i);
+          // if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+          if (0) {
+            printf(
+              "    Laplacian i=%d, x=%6.3f, y=%6.3f, dx=%6.3f,dy=%6.3f, q=%6.3f, "
+              "count=%d, morphLapl[i]=%6.3f, lapl[i]=%6.3f, weight=%6.3f",
+              i, x[i], y[i], dx[i], dy[i], q[i], count, morphLaplacian[i], laplacian[i],
+              weight[i]);
+            printf("  Selected %d\n", i);
+          }
+        }
+        // Invalidate the neighbors:
+        // they can't be a maximum
+        for (PadIdx_t* neigh_ptr = getNeighborListOf(neigh, i); *neigh_ptr != -1;
+             neigh_ptr++) {
+          PadIdx_t v = *neigh_ptr;
+          alreadyDone[v] += 1;
+          /*
+          if (q[v] > 0.5 * q[i] ) {
+            // Tag to be refined
+            newPixelIdx.push_back(v);
+
+          }
+          */
+        }
+      }
+    }
+  }
+  //
+  // Extract the new selected pixels
+  int nNewPixels = newPixelIdx.size();
+  // int indexInThePixel[nNewPixels];
+  Pads* newPixels = new Pads(nNewPixels, chamberId);
+  for (int i = 0; i < nNewPixels; i++) {
+    newPixels->x[i] = x[newPixelIdx[i]];
+    newPixels->y[i] = y[newPixelIdx[i]];
+    newPixels->dx[i] = dx[newPixelIdx[i]];
+    newPixels->dy[i] = dy[newPixelIdx[i]];
+    newPixels->q[i] = q[newPixelIdx[i]];
+    localMaxIdx.push_back(newPixelIdx[i]);
+  }
+  Pads* localMax = nullptr;
+  // Suppress local max. whose charge is less of 1%
+  // of the max charge of local Max
+  double cutRatio = 0.01;
+  double qCut = cutRatio * vectorMax(newPixels->q, newPixels->nPads);
+
+  // Add pads / pixel to be refined.
+  // They are neighbors of 2 or more local max
+  /*
+  for (int i = 0; i < nPads; i++) {
+    if (alreadyDone[i] > 1) {
+       newPixelIdx.push_back(i);
+      printf("Other pad/pixel to be refined: i=%d x,y=(%7.2f,%7.2f) q=%8.1f \n", i, x[i], y[i], q[i]);
+    }
+  }
+   */
+
+  //
+  // Part III - suppress the remanent local max
+  //
+  if (clusterConfig.processingLog >= clusterConfig.info) {
+    printf("  [extractLocalMax] (medium pads) Starting suppressing remanent Loc. Max nNewPixels=%d\n", nNewPixels);
+  }
+  int k0;
+  std::vector<int> newPixelIdx2;
+
+  // if ( (nNewPixels > 3) && ( (dxMinPadSize > 3.5) || (dyMinPadSize > 3.5) )) {
+  if ((nNewPixels > 3) && ((dxMinPadSize > 2.4) || (dyMinPadSize > 2.4))) {
+    // localMax is created further down by newPixels->selectPads(); allocating
+    // a Pads here as well would leak it when the pointer is overwritten.
+    // TODO: suppress the refinement to optimize
+    // Sort local max by decreasing charge value
+    // (index[] drives the remanent suppression below)
+    int index[nNewPixels];
+    for (int k = 0; k < nNewPixels; k++) {
+      index[k] = k;
+    }
+    std::sort(index, &index[nNewPixels], [=](int a, int b) {
+      return (newPixels->q[a] > newPixels->q[b]);
+    });
+    // k0 describe the list of true local max (local max - remanent local max)
+    // k0 number of true local max
+    k0 = 0;
+    // vectorPrintInt("Index",  index, nNewPixels);
+    // Pads::printPads("local max", *newPixels);
+
+    for (int k = 0; k < nNewPixels; k++) {
+      if (index[k] > -1) {
+        // Store the true local max
+        index[k0] = index[k];
+        int idx0 = index[k0];
+
+        // Remove horizontal/vertical remanent local max
+        double x0 = newPixels->x[idx0];
+        double y0 = newPixels->y[idx0];
+        double dx0 = newPixels->dx[idx0];
+        double dy0 = newPixels->dy[idx0];
+        if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+          printf("  Remanent from loc max k=%d, (x,y,q)= %f %f %f (dx, dy)= (%f, %f)\n", k, x0, y0, newPixels->q[idx0], dx0, dy0);
+        }
+        for (int l = k + 1; l < nNewPixels; l++) {
+          if (index[l] > -1) {
+            double dx_ = 0.5 * (dx0 + newPixels->dx[index[l]]);
+            double dy_ = 0.5 * (dy0 + newPixels->dy[index[l]]);
+            bool sameX = (std::abs(newPixels->x[index[l]] - x0) < dx_);
+            bool sameY = (std::abs(newPixels->y[index[l]] - y0) < dy_);
+            // printf("  Remanent: precision l=%d, (dx,dy)= %f %f \n", l, dx_, dy_ );
+            if (sameX) {
+              // Check in Y axe
+              // Check other remanent loc max in y direction)
+              // If founded : true remanent loc Max
+              // if not a real remanent loc max (must be kept)
+              bool realRemanent = assessRemanent(newPixels->y[index[l]], newPixels->y, dy_, nNewPixels);
+              if (realRemanent) {
+                // Remanent local max: remove it
+                // NOTE(review): unlike the sameY branch, the charge absorption (newPixels->q[idx0] += newPixels->q[index[l]]) is commented out here - confirm
+                // Remove the remanent
+                if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+                  printf("    XY-Remanent: remove l=%d, (x,y,q)= %f %f %f\n", l, newPixels->x[index[l]], newPixels->y[index[l]], newPixels->q[index[l]]);
+                }
+                index[l] = -1;
+              }
+            }
+            if (sameY && (index[l] > -1)) {
+              // index[l] may just have been set to -1 by the sameX branch: skip it then
+              // Check other remanent loc max in x direction
+              // If founded : true remanent loc Max
+              // if not a real remanent loc max (must be kept)
+              bool realRemanent = assessRemanent(newPixels->x[index[l]], newPixels->x, dx_, nNewPixels);
+              if (realRemanent) {
+                // Remanent local max: remove it
+                // The local max absorb the charge of the remanent loc max
+                newPixels->q[idx0] += newPixels->q[index[l]];
+                // Remove the remanent
+                if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+                  printf("    YX-Remanent: remove l=%d, (x,y,q)= %f %f %f\n", l, newPixels->x[index[l]], newPixels->y[index[l]], newPixels->q[index[l]]);
+                }
+                index[l] = -1;
+              }
+            }
+          }
+        }
+        k0++;
+      }
+    }
+    if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+      printf("  Local. max status before to suppress remanents\n");
+      for (int l = 0; l < k0; l++) {
+        printf("   l=%d index[l]=%d (x, y, q)= %f %f %f\n", l, index[l], newPixels->getX()[index[l]], newPixels->getY()[index[l]], newPixels->getCharges()[index[l]]);
+      }
+    }
+
+    // Clean the local Max - Remove definitely remanent local max
+    localMax = newPixels->selectPads(index, k0);
+    // Update  newPixelIdx
+    if (1) {
+      for (int l = 0; l < k0; l++) {
+        int idx = index[l];
+        newPixelIdx2.push_back(newPixelIdx[idx]);
+        // Debug
+        // printf("  newPixelIdx2 l=%d index[l]=%d (x, y, q)= %f %f %f\n", l, index[l], x[newPixelIdx[l]], y[newPixelIdx[l]], q[newPixelIdx[l]]);
+      }
+    }
+    nNewPixels = k0;
+  } else {
+    // Copy newPixels -> localMax
+    localMax = new Pads(*newPixels, PadMode::xydxdyMode);
+    k0 = nNewPixels;
+    newPixelIdx2 = newPixelIdx;
+  }
+
+  /*
+  //
+  // Refine the charge and coordinates of the local max.
+  //
+  // ??? TODO:  suppress te refinment to optimize
+  localMax = new Pads(nNewPixels, chamberId);
+  localMax->setToZero();
+  // Sort local max by charge value
+  int index[nNewPixels];
+  for (int k = 0; k < nNewPixels; k++) {
+    index[k] = k;
+  }
+  // ??? visibly not used ???
+  std::sort(index, &index[nNewPixels], [=](int a, int b) {
+    return (newPixels->q[a] > newPixels->q[b]);
+  });
+  */
+
+  /// ???? delete[] neigh;
+  // Avoid taking the same charge for 2 different localMax
+  // Add the local max in list (to be refined)
+
+  // Unused
+  // Mask_t mask[nNewPixels];
+  // vectorSetShort(mask, 1, nNewPixels);
+  int kSelected = 0;
+  for (int l = 0; l < nNewPixels; l++) {
+    PadIdx_t pixelIdx = newPixelIdx2[l];
+    // Unused
+    // if (mask[k] == 1) {
+    // Compute the charge barycenter
+    localMax->q[l] = 0.0;
+    localMax->x[l] = 0.0;
+    localMax->y[l] = 0.0;
+    int nNeigh = 0;
+    for (PadIdx_t* neigh_ptr = getNeighborListOf(neigh, pixelIdx);
+         *neigh_ptr != -1; neigh_ptr++) {
+      PadIdx_t v = *neigh_ptr;
+      localMax->q[l] += q[v];        // * mask[v];
+      localMax->x[l] += x[v] * q[v]; // * mask[v];
+      localMax->y[l] += y[v] * q[v]; // * mask[v];
+      // Unused
+      // mask[v] = 0;
+      nNeigh++;
+    }
+    // Select (or not) the local Max
+    if (localMax->q[l] > qCut) {
+
+      localMax->x[kSelected] = localMax->x[l] / localMax->q[l];
+      localMax->y[kSelected] = localMax->y[l] / localMax->q[l];
+      localMax->q[kSelected] = localMax->q[l] / nNeigh;
+      localMax->dx[kSelected] = dx[pixelIdx];
+      localMax->dy[kSelected] = dy[pixelIdx];
+      if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
+        printf("  [extractLocalMax] final seed selected (x,y) = (%8.3f, %8.3f), q=%8.2f\n",
+               localMax->x[kSelected], localMax->y[kSelected], localMax->q[kSelected]);
+      }
+      // localMaxIdx.push_back( pixelIdx );
+      kSelected++;
+    }
+    // Unused }
+  }
+  localMax->nPads = kSelected;
+  localMax->nObsPads = kSelected;
+
+  // Add high charge neighbors to be refined
+  if (0) {
+    for (int k = 0; k < nNewPixels; k++) {
+      // Compute the charge barycenter
+      PadIdx_t idxMax = newPixelIdx[k];
+      for (PadIdx_t* neigh_ptr = getNeighborListOf(neighbors, idxMax);
+           *neigh_ptr != -1; neigh_ptr++) {
+        PadIdx_t v = *neigh_ptr;
+        if ((q[v] > 0.5 * q[idxMax]) && (q[v] > clusterConfig.minChargeOfClusterPerCathode)) {
+          // Tag to be refined
+          localMaxIdx.push_back(v);
+          printf("??? neigbors of idMax=%d: %d to be refined (charge %f/%f)\n", idxMax, v, q[v], q[idxMax]);
+          // Inv printf("x,y : %f %f \n", x[v], y[v]);
+        }
+      }
+    }
+  }
+
+  delete[] neighbors;
+  neighbors = nullptr;
   delete newPixels;
 
   return localMax;
@@ -906,7 +2064,7 @@ Pads* Pads::clipOnLocalMax(bool extractLocalMax)
   // Option extractLocalMax
   //   - true: extraxt local maxima
   //   - false: filter pixels arround the maxima
-  if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::detail) {
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
     printf("  - ClipOnLocalMax (extractLocalMax Flag=%d, nPads=%d)\n",
            extractLocalMax, nPads);
   }
@@ -952,7 +2110,7 @@ Pads* Pads::clipOnLocalMax(bool extractLocalMax)
     }
     morphLaplacian[i] = double(nLess) / (count - 1);
     //
-    if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::detail) {
+    if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
       printf(
         "  Laplacian i=%d, x[i]=%6.3f, y[i]=%6.3f, z[i]=%6.3f, count=%d, "
         "morphLapl[i]=%6.3f, lapl[i]=%6.3f, weight[i]=%6.3f\n",
@@ -965,7 +2123,7 @@ Pads* Pads::clipOnLocalMax(bool extractLocalMax)
         //  the curvature must be greater than 50% of the peak
         if ((q[i] > 0.015 * qMax) || (fabs(laplacian[i]) > (0.5 * q[i]))) {
           newPixelIdx.push_back(i);
-          if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::detail) {
+          if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
             printf(
               "  Laplacian i=%d, x[i]=%6.3f, y[i]=%6.3f, z[i]=%6.3f, "
               "count=%d, morphLapl[i]=%6.3f, lapl[i]=%6.3f, weight[i]=%6.3f",
@@ -976,7 +2134,7 @@ Pads* Pads::clipOnLocalMax(bool extractLocalMax)
         }
       } else {
         // Select as new pixels in the vinicity of the local max
-        if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::detail) {
+        if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
           printf("  Selected neighbors of i=%d: ", i);
         }
         for (PadIdx_t* neigh_ptr = getNeighborListOf(neigh, i);
@@ -985,12 +2143,12 @@ Pads* Pads::clipOnLocalMax(bool extractLocalMax)
           if (alreadySelect[v] == 0) {
             alreadySelect[v] = 1;
             newPixelIdx.push_back(v);
-            if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::detail) {
+            if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
               printf("%d, ", v);
             }
           }
         }
-        if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::detail) {
+        if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
           printf("\n");
         }
       }
@@ -1048,7 +2206,7 @@ Pads* Pads::clipOnLocalMax(bool extractLocalMax)
           localMax->y[kSelected] = localMax->y[k] / localMax->q[k];
           localMax->dx[kSelected] = newPixels->dx[k];
           localMax->dy[kSelected] = newPixels->dy[k];
-          if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::detail) {
+          if (clusterConfig.EMLocalMaxLog >= clusterConfig.detail) {
             printf("  add a seed q=%9.4f, (x,y) = (%9.4f, %9.4f)\n",
                    localMax->q[k], localMax->x[k], localMax->q[k]);
           }
@@ -1057,6 +2215,7 @@ Pads* Pads::clipOnLocalMax(bool extractLocalMax)
       }
     }
     localMax->nPads = kSelected;
+    localMax->nObsPads = kSelected;
   }
   delete[] neigh;
   if (extractLocalMax) {
@@ -1083,18 +2242,26 @@ void Pads::printNeighbors(const PadIdx_t* neigh, int N)
 
 void Pads::printPads(const char* title, const Pads& pads)
 {
+  // Print the title, a summary line (nPads, nObsPads, mode), then one row per pad.
   printf("%s\n", title);
+  printf("print pads nPads=%4d nObsPads=%4d mode=%1d\n", pads.nPads, pads.nObsPads, pads.mode);
   if (pads.mode == xydxdyMode) {
+    printf("    i       x       y      dx      dy         q\n");
     for (int i = 0; i < pads.nPads; i++) {
-      printf("  pads i=%3d: x=%3.5f, dx=%3.5f, y=%3.5f, dy=%3.5f\n", i,
-             pads.x[i], pads.dx[i], pads.y[i], pads.dy[i]);
+      printf("  %3d %7.3f %7.3f %7.3f %7.3f %9.2f\n", i,
+             pads.x[i], pads.y[i], pads.dx[i], pads.dy[i], pads.q[i]);
     }
   } else {
+    printf("    i    xInf    xSup    yInf    ySup         q\n");
     for (int i = 0; i < pads.nPads; i++) {
-      printf("  pads i=%3d: xInf=%3.5f, xSup=%3.5f, yInf=%3.5f, ySup=%3.5f\n",
-             i, pads.x[i], pads.dx[i], pads.y[i], pads.dy[i]);
+      printf("  %3d %7.3f %7.3f %7.3f %7.3f %9.2f\n",
+             i, pads.x[i], pads.dx[i], pads.y[i], pads.dy[i], pads.q[i]);
     }
   }
+  // Column layout: in xydxdyMode the values follow the header (x, y, dx, dy, q);
+  // otherwise the stored arrays are printed in the order x, dx, y, dy, which
+  // the header labels xInf, xSup, yInf, ySup, followed by q.
+  // No null check on &pads is performed: a reference is not expected to be null.
 }
 
 Pads::~Pads() { release(); }
diff --git a/Detectors/MUON/MCH/Clustering/src/clusterProcessing.cxx b/Detectors/MUON/MCH/Clustering/src/clusterProcessing.cxx
index 37fc9c40c7a1f..4f4bfc968a945 100644
--- a/Detectors/MUON/MCH/Clustering/src/clusterProcessing.cxx
+++ b/Detectors/MUON/MCH/Clustering/src/clusterProcessing.cxx
@@ -14,6 +14,7 @@
 #include <cstring>
 #include <vector>
 
+#include "MCHClustering/ClusterConfig.h"
 #include "MCHClustering/clusterProcessing.h"
 #include "mathUtil.h"
 #include "mathieson.h"
@@ -29,6 +30,14 @@
 // in other plane)
 static int includeSinglePads = 1;
 
+namespace o2
+{
+namespace mch
+{
+extern ClusterConfig clusterConfig; // Shared configuration read below (log levels, nbrPadLimit, ...); NOTE(review): presumably defined in ClusterConfig.cxx - confirm
+}
+} // namespace o2
+
 using namespace o2::mch;
 
 // Total number of hits/seeds (number of mathieson)
@@ -55,7 +64,7 @@ void cleanClusterResults()
 void o2::mch::collectGroupMapping(o2::mch::Mask_t* padToMGrp, int nPads)
 {
 
-  if (o2::mch::ClusterConfig::processingLog >= o2::mch::ClusterConfig::info) {
+  if (clusterConfig.processingLog >= ClusterConfig::info) {
     printf("collectGroupMapping nPads=%d\n", nPads);
   }
   o2::mch::vectorCopyShort(clusterResults.padToGroups, nPads, padToMGrp);
@@ -88,7 +97,7 @@ void o2::mch::collectSeeds(double* theta, o2::mch::Groups_t* thetaToGroup, int K
   // printf("collectSeeds : nbrOfGroups with clusters = %d\n", clusterResults.seedList.size());
   for (int h = 0; h < clusterResults.seedList.size(); h++) {
     int k = clusterResults.seedList[h].first;
-    // if (o2::mch::ClusterConfig::inspectModelLog >= o2::mch::ClusterConfig::info) {
+    // if (clusterConfig.inspectModelLog >= ClusterConfig.info) {
     //  o2::mch::printTheta("  ",
     //                    clusterResults.seedList[h].second,
     //                    clusterResults.seedList[h].first);
@@ -99,7 +108,7 @@ void o2::mch::collectSeeds(double* theta, o2::mch::Groups_t* thetaToGroup, int K
       o2::mch::vectorSetShort(&thetaToGroup[sumK], h + 1, k);
     }
     sumK += k;
-    // if (o2::mch::ClusterConfig::inspectModelLog >= o2::mch::ClusterConfig::info) {
+    // if (clusterConfig.inspectModelLog >= ClusterConfig.info) {
     //  printf("collect theta grp=%d,  grpSize=%d, adress=%p\n", h, k,
     //         clusterResults.seedList[h].second);
     //}
@@ -122,6 +131,7 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
   cleanClusterResults();
   // if (INSPECTMODEL) {
   cleanInspectModel();
+  InspectModelChrono(0, false);
   //}
 
   const double* xyDxyi;
@@ -136,21 +146,32 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
   Mask_t* saturated__;
   Mask_t noiseMask[nPads];
   int nNewPads = 0;
+  double qCutOff = 0.0;
 
   // Pad filter when there are a too large number of pads
-  if (nPads > 800) {
+  if (nPads > clusterConfig.nbrPadLimit) {
     // Remove noisy event
-    if (ClusterConfig::processingLog >= ClusterConfig::info) {
-      printf("WARNING: remove noisy pads nPads=%d, <z>=%f, min/max z=%f,%f\n",
+    if (clusterConfig.processingLog >= ClusterConfig::info) {
+      printf("WARNING: remove noisy pads nPads=%d, <q>=%8.1f, min/max q=%8.1f, %8.1f\n",
              nPads, vectorSum(zi_, nPads) / nPads, vectorMin(zi_, nPads),
              vectorMax(zi_, nPads));
     }
-    // Select pads which q > 2.0
-    vectorBuildMaskGreater(zi_, 2.0, nPads, noiseMask);
-    nNewPads = vectorSumShort(noiseMask, nPads);
-    if (ClusterConfig::processingLog >= ClusterConfig::info) {
-      printf("WARNING: remove noisy pads qCutOff=2.0, nbr of kept Pads=%d/%d\n",
-             nNewPads, nPads);
+    // Select pads which q > qCutOff
+    double ratioStep = clusterConfig.ratioStepForLargeCluster;
+    double ratio = 1.;
+    double qMax = vectorMax(zi_, nPads);
+    int nPadsTest = nPads;
+
+    while (nPadsTest > clusterConfig.nbrPadLimit) {
+      ratio -= ratioStep;
+      qCutOff = ratio * qMax;
+      vectorBuildMaskGreater(zi_, qCutOff, nPads, noiseMask);
+      nPadsTest = vectorSumShort(noiseMask, nPads);
+    }
+    nNewPads = nPadsTest;
+    if (clusterConfig.processingLog >= clusterConfig.info) {
+      printf("WARNING: remove noisy pads qCutOff=%8.1f, nbr of kept Pads=%d/%d\n",
+             qCutOff, nNewPads, nPads);
     }
     xyDxyi__ = new double[nNewPads * 4];
     zi__ = new double[nNewPads];
@@ -172,7 +193,6 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
     cathi = cathi_;
     saturated = saturated_;
   }
-
   // Build a cluster object
   ClusterPEM cluster(getConstX(xyDxyi, nPads), getConstY(xyDxyi, nPads),
                      getConstDX(xyDxyi, nPads), getConstDY(xyDxyi, nPads), zi,
@@ -195,7 +215,7 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
                     cluster.getNbrOfPads(0), cluster.getCathGroup(1),
                     cluster.getMapCathPadToPad(1), cluster.getNbrOfPads(1));
 
-  if (ClusterConfig::inspectModel >= ClusterConfig::active) {
+  if (clusterConfig.inspectModel >= clusterConfig.active) {
     // Compute the charge on projected geometry
     double* qProj = cluster.projectChargeOnProjGeometry(includeSinglePads);
     // Save the projection with projected pads
@@ -221,10 +241,11 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
   // Find local maxima (seeds)
   //
   for (int g = 1; g <= nGroups; g++) {
+    InspectModelChrono(1, false);
     //
     //  Exctract the current group
     //
-    if (ClusterConfig::processingLog >= ClusterConfig::info) {
+    if (clusterConfig.processingLog >= clusterConfig.info) {
       printf("----------------\n");
       printf("Group %d/%d \n", g, nGroups);
       printf("----------------\n");
@@ -240,18 +261,22 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
     } else {
       subCluster = new ClusterPEM(cluster, g);
     }
+
+    // To do something ???
     double meanCharge = 0.5 * (subCluster->getTotalCharge(0) + subCluster->getTotalCharge(1));
 
-    if (ClusterConfig::processingLog >= ClusterConfig::info) {
-      printf("[clusterProcessing] charge-0= %8.2f charge-1= %8.2f\n", subCluster->getTotalCharge(0), subCluster->getTotalCharge(1));
+    if (clusterConfig.processingLog >= clusterConfig.info) {
+      printf("[clusterProcessing] charge= (%7.0f %2.0f) nPads=(%d, %d)\n",
+             subCluster->getTotalCharge(0), subCluster->getTotalCharge(1),
+             subCluster->getNbrOfPads(0), subCluster->getNbrOfPads(1));
     }
-    int nbrOfPadsInTheGroup =
-      subCluster->getNbrOfPads(0) + subCluster->getNbrOfPads(1);
+    int nbrOfPadsInTheGroup = subCluster->getNbrOfPads();
+
     // Allocation of possible nbr of seeds
     // (.i.e the nbr of Pads)
     double thetaL[nbrOfPadsInTheGroup * 5];
 
-    if (ClusterConfig::inspectModel >= ClusterConfig::active) {
+    if (clusterConfig.inspectModel >= clusterConfig.active) {
       // Compute the local max with laplacian method
       // Used only to give insights of the cluster
       subCluster->buildProjectedGeometry(includeSinglePads);
@@ -260,11 +285,12 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
       double thetaExtra[kEM * 5];
       copyTheta(thetaL, nbrOfPadsInTheGroup, thetaExtra, kEM, kEM);
       saveThetaExtraInGroupList(thetaExtra, kEM);
-      if (ClusterConfig::inspectModelLog > ClusterConfig::info) {
+      if (clusterConfig.inspectModelLog > clusterConfig.info) {
         printTheta("Theta findLocalMaxWithBothCathodes", meanCharge, thetaExtra, kEM);
       }
     }
     // Add null pads in the neighboring of the sub-cluster
+    // ???
     subCluster->addBoundaryPads();
     //
     // Search for seeds on this sub-cluster
@@ -273,7 +299,7 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
       double thetaEM[kEM * 5];
       copyTheta(thetaL, nbrOfPadsInTheGroup, thetaEM, kEM, kEM);
 
-      if (ClusterConfig::processingLog >= ClusterConfig::info) {
+      if (clusterConfig.processingLog >= clusterConfig.info) {
         printf("[clusterProcessing] Find %2d PEM local maxima : \n", kEM);
         printTheta("ThetaEM", meanCharge, thetaEM, kEM);
       }
@@ -291,10 +317,12 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
       vectorMax( projYc, nbrOfProjPadsInTheGroup));
       }
       */
-      if (ClusterConfig::inspectModel >= ClusterConfig::active) {
+      if (clusterConfig.inspectModel >= clusterConfig.active) {
         // Save the seed founds by the EM algorithm
         saveThetaEMInGroupList(thetaEM, kEM);
       }
+      InspectModelChrono(1, true);
+
       //
       //
       //
@@ -302,6 +330,8 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
       // is well separated at the 2 planes level (cath0, cath1)
       // If not the EM result is kept
       //
+      InspectModelChrono(2, false);
+
       DataBlock_t newSeeds = subCluster->fit(thetaEM, kEM);
       finalK = newSeeds.first;
       nbrOfHits += finalK;
@@ -309,9 +339,10 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
       // Store result (hits/seeds)
       clusterResults.seedList.push_back(newSeeds);
       //
-      if (ClusterConfig::inspectModel >= ClusterConfig::active) {
+      if (clusterConfig.inspectModel >= clusterConfig.active) {
         saveThetaFitInGroupList(newSeeds.second, newSeeds.first);
       }
+      InspectModelChrono(2, true);
     } else {
       // No EM seeds
       finalK = kEM;
@@ -320,7 +351,7 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
       DataBlock_t newSeeds = std::make_pair(finalK, nullptr);
       clusterResults.seedList.push_back(newSeeds);
     }
-    if (ClusterConfig::processingLog >= ClusterConfig::info) {
+    if (clusterConfig.processingLog >= clusterConfig.info) {
       printTheta("ThetaFit:", meanCharge, clusterResults.seedList.back().second, clusterResults.seedList.back().first);
     }
     // Release pointer for group
@@ -332,9 +363,11 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
   } // next group
 
   // Finalise inspectModel
-  if (ClusterConfig::inspectModel >= ClusterConfig::active) {
+  if (clusterConfig.inspectModel >= clusterConfig.active) {
     finalizeInspectModel();
   }
+  InspectModelChrono(0, true);
+  InspectModelChrono(-1, true);
 
   if (nNewPads) {
     delete[] xyDxyi__;
@@ -344,3 +377,7 @@ int clusterProcess(const double* xyDxyi_, const Mask_t* cathi_,
   }
   return nbrOfHits;
 }
+/*
+} // namespace mch
+} // namespace o2
+*/
\ No newline at end of file
diff --git a/Detectors/MUON/MCH/Clustering/src/mathUtil.cxx b/Detectors/MUON/MCH/Clustering/src/mathUtil.cxx
index 25ede7d9ff76c..dac8f551296f0 100644
--- a/Detectors/MUON/MCH/Clustering/src/mathUtil.cxx
+++ b/Detectors/MUON/MCH/Clustering/src/mathUtil.cxx
@@ -19,9 +19,14 @@ namespace mch
 {
 void vectorPrint(const char* str, const double* x, int K)
 {
-  printf("%s", str);
-  for (int k = 0; k < K; k++) {
-    printf(" %9.6g", x[k]);
+  int nPackets = (K + 9) / 10; // number of rows of at most 10 values (no empty row when K % 10 == 0)
+  // Print the label and the number of values, then the values, 10 per line.
+  printf("%s %d\n", str, K);
+  for (int i = 0; i < nPackets; i++) {
+    for (int k = 0; (k < 10) && ((i * 10 + k) < K); k++) {
+      printf(" %9.6g", x[i * 10 + k]);
+    }
+    printf("\n");
   }
   printf("\n");
 }
diff --git a/Detectors/MUON/MCH/Clustering/src/mathUtil.h b/Detectors/MUON/MCH/Clustering/src/mathUtil.h
index 0d667f76adf4f..e4945fa3a6497 100644
--- a/Detectors/MUON/MCH/Clustering/src/mathUtil.h
+++ b/Detectors/MUON/MCH/Clustering/src/mathUtil.h
@@ -150,6 +150,15 @@ inline static void vectorMultScalar(const double* u, double cst, int N,
   return;
 }
 
+inline static double vectorDotProd(const double* u, const double* v, int N)
+{ // Dot product of u and v over their first N components, summed in index order.
+  double dot = 0.0;
+  for (int k = 0; k < N; ++k) {
+    dot += u[k] * v[k];
+  }
+  return dot;
+}
+
 inline static double vectorNorm(const double* u, int N)
 {
   double res = 0;
@@ -257,6 +266,20 @@ inline static short vectorMaxShort(const short* u, int N)
   }
   return res;
 }
+
+inline static void vectorMaxScalar(const double* u, double cst, int N, double* res)
+{ // res[k] = std::fmax(cst, u[k]) for each k in [0, N).
+  for (int k = 0; k < N; ++k) {
+    res[k] = std::fmax(cst, u[k]);
+  }
+}
+
+inline static void vectorMinScalar(const double* u, double cst, int N, double* res)
+{ // res[k] = std::fmin(cst, u[k]) for each k in [0, N).
+  for (int k = 0; k < N; ++k) {
+    res[k] = std::fmin(cst, u[k]);
+  }
+}
 //
 // Logical operations
 //
@@ -330,6 +353,15 @@ inline static void vectorBuildMaskGreater(const double* src, double value,
   return;
 }
 
+inline static void vectorBuildMaskLess(const double* src, double value,
+                                       int N, short* mask)
+{
+  // mask[k] is set to 1 where src[k] is strictly below value, 0 elsewhere.
+  for (int k = 0; k < N; ++k) {
+    mask[k] = (src[k] < value) ? 1 : 0;
+  }
+}
+
 inline static void vectorBuildMaskEqual(const double* src, double value, int N,
                                         short* mask)
 {
diff --git a/Detectors/MUON/MCH/Clustering/src/mathieson.cxx b/Detectors/MUON/MCH/Clustering/src/mathieson.cxx
index 7d097c2be4318..d619fe6109c75 100644
--- a/Detectors/MUON/MCH/Clustering/src/mathieson.cxx
+++ b/Detectors/MUON/MCH/Clustering/src/mathieson.cxx
@@ -12,7 +12,10 @@
 #include <cstdio>
 #include <cstdlib>
 #include <stdexcept>
+#include <map>
+#include <limits>
 
+#include "MCHClustering/ClusterConfig.h"
 #include "mathUtil.h"
 #include "mathieson.h"
 
@@ -20,6 +23,8 @@ namespace o2
 {
 namespace mch
 {
+extern ClusterConfig clusterConfig;
+
 // Chamber 1, 2
 const double sqrtK3x1_2 = 0.7000; // Pitch= 0.21 cm
 const double sqrtK3y1_2 = 0.7550; // Pitch= 0.21 cm
@@ -34,14 +39,45 @@ static double K1x[2], K1y[2];
 static double K2x[2], K2y[2];
 static const double sqrtK3x[2] = {sqrtK3x1_2, sqrtK3x3_10},
                     sqrtK3y[2] = {sqrtK3y1_2, sqrtK3y3_10};
+static double K3x[2], K3y[2];
 static double K4x[2], K4y[2];
 static double pitch[2] = {pitch1_2, pitch3_10};
 static double invPitch[2];
 
-void initMathieson()
+// Spline approximation of the Mathieson primitives: usage flag, coefficient tables and sampling grid
+int useSpline = 0;
+SplineCoef* splineCoef[2][2];
+static double splineXYStep = 1.0e-3; // step between two consecutive samples of splineXY
+static double splineXYLimit = 3.0; // upper bound of the sampled interval (sampling starts at 0)
+static int nSplineSampling = 0; // number of samples, set in initSplineMathiesonPrimitive
+double* splineXY = nullptr;
+
+// Cache usage flag (assigned from useCache_ in initMathieson)
+int useCache = 0;
+
+SplineCoef::SplineCoef(int N)
+{ // Allocate the four coefficient arrays a, b, c, d with N entries each (left uninitialized).
+  a = new double[N];
+  b = new double[N];
+  c = new double[N];
+  d = new double[N];
+}
+
+SplineCoef::~SplineCoef()
 {
+  delete[] a; // Release the four arrays allocated by the constructor.
+  delete[] b;
+  delete[] c;
+  delete[] d; // NOTE(review): confirm copy/assignment of SplineCoef is disabled in the header, otherwise a copy would delete these arrays twice.
+}
+void initMathieson(int useSpline_, int useCache_)
+{
+  useSpline = useSpline_;
+  useCache = useCache_;
   //
   for (int i = 0; i < 2; i++) {
+    K3x[i] = sqrtK3x[i] * sqrtK3x[i];
+    K3y[i] = sqrtK3y[i] * sqrtK3y[i];
     K2x[i] = M_PI * 0.5 * (1.0 - sqrtK3x[i] * 0.5);
     K2y[i] = M_PI * 0.5 * (1.0 - sqrtK3y[i] * 0.5);
     K1x[i] = K2x[i] * sqrtK3x[i] * 0.25 / (atan(sqrtK3x[i]));
@@ -50,83 +86,234 @@ void initMathieson()
     K4y[i] = K1y[i] / K2y[i] / sqrtK3y[i];
     invPitch[i] = 1.0 / pitch[i];
   }
+  if (useSpline) {
+    initSplineMathiesonPrimitive();
+  }
 }
 
-void compute2DPadIntegrals(const double* xInf, const double* xSup,
-                           const double* yInf, const double* ySup, int N,
-                           int chamberId, double Integrals[])
+void initSplineMathiesonPrimitive()
 {
-  // Returning array: Charge Integral on all the pads
+  // x/y Interval and positive x/y limit
+  double xyStep = splineXYStep;
+  double xyLimit = splineXYLimit;
+  // X/Y Sampling
+  nSplineSampling = int(xyLimit / xyStep) + 1;
+  int N = nSplineSampling;
+
+  splineXY = new double[N];
+  for (int i = 0; i < N; i++) {
+    splineXY[i] = xyStep * i;
+  }
+  double* xy = splineXY;
+
+  // Spline coef allocation for the 4 functions
+  splineCoef[0][0] = new SplineCoef(N);
+  splineCoef[0][1] = new SplineCoef(N);
+  splineCoef[1][0] = new SplineCoef(N);
+  splineCoef[1][1] = new SplineCoef(N);
+
+  // Compute the spline Coef. for the 4 Mathieson primitives
+  double mathPrimitive[N];
+  double rightDerivative(0.0), leftDerivative;
+  // X and Y primitives on chambers <= 2 (Mathieson Type = 0)
+  int mathiesonType = 0;
+  int axe = 0;
+  mathiesonPrimitive(xy, N, axe, 2, mathPrimitive);
+  leftDerivative = 2.0 * K4x[mathiesonType] * sqrtK3x[mathiesonType] * K2x[mathiesonType] * invPitch[mathiesonType];
+  computeSplineCoef(xy, xyStep, mathPrimitive, N, leftDerivative, rightDerivative, o2::mch::splineCoef[mathiesonType][axe]);
+  axe = 1;
+  mathiesonPrimitive(xy, N, axe, 2, mathPrimitive);
+  leftDerivative = 2.0 * K4y[mathiesonType] * sqrtK3y[mathiesonType] * K2y[mathiesonType] * invPitch[mathiesonType];
+  computeSplineCoef(xy, xyStep, mathPrimitive, N, leftDerivative, rightDerivative, splineCoef[mathiesonType][axe]);
+  mathiesonType = 1;
+  axe = 0;
+  mathiesonPrimitive(xy, N, axe, 3, mathPrimitive);
+  leftDerivative = 2.0 * K4x[mathiesonType] * sqrtK3x[mathiesonType] * K2x[mathiesonType] * invPitch[mathiesonType];
+  computeSplineCoef(xy, xyStep, mathPrimitive, N, leftDerivative, rightDerivative, splineCoef[mathiesonType][axe]);
+  axe = 1;
+  mathiesonPrimitive(xy, N, axe, 3, mathPrimitive);
+  leftDerivative = 2.0 * K4y[mathiesonType] * sqrtK3y[mathiesonType] * K2y[mathiesonType] * invPitch[mathiesonType];
+  computeSplineCoef(xy, xyStep, mathPrimitive, N, leftDerivative, rightDerivative, splineCoef[mathiesonType][axe]);
+}
+
+// Clamped cubic spline, implemented as in the book "Numerical Analysis" - 9th edition
+// Richard L Burden, J Douglas Faires
+// Section 3.5, p. 146
+// Restriction: written for a regular sampling (dx = cst)
+// spline(x) :[-inf, +inf] -> [-1/2, +1/2]
+// Error < 7.0 e-11 for 1001 sampling between [0, 3.0]
+void computeSplineCoef(const double* xy, double xyStep, const double* f, int N,
+                       double leftDerivative, double rightDerivative, SplineCoef* splineCoef)
+{
+  double* a = splineCoef->a;
+  double* b = splineCoef->b;
+  double* c = splineCoef->c;
+  double* d = splineCoef->d;
+
+  // a coef : the sampled function
+  vectorCopy(f, N, a);
+
+  // Step 1
+  double h = xyStep;
+
+  // Step 2 & 3 : Compute alpha
+  double alpha[N];
+  alpha[0] = 3.0 / h * (f[1] - f[0]) - 3 * leftDerivative;
+  alpha[N - 1] = 3 * rightDerivative - 3.0 / h * (f[N - 1] - f[N - 2]);
+  for (int i = 1; i < N - 1; i++) {
+    // To keep the general case if h is not constant
+    alpha[i] = 3.0 / h * (f[i + 1] - f[i]) - 3.0 / h * (f[i] - f[i - 1]);
+  }
+
+  // Step 4 to 6 solve a tridiagonal linear system
   //
-  if (chamberId <= 2) {
-    mathiesonType = 0;
-  } else {
-    mathiesonType = 1;
+  // Step 4
+  double l[N], mu[N], z[N];
+  l[0] = 2.0 * h;
+  mu[0] = 0.5;
+  z[0] = alpha[0] / l[0];
+  //
+  // Step 5
+  for (int i = 1; i < N - 1; i++) {
+    l[i] = 2.0 * (xy[i + 1] - xy[i - 1]) - h * mu[i - 1];
+    mu[i] = h / l[i];
+    z[i] = (alpha[i] - h * z[i - 1]) / l[i];
+  }
+  //
+  // Step 6
+  l[N - 1] = h * (2.0 - mu[N - 2]);
+  z[N - 1] = (alpha[N - 1] - h * z[N - 2]) / l[N - 1];
+  c[N - 1] = z[N - 1];
+
+  // Step 7 : calculate cubic coefficients
+  for (int j = N - 2; j >= 0; j--) {
+    c[j] = z[j] - mu[j] * c[j + 1];
+    b[j] = (f[j + 1] - f[j]) / h - h / 3.0 * (c[j + 1] + 2 * c[j]);
+    d[j] = (c[j + 1] - c[j]) / (3 * h);
+  }
+}
+
+void splineMathiesonPrimitive(const double* x, int N, int axe, int chamberId, double* mPrimitive)
+{
+  int mathiesonType = (chamberId <= 2) ? 0 : 1;
+  double* a = splineCoef[mathiesonType][axe]->a;
+  double* b = splineCoef[mathiesonType][axe]->b;
+  double* c = splineCoef[mathiesonType][axe]->c;
+  double* d = splineCoef[mathiesonType][axe]->d;
+  double dx = splineXYStep;
+  // printf("dx=%f nSplineSampling=%d\n", dx, nSplineSampling);
+  double signX[N];
+  // x without sign
+  double uX[N];
+  for (int i = 0; i < N; i++) {
+    signX[i] = (x[i] >= 0) ? 1 : -1;
+    uX[i] = signX[i] * x[i];
+    /*
+    if( uX[i] > (2.0 * splineXYLimit)) {
+      // x >> 0, f(x) = 0.0
+      signX[i] = 0.0;
+      uX[i] = 0.5;
+    }
+    */
+  }
+
+  double cst = 1.0 / dx;
+  // Get indexes in the sample function
+  int idx;
+  double h;
+  for (int i = 0; i < N; i++) {
+    // int k = int( uX[i] * cst + dx*0.1 );
+    //  if ( k < nSplineSampling) {
+    if (uX[i] < splineXYLimit) {
+      idx = int(uX[i] * cst + dx * 0.1);
+      h = uX[i] - idx * dx;
+    } else {
+      idx = nSplineSampling - 1;
+      h = 0;
+    }
+    mPrimitive[i] = signX[i] * (a[idx] + h * (b[idx] + h * (c[idx] + h * (d[idx]))));
+    // printf("x[i]=%f, signX[i]=%f uX[i]=%f idx=%d, h=%f, prim=%f, splineXYLimit=%f\n", x[i], signX[i], uX[i], idx, h, mPrimitive[i], splineXYLimit );
   }
+  // print ("uX ",  uX)
+  //     print ("h ",  h)
+  //     print ("f(x0) ",  a[idx])
+  //     print ("df|dx0",  h*( b[idx] + h*( c[idx] + h *(d[idx]))))
+  //     print ("f, ",  a[idx] + h*( b[idx] + h*( c[idx] + h *(d[idx]))))
+}
+
+// Return the Mathieson primitive at x or y
+void mathiesonPrimitive(const double* xy, int N,
+                        int axe, int chamberId, double mPrimitive[])
+{
+  mathiesonType = (chamberId <= 2) ? 0 : 1;
   //
   // Select Mathieson coef.
-  double curK2x = K2x[mathiesonType];
-  double curK2y = K2y[mathiesonType];
-  double curSqrtK3x = sqrtK3x[mathiesonType];
-  double curSqrtK3y = sqrtK3y[mathiesonType];
-  double curK4x = K4x[mathiesonType];
-  double curK4y = K4y[mathiesonType];
+  double curK2xy = (axe == 0) ? K2x[mathiesonType] : K2y[mathiesonType];
+  double curSqrtK3xy = (axe == 0) ? sqrtK3x[mathiesonType] : sqrtK3y[mathiesonType];
   double curInvPitch = invPitch[mathiesonType];
-  double cst2x = curK2x * curInvPitch;
-  double cst2y = curK2y * curInvPitch;
-  double cst4 = 4.0 * curK4x * curK4y;
-  double uInf, uSup, vInf, vSup;
+  double cst2xy = curK2xy * curInvPitch;
+  double curK4xy = (axe == 0) ? K4x[mathiesonType] : K4y[mathiesonType];
 
   for (int i = 0; i < N; i++) {
-    // x/u
-    uInf = curSqrtK3x * tanh(cst2x * xInf[i]);
-    uSup = curSqrtK3x * tanh(cst2x * xSup[i]);
-    // y/v
-    vInf = curSqrtK3y * tanh(cst2y * yInf[i]);
-    vSup = curSqrtK3y * tanh(cst2y * ySup[i]);
-    //
-    Integrals[i] = cst4 * (atan(uSup) - atan(uInf)) * (atan(vSup) - atan(vInf));
-    // printf(" xyInfSup %2d  [%10.6g, %10.6g] x [%10.6g, %10.6g]-> %10.6g\n",
-    // i, xInf[i], xSup[i], yInf[i], ySup[i], Integrals[i]);
+    double u = curSqrtK3xy * tanh(cst2xy * xy[i]);
+    mPrimitive[i] = 2 * curK4xy * atan(u);
   }
-  // printf(" I[0..%3ld] = %f, %f, ... %f\n", N-1, Integrals[0], Integrals[1],
-  // Integrals[N-1]);
-  return;
 }
 
-void compute1DPadIntegrals(const double* xInf, const double* xSup, int N,
-                           int chamberId, bool xAxe, double* Integrals)
+void compute1DMathieson(const double* xy, int N,
+                        int axe, int chamberId, double mathieson[])
 {
   // Returning array: Charge Integral on all the pads
   //
-  if (chamberId <= 2) {
-    mathiesonType = 0;
-  } else {
-    mathiesonType = 1;
-  }
+  mathiesonType = (chamberId <= 2) ? 0 : 1;
+
   //
   // Select Mathieson coef.
+
+  double curK1xy = (axe == 0) ? K1x[mathiesonType] : K1y[mathiesonType];
+  double curK2xy = (axe == 0) ? K2x[mathiesonType] : K2y[mathiesonType];
+  double curK3xy = (axe == 0) ? K3x[mathiesonType] : K3y[mathiesonType];
   double curInvPitch = invPitch[mathiesonType];
-  double curK2, curSqrtK3, curK4, cst2;
-  if (xAxe) {
-    curK2 = K2x[mathiesonType];
-    curSqrtK3 = sqrtK3x[mathiesonType];
-    curK4 = K4x[mathiesonType];
-    cst2 = curK2 * curInvPitch;
-  } else {
-    curK2 = K2y[mathiesonType];
-    curSqrtK3 = sqrtK3y[mathiesonType];
-    curK4 = K4y[mathiesonType];
-    cst2 = curK2 * curInvPitch;
+  double cst2xy = curK2xy * curInvPitch;
+
+  for (int i = 0; i < N; i++) {
+    //  tanh(x) & tanh(y)
+    double xTanh = tanh(cst2xy * xy[i]);
+    double xTanh2 = xTanh * xTanh;
+    mathieson[i] = curK1xy * (1.0 - xTanh2) / (1.0 + curK3xy * xTanh2);
   }
-  double cst4 = 2.0 * curK4;
+  return;
+}
+void compute1DPadIntegrals(const double* xyInf, const double* xySup, int N,
+                           double xy0, int axe, int chamberId, double* integrals)
+{
+  double zInf[N], zSup[N];
+  vectorAddScalar(xyInf, -xy0, N, zInf);
+  vectorAddScalar(xySup, -xy0, N, zSup);
+  compute1DPadIntegrals(zInf, zSup, N, axe, chamberId, integrals);
+}
+
+void compute1DPadIntegrals(const double* xyInf, const double* xySup, int N,
+                           int axe, int chamberId, double* Integrals)
+{
+  // Returning array: Charge Integral on all the pads
+  //
+  mathiesonType = (chamberId <= 2) ? 0 : 1;
 
-  double uInf, uSup, vInf, vSup;
+  //
+  // Select Mathieson coef.
+  double curInvPitch = invPitch[mathiesonType];
+  double curK2 = (axe == 0) ? K2x[mathiesonType] : K2y[mathiesonType];
+  double curSqrtK3 = (axe == 0) ? sqrtK3x[mathiesonType] : sqrtK3y[mathiesonType];
+  double curK4 = (axe == 0) ? K4x[mathiesonType] : K4y[mathiesonType];
+  double cst2 = curK2 * curInvPitch;
+  double cst4 = 2.0 * curK4;
 
+  double uInf, uSup;
   for (int i = 0; i < N; i++) {
     // x/u
-    uInf = curSqrtK3 * tanh(cst2 * xInf[i]);
-    uSup = curSqrtK3 * tanh(cst2 * xSup[i]);
+    uInf = curSqrtK3 * tanh(cst2 * xyInf[i]);
+    uSup = curSqrtK3 * tanh(cst2 * xySup[i]);
     //
     Integrals[i] = cst4 * (atan(uSup) - atan(uInf));
     // printf(" xyInfSup %2d  [%10.6g, %10.6g] x [%10.6g, %10.6g]-> %10.6g\n",
@@ -137,6 +324,279 @@ void compute1DPadIntegrals(const double* xInf, const double* xSup, int N,
   return;
 }
 
+int compressSameValues(const double* x1, const double* x2, int* map1, int* map2, int N, double* xCompress)
+{
+  // Merges the 2*N values of x1[0..N-1] and x2[0..N-1] into a list of unique values.
+  // map1[0..N-1] (out): for each i, index in xCompress [0..nCompress-1] of the value standing for x1[i]
+  // map2[0..N-1] (out): the same for x2[i]
+  // xCompress[0..nCompress-1] (out): one representative per distinct value; the caller must provide room for 2*N entries
+  // Callers evaluate the primitive once per xCompress entry and use map1/map2 to look the result up per pad
+  // Returns nCompress, the number of distinct values (at most 2*N)
+
+  // Quantize to integer codes so that nearly-equal values compare as equal
+  const double* x[2] = {x1, x2};
+  int* xCode = new int[2 * N];
+  for (int i = 0; i < N; i++) {
+    for (int b = 0; b < 2; b++) {
+      // Code = value * 1000 + 0.5 truncated to int: x1 codes are stored in [0..N-1], x2 codes in [N..2N-1]
+      // Values that share a code are treated as the same value
+      xCode[i + b * N] = (int)(x[b][i] * 1000 + 0.5);
+    }
+  }
+  // Sort an index permutation by code (xCode itself is left in place)
+  int sIdx[2 * N];
+  for (int k = 0; k < 2 * N; k++) {
+    sIdx[k] = k;
+  }
+  std::sort(sIdx, &sIdx[2 * N], [=](int a, int b) {
+    return (xCode[a] < xCode[b]);
+  });
+
+  // printf("sort  xCode[sIdx[0]]=%d xCode[sIdx[2*N-1]]=%d\n", xCode[sIdx[0]], xCode[sIdx[2*N-1]]);
+  // vectorPrintInt("xCode",xCode, 2*N);
+  // vectorPrintInt("sIdx",sIdx, 2*N);
+
+  // Renumber and compress
+  int nCompress = 0;
+  int prevCode = std::numeric_limits<int>::max();
+
+  // Walk the codes in ascending order; each new code opens a new xCompress entry (fills map1 and map2)
+  for (int i = 0; i < 2 * N; i++) {
+    int idx = sIdx[i];
+    if (xCode[idx] != prevCode) {
+      if (idx < N) {
+        // New value coming from x1: it becomes the representative
+        xCompress[nCompress] = x1[idx];
+        map1[idx] = nCompress;
+        // printf("i=%d sIdx[i]=%d nCompress=%d idx=%d map1[idx]=%d\n", i, idx, nCompress, idx, map1[idx]);
+      } else {
+        // New value coming from x2: it becomes the representative
+        xCompress[nCompress] = x2[idx - N];
+        map2[idx - N] = nCompress;
+        // printf("i=%d sIdx[i]=%d nCompress=%d idx-N=%d map2[idx]=%d\n", i, idx, nCompress,  idx-N, map2[idx-N]);
+      }
+      nCompress++;
+    } else {
+      // Same code as the previous entry: reuse the last xCompress index
+      if (idx < N) {
+        map1[idx] = nCompress - 1;
+        // printf("identical i=%d sIdx[i]=%d nCompress-1=%d idx=%d\n", i, idx, nCompress-1, idx);
+      } else {
+        map2[idx - N] = nCompress - 1;
+        // printf("identical i=%d sIdx[i]=%d nCompress-1=%d idx=%d\n", i, idx, nCompress-1, idx-N);
+      }
+    }
+    prevCode = xCode[idx];
+  }
+  // printf(" compress nCompress/N=%d/%d \n", nCompress, N);
+  // vectorPrint("x1", x1, N);
+  // vectorPrintInt("map1",map1, N);
+  // vectorPrint("x2", x2, N);
+  // vectorPrintInt("map2",map2, N);
+  // vectorPrint("xCompress", xCompress, nCompress);
+  delete[] xCode;
+  return nCompress;
+}
+
+CompressedPads_t* compressPads(const double* xInf, const double* xSup,
+                               const double* yInf, const double* ySup, int N)
+{
+  CompressedPads_t* compressedPads = new CompressedPads_t; // heap-allocated result, returned to the caller
+  // x axis: unique xInf/xSup bounds (at most 2*N) plus, per pad, the index of its lower/upper bound
+  compressedPads->xCompressed = new double[2 * N];
+  compressedPads->mapXInf = new int[N];
+  compressedPads->mapXSup = new int[N];
+  compressedPads->nXc = compressSameValues(xInf, xSup, compressedPads->mapXInf, compressedPads->mapXSup, N, compressedPads->xCompressed);
+  compressedPads->yCompressed = new double[2 * N]; // y axis: same layout as for x
+  compressedPads->mapYInf = new int[N];
+  compressedPads->mapYSup = new int[N];
+  compressedPads->nYc = compressSameValues(yInf, ySup, compressedPads->mapYInf, compressedPads->mapYSup, N, compressedPads->yCompressed);
+  return compressedPads; // the six arrays are released by deleteCompressedPads()
+}
+
+void deleteCompressedPads(CompressedPads_t* compressedPads) // Releases the six arrays allocated by compressPads()
+{
+  delete[] compressedPads->mapXInf;
+  delete[] compressedPads->mapXSup;
+  delete[] compressedPads->mapYInf;
+  delete[] compressedPads->mapYSup;
+  delete[] compressedPads->xCompressed;
+  delete[] compressedPads->yCompressed;
+} // NOTE(review): the CompressedPads_t object itself (created with `new` in compressPads) is not deleted here - confirm the caller does it
+
+void computeCompressed2DPadIntegrals(
+  /* const double* xInf, const double* xSup,
+                             const double* yInf, const double* ySup,
+  */
+  CompressedPads_t* compressedPads, double xShift, double yShift, int N,
+  int chamberId, double Integrals[])
+{
+  // Fills Integrals[0..N-1] with (Px(xSup) - Px(xInf)) * (Py(ySup) - Py(yInf)), bounds being shifted by (-xShift, -yShift)
+  int nXc = compressedPads->nXc;
+  int nYc = compressedPads->nYc;
+  // Scratch buffer for the shifted bounds: nXc/nYc can exceed N (up to 2*N unique bounds), so size it on them, not on N
+  double xy[(nXc > nYc) ? nXc : nYc];
+  double xPrimitives[nXc];
+  double yPrimitives[nYc];
+  // X axe
+  int axe = 0;
+  // x Translation (seed location)
+  vectorAddScalar(compressedPads->xCompressed, -xShift, nXc, xy);
+  // Primitives on compressed pads
+  mathiesonPrimitive(xy, nXc, axe, chamberId, xPrimitives);
+  // Y axe
+  axe = 1;
+  // y Translation (seed location)
+  vectorAddScalar(compressedPads->yCompressed, -yShift, nYc, xy);
+  // Primitives on compressed pads
+  mathiesonPrimitive(xy, nYc, axe, chamberId, yPrimitives);
+
+  // Compute all the integrals: each pad looks up the primitives of its own bounds through the maps
+  int* mapXInf = compressedPads->mapXInf;
+  int* mapXSup = compressedPads->mapXSup;
+  int* mapYInf = compressedPads->mapYInf;
+  int* mapYSup = compressedPads->mapYSup;
+  for (int i = 0; i < N; i++) {
+    Integrals[i] = (xPrimitives[mapXSup[i]] - xPrimitives[mapXInf[i]]) * (yPrimitives[mapYSup[i]] - yPrimitives[mapYInf[i]]);
+    // printf(" i=%d mapXInf=%d mapXSup=%d mapYInf=%d mapYSup=%d xyIntegrals=%f, %f \n", i,
+    //        mapXInf[i], mapXSup[i], mapYInf[i], mapYSup[i], xPrimitives[mapXSup[i]] - xPrimitives[mapXInf[i]],
+    //        yPrimitives[mapYSup[i]] - yPrimitives[mapYInf[i]]);
+  }
+
+  // vectorPrint("xPrimitives", xPrimitives, nXc);
+  // vectorPrint("yPrimitives", yPrimitives, nYc);
+}
+
+void compute2DPadIntegrals(const double* xInf, const double* xSup,
+                           const double* yInf, const double* ySup, int N,
+                           int chamberId, double Integrals[])
+{
+  if (1) {
+    int mapXInf[N], mapXSup[N];
+    int mapYInf[N], mapYSup[N];
+    double xy[2 * N];
+    // Primitives on x axe
+    int nXc = compressSameValues(xInf, xSup, mapXInf, mapXSup, N, xy);
+    // vectorPrint("x map", xy, nXc);
+    int axe = 0;
+    double xPrimitives[nXc];
+    mathiesonPrimitive(xy, nXc, axe, chamberId, xPrimitives);
+    // Primitives on y axe
+    int nYc = compressSameValues(yInf, ySup, mapYInf, mapYSup, N, xy);
+    // vectorPrint("y map", xy, nYc);
+    double yPrimitives[nYc];
+    axe = 1;
+    mathiesonPrimitive(xy, nYc, axe, chamberId, yPrimitives);
+
+    for (int i = 0; i < N; i++) {
+      Integrals[i] = (xPrimitives[mapXSup[i]] - xPrimitives[mapXInf[i]]) * (yPrimitives[mapYSup[i]] - yPrimitives[mapYInf[i]]);
+      // printf(" i=%d mapXInf=%d mapXSup=%d mapYInf=%d mapYSup=%d xyIntegrals=%f, %f \n", i,
+      //        mapXInf[i], mapXSup[i], mapYInf[i], mapYSup[i], xPrimitives[mapXSup[i]] - xPrimitives[mapXInf[i]],
+      //        yPrimitives[mapYSup[i]] - yPrimitives[mapYInf[i]]);
+    }
+
+    // vectorPrint("xPrimitives", xPrimitives, nXc);
+    // vectorPrint("yPrimitives", yPrimitives, nYc);
+
+  } else {
+
+    if (useSpline) {
+      double lBoundPrim[N], uBoundPrim[N], xIntegrals[N], yIntegrals[N];
+      int axe = 0;
+      // mathiesonPrimitive(xInf, N, axe, chamberId, lBoundPrim);
+      splineMathiesonPrimitive(xInf, N, axe, chamberId, lBoundPrim);
+      // mathiesonPrimitive(xSup, N, axe, chamberId, uBoundPrim);
+      splineMathiesonPrimitive(xSup, N, axe, chamberId, uBoundPrim);
+      vectorAddVector(uBoundPrim, -1.0, lBoundPrim, N, xIntegrals);
+      // vectorPrint("xIntegrals analytics ", xIntegrals, N);
+      for (int i = 0; i < N; i++) {
+        if (xIntegrals[i] < 0.0) {
+          printf("??? %d x (%f %f) lInt=%f uInt%f xInt=%f\n", i, xInf[i], xSup[i], lBoundPrim[i], uBoundPrim[i], xIntegrals[i]);
+          throw std::out_of_range(
+            "[findLocalMaxWithPEM] ????");
+        }
+      }
+      axe = 1;
+      // mathiesonPrimitive(yInf, N, axe, chamberId, lBoundPrim);
+      splineMathiesonPrimitive(yInf, N, axe, chamberId, lBoundPrim);
+      // mathiesonPrimitive(ySup, N, axe, chamberId, uBoundPrim);
+      splineMathiesonPrimitive(ySup, N, axe, chamberId, uBoundPrim);
+      vectorAddVector(uBoundPrim, -1.0, lBoundPrim, N, yIntegrals);
+      // vectorPrint("yIntegrals analytics ", yIntegrals, N);
+      vectorMultVector(xIntegrals, yIntegrals, N, Integrals);
+      // Invald ????
+
+      for (int i = 0; i < N; i++) {
+        if (yIntegrals[i] < 0.0) {
+          printf("??? %d y (%f %f) lInt=%f uInt%f yInt=%f\n", i, yInf[i], ySup[i], lBoundPrim[i], uBoundPrim[i], yIntegrals[i]);
+          throw std::out_of_range(
+            "[findLocalMaxWithPEM] ????");
+        }
+      } // vectorPrint("Integrals analytics", Integrals, N);
+
+      /* ??????????????????????
+      axe = 0;
+      splineMathiesonPrimitive( xInf, N, axe, chamberId, lBoundPrim );
+      // vectorPrint("x lBoundPrim spline ", lBoundPrim, N);
+      splineMathiesonPrimitive( xSup, N, axe, chamberId, uBoundPrim );
+      vectorAddVector( uBoundPrim, -1.0, lBoundPrim, N, xIntegrals);
+      // vectorPrint("xIntegrals spline", xIntegrals, N);
+      axe = 1;
+      splineMathiesonPrimitive( yInf, N, axe, chamberId, lBoundPrim );
+      splineMathiesonPrimitive( ySup, N, axe, chamberId, uBoundPrim );
+      vectorAddVector( uBoundPrim, -1.0, lBoundPrim, N, yIntegrals);
+
+      // vectorPrint("yIntegrals spline", yIntegrals, N);
+      */
+
+      vectorMultVector(xIntegrals, yIntegrals, N, Integrals);
+      // vectorPrint("Integrals spline", Integrals, N);
+
+    } else {
+      // Returning array: Charge Integral on all the pads
+      //
+      if (chamberId <= 2) {
+        mathiesonType = 0;
+      } else {
+        mathiesonType = 1;
+      }
+      //
+      // Select Mathieson coef.
+      double curK2x = K2x[mathiesonType];
+      double curK2y = K2y[mathiesonType];
+      double curSqrtK3x = sqrtK3x[mathiesonType];
+      double curSqrtK3y = sqrtK3y[mathiesonType];
+      double curK4x = K4x[mathiesonType];
+      double curK4y = K4y[mathiesonType];
+      double curInvPitch = invPitch[mathiesonType];
+      double cst2x = curK2x * curInvPitch;
+      double cst2y = curK2y * curInvPitch;
+      double cst4 = 4.0 * curK4x * curK4y;
+      double uInf, uSup, vInf, vSup;
+
+      for (int i = 0; i < N; i++) {
+        // x/u
+        uInf = curSqrtK3x * tanh(cst2x * xInf[i]);
+        uSup = curSqrtK3x * tanh(cst2x * xSup[i]);
+        // y/v
+        vInf = curSqrtK3y * tanh(cst2y * yInf[i]);
+        vSup = curSqrtK3y * tanh(cst2y * ySup[i]);
+        //
+        Integrals[i] = cst4 * (atan(uSup) - atan(uInf)) * (atan(vSup) - atan(vInf));
+        // printf(" Ix=%10.6g Iy=%10.6g\n", 2*curK4x * (atan(uSup) - atan(uInf)),  2*curK4y * (atan(vSup) - atan(vInf)));
+        // printf(" xyInfSup %2d  [%10.6g, %10.6g] x [%10.6g, %10.6g]-> %10.6g * %10.6g = %10.6g\n",
+        // i, xInf[i], xSup[i], yInf[i], ySup[i], Integrals[i], 2.0 * curK4x*(atan(uSup) - atan(uInf)), 2.0 * curK4y*(atan(vSup) - atan(vInf)) ) ;
+      }
+      // printf(" I[0..%3ld] = %f, %f, ... %f\n", N-1, Integrals[0], Integrals[1],
+      // Integrals[N-1]);
+    }
+  }
+  // CHECK
+  if (clusterConfig.mathiesonCheck) {
+    checkIntegrals(xInf, xSup, yInf, ySup, Integrals, chamberId, N);
+  }
+}
+
 void compute2DMathiesonMixturePadIntegrals(const double* xyInfSup0,
                                            const double* theta, int N, int K,
                                            int chamberId, double Integrals[])
@@ -172,6 +632,44 @@ void compute2DMathiesonMixturePadIntegrals(const double* xyInfSup0,
   }
 }
 
+bool checkIntegrals(const double* xInf, const double* xSup, const double* yInf, const double* ySup,
+                    const double* integralsToCheck, int chId, int N)
+{
+  double lBoundPrim[N], uBoundPrim[N];
+  double xIntegrals[N], yIntegrals[N], Integrals[N];
+  // ??? find the reason for high value
+  double precision = 5.e-5;
+  int axe = 0;
+  mathiesonPrimitive(xInf, N, axe, chId, lBoundPrim);
+  mathiesonPrimitive(xSup, N, axe, chId, uBoundPrim);
+  vectorAddVector(uBoundPrim, -1.0, lBoundPrim, N, xIntegrals);
+  /*
+  for (int i=0; i < N; i++) {
+    if ( xIntegrals[i] >= 0.0) {
+      printf("i=%d xInf=%f xSup=%f, uBoundPrim=%f lBoundPrim=%f\n", i,
+              xInf[i], xSup[i], uBoundPrim[i], lBoundPrim[i]);
+    }
+  }
+  */
+  axe = 1;
+  mathiesonPrimitive(yInf, N, axe, chId, lBoundPrim);
+  mathiesonPrimitive(ySup, N, axe, chId, uBoundPrim);
+  vectorAddVector(uBoundPrim, -1.0, lBoundPrim, N, yIntegrals);
+  // vectorPrint("yIntegrals analytics ", yIntegrals, N);
+  vectorMultVector(xIntegrals, yIntegrals, N, Integrals);
+  bool ok = true;
+  for (int i = 0; i < N; i++) {
+    if (std::fabs(integralsToCheck[i] - Integrals[i]) > precision) {
+      printf("i=%d xInf=%f xSup=%f, yInf=%f ySup=%f, reference=%f check value=%f\n", i,
+             xInf[i], xSup[i], yInf[i], ySup[i], Integrals[i], integralsToCheck[i]);
+      ok = false;
+      throw std::out_of_range("[checkIntegral] bad integral value");
+    }
+  }
+
+  return ok;
+}
+
 void computeFastCij(const Pads& pads, const Pads& pixel, double Cij[])
 {
   // Compute the Charge Integral Cij of pads (j index), considering the
@@ -180,7 +678,100 @@ void computeFastCij(const Pads& pads, const Pads& pixel, double Cij[])
   // to reduce the computation cost
   // CI(x) is store in PadIntegralX
   // CI(y) is store in PadIntegralY
-  // A subsampling of CI(x_i + k*minDx) (or CI(y_i + l*minDY)) is used
+  // A sub-sampling of CI(x_i + k*minDx) (or CI(y_i + l*minDY)) is used
+  // by taking the mininimun of pads.dx(pads.dy) to discretize the x/y space
+  //
+  // CI(x)/CI(y) are computed if they are requested.
+  //
+  // Returning array: Charge Integral on all the pads Cij[]
+
+  if ((pads.mode != Pads::xyInfSupMode) || (pixel.mode != Pads::xydxdyMode)) {
+    printf(
+      "[computeFastCij] exception: bad representation (mode) of pads in "
+      "computeCij (padMode=%d, pixelMode=%d)\n",
+      pads.mode, pixel.mode);
+    throw std::overflow_error("Bad mode");
+    return;
+  }
+  int N = pads.getNbrOfPads();
+  int K = pixel.getNbrOfPads();
+  // Pads
+  int chId = pads.getChamberId();
+  const double* xInf0 = pads.getXInf();
+  const double* yInf0 = pads.getYInf();
+  const double* xSup0 = pads.getXSup();
+  const double* ySup0 = pads.getYSup();
+  // Pixels
+  const double* muX = pixel.getX();
+  const double* muY = pixel.getY();
+
+  double zInf[N];
+  double zSup[N];
+  int axe;
+
+  // Loop on Pixels
+  std::map<int, double*> xMap;
+  std::map<int, double*> yMap;
+  for (int k = 0; k < K; k++) {
+    // Calculate the indexes in the 1D charge integral
+    // Error on pad position > 10-3 cm
+    int xCode = (int)(muX[k] * 1000 + 0.5);
+    int yCode = (int)(muY[k] * 1000 + 0.5);
+    if (xMap.find(xCode) == xMap.end()) {
+      // Not yet computed
+      vectorAddScalar(xInf0, -muX[k], N, zInf);
+      vectorAddScalar(xSup0, -muX[k], N, zSup);
+      axe = 0;
+      double* xIntegrals = new double[N];
+      compute1DPadIntegrals(zInf, zSup, N, axe, chId, xIntegrals);
+      xMap[xCode] = xIntegrals;
+    }
+    if (yMap.find(yCode) == yMap.end()) {
+      // Not yet computed
+      vectorAddScalar(yInf0, -muY[k], N, zInf);
+      vectorAddScalar(ySup0, -muY[k], N, zSup);
+      axe = 1;
+      double* yIntegrals = new double[N];
+      compute1DPadIntegrals(zInf, zSup, N, axe, chId, yIntegrals);
+      yMap[yCode] = yIntegrals;
+    }
+    // Compute IC(xy) = IC(x) * IC(y)
+    vectorMultVector(xMap[xCode], yMap[yCode], N, &Cij[N * k]);
+    //
+    // Check
+    if (clusterConfig.mathiesonCheck) {
+      double xInf[N], xSup[N];
+      double yInf[N], ySup[N];
+      double lBoundPrim[N], uBoundPrim[N];
+      double xIntegrals[N], yIntegrals[N], Integrals[N];
+      // printf("pad xyPad[0]= %f %f \n", (xSup0[0] - xInf0[0])*0.5, (ySup0[0] - yInf0[0])*0.5);
+      // printf("pad xyPad[0]= %f %f \n", xSup0[0], ySup0[0]);
+      // printf("pad xyPix[0]= %f %f \n", muX[k], muY[k]);
+      vectorAddScalar(xInf0, -muX[k], N, xInf);
+      vectorAddScalar(xSup0, -muX[k], N, xSup);
+      vectorAddScalar(yInf0, -muY[k], N, yInf);
+      vectorAddScalar(ySup0, -muY[k], N, ySup);
+      checkIntegrals(xInf, xSup, yInf, ySup, &Cij[N * k], chId, N);
+    }
+  }
+  // Free map
+  for (auto it = xMap.begin(); it != xMap.end(); ++it) {
+    delete[] it->second;
+  }
+  for (auto it = yMap.begin(); it != yMap.end(); ++it) {
+    delete[] it->second;
+  }
+}
+
+void computeFastCijV0(const Pads& pads, const Pads& pixel, double Cij[])
+{
+  // Compute the Charge Integral Cij of pads (j index), considering the
+  // center of the Mathieson fct on a pixel (i index)
+  // Use the fact that the charge integral CI(x,y) = CI(x) * CI(y)
+  // to reduce the computation cost
+  // CI(x) is store in PadIntegralX
+  // CI(y) is store in PadIntegralY
+  // A sub-sampling of CI(x_i + k*minDx) (or CI(y_i + l*minDY)) is used
   // by taking the mininimun of pads.dx(pads.dy) to discretize the x/y space
   //
   // CI(x)/CI(y) are computed if they are requested.
@@ -211,8 +802,8 @@ void computeFastCij(const Pads& pads, const Pads& pixel, double Cij[])
   double xPixMax = vectorMax(muX, K);
   double yPixMin = vectorMin(muY, K);
   double yPixMax = vectorMax(muY, K);
-  double dxMinPix = 2 * vectorMin(pixel.getDX(), K);
-  double dyMinPix = 2 * vectorMin(pixel.getDY(), K);
+  double dxMinPix = vectorMin(pixel.getDX(), K);
+  double dyMinPix = vectorMin(pixel.getDY(), K);
   // Sampling of PadIntegralX/PadIntegralY
   int nXPixels = (int)((xPixMax - xPixMin) / dxMinPix + 0.5) + 1;
   int nYPixels = (int)((yPixMax - yPixMin) / dyMinPix + 0.5) + 1;
@@ -228,7 +819,7 @@ void computeFastCij(const Pads& pads, const Pads& pixel, double Cij[])
   vectorSet((double*)PadIntegralY, -1.0, nYPixels * N);
   double zInf[N];
   double zSup[N];
-  bool xAxe;
+  int axe;
   /*
   for (int kx=0; kx < nXPixels; kx++) {
     double x = xPixMin + kx * dxPix;
@@ -259,18 +850,46 @@ void computeFastCij(const Pads& pads, const Pads& pixel, double Cij[])
       // Not yet computed
       vectorAddScalar(xInf0, -muX[k], N, zInf);
       vectorAddScalar(xSup0, -muX[k], N, zSup);
-      xAxe = true;
-      compute1DPadIntegrals(zInf, zSup, N, chId, xAxe, &PadIntegralX[xIdx * N + 0]);
+      axe = 0;
+      compute1DPadIntegrals(zInf, zSup, N, axe, chId, &PadIntegralX[xIdx * N + 0]);
     }
     if (PadIntegralY[yIdx * N + 0] == -1) {
       // Not yet computed
       vectorAddScalar(yInf0, -muY[k], N, zInf);
       vectorAddScalar(ySup0, -muY[k], N, zSup);
-      xAxe = false;
-      compute1DPadIntegrals(zInf, zSup, N, chId, xAxe, &PadIntegralY[yIdx * N + 0]);
+      axe = 1;
+      compute1DPadIntegrals(zInf, zSup, N, axe, chId, &PadIntegralY[yIdx * N + 0]);
     }
     // Compute IC(xy) = IC(x) * IC(y)
     vectorMultVector(&PadIntegralX[xIdx * N + 0], &PadIntegralY[yIdx * N + 0], N, &Cij[N * k]);
+
+    double xInf[N], xSup[N];
+    double yInf[N], ySup[N];
+    double lBoundPrim[N], uBoundPrim[N];
+    double xIntegrals[N], yIntegrals[N], Integrals[N];
+
+    vectorAddScalar(xInf0, -muX[k], N, xInf);
+    vectorAddScalar(xSup0, -muX[k], N, xSup);
+    vectorAddScalar(yInf0, -muY[k], N, yInf);
+    vectorAddScalar(ySup0, -muY[k], N, ySup);
+    double integral;
+    int axe = 0;
+    mathiesonPrimitive(xInf, N, axe, chId, lBoundPrim);
+    mathiesonPrimitive(xSup, N, axe, chId, uBoundPrim);
+    vectorAddVector(uBoundPrim, -1.0, lBoundPrim, N, xIntegrals);
+    axe = 1;
+    mathiesonPrimitive(yInf, N, axe, chId, lBoundPrim);
+    mathiesonPrimitive(ySup, N, axe, chId, uBoundPrim);
+    vectorAddVector(uBoundPrim, -1.0, lBoundPrim, N, yIntegrals);
+    // vectorPrint("yIntegrals analytics ", yIntegrals, N);
+    vectorMultVector(xIntegrals, yIntegrals, N, Integrals);
+    for (int i = 0; i < N; i++) {
+      // compute2DPadIntegrals(xInf[i], xSup, yInf, ySup, 1, chId, &integral);
+      if (std::fabs(Cij[N * k + i] - Integrals[i]) > 1.0e-6) {
+        printf("i(pixel)=%d j(pad)=%d cij=%f xInt=%f yInt=%f fastcij=%f xFast=%f yFast=%f\n", k, i,
+               Integrals[i], xIntegrals[i], yIntegrals[i], Cij[N * k + i], PadIntegralX[xIdx * N + i], PadIntegralY[yIdx * N + i]);
+      }
+    }
   }
   delete[] PadIntegralX;
   delete[] PadIntegralY;
@@ -297,7 +916,7 @@ void computeCij(const Pads& pads, const Pads& pixel, double Cij[])
   const double* xInf0 = pads.getXInf();
   const double* yInf0 = pads.getYInf();
   const double* xSup0 = pads.getXSup();
-  const double* ySup0 = pads.getXSup();
+  const double* ySup0 = pads.getYSup();
 
   //
   const double* muX = pixel.getX();
@@ -318,6 +937,47 @@ void computeCij(const Pads& pads, const Pads& pixel, double Cij[])
     // vectorSum( &Cij[N*k], N) );
   }
 }
+
+// Compare checkCij[] with Cij recomputed by computeCij(pads, pixels); element (pixel k, pad l) is at [k * nPads + l].
+// mode : 0 (nothing), 1 (info), 2 (detail), -1 (info, then throws std::out_of_range)
+void checkCij(const Pads& pads, const Pads& pixels, const double* checkCij, int mode)
+{
+  int nPads = pads.getNbrOfPads();
+  int nPixels = pixels.getNbrOfPads();
+  double* Cij = new double[nPads * nPixels];
+  double* diffCij = new double[nPads * nPixels];
+  double precision = 2.0e-5;
+  computeCij(pads, pixels, Cij);
+  vectorAddVector(Cij, -1, checkCij, nPads * nPixels, diffCij);
+  vectorAbs(diffCij, nPads * nPixels, diffCij);
+  double minDiff = vectorMin(diffCij, nPads * nPixels);
+  double maxDiff = vectorMax(diffCij, nPads * nPixels);
+  int argMax = vectorArgMax(diffCij, nPads * nPixels);
+  int iIdx = argMax / nPads;
+  int jIdx = argMax % nPads;
+  if ((maxDiff > precision) && (mode != 0)) {
+    printf("\n\n[checkCij] min/max(checkCij-Cij)=(%f, %f) argmin/max=(i=%d, j=%d)\n",
+           minDiff, maxDiff, iIdx, jIdx);
+    printf("\n checkCij=%f differ from  Cij=%f\n", checkCij[iIdx * nPads + jIdx], Cij[iIdx * nPads + jIdx]);
+  }
+  if ((maxDiff > precision) && (mode > 1)) {
+    for (int k = 0; k < nPixels; k++) {
+      for (int l = 0; l < nPads; l++) {
+        if (diffCij[k * nPads + l] > precision) {
+          printf("pad=%d pixel=%d checkCij=%f Cij=%f diff=%f\n", l, k, checkCij[k * nPads + l], Cij[k * nPads + l], diffCij[k * nPads + l]);
+        }
+      }
+    }
+  }
+  // Decide first, then free the work buffers BEFORE throwing: the exception path must not leak them
+  bool throwOnBadCij = (maxDiff > precision) && (mode == -1);
+  delete[] Cij;
+  delete[] diffCij;
+  if (throwOnBadCij) {
+    throw std::out_of_range("[checkCij] bad Cij value");
+  }
+}
+
 // theta
 double* getVarX(double* theta, int K) { return &theta[0 * K]; };
 double* getVarY(double* theta, int K) { return &theta[1 * K]; };
@@ -564,7 +1224,22 @@ void printXYdXY(const char* str, const double* xyDxy, int NMax, int N,
 } // namespace o2
 
 // C Wrapper
-void o2_mch_initMathieson() { o2::mch::initMathieson(); }
+// C wrapper: calls o2::mch::initMathieson() with clusterConfig.useSpline and the cache flag (useCache_) set to 0.
+void o2_mch_initMathieson()
+{
+  o2::mch::initMathieson(o2::mch::clusterConfig.useSpline, 0);
+  /* Disabled debugging snippet: 2D pad integrals of 4 pads of size 0.5 x 0.5 with chamberId = 2
+  int N = 4;
+  double xInf[N] = {-0.1, -0.2, -0.3, -0.5};
+  double yInf[N] = {-0.1, -0.2, -0.3, -0.5};
+  double xSup[N], ySup[N], integrals[N];
+  o2::mch::vectorAddScalar(xInf, 0.5, N, xSup);
+  o2::mch::vectorAddScalar(yInf, 0.5, N, ySup);
+
+  o2::mch::compute2DPadIntegrals(xInf, xSup, yInf, ySup, N, 2, integrals);
+  o2::mch::vectorPrint("Integral", integrals, N);
+  */
+}
 
 void o2_mch_compute2DPadIntegrals(const double* xInf, const double* xSup,
                                   const double* yInf, const double* ySup, int N,
diff --git a/Detectors/MUON/MCH/Clustering/src/mathieson.h b/Detectors/MUON/MCH/Clustering/src/mathieson.h
index 66c0bc0243a9c..718063437444e 100644
--- a/Detectors/MUON/MCH/Clustering/src/mathieson.h
+++ b/Detectors/MUON/MCH/Clustering/src/mathieson.h
@@ -32,21 +32,64 @@ namespace o2
 {
 namespace mch
 {
-void initMathieson();
-
+typedef struct SplineCoef { // Four coefficient arrays of a spline; presumably filled by computeSplineCoef() - TODO confirm
+  double* a;
+  double* b;
+  double* c;
+  double* d;
+  SplineCoef(int N); // NOTE(review): presumably allocates a, b, c, d for N points - confirm in mathieson.cxx
+  ~SplineCoef(); // NOTE(review): user-declared destructor but no copy control; confirm instances are never copied
+} SplineCoef_t;
+
+typedef struct { // Returned by compressPads(), consumed by computeCompressed2DPadIntegrals(), freed with deleteCompressedPads()
+  // Compressed values along X and Y
+  int nYc; // presumably the number of entries of yCompressed - TODO confirm
+  int nXc; // presumably the number of entries of xCompressed - TODO confirm
+  // Mapping : original array -> compressed array
+  int* mapXInf; // presumably: index in xCompressed of each pad xInf - TODO confirm
+  int* mapXSup; // presumably: index in xCompressed of each pad xSup - TODO confirm
+  int* mapYInf; // presumably: index in yCompressed of each pad yInf - TODO confirm
+  int* mapYSup; // presumably: index in yCompressed of each pad ySup - TODO confirm
+  double* xCompressed;
+  double* yCompressed;
+} CompressedPads_t;
+
+void initMathieson(int useSpline_, int useCache_);
+void mathiesonPrimitive(const double* xy, int N,
+                        int axe, int chamberId, double mPrimitive[]);
+void initSplineMathiesonPrimitive();
+void computeSplineCoef(const double* xy, double xyStep, const double* f, int N,
+                       double leftDerivative, double rightDerivative, SplineCoef* splineCoef);
+void splineMathiesonPrimitive(const double* x, int N, int axe, int chamberId, double* mPrimitive);
+
+void compute1DMathieson(const double* xy, int N,
+                        int axe, int chamberId, double mathieson[]);
+void compute1DPadIntegrals(const double* xyInf, const double* xySup, int N,
+                           double xy0, int axe, int chamberId, double* integrals);
 void compute1DPadIntegrals(const double* xInf, const double* xSup, int N,
-                           int chamberId, bool xAxe, double Integrals[]);
+                           int axe, int chamberId, double Integrals[]);
 
 void compute2DPadIntegrals(const double* xInf, const double* xSup,
                            const double* yInf, const double* ySup, int N,
                            int chamberId, double Integrals[]);
 
+CompressedPads_t* compressPads(const double* xInf, const double* xSup,
+                               const double* yInf, const double* ySup, int N);
+void computeCompressed2DPadIntegrals(CompressedPads_t* compressedPads, double xShift, double yShift, int N,
+                                     int chamberId, double Integrals[]);
+void deleteCompressedPads(CompressedPads_t* compressedPads);
+
 void compute2DMathiesonMixturePadIntegrals(const double* xyInfSup0,
                                            const double* theta, int N, int K,
                                            int chamberId, double Integrals[]);
 
-void computeFastCij(const Pads& pads, const Pads& theta, double Cij[]);
 void computeCij(const Pads& pads, const Pads& theta, double Cij[]);
+void computeFastCij(const Pads& pads, const Pads& theta, double Cij[]);
+void checkCij(const Pads& pads, const Pads& pixels, const double* checkCij, int mode);
+
+bool checkIntegrals(const double* xInf, const double* xSup, const double* yInf, const double* ySup,
+                    const double* integralsToCheck, int chId, int N);
+
 // Utilities to handle mixture of parameter theta
 double* getVarX(double* theta, int K);
 double* getVarY(double* theta, int K);
diff --git a/Detectors/MUON/MCH/Clustering/src/mathiesonFit.cxx b/Detectors/MUON/MCH/Clustering/src/mathiesonFit.cxx
index b8349775fccab..a496af110482f 100644
--- a/Detectors/MUON/MCH/Clustering/src/mathiesonFit.cxx
+++ b/Detectors/MUON/MCH/Clustering/src/mathiesonFit.cxx
@@ -17,18 +17,69 @@
 #include "mathieson.h"
 #include "mathiesonFit.h"
 
+namespace o2
+{
+namespace mch
+{
+extern ClusterConfig clusterConfig;
+}
+} // namespace o2
+
 using namespace o2::mch;
 
+extern o2::mch::ClusterConfig o2::mch::clusterConfig;
+
+// Scales z[] in place, cathode by cathode, so that the max computed charge over the unsaturated
+// pads matches cathMaxObs[c]. The 2 applied factors are stored in coefNorm[0] and coefNorm[1].
+// Returns the mean factor over the cathodes whose factor is > 1.0e-6 (used to weight the penalization).
+double chargeNormalization(const Mask_t* cath, const Mask_t* notSaturated, const double* cathMaxObs, int N, double* z, double* coefNorm)
+{
+  double zMax[2] = {0, 0};
+  for (int i = 0; i < N; i++) {
+    zMax[cath[i]] = std::fmax(zMax[cath[i]], notSaturated[i] * z[i]);
+  }
+  // Avoid dividing by 0
+  for (int c = 0; c < 2; c++) {
+    if (zMax[c] < 1.0e-6) {
+      // In this case cathMax[c] must be 0
+      zMax[c] = 1.0;
+    }
+  }
+  //
+  // Normalization coefficient
+  // Use the max charge cathode for each cathode
+  coefNorm[0] = cathMaxObs[0] / zMax[0];
+  coefNorm[1] = cathMaxObs[1] / zMax[1];
+  // Perform the normalization
+  for (int i = 0; i < N; i++) {
+    z[i] = z[i] * coefNorm[cath[i]];
+  }
+  // Number of cathodes with a significant coefficient. It is 0 when both coefficients
+  // are <= 1.0e-6 (e.g. cathMaxObs = {0, 0}): dividing by it gave NaN/inf, which then
+  // propagated to every residual. Divide by 1 in that case.
+  int nActiveCath = (coefNorm[0] > 1.0e-6) + (coefNorm[1] > 1.0e-6);
+  double meanCoef = (coefNorm[0] + coefNorm[1]) / ((nActiveCath > 0) ? nActiveCath : 1);
+
+  if (clusterConfig.fittingLog >= clusterConfig.debug) {
+    printf(
+      "    Max of unsaturated (observed) pads (cathMax0/1)= %f, %f, "
+      "maxThZ (computed)  %f, %f\n",
+      cathMaxObs[0], cathMaxObs[1], zMax[0], zMax[1]);
+  }
+
+  return meanCoef;
+}
+
 int f_ChargeIntegral(const gsl_vector* gslParams, void* dataFit,
                      gsl_vector* residuals)
 {
   funcDescription_t* dataPtr = (funcDescription_t*)dataFit;
   int N = dataPtr->N;
   int K = dataPtr->K;
-  const double* x = dataPtr->x_ptr;
-  const double* y = dataPtr->y_ptr;
-  const double* dx = dataPtr->dx_ptr;
-  const double* dy = dataPtr->dy_ptr;
+  const double* xInf = dataPtr->xInf_ptr;
+  const double* yInf = dataPtr->yInf_ptr;
+  const double* xSup = dataPtr->xSup_ptr;
+  const double* ySup = dataPtr->ySup_ptr;
   const Mask_t* cath = dataPtr->cath_ptr;
   const double* zObs = dataPtr->zObs_ptr;
   Mask_t* notSaturated = dataPtr->notSaturated_ptr;
@@ -36,6 +87,11 @@ int f_ChargeIntegral(const gsl_vector* gslParams, void* dataFit,
   double* cathWeights = dataPtr->cathWeights_ptr;
   double* cathMax = dataPtr->cathMax_ptr;
   double* zCathTotalCharge = dataPtr->zCathTotalCharge_ptr;
+  double* cathCoefNorm = dataPtr->cathCoefNorm_ptr;
+  int dimOfParameters = dataPtr->dimOfParameters;
+  int axe = dataPtr->axe;
+
+  // printf("  dimOfParameters, axe: %d %d\n", dimOfParameters, axe);
   // ??? int verbose = dataPtr->verbose;
   // Parameters
   const double* params = gsl_vector_const_ptr(gslParams, 0);
@@ -43,16 +99,21 @@ int f_ChargeIntegral(const gsl_vector* gslParams, void* dataFit,
   //  mux = mu[0:K-1]
   //  muy = mu[K:2K-1]
   const double* mu = &params[0];
-  double* w = (double*)&params[2 * K];
+  // ??? inv double* w = (double*)&params[2 * K];
+  double* w = (double*)&params[(dimOfParameters - 1) * K];
 
   // Set constrain: sum_(w_k) = 1
   double lastW = 1.0 - vectorSum(w, K - 1);
   //
   // Display paramameters (w, mu_x, mu_x
-  if (ClusterConfig::fittingLog >= ClusterConfig::debug) {
+  if (clusterConfig.fittingLog >= clusterConfig.detail) {
     printf("  Function evaluation at:\n");
     for (int k = 0; k < K; k++) {
-      printf("    mu_k[%d] = %g %g \n", k, mu[k], mu[K + k]);
+      if (dimOfParameters == 3) {
+        printf("    mu_k[%d] = %g %g \n", k, mu[k], mu[K + k]);
+      } else {
+        printf("    mu_k[%d] = %g \n", k, mu[k]);
+      }
     }
     for (int k = 0; k < K - 1; k++) {
       printf("    w_k[%d] = %g \n", k, w[k]);
@@ -66,43 +127,74 @@ int f_ChargeIntegral(const gsl_vector* gslParams, void* dataFit,
   vectorSetZero(z, N);
   double zTmp[N];
   //
-  double xyInfSup[4 * N];
+  double xyInf0[N];
+  double xySup0[N];
+  /*
   double* xInf = getXInf(xyInfSup, N);
   double* xSup = getXSup(xyInfSup, N);
   double* yInf = getYInf(xyInfSup, N);
   double* ySup = getYSup(xyInfSup, N);
-
+  */
   // Compute the pads charge considering the
   // Mathieson set w_k, mu_x, mu_y
   // TODO: Minor optimization  avoid to
   // compute  x[:] - dx[:]  i.E use xInf / xSup
   for (int k = 0; k < K; k++) {
-    // xInf[:] = x[:] - dx[:] - muX[k]
-    vectorAddVector(x, -1.0, dx, N, xInf);
-    vectorAddScalar(xInf, -mu[k], N, xInf);
-    // xSup = xInf + 2.0 * dxy[0]
-    vectorAddVector(xInf, 2.0, dx, N, xSup);
-    // yInf = xy[1] - dxy[1] - mu[k,1]
-    // ySup = yInf + 2.0 * dxy[1]
-    vectorAddVector(y, -1.0, dy, N, yInf);
-    vectorAddScalar(yInf, -mu[K + k], N, yInf);
-    // ySup = yInf + 2.0 * dxy[0]
-    vectorAddVector(yInf, 2.0, dy, N, ySup);
+    if (axe == 0) {
+      // xInf[:] = x[:] - dx[:] - muX[k]
+      // Inv vectorAddVector(x, -1.0, dx, N, xInf);
+      vectorAddScalar(xInf, -mu[k], N, xyInf0);
+      vectorAddScalar(xSup, -mu[k], N, xySup0);
+      // xSup = xInf + 2.0 * dxy[0]
+      // Inv vectorAddVector(xInf, 2.0, dx, N, xSup);
+      // yInf = xy[1] - dxy[1] - mu[k,1]
+      // ySup = yInf + 2.0 * dxy[1]
+      // vectorAddScalar(xSup, -mu[k], N, xSup);
+      compute1DPadIntegrals(xyInf0, xySup0, N, 0, chamberId, zTmp);
+      // Unnecessary to multiply by a cst (y integral part)
+      // vectorMultScal( xIntegrals, yCstIntegral, N, Integrals);
+    } else if (axe == 1) {
+      // 1D fit along y.
+      // Parameter layout: w starts at params[(dimOfParameters - 1) * K].
+      // With dimOfParameters == 2 the y positions are mu[k]: mu[K + k] would
+      // read the weights w (and index past the K-1 free weights for k = K-1).
+      // With dimOfParameters == 3 (mux, muy, w) the y positions are mu[K + k].
+      const double* muY = (dimOfParameters == 3) ? &mu[K] : mu;
+      vectorAddScalar(yInf, -muY[k], N, xyInf0);
+      // The upper bounds along y are ySup (xSup was used by mistake)
+      vectorAddScalar(ySup, -muY[k], N, xySup0);
+      compute1DPadIntegrals(xyInf0, xySup0, N, 1, chamberId, zTmp);
+      // Unnecessary to multiply by a cst (x integral part)
+    } else {
+      // xInf[:] = x[:] - dx[:] - muX[k]
+      /*
+      vectorAddVector(x, -1.0, dx, N, xInf);
+      vectorAddScalar(xInf, -mu[k], N, xInf);
+      // xSup = xInf + 2.0 * dxy[0]
+      vectorAddVector(xInf, 2.0, dx, N, xSup);
+      // yInf = xy[1] - dxy[1] - mu[k,1]
+      // ySup = yInf + 2.0 * dxy[1]
+      vectorAddVector(y, -1.0, dy, N, yInf);
+      vectorAddScalar(yInf, -mu[K + k], N, yInf);
+      // ySup = yInf + 2.0 * dxy[0]
+      vectorAddVector(yInf, 2.0, dy, N, ySup);
+      */
+      computeCompressed2DPadIntegrals(dataPtr->compressedPads, mu[k], mu[K + k], N, chamberId, zTmp);
+    }
     //
-    compute2DPadIntegrals(xInf, xSup, yInf, ySup, N, chamberId, zTmp);
     // Multiply by the weight w[k]
     double wTmp = (k != K - 1) ? w[k] : lastW;
     vectorAddVector(z, wTmp, zTmp, N, z);
   }
-  // ??? vectorPrint("z", z, N);
   // ??? vectorPrint("zObs", zObs, N);
 
   //
   // To Normalize each cathode with the charge sum
   // of unsaturated pads
-  // Not used in residual computation
-  double sumNormalizedZ[2];
-  if (ClusterConfig::fittingLog >= ClusterConfig::debug) {
+  // NOT USED in this residual computation
+  double sumNormalizedZ[2] = {0.0, 0.0}; // accumulated with += below: must start at 0
+  if (clusterConfig.fittingLog >= clusterConfig.debug) {
     for (int i = 0; i < N; i++) {
       if (cath[i] == 0) {
         sumNormalizedZ[0] += notSaturated[i] * z[i];
@@ -112,56 +204,252 @@ int f_ChargeIntegral(const gsl_vector* gslParams, void* dataFit,
     }
   }
 
-  // Get the max charge of unsaturrated pads for each cathodes
-  // Will be used to normalize the charge
-  double maxThZ[2] = {0, 0};
+  // Charge normalization
+  // Get the max charge of unsaturated pads for each cathodes
+  double meanCoef = chargeNormalization(cath, notSaturated, cathMax, N, z, cathCoefNorm);
+
+  //
+  // printf("maxCath: %f %f\n", cathMax[0], cathMax[1]);
+  // printf("coefNorm: %f %f\n", coefNorm[0], coefNorm[1]);
+  // printf("meaCoef: %f \n", meanCoef);
+  //
+
+  //
+  // Cathode Penalization
+  //
+  // Consider the charge sum for each cathode
+  // Tested but NOT USED
+  // To be removed for perf
+  double chargePerCath[2] = {0., 0.};
   for (int i = 0; i < N; i++) {
-    maxThZ[cath[i]] = fmax(maxThZ[cath[i]], notSaturated[i] * z[i]);
+    // To have traces about the fitting
+    chargePerCath[cath[i]] += z[i];
   }
-  // Avoid dividing by 0
-  for (int c = 0; c < 2; c++) {
-    if (maxThZ[c] < 1.0e-6) {
-      // cathMax[c] sould be 0
-      maxThZ[c] = 1.0;
-    }
+  double cathPenal = 0;
+  if (clusterConfig.fittingLog >= clusterConfig.debug) {
+    cathPenal = fabs(zCathTotalCharge[0] - chargePerCath[0]) +
+                fabs(zCathTotalCharge[1] - chargePerCath[1]);
   }
+
   //
-  // Normalization coefficient
+  // w-Penalization
   //
-  // Use the max charge cathode for each cathode
-  double coefNorm[2] = {cathMax[0] / maxThZ[0], cathMax[1] / maxThZ[1]};
-  // Use to wheight the penalization
-  double meanCoef = (coefNorm[0] + coefNorm[1]) /
-                    ((coefNorm[0] > 1.0e-6) + (coefNorm[1] > 1.0e-6));
-  double chargePerCath[2] = {0.0, 0.0};
-  // Perform the normalization
+  // Each w, must be 0 < w < 1
+  double wPenal = 0.0;
+  for (int k = 0; k < (K - 1); k++) {
+    if (w[k] < 0.0) {
+      wPenal += (-w[k]);
+    } else if (w[k] > 1.0) {
+      wPenal += (w[k] - 1.0);
+    }
+  }
+  // ... and the w-sum must be equal to 1
+  wPenal = wPenal + fabs(1.0 - vectorSum(w, K - 1) - lastW);
+  if (clusterConfig.fittingLog >= clusterConfig.debug) {
+    printf("    wPenal: %f\n", wPenal);
+  }
+  // Compute residual
   for (int i = 0; i < N; i++) {
-    z[i] = z[i] * coefNorm[cath[i]];
-    // To have traces about the fitting
-    chargePerCath[cath[i]] += z[i];
+    // Don't consider saturated pads (notSaturated[i] = 0)
+    double mask = notSaturated[i];
+    if ((notSaturated[i] == 0) && (z[i] < zObs[i])) {
+      // Except those charge < Observed charge
+      mask = 1.0;
+    }
+    //
+    // Residuals with penalization
+    //
+    gsl_vector_set(residuals, i, mask * ((z[i] - zObs[i]) + meanCoef * wPenal));
+    //
+    // Without penalization
+    // gsl_vector_set(residuals, i, mask * (zObs[i] - z[i]) + 0 * wPenal);
+    //
+    // Other studied penalization
+    // gsl_vector_set(residuals, i, (zObs[i] - z[i]) * (1.0 + cathPenal) +
+    // wPenal);
+  }
+  if (clusterConfig.fittingLog >= clusterConfig.debug) {
+    printf("    Observed sumCath0=%15.8f, sumCath1=%15.8f,\n",
+           zCathTotalCharge[0], zCathTotalCharge[1]);
+    // printf("  fitted   sumCath0=%15.8f, sumCath1=%15.8f,\n", chargePerCath,
+    // chargePerCath);
+    printf("    Penalties cathPenal=%5.4g wPenal=%5.4g \n", 1.0 + cathPenal,
+           wPenal);
+    printf("    Residues\n");
+    printf("  %15s  %15s  %15s %15s %15s %15s\n", "zObs", "z", "cathWeight",
+           "norm. factor", "notSaturated", "residual");
+    for (int i = 0; i < N; i++) {
+      printf("  %15.8f  %15.8f  %15.8f  %15.8f         %d  %15.8f\n", zObs[i],
+             z[i], cathWeights[i], sumNormalizedZ[cath[i]] * cathWeights[i],
+             notSaturated[i], gsl_vector_get(residuals, i));
+    }
+    printf("\n");
+  }
+  if (clusterConfig.fittingLog >= clusterConfig.detail) {
+    printf("    |f| = %g \n", gsl_blas_dnrm2(residuals));
+  }
+  /*
+  for (int i = 0; i < N; i++) {
+    printf("%f ",  gsl_vector_get(residuals, i));
   }
+  printf("\n");
+  */
+  // char str[16];
+  // scanf( "%s", str);
+  // printf("  norm cst  meanCoef=%f, wPenal=%f \n", meanCoef, wPenal);
+  return GSL_SUCCESS;
+}
+/*
+int f_ChargeIntegralBeforeCompressVersion(const gsl_vector* gslParams, void* dataFit,
+                     gsl_vector* residuals)
+{
+  funcDescription_t* dataPtr = (funcDescription_t*)dataFit;
+  int N = dataPtr->N;
+  int K = dataPtr->K;
+  const double* x = dataPtr->x_ptr;
+  const double* y = dataPtr->y_ptr;
+  const double* dx = dataPtr->dx_ptr;
+  const double* dy = dataPtr->dy_ptr;
+  const Mask_t* cath = dataPtr->cath_ptr;
+  const double* zObs = dataPtr->zObs_ptr;
+  Mask_t* notSaturated = dataPtr->notSaturated_ptr;
+  int chamberId = dataPtr->chamberId;
+  double* cathWeights = dataPtr->cathWeights_ptr;
+  double* cathMax = dataPtr->cathMax_ptr;
+  double* zCathTotalCharge = dataPtr->zCathTotalCharge_ptr;
+  double* cathCoefNorm = dataPtr->cathCoefNorm_ptr;
+  int dimOfParameters = dataPtr->dimOfParameters;
+  int axe = dataPtr->axe;
+
+  // printf("  dimOfParameters, axe: %d %d\n", dimOfParameters, axe);
+  // ??? int verbose = dataPtr->verbose;
+  // Parameters
+  const double* params = gsl_vector_const_ptr(gslParams, 0);
+  // Note:
+  //  mux = mu[0:K-1]
+  //  muy = mu[K:2K-1]
+  const double* mu = &params[0];
+  // ??? inv double* w = (double*)&params[2 * K];
+  double* w = (double*)&params[ (dimOfParameters - 1) * K];
+
+  // Set constrain: sum_(w_k) = 1
+  double lastW = 1.0 - vectorSum(w, K - 1);
+  //
+  // Display paramameters (w, mu_x, mu_x
+  if (clusterConfig.fittingLog >= clusterConfig.detail) {
+    printf("  Function evaluation at:\n");
+    for (int k = 0; k < K; k++) {
+      if (dimOfParameters==3) {
+        printf("    mu_k[%d] = %g %g \n", k, mu[k], mu[K + k]);
+      } else {
+        printf("    mu_k[%d] = %g \n", k, mu[k]);
+      }
+    }
+    for (int k = 0; k < K - 1; k++) {
+      printf("    w_k[%d] = %g \n", k, w[k]);
+    }
+    // Last W
+    printf("    w_k[%d] = %g \n", K - 1, lastW);
+  }
+
+  // Charge Integral on Pads
+  double z[N];
+  vectorSetZero(z, N);
+  double zTmp[N];
+  //
+  double xyInfSup[4 * N];
+  double* xInf = getXInf(xyInfSup, N);
+  double* xSup = getXSup(xyInfSup, N);
+  double* yInf = getYInf(xyInfSup, N);
+  double* ySup = getYSup(xyInfSup, N);
+
+  // Compute the pads charge considering the
+  // Mathieson set w_k, mu_x, mu_y
+  // TODO: Minor optimization  avoid to
+  // compute  x[:] - dx[:]  i.E use xInf / xSup
+  for (int k = 0; k < K; k++) {
+    if (axe == 0) {
+      // xInf[:] = x[:] - dx[:] - muX[k]
+      vectorAddVector(x, -1.0, dx, N, xInf);
+      vectorAddScalar(xInf, -mu[k], N, xInf);
+      // xSup = xInf + 2.0 * dxy[0]
+      vectorAddVector(xInf, 2.0, dx, N, xSup);
+      // yInf = xy[1] - dxy[1] - mu[k,1]
+      // ySup = yInf + 2.0 * dxy[1]
+      compute1DPadIntegrals( xInf, xSup, N, 0, chamberId, zTmp);
+      // Unnecessary to multiply by a cst (y integral part)
+      // vectorMultScal( xIntegrals, yCstIntegral, N, Integrals);
+    } else if (axe == 1) {
+       vectorAddVector(y, -1.0, dy, N, yInf);
+       // Take care : not -mu[K + k] for muy
+       vectorAddScalar(yInf, -mu[k], N, yInf);
+       // ySup = yInf + 2.0 * dxy[0]
+       vectorAddVector(yInf, 2.0, dy, N, ySup);
+       compute1DPadIntegrals( yInf, ySup, N, 1, chamberId, zTmp);
+       // Unnecessary to multiply by a cst (x integral part)
+    } else {
+      // xInf[:] = x[:] - dx[:] - muX[k]
+      vectorAddVector(x, -1.0, dx, N, xInf);
+      vectorAddScalar(xInf, -mu[k], N, xInf);
+      // xSup = xInf + 2.0 * dxy[0]
+      vectorAddVector(xInf, 2.0, dx, N, xSup);
+      // yInf = xy[1] - dxy[1] - mu[k,1]
+      // ySup = yInf + 2.0 * dxy[1]
+      vectorAddVector(y, -1.0, dy, N, yInf);
+      vectorAddScalar(yInf, -mu[K + k], N, yInf);
+      // ySup = yInf + 2.0 * dxy[0]
+      vectorAddVector(yInf, 2.0, dy, N, ySup);
+      compute2DPadIntegrals(xInf, xSup, yInf, ySup, N, chamberId, zTmp);
+    }
+    //
+    // Multiply by the weight w[k]
+    double wTmp = (k != K - 1) ? w[k] : lastW;
+    vectorAddVector(z, wTmp, zTmp, N, z);
+  }
+  // ??? vectorPrint("zObs", zObs, N);
+
+  //
+  // To Normalize each cathode with the charge sum
+  // of unsaturated pads
+  // NOT USED in this residual computation
+  double sumNormalizedZ[2];
+  if (clusterConfig.fittingLog >= clusterConfig.debug) {
+    for (int i = 0; i < N; i++) {
+      if (cath[i] == 0) {
+        sumNormalizedZ[0] += notSaturated[i] * z[i];
+      } else {
+        sumNormalizedZ[1] += notSaturated[i] * z[i];
+      }
+    }
+  }
+
+  // Charge normalization
+  // Get the max charge of unsaturated pads for each cathodes
+  double meanCoef = chargeNormalization( cath, notSaturated, cathMax, N, z, cathCoefNorm );
+
   //
   // printf("maxCath: %f %f\n", cathMax[0], cathMax[1]);
   // printf("coefNorm: %f %f\n", coefNorm[0], coefNorm[1]);
   // printf("meaCoef: %f \n", meanCoef);
   //
 
-  if (ClusterConfig::fittingLog >= ClusterConfig::debug) {
-    printf(
-      "    Max of unsaturated (observed) pads (cathMax0/1)= %f, %f, "
-      "maxThZ (computed)  %f, %f\n",
-      cathMax[0], cathMax[1], maxThZ[0], maxThZ[1]);
-  }
   //
   // Cathode Penalization
   //
   // Consider the charge sum for each cathode
-  // Tested but Not used
+  // Tested but NOT USED
+  // To be removed for perf
+  double chargePerCath[2] = {0., 0.};
+  for (int i = 0; i < N; i++) {
+    // To have traces about the fitting
+    chargePerCath[cath[i]] += z[i];
+  }
   double cathPenal = 0;
-  if (ClusterConfig::fittingLog >= ClusterConfig::debug) {
+  if (clusterConfig.fittingLog >= clusterConfig.debug) {
     cathPenal = fabs(zCathTotalCharge[0] - chargePerCath[0]) +
                 fabs(zCathTotalCharge[1] - chargePerCath[1]);
   }
+
   //
   // w-Penalization
   //
@@ -176,7 +464,7 @@ int f_ChargeIntegral(const gsl_vector* gslParams, void* dataFit,
   }
   // ... and the w-sum must be equal to 1
   wPenal = wPenal + fabs(1.0 - vectorSum(w, K - 1) - lastW);
-  if (ClusterConfig::fittingLog >= ClusterConfig::debug) {
+  if (clusterConfig.fittingLog >= clusterConfig.debug) {
     printf("    wPenal: %f\n", wPenal);
   }
   // Compute residual
@@ -190,7 +478,7 @@ int f_ChargeIntegral(const gsl_vector* gslParams, void* dataFit,
     //
     // Residuals with penalization
     //
-    gsl_vector_set(residuals, i, mask * ((zObs[i] - z[i]) + meanCoef * wPenal));
+    gsl_vector_set(residuals, i, mask * ((z[i] - zObs[i]) + meanCoef * wPenal));
     //
     // Without penalization
     // gsl_vector_set(residuals, i, mask * (zObs[i] - z[i]) + 0 * wPenal);
@@ -199,7 +487,7 @@ int f_ChargeIntegral(const gsl_vector* gslParams, void* dataFit,
     // gsl_vector_set(residuals, i, (zObs[i] - z[i]) * (1.0 + cathPenal) +
     // wPenal);
   }
-  if (ClusterConfig::fittingLog >= ClusterConfig::debug) {
+  if (clusterConfig.fittingLog >= clusterConfig.debug) {
     printf("    Observed sumCath0=%15.8f, sumCath1=%15.8f,\n",
            zCathTotalCharge[0], zCathTotalCharge[1]);
     // printf("  fitted   sumCath0=%15.8f, sumCath1=%15.8f,\n", chargePerCath,
@@ -216,9 +504,153 @@ int f_ChargeIntegral(const gsl_vector* gslParams, void* dataFit,
     }
     printf("\n");
   }
+  if (clusterConfig.fittingLog >= clusterConfig.detail) {
+    printf("    |f| = %g \n", gsl_blas_dnrm2(residuals));
+  }
+  // char str[16];
+  // scanf( "%s", str);
+  // printf("  norm cst  meanCoef=%f, wPenal=%f \n", meanCoef, wPenal);
   return GSL_SUCCESS;
 }
+*/
+
+/*
+// Derivate of the Charge Integral i.e. mathieson
+int df_ChargeIntegral(const gsl_vector* gslParams, void* dataFit,
+                     gsl_matrix* J)
+{
+  funcDescription_t* dataPtr = (funcDescription_t*)dataFit;
+  int N = dataPtr->N;
+  int K = dataPtr->K;
+  const double* x = dataPtr->x_ptr;
+  const double* y = dataPtr->y_ptr;
+  const double* dx = dataPtr->dx_ptr;
+  const double* dy = dataPtr->dy_ptr;
+  const Mask_t* cath = dataPtr->cath_ptr;
+  const double* zObs = dataPtr->zObs_ptr;
+  Mask_t* notSaturated = dataPtr->notSaturated_ptr;
+  int chamberId = dataPtr->chamberId;
+  double* cathWeights = dataPtr->cathWeights_ptr;
+  double* cathMax = dataPtr->cathMax_ptr;
+  double* zCathTotalCharge = dataPtr->zCathTotalCharge_ptr;
+  double* cathCoefNorm = dataPtr->cathCoefNorm_ptr;
+  // ??? int verbose = dataPtr->verbose;
+  // Parameters
+  const double* params = gsl_vector_const_ptr(gslParams, 0);
+  // Note:
+  //  mux = mu[0:K-1]
+  //  muy = mu[K:2K-1]
+  const double* mux = &params[0];
+  const double* muy = &params[K];
+  double* w = (double*)&params[2 * K];
 
+  // Compute mathieson on x/y
+  // and charge integral on x/y
+  double xCI[N], yCI[N];
+  double xMath[N], yMath[N], xyMath[N];
+  double xyInf[N], xySup[N];
+  double xyVar[N];
+  // Set constrain: sum_(w_k) = 1
+  double lastW = 1.0 - vectorSum(w, K - 1);
+
+  if (clusterConfig.fittingLog >= clusterConfig.detail) {
+    printf("  df evaluation at:\n");
+    for (int k = 0; k < K; k++) {
+      printf("    mu_k[%d] = %g %g \n", k, mux[k], muy[k]);
+    }
+    for (int k = 0; k < K - 1; k++) {
+      printf("    w_k[%d] = %g \n", k, w[k]);
+    }
+    // Last W
+    printf("    w_k[%d] = %g \n", K - 1, lastW);
+  }
+
+  for (int k = 0; k < K; k++) {
+    double w_k = (k < (K-1))? w[k]: lastW;
+    //
+    // X components for CI and mathieson
+    //
+    // xyVar = x - mux[k]
+    vectorAddScalar(x, -mux[k], N, xyVar);
+    // xInf = xyVar - dx
+    vectorAddVector(xyVar, -1.0, dx, N, xyInf);
+    // xSup = xInf + 2.0 * dx
+    vectorAddVector(xyInf, 2.0, dx, N, xySup);
+    // Compute the derivate : mathieson(xSup) - mathieson(xInf)
+    compute1DMathieson( xyInf, N, 0, chamberId, xyMath);
+    compute1DMathieson( xySup, N, 0, chamberId, xMath);
+    vectorAddVector( xMath, -1, xyMath, N, xMath);
+    vectorMultScalar(xMath, 4, N, xMath);
+    // Compute the 1D Charge integral on x
+    compute1DPadIntegrals(xyInf, xySup, N, 0, chamberId, xCI);
+    //
+    // Y components for CI and mathieson
+    //
+    // xyVar = y - muy[k]
+    vectorAddScalar(y, -muy[k], N, xyVar);
+    // Mathieson at  xyVar
+    compute1DMathieson( xyVar, N, 1, chamberId, yMath);
+    // yInf = xyVar - dy
+    vectorAddVector(xyVar, -1.0, dy, N, xyInf);
+    // ySup = yInf + 2.0 * dy
+    vectorAddVector(xyInf, 2.0, dy, N, xySup);
+    // Compute the derivate : mathieson(ySup) - mathieson(yInf)
+    compute1DMathieson( xyInf, N, 1, chamberId, xyMath);
+    compute1DMathieson( xySup, N, 1, chamberId, yMath);
+    vectorAddVector( yMath, -1, xyMath, N, yMath);
+    vectorMultScalar(yMath, 4, N, yMath);
+    // Compute the 1D Charge integral on y
+    compute1DPadIntegrals(xyInf, xySup, N, 1, chamberId, yCI);
+
+    // Normalization factor
+    // double meanCoef = chargeNormalization( cath, notSaturated, cathMax, N, z);
+    //
+    //  Jacobian matrix
+    //
+    // d / dmux_k component
+
+    for (int i = 0; i < N; i++) {
+      gsl_matrix_set (J, i, k,  -0.5*w_k*cathCoefNorm[cath[i]]*xMath[i]*yCI[i]);
+    }
+    // d / dmuy_k component
+    for (int i = 0; i < N; i++) {
+      gsl_matrix_set (J, i, k+K,  -0.5*w_k*cathCoefNorm[cath[i]]*xCI[i]*yMath[i]);
+    }
+    // d / dw_k component
+    if (k < K-1) {
+      for (int i = 0; i < N; i++) {
+        gsl_matrix_set (J, i, 2*K+k,  -0.5*cathCoefNorm[cath[i]]*xCI[i]*yCI[i]);
+      }
+    }
+    // ??? vectorPrint("xMath", xMath, N);
+    // vectorPrint("yMath", yMath, N);
+    // vectorPrint("xCI", xCI, N);
+    // vectorPrint("yCI", yCI, N);
+  }
+  if (clusterConfig.fittingLog >= clusterConfig.detail) {
+    double sumdParam[3*K];
+    vectorSet( sumdParam, 0.0, 3*K);
+    for (int k=0; k < 3*K - 1; k++) {
+      printf("%2d: ", k);
+      for (int i=0; i < N; i++) {
+         sumdParam[k] += gsl_matrix_get( J, i, k);
+         printf("%g ", gsl_matrix_get(J, i, k) );
+      }
+      printf("\n");
+    }
+    printf("  Sum_i d/dparam :\n");
+    for (int k = 0; k < K; k++) {
+      printf("    mux/y[%d] = %g %g \n", k, sumdParam[k], sumdParam[K + k]);
+    }
+    for (int k = 0; k < K; k++) {
+      printf("    w_k[%d] = %g \n", k, sumdParam[2*K + k]);
+    }
+  }
+  return GSL_SUCCESS;
+}
+*/
+
+/*
 // Invalid version
 int f_ChargeIntegral0(const gsl_vector* gslParams, void* dataFit,
                       gsl_vector* residuals)
@@ -390,9 +822,9 @@ int f_ChargeIntegral0(const gsl_vector* gslParams, void* dataFit,
   return GSL_SUCCESS;
 }
 
-void printState(int iter, gsl_multifit_fdfsolver* s, int K)
+void printState(int iter, gsl_multifit_fdfsolver* s, int K, int N)
 {
-  printf("  Fitting iter=%3d |f(x)|=%g\n", iter, gsl_blas_dnrm2(s->f));
+  printf("  Fitting iter=%3d |f(x)| =%g\n", iter, gsl_blas_dnrm2(s->f));
   printf("    mu (x,y):");
   int k = 0;
   for (; k < 2 * K; k++) {
@@ -416,7 +848,18 @@ void printState(int iter, gsl_multifit_fdfsolver* s, int K)
     printf(" % 7.3f", gsl_vector_get(s->dx, k));
   }
   printf("\n");
+  printf("    Jacobian");
+  double sum = 0.0;
+  for (int k=0; k < K; k++) {
+    printf("    k:");
+    for (int i=0; i < N; i++) {
+      printf(" % 7.3f",  gsl_matrix_get (s->J, i, k) );
+    }
+    printf("\n");
+  }
+  printf("\n");
 }
+*/
 
 // Notes :
 //  - the intitialization of Mathieson module must be done before
@@ -426,18 +869,32 @@ namespace o2
 {
 namespace mch
 {
-void printState(int iter, gsl_multifit_fdfsolver* s, int K)
+void printState(int iter, gsl_multifit_fdfsolver* s, int axe, int K, int N)
 {
   printf("  Fitting iter=%3d |f(x)|=%g\n", iter, gsl_blas_dnrm2(s->f));
-  printf("    mu (x,y):");
+  if (axe == 0) {
+    printf("    mu (x):");
+  } else if (axe == 1) {
+    printf("    mu (y):");
+  } else {
+    printf("    mu (x,y):");
+  }
   int k = 0;
-  for (; k < 2 * K; k++) {
-    printf(" % 7.3f", gsl_vector_get(s->x, k));
+  if (axe == -1) {
+    for (; k < 2 * K; k++) {
+      printf(" % 7.3f", gsl_vector_get(s->x, k));
+    }
+    printf("\n");
+  } else {
+    for (; k < 1 * K; k++) {
+      printf(" % 7.3f", gsl_vector_get(s->x, k));
+    }
+    printf("\n");
   }
-  printf("\n");
   double sumW = 0;
   printf("    w:");
-  for (; k < 3 * K - 1; k++) {
+  int nDimensions = (axe == -1) ? 3 : 2;
+  for (; k < nDimensions * K - 1; k++) {
     double w = gsl_vector_get(s->x, k);
     sumW += w;
     printf(" %7.3f", gsl_vector_get(s->x, k));
@@ -447,14 +904,45 @@ void printState(int iter, gsl_multifit_fdfsolver* s, int K)
 
   printf("\n");
   k = 0;
-  printf("    dx:");
-  for (; k < 2 * K; k++) {
-    printf(" % 7.3f", gsl_vector_get(s->dx, k));
+  double dxMax = -1.0;
+  printf("    dxyw:");
+  for (; k < (nDimensions - 1) * K; k++) {
+    double dx_k = gsl_vector_get(s->dx, k);
+    printf(" %7.3f", dx_k);
+    dxMax = (dxMax < dx_k) ? dx_k : dxMax;
+  }
+  printf("\n");
+  printf(" max(dxyw) = %7.3f", dxMax);
+  printf("    Jacobian\n");
+  /* Compilation problem on macOS: s->J is not recognized !!
+  double sum = 0.0;
+  for (int k = 0; k < K; k++) {
+    if (nDimensions == 3) {
+      printf("    k=%2d mux:", k);
+      for (int i = 0; i < N; i++) {
+        printf(" % 7.3f", gsl_matrix_get(s->J, i, k));
+      }
+      printf("\n");
+    }
+    printf("    k=%2d mux/y:", k);
+    for (int i = 0; i < N; i++) {
+      printf(" % 7.3f", gsl_matrix_get(s->J, i, k + (nDimensions - 2) * K));
+    }
+    printf("\n");
+    if (k < K - 1) {
+      printf("    k=%2d w  :", k);
+      for (int i = 0; i < N; i++) {
+        printf(" % 7.3f", gsl_matrix_get(s->J, i, k + (nDimensions - 1) * K));
+      }
+    }
+    printf("\n");
   }
+  */
   printf("\n");
 }
 
-void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
+void fitMathieson(const Pads& iPads, double* thetaInit, int kInit,
+                  int dimOfParameters, int axe, int mode,
                   double* thetaFinal, double* khi2, double* pError)
 {
   int status;
@@ -469,20 +957,26 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
   int computeKhi2 = p & 0x1;
   p = p >> 1;
   int computeStdDev = p & 0x1;
-  if (ClusterConfig::fittingLog >= ClusterConfig::info) {
+  if (clusterConfig.fittingLog >= clusterConfig.info) {
     printf("\n> [fitMathieson] Fitting \n");
     printf(
       "  mode: verbose, doJacobian, computeKhi2, computeStdDev %d %d %d %d\n",
       verbose, doJacobian, computeKhi2, computeStdDev);
   }
   //
-  int N = iPads.getNbrOfPads();
+  // int N = iPads.getNbrOfPads();
+  int N;
+  if (axe == -1) {
+    N = iPads.getNbrOfPads();
+  } else {
+    N = iPads.getNbrOfObsPads();
+  }
   //
   double* muAndWi = getMuAndW(thetaInit, kInit);
   //
   // Check if fitting is possible
   double* muAndWf = getMuAndW(thetaFinal, kInit);
-  if (3 * kInit - 1 > N) {
+  if (dimOfParameters * kInit - 1 > N) {
     muAndWf[0] = NAN;
     muAndWf[kInit] = NAN;
     muAndWf[2 * kInit] = NAN;
@@ -493,7 +987,7 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
   double cathMax[2] = {0.0, 0.0};
   double* cathWeights;
 
-  if (ClusterConfig::fittingLog >= ClusterConfig::detail) {
+  if (clusterConfig.fittingLog >= clusterConfig.detail) {
     vectorPrintShort("  iPads.cath", iPads.getCathodes(), N);
     vectorPrint("  iPads.q", iPads.getCharges(), N);
   }
@@ -506,16 +1000,27 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
   // Function description (extra data nor parameters)
   mathiesonData.N = N;
   mathiesonData.K = kInit;
-  mathiesonData.x_ptr = iPads.getX();
-  mathiesonData.y_ptr = iPads.getY();
-  mathiesonData.dx_ptr = iPads.getDX();
-  mathiesonData.dy_ptr = iPads.getDY();
+  double* xInf = new double[N];
+  double* xSup = new double[N];
+  double* yInf = new double[N];
+  double* ySup = new double[N];
+  vectorAddVector(iPads.getX(), -1.0, iPads.getDX(), N, xInf);
+  vectorAddVector(iPads.getX(), +1.0, iPads.getDX(), N, xSup);
+  vectorAddVector(iPads.getY(), -1.0, iPads.getDY(), N, yInf);
+  vectorAddVector(iPads.getY(), +1.0, iPads.getDY(), N, ySup);
+  mathiesonData.xInf_ptr = xInf;
+  mathiesonData.yInf_ptr = yInf;
+  mathiesonData.xSup_ptr = xSup;
+  mathiesonData.ySup_ptr = ySup;
   mathiesonData.cath_ptr = iPads.getCathodes();
   mathiesonData.zObs_ptr = iPads.getCharges();
   Mask_t notSaturated[N];
   vectorCopyShort(iPads.getSaturates(), N, notSaturated);
   vectorNotShort(notSaturated, N, notSaturated);
   mathiesonData.notSaturated_ptr = notSaturated;
+  mathiesonData.dimOfParameters = dimOfParameters;
+  mathiesonData.axe = axe;
+  mathiesonData.compressedPads = compressPads(xInf, xSup, yInf, ySup, N);
   //} else {
   /*
   // Function description (extra data nor parameters)
@@ -535,6 +1040,7 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
   */
   // Total Charge per cathode plane
   double zCathTotalCharge[2];
+  double cathCoefNorm[2] = {0.0};
   Mask_t mask[N];
   // Cath 1
   vectorCopyShort(mathiesonData.cath_ptr, N, mask);
@@ -557,7 +1063,7 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
       cathMax[mathiesonData.cath_ptr[i]],
       mathiesonData.notSaturated_ptr[i] * mathiesonData.zObs_ptr[i]);
   }
-  if (ClusterConfig::fittingLog >= ClusterConfig::detail) {
+  if (clusterConfig.fittingLog >= clusterConfig.detail) {
     vectorPrintShort("mathiesonData.cath_ptr", mathiesonData.cath_ptr, N);
     vectorPrintShort("mathiesonData.notSaturated_ptr",
                      mathiesonData.notSaturated_ptr, N);
@@ -567,15 +1073,17 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
   mathiesonData.cathMax_ptr = cathMax;
   mathiesonData.chamberId = iPads.getChamberId();
   mathiesonData.zCathTotalCharge_ptr = zCathTotalCharge;
+  mathiesonData.cathCoefNorm_ptr = cathCoefNorm;
   mathiesonData.verbose = verbose;
   //
   // Define Function, jacobian
   gsl_multifit_function_fdf f;
   f.f = &f_ChargeIntegral;
   f.df = nullptr;
+  // f.df = df_ChargeIntegral;
   f.fdf = nullptr;
   f.n = N;
-  f.p = 3 * kInit - 1;
+  f.p = dimOfParameters * kInit - 1;
   f.params = &mathiesonData;
 
   bool doFit = true;
@@ -589,63 +1097,86 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
   double* w = &muAndWi[2 * kInit];
   std::sort(maxIndex, &maxIndex[kInit],
             [=](int a, int b) { return (w[a] > w[b]); });
-
+  // Remove this loop ???
+  int iter = 0;
   while (doFit) {
     // Select the best K's
     // Copy kTest max
-    double muAndWTest[3 * K];
+    double muAndWTest[dimOfParameters * K];
     // Mu part
-    for (int k = 0; k < K; k++) {
-      // Respecttively mux, muy, w
-      muAndWTest[k] = muAndWi[maxIndex[k]];
-      muAndWTest[k + K] = muAndWi[maxIndex[k] + kInit];
-      muAndWTest[k + 2 * K] = muAndWi[maxIndex[k] + 2 * kInit];
+    if (dimOfParameters == 3) {
+      for (int k = 0; k < K; k++) {
+        // Respectively mux, muy, w
+        muAndWTest[k] = muAndWi[maxIndex[k]];
+        muAndWTest[k + K] = muAndWi[maxIndex[k] + kInit];
+        muAndWTest[k + 2 * K] = muAndWi[maxIndex[k] + 2 * kInit];
+      }
+    } else {
+      for (int k = 0; k < K; k++) {
+        // Respectively mu (x or y, depending on axe), w
+        if (axe == 0) {
+          // x axe
+          muAndWTest[k] = muAndWi[maxIndex[k]];
+        } else {
+          // y axe
+          muAndWTest[k] = muAndWi[maxIndex[k] + kInit];
+        }
+        // w
+        if (K != 1) {
+          muAndWTest[k + K] = muAndWi[maxIndex[k] + 2 * kInit];
+        }
+      }
     }
-    if (ClusterConfig::fittingLog >= ClusterConfig::detail) {
-      vectorPrint("  Selected w", &muAndWTest[2 * K], K);
-      vectorPrint("  Selected mux", &muAndWTest[0], K);
-      vectorPrint("  Selected muy", &muAndWTest[K], K);
+
+    if (clusterConfig.fittingLog >= clusterConfig.detail) {
+      if (dimOfParameters == 3) {
+        vectorPrint("  Selected w", &muAndWTest[2 * K], K);
+        vectorPrint("  Selected mux", &muAndWTest[0], K);
+        vectorPrint("  Selected muy", &muAndWTest[K], K);
+      } else {
+        printf("  Selected dimOfParameters=2, axe=%d", axe);
+        vectorPrint("  Selected w   ", &muAndWTest[K], K);
+        vectorPrint("  Selected muxy", &muAndWTest[0], K);
+      }
     }
     mathiesonData.K = K;
-    f.p = 3 * K - 1;
+    f.p = dimOfParameters * K - 1;
     // Set initial parameters
     // Inv ??? gsl_vector_view params0 = gsl_vector_view_array(muAndWi, 3 * K -
     // 1);
-    gsl_vector_view params0 = gsl_vector_view_array(muAndWTest, 3 * K - 1);
+    gsl_vector_view params0 = gsl_vector_view_array(muAndWTest, dimOfParameters * K - 1);
 
     // Fitting method
     gsl_multifit_fdfsolver* s = gsl_multifit_fdfsolver_alloc(
-      gsl_multifit_fdfsolver_lmsder, N, 3 * K - 1);
+      gsl_multifit_fdfsolver_lmsder, N, dimOfParameters * K - 1);
     // associate the fitting mode, the function, and the starting parameters
     gsl_multifit_fdfsolver_set(s, &f, &params0.vector);
 
-    if (ClusterConfig::fittingLog >= ClusterConfig::detail) {
-      o2::mch::printState(-1, s, K);
+    if (clusterConfig.fittingLog >= clusterConfig.detail) {
+      o2::mch::printState(-1, s, axe, K, N);
     }
     // double initialResidual = gsl_blas_dnrm2(s->f);
     double initialResidual = 0.0;
     // Fitting iteration
     status = GSL_CONTINUE;
     double residual = DBL_MAX;
-    ;
     double prevResidual = DBL_MAX;
-    ;
-    double prevTheta[3 * K - 1];
+    double prevTheta[dimOfParameters * K - 1];
     // ??? for (int iter = 0; (status == GSL_CONTINUE) && (iter < 500); iter++)
     // {
-    for (int iter = 0; (status == GSL_CONTINUE) && (iter < 50); iter++) {
+    for (; (status == GSL_CONTINUE) && (iter < 50); iter++) {
       // TODO: to speed if possible
-      for (int k = 0; k < (3 * K - 1); k++) {
+      for (int k = 0; k < (dimOfParameters * K - 1); k++) {
         prevTheta[k] = gsl_vector_get(s->x, k);
       }
       // printf("  Debug Fitting iter=%3d |f(x)|=%g\n", iter,
       // gsl_blas_dnrm2(s->f));
       status = gsl_multifit_fdfsolver_iterate(s);
-      if (ClusterConfig::fittingLog >= ClusterConfig::detail) {
+      if (clusterConfig.fittingLog >= clusterConfig.detail) {
         printf("  Solver status = %s\n", gsl_strerror(status));
       }
-      if (ClusterConfig::fittingLog >= ClusterConfig::detail) {
-        o2::mch::printState(iter, s, K);
+      if (clusterConfig.fittingLog >= clusterConfig.detail) {
+        o2::mch::printState(iter, s, axe, K, N);
       }
       /* ???? Inv
       if (status) {
@@ -655,7 +1186,7 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
       */
       // GG TODO ???: adjust error in fct of charge
       status = gsl_multifit_test_delta(s->dx, s->x, 1e-4, 1e-4);
-      if (ClusterConfig::fittingLog >= ClusterConfig::detail) {
+      if (clusterConfig.fittingLog >= clusterConfig.detail) {
         printf("  Status multifit_test_delta = %d %s\n", status,
                gsl_strerror(status));
       }
@@ -665,14 +1196,24 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
       // vectorPrint(" prevtheta", prevTheta, 3*K-1);
       // vectorPrint(" theta", s->dx->data, 3*K-1);
       // printf(" prevResidual, residual %f %f\n", prevResidual, residual );
-      if (fabs(prevResidual - residual) < 1.0e-2) {
+      //
+      // max dx/dy (dw not included)
+      double tmp[(dimOfParameters - 1) * K];
+      vectorAbs(s->dx->data, (dimOfParameters - 1) * K, tmp);
+      double maxDxy = vectorMax(tmp, (dimOfParameters - 1) * K);
+      bool converged = (fabs(prevResidual - residual) / residual < 1.0e-2) || (maxDxy < clusterConfig.minFittingXYStep);
+      if (converged) {
         // Stop iteration
         // Take the previous value of theta
-        if (ClusterConfig::fittingLog >= ClusterConfig::detail) {
-          printf("  Stop iteration (dResidu~0), prevResidual=%f residual=%f\n",
-                 prevResidual, residual);
+        if (clusterConfig.fittingLog >= clusterConfig.info) {
+          printf("  Stop iteration iteration=%d (dResidu/residu~0), prevResidual=%f residual=%f\n",
+                 iter, prevResidual, residual);
+          printf("  End max dxy=%f\n", vectorMax(s->dx->data, (dimOfParameters - 1) * K));
+          if (K > 1) {
+            printf("  End max dw=%f\n", vectorMax(&s->dx->data[(dimOfParameters - 1) * K], K - 1));
+          }
         }
-        for (int k = 0; k < (3 * K - 1); k++) {
+        for (int k = 0; k < (dimOfParameters * K - 1); k++) {
           gsl_vector_set(s->x, k, prevTheta[k]);
         }
         status = GSL_SUCCESS;
@@ -686,9 +1227,9 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
     if (computeKhi2 && (khi2 != nullptr)) {
       // Khi2
       double chi = gsl_blas_dnrm2(s->f);
-      double dof = N - (3 * K - 1);
+      double dof = N - (dimOfParameters * K - 1);
       double c = fmax(1.0, chi / sqrt(dof));
-      if (ClusterConfig::fittingLog >= ClusterConfig::detail) {
+      if (clusterConfig.fittingLog >= clusterConfig.detail) {
         printf("  K=%d, chi=%f, chisq/dof = %g\n", K, chi * chi,
                chi * chi / dof);
       }
@@ -709,19 +1250,34 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
 
       // Mu part
       for (int k = 0; k < K; k++) {
-        muAndWf[k] = gsl_vector_get(s->x, k);
-        muAndWf[k + kInit] = gsl_vector_get(s->x, k + K);
+        if (axe == 0) {
+          // x
+          muAndWf[k] = gsl_vector_get(s->x, k);
+          // y
+          // muAndWf[k+kInit] =  mathiesonData.y_ptr[0];
+          muAndWf[k + kInit] = iPads.getY()[0];
+        } else if (axe == 1) {
+          // x
+          // muAndWf[k] =  mathiesonData.x_ptr[0];
+          muAndWf[k] = iPads.getX()[0];
+          // y
+          muAndWf[k + kInit] = gsl_vector_get(s->x, k);
+        } else if (axe == -1) {
+          // x
+          muAndWf[k] = gsl_vector_get(s->x, k);
+          // y
+          muAndWf[k + kInit] = gsl_vector_get(s->x, k + K);
+        }
       }
       // w part
       double sumW = 0;
       for (int k = 0; k < K - 1; k++) {
-        double w = gsl_vector_get(s->x, k + 2 * K);
+        double w = gsl_vector_get(s->x, k + (dimOfParameters - 1) * K);
         sumW += w;
         muAndWf[k + 2 * kInit] = w;
       }
       // Last w : 1.0 - sumW
       muAndWf[3 * kInit - 1] = 1.0 - sumW;
-
       // Parameter error
       /* Pb Mac compilation
       if (computeStdDev && (pError != nullptr)) { //
@@ -735,7 +1291,7 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
       }
       */
     }
-    if (ClusterConfig::fittingLog >= ClusterConfig::detail) {
+    if (clusterConfig.fittingLog >= clusterConfig.detail) {
       printf("  status parameter error = %s\n", gsl_strerror(status));
     }
     gsl_multifit_fdfsolver_free(s);
@@ -745,7 +1301,13 @@ void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
   } // while(doFit)
   // Release memory
   delete[] cathWeights;
-  //
+  delete[] xInf;
+  delete[] xSup;
+  delete[] yInf;
+  delete[] ySup;
+  deleteCompressedPads(mathiesonData.compressedPads);
+
+  // printf("End fitting: iteration=%d nPads=%d \n", iter, N);
   return;
 }
 
@@ -759,7 +1321,11 @@ void fitMathieson(const double* x, const double* y, const double* dx, const doub
 {
   //
   Pads pads = o2::mch::Pads(x, y, dx, dy, q, cath, sat, chId, nPads);
+  // Default
   int mode = 0;
-  o2::mch::fitMathieson(pads, thetaInit, kInit, mode,
+  int dimOfParameters = 3;
+  int axe = -1;
+  o2::mch::fitMathieson(pads, thetaInit, kInit,
+                        dimOfParameters, axe, mode,
                         thetaFinal, khi2, pError);
 }
\ No newline at end of file
diff --git a/Detectors/MUON/MCH/Clustering/src/mathiesonFit.h b/Detectors/MUON/MCH/Clustering/src/mathiesonFit.h
index 3d16ef957c058..3319a0f8d571c 100644
--- a/Detectors/MUON/MCH/Clustering/src/mathiesonFit.h
+++ b/Detectors/MUON/MCH/Clustering/src/mathiesonFit.h
@@ -17,6 +17,7 @@
 #include <gsl/gsl_vector.h>
 
 #include "MCHClustering/PadsPEM.h"
+#include "mathieson.h"
 #include "mathUtil.h"
 
 namespace o2
@@ -26,10 +27,10 @@ namespace mch
 typedef struct dataFit {
   int N;
   int K;
-  const double* x_ptr;
-  const double* dx_ptr;
-  const double* y_ptr;
-  const double* dy_ptr;
+  const double* xInf_ptr;
+  const double* xSup_ptr;
+  const double* yInf_ptr;
+  const double* ySup_ptr;
   const Mask_t* cath_ptr;
   const double* zObs_ptr;
   Mask_t* notSaturated_ptr;
@@ -38,10 +39,15 @@ typedef struct dataFit {
   int chamberId;
   double* zCathTotalCharge_ptr;
   int verbose;
-  double* thetaInit; // Only used by InspectModel
+  double* thetaInit;        // Only used by InspectModel
+  double* cathCoefNorm_ptr; // Used to keep the normalization of the 2 cathodes
+  int dimOfParameters;      // default is 3 dimensions (x, y, w), 2 is for (x/y, w) fits
+  int axe;                  // -1 for both axes, 0 for x axis, 1 for y axis
+  CompressedPads_t* compressedPads;
 } funcDescription_t;
 
-void fitMathieson(const Pads& iPads, double* thetaInit, int kInit, int mode,
+void fitMathieson(const Pads& iPads, double* thetaInit, int kInit,
+                  int dimOfParameters, int axe, int mode,
                   double* thetaFinal, double* khi2, double* pError);
 
 void printState(int iter, gsl_multifit_fdfsolver* s, int K);
diff --git a/Detectors/MUON/MCH/Clustering/src/poissonEM.cxx b/Detectors/MUON/MCH/Clustering/src/poissonEM.cxx
index d0c145a47d938..acb5c7c3aeff6 100644
--- a/Detectors/MUON/MCH/Clustering/src/poissonEM.cxx
+++ b/Detectors/MUON/MCH/Clustering/src/poissonEM.cxx
@@ -25,14 +25,13 @@
 // TODO : Optimization,  generateMixedGaussians2D computed twice.
 //
 
-static int nIterMin = 10;
-static int nIterMax = 400;
-
 namespace o2
 {
 namespace mch
 {
 
+extern ClusterConfig clusterConfig;
+
 void iterateEMPoisson(const double* Cij, const double* Ci,
                       const Mask_t* maskCij, const double* qPixels,
                       const double* qPad, double* qPadPrediction, int nPixels,
@@ -47,7 +46,8 @@ void iterateEMPoisson(const double* Cij, const double* Ci,
     qPadPrediction[j] = 0;
     for (int i = 0; i < nPixels; i++) {
       qPadPrediction[j] +=
-        maskCij[nPads * i + j] * Cij[nPads * i + j] * qPixels[i];
+        // maskCij[nPads * i + j] * Cij[nPads * i + j] * qPixels[i];
+        Cij[nPads * i + j] * qPixels[i];
     }
     // Prevent  zero division
     if (qPadPrediction[j] < 1.0e-6) {
@@ -70,7 +70,8 @@ void iterateEMPoisson(const double* Cij, const double* Ci,
     if (Ci[i] > 1.0e-10) {
       double s_i = 0;
       for (int j = 0; j < nPads; j++) {
-        s_i += maskCij[nPads * i + j] * Cij[nPads * i + j] * qPad[j] /
+        // s_i += maskCij[nPads * i + j] * Cij[nPads * i + j] * qPad[j] /
+        s_i += Cij[nPads * i + j] * qPad[j] /
                qPadPrediction[j];
       }
       newQPixels[i] = s_i * qPixels[i] / Ci[i];
@@ -144,6 +145,8 @@ void fastIterateEMPoisson(const double* Cij, const double* Ci,
 
   // Compute charge prediction on pad j based on pixel charges
   //  qPadPrediction[j] = Sum_i{ Cij[i,j].qPixels[i] }
+  // printf("fastIterateEMPoisson Cij=%p, Ci=%p, qPixels=%p, qPad=%p, qPadPrediction=%p, nPixels=%d, nPads=%d, newQPixels=%p\n",
+  //        Cij, Ci, qPixels, qPad, qPadPrediction, nPixels, nPads,newQPixels);
   gsl_matrix_const_view Cij_gsl = gsl_matrix_const_view_array(Cij, nPixels, nPads);
   gsl_vector_const_view qPixels_gsl = gsl_vector_const_view_array(qPixels, nPixels);
   gsl_vector_view qPadPrediction_gsl = gsl_vector_view_array(qPadPrediction, nPads);
@@ -236,10 +239,30 @@ double computeChiSquare(const Pads& pads, const double* qPredictedPads,
   return chi2;
 }
 
+std::pair<double, double> computeChiSquare(const Pads& pads, const double* qPredictedPads,
+                                           int N)
+{
+  // Compute Chi2 on unsaturated pads, separately for cathode 0 and cathode 1
+  double chi20 = 0.0;
+  double chi21 = 0.0;
+  const double* q = pads.getCharges();
+  const Mask_t* cath = pads.getCathodes();
+  const Mask_t* sat = pads.getSaturates();
+  for (int i = 0; i < N; i++) {
+    double var = (1 - sat[i]) * (q[i] - qPredictedPads[i]);
+    if (cath[i] == 0) {
+      chi20 += var * var;
+    } else {
+      chi21 += var * var;
+    }
+  }
+  return std::make_pair(chi20, chi21);
+}
+
 std::pair<double, double> PoissonEMLoop(const Pads& pads, Pads& pixels,
                                         const double* Cij, Mask_t* maskCij,
-                                        int qCutMode, double minPadResidu,
-                                        int nItMax, int n0)
+                                        int qCutMode, double minPadError,
+                                        int nItMax)
 {
   // The array pixels return the last state
   //
@@ -267,7 +290,7 @@ std::pair<double, double> PoissonEMLoop(const Pads& pads, Pads& pixels,
   // Init convergence criteria
   bool converge = false;
   int it = 0;
-  if (ClusterConfig::EMLocalMaxLog > ClusterConfig::detail) {
+  if (clusterConfig.EMLocalMaxLog > clusterConfig.info) {
     printf("Poisson EM\n");
     printf(
       "   it.  <Pixels_residu>   <Pad_residu>   max(Pad_residu)   "
@@ -275,14 +298,19 @@ std::pair<double, double> PoissonEMLoop(const Pads& pads, Pads& pixels,
   }
   double meanPixelsResidu = 0.0;
   double maxPixelsResidu = 0.0;
+  double maxRelResidu;
+  double pixelVariation;
+  double padRelError;
   //
+
   while (!converge) {
     //
     // Filter pixels
     //
     if (qCutMode == -1) {
       // Percent of the min charge
-      qPixCut = 1.02 * vectorMin(qPixels, nPixels);
+      // qPixCut = 1.02 * vectorMin(qPixels, nPixels);
+      qPixCut = 1.0e-14;
     } else {
       // qCutMode = 0
       // No filtering
@@ -293,24 +321,33 @@ std::pair<double, double> PoissonEMLoop(const Pads& pads, Pads& pixels,
     if (qPixCut > 0.0) {
       for (int i = 0; i < (nPixels); i++) {
         if (qPixels[i] < qPixCut) {
-          vectorSetShort(&maskCij[nPads * i], 0, nPads);
+          // old version with mask vectorSetShort(&maskCij[nPads * i], 0, nPads);
         }
       }
     }
     // Update Ci
     for (int i = 0; i < nPixels; i++) {
       Ci[i] = 0;
-      for (int j = 0; j < nPads; j++) {
-        Ci[i] += Cij[nPads * i + j] * maskCij[nPads * i + j];
+      int start = nPads * i;
+      int end = start + nPads;
+      // for (int j = 0; j < nPads; j++) {
+      for (int l = start; l < end; l++) {
+        // Ci[i] += Cij[nPads * i + j] * maskCij[nPads * i + j];
+        // Ci[i] += Cij[nPads * i + j];
+        Ci[i] += Cij[l];
       }
     }
+    // Not needed
+    /*
     double Cj[nPads];
     for (int j = 0; j < nPads; j++) {
       Cj[j] = 0;
       for (int i = 0; i < nPixels; i++) {
-        Cj[j] += Cij[nPads * i + j] * maskCij[nPads * i + j];
+        // Cj[j] += Cij[nPads * i + j] * maskCij[nPads * i + j];
+        Cj[j] += Cij[nPads * i + j];
       }
     }
+    */
     // Store previous qPixels state
     vectorCopy(qPixels, nPixels, previousQPixels);
     //
@@ -318,73 +355,42 @@ std::pair<double, double> PoissonEMLoop(const Pads& pads, Pads& pixels,
     // Poisson EM Iterations
     //
 
-    // Convergence acceleration process
-    // Not used
-    /*
-    if(0) {
-    double qPixels1[nPixels], qPixels2[nPixels];
-    // Speed-up factors
-    double r[nPixels], v[nPixels];
-    // Perform 2 iterations
-    // Test simple iteration if(1) {
-    iterateEMPoisson( Cij, Ci, maskCij, qPixels, qPads, qPadPrediction, nPixels,
-    nPads, qPixels1); iterateEMPoisson( Cij, Ci, maskCij, qPixels1, qPads,
-    qPadPrediction, nPixels, nPads, qPixels2);
-    // ??? To optimize : loop fusion
-    // Compute r[:] = (qPixels1[:] - qPixels[:])
-    vectorAddVector( qPixels1, -1.0, qPixels,nPixels, r );
-    // Compute v[:] = (qPixels2[:] - qPixels[:]) - r[:]
-    vectorAddVector( qPixels2, -1.0, qPixels1, nPixels, v );
-    vectorAddVector( v, -1.0, r, nPixels, v );
-    double rNorm = vectorNorm(r, nPixels);
-    double vNorm = vectorNorm(v, nPixels);
-    // printf("rNorm=%f vNorm=%f\n", rNorm, vNorm);
-    if (( rNorm < 1.0e-12 ) || (vNorm < 1.0e-12 )) {
-      converge = true;
-    } else {
-      double alpha = - rNorm / vNorm;
-      // qPixels[:] = qPixels[:] - 2.0*alpha*r[:] + alpha*alpha*v[:]
-      vectorAddVector( qPixels, -2.0*alpha, r, nPixels, qPixels);
-      vectorAddVector( qPixels, alpha*alpha, v, nPixels, qPixels);
-      iterateEMPoisson( Cij, Ci, maskCij, qPixels, qPads, qPadPrediction,
-    nPixels, nPads, qPixels);
-    }
-
-    } else {
-    */
     // iterateEMPoisson( Cij, Ci, maskCij, qPixels, qPads, qPadPrediction,
     // nPixels, nPads, qPixels);
-    // fastIterateEMPoisson(Cij, Ci, qPixels, qPads, qPadPrediction, nPixels,
-    //                     nPads, qPixels);
+
     fastIterateEMPoisson(Cij, Ci, previousQPixels, qPads, qPadPrediction, nPixels,
                          nPads, qPixels);
-    // }
 
-    // Compute pixel residues: pixResidu[:] = abs( previousQPixels[:] -
-    // qPixels[:] )
+    // Measure of pixel variation to stop
+    // the iteration if required
     double pixResidu[nPixels];
     vectorAddVector(previousQPixels, -1.0, qPixels, nPixels, pixResidu);
     vectorAbs(pixResidu, nPixels, pixResidu);
-    meanPixelsResidu = vectorSum(pixResidu, nPixels) / nPixels;
-    maxPixelsResidu = vectorMax(pixResidu, nPixels);
-    // Compute pad residues: padResidu[:] = abs( qPads - qPadPrediction[:] )
+    // Pixel variation
+    pixelVariation = vectorSum(pixResidu, nPixels) / vectorSum(qPixels, nPixels);
+    int iMaxResidu = vectorArgMax(pixResidu, nPixels);
+    maxRelResidu = pixResidu[iMaxResidu] / previousQPixels[iMaxResidu];
+
+    // Relative error on pad prediction
+    // Compute the RMS pad residual normalized by the mean pad charge (expectation)
     double padResidu[nPads];
     vectorAddVector(qPads, -1.0, qPadPrediction, nPads, padResidu);
-    vectorAbs(padResidu, nPads, padResidu);
-    double meanPadResidu = vectorSum(padResidu, nPads) / nPads;
-    if (ClusterConfig::EMLocalMaxLog > ClusterConfig::detail) {
-      printf(" %4d    %10.6f      %10.6f      %10.6f             %10.6f\n", it,
-             meanPixelsResidu, meanPadResidu, vectorMax(padResidu, nPads),
-             vectorSum(padResidu, nPads) / vectorSum(qPads, nPads));
-      int u = vectorArgMax(padResidu, nPads);
-      // printf("max pad residu:    qPads=%10.6f, qPadPrediction=%10.6f \n",
-      // qPads[u], qPadPrediction[u]);
+    double var = vectorDotProd(padResidu, padResidu, nPads) / nPads;
+    double E = vectorSum(qPads, nPads) / nPads;
+    padRelError = std::sqrt(var) / E;
+
+    if (clusterConfig.EMLocalMaxLog > clusterConfig.info) {
+      printf("    EM it=%d   <pixelResidu>=%10.6f, dQPixel/qPixel=%10.6f, max(dQPix)/qPix=%10.6f, relPadError=%10.6f\n",
+             it, vectorSum(pixResidu, nPixels) / nPixels, pixelVariation, maxRelResidu, padRelError);
     }
-    converge = (meanPixelsResidu < 1.0e-12) || (meanPadResidu < minPadResidu) ||
+    // maxPixelVariation = 1 / 20 * minPadError;
+    converge = (pixelVariation < minPadError * 0.03) && (padRelError < minPadError) ||
                (it > nItMax);
     it += 1;
   }
-
+  if (clusterConfig.EMLocalMaxLog > clusterConfig.info) {
+    printf("  Exit criterom pixelVariation=%d padRelError=%d itend=%d \n", (pixelVariation < minPadError * 0.03), (padRelError < minPadError), (it > nItMax));
+  }
   // Update pixels charge
   // Remove small charged pixels (<qPixCut)
   int oldValueNPads = pixels.getNbrOfPads();
@@ -393,14 +399,27 @@ std::pair<double, double> PoissonEMLoop(const Pads& pads, Pads& pixels,
   if (qPixCut > 0.0) {
     k = pixels.removePads(qPixCut);
   }
-
-  // Chi2 on cathode0
-  double chi20 = computeChiSquare(pads, qPadPrediction, 0, n0);
+  // Chi2 on cathodes 0/1
+  double chi20, chi21;
   // Chi2 on cathode1
-  double chi21 = computeChiSquare(pads, qPadPrediction, n0, nPads);
-
+  std::pair<double, double> chi = computeChiSquare(pads, qPadPrediction, pads.getNbrOfPads());
+  std::pair<double, double> chiObs = computeChiSquare(pads, qPadPrediction, pads.getNbrOfObsPads());
+  if (clusterConfig.EMLocalMaxLog > clusterConfig.info) {
+    printf(" ??? Chi2 over NbrPads  = (%f, %f); Chi2 over NbrObsPads = (%f, %f) \n",
+           chi.first, chi.second, chiObs.first, chiObs.second);
+  }
+  // ??? Must choose a method. At the moment, Chi2 over pads is better
+  if (1) {
+    // Chi2 over Pads
+    chi20 = chi.first;
+    chi21 = chi.second;
+  } else {
+    // Chi2 over ObsPads
+    chi20 = chiObs.first;
+    chi21 = chiObs.second;
+  }
   // Take care to the leadind dimension is getNbrOfPads()
-  if (ClusterConfig::EMLocalMaxLog >= ClusterConfig::info) {
+  if (clusterConfig.EMLocalMaxLog >= clusterConfig.info) {
     printf("End poisson EM :\n");
     printf("  Total Pad Charge       = %14.6f\n", vectorSum(qPads, nPads));
     printf("  Total Predicted Charge = %14.6f\n",
diff --git a/Detectors/MUON/MCH/Clustering/src/poissonEM.h b/Detectors/MUON/MCH/Clustering/src/poissonEM.h
index 4716cbcf931c7..d0a09f6642119 100644
--- a/Detectors/MUON/MCH/Clustering/src/poissonEM.h
+++ b/Detectors/MUON/MCH/Clustering/src/poissonEM.h
@@ -40,16 +40,20 @@ namespace mch
 
 // namespace  PEM {
 // public :
-constexpr int nMacroIterations = 8;
-static constexpr int nIterations[nMacroIterations] = {5, 10, 10, 10,
-                                                      10, 10, 10, 30};
+static constexpr int nMacroIterations = 8;
+static int nIterations[nMacroIterations] = {5, 10, 10, 10,
+                                            10, 10, 10, 30};
+/*
 static constexpr double minPadResidues[nMacroIterations] = {2.0, 2.0, 1.5, 1.5,
                                                             1.0, 1.0, 0.5, 0.5};
+*/
+static constexpr double minPadResidues[nMacroIterations] = {0.8, 0.6, 0.5, 0.4,
+                                                            0.3, 0.25, 0.2, 0.15};
 
 std::pair<double, double> PoissonEMLoop(const Pads& pads, Pads& pixels,
                                         const double* Cij, Mask_t* maskCij,
                                         int qCutMode, double minPadResidu,
-                                        int nItMax, int n0);
+                                        int nItMax);
 // static double computeChiSquare( const Pads &pads, const double
 // *qPredictedPads);
 //};
diff --git a/Detectors/MUON/MCH/Workflow/src/ClusterFinderGEMSpec.cxx b/Detectors/MUON/MCH/Workflow/src/ClusterFinderGEMSpec.cxx
index e357638946e3d..45ea294005734 100644
--- a/Detectors/MUON/MCH/Workflow/src/ClusterFinderGEMSpec.cxx
+++ b/Detectors/MUON/MCH/Workflow/src/ClusterFinderGEMSpec.cxx
@@ -43,6 +43,7 @@
 #include "MCHClustering/ClusterDump.h"
 #include "Framework/ConfigParamRegistry.h"
 #include "CommonUtils/ConfigurableParam.h"
+#include "MCHClustering/ClusterizerParam.h"
 
 namespace o2
 {
@@ -60,12 +61,15 @@ class ClusterFinderGEMTask
   static constexpr int DumpOriginal = 0x0004;
   static constexpr int DumpGEM = 0x0008;
   static constexpr int GEMOutputStream = 0x0010; // default is Original
+  static constexpr int TimingStats = 0x0020;
+  static constexpr char statFileName[] = "statistics.csv";
+  std::fstream statStream;
   //
-  bool isGEMActivated()
+  bool isActive(int selectedMode) const
   {
-    return (mode & DoGEM);
+    return (mode & selectedMode);
   }
-
+  /* invalid
   bool isGEMDumped()
   {
     return (mode & DumpGEM);
@@ -85,6 +89,18 @@ class ClusterFinderGEMTask
   {
     return (mode & GEMOutputStream);
   }
+
+  bool isGEMTimingStats() const
+  {
+    return (mode & isGEMTimingStats());
+  }
+  */
+  void saveStatistics(uint32_t orbit, uint16_t bunchCrossing, uint32_t iPreCluster, uint16_t nPads, uint16_t nbrClusters, uint16_t DEId, double duration)
+  {
+    statStream << iPreCluster << " " << bunchCrossing << " " << orbit << " "
+               << nPads << " " << nbrClusters << " " << DEId << " " << duration << std::endl;
+  }
+
   //_________________________________________________________________________________________________
   void init(framework::InitContext& ic)
   {
@@ -104,48 +120,54 @@ class ClusterFinderGEMTask
     /// Prepare the clusterizer
     LOG(info) << "initializing cluster finder";
 
-    if (isOriginalDumped() && !isOriginalActivated()) {
+    if (isActive(DumpOriginal) && !isActive(DoOriginal)) {
       mode = mode & (~DumpOriginal);
     }
-    if (isGEMDumped() && !isGEMActivated()) {
+    if (isActive(DumpGEM) && !isActive(DoGEM)) {
       mode = mode & (~DumpGEM);
     }
-    if (isOriginalDumped()) {
+    if (isActive(DumpOriginal)) {
       mOriginalDump = new ClusterDump("OrigRun2.dat", 0);
     }
-    if (isGEMDumped()) {
+    if (isActive(DumpGEM)) {
       mGEMDump = new ClusterDump("GEMRun2.dat", 0);
     }
+    if (isActive(TimingStats)) {
+      statStream.open(statFileName, std::fstream::out);
+      statStream << "# iPrecluster bunchCrossing   orbit  nPads  nClusters  DEId  duration (in ms)" << std::endl;
+    }
 
     //
-    LOG(info) << "Configuration" << std::endl;
-    LOG(info) << "  Mode: " << mode << std::endl;
-    LOG(info) << "  Original: " << isOriginalActivated() << std::endl;
-    LOG(info) << "  GEM     : " << isGEMActivated() << std::endl;
-    LOG(info) << "  Dump Original: " << isOriginalDumped() << std::endl;
-    LOG(info) << "  Dump GEM     : " << isGEMDumped() << std::endl;
-    LOG(info) << "  GEM stream output: " << isGEMOutputStream() << std::endl;
+    LOG(info) << "Configuration";
+    LOG(info) << "  Mode    : " << mode;
+    LOG(info) << "  Original: " << isActive(DoOriginal);
+    LOG(info) << "  GEM     : " << isActive(DoGEM);
+    LOG(info) << "  Dump Original:         " << isActive(DumpOriginal);
+    LOG(info) << "  Dump GEM     :         " << isActive(DumpGEM);
+    LOG(info) << "  GEM stream output    : " << isActive(GEMOutputStream);
+    LOG(info) << "  Timing statistics: " << isActive(TimingStats);
 
     // mClusterFinder.init( ClusterFinderGEM::DoGEM );
-    if (isOriginalActivated()) {
+    if (isActive(DoOriginal)) {
       mClusterFinderOriginal.init(run2Config);
-    } else if (isGEMActivated()) {
-      mClusterFinderGEM.init(mode);
+    } else if (isActive(DoGEM)) {
+      mClusterFinderGEM.init(mode, run2Config);
     }
+    // Inv ??? LOG(info) << "GG = lowestPadCharge = " << ClusterizerParam::Instance().lowestPadCharge;
 
     /// Print the timer and clear the clusterizer when the processing is over
     ic.services().get<CallbackService>().set(CallbackService::Id::Stop, [this]() {
       LOG(info) << "cluster finder duration = " << mTimeClusterFinder.count() << " s";
-      if (isOriginalActivated()) {
+      if (isActive(DoOriginal)) {
         this->mClusterFinderOriginal.deinit();
-      } else if (isGEMActivated()) {
+      } else if (isActive(DoGEM)) {
         this->mClusterFinderGEM.deinit();
       }
-      if (isOriginalDumped()) {
+      if (isActive(DumpOriginal)) {
         delete mOriginalDump;
         mOriginalDump = nullptr;
       }
-      if (isGEMDumped()) {
+      if (isActive(DumpGEM)) {
         delete mGEMDump;
         mGEMDump = nullptr;
       }
@@ -173,60 +195,79 @@ class ClusterFinderGEMTask
     auto& clusterROFs = pc.outputs().make<std::vector<ROFRecord>>(OutputRef{"clusterrofs"});
     auto& clusters = pc.outputs().make<std::vector<Cluster>>(OutputRef{"clusters"});
     auto& usedDigits = pc.outputs().make<std::vector<Digit>>(OutputRef{"clusterdigits"});
+    uint32_t iPreCluster = 0;
 
     clusterROFs.reserve(preClusterROFs.size());
     for (const auto& preClusterROF : preClusterROFs) {
-      LOG(info) << "processing interaction: time frame " << preClusterROF.getBCData().orbit << "...";
+      // LOG(info) << "processing interaction: time frame " << preClusterROF.getBCData().orbit << "...";
       // GG infos
       // uint16_t bc = DummyBC;       ///< bunch crossing ID of interaction
       // uint32_t orbit = DummyOrbit; ///< LHC orbit
       // clusterize every preclusters
       uint16_t bCrossing = preClusterROF.getBCData().bc;
       uint32_t orbit = preClusterROF.getBCData().orbit;
-      uint32_t iPreCluster = 0;
+      std::chrono::duration<double> preClusterDuration{}; ///< timer
       auto tStart = std::chrono::high_resolution_clock::now();
+
       // Inv ??? if ( orbit==22 ) {
       //
-      if (isOriginalActivated()) {
+      if (isActive(DoOriginal)) {
         mClusterFinderOriginal.reset();
       }
-      if (isGEMActivated()) {
+      if (isActive(DoGEM)) {
         mClusterFinderGEM.reset();
       }
       // Get the starting index for new cluster founds
       size_t startGEMIdx = mClusterFinderGEM.getClusters().size();
       size_t startOriginalIdx = mClusterFinderOriginal.getClusters().size();
+      uint16_t nbrClusters(0);
       // std::cout << "Start index GEM=" <<  startGEMIdx << ", Original=" << startOriginalIdx << std::endl;
       for (const auto& preCluster : preClusters.subspan(preClusterROF.getFirstIdx(), preClusterROF.getNEntries())) {
+        auto tPreClusterStart = std::chrono::high_resolution_clock::now();
         // Inv ??? for (const auto& preCluster : preClusters.subspan(preClusterROF.getFirstIdx(), 1102)) {
         startGEMIdx = mClusterFinderGEM.getClusters().size();
         startOriginalIdx = mClusterFinderOriginal.getClusters().size();
         // Dump preclusters
         // std::cout << "bCrossing=" << bCrossing << ", orbit=" << orbit << ", iPrecluster" << iPreCluster
         //        << ", PreCluster: digit start=" << preCluster.firstDigit <<" , digit size=" << preCluster.nDigits << std::endl;
-        if (isOriginalDumped()) {
+        if (isActive(DumpOriginal)) {
           mClusterFinderGEM.dumpPreCluster(mOriginalDump, digits.subspan(preCluster.firstDigit, preCluster.nDigits), bCrossing, orbit, iPreCluster);
         }
-        if (isGEMDumped()) {
+        if (isActive(DumpGEM)) {
           mClusterFinderGEM.dumpPreCluster(mGEMDump, digits.subspan(preCluster.firstDigit, preCluster.nDigits), bCrossing, orbit, iPreCluster);
         }
         // Clusterize
-        if (isOriginalActivated()) {
+        if (isActive(DoOriginal)) {
           mClusterFinderOriginal.findClusters(digits.subspan(preCluster.firstDigit, preCluster.nDigits));
+          nbrClusters = mClusterFinderOriginal.getClusters().size() - startOriginalIdx;
         }
-        if (isGEMActivated()) {
+        if (isActive(DoGEM)) {
           mClusterFinderGEM.findClusters(digits.subspan(preCluster.firstDigit, preCluster.nDigits), bCrossing, orbit, iPreCluster);
+          nbrClusters = mClusterFinderGEM.getClusters().size() - startGEMIdx;
         }
         // Dump clusters (results)
         // std::cout << "[Original] total clusters.size=" << mClusterFinderOriginal.getClusters().size() << std::endl;
         // std::cout << "[GEM     ] total clusters.size=" << mClusterFinderGEM.getClusters().size() << std::endl;
-        if (isOriginalDumped()) {
+        if (isActive(DumpOriginal)) {
           mClusterFinderGEM.dumpClusterResults(mOriginalDump, mClusterFinderOriginal.getClusters(), startOriginalIdx, bCrossing, orbit, iPreCluster);
         }
-        if (isGEMDumped()) {
+        if (isActive(DumpGEM)) {
           mClusterFinderGEM.dumpClusterResults(mGEMDump, mClusterFinderGEM.getClusters(), startGEMIdx, bCrossing, orbit, iPreCluster);
         }
-        // if ( isGEMDumped())
+        // Timing Statistics
+        if (isActive(TimingStats)) {
+          auto tPreClusterEnd = std::chrono::high_resolution_clock::now();
+          preClusterDuration = tPreClusterEnd - tPreClusterStart;
+          int16_t nPads = preCluster.nDigits;
+          int16_t DEId = digits[preCluster.firstDigit].getDetID();
+          // double dt = duration_cast<duration<double>>(tPreClusterEnd - tPreClusterStart).count;
+          // std::chrono::duration<double> time_span = std::chrono::duration_cast<duration<double>>(tPreClusterEnd - tPreClusterStart);
+          preClusterDuration = tPreClusterEnd - tPreClusterStart;
+          double dt = preClusterDuration.count();
+          // Convert seconds to milliseconds; durations below 1 microsecond are reported as 0
+          dt = (dt < 1.0e-06) ? 0.0 : dt * 1000;
+          saveStatistics(orbit, bCrossing, iPreCluster, nPads, nbrClusters, DEId, dt);
+        }
         iPreCluster++;
       }
       // } // Inv ??? if ( orbit==22 ) {
@@ -234,7 +275,7 @@ class ClusterFinderGEMTask
       mTimeClusterFinder += tEnd - tStart;
 
       // fill the ouput messages
-      if (isGEMOutputStream()) {
+      if (isActive(GEMOutputStream)) {
         clusterROFs.emplace_back(preClusterROF.getBCData(), clusters.size(), mClusterFinderGEM.getClusters().size());
       } else {
         clusterROFs.emplace_back(preClusterROF.getBCData(), clusters.size(), mClusterFinderOriginal.getClusters().size());
@@ -255,13 +296,13 @@ class ClusterFinderGEMTask
     /// fill the output messages with clusters and attached digits of the current event
     /// modify the references to the attached digits according to their position in the global vector
     auto clusterOffset = clusters.size();
-    if (isGEMOutputStream()) {
+    if (isActive(GEMOutputStream)) {
       clusters.insert(clusters.end(), mClusterFinderGEM.getClusters().begin(), mClusterFinderGEM.getClusters().end());
     } else {
       clusters.insert(clusters.end(), mClusterFinderOriginal.getClusters().begin(), mClusterFinderOriginal.getClusters().end());
     }
     auto digitOffset = usedDigits.size();
-    if (isGEMOutputStream()) {
+    if (isActive(GEMOutputStream)) {
       usedDigits.insert(usedDigits.end(), mClusterFinderGEM.getUsedDigits().begin(), mClusterFinderGEM.getUsedDigits().end());
     } else {
       usedDigits.insert(usedDigits.end(), mClusterFinderOriginal.getUsedDigits().begin(), mClusterFinderOriginal.getUsedDigits().end());
@@ -296,11 +337,13 @@ o2::framework::DataProcessorSpec getClusterFinderGEMSpec(const char* specName)
       {"mch-config", VariantType::String, "", {"JSON or INI file with clustering parameters"}},
       {"run2-config", VariantType::Bool, false, {"Setup for run2 data"}},
       {"mode", VariantType::Int, ClusterFinderGEMTask::DoGEM | ClusterFinderGEMTask::GEMOutputStream, {"Running mode"}},
-      //{"mode", VariantType::Int, ClusterFinderGEMTask::DoGEM | ClusterFinderGEMTask::DumpGEM | ClusterFinderGEMTask::GEMOutputStream, {"Running mode"}},
+      // {"mode", VariantType::Int, ClusterFinderGEMTask::DoOriginal, {"Running mode"}},
+      // {"mode", VariantType::Int, ClusterFinderGEMTask::DoGEM | ClusterFinderGEMTask::GEMOutputStream, {"Running mode"}},
+      // {"mode", VariantType::Int, ClusterFinderGEMTask::DoGEM | ClusterFinderGEMTask::DumpGEM | ClusterFinderGEMTask::GEMOutputStream, {"Running mode"}},
+      // {"mode", VariantType::Int, ClusterFinderGEMTask::DoOriginal | ClusterFinderGEMTask::GEMOutputStream, {"Running mode"}},
       //{"mode", VariantType::Int, ClusterFinderGEMTask::DoOriginal | ClusterFinderGEMTask::DumpOriginal | ClusterFinderGEMTask::GEMOutputStream, {"Running mode"}},
       // {"mode", VariantType::Int, ClusterFinderGEMTask::DoGEM, {"Running mode"}},
 
-      // {"mode", VariantType::Int, ClusterFinderGEMTask::DoOriginal, {"Running mode"}},
       // {"mode", VariantType::Int, ClusterFinderGEMTask::DoGEM | ClusterFinderGEMTask::DumpGEM | ClusterFinderGEMTask::GEMOutputStream, {"Running mode"}},
       // {"mode", VariantType::Int, ClusterFinderGEMTask::DoGEM, {"Running mode"}},
     }};