/* BEGIN software license
 *
 * MsXpertSuite - mass spectrometry software suite
 * -----------------------------------------------
 * Copyright (C) 2009--2026 Filippo Rusconi
 *
 * http://www.msxpertsuite.org
 *
 * This file is part of the MsXpertSuite project.
 *
 * The MsXpertSuite project is the successor of the massXpert project. This
 * project now includes various independent modules:
 *
 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * END software license
 */


/////////////////////// stdlib includes


/////////////////////// Qt includes
#include <QList>
#include <QDebug>


/////////////////////// pappsomspp includes


/////////////////////// Local includes
#include "MsXpS/libXpertMassCore/LowMassDeconvolver.hpp"
#include "MsXpS/libXpertMassCore/IsotopicClusterGenerator.hpp"

namespace MsXpS
{
namespace libXpertMassCore
{


// Reference masses of the muropeptide "averagine" unit: the first one is the
// average mass, the second one the monoisotopic mass (presumably in Da --
// TODO confirm the unit and the provenance of the values).
// NOTE(review): the monoisotopic value is larger than the average value,
// which is unusual for a C/H/N/O/S composition; please verify that the two
// values have not been swapped.
double muropeptide_averagine_avg_mass{102.42615};
double muropeptide_averagine_mono_mass{102.48550};

// Constructs the deconvolver from \a parameters.
//
// The constructor:
// - loads the isotopic data from m_params.isotopicDataFilePath and stores
//   them in m_params.isotopicDataSp;
// - validates m_params.averagineFormula against these isotopic data;
// - asks the averagine formula to account its masses into
//   m_params.averagineMonoMass (the average mass is computed too, but
//   discarded).
//
// Failures are reported with Q_ASSERT/Q_ASSERT_X, which only abort in builds
// where Qt assertions are enabled. An invalid averagine formula is
// additionally logged with qCritical() so that it does not go unnoticed in
// builds without assertions.
LowMassDeconvolver::LowMassDeconvolver(Parameters &parameters)
  : m_params(parameters)
{
  qDebug() << "Parameters:" << m_params.toString();

  Q_ASSERT(QFileInfo::exists(m_params.isotopicDataFilePath));

  IsotopicDataLibraryHandler isotopic_data_handler;
  qsizetype loaded_isotope_count =
    isotopic_data_handler.loadData(m_params.isotopicDataFilePath);
  Q_ASSERT(loaded_isotope_count);

  m_params.isotopicDataSp = isotopic_data_handler.getIsotopicData();

  Q_ASSERT_X(m_params.isotopicDataSp != nullptr &&
               m_params.isotopicDataSp.get() != nullptr,
             __FILE__,
             "The isotopic data are missing.");

  ErrorList error_list;
  bool ok =
    m_params.averagineFormula.validate(m_params.isotopicDataSp, &error_list);
  if(!ok)
    {
      const QString msg =
        QString("The averagine formula cannot be invalid. Errors:\n%1")
          .arg(Utils::joinErrorList(error_list));

      qCritical().noquote() << msg;

      // The assertion fires when its condition is false: test 'ok' itself
      // (testing '!ok' in this branch would always pass silently).
      Q_ASSERT_X(ok, __FILE__, msg.toLatin1().data());
    }

  // Initialized because it is handed over by reference: the callee might
  // accumulate into it rather than overwrite it.
  // NOTE(review): if accountMasses() accumulates, m_params.averagineMonoMass
  // must be 0 on entry for the result to be the averagine mono mass --
  // confirm against the Formula API and the Parameters defaults.
  double avg = 0.0;
  m_params.averagineFormula.accountMasses(
    ok, m_params.isotopicDataSp, m_params.averagineMonoMass, avg, 1 /*times*/);

  qDebug() << "The low mass deconvolver was initialized with"
           << m_params.isotopicDataSp->size() << "isotopes.";
}

// Nothing to release explicitly: the destructor body is empty.
LowMassDeconvolver::~LowMassDeconvolver() = default;

// Estimates an elemental composition (C, H, N, O, S) for \a neutral_mass.
//
// The neutral mass is expressed as a number of "averagine equivalents"
// (neutral_mass / m_params.averagineMonoMass). The per-unit C/H/N/O/S counts
// of m_params.averagineFormula are multiplied by that number, the resulting
// fractional formula is validated, its indices are rounded by
// Formula::roundIndices() and the formula is validated again.
//
// Returns the elemental composition string of the rounded formula, or an
// empty string when no positive number of averagine equivalents can be
// computed (non-positive or NaN neutral mass, non-positive or NaN averagine
// monoisotopic mass).
QString
LowMassDeconvolver::compositionEstimationFromMass(double neutral_mass) const
{
  // Written as negated '>' so that NaN is rejected too, and so that the
  // division below can never be a division by zero.
  if(!(m_params.averagineMonoMass > 0.0))
    return QString();

  const double averagine_equivalents =
    neutral_mass / m_params.averagineMonoMass;

  if(!(averagine_equivalents > 0.0))
    return QString();

  const double avg_c_per_unit = m_params.averagineFormula.symbolCount("C");
  const double avg_h_per_unit = m_params.averagineFormula.symbolCount("H");
  const double avg_n_per_unit = m_params.averagineFormula.symbolCount("N");
  const double avg_o_per_unit = m_params.averagineFormula.symbolCount("O");
  const double avg_s_per_unit = m_params.averagineFormula.symbolCount("S");

  // Formats one fractional index in fixed notation. The default 'g' format
  // of QString::arg(double) switches to exponent notation (for example
  // "4e-05") for very small or very large values, which is not a plain
  // decimal index. Six decimals are plenty since the indices get rounded
  // below.
  const auto scaled_index = [averagine_equivalents](double count_per_unit) {
    return QString::number(averagine_equivalents * count_per_unit, 'f', 6);
  };

  // Note how the formula indices are double values
  const QString averagine_based_formula = QString("C%1H%2N%3O%4S%5")
                                            .arg(scaled_index(avg_c_per_unit))
                                            .arg(scaled_index(avg_h_per_unit))
                                            .arg(scaled_index(avg_n_per_unit))
                                            .arg(scaled_index(avg_o_per_unit))
                                            .arg(scaled_index(avg_s_per_unit));

  Formula int_based_indices_formula(averagine_based_formula);
  ErrorList error_list;

  // First validation: run with the store and reset flags set, before the
  // indices are rounded.
  int_based_indices_formula.validate(
    m_params.isotopicDataSp, true /*store*/, true /*reset*/, &error_list);
  int_based_indices_formula.roundIndices();

  // Second validation: checks the formula once its indices were rounded.
  int_based_indices_formula.validate(m_params.isotopicDataSp, &error_list);
  Q_ASSERT(int_based_indices_formula.isValid());

  return int_based_indices_formula.elementalComposition();
}

// Computes the theoretical isotopic cluster of \a formula.
//
// The generator is configured with the isotopic data held in m_params, a
// maximum summed probability of 0.95, a normalization intensity of 1 and an
// ascending sort on x. The formula is submitted with a charge of 0.
//
// Returns a copy of the first generated cluster. When the generator produces
// no cluster (or a null one), an empty trace is returned instead of indexing
// past the end of the results: callers check the returned trace for
// emptiness.
pappso::Trace
LowMassDeconvolver::isotopicClusterForFormula(const QString &formula) const
{
  IsotopicClusterGenerator isotopic_cluster_generator(m_params.isotopicDataSp);

  isotopic_cluster_generator.setIsotopicDataType(
    IsotopicDataType::LIBRARY_CONFIG);
  isotopic_cluster_generator.setMaxSummedProbability(0.95);
  isotopic_cluster_generator.setNormalizationIntensity(1);
  isotopic_cluster_generator.setSortType(pappso::Enums::SortType::x);
  isotopic_cluster_generator.setSortOrder(pappso::Enums::SortOrder::ascending);

  FormulaChargePair formula_charge_pair(formula, 0);

  isotopic_cluster_generator.setFormulaChargePair(formula_charge_pair);

  const std::size_t count = isotopic_cluster_generator.run();

  // Bound by const reference: no copy of the whole vector is needed just to
  // read its first element (a temporary, if any, has its lifetime extended).
  const std::vector<IsotopicClusterChargePair> &isotopic_cluster_charge_pairs =
    isotopic_cluster_generator.getIsotopicClusterChargePairs();

  Q_ASSERT(count == isotopic_cluster_charge_pairs.size());

  if(isotopic_cluster_charge_pairs.empty() ||
     !isotopic_cluster_charge_pairs.front().first)
    {
      qWarning() << "No isotopic cluster could be generated for formula"
                 << formula;
      return pappso::Trace();
    }

  return *isotopic_cluster_charge_pairs.front().first;
}

// Chains the two steps required to model a neutral mass: first estimate an
// averagine-based formula for \a neutral_mass, then compute the theoretical
// isotopic cluster of that formula. Both intermediate results are asserted
// to be non-empty. Returns the theoretical cluster.
pappso::Trace
LowMassDeconvolver::isotopicClusterForNeutralMass(double neutral_mass) const
{
  const QString estimated_formula =
    compositionEstimationFromMass(neutral_mass);

  Q_ASSERT_X(
    !estimated_formula.isEmpty(), __FILE__, "The returned formula is empty.");

  pappso::Trace theoretical_cluster =
    isotopicClusterForFormula(estimated_formula);

  Q_ASSERT_X(theoretical_cluster.size(),
             __FILE__,
             "The returned centroids are empty.");

  return theoretical_cluster;
}

// Merges the centroids of \a theoretical_cluster that belong to the same
// isotopologue rank.
//
// The rank of a centroid is its mass distance to the first centroid of the
// cluster, expressed in ISOTOPIC_NEUTRON_MASS units and rounded to the
// nearest integer. All the centroids of a given rank are replaced by a single
// one carrying the summed intensity and the intensity-weighted mean mass.
// Centroids having an intensity <= 0 are ignored.
//
// The cluster must not be empty and must hold neutral masses (not m/z
// values). The returned centroids are ordered by increasing rank.
std::vector<BinnedIsotopicCentroid>
LowMassDeconvolver::collapseTheoreticalIsotopicCluster(
  const pappso::Trace &theoretical_cluster) const
{
  Q_ASSERT(!theoretical_cluster.empty());

  // The first centroid is the reference against which ranks are computed.
  const double reference_mass = theoretical_cluster.front().x;

  // rank -> (sum of mass * intensity, sum of intensity)
  std::map<int, std::pair<double, double>> sums_by_rank;

  for(const pappso::DataPoint &centroid : theoretical_cluster)
    {
      if(centroid.y <= 0.0)
        continue;

      const auto neutron_units =
        (centroid.x - reference_mass) / ISOTOPIC_NEUTRON_MASS;
      const int rank = static_cast<int>(std::llround(neutron_units));

      std::pair<double, double> &sums = sums_by_rank[rank];
      sums.first  += centroid.x * centroid.y;
      sums.second += centroid.y;
    }

  std::vector<BinnedIsotopicCentroid> collapsed;
  collapsed.reserve(sums_by_rank.size());

  for(auto bin_iter = sums_by_rank.cbegin(); bin_iter != sums_by_rank.cend();
      ++bin_iter)
    {
      const double summed_intensity = bin_iter->second.second;

      if(summed_intensity <= 0.0)
        continue;

      const double mean_mass = bin_iter->second.first / summed_intensity;

      collapsed.push_back(
        BinnedIsotopicCentroid{bin_iter->first, mean_mass, summed_intensity});
    }

  // Guarantee the ordering by isotopologue rank.
  std::sort(collapsed.begin(),
            collapsed.end(),
            [](const BinnedIsotopicCentroid &lhs,
               const BinnedIsotopicCentroid &rhs) {
              return lhs.isotopeIndex < rhs.isotopeIndex;
            });

  return collapsed;
}

// Filters \a input_centroids down to the peaks worth considering: a peak is
// retained unless its x value lies outside of
// [m_params.minMass, m_params.maxMass] or its y value is below
// m_params.minIntensity. The relative order of the peaks is preserved.
std::vector<pappso::DataPoint>
LowMassDeconvolver::selectCandidatePeaks(
  const pappso::Trace &input_centroids) const
{
  std::vector<pappso::DataPoint> retained_peaks;

  // At most every input centroid is retained.
  retained_peaks.reserve(input_centroids.size());

  for(const pappso::DataPoint &peak : input_centroids)
    {
      const bool outside_mass_bounds =
        peak.x < m_params.minMass || peak.x > m_params.maxMass;
      const bool below_intensity_threshold = peak.y < m_params.minIntensity;

      if(!outside_mass_bounds && !below_intensity_threshold)
        retained_peaks.push_back(peak);
    }

  return retained_peaks;
}

// Checks whether the charged feature \a charge_specific_feature, modelled for
// the input centroid at \a input_centroid_index, is backed by an isotopic
// cluster in \a input_centroids.
//
// This is how we call this function:
// bool accepted = fitIsotopicEnvelope(input_centroids,
//                                     input_centroid_index,
//                                     charge_specific_feature,
//                                     globally_used);
//
// The idea of this algo is that we get an index to an input centroided mass
// spectrum centroid. We also get the neutral mass of that centroid, as stored
// in the charge_specific_feature parameter. This neutral mass is used to
// compute a theoretical isotopic cluster. Then, each theoretical cluster's
// centroid is transformed into a m/z value with the charge, that is then
// checked against the m/z values of the centroided input mass spectrum.
// Whenever a centroid match occurs, it is recorded so as to avoid having to
// iterate in it again.
//
// Parameters:
// - input_centroids: the centroided mass spectrum. It is searched with
//   std::lower_bound and thus has to be sorted by increasing m/z.
// - input_centroid_index: index, in input_centroids, of the centroid being
//   tested.
// - charge_specific_feature: provides the neutral mass and the charge. Its
//   intensity and fitQuality members are written once all the cluster shape
//   checks have passed (that is, even if the final correlation score check
//   then rejects the feature).
// - globally_used: one flag per input centroid, telling that it was consumed
//   by a previously accepted feature. Flagged centroids are never matched.
//   Upon acceptance, the tested centroid and all the matched centroids get
//   flagged.
//
// Returns true if the feature is accepted, false otherwise (including when
// the arguments are unusable: index out of range, globally_used too small,
// charge equal to zero).
bool
LowMassDeconvolver::fitIsotopicEnvelope(
  const pappso::Trace &input_centroids,
  std::size_t input_centroid_index,
  ChargeSpecificFeature &charge_specific_feature,
  std::vector<bool> &globally_used) const
{
  // Minimum count of theoretical centroids that must be matched. Unsigned
  // because it is compared with container sizes.
  const std::size_t MIN_MATCH_COUNT = 3;

  // These two are programming errors on the caller side.
  Q_ASSERT(input_centroid_index < input_centroids.size());
  Q_ASSERT(globally_used.size() >= input_centroids.size());

  // Without assertions, still refuse to index out of bounds or to divide by
  // a zero charge.
  if(input_centroid_index >= input_centroids.size() ||
     globally_used.size() < input_centroids.size() ||
     charge_specific_feature.charge == 0)
    {
      qDebug() << "Unusable arguments (centroid index out of range, "
                  "bookkeeping vector too small or zero charge) -> "
                  "Rejecting.";

      return false;
    }

  const pappso::DataPoint input_candidate_centroid =
    input_centroids[input_centroid_index];

  QString debug_msg = QString(
                        "Received input centroid: %1 as charged feature:\n"
                        "\t%2\n"
                        "\tfor checking isotopic cluster fit.\n\n")
                        .arg(input_candidate_centroid.toString())
                        .arg(charge_specific_feature.toString());

  // 1. Get neutral mass from feature
  // ================================

  const double neutral_mass = charge_specific_feature.neutralMass;

  // 2. Generate theoretical isotopic envelope using IsoSpec++
  // =========================================================

  pappso::Trace theoretical_isotopic_cluster_for_neutral_mass =
    isotopicClusterForNeutralMass(neutral_mass);
  Q_ASSERT(theoretical_isotopic_cluster_for_neutral_mass.size());

  // Without assertions, an empty cluster must not reach the at(0) call below.
  if(theoretical_isotopic_cluster_for_neutral_mass.empty())
    {
      debug_msg += QString("The theoretical isotopic cluster is empty -> "
                           "Rejecting.\n");
      qDebug().noquote() << debug_msg;

      return false;
    }

  debug_msg += QString("Obtained neutral theor. isot. cluster:\n%1")
                 .arg(theoretical_isotopic_cluster_for_neutral_mass.toString());

  qDebug().noquote() << debug_msg;
  debug_msg.clear();

  // Because of the rounding of the formula in the function that
  // computes the formula corresponding to the neutral mass, using
  // the averagine equivalents concept, the mono mass of the returned
  // theoretical isotopic cluster by IsoSpec++ cannot be exactly the
  // neutral mass that we used in the first place. There is a neutral
  // mass gap.

  const double neutral_mass_gap =
    neutral_mass - theoretical_isotopic_cluster_for_neutral_mass.at(0).x;

  debug_msg +=
    QString("\tNeutral mass gap between input mass and cluster model: %1\n\n")
      .arg(neutral_mass_gap, 0, 'f', 4);

  // Shift the whole theoretical cluster so that its first centroid sits
  // exactly on the neutral mass of the feature.
  for(pappso::DataPoint &dp : theoretical_isotopic_cluster_for_neutral_mass)
    dp.x += neutral_mass_gap;

  debug_msg += QString("Corrected neutral theor. isot. cluster:\n\n%1\n\n")
                 .arg(theoretical_isotopic_cluster_for_neutral_mass.toString());

  std::vector<BinnedIsotopicCentroid> theoretical_cluster_binned_centroids =
    collapseTheoreticalIsotopicCluster(
      theoretical_isotopic_cluster_for_neutral_mass);

  QString text;

  for(const BinnedIsotopicCentroid &binned_centroid :
      theoretical_cluster_binned_centroids)
    text += binned_centroid.toString();

  debug_msg +=
    QString(
      "After isobaric centroids merge, collapsed neutral theor. isot. "
      "cluster:\n\n%1\n\n")
      .arg(text);

  // 3. Match observed peaks to theoretical m/z
  // ==========================================

  // Iterate in the theoretical cluster centroid peaks,
  // convert each one into a m/z for the charge and check if
  // there is a data point in the input centroided spectrum that has
  // a m/z matching it within tolerance.

  // We do not want that two theoretical peaks match the same centroid
  // from the input spectrum.
  std::vector<bool> used(input_centroids.size(), false);

  // To store data about the match that we will need for verification later.
  std::vector<IsoMatch> matches;

  // Expected m/z distance between two consecutive isotopologues at this
  // charge.
  const double expected_spacing =
    ISOTOPIC_NEUTRON_MASS / charge_specific_feature.charge;

  // It makes no sense to continue iterating in the input mass spectrum
  // centroids if we are farther than this many expected spacings above the
  // target theoretical centroid.
  const int max_isotopic_centroids_to_scan = 5;
  const double max_scan_window =
    expected_spacing * max_isotopic_centroids_to_scan;

  // At which index to start scanning the input centroids. It only ever
  // increases: each match moves it right after the matched input centroid.
  size_t scan_start_idx = 0;

  for(size_t theor_cluster_binned_centroids_iter = 0;
      theor_cluster_binned_centroids_iter <
      theoretical_cluster_binned_centroids.size();
      ++theor_cluster_binned_centroids_iter)
    {
      // Once the last input centroid has been matched, scan_start_idx equals
      // the size of the input spectrum: nothing is left to match and using
      // it as an index would read past the end.
      if(scan_start_idx >= input_centroids.size())
        {
          debug_msg += QString(
            "No input centroid left after the last match -> stopping the "
            "search.\n\n");
          break;
        }

      const double theoretical_centroid_neutral_mass =
        theoretical_cluster_binned_centroids
          .at(theor_cluster_binned_centroids_iter)
          .neutralMass;

      debug_msg += QString("Theor. cluster centroid at index: %1\n")
                     .arg(theoretical_cluster_binned_centroids
                            .at(theor_cluster_binned_centroids_iter)
                            .isotopeIndex);

      debug_msg += QString("\tNeutral mass: %1")
                     .arg(theoretical_centroid_neutral_mass, 0, 'f', 5);

      const double theoretical_centroid_mz = neutralToMz(
        theoretical_centroid_neutral_mass, charge_specific_feature.charge);

      debug_msg +=
        QString("\tBecomes m/z %1 for currently modelled charge %2\n")
          .arg(theoretical_centroid_mz, 0, 'f', 5)
          .arg(charge_specific_feature.charge);

      // In the code below, the tolerance is applied to variables in the
      // m/z space, not the dalton space.
      const double tolerance_in_thompson =
        ppmToDelta(theoretical_centroid_mz, m_params.ppmMassTolerance);

      debug_msg += QString(
                     "\tThus searching that %1 m/z in input mass spectrum "
                     "centroids (tolerance: %2):\n")
                     .arg(theoretical_centroid_mz, 0, 'f', 5)
                     .arg(tolerance_in_thompson, 0, 'f', 5);

      // Jump directly to the first input centroid that may match, that is,
      // the first one having x greater than or equal to the lower end of the
      // tolerance window.
      auto start_it =
        std::lower_bound(input_centroids.begin(),
                         input_centroids.end(),
                         theoretical_centroid_mz - tolerance_in_thompson,
                         [](const pappso::DataPoint &dp, double mz) {
                           return dp.x < mz;
                         });

      if(start_it != input_centroids.end())
        {
          const size_t scan_start_idx_by_lower_bound =
            std::distance(input_centroids.begin(), start_it);

          debug_msg +=
            QString(
              "\t\tThe start_it points to datapoint with m/z: %1 - the "
              "scan_start_idx has x value: %2\n")
              .arg(input_centroids[scan_start_idx_by_lower_bound].x, 0, 'f', 5)
              .arg(input_centroids[scan_start_idx].x, 0, 'f', 5);

          // Never go back before the centroid following the previous match.
          scan_start_idx = scan_start_idx_by_lower_bound > scan_start_idx
                             ? scan_start_idx_by_lower_bound
                             : scan_start_idx;

          debug_msg +=
            QString("\t\t\tFinally, the retained start for the scan is: %1\n\n")
              .arg(input_centroids[scan_start_idx].x, 0, 'f', 5);

          double best_delta = std::numeric_limits<double>::max();
          int best_idx      = -1;

          for(size_t i = scan_start_idx; i < input_centroids.size(); ++i)
            {
              if(used[i] || globally_used[i])
                continue;

              const double signed_delta =
                input_centroids[i].x - theoretical_centroid_mz;

              if(signed_delta > max_scan_window)
                break;

              const double delta = std::abs(signed_delta);

              if(delta <= tolerance_in_thompson && delta < best_delta)
                {
                  debug_msg +=
                    QString(
                      "\tFor input centroid m/z %1, MATCH with "
                      "theoretical m/z %2, delta: %3 within tolerance: %4\n")
                      .arg(input_centroids[i].x)
                      .arg(theoretical_centroid_mz, 0, 'f', 5)
                      .arg(delta, 0, 'f', 5)
                      .arg(tolerance_in_thompson, 0, 'f', 4);

                  best_delta = delta;
                  best_idx   = static_cast<int>(i);
                }
              else
                {
                  debug_msg += QString(
                                 "\tFor input centroid m/z %1, NO match with "
                                 "theoretical m/z %2, delta: %3 NOT within "
                                 "tolerance: %4\n")
                                 .arg(input_centroids[i].x)
                                 .arg(theoretical_centroid_mz, 0, 'f', 5)
                                 .arg(delta, 0, 'f', 5)
                                 .arg(tolerance_in_thompson, 0, 'f', 5);
                }
            }

          if(best_idx != -1)
            {
              used[best_idx] = true; // <-- CRITICAL
              scan_start_idx = best_idx + 1;
              matches.push_back({theor_cluster_binned_centroids_iter,
                                 static_cast<std::size_t>(best_idx),
                                 input_centroids[best_idx].x,
                                 input_centroids[best_idx].y});
              debug_msg += QString("\n\tSuccess (best delta: %1).\n\n")
                             .arg(best_delta, 0, 'f', 5);
            }
          else
            {
              debug_msg += QString("\tFailure.\n\n");
            }
        }
      // End of
      // if(start_it != input_centroids.end())
    }
  // End of iteration in the theoretical cluster centroids.

  // Store once for all the count of matches.
  const size_t match_count = matches.size();

  if(match_count < MIN_MATCH_COUNT)
    {
      debug_msg += QString("There were less than %1 matches --> Rejecting.\n")
                     .arg(MIN_MATCH_COUNT);
      qDebug().noquote() << debug_msg;

      return false;
    }
  debug_msg += QString("Found %1 matches.\n").arg(match_count);

  // Ensure matches are ordered by theoretical isotope index
  // (required for spacing error and correlation computations)
  std::sort(
    matches.begin(), matches.end(), [](const IsoMatch &a, const IsoMatch &b) {
      return a.theoreticalClusterIndex < b.theoreticalClusterIndex;
    });


  // 4. Perform some checks about the matches
  // ========================================

  // The cluster that is recreated by the matches should be faithful, that
  // is, with the right expected isotopic spacing in m/z for a given z value.

  std::vector<double> observed_spacings;
  observed_spacings.reserve(match_count - 1);

  for(size_t i = 1; i < match_count; ++i)
    {
      observed_spacings.push_back(matches[i].observedMz -
                                  matches[i - 1].observedMz);

      // Take advantage of this loop to also check that matches are
      // ordered in increasing input centroided mass spectrum centroid indices:
      if(matches[i].observedPeakIndex <= matches[i - 1].observedPeakIndex)
        {
          debug_msg += QString(
            "Two consecutive matches cannot have two input centroided mass "
            "spectrum indices in decreasing order.");

          qDebug().noquote() << debug_msg;

          return false;
        }
    }

  if(observed_spacings.size() >= 2)
    {
      // Put the median value item in the middle of the vector (no sorting of
      // the vector).
      std::nth_element(observed_spacings.begin(),
                       observed_spacings.begin() + observed_spacings.size() / 2,
                       observed_spacings.end());

      // This is the median of the inter-isotopologue spacings
      const double median_spacing =
        observed_spacings[observed_spacings.size() / 2];

      // The median of the spacings should roughly (20% margin) match the
      // expected theoretical spacing for a given charge (spacing is 1/z), with
      // z = charge_specific_feature.charge.
      if(std::abs(median_spacing - expected_spacing) > 0.2 * expected_spacing)
        {
          debug_msg +=
            "When looking into the matches, the charge seems inconsistent, "
            "with non-constant spacing between the matched cluster centroids "
            "-> "
            "rejecting.\n";

          qDebug().noquote() << debug_msg;

          return false;
        }
    }
  else
    {
      // FIXME: with MIN_MATCH_COUNT >= 3 there are always at least two
      // spacings, so this branch is not reached. Should MIN_MATCH_COUNT be
      // lowered, decide whether the feature has to be rejected here. For now
      // it is not, and the message says so.
      debug_msg +=
        ("The count of contiguous spacings in the matches is less than two -> "
         "Skipping the spacing check.\n");
    }

  // 5. Compute FitQuality
  // =====================

  FitQuality fit_quality;

  // 5a. Pearson correlation
  // -----------------------
  // The Pearson correlation checks that the observed and theoretical isotopic
  // cluster centroids are correlated by their intensities.

  Q_ASSERT(match_count >= MIN_MATCH_COUNT); // 3

  double sum_obs   = 0.0;
  double sum_theo  = 0.0;
  double sum_obs2  = 0.0;
  double sum_theo2 = 0.0;
  double sum_cross = 0.0;

  for(const auto &match : matches)
    {
      const double obs_matched_intensity = match.observedIntensity;
      const double theo_matched_intensity =
        theoretical_cluster_binned_centroids[match.theoreticalClusterIndex]
          .intensity;

      sum_obs   += obs_matched_intensity;
      sum_theo  += theo_matched_intensity;
      sum_obs2  += obs_matched_intensity * obs_matched_intensity;
      sum_theo2 += theo_matched_intensity * theo_matched_intensity;
      sum_cross += obs_matched_intensity * theo_matched_intensity;

      debug_msg += QString(
                     "For one match, observed centroid intensity: %1 "
                     "theoretical intensity: %2. \n")
                     .arg(obs_matched_intensity)
                     .arg(theo_matched_intensity);
    }

  debug_msg += QString(
                 "After iteration through all the matches:\n"
                 "sum of observed intensities: %1 "
                 "sum of theoretical intensities: %2\n")
                 .arg(sum_obs)
                 .arg(sum_theo);

  const double numerator = match_count * sum_cross - sum_obs * sum_theo;

  const double denominator =
    std::sqrt((match_count * sum_obs2 - sum_obs * sum_obs) *
              (match_count * sum_theo2 - sum_theo * sum_theo));

  // A null (or NaN) denominator yields a correlation of 0.
  fit_quality.correlation = (denominator > 0.0) ? numerator / denominator : 0.0;

  debug_msg +=
    QString("Pearson correlation value: %1\n").arg(fit_quality.correlation);

  // 5b. Spacing error in ppm
  // ------------------------
  // Mean, over the pairs of matches involving consecutive theoretical
  // centroids, of the deviation between the observed and the expected m/z
  // spacing.

  double spacing_error_ppm = 0.0;
  size_t count             = 0;

  for(size_t i = 1; i < match_count; ++i)
    {
      const size_t d_theor = matches[i].theoreticalClusterIndex -
                             matches[i - 1].theoreticalClusterIndex;

      if(d_theor == 1)
        {
          const double obs_mz_delta =
            matches[i].observedMz - matches[i - 1].observedMz;

          spacing_error_ppm += deltaToPpm(
            std::abs(obs_mz_delta - expected_spacing), expected_spacing);

          ++count;
        }
    }

  fit_quality.spacingErrorPpm = (count > 0) ? spacing_error_ppm / count : 0.0;

  debug_msg += QString(
                 "Mean inter-isotopologue spacing error (ppm) relative to "
                 "theoretical spacing : %1\n")
                 .arg(fit_quality.spacingErrorPpm);

  // 5c. Explained intensity
  // -----------------------

  // The theoretical intensities are scaled so that the first theoretical
  // centroid has the intensity of the input centroid matched to it. The
  // explained intensity is the sum, over the matches, of
  // min(observed, scaled theoretical) divided by the sum of all the scaled
  // theoretical intensities.

  // Get an iterator to the match that involves the monoisotopic
  // centroid of the theoretical cluster.
  auto iterator_to_theor_cluster_mono_centroid =
    std::find_if(matches.begin(), matches.end(), [](const IsoMatch &iso_match) {
      return iso_match.theoreticalClusterIndex == 0;
    });

  if(iterator_to_theor_cluster_mono_centroid == matches.end())
    {
      debug_msg +=
        "Failed to get the monoisotopic centroid of theoretical cluster -> "
        "Rejecting.\n";

      qDebug().noquote() << debug_msg;

      return false;
    }

  const double obs_mono_int =
    iterator_to_theor_cluster_mono_centroid->observedIntensity;
  const double theo_mono_int =
    theoretical_cluster_binned_centroids[0].intensity;

  if(theo_mono_int <= 0.0)
    {
      debug_msg += QString(
        "Theoretical cluster mono intensity <= 0: tried to explaing intensity, "
        "but failed -> Rejecting.\n");

      qDebug().noquote() << debug_msg;

      return false;
    }

  const double obs_to_theo_mono_intensity_ratio = obs_mono_int / theo_mono_int;

  // Compute total *scaled* theoretical envelope intensity
  const double sum_theo_intensities = std::accumulate(
    theoretical_cluster_binned_centroids.begin(),
    theoretical_cluster_binned_centroids.end(),
    0.0,
    [obs_to_theo_mono_intensity_ratio](double sum, const auto &centroid) {
      return sum + centroid.intensity * obs_to_theo_mono_intensity_ratio;
    });

  // Compute sum of matched intensities (clipped to theoretical)
  double sum_matched = 0.0;

  for(const auto &match : matches)
    {
      const double obs_int = match.observedIntensity;
      const double theo_int =
        theoretical_cluster_binned_centroids[match.theoreticalClusterIndex]
          .intensity *
        obs_to_theo_mono_intensity_ratio;

      sum_matched += std::min(obs_int, theo_int);
    }

  fit_quality.explainedIntensity =
    (sum_theo_intensities > 0.0) ? sum_matched / sum_theo_intensities : 0.0;

  // Threshold for strict check: e.g., at least 65% of the theoretical envelope
  if(fit_quality.explainedIntensity < 0.65)
    {
      debug_msg +=
        QString(
          "Ratio between observed/theoretical cluster centroids summed "
          "intensities (explained intensity ratio): %1, that is too low -> "
          "Rejecting.\n")
          .arg(fit_quality.explainedIntensity);

      qDebug().noquote() << debug_msg;

      return false;
    }

  debug_msg +=
    QString(
      "Ratio between observed/theoretical cluster centroids summed intensities "
      "(explained intensity ratio): %1\n")
      .arg(fit_quality.explainedIntensity);


  // 5d. Local intensity ratio sanity check
  // --------------------------------------

  // We only check consecutive theoretical cluster centroids
  // and verify that the matching observed centroids share the same
  // relative intensity (Mr 2000 -> same intensity for first two peaks
  // of the cluster, for example).
  Q_ASSERT(match_count >= MIN_MATCH_COUNT); // 3

  // Do the computation only for the first MIN_MATCH_COUNT matches, otherwise
  // there is a risk that a new species would start at a later centroid.
  for(size_t match_iter = 1; match_iter < MIN_MATCH_COUNT; ++match_iter)
    {
      const size_t ti_prev = matches[match_iter - 1].theoreticalClusterIndex;
      const size_t ti_curr = matches[match_iter].theoreticalClusterIndex;

      // Only enforce for consecutive isotopes
      if(ti_curr != ti_prev + 1)
        continue;

      const double obs_prev = matches[match_iter - 1].observedIntensity;
      const double obs_curr = matches[match_iter].observedIntensity;

      const double theo_prev =
        theoretical_cluster_binned_centroids[ti_prev].intensity;
      const double theo_curr =
        theoretical_cluster_binned_centroids[ti_curr].intensity;

      if(theo_prev <= 0.0 || theo_curr <= 0.0)
        continue;

      const double obs_ratio  = obs_curr / obs_prev;
      const double theo_ratio = theo_curr / theo_prev;

      debug_msg += QString(
                     "Obs isotopologue %1 intensity: %2 - Theor isotopologue "
                     "%3 intensity: %4\n"
                     "Obs isotopologue %5 intensity: %6 - Theor isotopologue "
                     "%7 intensity: %8\n"
                     "Obs isotopologue intensity ratio: %9 - Theor "
                     "isotopologue intensity ratio: %10\n")
                     .arg(ti_prev)
                     .arg(obs_prev)
                     .arg(ti_prev)
                     .arg(theo_prev)
                     .arg(ti_curr)
                     .arg(obs_curr)
                     .arg(ti_curr)
                     .arg(theo_curr)
                     .arg(obs_ratio)
                     .arg(theo_ratio);

      // Allow generous deviation (noise, centroiding artifacts): the accepted
      // relative deviation is m_params.clusterShapeTolerance.

      debug_msg += QString("m_params.clusterShapeTolerance: %1\n")
                     .arg(m_params.clusterShapeTolerance);

      if(std::abs(obs_ratio - theo_ratio) >
         m_params.clusterShapeTolerance * theo_ratio)
        {
          debug_msg +=
            QString(
              "Local intensity ratio mismatch between isotopes %1 and %2: "
              "observed int ratio=%3 theoretical int ratio=%4 -> rejecting.\n")
              .arg(ti_prev)
              .arg(ti_curr)
              .arg(obs_ratio, 0, 'f', 3)
              .arg(theo_ratio, 0, 'f', 3);

          qDebug().noquote() << debug_msg;
          return false;
        }
    }

  // 6. Update feature
  // =================
  // Note that the feature is updated before the score check below.
  charge_specific_feature.intensity  = obs_mono_int;
  charge_specific_feature.fitQuality = fit_quality;

  debug_msg +=
    QString("Explained intensity: %1\n").arg(fit_quality.explainedIntensity);

  // 7. Acceptance criteria
  // ======================

  debug_msg += QString("m_params.minScore: %1 - ").arg(m_params.minScore);

  if(fit_quality.correlation < m_params.minScore)
    {
      debug_msg +=
        QString(
          "fit_quality.correlation balow min score: %1 -> returning false.\n")
          .arg(fit_quality.correlation);

      qDebug().noquote() << debug_msg;

      return false;
    }

  // Consume the input centroid.
  globally_used[input_centroid_index] = true;

  // But consume also all the input centroids that were used
  // successfully to match any of the theoretical isotopic cluster
  // centroids:
  for(const IsoMatch &iso_match : matches)
    globally_used[iso_match.observedPeakIndex] = true;

  debug_msg += QString(
                 "fit_quality.correlation: %1 -> set input centroid index %2 "
                 "as  consumed and returning true.\n")
                 .arg(fit_quality.correlation)
                 .arg(input_centroid_index);

  qDebug().noquote() << debug_msg;

  return true;
}

// Generate the charge-specific feature candidates for a centroided mass
// spectrum.
//
// The charge of the ion below any of the centroids in the spectrum is not
// known. Each centroid is thus converted to a neutral mass for each charge of
// the [m_params.minCharge, m_params.maxCharge] range. Each hypothesis that
// has a neutral mass in the [m_params.minMass, m_params.maxMass] range is
// then tested with fitIsotopicEnvelope().
//
// input_centroids: the centroided mass spectrum to process.
//
// Returns the features accepted by fitIsotopicEnvelope(), in the order of
// their acceptance (input centroid order, then decreasing charge).
std::vector<ChargeSpecificFeature>
LowMassDeconvolver::generateChargeSpecificFeatures(
  const pappso::Trace &input_centroids) const
{
  QString debug_msg =
    QString(
      "Generating model charged features for %1 selected candidate "
      "peaks from input centroided mass spectrum:\n%2\n")
      .arg(input_centroids.size())
      .arg(input_centroids.toString());

  qDebug().noquote() << qSetRealNumberPrecision(4) << debug_msg;
  debug_msg.clear();

  std::vector<ChargeSpecificFeature> charge_specific_features;

  // Reserve the upper bound: at most one feature per (centroid, charge) pair.
  // The number of charges is only computed when the charge range is not
  // empty, so that an inverted range or a non-positive maxCharge cannot turn
  // into a wrapped-around (huge) reservation request.
  size_t charge_count = 0;

  if(m_params.maxCharge >= m_params.minCharge)
    charge_count =
      static_cast<size_t>(m_params.maxCharge - m_params.minCharge) + 1;

  charge_specific_features.reserve(input_centroids.size() * charge_count);

  // We'll need this to document the input centroids that generated features
  // that could be matched. fitIsotopicEnvelope() updates it.
  std::vector<bool> globally_used(input_centroids.size(), false);

  // For each centroid peak in the input data, compute a neutral mass
  // by assuming that its charge is any of the values in the
  // [m_params.minCharge, m_params.maxCharge] range.
  // Of course we do not know the charge of the ion below that centroid.

  for(size_t iter = 0; iter < input_centroids.size(); ++iter)
    {
      pappso::DataPoint input_centroid = input_centroids[iter];

      debug_msg += QString("Input mass centroid, index %1, m/z: %2\n")
                     .arg(iter)
                     .arg(input_centroid.toString());

      // Iterate in the possible charge values in reversed order so that
      // we consume the multi-charged ions before getting to the mono-charged.
      // This way, we avoid having a real twice-charged ion being matched to
      // every other peak of a theoretical isotopic cluster generated for
      // charge 1.

      for(int z = m_params.maxCharge; z >= m_params.minCharge; --z)
        {
          // Compute neutral mass assuming that spectrum centroid was for charge
          // z.

          double neutral_mass = mzToNeutral(input_centroid.x, z);

          debug_msg += QString(
                         "\tInput centroid m/z %1 - assuming spectrum centroid "
                         "was of charge %2 -> "
                         "neutral mass:%3\n")
                         .arg(input_centroid.x, 0, 'f', 5)
                         .arg(z)
                         .arg(neutral_mass, 0, 'f', 5);

          // Skip masses outside allowed range. The debug text stays pending
          // and is output along with the next message.
          if(neutral_mass < m_params.minMass || neutral_mass > m_params.maxMass)
            {
              debug_msg += QString(
                             "\t\tFound neutral mass that is outside accepted "
                             "range: %1, skipping it.\n")
                             .arg(neutral_mass, 0, 'f', 5);
              continue;
            }

          // Prepare a ChargeSpecificFeature to fill
          ChargeSpecificFeature charge_specific_feature;
          charge_specific_feature.charge         = z;
          charge_specific_feature.neutralMass    = neutral_mass;
          charge_specific_feature.monoisotopicMz = input_centroid.x;
          charge_specific_feature.intensity      = input_centroid.y;

          debug_msg += QString(
                         "\t\tGoing to check envelope fit for charged "
                         "feature:\n\t\t%1\n")
                         .arg(charge_specific_feature.toString(
                           false /*with_fit_quality*/));

          qDebug().noquote() << qSetRealNumberPrecision(4) << debug_msg;
          debug_msg.clear();

          // Fit the theoretical isotopic envelope
          bool accepted = fitIsotopicEnvelope(
            input_centroids, iter, charge_specific_feature, globally_used);

          if(accepted)
            {
              debug_msg += QString("\tAccepted\n");
              qDebug().noquote() << qSetRealNumberPrecision(4) << debug_msg;
              debug_msg.clear();

              charge_specific_features.push_back(
                std::move(charge_specific_feature));
            }
          else
            {
              debug_msg += QString("\tRejected\n");
              qDebug().noquote() << debug_msg;
              debug_msg.clear();
            }
        }
      // End of
      // for(int z = m_params.maxCharge; z >= m_params.minCharge; --z)
      // That is, finished iterating in the accepted charge range.
    }
  // End of
  // for(size_t iter = 0; iter < input_centroids.size(); ++iter)
  // That is, finished iterating in the input centroided mass spectrum.

  // Output the debug text that may still be pending, which happens when the
  // last hypotheses were skipped because of an out-of-range neutral mass.
  if(!debug_msg.isEmpty())
    qDebug().noquote() << debug_msg;

  return charge_specific_features;
}

std::vector<DeconvolutedFeature>
LowMassDeconvolver::groupByNeutralMass(
  const std::vector<ChargeSpecificFeature> &charge_specific_features) const
{
  std::vector<DeconvolutedFeature> deconvoluted_features;

  qDebug() << "Found" << charge_specific_features.size()
           << "charge-specific features.";

  for(const ChargeSpecificFeature &charge_specific_feature :
      charge_specific_features)
    {
      bool merged = false;

      qDebug().noquote() << qSetRealNumberPrecision(5)
                         << "Now iterating into charge specific feature:"
                         << charge_specific_feature.toString();

      for(DeconvolutedFeature &iter_feature : deconvoluted_features)
        {
          qDebug().noquote()
            << "At least one deconvoluted feature existed already, "
               "iterating into one:"
            << iter_feature.toString(true /*with_supporting_ions*/);

          double reference_mass = iter_feature.neutralMass;

          // delta_mass = | observed − reference |
          double delta_mass =
            std::abs(charge_specific_feature.neutralMass - reference_mass);

          double delta_ppm = deltaToPpm(delta_mass, reference_mass);

          qDebug().noquote()
            << qSetRealNumberPrecision(5) << "Delta mass:" << delta_mass
            << "reference mass:" << reference_mass
            << " - delta_ppm:" << delta_ppm
            << " - params tolerance (ppm):" << m_params.ppmMassTolerance
            << " - Merging only if delta_ppm is less than "
               "params tolerance (ppm)";

          if(delta_ppm <= m_params.ppmMassTolerance)
            {
              // --- merge into existing feature ---
              qDebug() << "Merge new charge-specific feature into an existing "
                          "deconvoluted feature.";

              SupportingIon ion;
              ion.charge         = charge_specific_feature.charge;
              ion.monoisotopicMz = charge_specific_feature.monoisotopicMz;
              ion.intensity      = charge_specific_feature.intensity;

              iter_feature.supportingIons.push_back(ion);

              // intensity sum
              iter_feature.intensity += charge_specific_feature.intensity;

              // FitQuality aggregation
              iter_feature.fitQuality.correlation =
                std::max(iter_feature.fitQuality.correlation,
                         charge_specific_feature.fitQuality.correlation);

              iter_feature.fitQuality.spacingErrorPpm =
                (iter_feature.fitQuality.spacingErrorPpm *
                   (iter_feature.intensity -
                    charge_specific_feature.intensity) +
                 charge_specific_feature.fitQuality.spacingErrorPpm *
                   charge_specific_feature.intensity) /
                iter_feature.intensity;

              iter_feature.fitQuality.explainedIntensity =
                (iter_feature.fitQuality.explainedIntensity *
                   (iter_feature.intensity -
                    charge_specific_feature.intensity) +
                 charge_specific_feature.fitQuality.explainedIntensity *
                   charge_specific_feature.intensity) /
                iter_feature.intensity;

              merged = true;
              break;
            }
        }

      if(!merged)
        {
          // --- create new feature ---
          qDebug()
            << qSetRealNumberPrecision(5)
            << "Now creating final new deconvoluted feature (neutral mass):"
            << charge_specific_feature.neutralMass;

          DeconvolutedFeature feature;
          feature.neutralMass = charge_specific_feature.neutralMass;
          feature.intensity   = charge_specific_feature.intensity;
          feature.fitQuality  = charge_specific_feature.fitQuality;

          SupportingIon ion;
          ion.charge         = charge_specific_feature.charge;
          ion.monoisotopicMz = charge_specific_feature.monoisotopicMz;
          ion.intensity      = charge_specific_feature.intensity;

          feature.supportingIons.push_back(ion);

          deconvoluted_features.push_back(std::move(feature));
        }
    }

  return deconvoluted_features;
}

// Deconvolute a centroided mass spectrum.
//
// The processing is a chain of three member function calls:
// 1. selectCandidatePeaks() returns the candidate centroids (peak
//    filtering);
// 2. generateChargeSpecificFeatures() tests each candidate centroid against
//    the charge hypotheses and returns the charge-specific features;
// 3. groupByNeutralMass() groups these charge-specific features into the
//    deconvoluted features.
//
// input_centroids: the centroided mass spectrum to deconvolute.
//
// Returns the deconvoluted features produced by groupByNeutralMass().
std::vector<DeconvolutedFeature>
LowMassDeconvolver::deconvolute(const pappso::Trace &input_centroids) const
{
  const auto candidate_centroids = selectCandidatePeaks(input_centroids);

  const auto charged_features =
    generateChargeSpecificFeatures(candidate_centroids);

  return groupByNeutralMass(charged_features);
}


} // namespace libXpertMassCore
} // namespace MsXpS
