Skip to content

Commit

Permalink
Add pi results to Fst Processor
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed May 29, 2024
1 parent 7240f36 commit c7b0745
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 2 deletions.
3 changes: 1 addition & 2 deletions lib/genesis/population/function/diversity_pool_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,12 @@ enum class TajimaDenominatorPolicy
* @brief Replicate the original behaviour of PoPoolation <= 1.2.2.
*
* The idea is the same as in kProvidedMinReadDepth, but re-introduces the bugs of PoPoolation.
* There are three major bugs (as far as we are aware) in the PoPoolation implementation
* There are major bugs (as far as we are aware) in the PoPoolation implementation
* up until (and including) version 1.2.2:
*
* 1. They compute the empirical pool size (expeted number of individuals sequenced) as
* n_base(), based on pool size alone, and do not take the read depth into account at all.
* 2. They do not use alpha star, but set it to be equal to beta star instead.
* 3. To compute window averages, they use the number of SNPs in the window, instead of the
* number of all valid positions in the window.
*
* Using this option, one can voluntarily activate these bugs here as well, in order to get
Expand Down
55 changes: 55 additions & 0 deletions lib/genesis/population/function/fst_pool_processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
*/

#include "genesis/population/function/fst_pool_calculator.hpp"
#include "genesis/population/function/fst_pool_unbiased.hpp"
#include "genesis/population/function/window_average.hpp"
#include "genesis/population/filter/variant_filter.hpp"
#include "genesis/population/variant.hpp"
Expand All @@ -46,6 +47,7 @@
#include <stdexcept>
#include <string>
#include <utility>
#include <tuple>
#include <vector>

namespace genesis {
Expand All @@ -63,6 +65,12 @@ class FstPoolProcessor
{
public:

// -------------------------------------------------------------------------
// Typedefs and Enums
// -------------------------------------------------------------------------

using PiVectorTuple = std::tuple<std::vector<double>, std::vector<double>, std::vector<double>>;

// -------------------------------------------------------------------------
// Constructors and Rule of Five
// -------------------------------------------------------------------------
Expand Down Expand Up @@ -254,6 +262,51 @@ class FstPoolProcessor
return results_;
}

/**
* @brief Get lists of all the three intermediate pi values (within, between, total) that
* are part of our unbiased estimators.
*
* This computes the window-averaged values for pi within, pi between, and pi total (in that
* order in the tuple), for each sample pair (order in the three vectors). This uses the same
* window averaging policy as the get_result() function.
*
* This only works when all calculators are of type FstPoolCalculatorUnbiased, and throws an
* exception otherwise. It is merely meant as a convenience function for that particular case.
*/
PiVectorTuple const& get_pi_vectors( size_t window_length ) const
{
// Only allocate when someone first calls this.
// Does not do anything afterwards.
auto const result_size = calculators_.size();
std::get<0>( results_pi_ ).resize( result_size );
std::get<1>( results_pi_ ).resize( result_size );
std::get<2>( results_pi_ ).resize( result_size );

// Get the pi values from all calculators, assuming that they are of the correct type.
for( size_t i = 0; i < result_size; ++i ) {
auto& raw_calc = calculators_[i];
auto cast_calc = dynamic_cast<FstPoolCalculatorUnbiased const*>( raw_calc.get() );
if( ! cast_calc ) {
throw std::domain_error(
"Can only call FstPoolProcessor::get_pi_vectors() "
"for calculators of type FstPoolCalculatorUnbiased."
);
}

// Get the denominator to use for all averaging.
auto const window_avg_denom = window_average_denominator(
avg_policy_, window_length, filter_stats_, calculators_[i]->get_filter_stats()
);

// We compute the window-averaged values as above.
std::get<0>(results_pi_)[i] = cast_calc->get_pi_within() / window_avg_denom;
std::get<1>(results_pi_)[i] = cast_calc->get_pi_between() / window_avg_denom;
std::get<2>(results_pi_)[i] = cast_calc->get_pi_total() / window_avg_denom;
}

return results_pi_;
}

/**
* @brief Get the sum of filter statistics of all Variant%s processed here.
*
Expand Down Expand Up @@ -296,7 +349,9 @@ class FstPoolProcessor
VariantFilterStats filter_stats_;

// We keep a mutable cache for the results, to avoid reallocating memory each time.
// We do the same of the pi values, but this is only allocated when first called.
mutable std::vector<double> results_;
mutable PiVectorTuple results_pi_;

// Thread pool to run the buffering in the background, and the size
// (number of sample pairs) at which we start using the thread pool.
Expand Down

0 comments on commit c7b0745

Please sign in to comment.