Skip to content

Commit

Permalink
Fix Diversity Pool Processor and add test case
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed May 28, 2024
1 parent 67f4a66 commit 9a3a923
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 69 deletions.
13 changes: 7 additions & 6 deletions lib/genesis/population/function/diversity_pool_processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class DiversityPoolProcessor
* can be constructed, but they have to be replaced by non-default-constructed instances
* befor usage.
*/
FstPoolProcessor() = default;
DiversityPoolProcessor() = default;

/**
* @brief Construct a processor.
Expand Down Expand Up @@ -171,7 +171,7 @@ class DiversityPoolProcessor
}
for( auto pool_size : pool_sizes ) {
calculators_.push_back( DiversityPoolCalculator( settings, pool_size ));
results_.emplate_back();
results_.emplace_back();
}
}

Expand Down Expand Up @@ -249,9 +249,9 @@ class DiversityPoolProcessor
assert( results_.size() == calculators_.size() );
for( size_t i = 0; i < results_.size(); ++i ) {
auto const window_avg_denom = window_average_denominator(
avg_policy_, window_length, filter_stats_, calculators_[i]->get_filter_stats()
avg_policy_, window_length, filter_stats_, calculators_[i].get_filter_stats()
);
results_[i] = calculators_[i]->get_result( window_avg_denom );
results_[i] = calculators_[i].get_result( window_avg_denom );
}
return results_;
}
Expand Down Expand Up @@ -304,7 +304,7 @@ class DiversityPoolProcessor
};

// =================================================================================================
// Helper Functions with Pool Sizes
// Make Diversity Processor Helper Functions
// =================================================================================================

/**
Expand All @@ -318,10 +318,11 @@ class DiversityPoolProcessor
* really do much, and is just provided for symmetry reasons with the fst functions...
*/
inline DiversityPoolProcessor make_diversity_pool_processor(
WindowAveragePolicy window_average_policy,
DiversityPoolSettings const& settings,
std::vector<size_t> const& pool_sizes
) {
DiversityPoolProcessor processor;
DiversityPoolProcessor processor{ window_average_policy };
processor.add_calculators( settings, pool_sizes );
return processor;
}
Expand Down
6 changes: 5 additions & 1 deletion lib/genesis/population/function/fst_pool_processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ class FstPoolProcessor
};

// =================================================================================================
// Helper Functions with Pool Sizes
// Make Fst Processor Helper Functions
// =================================================================================================

/**
Expand Down Expand Up @@ -438,6 +438,10 @@ inline FstPoolProcessor make_fst_pool_processor(
return result;
}

// =================================================================================================
// Sample Names Helper Function
// =================================================================================================

/**
* @brief Return a list of sample name pairs for each calculator in an FstPoolProcessor.
*
Expand Down
65 changes: 65 additions & 0 deletions test/src/population/diversity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "genesis/population/format/simple_pileup_reader.hpp"
#include "genesis/population/function/diversity_pool_calculator.hpp"
#include "genesis/population/function/diversity_pool_functions.hpp"
#include "genesis/population/function/diversity_pool_processor.hpp"
#include "genesis/population/filter/sample_counts_filter_numerical.hpp"
#include "genesis/population/filter/sample_counts_filter.hpp"
#include "genesis/population/filter/variant_filter_numerical.hpp"
Expand All @@ -47,6 +48,7 @@
#include "genesis/population/window/window.hpp"
#include "genesis/utils/containers/filter_iterator.hpp"
#include "genesis/utils/containers/transform_iterator.hpp"
#include "genesis/utils/math/random.hpp"

using namespace genesis::population;
using namespace genesis::utils;
Expand Down Expand Up @@ -507,3 +509,66 @@ TEST( Population, DiversityMeasuresIterator )
EXPECT_EQ( window_cnt, exp_tw.size() );
EXPECT_EQ( window_cnt, exp_td.size() );
}

// =================================================================================================
// Random Fuzzy
// =================================================================================================

// Declartion here. Defined in random_variants.cpp
std::vector<Variant> test_create_random_variants_( size_t min_count );

void test_diversity_fuzzy_run_( std::vector<Variant> const& data )
{
// Set up some random settings
DiversityPoolSettings settings;
settings.min_count = 2;
settings.min_read_depth = 2;
settings.max_read_depth = 100;
settings.tajima_denominator_policy = static_cast<TajimaDenominatorPolicy>(
permuted_congruential_generator( 4 )
);

// Make a diversity processor
ASSERT_GT( data.size(), 0 );
auto const n_samples = data[0].samples.size();
auto const pool_sizes = std::vector<size_t>( n_samples, 100 );
auto const window_average_policy = static_cast<WindowAveragePolicy>(
permuted_congruential_generator( 0, 4 )
);
auto processor = make_diversity_pool_processor(
window_average_policy, settings, pool_sizes
);

// Run the data
for( auto const& variant : data ) {
processor.process( variant );
}

// Test the result
size_t const window_length = data.size();
auto const result = processor.get_result( window_length );
EXPECT_EQ( n_samples, result.size() );
for( auto const& r : result ) {
LOG_DBG << r.theta_pi << " " << r.theta_watterson << " " << r.tajima_d;
}
}

TEST( Diversity, RandomFuzzy )
{
// Random seed. Report it, so that in an error case, we can reproduce.
auto const seed = ::time(nullptr);
permuted_congruential_generator_init( seed );
LOG_INFO << "Seed: " << seed;

// For the duration of the test, we deactivate debug logging.
// But if needed, comment this line out, and each test will report its input.
LOG_SCOPE_LEVEL( genesis::utils::Logging::kInfo );

size_t num_tests = 5000;
for( size_t i = 0; i < num_tests; ++i ) {
LOG_DBG << "=================================";
LOG_DBG << "Test " << i;
auto const data = test_create_random_variants_( 4 );
test_diversity_fuzzy_run_( data );
}
}
66 changes: 4 additions & 62 deletions test/src/population/fst_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,71 +424,13 @@ TEST( FST, FstPoolProcessor )
// Random Fuzzy
// =================================================================================================

std::vector<Variant> test_fst_fuzzy_make_data_()
{
// Number of samples per variant
auto const n_positions = 100;
auto const n_samples = permuted_congruential_generator( 2, 10 );

// Create a list of Variants and samples, with random content
std::vector<Variant> data;
data.resize( n_positions );
for( size_t n_var = 0; n_var < n_positions; ++n_var ) {
auto& variant = data[n_var];
variant.chromosome = "1";
variant.position = n_var + 1;
variant.status.set(
permuted_congruential_generator( static_cast<int>( VariantFilterTag::kEnd ) - 1 )
);

// Fill the variant with samples
variant.samples.resize( n_samples );
for( size_t n_smp = 0; n_smp < n_samples; ++n_smp ) {
auto& sample = variant.samples[n_smp];

// Make a selection of how many of the counts we want to fill.
// This makes sure that we are not underrepresenting low counts.
auto const num_non_empty = permuted_congruential_generator( 4 );
switch( num_non_empty ) {
case 0: {
break;
}
case 1: {
sample.a_count = permuted_congruential_generator( 10 );
break;
}
case 2: {
sample.a_count = permuted_congruential_generator( 10 );
sample.c_count = permuted_congruential_generator( 10 );
break;
}
case 3: {
sample.a_count = permuted_congruential_generator( 10 );
sample.c_count = permuted_congruential_generator( 10 );
sample.g_count = permuted_congruential_generator( 10 );
break;
}
case 4: {
sample.a_count = permuted_congruential_generator( 10 );
sample.c_count = permuted_congruential_generator( 10 );
sample.g_count = permuted_congruential_generator( 10 );
sample.t_count = permuted_congruential_generator( 10 );
break;
}
}

// Also set a random status
sample.status.set(
permuted_congruential_generator( static_cast<int>( SampleCountsFilterTag::kEnd ) - 1 )
);
}
}
return data;
}
// Declartion here. Defined in random_variants.cpp
std::vector<Variant> test_create_random_variants_();

void test_fst_fuzzy_run_( std::vector<Variant> const& data )
{
// Make an FST processor
ASSERT_GT( data.size(), 0 );
auto const n_samples = data[0].samples.size();
auto const pool_sizes = std::vector<size_t>( n_samples, 100 );
auto const window_average_policy = static_cast<WindowAveragePolicy>(
Expand Down Expand Up @@ -529,7 +471,7 @@ TEST( FST, RandomFuzzy )
for( size_t i = 0; i < num_tests; ++i ) {
LOG_DBG << "=================================";
LOG_DBG << "Test " << i;
auto const data = test_fst_fuzzy_make_data_();
auto const data = test_create_random_variants_();
test_fst_fuzzy_run_( data );
}
}
Expand Down
134 changes: 134 additions & 0 deletions test/src/population/random_variants.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
Genesis - A toolkit for working with phylogenetic data.
Copyright (C) 2014-2024 Lucas Czech
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact:
Lucas Czech <[email protected]>
University of Copenhagen, Globe Institute, Section for GeoGenetics
Oster Voldgade 5-7, 1350 Copenhagen K, Denmark
*/

/**
* @brief
*
* @file
* @ingroup test
*/

#include "src/common.hpp"

#include "genesis/population/sample_counts.hpp"
#include "genesis/population/variant.hpp"
#include "genesis/population/stream/variant_input_stream.hpp"
#include "genesis/population/stream/variant_input_stream_sources.hpp"
#include "genesis/population/stream/variant_input_stream_adapters.hpp"
#include "genesis/population/filter/sample_counts_filter.hpp"
#include "genesis/population/filter/variant_filter.hpp"
#include "genesis/population/function/functions.hpp"
#include "genesis/utils/math/random.hpp"

#include <cmath>
#include <limits>

using namespace genesis::population;
using namespace genesis::utils;

// =================================================================================================
// Random Fuzzy
// =================================================================================================

/**
* @brief This is a helper function to create random Variants, to be used for testing.
*
* Just to avoid code duplication in the test cases.
*/
std::vector<Variant> test_create_random_variants_( size_t min_count )
{
// Number of samples per variant
auto const n_positions = 100;
auto const n_samples = permuted_congruential_generator( 2, 10 );

// Create a list of Variants and samples, with random content
std::vector<Variant> data;
data.resize( n_positions );
for( size_t n_var = 0; n_var < n_positions; ++n_var ) {
auto& variant = data[n_var];
variant.chromosome = "1";
variant.position = n_var + 1;
variant.status.set(
permuted_congruential_generator(
static_cast<int>( VariantFilterTag::kEnd ) - 1
)
);

// Fill the variant with samples
variant.samples.resize( n_samples );
for( size_t n_smp = 0; n_smp < n_samples; ++n_smp ) {
auto& sample = variant.samples[n_smp];

// Make a selection of how many of the counts we want to fill.
// This makes sure that we are not underrepresenting low counts.
auto const num_non_empty = permuted_congruential_generator( 4 );
switch( num_non_empty ) {
case 0: {
break;
}
case 1: {
sample.a_count = permuted_congruential_generator( 1, 10 );
break;
}
case 2: {
sample.a_count = permuted_congruential_generator( 1, 10 );
sample.c_count = permuted_congruential_generator( 1, 10 );
break;
}
case 3: {
sample.a_count = permuted_congruential_generator( 1, 10 );
sample.c_count = permuted_congruential_generator( 1, 10 );
sample.g_count = permuted_congruential_generator( 1, 10 );
break;
}
case 4: {
sample.a_count = permuted_congruential_generator( 1, 10 );
sample.c_count = permuted_congruential_generator( 1, 10 );
sample.g_count = permuted_congruential_generator( 1, 10 );
sample.t_count = permuted_congruential_generator( 1, 10 );
break;
}
}

// Also set a random status
sample.status.set(
permuted_congruential_generator(
static_cast<int>( SampleCountsFilterTag::kEnd ) - 1
)
);

// Lastly, to avoid missing data issues in the computation,
// we always set the status to missing if there are no counts.
// Our filters would usually catch that, so it's fair to do this here as well.
if( nucleotide_sum( sample ) < min_count ) {
sample.status.reset( SampleCountsFilterTag::kMissing );
}
}
}
return data;
}

std::vector<Variant> test_create_random_variants_()
{
return test_create_random_variants_( 0 );
}

0 comments on commit 9a3a923

Please sign in to comment.