Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed Jun 6, 2024
2 parents 1088cd1 + 44c6d2d commit ed91b00
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 19 deletions.
2 changes: 1 addition & 1 deletion lib/genesis/population/filter/variant_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ std::ostream& print_variant_filter_category_stats(
os << "Empty: " << stats[VariantFilterTagCategory::kSamplesFailed] << "\n";
}
if( stats[VariantFilterTagCategory::kNumeric] > 0 || verbose ) {
os << "Numric: " << stats[VariantFilterTagCategory::kNumeric] << "\n";
os << "Numeric: " << stats[VariantFilterTagCategory::kNumeric] << "\n";
}
if( stats[VariantFilterTagCategory::kInvariant] > 0 || verbose ) {
os << "Invariant: " << stats[VariantFilterTagCategory::kInvariant] << "\n";
Expand Down
4 changes: 2 additions & 2 deletions lib/genesis/population/function/diversity_pool_calculator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,14 +261,14 @@ class DiversityPoolCalculator
if( std::isfinite( tp )) {
theta_pi_sum_ += tp;
}
assert( std::isfinite( tp ) );
// assert( std::isfinite( tp ) );
}
if( enable_theta_watterson_ || enable_tajima_d_ ) {
tw = theta_watterson_pool( settings_, pool_size_, sample );
if( std::isfinite( tw )) {
theta_watterson_sum_ += tw;
}
assert( std::isfinite( tw ) );
// assert( std::isfinite( tw ) );
}

// We want to keep track of the minimum read depth of the data that we are processing.
Expand Down
32 changes: 18 additions & 14 deletions lib/genesis/population/function/diversity_pool_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,15 @@ double theta_pi_pool_denominator(
size_t min_count, size_t poolsize, size_t nucleotide_count
){
// Boundary: if not held, we'd return zero, and that would not be a useful denominator.
if( 2 * min_count > nucleotide_count ) {
throw std::invalid_argument(
"Cannot compute theta_pi_pool_denominator with min_count = " +
std::to_string( min_count ) + " and nucleotide_count = " +
std::to_string( nucleotide_count )
);
}
// Update: It's okay, we can return 0 here. The position will just not contribute
// to the overall diversity sum then, but still considered for the sum of valid positions.
// if( 2 * min_count > nucleotide_count ) {
// throw std::invalid_argument(
// "Cannot compute theta_pi_pool_denominator with min_count = " +
// std::to_string( min_count ) + " and nucleotide_count = " +
// std::to_string( nucleotide_count )
// );
// }

// Iterate all allele frequencies in between the min and max-min boundaries.
double denom = 0.0;
Expand Down Expand Up @@ -240,13 +242,15 @@ double theta_watterson_pool_denominator(
size_t min_count, size_t poolsize, size_t nucleotide_count
){
// Boundary: if not held, we'd return zero, and that would not be a useful denominator.
if( 2 * min_count > nucleotide_count ) {
throw std::invalid_argument(
"Cannot compute theta_watterson_pool_denominator with min_count = " +
std::to_string( min_count ) + " and nucleotide_count = " +
std::to_string( nucleotide_count )
);
}
// Update: It's okay, we can return 0 here. The position will just not contribute
// to the overall diversity sum then, but still considered for the sum of valid positions.
// if( 2 * min_count > nucleotide_count ) {
// throw std::invalid_argument(
// "Cannot compute theta_watterson_pool_denominator with min_count = " +
// std::to_string( min_count ) + " and nucleotide_count = " +
// std::to_string( nucleotide_count )
// );
// }

// Iterate all allele frequencies in between the min and max-min boundaries.
double denom = 0.0;
Expand Down
1 change: 1 addition & 0 deletions lib/genesis/population/function/fst_pool_processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ class FstPoolProcessor
// Also reset the pi vectors to nan.
// If they are not allocated, nothing happens.
auto const res_sz = results_.size();
(void) res_sz;
assert( std::get<0>( results_pi_ ).size() == 0 || std::get<0>( results_pi_ ).size() == res_sz );
assert( std::get<1>( results_pi_ ).size() == 0 || std::get<1>( results_pi_ ).size() == res_sz );
assert( std::get<2>( results_pi_ ).size() == 0 || std::get<2>( results_pi_ ).size() == res_sz );
Expand Down
12 changes: 10 additions & 2 deletions lib/genesis/population/stream/variant_input_stream_sources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,16 @@ VariantInputStream make_variant_input_stream_from_sam_file(
// Take this into account, and create as many empty (unnamed) samples as needed.
// This cannot be more than one though, as it can be the unaccounted or none,
// or, if we do not split by RG at all, just the one sample were every read ends up in.
data.sample_names = make_sample_name_list_( data.source_name, cur.sample_size() );
assert( data.sample_names.size() <= 1 );
// data.sample_names = make_sample_name_list_( data.source_name, cur.sample_size() );
// assert( data.sample_names.size() <= 1 );

// Scratch that. If we treat the file as a single sample anyway, we just use the file name
// as the sample name. Way more intuitive. Unfortunately, there is then the inconsistency
// in naming, but it's more in line with what e.g. the sync does if a header is provided.
assert( cur.sample_size() <= 1 );
if( cur.sample_size() == 1 ) {
data.sample_names = std::vector<std::string>{ data.source_name };
}
} else {
assert( reader.split_by_rg() == true );
}
Expand Down

0 comments on commit ed91b00

Please sign in to comment.