Skip to content

Commit

Permalink
Add Genome Locus Set invert function
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed Jul 31, 2024
1 parent 8c419c4 commit 5197621
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 0 deletions.
44 changes: 44 additions & 0 deletions lib/genesis/population/genome_locus_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@

#include "genesis/utils/math/bitvector/operators.hpp"

#include <cassert>
#include <stdexcept>
#include <unordered_set>

namespace genesis {
Expand Down Expand Up @@ -257,5 +259,47 @@ void GenomeLocusSet::set_union( GenomeLocusSet const& rhs )
}
}

void GenomeLocusSet::invert( sequence::SequenceDict const& sequence_dict )
{
using namespace genesis::utils;

for( auto& chr_bv : locus_map_ ) {
// Basic check for the chromosome
if( ! sequence_dict.contains( chr_bv.first )) {
throw std::runtime_error(
"Cannot invert Genome Locus Set for chromosome \"" + chr_bv.first +
"\", as the given Sequence Dict does not contain an entry for that chromosome"
);
}

// Get the sequence dict entry and check the lengths
auto const& seq_entry = sequence_dict.get( chr_bv.first );
assert( chr_bv.second.size() > 0 );
if( chr_bv.second.size() - 1 > seq_entry.size() ) {
throw std::runtime_error(
"Cannot invert Genome Locus Set for chromosome \"" + chr_bv.first +
"\", as the given Sequence Dict indicates its length to be " +
std::to_string( seq_entry.size() ) + ", instead of " +
std::to_string( chr_bv.second.size() - 1 )
);
}

// Create an empty bitvector of the desired size, and fill it with the bits set.
// We make sure that it gets the larger size, which is the seq entry size (or equal).
// Again we need to reserve an additional bit for special position 0.
// If the "all" marker is set, the inversion is "nothing", so then we just use a single bit.
auto new_bv = Bitvector( 1 );
if( ! chr_bv.second.get(0) ) {
new_bv = Bitvector( seq_entry.size() + 1, chr_bv.second );
new_bv.negate();
new_bv.set( 0, false );
assert( new_bv.size() >= chr_bv.second.size() );
}

// Finally update our data
locus_map_[ chr_bv.first ] = new_bv;
}
}

} // namespace population
} // namespace genesis
11 changes: 11 additions & 0 deletions lib/genesis/population/genome_locus_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "genesis/population/genome_locus.hpp"
#include "genesis/population/genome_region.hpp"
#include "genesis/population/genome_region_list.hpp"
#include "genesis/sequence/sequence_dict.hpp"
#include "genesis/utils/math/bitvector.hpp"

namespace genesis {
Expand Down Expand Up @@ -267,6 +268,16 @@ class GenomeLocusSet
*/
void set_union( GenomeLocusSet const& rhs );

/**
* @brief Invert all chromosome regions.
*
* This needs a means of getting the length of each chromosome, in order to know how many
* positions towards the end of each chromosome need to be inverted. If the given
* @p sequence_dict does not contain a chromosome that is present in this set here,
* or the set contains set positions beyond the dict, an exception is thrown.
*/
void invert( sequence::SequenceDict const& sequence_dict );

// -------------------------------------------------------------------------
// Locus Covered
// -------------------------------------------------------------------------
Expand Down

0 comments on commit 5197621

Please sign in to comment.