Skip to content

Commit

Permalink
Failure handling and more tests for WFA over GBWT
Browse files Browse the repository at this point in the history
  • Loading branch information
jltsiren committed Jun 16, 2022
1 parent 69ffe4d commit f4c4e72
Show file tree
Hide file tree
Showing 3 changed files with 230 additions and 53 deletions.
22 changes: 13 additions & 9 deletions src/gbwt_extender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ struct state_hash {

bool WFAAlignment::unlocalized_insertion() const {
return (
this->ok &&
this->path.empty() &&
this->edits.size() == 1 &&
this->edits.front().first == insertion
Expand All @@ -755,11 +756,11 @@ uint32_t WFAAlignment::final_offset(const gbwtgraph::GBWTGraph& graph) const {
}

void WFAAlignment::flip(const gbwtgraph::GBWTGraph& graph, const std::string& sequence) {
this->seq_offset = sequence.length() - this->seq_offset - this->length;

if (this->path.empty()) {
return;
}

this->seq_offset = sequence.length() - this->seq_offset - this->length;
this->node_offset = graph.get_length(this->path.back()) - this->final_offset(graph);

// Reverse the path and the edits.
Expand Down Expand Up @@ -996,6 +997,9 @@ class WFATree {
// The overall closed range of diagonals reached.
std::pair<int32_t, int32_t> max_diagonals;

// TODO: Remove when unnecessary.
bool debug;

WFATree(const gbwtgraph::GBWTGraph& graph, const std::string& sequence, const gbwt::SearchState& root, uint32_t node_offset, const Aligner& aligner) :
graph(graph), sequence(sequence),
nodes(),
Expand All @@ -1004,7 +1008,8 @@ class WFATree {
gap_open(2 * (aligner.gap_open - aligner.gap_extension)),
gap_extend(2 * aligner.gap_extension + aligner.match),
score_bound(0),
possible_scores(), max_diagonals(0, 0)
possible_scores(), max_diagonals(0, 0),
debug(false)
{
this->nodes.emplace_back(root, 0);
this->nodes.front().update(WFANode::MATCHES, 0, 0, 0, node_offset);
Expand Down Expand Up @@ -1103,6 +1108,7 @@ class WFATree {
if (!subst.empty()) {
subst.seq_offset++;
this->successor_offset(subst);
this->expand_if_necessary(subst);
}

// Determine the edit that reaches furthest on the diagonal.
Expand Down Expand Up @@ -1134,7 +1140,6 @@ class WFATree {
}
}
this->nodes[subst.node()].update(WFANode::MATCHES, score, diagonal, subst);
this->expand_if_necessary(subst);
}
}
}
Expand Down Expand Up @@ -1363,7 +1368,7 @@ class WFATree {
//------------------------------------------------------------------------------

WFAAlignment WFAExtender::connect(std::string sequence, pos_t from, pos_t to) const {
if (this->graph == nullptr || this->aligner == nullptr || sequence.empty()) {
if (this->graph == nullptr || this->aligner == nullptr) {
return WFAAlignment();
}
gbwt::SearchState root_state = this->graph->get_state(this->graph->get_handle(id(from), is_rev(from)));
Expand All @@ -1373,6 +1378,7 @@ WFAAlignment WFAExtender::connect(std::string sequence, pos_t from, pos_t to) co
this->mask(sequence);

WFATree tree(*(this->graph), sequence, root_state, offset(from) + 1, *(this->aligner));
tree.debug = this->debug;

int32_t score = 0;
while (true) {
Expand Down Expand Up @@ -1401,15 +1407,13 @@ WFAAlignment WFAExtender::connect(std::string sequence, pos_t from, pos_t to) co
return WFAAlignment();
}
}
if (tree.candidate_point.seq_offset == 0) {
return WFAAlignment();
}

// Start building an alignment. Store the path first.
WFAAlignment result {
{}, {}, static_cast<uint32_t>(offset(from) + 1), 0,
tree.candidate_point.seq_offset + unaligned_tail,
tree.candidate_point.alignment_score(*(this->aligner), unaligned_tail)
tree.candidate_point.alignment_score(*(this->aligner), unaligned_tail),
true
};
uint32_t node = tree.candidate_node;
while (true) {
Expand Down
32 changes: 21 additions & 11 deletions src/gbwt_extender.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,15 @@ struct WFAAlignment {
/// Alignment score.
int32_t score = 0;

/// Returns true if the alignment is empty.
bool empty() const { return (this->length == 0); }
/// Is this an actual alignment or a failure?
bool ok = false;

/// Is this an actual alignment or a failure?
operator bool() const { return this->ok; }

/// Returns true if the alignment does not cover anything in the sequence
/// and the graph.
bool empty() const { return (this->path.empty() && this->edits.empty()); }

/// Returns true if the alignment is an insertion without a location.
bool unlocalized_insertion() const;
Expand Down Expand Up @@ -280,7 +287,7 @@ struct WFAAlignment {
*
* VG scores a gap of length `n` as `gap_open + (n - 1) * gap_extend`, while WFA
* papers use `gap_open + n * gap_extend`. Hence we use `gap_open - gap_extend` as
* the effective four-parameter gap open score inside this aligner.
* the effective four-parameter gap open score inside the aligner.
*
* NOTE: Most internal arithmetic operations use 32-bit integers.
*/
Expand All @@ -300,25 +307,28 @@ class WFAExtender {
* The endpoints are assumed to be valid graph positions. In order for
* there to be an alignment, there must be a haplotype that includes the
* endpoints and connects them. However, the endpoints are not covered
* by the returned alignment.
* by the returned alignment.
*
* The sequence that will be aligned is passed by value. All non-ACGT
* characters are masked with character X, which should not match any
* character in the graph.
*
* Returns an empty alignment if there is no alignment with an acceptable
* Returns a failed alignment if there is no alignment with an acceptable
* score.
*
* NOTE: The alignment is to a path after `from` and before `to`. If the
* points are identical, such a path can only exist if there is a cycle.
*/
WFAAlignment connect(std::string sequence, pos_t from, pos_t to) const;

/**
* A special case of connect() for aligning the sequence to a haplotype
* starting at the given position. If there is no alignment for the
* entire sequence with an acceptable score, returns the highest-scoring
* partial alignment.
* partial alignment, which may be empty.
*
* NOTE: This creates a suffix of the alignment by aligning a prefix of
* the sequence.
* NOTE: This creates a suffix of the full alignment by aligning a
* prefix of the sequence.
* TODO: Should we use full-length bonuses?
*/
WFAAlignment suffix(const std::string& sequence, pos_t from) const;
Expand All @@ -327,10 +337,10 @@ class WFAExtender {
* A special case of connect() for aligning the sequence to a haplotype
* ending at the given position. If there is no alignment for the entire
* sequence with an acceptable score, returns the highest-scoring partial
* alignment.
* alignment, which may be empty.
*
* NOTE: This creates a prefix of the alignment by aligning a suffix of
* the sequence.
* NOTE: This creates a prefix of the full alignment by aligning a suffix
* of the sequence.
* TODO: Should we use full-length bonuses?
*/
WFAAlignment prefix(const std::string& sequence, pos_t to) const;
Expand Down
Loading

1 comment on commit f4c4e72

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch update-wfa-over-gbwt. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 9712 seconds

Please sign in to comment.