diff --git a/src/gbwt_extender.cpp b/src/gbwt_extender.cpp index 5c3cf87cf53..c939823357c 100644 --- a/src/gbwt_extender.cpp +++ b/src/gbwt_extender.cpp @@ -844,13 +844,6 @@ struct MatchPos { } return (this->seq_offset < another.seq_offset); } - - // Returns the non-empty position further on the diagonal (on the sequence). - static const MatchPos& max(const MatchPos& a, const MatchPos& b) { - if (a.empty()) { return b; } - if (b.empty()) { return a; } - return (a.seq_offset > b.seq_offset ? a : b); - } }; // A point in an WFA score matrix (for a specific node). @@ -980,7 +973,7 @@ class WFATree { std::vector nodes; // Best alignment found so far. If we reached the destination in the graph, - // the score includes the possible insertion at the end but the point itself + // the score includes the implicit insertion at the end but the point itself // does not. WFAPoint candidate_point; uint32_t candidate_node; @@ -1030,9 +1023,14 @@ class WFATree { static bool is_root(uint32_t node) { return (node == 0); } uint32_t parent(uint32_t node) const { return this->nodes[node].parent; } + // Assumes length > 0. + int32_t gap_extend_penalty(uint32_t length) const { + return static_cast(length) * this->gap_extend; + } + // Assumes length > 0. int32_t gap_penalty(uint32_t length) const { - return this->gap_open + static_cast(length) * this->gap_extend; + return this->gap_open + this->gap_extend_penalty(length); } // wf_extend() in the paper. @@ -1106,21 +1104,32 @@ class WFATree { subst.seq_offset++; this->successor_offset(subst); } - subst = MatchPos::max(subst, ins); - subst = MatchPos::max(subst, del); + + // Determine the edit that reaches furthest on the diagonal. + bool is_insertion = false; + if (subst < ins) { + subst = std::move(ins); + is_insertion = true; + } + if (subst < del) { + subst = std::move(del); + is_insertion = false; + } + if (!subst.empty()) { - // If we are just past the end position, we get a candidate alignment by - // assuming that the rest of the sequence is an insertion. - // If `subst` is an insertion, we already got a better alignment from the - // match preceding it. - if (this->nodes[subst.node()].same_node(to) && subst.node_offset == offset(to) + 1) { + // If we reached the end position with the edit, we get a candidate + // alignment by assuming that the rest of the sequence is an insertion. + // If the edit is an insertion, we charge the gap open cost again, but + // we already got the same insertion without the extra cost from the + // match preceding the insertion. + if (this->nodes[subst.node()].same_node(to) && subst.node_offset == offset(to)) { uint32_t gap_length = this->sequence.length() - subst.seq_offset; - int32_t new_score = score; + int32_t gap_score = 0; if (gap_length > 0) { - new_score += this->gap_penalty(gap_length); + gap_score = this->gap_penalty(gap_length); } - if (new_score < this->candidate_point.score) { - this->candidate_point = { new_score, diagonal, subst.seq_offset, subst.node_offset }; + if (score + gap_score < this->candidate_point.score) { + this->candidate_point = { score + gap_score, diagonal, subst.seq_offset, subst.node_offset }; this->candidate_node = subst.node(); } } @@ -1221,18 +1230,18 @@ class WFATree { while (true) { bool may_reach_to = this->nodes[pos.node()].same_node(to) & (pos.node_offset <= offset(to)); this->nodes[pos.node()].match_forward(this->sequence, this->graph, pos); - // We got a match that covers the end position but may extend past it. + // We got a match that reached the end or went past it. // Alternatively there is no end position and we have aligned the entire sequence. // This gives us a candidate where the rest of the sequence is an insertion. - if ((may_reach_to && pos.node_offset > offset(to)) || (no_pos(to) && pos.seq_offset >= this->sequence.length())) { - uint32_t overshoot = (no_pos(to) ? 0 : pos.node_offset - offset(to) - 1); + if ((may_reach_to && pos.node_offset >= offset(to)) || (no_pos(to) && pos.seq_offset >= this->sequence.length())) { + uint32_t overshoot = (no_pos(to) ? 0 : pos.node_offset - offset(to)); uint32_t gap_length = (this->sequence.length() - pos.seq_offset) + overshoot; - int32_t new_score = score; + int32_t gap_score = 0; if (gap_length > 0) { - new_score += this->gap_penalty(gap_length); + gap_score = this->gap_penalty(gap_length); } - if (new_score < this->candidate_point.score) { - this->candidate_point = { new_score, diagonal, pos.seq_offset - overshoot, static_cast(offset(to) + 1) }; + if (score + gap_score < this->candidate_point.score) { + this->candidate_point = { score + gap_score, diagonal, pos.seq_offset - overshoot, static_cast(offset(to)) }; this->candidate_node = pos.node(); } } @@ -1363,7 +1372,7 @@ WFAAlignment WFAExtender::connect(std::string sequence, pos_t from, pos_t to) co } this->mask(sequence); - WFATree tree(*(this->graph), sequence, root_state, offset(from), *(this->aligner)); + WFATree tree(*(this->graph), sequence, root_state, offset(from) + 1, *(this->aligner)); int32_t score = 0; while (true) { @@ -1398,7 +1407,7 @@ WFAAlignment WFAExtender::connect(std::string sequence, pos_t from, pos_t to) co // Start building an alignment. Store the path first. WFAAlignment result { - {}, {}, static_cast(offset(from)), 0, + {}, {}, static_cast(offset(from) + 1), 0, tree.candidate_point.seq_offset + unaligned_tail, tree.candidate_point.alignment_score(*(this->aligner), unaligned_tail) }; @@ -1418,7 +1427,7 @@ WFAAlignment WFAExtender::connect(std::string sequence, pos_t from, pos_t to) co if (unaligned_tail > 0) { uint32_t final_insertion = sequence.length() - tree.candidate_point.seq_offset; result.append(WFAAlignment::insertion, final_insertion); - point.score -= tree.gap_penalty(final_insertion); + point.score -= tree.gap_penalty(unaligned_tail); } // Backtrace the edits. @@ -1472,6 +1481,14 @@ WFAAlignment WFAExtender::connect(std::string sequence, pos_t from, pos_t to) co } std::reverse(result.edits.begin(), result.edits.end()); + // We used "from + 1" as the starting position for the alignment. That could have + // been a past-the-end position in the initial node. Once we have an actual path + // instead of a tree of potential paths, we can remove the unused node. + if (!result.path.empty() && result.node_offset >= this->graph->get_length(result.path.front())) { + result.path.erase(result.path.begin()); + result.node_offset = 0; + } + // Due to the way we expand the tree of GBWT search states and store wavefront // information in the leaves, we sometimes do not use any bases in the final node. // We deal with this now to avoid facing the issue later. diff --git a/src/gbwt_extender.hpp b/src/gbwt_extender.hpp index 3d067cf3de4..327751e815a 100644 --- a/src/gbwt_extender.hpp +++ b/src/gbwt_extender.hpp @@ -297,6 +297,11 @@ class WFAExtender { /** * Align the sequence to a haplotype between the two graph positions. * + * The endpoints are assumed to be valid graph positions. In order for + * there to be an alignment, there must be a haplotype that includes the + * endpoints and connects them. However, the endpoints are not covered + * by the returned alignment. + * * The sequence that will be aligned is passed by value. All non-ACGT * characters are masked with character X, which should not match any * character in the graph. diff --git a/src/unittest/gbwt_extender.cpp b/src/unittest/gbwt_extender.cpp index 438818832a5..b1f6b69e7e9 100644 --- a/src/unittest/gbwt_extender.cpp +++ b/src/unittest/gbwt_extender.cpp @@ -970,16 +970,29 @@ void check_alignment(const WFAAlignment& alignment, const std::string& sequence, } // Check that the alignment is between the right positions, if provided, and the start/end nodes are used in the alignment. - pos_t start_pos(graph.get_id(alignment.path.front()), graph.get_is_reverse(alignment.path.front()), alignment.node_offset); - REQUIRE(offset(start_pos) < graph.get_length(alignment.path.front())); + REQUIRE(alignment.node_offset < graph.get_length(alignment.path.front())); if (from != nullptr) { - REQUIRE(start_pos == *from); + handle_t from_handle = graph.get_handle(id(*from), is_rev(*from)); + if (alignment.path.front() == from_handle && alignment.node_offset > 0) { + REQUIRE(alignment.node_offset == offset(*from) + 1); + } else { + REQUIRE(offset(*from) + 1 == graph.get_length(from_handle)); + REQUIRE(graph.has_edge(from_handle, alignment.path.front())); + REQUIRE(alignment.node_offset == 0); + } } uint32_t final_offset = alignment.final_offset(graph); REQUIRE(final_offset > 0); - pos_t end_pos(graph.get_id(alignment.path.back()), graph.get_is_reverse(alignment.path.back()), final_offset - 1); if (to != nullptr) { - REQUIRE(end_pos == *to); + handle_t to_handle = graph.get_handle(id(*to), is_rev(*to)); + uint32_t final_node_length = graph.get_length(alignment.path.back()); + if (alignment.path.back() == to_handle && final_offset < final_node_length) { + REQUIRE(final_offset == offset(*to)); + } else { + REQUIRE(offset(*to) == 0); + REQUIRE(graph.has_edge(alignment.path.back(), to_handle)); + REQUIRE(final_offset == final_node_length); + } } // Check that edits of the same type are merged. @@ -1054,7 +1067,7 @@ TEST_CASE("Exact matches in a linear graph", "[wfa_extender]") { SECTION("Single node, start to end") { std::string sequence("GATTACA"); - pos_t from(2, false, 0); pos_t to(2, false, 6); + pos_t from(1, false, 2); pos_t to(3, false, 0); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1062,7 +1075,7 @@ TEST_CASE("Exact matches in a linear graph", "[wfa_extender]") { SECTION("Single node, middle") { std::string sequence("ATTAC"); - pos_t from(2, false, 1); pos_t to(2, false, 5); + pos_t from(2, false, 0); pos_t to(2, false, 6); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1070,7 +1083,7 @@ TEST_CASE("Exact matches in a linear graph", "[wfa_extender]") { SECTION("Multiple nodes, start to end") { std::string sequence("GATTACAGATTA"); - pos_t from(2, false, 0); pos_t to(3, false, 4); + pos_t from(1, false, 2); pos_t to(4, false, 0); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1078,7 +1091,7 @@ TEST_CASE("Exact matches in a linear graph", "[wfa_extender]") { SECTION("Multiple nodes, middle") { std::string sequence("ATTACAGATT"); - pos_t from(2, false, 1); pos_t to(3, false, 3); + pos_t from(2, false, 0); pos_t to(3, false, 4); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1086,7 +1099,7 @@ TEST_CASE("Exact matches in a linear graph", "[wfa_extender]") { SECTION("Multiple nodes, end to start") { std::string sequence("AG"); - pos_t from(2, false, 6); pos_t to(3, false, 0); + pos_t from(2, false, 5); pos_t to(3, false, 1); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1094,7 +1107,7 @@ TEST_CASE("Exact matches in a linear graph", "[wfa_extender]") { SECTION("Multiple nodes, reverse, start to end") { std::string sequence("TAATCTGTAATC"); - pos_t from(3, true, 0); pos_t to(2, true, 6); + pos_t from(4, true, 2); pos_t to(1, true, 0); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1102,7 +1115,7 @@ TEST_CASE("Exact matches in a linear graph", "[wfa_extender]") { SECTION("Multiple nodes, reverse, middle") { std::string sequence("AATCTGTAAT"); - pos_t from(3, true, 1); pos_t to(2, true, 5); + pos_t from(3, true, 0); pos_t to(2, true, 6); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1110,7 +1123,7 @@ TEST_CASE("Exact matches in a linear graph", "[wfa_extender]") { SECTION("Multiple nodes, reverse, end to start") { std::string sequence("CT"); - pos_t from(3, true, 4); pos_t to(2, true, 0); + pos_t from(3, true, 3); pos_t to(2, true, 1); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1129,7 +1142,7 @@ TEST_CASE("Mismatches in a linear graph", "[wfa_extender]") { SECTION("In the middle") { // MMMXMM|MMMM std::string sequence("ATTCCAGATT"); - pos_t from(2, false, 1); pos_t to(3, false, 3); + pos_t from(2, false, 0); pos_t to(3, false, 4); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 1, 1, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1138,7 +1151,7 @@ TEST_CASE("Mismatches in a linear graph", "[wfa_extender]") { SECTION("In the middle, reverse") { // MMMM|MMMXMM std::string sequence("AATCTGTTAT"); - pos_t from(3, true, 1); pos_t to(2, true, 5); + pos_t from(3, true, 0); pos_t to(2, true, 6); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 1, 1, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1147,7 +1160,7 @@ TEST_CASE("Mismatches in a linear graph", "[wfa_extender]") { SECTION("At both ends") { // XMMMMM|MMMX std::string sequence("TTTACAGATA"); - pos_t from(2, false, 1); pos_t to(3, false, 3); + pos_t from(2, false, 0); pos_t to(3, false, 4); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 2, 2, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1156,7 +1169,7 @@ TEST_CASE("Mismatches in a linear graph", "[wfa_extender]") { SECTION("At both ends, reverse") { // XMMM|MMMMMX std::string sequence("TATCTGTAAA"); - pos_t from(3, true, 1); pos_t to(2, true, 5); + pos_t from(3, true, 0); pos_t to(2, true, 6); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 2, 2, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1165,7 +1178,7 @@ TEST_CASE("Mismatches in a linear graph", "[wfa_extender]") { SECTION("Over node boundary") { // MMMMMX|XMMM std::string sequence("ATTACTTATT"); - pos_t from(2, false, 1); pos_t to(3, false, 3); + pos_t from(2, false, 0); pos_t to(3, false, 4); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 2, 2, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1174,7 +1187,7 @@ TEST_CASE("Mismatches in a linear graph", "[wfa_extender]") { SECTION("Over node boundary, reverse") { // MMMX|XMMMMM std::string sequence("AATAAGTAAT"); - pos_t from(3, true, 1); pos_t to(2, true, 5); + pos_t from(3, true, 0); pos_t to(2, true, 6); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 2, 2, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1193,7 +1206,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Deletion in the middle") { // MDDMMM|MMMM std::string sequence("AACAGATT"); - pos_t from(2, false, 1); pos_t to(3, false, 3); + pos_t from(2, false, 0); pos_t to(3, false, 4); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 1, 2); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1202,7 +1215,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Deletion in the middle, reverse") { // MMMM|MMMDDMM std::string sequence("AATCTGTT"); - pos_t from(3, true, 1); pos_t to(2, true, 5); + pos_t from(3, true, 0); pos_t to(2, true, 6); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 1, 2); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1211,7 +1224,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Insertion in the middle") { // MMMMMIIM|MMMM std::string sequence("ATTATACAGATT"); - pos_t from(2, false, 1); pos_t to(3, false, 3); + pos_t from(2, false, 0); pos_t to(3, false, 4); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 2, 0, 1, 2); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1220,7 +1233,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Insertion in the middle, reverse") { // MMMM|MIIMMMMM std::string sequence("AATCTCCGTAAT"); - pos_t from(3, true, 1); pos_t to(2, true, 5); + pos_t from(3, true, 0); pos_t to(2, true, 6); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 2, 0, 1, 2); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1229,7 +1242,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Deletion over node boundary") { // MMMMMD|DDMM std::string sequence("ATTACTT"); - pos_t from(2, false, 1); pos_t to(3, false, 3); + pos_t from(2, false, 0); pos_t to(3, false, 4); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 1, 3); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1238,7 +1251,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Deletion over node boundary, reverse") { // MMMD|DMMMMM std::string sequence("AATGTAAT"); - pos_t from(3, true, 1); pos_t to(2, true, 5); + pos_t from(3, true, 0); pos_t to(2, true, 6); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 1, 2); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1247,7 +1260,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Insertion at node boundary") { // MMMMMMII|MMMM std::string sequence("ATTACATTGATT"); - pos_t from(2, false, 1); pos_t to(3, false, 3); + pos_t from(2, false, 0); pos_t to(3, false, 4); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 2, 0, 1, 2); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1256,7 +1269,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Insertion at node boundary, reverse") { // MMMMII|MMMMMM std::string sequence("AATCAATGTAAT"); - pos_t from(3, true, 1); pos_t to(2, true, 5); + pos_t from(3, true, 0); pos_t to(2, true, 6); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 2, 0, 1, 2); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1265,7 +1278,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Deletion at the end") { // MMMMMM|MMMD std::string sequence("ATTACAGAT"); - pos_t from(2, false, 1); pos_t to(3, false, 3); + pos_t from(2, false, 0); pos_t to(3, false, 4); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 1, 1); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1274,7 +1287,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Deletion at the end, reverse") { // MMMM|MMMMD std::string sequence("AATCTGTA"); - pos_t from(3, true, 1); pos_t to(2, true, 4); + pos_t from(3, true, 0); pos_t to(2, true, 5); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 1, 1); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1283,7 +1296,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Insertion at the end") { // MMMMMM|MMII std::string sequence("ATTACAGATT"); - pos_t from(2, false, 1); pos_t to(3, false, 1); + pos_t from(2, false, 0); pos_t to(3, false, 2); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 2, 0, 1, 2); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1292,7 +1305,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Insertion at the end, reverse") { // MMMM|MMMMII std::string sequence("AATCTGTAAT"); - pos_t from(3, true, 1); pos_t to(2, true, 3); + pos_t from(3, true, 0); pos_t to(2, true, 4); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 2, 0, 1, 2); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1301,7 +1314,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Deletion at both ends") { // DMMMMMM|MMMMD std::string sequence("ATTACAGATT"); - pos_t from(2, false, 0); pos_t to(3, false, 4); + pos_t from(1, false, 2); pos_t to(4, false, 0); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 2, 2); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1310,7 +1323,7 @@ TEST_CASE("Gaps in a linear graph", "[wfa_extender]") { SECTION("Insertion at both ends") { // IIMM|MMMMMM|MMMMM|MMII std::string sequence("AAGCGATTACAGATTATACC"); - pos_t from(1, false, 1); pos_t to(4, false, 1); + pos_t from(1, false, 0); pos_t to(4, false, 2); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 4, 0, 2, 4); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1328,7 +1341,7 @@ TEST_CASE("Special cases in a linear graph", "[wfa_extender]") { SECTION("Empty sequence") { std::string sequence; - pos_t from(2, false, 1); pos_t to(2, false, 2); + pos_t from(2, false, 0); pos_t to(2, false, 3); WFAAlignment result = extender.connect(sequence, from, to); REQUIRE(result.empty()); } @@ -1336,7 +1349,7 @@ TEST_CASE("Special cases in a linear graph", "[wfa_extender]") { SECTION("Cannot align within the score bound") { // MMXXMXM std::string sequence("GAGGAGA"); - pos_t from(2, false, 0); pos_t to(2, false, 6); + pos_t from(1, false, 2); pos_t to(3, false, 0); WFAAlignment result = extender.connect(sequence, from, to); REQUIRE(result.empty()); } @@ -1351,7 +1364,7 @@ TEST_CASE("Special cases in a linear graph", "[wfa_extender]") { SECTION("Run out of graph") { // MM|---- std::string sequence("ATCCCC"); - pos_t from(4, false, 1); pos_t to(5, false, 3); + pos_t from(4, false, 0); pos_t to(5, false, 3); WFAAlignment result = extender.connect(sequence, from, to); REQUIRE(result.empty()); } @@ -1359,7 +1372,7 @@ TEST_CASE("Special cases in a linear graph", "[wfa_extender]") { SECTION("Mixed edits") { // MMMMXM|MMDDM|MM std::string sequence("ATTAGAGAATA"); - pos_t from(2, false, 1); pos_t to(4, false, 1); + pos_t from(2, false, 0); pos_t to(4, false, 2); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 1, 1, 1, 2); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1368,7 +1381,7 @@ TEST_CASE("Special cases in a linear graph", "[wfa_extender]") { SECTION("Mismatches beat ins + del") { // MMXXMMM std::string sequence("GACCACA"); - pos_t from(2, false, 0); pos_t to(2, false, 6); + pos_t from(1, false, 2); pos_t to(3, false, 0); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 2, 2, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1386,7 +1399,7 @@ TEST_CASE("Prefixes in a linear graph", "[wfa_extender]") { SECTION("Empty sequence") { std::string sequence; - pos_t to(2, false, 2); + pos_t to(2, false, 1); WFAAlignment result = extender.prefix(sequence, to); REQUIRE(result.empty()); } @@ -1394,7 +1407,7 @@ TEST_CASE("Prefixes in a linear graph", "[wfa_extender]") { SECTION("Got an unlocalized insertion") { // IIII std::string sequence("GGGG"); - pos_t to(2, false, 6); + pos_t to(3, false, 0); WFAAlignment result = extender.prefix(sequence, to); check_unlocalized_insertion(result, sequence, aligner); } @@ -1402,14 +1415,14 @@ TEST_CASE("Prefixes in a linear graph", "[wfa_extender]") { SECTION("Cannot align within the score bound") { // IIIII std::string sequence("GGGGGG"); - pos_t to(2, false, 6); + pos_t to(3, false, 0); WFAAlignment result = extender.prefix(sequence, to); REQUIRE(result.empty()); } SECTION("Exact match, middle of a node") { std::string sequence("ATTAC"); - pos_t to(2, false, 5); + pos_t to(2, false, 6); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1417,7 +1430,7 @@ TEST_CASE("Prefixes in a linear graph", "[wfa_extender]") { SECTION("Exact match, start of a node") { std::string sequence("GATTA"); - pos_t to(2, false, 4); + pos_t to(2, false, 5); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1425,7 +1438,7 @@ TEST_CASE("Prefixes in a linear graph", "[wfa_extender]") { SECTION("Exact match, end of a node") { std::string sequence("CGATTA"); - pos_t to(2, false, 4); + pos_t to(2, false, 5); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1433,7 +1446,7 @@ TEST_CASE("Prefixes in a linear graph", "[wfa_extender]") { SECTION("Exact match, reverse") { std::string sequence("TAAT"); - pos_t to(2, true, 5); + pos_t to(2, true, 6); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1442,7 +1455,7 @@ TEST_CASE("Prefixes in a linear graph", "[wfa_extender]") { SECTION("With edits") { // MMM|MMMMDDM|MMMXM std::string sequence("CGCGATTAGATAA"); - pos_t to(3, false, 4); + pos_t to(4, false, 0); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length() - 1, 1, 1, 2); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1451,7 +1464,7 @@ TEST_CASE("Prefixes in a linear graph", "[wfa_extender]") { SECTION("Trim the prefix") { // ------|MMMMM std::string sequence("TTTTTTGATTA"); - pos_t to(3, false, 4); + pos_t to(4, false, 0); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length() - 6, 0, 0, 0); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1460,7 +1473,7 @@ TEST_CASE("Prefixes in a linear graph", "[wfa_extender]") { SECTION("Run out of graph, trim") { // ----|MMM|MMMXM std::string sequence("ATATCGCGATAA"); - pos_t to(2, false, 4); + pos_t to(2, false, 5); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length() - 5, 1, 0, 0); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1469,7 +1482,7 @@ TEST_CASE("Prefixes in a linear graph", "[wfa_extender]") { SECTION("Run out of graph, no trim") { // I|MMM|MMMXMMM std::string sequence("TCGCGATAACA"); - pos_t to(2, false, 6); + pos_t to(3, false, 0); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length() - 2, 1, 1, 1); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1503,14 +1516,14 @@ TEST_CASE("Suffixes in a linear graph", "[wfa_extender]") { SECTION("Cannot align within the score bound") { // IIIII std::string sequence("GGGGGG"); - pos_t from(2, false, 1); + pos_t from(2, false, 0); WFAAlignment result = extender.suffix(sequence, from); REQUIRE(result.empty()); } SECTION("Exact match, middle of a node") { std::string sequence("ATTAC"); - pos_t from(2, false, 1); + pos_t from(2, false, 0); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1518,7 +1531,7 @@ TEST_CASE("Suffixes in a linear graph", "[wfa_extender]") { SECTION("Exact match, start of a node") { std::string sequence("TTACAG"); - pos_t from(2, false, 2); + pos_t from(2, false, 1); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1526,7 +1539,7 @@ TEST_CASE("Suffixes in a linear graph", "[wfa_extender]") { SECTION("Exact match, end of a node") { std::string sequence("TTACA"); - pos_t from(2, false, 2); + pos_t from(2, false, 1); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1534,7 +1547,7 @@ TEST_CASE("Suffixes in a linear graph", "[wfa_extender]") { SECTION("Exact match, reverse") { std::string sequence("TAAT"); - pos_t from(2, true, 2); + pos_t from(2, true, 1); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1543,7 +1556,7 @@ TEST_CASE("Suffixes in a linear graph", "[wfa_extender]") { SECTION("With edits") { // MMXMMM|MDDMM|MM std::string sequence("ATGACAGTATA"); - pos_t from(2, false, 1); + pos_t from(2, false, 0); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length() - 1, 1, 1, 2); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1552,7 +1565,7 @@ TEST_CASE("Suffixes in a linear graph", "[wfa_extender]") { SECTION("Trim the suffix") { // MMMMM|------ std::string sequence("GATTAAAAAAA"); - pos_t from(3, false, 0); + pos_t from(2, false, 6); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length() - 6, 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1561,7 +1574,7 @@ TEST_CASE("Suffixes in a linear graph", "[wfa_extender]") { SECTION("Run out of graph, trim") { // MXMM|MMM|--- std::string sequence("AATATATCAC"); - pos_t from(3, false, 1); + pos_t from(3, false, 0); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length() - 4, 1, 0, 0); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1570,7 +1583,7 @@ TEST_CASE("Suffixes in a linear graph", "[wfa_extender]") { SECTION("Run out of graph, no trim") { // MMMMMM|MMMXM|MMM|II std::string sequence("ATTACAGATCATATCC"); - pos_t from(2, false, 1); + pos_t from(2, false, 0); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length() - 3, 1, 1, 2); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1590,7 +1603,7 @@ TEST_CASE("Connect in a general graph", "[wfa_extender]") { SECTION("Exact match") { std::string sequence("ACAGATTATGG"); - pos_t from(2, false, 4); pos_t to(8, false, 0); + pos_t from(2, false, 3); pos_t to(8, false, 1); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1599,16 +1612,16 @@ TEST_CASE("Connect in a general graph", "[wfa_extender]") { SECTION("Mismatch") { // One of these: X X std::string sequence("ACACATTATGG"); - pos_t from(2, false, 4); pos_t to(8, false, 0); + pos_t from(2, false, 3); pos_t to(8, false, 1); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 1, 1, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); } SECTION("Mix of edits") { - // MMMM|D|MM|XM|MMM + // MMMM|D|MMMM|XM|MMM std::string sequence("TACAATTACCGAA"); - pos_t from(2, false, 3); pos_t to(8, false, 2); + pos_t from(2, false, 2); pos_t to(10, false, 0); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 1, 1, 1, 1); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1617,7 +1630,7 @@ TEST_CASE("Connect in a general graph", "[wfa_extender]") { SECTION("Nondeterministic choice") { // MMM|MM|MXM|MMM|MM, bottom choice std::string sequence("TTATCGTAGTATA"); - pos_t from(5, false, 1); pos_t to(11, false, 1); + pos_t from(5, false, 0); pos_t to(11, false, 2); WFAAlignment result = extender.connect(sequence, from, to); check_score(result, aligner, sequence.length() - 1, 1, 0, 0); check_alignment(result, sequence, graph, aligner, &from, &to); @@ -1625,7 +1638,7 @@ TEST_CASE("Connect in a general graph", "[wfa_extender]") { SECTION("No haplotype connection") { // Matches 6 -> 8 -> 10 - std::string sequence("TGGAAGTA"); + std::string sequence("GGAAGT"); pos_t from(6, false, 0); pos_t to(10, false, 2); WFAAlignment result = extender.connect(sequence, from, to); REQUIRE(result.empty()); @@ -1634,7 +1647,7 @@ TEST_CASE("Connect in a general graph", "[wfa_extender]") { SECTION("Not within score bound") { // M|XMMMMMM|X|MXXM std::string sequence("CCATTACATAGGA"); - pos_t from(1, false, 2); pos_t to(5, false, 3); + pos_t from(1, false, 1); pos_t to(6, false, 0); WFAAlignment result = extender.connect(sequence, from, to); REQUIRE(result.empty()); } @@ -1653,7 +1666,7 @@ TEST_CASE("Prefix in a general graph", "[wfa_extender]") { SECTION("Exact match") { std::string sequence("TACACAT"); - pos_t to(5, false, 1); + pos_t to(5, false, 2); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1662,7 +1675,7 @@ TEST_CASE("Prefix in a general graph", "[wfa_extender]") { SECTION("Mismatch") { // MMXM|M|MMM std::string sequence("TAGAGATT"); - pos_t to(5, false, 2); + pos_t to(5, false, 3); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length() - 1, 1, 0, 0); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1671,7 +1684,7 @@ TEST_CASE("Prefix in a general graph", "[wfa_extender]") { SECTION("Mix of edits") { // M|MMMMMXM|M|MDDM|MM|MMM std::string sequence("CGATTAGACAATCGAA"); - pos_t to(8, false, 2); + pos_t to(10, false, 0); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length() - 1, 1, 1, 2); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1680,7 +1693,7 @@ TEST_CASE("Prefix in a general graph", "[wfa_extender]") { SECTION("Nondeterministic choice") { // MMM|MXM|MM|MMM, bottom options on the reverse strand std::string sequence("TACTACGATAA"); - pos_t to(5, true, 2); + pos_t to(5, true, 3); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length() - 1, 1, 0, 0); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1689,7 +1702,7 @@ TEST_CASE("Prefix in a general graph", "[wfa_extender]") { SECTION("Trim the prefix") { // ------|M|MMMM std::string sequence("GGGGGGGATTA"); - pos_t to(5, false, 3); + pos_t to(6, false, 0); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length() - 6, 0, 0, 0); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1698,7 +1711,7 @@ TEST_CASE("Prefix in a general graph", "[wfa_extender]") { SECTION("Run out of graph, trim") { // ------|MMM|MMMM std::string sequence("AAAAAACGCGATT"); - pos_t to(2, false, 3); + pos_t to(2, false, 4); WFAAlignment result = extender.prefix(sequence, to); check_score(result, aligner, sequence.length() - 6, 0, 0, 0); check_alignment(result, sequence, graph, aligner, nullptr, &to); @@ -1718,7 +1731,7 @@ TEST_CASE("Suffix in a general graph", "[wfa_extender]") { SECTION("Exact match") { std::string sequence("ATTATGGAA"); - pos_t from(5, false, 0); + pos_t from(3, false, 0); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length(), 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1727,7 +1740,7 @@ TEST_CASE("Suffix in a general graph", "[wfa_extender]") { SECTION("Mismatch") { // MMM|M|MMMM|MX|M std::string sequence("ACACATTATGG"); - pos_t from(2, false, 4); + pos_t from(2, false, 3); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length() - 1, 1, 0, 0); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1736,7 +1749,7 @@ TEST_CASE("Suffix in a general graph", "[wfa_extender]") { SECTION("Mix of edits") { // MMMXM|M|MMMD|DM|MM std::string sequence("TTAGACATTCGA"); - pos_t from(2, false, 2); + pos_t from(2, false, 1); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length() - 1, 1, 1, 2); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1745,7 +1758,7 @@ TEST_CASE("Suffix in a general graph", "[wfa_extender]") { SECTION("Nondeterministic choice") { // MM|MM|MMX|MMM|M, top options std::string sequence("TATGGATCATT"); - pos_t from(5, false, 2); + pos_t from(5, false, 1); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length() - 1, 1, 0, 0); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1754,7 +1767,7 @@ TEST_CASE("Suffix in a general graph", "[wfa_extender]") { SECTION("Trim the suffix") { // MMM|MM|------ std::string sequence("GAAGTCCCCCC"); - pos_t from(8, false, 0); + pos_t from(7, false, 1); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length() - 6, 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, nullptr); @@ -1763,7 +1776,7 @@ TEST_CASE("Suffix in a general graph", "[wfa_extender]") { SECTION("Run out of graph, trim") { // M|MMM|MMM|------- std::string sequence("AGTATATAAAAAAA"); - pos_t from(8, false, 2); + pos_t from(8, false, 1); WFAAlignment result = extender.suffix(sequence, from); check_score(result, aligner, sequence.length() - 7, 0, 0, 0); check_alignment(result, sequence, graph, aligner, &from, nullptr);