diff --git a/src/subcommand/mpmap_main.cpp b/src/subcommand/mpmap_main.cpp index 21a70863807..7e9cce87a48 100644 --- a/src/subcommand/mpmap_main.cpp +++ b/src/subcommand/mpmap_main.cpp @@ -13,7 +13,7 @@ #include "../path.hpp" #include "../watchdog.hpp" -#define record_read_run_times +//#define record_read_run_times #ifdef record_read_run_times #define READ_TIME_FILE "_read_times.tsv" diff --git a/src/unittest/handle.cpp b/src/unittest/handle.cpp index d5c91cdd881..c49620eedee 100644 --- a/src/unittest/handle.cpp +++ b/src/unittest/handle.cpp @@ -10,6 +10,9 @@ #include "../xg.hpp" #include "../json2pb.h" + +#include "algorithms/are_equivalent.hpp" + #include "sglib/packed_graph.hpp" #include "sglib/hash_graph.hpp" @@ -1822,6 +1825,16 @@ TEST_CASE("VG and XG path handle implementations are correct", "[handle][vg][xg] check_path_traversal(vg, path2); check_path_traversal(vg, path3); + + // maintains validity after serialization (had a bug with this after rewrites at + // one point) + stringstream strm; + vg.serialize_to_ostream(strm); + strm.seekg(0); + VG copy(strm); + + REQUIRE(algorithms::are_equivalent_with_paths(&vg, ©)); + // TODO: check the replaced segments' handles } } diff --git a/src/vg.cpp b/src/vg.cpp index 27e5a260586..42bd38797cd 100644 --- a/src/vg.cpp +++ b/src/vg.cpp @@ -774,20 +774,25 @@ pair VG::rewrite_segment(const step_handle_t& segm cerr << "error:[VG] attempted to rewrite segment delimited by steps on two separate paths" << endl; exit(1); } + int64_t path_id = as_integer(get_path_handle_of_step(segment_begin)); // erase the old segment, using the get_next_step logic to wrap around circular paths // collect the mapping_t*'s that we'll need to erase from the mapping_itr once we don't need them for get_next_step vector to_erase; - auto& path_list = paths._paths.at(paths.get_path_name(as_integer(get_path_handle_of_step(segment_begin)))); - for (step_handle_t step = segment_begin; step != segment_end; ) { - step_handle_t next = get_next_step(step); - path_list.erase(paths.mapping_itr.at(reinterpret_cast(as_integers(step)[1])).first); - step = next; + for (step_handle_t step = segment_begin; step != segment_end; step = get_next_step(step)) { + to_erase.push_back(reinterpret_cast(as_integers(step)[1])); } + vector ranks; + ranks.reserve(to_erase.size()); + auto& path_list = paths._paths.at(paths.get_path_name(as_integer(get_path_handle_of_step(segment_begin)))); for (mapping_t* mapping : to_erase) { + + ranks.push_back(mapping->rank); + paths.node_mapping[mapping->node_id()][path_id].erase(mapping); + path_list.erase(paths.mapping_itr.at(mapping).first); paths.mapping_itr.erase(mapping); } @@ -804,17 +809,27 @@ pair VG::rewrite_segment(const step_handle_t& segm pair return_val(segment_end, segment_end); bool first_iter = true; - for (const handle_t& handle : new_segment) { + for (size_t i = 0; i < new_segment.size(); ++i) { + const handle_t& handle = new_segment[i]; // translate to a mapping mapping_t mapping; mapping.set_node_id(get_id(handle)); mapping.set_is_reverse(get_is_reverse(handle)); mapping.length = get_length(handle); + // TODO: there's no efficient way to maintain the ranks dynamically if we're going to be expanding them + if (new_segment.size() <= ranks.size()) { + mapping.rank = ranks[i]; + } auto iterator = path_list.insert(last_pos, mapping); + if (new_segment.size() <= ranks.size()) { + paths.mappings_by_rank[get_path_name(as_path_handle(path_id))][ranks[i]] = &(*iterator); + } + paths.mapping_itr[&(*iterator)] = pair::iterator, int64_t>(iterator, as_integers(segment_end)[0]); + paths.node_mapping[get_id(handle)][path_id].insert(&(*iterator)); // on the first iteration, construct the first step handle for the return value if (first_iter) { @@ -823,6 +838,11 @@ pair VG::rewrite_segment(const step_handle_t& segm } } + // if we're shrinking the rank space, clear out ranks that are now unused + for (size_t i = new_segment.size(); i < ranks.size(); ++i) { + paths.mappings_by_rank[get_path_name(as_path_handle(path_id))].erase(ranks[i]); + } + return return_val; } @@ -874,12 +894,10 @@ void VG::serialize_to_function(const function& emit, id_t chunk_si // This prevents duplication of edges in the serialized output. nonoverlapping_node_context_without_paths(node, g); auto& mappings = paths.get_node_mapping(node); - //cerr << "getting node mappings for " << node->id() << endl; for (auto m : mappings) { auto& name = paths.get_path_name(m.first); auto& mappings = m.second; for (auto& mapping : mappings) { - //cerr << "mapping " << name << pb2json(*mapping) << endl; sorted_paths[name][mapping->rank] = mapping; } }