Skip to content

Commit

Permalink
Change merge logic
Browse files Browse the repository at this point in the history
  • Loading branch information
rrwick committed Jan 8, 2025
1 parent 8f58861 commit 8b2edee
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 14 deletions.
4 changes: 2 additions & 2 deletions src/clean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ fn finished_message(out_gfa: &Path) {
fn clean_graph(graph: &mut UnitigGraph, remove_tigs: &[u32], in_gfa: &Path) {
section_header("Cleaning graph");
explanation("The user-specified tigs are now removed from the graph.");
check_tig_nums_are_valid(in_gfa, &graph, remove_tigs);
check_tig_nums_are_valid(in_gfa, graph, remove_tigs);
let remove_set: HashSet<u32> = HashSet::from_iter(remove_tigs.iter().cloned());
graph.remove_unitigs_by_number(remove_set);
graph.print_basic_graph_info();
Expand All @@ -77,7 +77,7 @@ fn clean_graph(graph: &mut UnitigGraph, remove_tigs: &[u32], in_gfa: &Path) {
fn merge_graph(graph: &mut UnitigGraph) {
section_header("Merging linear paths");
explanation("Linear paths in the graph are now merged.");
merge_linear_paths(graph, &vec![], false);
merge_linear_paths(graph, &vec![]);
graph.print_basic_graph_info();
graph.renumber_unitigs();
}
Expand Down
2 changes: 1 addition & 1 deletion src/cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ fn save_cluster_gfa(sequences: &[Sequence], cluster_num: u16, gfa_lines: &Vec<St
cluster_graph.remove_sequence_from_graph(id);
}
cluster_graph.remove_zero_depth_unitigs();
merge_linear_paths(&mut cluster_graph, &cluster_seqs, false);
merge_linear_paths(&mut cluster_graph, &cluster_seqs);
cluster_graph.save_gfa(&out_gfa, &cluster_seqs).unwrap();
}

Expand Down
34 changes: 25 additions & 9 deletions src/graph_simplification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use std::collections::HashSet;
use std::rc::Rc;

use crate::misc::{reverse_complement, strand};
use crate::position::Position;
use crate::sequence::Sequence;
use crate::unitig::{Unitig, UnitigStrand};
use crate::unitig_graph::UnitigGraph;
Expand Down Expand Up @@ -313,7 +314,7 @@ fn get_common_end_seq(unitigs: &[UnitigStrand]) -> Vec<u8> {
}


pub fn merge_linear_paths(graph: &mut UnitigGraph, seqs: &Vec<Sequence>, require_same_depth: bool) {
pub fn merge_linear_paths(graph: &mut UnitigGraph, seqs: &Vec<Sequence>) {
// This function looks for linear paths in the graph (where one Unitig leads only to another
// and vice versa) and merges them together when possible.
//
Expand Down Expand Up @@ -343,12 +344,10 @@ pub fn merge_linear_paths(graph: &mut UnitigGraph, seqs: &Vec<Sequence>, require
// Extend the path as far as possible.
loop {
let unitig = current_path.last().unwrap();
let current_depth = unitig.depth();
if cannot_merge_end(unitig.number(), unitig.strand, &fixed_starts, &fixed_ends) { break; }
let mut outputs = if unitig.strand { get_exclusive_outputs(&unitig.unitig) } else { get_exclusive_inputs(&unitig.unitig) };
if outputs.len() != 1 { break; }
let output = &mut outputs[0];
if require_same_depth && output.depth() != current_depth { break; }
if !unitig.strand { output.strand = !output.strand; }
let output_number = output.number();
if already_used.contains(&output_number) { break; }
Expand Down Expand Up @@ -433,8 +432,7 @@ fn merge_path(graph: &mut UnitigGraph, path: &Vec<UnitigStrand>, new_unitig_numb
number: new_unitig_number,
reverse_seq: reverse_complement(&merged_seq),
forward_seq: merged_seq,
depth: if !forward_positions.is_empty() { forward_positions.len() as f64 }
else { weighted_mean_depth(path) },
depth: get_merge_path_depth(path, &forward_positions),
forward_positions, reverse_positions,
forward_next, forward_prev, reverse_next, reverse_prev,
..Default::default()
Expand Down Expand Up @@ -502,6 +500,24 @@ fn merge_unitig_seqs(path: &Vec<UnitigStrand>) -> Vec<u8> {
}


fn get_merge_path_depth(path: &Vec<UnitigStrand>, forward_positions: &[Position]) -> f64 {
// If the unitigs have position information, use that to determine depth.
if !forward_positions.is_empty() {
return forward_positions.len() as f64;
}

// If the path contains an anchor unitig, set the merged depth to the anchor's depth.
for u in path {
if u.anchor() {
return u.depth();
}
}

// Otherwise, give a weighted mean depth of the unitigs in the path.
weighted_mean_depth(path)
}


fn weighted_mean_depth(path: &Vec<UnitigStrand>) -> f64 {
let total_length = path.iter().map(|u| u.length()).sum::<u32>() as f64;
let mut depth_sum = 0.0;
Expand Down Expand Up @@ -729,7 +745,7 @@ mod tests {
fn test_merge_linear_paths_1() {
let (mut graph, seqs) = UnitigGraph::from_gfa_lines(&get_test_gfa_3());
assert_eq!(graph.unitigs.len(), 7);
merge_linear_paths(&mut graph, &seqs, false);
merge_linear_paths(&mut graph, &seqs);
assert_eq!(graph.unitigs.len(), 3);
assert_eq!(std::str::from_utf8(&graph.unitig_index.get(&8).unwrap().borrow().forward_seq).unwrap(),
"TTCGCTGCGCTCGCTTCGCTTTTGCACAGCGACGACGGCATGCCTGAATCGCCTA");
Expand All @@ -753,7 +769,7 @@ mod tests {
fn test_merge_linear_paths_2() {
let (mut graph, seqs) = UnitigGraph::from_gfa_lines(&get_test_gfa_4());
assert_eq!(graph.unitigs.len(), 5);
merge_linear_paths(&mut graph, &seqs, false);
merge_linear_paths(&mut graph, &seqs);
assert_eq!(graph.unitigs.len(), 2);
assert_eq!(std::str::from_utf8(&graph.unitig_index.get(&6).unwrap().borrow().forward_seq).unwrap(),
"ACGACTACGAGCACGAGTCGTCGTCGTAACTGACT");
Expand All @@ -772,7 +788,7 @@ mod tests {
fn test_merge_linear_paths_3() {
let (mut graph, seqs) = UnitigGraph::from_gfa_lines(&get_test_gfa_5());
assert_eq!(graph.unitigs.len(), 6);
merge_linear_paths(&mut graph, &seqs, false);
merge_linear_paths(&mut graph, &seqs);
assert_eq!(graph.unitigs.len(), 5);
assert_eq!(std::str::from_utf8(&graph.unitig_index.get(&7).unwrap().borrow().forward_seq).unwrap(),
"AAATGCGACTGTG");
Expand All @@ -782,7 +798,7 @@ mod tests {
fn test_merge_linear_paths_4() {
let (mut graph, seqs) = UnitigGraph::from_gfa_lines(&get_test_gfa_14());
assert_eq!(graph.unitigs.len(), 13);
merge_linear_paths(&mut graph, &seqs, false);
merge_linear_paths(&mut graph, &seqs);
assert_eq!(graph.unitigs.len(), 11);
}
}
3 changes: 2 additions & 1 deletion src/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,13 +232,14 @@ fn apply_bridges(graph: &mut UnitigGraph, bridges: &Vec<Bridge>, bridge_depth: f
}
}
delete_unitigs_not_connected_to_anchor(graph);
graph.remove_zero_depth_unitigs();

// TODO: add logic for removing non-anchor tips to handle blunt ends?
}


fn merge_after_bridging(graph: &mut UnitigGraph) {
merge_linear_paths(graph, &vec![], true);
merge_linear_paths(graph, &vec![]);
graph.print_basic_graph_info();
graph.renumber_unitigs();
}
Expand Down
2 changes: 1 addition & 1 deletion src/trim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ fn clean_up_graph(graph: &mut UnitigGraph, sequences: &Vec<Sequence>) {
has occurred above.");
graph.recalculate_depths();
graph.remove_zero_depth_unitigs();
merge_linear_paths(graph, sequences, false);
merge_linear_paths(graph, sequences);
graph.print_basic_graph_info();
graph.renumber_unitigs();
}
Expand Down

0 comments on commit 8b2edee

Please sign in to comment.