Skip to content

Commit

Permalink
use u128s for length to prevent under/overflow (?)
Browse files Browse the repository at this point in the history
  • Loading branch information
wdecoster committed Oct 25, 2023
1 parent 61f3ced commit ae65f64
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 17 deletions.
4 changes: 2 additions & 2 deletions src/calculations.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pub fn get_n(lengths: &Vec<u64>, nb_bases_total: u64, percentile: f64) -> u64 {
pub fn get_n(lengths: &Vec<u128>, nb_bases_total: u128, percentile: f64) -> u128 {
let mut acc = 0;
for val in lengths.iter() {
acc += *val;
Expand All @@ -20,7 +20,7 @@ pub fn median<T: Into<f64> + Copy>(array: &[T]) -> f64 {
}
}

pub fn median_length(array: &[u64]) -> f64 {
pub fn median_length(array: &[u128]) -> f64 {
if (array.len() % 2) == 0 {
let ind_left = array.len() / 2 - 1;
let ind_right = array.len() / 2;
Expand Down
21 changes: 13 additions & 8 deletions src/extract_from_bam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use rust_htslib::bam::record::{Aux, Cigar};
use rust_htslib::{bam, bam::Read, htslib};

pub struct Data {
pub lengths: Option<Vec<u64>>,
pub lengths: Option<Vec<u128>>,
pub all_counts: usize,
pub identities: Option<Vec<f64>>,
pub tids: Option<Vec<i32>>,
Expand Down Expand Up @@ -59,9 +59,9 @@ pub fn extract(args: &crate::Cli) -> Data {
.filter(|read| filter_closure(read))
{
lengths.push(
read.seq_len() as u64
- read.cigar().leading_softclips() as u64
- read.cigar().trailing_softclips() as u64,
read.seq_len() as u128
- read.cigar().leading_softclips() as u128
- read.cigar().trailing_softclips() as u128,
);
if args.karyotype || args.phased {
tids.push(read.tid());
Expand All @@ -80,10 +80,15 @@ pub fn extract(args: &crate::Cli) -> Data {
}
if let Some(s) = &args.arrow {
match args.ubam {
true => crate::feather::save_as_arrow_ubam(s.to_string(), lengths.clone()),
false => {
crate::feather::save_as_arrow(s.to_string(), lengths.clone(), identities.clone())
}
true => crate::feather::save_as_arrow_ubam(
s.to_string(),
lengths.clone().iter().map(|x| *x as u64).collect(),
),
false => crate::feather::save_as_arrow(
s.to_string(),
lengths.clone().iter().map(|x| *x as u64).collect(),
identities.clone(),
),
}
}
// sort vectors in descending order (required for N50/N75)
Expand Down
6 changes: 3 additions & 3 deletions src/histograms.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ use std::cmp::max;
// as well as for future customizations
// in principle it would be possible to enable the user to change the step size or max value, but I don't want to add too many options to the CLI

pub fn make_histogram_lengths(array: &[u64]) {
let stepsize: u64 = 2000;
pub fn make_histogram_lengths(array: &[u128]) {
let stepsize: u128 = 2000;
let max_value = 60_000;
let step_count = max_value / stepsize as usize;
let mut counts = vec![0; step_count + 1];
Expand All @@ -34,7 +34,7 @@ pub fn make_histogram_lengths(array: &[u64]) {
"{: >11} {}",
format!(
"{}-{}",
index as u64 * stepsize,
index as u128 * stepsize,
(index + 1) * stepsize as usize
),
"∎".repeat(entry / dotsize)
Expand Down
8 changes: 4 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ fn metrics_from_bam(metrics: Data, args: Cli) {
}

fn generate_main_output(
lengths: &Vec<u64>,
lengths: &Vec<u128>,
identities: Option<&Vec<f64>>,
genome_size: u64,
all_reads: usize,
Expand All @@ -152,7 +152,7 @@ fn generate_main_output(
error!("Not enough reads to calculate metrics!");
panic!();
}
let data_yield: u64 = lengths.iter().sum::<u64>();
let data_yield: u128 = lengths.iter().sum::<u128>();
println!("Number of alignments\t{num_reads}");
println!(
"% from total reads\t{:.2}",
Expand All @@ -163,12 +163,12 @@ fn generate_main_output(
"Mean coverage\t{:.2}",
data_yield as f64 / genome_size as f64
);
let data_yield_long = lengths.iter().filter(|l| l > &&25000).sum::<u64>();
let data_yield_long = lengths.iter().filter(|l| l > &&25000).sum::<u128>();
println!("Yield [Gb] (>25kb)\t{:.2}", data_yield_long as f64 / 1e9);
println!("N50\t{}", calculations::get_n(lengths, data_yield, 0.50));
println!("N75\t{}", calculations::get_n(lengths, data_yield, 0.75));
println!("Median length\t{:.2}", calculations::median_length(lengths));
println!("Mean length\t{:.2}", data_yield / num_reads as u64);
println!("Mean length\t{:.2}", data_yield / num_reads as u128);
if let Some(identities) = identities {
println!("Median identity\t{:.2}", calculations::median(identities));
println!(
Expand Down

0 comments on commit ae65f64

Please sign in to comment.