diff --git a/src/calculations.rs b/src/calculations.rs index ba49169..072bdbc 100644 --- a/src/calculations.rs +++ b/src/calculations.rs @@ -1,4 +1,4 @@ -pub fn get_n(lengths: &Vec, nb_bases_total: u64, percentile: f64) -> u64 { +pub fn get_n(lengths: &Vec, nb_bases_total: u128, percentile: f64) -> u128 { let mut acc = 0; for val in lengths.iter() { acc += *val; @@ -20,7 +20,7 @@ pub fn median + Copy>(array: &[T]) -> f64 { } } -pub fn median_length(array: &[u64]) -> f64 { +pub fn median_length(array: &[u128]) -> f64 { if (array.len() % 2) == 0 { let ind_left = array.len() / 2 - 1; let ind_right = array.len() / 2; diff --git a/src/extract_from_bam.rs b/src/extract_from_bam.rs index 853792b..c1f7857 100644 --- a/src/extract_from_bam.rs +++ b/src/extract_from_bam.rs @@ -3,7 +3,7 @@ use rust_htslib::bam::record::{Aux, Cigar}; use rust_htslib::{bam, bam::Read, htslib}; pub struct Data { - pub lengths: Option>, + pub lengths: Option>, pub all_counts: usize, pub identities: Option>, pub tids: Option>, @@ -59,9 +59,9 @@ pub fn extract(args: &crate::Cli) -> Data { .filter(|read| filter_closure(read)) { lengths.push( - read.seq_len() as u64 - - read.cigar().leading_softclips() as u64 - - read.cigar().trailing_softclips() as u64, + read.seq_len() as u128 + - read.cigar().leading_softclips() as u128 + - read.cigar().trailing_softclips() as u128, ); if args.karyotype || args.phased { tids.push(read.tid()); @@ -80,10 +80,15 @@ pub fn extract(args: &crate::Cli) -> Data { } if let Some(s) = &args.arrow { match args.ubam { - true => crate::feather::save_as_arrow_ubam(s.to_string(), lengths.clone()), - false => { - crate::feather::save_as_arrow(s.to_string(), lengths.clone(), identities.clone()) - } + true => crate::feather::save_as_arrow_ubam( + s.to_string(), + lengths.clone().iter().map(|x| *x as u64).collect(), + ), + false => crate::feather::save_as_arrow( + s.to_string(), + lengths.clone().iter().map(|x| *x as u64).collect(), + identities.clone(), + ), } } // sort vectors in descending order (required for N50/N75) diff --git a/src/histograms.rs b/src/histograms.rs index ff6abab..2cda870 100644 --- a/src/histograms.rs +++ b/src/histograms.rs @@ -11,8 +11,8 @@ use std::cmp::max; // as well as for future customizations // in principle it would be possible to enable the user to change the step size or max value, but I don't want to add too many options to the CLI -pub fn make_histogram_lengths(array: &[u64]) { - let stepsize: u64 = 2000; +pub fn make_histogram_lengths(array: &[u128]) { + let stepsize: u128 = 2000; let max_value = 60_000; let step_count = max_value / stepsize as usize; let mut counts = vec![0; step_count + 1]; @@ -34,7 +34,7 @@ pub fn make_histogram_lengths(array: &[u64]) { "{: >11} {}", format!( "{}-{}", - index as u64 * stepsize, + index as u128 * stepsize, (index + 1) * stepsize as usize ), "∎".repeat(entry / dotsize) diff --git a/src/main.rs b/src/main.rs index 702653e..ed31591 100644 --- a/src/main.rs +++ b/src/main.rs @@ -142,7 +142,7 @@ fn metrics_from_bam(metrics: Data, args: Cli) { } fn generate_main_output( - lengths: &Vec, + lengths: &Vec, identities: Option<&Vec>, genome_size: u64, all_reads: usize, @@ -152,7 +152,7 @@ fn generate_main_output( error!("Not enough reads to calculate metrics!"); panic!(); } - let data_yield: u64 = lengths.iter().sum::(); + let data_yield: u128 = lengths.iter().sum::(); println!("Number of alignments\t{num_reads}"); println!( "% from total reads\t{:.2}", @@ -163,12 +163,12 @@ fn generate_main_output( "Mean coverage\t{:.2}", data_yield as f64 / genome_size as f64 ); - let data_yield_long = lengths.iter().filter(|l| l > &&25000).sum::(); + let data_yield_long = lengths.iter().filter(|l| l > &&25000).sum::(); println!("Yield [Gb] (>25kb)\t{:.2}", data_yield_long as f64 / 1e9); println!("N50\t{}", calculations::get_n(lengths, data_yield, 0.50)); println!("N75\t{}", calculations::get_n(lengths, data_yield, 0.75)); println!("Median length\t{:.2}", calculations::median_length(lengths)); - println!("Mean length\t{:.2}", data_yield / num_reads as u64); + println!("Mean length\t{:.2}", data_yield / num_reads as u128); if let Some(identities) = identities { println!("Median identity\t{:.2}", calculations::median(identities)); println!(