From 61e0e5bfcaebb9f3725741d7cceb09b5033695e7 Mon Sep 17 00:00:00 2001 From: jeffersonfparil Date: Sat, 11 May 2024 16:59:06 +1000 Subject: [PATCH] improving error handling --- src/aldknni.rs | 71 ++++++++++----- src/filter_missing.rs | 95 ++++++++++++------- src/geno.rs | 207 +++++++++++++++++++++++++++++++----------- src/helpers.rs | 125 ++++++++++++++++--------- 4 files changed, 346 insertions(+), 152 deletions(-) diff --git a/src/aldknni.rs b/src/aldknni.rs index 8889168..5ab1f20 100644 --- a/src/aldknni.rs +++ b/src/aldknni.rs @@ -554,7 +554,13 @@ impl GenotypesAndPhenotypes { // - Input vcf file will have all alleles per locus extracted. // - Similarly, input sync file will have all alleles per locus extracted. // - Finally, input allele frequency table file which can be represented by all alleles or one less allele per locus will be appended with the alternative allele if the sum of allele frequencies per locus do not add up to one. - self.check(); + match self.check() { + Ok(x) => x, + Err(e) => return Err(ImputefError{ + code: 108, + message: "Error checking GenotypesAndPhenotypes type in method adaptive_ld_knn_imputation() | ".to_owned() + &e.message + }) + }; // Parse arguments let min_loci_corr: &f64 = optimisation_arguments.0; let max_pool_dist: &f64 = optimisation_arguments.1; @@ -591,7 +597,7 @@ impl GenotypesAndPhenotypes { Ok(x) => x, Err(_) => { return Err(ImputefError { - code: 108, + code: 109, message: "Error defining chunks of the file for parallel imputation." .to_owned(), }) @@ -619,7 +625,7 @@ impl GenotypesAndPhenotypes { Ok(x) => x, Err(_) => { return Err(ImputefError { - code: 109, + code: 110, message: "Error executing per_chunk_aldknni() method on self_clone." .to_owned(), }) @@ -632,7 +638,7 @@ impl GenotypesAndPhenotypes { .duration_since(UNIX_EPOCH) { Ok(x) => x.as_secs_f64(), Err(_) => return Err(ImputefError{ - code: 110, + code: 111, message: "Error extracting time in UNIX_EPOCH within write_tsv() method for GenotypesAndPhenotypes struct.".to_owned() }) }; @@ -642,7 +648,7 @@ impl GenotypesAndPhenotypes { Some(x) => x.to_owned().to_string().len(), None => { return Err(ImputefError { - code: 111, + code: 112, message: "Error extracting the last element of loci_idx. Probably empty." .to_owned(), }) @@ -672,15 +678,23 @@ impl GenotypesAndPhenotypes { match chromosome.len() == p { true => (), false => return Err(ImputefError{ - code: 112, + code: 113, message: "Error, the number of chromosome names and the total number of loci are not equal.".to_owned() }) }; // Instantiate output file + let mae_intermediate = match sensible_round(sum_mae / n_missing, 4) { + Ok(x) => x, + Err(e) => return Err(ImputefError { + code: 114, + message: "Error printing the MAE for the intermediate file: ".to_owned() + + &fname_intermediate_file + }) + }; println!( "--> {}: Writing out intermediate file with expected MAE of {}: {}", i, - sensible_round(sum_mae / n_missing, 4), + mae_intermediate, &fname_intermediate_file ); let mut file_out = match OpenOptions::new() @@ -692,7 +706,7 @@ impl GenotypesAndPhenotypes { Ok(x) => x, Err(_) => { return Err(ImputefError { - code: 113, + code: 115, message: "Unable to create file: ".to_owned() + &fname_intermediate_file, }) } @@ -705,7 +719,7 @@ impl GenotypesAndPhenotypes { ) { Ok(x) => x, Err(_) => return Err(ImputefError{ - code: 114, + code: 116, message: "Error calling write_all() within the write_tsv() method for GenotypesAndPhenotypes struct. Unable to create file: ".to_owned() + &fname_intermediate_file }) }; @@ -722,7 +736,7 @@ impl GenotypesAndPhenotypes { allele_frequencies .column(j) .iter() - .map(|&x| parse_f64_roundup_and_own(x, 6)) + .map(|&x| parse_f64_roundup_and_own(x, 6).expect("Error in rounding and converting the allele frequencies into String.")) .collect::>() .join("\t"), ] @@ -731,7 +745,7 @@ impl GenotypesAndPhenotypes { match file_out.write_all(line.as_bytes()) { Ok(x) => x, Err(_) => return Err(ImputefError{ - code: 115, + code: 117, message: "Error calling write_all() per line of the output file within the write_tsv() method for GenotypesAndPhenotypes struct. Unable to create file: ".to_owned() + &fname_intermediate_file }) }; @@ -760,7 +774,7 @@ impl GenotypesAndPhenotypes { Ok(x) => x, Err(_) => { return Err(ImputefError { - code: 116, + code: 118, message: "Error opening the intermediate file of the first chunk: ".to_owned() + &vec_fname_intermediate_files_and_mae[0].0, }) @@ -773,7 +787,7 @@ impl GenotypesAndPhenotypes { Ok(x) => x, Err(_) => { return Err(ImputefError { - code: 117, + code: 119, message: "Error opening the intermediate file of a chunk: ".to_owned() + &name_and_mae.0, }) @@ -783,7 +797,7 @@ impl GenotypesAndPhenotypes { Ok(x) => x, Err(_) => { return Err(ImputefError { - code: 118, + code: 120, message: "Error concatenating intermediate output files: ".to_owned() + &vec_fname_intermediate_files_and_mae[0].0 + " and " @@ -795,7 +809,7 @@ impl GenotypesAndPhenotypes { Ok(x) => x, Err(_) => { return Err(ImputefError { - code: 119, + code: 121, message: "Error removing the intermediate file of a chunk: ".to_owned() + &name_and_mae.0, }) @@ -804,7 +818,14 @@ impl GenotypesAndPhenotypes { sum_mae += name_and_mae.1; n_missing += name_and_mae.2; } - let mae = sensible_round(sum_mae / n_missing, 4); + let mae = match sensible_round(sum_mae / n_missing, 4) { + Ok(x) => x, + Err(e) => return Err(ImputefError { + code: 122, + message: "Error in adaptive_ld_knn_imputation() method | ".to_owned() + + &e.message + }) + }; Ok((vec_fname_intermediate_files_and_mae[0].0.to_owned(), mae)) } } @@ -865,7 +886,7 @@ pub fn impute_aldknni( Ok(x) => x, Err(e) => { return Err(ImputefError { - code: 120, + code: 121, message: e.message, }) } @@ -900,7 +921,7 @@ pub fn impute_aldknni( Ok(x) => x, Err(e) => { return Err(ImputefError { - code: 121, + code: 122, message: e.message, }) } @@ -909,7 +930,7 @@ pub fn impute_aldknni( let duration = match end.duration_since(start) { Ok(x) => x, Err(e) => return Err(ImputefError{ - code: 122, + code: 123, message: "Error measuring the duration of running adaptive_ld_knn_imputation() within impute_aldknni().".to_owned() }) }; @@ -928,7 +949,7 @@ pub fn impute_aldknni( Ok(x) => x, Err(_) => { return Err(ImputefError { - code: 123, + code: 124, message: "Error loading the imputed genotype file: ".to_owned() + &fname_imputed, }) } @@ -937,7 +958,7 @@ pub fn impute_aldknni( Ok(x) => x, Err(_) => { return Err(ImputefError { - code: 124, + code: 125, message: "Error removing concatenated intermediate file: ".to_owned() + &fname_imputed, }) @@ -950,7 +971,7 @@ pub fn impute_aldknni( match genotypes_and_phenotypes.missing_rate() { Ok(x) => x, Err(_) => return Err(ImputefError{ - code: 125, + code: 126, message: "Error measuring sparsity of the data using missing_rate() method within impute_aldknni().".to_owned() }) }, @@ -962,7 +983,7 @@ pub fn impute_aldknni( Ok(x) => x, Err(_) => { return Err(ImputefError { - code: 126, + code: 127, message: "Error calling filter_out_top_missing_loci() method within impute_aldknni()." .to_owned(), @@ -978,7 +999,7 @@ pub fn impute_aldknni( match genotypes_and_phenotypes.missing_rate() { Ok(x) => x, Err(_) => return Err(ImputefError{ - code: 127, + code: 128, message: "Error measuring sparsity of the data using missing_rate() method after filtering for missing top loci within impute_aldknni().".to_owned() }) }, @@ -989,7 +1010,7 @@ pub fn impute_aldknni( .write_tsv(filter_stats, false, out, n_threads) { Ok(x) => x, Err(_) => return Err(ImputefError{ - code: 127, + code: 129, message: "Error writing the output file using the write_tsv() method within impute_aldknni(): ".to_owned() + &out }) }; diff --git a/src/filter_missing.rs b/src/filter_missing.rs index df20d73..91b0041 100644 --- a/src/filter_missing.rs +++ b/src/filter_missing.rs @@ -1,5 +1,4 @@ use ndarray::prelude::*; -use std::io::{self, Error, ErrorKind}; use crate::helpers::*; use crate::structs_and_traits::*; @@ -9,18 +8,17 @@ impl GenotypesAndPhenotypes { &mut self, min_depth_below_which_are_missing: &f64, max_depth_above_which_are_missing: &f64, - ) -> io::Result<&mut Self> { - self.check().expect("Error calling check() method within set_missing_by_depth() method for GenotypesAndPhenotypes struct."); + ) -> Result<&mut Self, ImputefError> { + match self.check() { + Ok(x) => x, + Err(e) => return Err(ImputefError{ + code: 201, + message: "Error checking GenotypesAndPhenotypes in the method set_missing_by_depth() | ".to_owned() + + &e.message + }) + }; let (n, _p) = self.intercept_and_allele_frequencies.dim(); let (_n, l) = self.coverages.dim(); - // println!( - // "self.intercept_and_allele_frequencies.dim()={:?}", - // self.intercept_and_allele_frequencies.dim() - // ); - // println!("self.chromosome.len()={:?}", self.chromosome.len()); - // println!("self.position.len()={:?}", self.position.len()); - // println!("self.allele.len()={:?}", self.allele.len()); - // println!("self.coverages.dim()={:?}", self.coverages.dim()); let (loci_idx, _loci_chr, _loci_pos) = self.count_loci().expect("Error defining loci indexes and identities via count_loci() method within set_missing_by_depth() method for GenotypesAndPhenotypes struct."); for i in 0..n { for j in 0..l { @@ -37,11 +35,18 @@ impl GenotypesAndPhenotypes { } } } - self.check().expect("Error calling check() method within set_missing_by_depth() method for GenotypesAndPhenotypes struct."); + match self.check() { + Ok(x) => x, + Err(e) => return Err(ImputefError{ + code: 202, + message: "Error checking GenotypesAndPhenotypes in the method set_missing_by_depth() | ".to_owned() + + &e.message + }) + }; Ok(self) } - pub fn missing_rate(&mut self) -> io::Result { + pub fn missing_rate(&mut self) -> Result { let (n, l) = self.coverages.dim(); let sum = self.coverages.fold(0, |sum, &x| { if (x.is_nan()) || (x == 0.0) { @@ -50,14 +55,21 @@ impl GenotypesAndPhenotypes { sum } }); - Ok(sensible_round(sum as f64 * 100.0 / ((n * l) as f64), 5)) + sensible_round(sum as f64 * 100.0 / ((n * l) as f64), 5) } pub fn filter_out_top_missing_pools( &mut self, frac_top_missing_pools: &f64, - ) -> io::Result<&mut Self> { - self.check().expect("Error calling check() method within filter_out_top_missing_pools() method for GenotypesAndPhenotypes struct."); + ) -> Result<&mut Self, ImputefError> { + match self.check() { + Ok(x) => x, + Err(e) => return Err(ImputefError{ + code: 203, + message: "Error checking GenotypesAndPhenotypes in the method filter_out_top_missing_pools() | ".to_owned() + + &e.message + }) + }; let n = self.intercept_and_allele_frequencies.nrows(); let p = self.intercept_and_allele_frequencies.ncols() - 1; let missingness_per_pool: Array1 = self @@ -75,10 +87,10 @@ impl GenotypesAndPhenotypes { missingness_per_pool.fold(0.0, |sum, &x| if x > 0.0 { sum + 1.0 } else { sum }); let n_after_filtering = n - (n_missing * frac_top_missing_pools).ceil() as usize; if n_after_filtering == 0 { - return Err(Error::new( - ErrorKind::Other, - "No pools left after filtering, please reduce 'frac_top_missing_pools'".to_owned(), - )); + return Err(ImputefError{ + code: 204, + message: "No pools left after filtering, please reduce 'frac_top_missing_pools'".to_owned(), + }); } // Sort by increasing missingness let mut idx = (0..n).collect::>(); @@ -127,7 +139,14 @@ impl GenotypesAndPhenotypes { // println!("self.allele.len()={:?}", self.allele.len()); // println!("self.intercept_and_allele_frequencies.dim()={:?}", self.intercept_and_allele_frequencies.dim()); // println!("self.coverages.len()={:?}", self.coverages.dim()); - self.check().expect("Error calling check() method within filter_out_top_missing_pools() method for GenotypesAndPhenotypes struct."); + match self.check() { + Ok(x) => x, + Err(e) => return Err(ImputefError{ + code: 205, + message: "Error checking GenotypesAndPhenotypes in the method filter_out_top_missing_pools() | ".to_owned() + + &e.message + }) + }; Ok(self) } @@ -135,8 +154,15 @@ impl GenotypesAndPhenotypes { pub fn filter_out_top_missing_loci( &mut self, frac_top_missing_loci: &f64, - ) -> io::Result<&mut Self> { - self.check().expect("Error calling check() method within filter_out_top_missing_loci() method for GenotypesAndPhenotypes struct."); + ) -> Result<&mut Self, ImputefError> { + match self.check() { + Ok(x) => x, + Err(e) => return Err(ImputefError{ + code: 206, + message: "Error checking GenotypesAndPhenotypes in the method filter_out_top_missing_loci() | ".to_owned() + + &e.message + }) + }; let n = self.intercept_and_allele_frequencies.nrows(); let (loci_idx, _loci_chr, _loci_pos) = self.count_loci().expect("Error calling count_loci() method within filter_out_top_missing_loci() method for GenotypesAndPhenotypes struct."); let l = loci_idx.len() - 1; // Less one for the trailing locus @@ -159,10 +185,10 @@ impl GenotypesAndPhenotypes { // Define the number of loci kept after filtering let l_after_filtering = l - (l_missing * frac_top_missing_loci).ceil() as usize; if l_after_filtering == 0 { - return Err(Error::new( - ErrorKind::Other, - "No loci left after filtering, please reduce 'frac_top_missing_loci'".to_owned(), - )); + return Err(ImputefError { + code: 207, + message: "No loci left after filtering, please reduce 'frac_top_missing_loci'".to_owned(), + }); } // Sort by increasing missingness let mut idx = (0..l).collect::>(); @@ -224,13 +250,14 @@ impl GenotypesAndPhenotypes { self.allele = new_allele; self.intercept_and_allele_frequencies = new_intercept_and_allele_frequencies; self.coverages = new_coverages; - // println!("self={:?}", self); - // println!("self.chromosome.len()={:?}", self.chromosome.len()); - // println!("self.position.len()={:?}", self.position.len()); - // println!("self.allele.len()={:?}", self.allele.len()); - // println!("self.intercept_and_allele_frequencies.dim()={:?}", self.intercept_and_allele_frequencies.dim()); - // println!("self.coverages.len()={:?}", self.coverages.dim()); - self.check().expect("Error calling check() method within filter_out_top_missing_loci() method for GenotypesAndPhenotypes struct."); + match self.check() { + Ok(x) => x, + Err(e) => return Err(ImputefError{ + code: 208, + message: "Error checking GenotypesAndPhenotypes in the method filter_out_top_missing_loci() | ".to_owned() + + &e.message + }) + }; Ok(self) } } diff --git a/src/geno.rs b/src/geno.rs index a1c94f7..c182c3e 100644 --- a/src/geno.rs +++ b/src/geno.rs @@ -1,6 +1,6 @@ use ndarray::prelude::*; use std::fs::File; -use std::io::{self, prelude::*, BufReader, Error, ErrorKind, SeekFrom}; +use std::io::{self, prelude::*, BufReader, SeekFrom}; use std::str; use std::sync::{Arc, Mutex}; @@ -8,10 +8,13 @@ use crate::helpers::*; use crate::structs_and_traits::*; impl Parse for String { - fn lparse(&self) -> io::Result> { + fn lparse(&self) -> Result, ImputefError> { // Ignore commented-out lines (i.e. '#' => 35) if self.as_bytes()[0] == 35_u8 { - return Err(Error::new(ErrorKind::Other, "Commented out line")); + return Err(ImputefError { + code: 301, + message: "Commented out line: ".to_owned() + &self + }) } let vec_line: Vec<&str> = self.split('\t').collect(); let vec_line: Vec<&str> = if vec_line.len() == 1 { @@ -31,10 +34,10 @@ impl Parse for String { let position = match vec_line[1].parse::() { Ok(x) => x, Err(_) => { - return Err(Error::new( - ErrorKind::Other, - "Please check format of the file: position is not and integer.", - )) + return Err(ImputefError { + code: 302, + message: "Please check format of the file: position is not and integer: ".to_owned() + &self + }) } }; let alleles_vector: Vec = if vec_line[2].is_empty() { @@ -42,7 +45,7 @@ impl Parse for String { } else { vec![vec_line[2].to_owned()] }; - let matrix: Array2 = Array2::from_shape_vec( + let matrix: Array2 = match Array2::from_shape_vec( (n, 1), vec_line[3..l] .iter() @@ -53,8 +56,14 @@ impl Parse for String { } }) .collect::>(), - ) - .expect("Error parsing the allele frequency table text file within the lparse() method for parsing String into LocusFrequencies struct."); + ) { + Ok(x) => x, + Err(_) => return Err(ImputefError{ + code: 303, + message: "Error parsing the allele frequency table text file within the lparse() method for parsing String into LocusFrequencies struct: ".to_owned() + + &self + }) + }; let freq_line = LocusFrequencies { chromosome, position, @@ -73,26 +82,54 @@ impl LoadAll for FileGeno { end: &u64, _filter_stats: &FilterStats, _keep_p_minus_1: bool, - ) -> io::Result<(Vec, Vec)> { + ) -> Result<(Vec, Vec), ImputefError> { // Input syn file let fname = self.filename.clone(); // Prepare output vectors let mut freq: Vec = Vec::new(); let cnts: Vec = Vec::new(); // Empty and will remain empty as each line corresponds to just an allele of a locus // Input file chunk - let file = File::open(fname.clone()).expect("Error opening the allele frequency table text file within the per_chunk_load() method for FileGeno struct."); + let file = match File::open(fname.clone()) { + Ok(x) => x, + Err(_) => return Err(ImputefError{ + code: 304, + message: "Error opening the allele frequency table text file within the per_chunk_load() method for FileGeno struct: ".to_owned() + + &fname + }) + }; let mut reader = BufReader::new(file); // Navigate to the start of the chunk let mut i: u64 = *start; - reader.seek(SeekFrom::Start(*start)).expect("Error navigating across the allele frequency table text file within the per_chunk_load() method for FileGeno struct."); + match reader.seek(SeekFrom::Start(*start)) { + Ok(x) => x, + Err(_) => return Err(ImputefError{ + code: 305, + message: "Error navigating across the allele frequency table text file within the per_chunk_load() method for FileGeno struct: ".to_owned() + + &fname + }) + }; // Read and parse until the end of the chunk while i < *end { // Instantiate the line let mut line = String::new(); // Read the line which automatically moves the cursor position to the next line - let _ = reader.read_line(&mut line).expect("Error reading the allele frequency table text file within the per_chunk_load() method for FileGeno struct."); + let _ = match reader.read_line(&mut line) { + Ok(x) => x, + Err(_) => return Err(ImputefError{ + code: 306, + message: "Error reading the allele frequency table text file within the per_chunk_load() method for FileGeno struct: ".to_owned() + + &fname + }) + }; // Find the new cursor position - i = reader.stream_position().expect("Error navigating across the allele frequency table text file within the per_chunk_load() method for FileGeno struct."); + i = match reader.stream_position() { + Ok(x) => x, + Err(_) => return Err(ImputefError { + code: 307, + message: "Error navigating across the allele frequency table text file within the per_chunk_load() method for FileGeno struct: ".to_owned() + + &fname + }) + }; // Remove trailing newline character in Unix-like (\n) and Windows (\r) if line.ends_with('\n') { line.pop(); @@ -103,17 +140,16 @@ impl LoadAll for FileGeno { // Parse the geno line let allele_freqs: LocusFrequencies = match line.lparse() { Ok(x) => *x, - Err(x) => match x.kind() { - ErrorKind::Other => continue, + Err(x) => match x { + ImputefError => continue, _ => { - return Err(Error::new( - ErrorKind::Other, - "T_T Input sync file error, i.e. '".to_owned() - + &fname - + "' at line with the first 20 characters as: " - + &line[0..20] - + ".", - )) + return Err(ImputefError{ + code: 308, + message: "T_T Input sync file error, i.e. '".to_owned() + + &fname + + "' at line with the first 20 characters as: " + + &line + }) } }, }; @@ -127,10 +163,17 @@ impl LoadAll for FileGeno { filter_stats: &FilterStats, keep_p_minus_1: bool, n_threads: &usize, - ) -> io::Result<(Vec, Vec)> { + ) -> Result<(Vec, Vec), ImputefError> { let fname = self.filename.clone(); // Find the positions whereto split the file into n_threads pieces - let chunks = find_file_splits(&fname, n_threads).expect("Error splitting the allele frequency table file format given the number of threads suppplied within load() method for FileGeno struct."); + let chunks = match find_file_splits(&fname, n_threads) { + Ok(x) => x, + Err(e) => return Err(ImputefError { + code: 309, + message: "Error splitting the allele frequency table file format given the number of threads suppplied within load() method for FileGeno struct | ".to_owned() + + &e.message + }) + }; let n_threads = chunks.len() - 1; println!("Chunks: {:?}", chunks); // Tuple arguments of pileup2sync_chunks @@ -168,12 +211,26 @@ impl LoadAll for FileGeno { } // Waiting for all threads to finish for thread in thread_objects { - thread.join().expect("Unknown thread error occured."); + match thread.join() { + Ok(x) => x, + Err(_) => return Err(ImputefError{ + code: 310, + message: "Unknown thread error occured in load() method for FileGeno struct: ".to_owned() + + &fname + }) + }; } // Extract output filenames from each thread into a vector and sort them let mut freq: Vec = Vec::new(); let cnts: Vec = Vec::new(); // Empty and will remain empty as each line corresponds to just an allele of a locus - for x in thread_ouputs_freq.lock().expect("Error unlocking the threads after multi-threaded execution of per_chunk_load() within load() method for FileGeno struct.").iter() { + for x in match thread_ouputs_freq.lock(){ + Ok(x) => x, + Err(_) => return Err(ImputefError{ + code: 311, + message: "Error unlocking the threads after multi-threaded execution of per_chunk_load() within load() method for FileGeno struct: ".to_owned() + + &fname + }) + }.iter() { freq.push(x.clone()); } freq.sort_by(|a, b| { @@ -189,14 +246,32 @@ impl LoadAll for FileGeno { filter_stats: &FilterStats, keep_p_minus_1: bool, n_threads: &usize, - ) -> io::Result { + ) -> Result { // No filtering! Just loading the allele frequency data // Extract pool names - let file: File = File::open(self.filename.clone()).expect("Error opening the allele frequency table text file within the convert_into_genotypes_and_phenotypes() method for FileGeno struct."); + let file: File = match File::open(self.filename.clone()) { + Ok(x) => x, + Err(_) => return Err(ImputefError{ + code: 312, + message: "Error opening the allele frequency table text file within the convert_into_genotypes_and_phenotypes() method for FileGeno struct: ".to_owned() + + &self.filename + }) + }; let reader = io::BufReader::new(file); let mut header: String = match reader.lines().next() { - Some(x) => x.expect("Error reading the allele frequency table text file within the convert_into_genotypes_and_phenotypes() method for FileGeno struct."), - None => return Err(Error::new(ErrorKind::Other, "No header line found.")), + Some(x) => match x { + Ok(y) => y, + Err(_) => return Err(ImputefError{ + code: 313, + message: "Error reading the header of the allele frequency table text file within the convert_into_genotypes_and_phenotypes() method for FileGeno struct: ".to_owned() + + &self.filename + }) + }, + None => return Err(ImputefError { + code: 314, + message: "No header line found in file: .".to_owned() + + &self.filename + }), }; if header.ends_with('\n') { header.pop(); @@ -223,11 +298,14 @@ impl LoadAll for FileGeno { // Load allele frequencies let (freqs, _cnts) = self.load(filter_stats, keep_p_minus_1, n_threads).expect("Error calling load() within the convert_into_genotypes_and_phenotypes() method for FileGeno struct."); let n = freqs[0].matrix.nrows(); - assert_eq!( - n, - pool_names.len(), - "Header names and allele frequency data does not have the same number of samples." - ); + match n == pool_names.len() { + true => (), + false => return Err(ImputefError { + code: 315, + message: "Header names and allele frequency data does not have the same number of samples in the file: .".to_owned() + + &self.filename + }) + }; let mut p = freqs.len(); // total number of alleles across all loci p += 1; // include the intercept let mut chromosome: Vec = Vec::with_capacity(p); @@ -341,15 +419,25 @@ pub fn load_geno<'a, 'b>( _fname_out_prefix: &'a str, _rand_id: &'a str, n_threads: &'a usize, -) -> io::Result<(GenotypesAndPhenotypes, &'b FilterStats)> { +) -> Result<(GenotypesAndPhenotypes, &'b FilterStats), ImputefError> { // Extract pool names from the txt file let file: File = File::open(fname).expect("Error reading the allele frequency table file."); let reader = io::BufReader::new(file); - let mut header: String = reader + let mut header: String = match reader .lines() - .next() - .expect("Error reading the allele frequency table file.") - .expect("Please check the format of the allele frequency table text file."); + .next() { + Some(x) => match x { + Ok(y) => y, + Err(_) => return Err(ImputefError { + code: 316, + message: "Error reading the allele frequency table file: ".to_owned() + &fname + }) + }, + None => return Err(ImputefError{ + code: 317, + message: "Please check the format of the allele frequency table text file: ".to_owned() + &fname + }) + }; if header.ends_with('\n') { header.pop(); if header.ends_with('\r') { @@ -367,7 +455,13 @@ pub fn load_geno<'a, 'b>( } else { vec_header }; - assert!(vec_header.len() > 3, "Error unable to properly parse the header line. Please make sure the allele frequency table file is separated by tabs, commas, or semi-colons."); + match vec_header.len() > 3 { + true => (), + false => return Err(ImputefError{ + code: 318, + message: "Error unable to properly parse the header line. Please make sure the allele frequency table file: ".to_owned() + &fname +" is separated by tabs, commas, or semi-colons." + }) + }; let pool_names: Vec = vec_header[3..vec_header.len()] .iter() .map(|&x| x.to_owned()) @@ -377,17 +471,26 @@ pub fn load_geno<'a, 'b>( if filter_stats.pool_sizes.len() == 1 { filter_stats.pool_sizes = vec![filter_stats.pool_sizes[0]; n]; } - assert_eq!( - filter_stats.pool_sizes.len(), - n, - "Error: the number of pools and the pool sizes do not match." - ); + match filter_stats.pool_sizes.len() == n { + true => (), + false => return Err(ImputefError { + code: 319, + message: "Error in the number of pools and the pool sizes do not match in the input file: ".to_owned() + + &fname + }), + }; let file_geno = FileGeno { filename: fname.to_owned(), }; - Ok((file_geno - .convert_into_genotypes_and_phenotypes(filter_stats, false, n_threads) - .expect("Error parsing the genotype data (extracted from allele frequency table text file) via convert_into_genotypes_and_phenotypes() method within impute()."), + let genotypes_and_phenotypes = match file_geno + .convert_into_genotypes_and_phenotypes(filter_stats, false, n_threads) { + Ok(x) => x, + Err(e) => return Err(ImputefError { + code: 320, + message: "Error parsing the genotype data (extracted from allele frequency table text file: ".to_owned() + &fname + ") via convert_into_genotypes_and_phenotypes() method within impute()." + }) + }; + Ok((genotypes_and_phenotypes, filter_stats)) } diff --git a/src/helpers.rs b/src/helpers.rs index 70f4448..2a14847 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -5,8 +5,9 @@ use ndarray::prelude::*; use statrs::distribution::{ContinuousCDF, StudentsT}; use std::fs::File; -use std::io::{self, prelude::*, BufReader, SeekFrom}; -use std::io::{Error, ErrorKind}; +use std::io::{prelude::*, BufReader, SeekFrom}; + +use crate::ImputefError; // use crate::structs_and_traits::*; @@ -30,7 +31,7 @@ fn find_start_of_next_line(fname: &str, pos: u64) -> u64 { pub fn define_chunks( loci_idx: &[usize], n_threads: &usize, -) -> io::Result<(Vec, Vec)> { +) -> Result<(Vec, Vec), ImputefError> { let mut n_chunks = *n_threads; // can be more than the number of threads due to unequal division of the loci let l = loci_idx.len(); let chunk_size = (l as f64 / n_chunks as f64).floor() as usize; @@ -40,36 +41,43 @@ pub fn define_chunks( } else { vec![0, l] }; - // println!("l={:?}", l); - // println!("n_chunks={:?}", n_chunks); - // println!("chunk_size={:?}", chunk_size); - // println!("vec_idx_all[vec_idx_all.len()-1]={:?}", vec_idx_all[vec_idx_all.len()-1]); if vec_idx_all.len() < (n_chunks + 1) { vec_idx_all.push(l - 1); } else { vec_idx_all.pop(); vec_idx_all.push(l - 1); } - // println!("vec_idx_all[vec_idx_all.len()-1]={:?}", vec_idx_all[vec_idx_all.len()-1]); - // println!("vec_idx_all={:?}", vec_idx_all); n_chunks = vec_idx_all.len(); let vec_idx_loci_idx_ini: Vec = vec_idx_all[0..(n_chunks - 1)].to_owned(); let vec_idx_loci_idx_fin: Vec = vec_idx_all[1..n_chunks].to_owned(); - // println!("vec_idx_loci_idx_ini={:?}", vec_idx_loci_idx_ini); - // println!("vec_idx_loci_idx_fin={:?}", vec_idx_loci_idx_fin); - assert_eq!(vec_idx_loci_idx_ini.len(), vec_idx_loci_idx_fin.len()); + match vec_idx_loci_idx_ini.len() == vec_idx_loci_idx_fin.len() { + true => (), + false => return Err(ImputefError { + code: 401, + message: "Error defining chunks: the number of initial and final indices are different.".to_owned() + }) + }; Ok((vec_idx_loci_idx_ini, vec_idx_loci_idx_fin)) } /// Detect the cursor positions across the input file corresponding to the splits for parallel computation -pub fn find_file_splits(fname: &str, n_threads: &usize) -> io::Result> { +pub fn find_file_splits(fname: &str, n_threads: &usize) -> Result, ImputefError> { let mut file = match File::open(fname) { Ok(x) => x, - Err(_) => return Err(Error::new(ErrorKind::Other, "The input file: ".to_owned() + fname + " does not exist. Please make sure you are entering the correct filename and/or the correct path.")), + Err(_) => return Err(ImputefError { + code:402, + message: "The input file: ".to_owned() + fname + " does not exist. Please make sure you are entering the correct filename and/or the correct path." + }), }; let _ = file.seek(SeekFrom::End(0)); let mut reader = BufReader::new(file); - let end = reader.stream_position().expect("Error navigating file."); + let end = match reader.stream_position() { + Ok(x) => x, + Err(_) => return Err(ImputefError { + code: 403, + message: "Error navigating file: ".to_owned() + fname + }) + }; let mut out = (0..end) .step_by((end as usize) / n_threads) .collect::>(); @@ -82,26 +90,37 @@ pub fn find_file_splits(fname: &str, n_threads: &usize) -> io::Result> } /// Round-up an `f64` to `n_digits` decimal points -pub fn sensible_round(x: f64, n_digits: usize) -> f64 { - let factor = ("1e".to_owned() + &n_digits.to_string()) - .parse::() - .expect("Error parsing String into f64."); - (x * factor).round() / factor +pub fn sensible_round(x: f64, n_digits: usize) -> Result { + let factor = match ("1e".to_owned() + &n_digits.to_string()) + .parse::() { + Ok(x) => x, + Err(_) => return Err(ImputefError{ + code: 404, + message: "Error parsing String into f64: ".to_owned() + &x.to_string() + }) + }; + Ok((x * factor).round() / factor) } /// Round-up an `f64` to `n_digits` decimal points and cast into a `String` -pub fn parse_f64_roundup_and_own(x: f64, n_digits: usize) -> String { +pub fn parse_f64_roundup_and_own(x: f64, n_digits: usize) -> Result { let s = x.to_string(); if s.len() < n_digits { - return s; + return Ok(s); + } + match sensible_round(x, n_digits) { + Ok(x) => Ok(x.to_string()), + Err(e) => Err(ImputefError{ + code: 405, + message: "Error in parse_f64_roundup_and_own() | ".to_owned() + &e.message + }) } - sensible_round(x, n_digits).to_string() } /// Calculate the mean of a 1D array ignoring NaN pub fn mean_array1_ignore_nan( x: &ArrayBase, Dim<[usize; 1]>>, -) -> io::Result { +) -> Result { // let sum = x.fold(0.0, |sum, &a| if a.is_nan() { sum } else { sum + a }); // let counts = x.iter().filter(|&a| !a.is_nan()).count() as f64; // Ok(sum / counts) @@ -120,7 +139,7 @@ pub fn mean_array1_ignore_nan( pub fn mean_axis_ignore_nan( a: &Array, axis: usize, -) -> io::Result::Smaller>> +) -> Result::Smaller>, ImputefError> where D: ndarray::Dimension + ndarray::RemoveAxis, { @@ -147,14 +166,15 @@ where pub fn pearsons_correlation_pairwise_complete( x: &ArrayBase, Dim<[usize; 1]>>, y: &ArrayBase, Dim<[usize; 1]>>, -) -> io::Result<(f64, f64)> { +) -> Result<(f64, f64), ImputefError> { let n = x.len(); - if n != y.len() { - return Err(Error::new( - ErrorKind::Other, - "Input vectors are not the same size.", - )); - } + match n == y.len() { + true => (), + false => return Err(ImputefError { + code: 406, + message: "Error in pearsons_correlation_pairwise_complete(): input vectors are not the same size.".to_owned() + }) + }; // Using pairs of values with non-missing data across the pair of vectors // Note that this may result in unreasonable correlations is used for a matrix, i.e. column vectors may be comparing different sets of rows let filtered_vectors: (Vec, Vec) = x @@ -166,10 +186,20 @@ pub fn pearsons_correlation_pairwise_complete( let x = Array1::from_vec(filtered_vectors.0); let y = Array1::from_vec(filtered_vectors.1); // Make sure we are handling NAN properly - let mu_x = mean_array1_ignore_nan(&x.view()) - .expect("Error calculating the mean of x while ignoring NANs."); - let mu_y = mean_array1_ignore_nan(&y.view()) - .expect("Error calculating the mean of y while ignoring NANs."); + let mu_x = match mean_array1_ignore_nan(&x.view()) { + Ok(x) => x, + Err(_) => return Err(ImputefError { + code: 407, + message: "Error in pearsons_correlation_pairwise_complete() in calculating the mean of x while ignoring NANs.".to_owned() + }) + }; + let mu_y = match mean_array1_ignore_nan(&y.view()) { + Ok(x) => x, + Err(_) => return Err(ImputefError { + code: 408, + message: "Error in pearsons_correlation_pairwise_complete() in calculating the mean of y while ignoring NANs.".to_owned() + }) + }; let x_less_mu_x = x .iter() .filter(|&x| !x.is_nan()) @@ -203,13 +233,26 @@ pub fn pearsons_correlation_pairwise_complete( let sigma_r = sigma_r_denominator.sqrt(); let t = r / sigma_r; let pval = if n > 2 { - let d = StudentsT::new(0.0, 1.0, n as f64 - 2.0) - .expect("Error defining Student's t-distribution."); + let d = match StudentsT::new(0.0, 1.0, n as f64 - 2.0) { + Ok(x) => x, + Err(_) => return Err(ImputefError{ + code: 409, + message: "Error in pearsons_correlation_pairwise_complete() in defining Student's t-distribution.".to_owned() + }) + }; 2.00 * (1.00 - d.cdf(t.abs())) } else { f64::NAN }; - Ok((sensible_round(r, 7), pval)) + let r = match sensible_round(r, 7) { + Ok(x) => x, + Err(e) => return Err(ImputefError{ + code: 410, + message: "Error in pearsons_correlation_pairwise_complete() | ".to_owned() + + &e.message + }) + }; + Ok((r, pval)) } //////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -248,9 +291,9 @@ mod tests { define_chunks(&vec![0, 1, 2, 3, 4, 5, 6], &5).unwrap(), (vec![0, 1, 2, 3, 4, 5], vec![1, 2, 3, 4, 5, 6]) ); - assert_eq!(sensible_round(0.420000012435, 4), 0.42); + assert_eq!(sensible_round(0.420000012435, 4).unwrap(), 0.42); assert_eq!( - parse_f64_roundup_and_own(0.690000012435, 4), + parse_f64_roundup_and_own(0.690000012435, 4).unwrap(), "0.69".to_owned() ); let _a: Array2 =