diff --git a/src/main.rs b/src/main.rs index 1ed5c50..5f4f29a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,13 +19,13 @@ pub mod search; #[derive(Debug, Parser)] #[command(author, version, about)] struct Args { - /// DNA file - #[arg(short, long, value_name = "DNA")] - dna: String, + /// Junctions file + #[arg(short, long, value_name = "JUNCTIONS")] + junctions: String, - /// RNA file - #[arg(short, long, value_name = "RNA", num_args(1..))] - rna: Vec, + /// Reads file(s) + #[arg(short, long, value_name = "READS", num_args(1..), required(true))] + reads: Vec, /// Output directory #[arg(short, long, value_name = "OUTDIR", default_value = "out")] @@ -62,16 +62,16 @@ fn run(args: Args) -> Result<()> { // make the multimap for post-processing let timer = Instant::now(); let mut map = HashMap::new(); - let mut needles = vec![]; - let mut dna = get_reader(&args.dna)?; + let mut junctions = vec![]; + let mut junctions_file = get_reader(&args.junctions)?; - while let Some(rec) = dna.iter_record()? { + while let Some(rec) = junctions_file.iter_record()? { match compress_seq(rec.seq()) { Some(comp) => { - needles.push(comp); + junctions.push(comp); if map.get(&comp).is_some() { eprintln!( - r#"WARNING: "{}" ({}) duplicated"#, + r#"WARNING: Junction sequence "{}" ({}) duplicated"#, rec.seq(), rec.head() ); @@ -80,7 +80,7 @@ fn run(args: Args) -> Result<()> { } } _ => eprintln!( - r#"DNA sequence "{}" ({}) rejected"#, + r#"Junction sequence "{}" ({}) rejected"#, rec.seq(), rec.head() ), @@ -94,8 +94,8 @@ fn run(args: Args) -> Result<()> { ); } - if needles.is_empty() { - bail!("No needles"); + if junctions.is_empty() { + bail!("No junctions"); } let outdir = Path::new(&args.outdir); @@ -104,10 +104,10 @@ fn run(args: Args) -> Result<()> { fs::create_dir_all(&outdir)?; } - args.rna + args.reads .into_par_iter() - .try_for_each(|filename| -> Result<()> { - let basename = Path::new(&filename) + .try_for_each(|reads_file| -> Result<()> { + let basename = Path::new(&reads_file) .file_name() .ok_or(anyhow!("basename"))?; @@ -119,24 +119,24 @@ fn run(args: Args) -> Result<()> { // Search through each of the RNA sequences, reusing // the sequence and search results instances. let timer = Instant::now(); - let mut rna: kseq::Paths = get_reader(&filename)?; - writeln!(out_file, "File: {}", &filename)?; + let mut reads: kseq::Paths = get_reader(&reads_file)?; + writeln!(out_file, "File: {}", &reads_file)?; - let mut search: Search = Search::new(&needles)?; - while let Some(rec) = rna.iter_record()? { + let mut search: Search = Search::new(&junctions)?; + while let Some(rec) = reads.iter_record()? { search.search(rec.seq()); } if args.verbose { eprintln!( - r#"Time to search "{filename}": {:?}"#, + r#"Time to search "{reads_file}": {:?}"#, timer.elapsed() ); } - for (i, count) in search.needles.hits.into_iter().enumerate() { + for (i, count) in search.junctions.hits.into_iter().enumerate() { if count > 0 { - if let Some(name) = map.get(&search.needles.key[i]) { + if let Some(name) = map.get(&search.junctions.key[i]) { writeln!(out_file, "{name}\t{count}")?; } } diff --git a/src/search.rs b/src/search.rs index 99bb034..ace7bf4 100644 --- a/src/search.rs +++ b/src/search.rs @@ -7,24 +7,24 @@ pub struct Search { haystack_index: usize, haystack_size: usize, haystack_window: u64, - pub needles: Hash, + pub junctions: Hash, start_index: usize, } impl Search { - pub fn new(needles: &Vec) -> Result { - let mut needles_hash = - Hash::new(needles.len() * HASH_CAPACITY_MULTIPLE); + pub fn new(junctions: &Vec) -> Result { + let mut junction_hash = + Hash::new(junctions.len() * HASH_CAPACITY_MULTIPLE); - for needle in needles { - needles_hash.add(*needle)?; + for seq in junctions { + junction_hash.add(*seq)?; } Ok(Search { haystack_index: 0, haystack_size: 0, haystack_window: 0, - needles: needles_hash, + junctions: junction_hash, start_index: 0, }) } @@ -65,7 +65,7 @@ impl Search { // Bump the start index in order to slide the window one // nucleotide to the right. self.start_index += 1; - let _ = self.needles.inc_hits(self.haystack_window); + let _ = self.junctions.inc_hits(self.haystack_window); } } } @@ -76,38 +76,38 @@ mod test { #[test] fn test_search() { - let needles = vec![ + let junctions = vec![ compress_seq(&"T".repeat(32)).unwrap(), compress_seq(&"G".repeat(32)).unwrap(), ]; - let mut search = Search::new(&needles).unwrap(); + let mut search = Search::new(&junctions).unwrap(); search.search("AAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAA"); - let res = search.needles.get_hits(needles[0]); + let res = search.junctions.get_hits(junctions[0]); assert_eq!(res, Some(0)); - let res = search.needles.get_hits(needles[1]); + let res = search.junctions.get_hits(junctions[1]); assert_eq!(res, Some(1)); let missing = compress_seq(&"C".repeat(32)).unwrap(); - let res = search.needles.get_hits(missing); + let res = search.junctions.get_hits(missing); assert!(res.is_none()); } #[test] fn test_search_with_n() { - let needles = vec![ + let junctions = vec![ compress_seq(&"ACGT".repeat(8)).unwrap(), compress_seq(&"G".repeat(32)).unwrap(), ]; - let mut search = Search::new(&needles).unwrap(); + let mut search = Search::new(&junctions).unwrap(); search.search("AANACGTACGTACGTACGTACGTACGTACGTACGTNNAA"); - let res = search.needles.get_hits(needles[0]); + let res = search.junctions.get_hits(junctions[0]); assert_eq!(res, Some(1)); - let res = search.needles.get_hits(needles[1]); + let res = search.junctions.get_hits(junctions[1]); assert_eq!(res, Some(0)); } } diff --git a/tests/cli.rs b/tests/cli.rs index 97986f8..642b685 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -35,11 +35,11 @@ fn gen_bad_file() -> String { // -------------------------------------------------- #[test] -fn dies_bad_rna_file() -> Result<()> { +fn dies_bad_reads_file() -> Result<()> { let bad = gen_bad_file(); let expected = format!("{bad}: .* [(]os error 2[)]"); Command::cargo_bin(PRG)? - .args(["-r", &bad, "-d", DNA_FA]) + .args(["-r", &bad, "-j", DNA_FA]) .assert() .failure() .stderr(predicate::str::is_match(expected)?); @@ -48,11 +48,11 @@ fn dies_bad_rna_file() -> Result<()> { // -------------------------------------------------- #[test] -fn dies_bad_dna_file() -> Result<()> { +fn dies_bad_junction_file() -> Result<()> { let bad = gen_bad_file(); let expected = format!("{bad}: .* [(]os error 2[)]"); Command::cargo_bin(PRG)? - .args(["-d", &bad, "-r", RNA_FA_50K]) + .args(["-j", &bad, "-r", RNA_FA_50K]) .assert() .failure() .stderr(predicate::str::is_match(expected)?); @@ -60,31 +60,35 @@ fn dies_bad_dna_file() -> Result<()> { } // -------------------------------------------------- -fn run(rna_files: &[&str], dna: &str, expected_files: &[&str]) -> Result<()> { +fn run( + read_files: &[&str], + junction_file: &str, + expected_files: &[&str], +) -> Result<()> { // outdir will be removed when var leaves scope let outdir = TempDir::new()?; let mut args: Vec = vec![ - "-d".to_string(), - dna.to_string(), + "-j".to_string(), + junction_file.to_string(), "-o".to_string(), outdir.path().to_string_lossy().to_string(), "-r".to_string(), ]; - for file in rna_files { - args.push(file.to_string()); + for read_file in read_files { + args.push(read_file.to_string()); } Command::cargo_bin(PRG)?.args(&args).assert().success(); - for (rna_file, expected_file) in zip(rna_files, expected_files) { - // Output file is RNA basename + ".txt" - let mut rna_base = Path::new(&rna_file) + for (read_file, expected_file) in zip(read_files, expected_files) { + // Output file is read basename + ".txt" + let mut read_base = Path::new(&read_file) .file_name() .ok_or(anyhow!("No basename"))? .to_os_string(); - rna_base.push(".txt"); - let outpath = &outdir.path().join(&rna_base); + read_base.push(".txt"); + let outpath = &outdir.path().join(&read_base); assert!(outpath.exists()); let expected = fs::read_to_string(expected_file)?;