Skip to content

Commit

Permalink
Added feature per Clément to output a count file for each read file
Browse files Browse the repository at this point in the history
  • Loading branch information
kyclark committed Jun 28, 2024
1 parent d46430a commit f60eb67
Show file tree
Hide file tree
Showing 14 changed files with 73 additions and 525 deletions.
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ anyhow = "1.0.86"
kseq = "0.5"
clap = { version = "4.5.7", features = ["derive"] }
rayon = "1.10.0"
itertools = "0.13.0"

[dev-dependencies]
assert_cmd = "2"
Expand Down
10 changes: 6 additions & 4 deletions mk-outs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ OUT_FA_100K="tests/outputs/out-100k-fasta.txt"
OUT_FQ_50K="tests/outputs/out-50k-fastq.txt"
OUT_FQ_100K="tests/outputs/out-100k-fastq.txt"

$PRG -d $DNA_FA -r $RNA_FA_50K -o $OUT_FA_50K
$PRG -d $DNA_FA -r $RNA_FA_100K -o $OUT_FA_100K
OUT_DIR="tests/outputs"

$PRG -d $DNA_FA -r $RNA_FQ_50K -o $OUT_FQ_50K
$PRG -d $DNA_FA -r $RNA_FQ_100K -o $OUT_FQ_100K
$PRG -j $DNA_FA -r $RNA_FA_50K -o $OUT_DIR
$PRG -j $DNA_FA -r $RNA_FA_100K -o $OUT_DIR

$PRG -j $DNA_FA -r $RNA_FQ_50K -o $OUT_DIR
$PRG -j $DNA_FA -r $RNA_FQ_100K -o $OUT_DIR
24 changes: 17 additions & 7 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,22 +109,30 @@ fn run(args: Args) -> Result<()> {
.try_for_each(|reads_file| -> Result<()> {
let basename = Path::new(&reads_file)
.file_name()
.ok_or(anyhow!("basename"))?;
.ok_or(anyhow!("basename"))?
.to_os_string();

let mut basename = basename.to_os_string();
basename.push(".txt");
let out_path = &outdir.join(basename);
let mut out_file = File::create(out_path)?;
let mut out_data_file = basename.clone();
out_data_file.push(".txt");
let out_data_path = &outdir.join(out_data_file);
let mut out_data = File::create(out_data_path)?;

let mut out_count_file = basename.clone();
out_count_file.push(".count");
let out_count_path = &outdir.join(out_count_file);
let mut out_count = File::create(out_count_path)?;

// Search through each of the RNA sequences, reusing
// the sequence and search results instances.
let timer = Instant::now();
let mut reads: kseq::Paths = get_reader(&reads_file)?;
writeln!(out_file, "File: {}", &reads_file)?;
writeln!(out_data, "File: {}", &reads_file)?;

let mut search: Search = Search::new(&junctions)?;
let mut read_count = 0;
while let Some(rec) = reads.iter_record()? {
search.search(rec.seq());
read_count += 1;
}

if args.verbose {
Expand All @@ -137,10 +145,12 @@ fn run(args: Args) -> Result<()> {
for (i, count) in search.junctions.hits.into_iter().enumerate() {
if count > 0 {
if let Some(name) = map.get(&search.junctions.key[i]) {
writeln!(out_file, "{name}\t{count}")?;
writeln!(out_data, "{name}\t{count}")?;
}
}
}

writeln!(out_count, "{read_count}")?;
Ok(())
})?;

Expand Down
48 changes: 35 additions & 13 deletions tests/cli.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,30 @@
use anyhow::{anyhow, Result};
use assert_cmd::Command;
use itertools::izip;
use predicates::prelude::*;
use pretty_assertions::assert_eq;
use rand::{distributions::Alphanumeric, Rng};
use std::{fs, iter::zip, path::Path};
use std::{fs, path::Path};
use tempfile::TempDir;

const PRG: &str = "tallyman";
const DNA_FA: &str = "tests/inputs/dna.fasta";
const DNA_FQ: &str = "tests/inputs/dna.fastq";

const RNA_FA_50K: &str = "tests/inputs/rna-50k.fasta";
const RNA_FQ_50K: &str = "tests/inputs/rna-50k.fastq";
const RNA_FA_100K: &str = "tests/inputs/rna-100k.fasta";
const RNA_FQ_100K: &str = "tests/inputs/rna-100k.fastq";
const OUT_FA_50K: &str = "tests/outputs/out-50k-fasta.txt";
const OUT_FA_100K: &str = "tests/outputs/out-100k-fasta.txt";
const OUT_FQ_50K: &str = "tests/outputs/out-50k-fastq.txt";
const OUT_FQ_100K: &str = "tests/outputs/out-100k-fastq.txt";

const OUT_FA_50K: &str = "tests/outputs/rna-50k.fasta.txt";
const OUT_FA_100K: &str = "tests/outputs/rna-100k.fasta.txt";
const OUT_FA_50K_COUNT: &str = "tests/outputs/rna-50k.fasta.count";
const OUT_FA_100K_COUNT: &str = "tests/outputs/rna-100k.fasta.count";

const OUT_FQ_50K: &str = "tests/outputs/rna-50k.fastq.txt";
const OUT_FQ_100K: &str = "tests/outputs/rna-100k.fastq.txt";
const OUT_FQ_50K_COUNT: &str = "tests/outputs/rna-50k.fastq.count";
const OUT_FQ_100K_COUNT: &str = "tests/outputs/rna-100k.fastq.count";

// --------------------------------------------------
fn gen_bad_file() -> String {
Expand Down Expand Up @@ -64,9 +72,11 @@ fn run(
read_files: &[&str],
junction_file: &str,
expected_files: &[&str],
expected_counts: &[&str],
) -> Result<()> {
// outdir will be removed when var leaves scope
let outdir = TempDir::new()?;

let mut args: Vec<String> = vec![
"-j".to_string(),
junction_file.to_string(),
Expand All @@ -81,19 +91,30 @@ fn run(

Command::cargo_bin(PRG)?.args(&args).assert().success();

for (read_file, expected_file) in zip(read_files, expected_files) {
for (read_file, expected_file, expected_count) in
izip!(read_files, expected_files, expected_counts)
{
// Data output file is the read file's basename + ".txt"
let mut read_base = Path::new(&read_file)
let read_base = Path::new(&read_file)
.file_name()
.ok_or(anyhow!("No basename"))?
.to_os_string();
read_base.push(".txt");
let outpath = &outdir.path().join(&read_base);
let mut data_basename = read_base.clone();
data_basename.push(".txt");
let outpath = &outdir.path().join(&data_basename);
assert!(outpath.exists());

let expected = fs::read_to_string(expected_file)?;
let actual = fs::read_to_string(outpath)?;
assert_eq!(&actual, &expected);

let mut count_basename = read_base.clone();
count_basename.push(".count");
let outpath = &outdir.path().join(&count_basename);
assert!(outpath.exists());
let expected = fs::read_to_string(expected_count)?;
let actual = fs::read_to_string(outpath)?;
assert_eq!(&actual, &expected);
}

Ok(())
Expand All @@ -102,25 +123,25 @@ fn run(
// --------------------------------------------------
#[test]
fn run_50k_fasta() -> Result<()> {
run(&[RNA_FA_50K], DNA_FA, &[OUT_FA_50K])
run(&[RNA_FA_50K], DNA_FA, &[OUT_FA_50K], &[OUT_FA_50K_COUNT])
}

// --------------------------------------------------
#[test]
fn run_50k_fastq() -> Result<()> {
run(&[RNA_FQ_50K], DNA_FQ, &[OUT_FQ_50K])
run(&[RNA_FQ_50K], DNA_FQ, &[OUT_FQ_50K], &[OUT_FQ_50K_COUNT])
}

// --------------------------------------------------
#[test]
fn run_100k_fasta() -> Result<()> {
run(&[RNA_FA_100K], DNA_FA, &[OUT_FA_100K])
run(&[RNA_FA_100K], DNA_FA, &[OUT_FA_100K], &[OUT_FA_100K_COUNT])
}

// --------------------------------------------------
#[test]
fn run_100k_fastq() -> Result<()> {
run(&[RNA_FQ_100K], DNA_FQ, &[OUT_FQ_100K])
run(&[RNA_FQ_100K], DNA_FQ, &[OUT_FQ_100K], &[OUT_FQ_100K_COUNT])
}

// --------------------------------------------------
Expand All @@ -130,5 +151,6 @@ fn run_50k_100k_fastq() -> Result<()> {
&[RNA_FA_50K, RNA_FQ_100K],
DNA_FQ,
&[OUT_FA_50K, OUT_FQ_100K],
&[OUT_FA_50K_COUNT, OUT_FQ_100K_COUNT],
)
}
Loading

0 comments on commit f60eb67

Please sign in to comment.