nomenclature per bossman
kyclark committed Jun 26, 2024
1 parent aabc4c3 commit d46430a
Showing 3 changed files with 59 additions and 55 deletions.
src/main.rs: 48 changes (24 additions, 24 deletions)
@@ -19,13 +19,13 @@ pub mod search;
#[derive(Debug, Parser)]
#[command(author, version, about)]
struct Args {
- /// DNA file
- #[arg(short, long, value_name = "DNA")]
- dna: String,
+ /// Junctions file
+ #[arg(short, long, value_name = "JUNCTIONS")]
+ junctions: String,

- /// RNA file
- #[arg(short, long, value_name = "RNA", num_args(1..))]
- rna: Vec<String>,
+ /// Reads file(s)
+ #[arg(short, long, value_name = "READS", num_args(1..), required(true))]
+ reads: Vec<String>,

/// Output directory
#[arg(short, long, value_name = "OUTDIR", default_value = "out")]
@@ -62,16 +62,16 @@ fn run(args: Args) -> Result<()> {
// make the multimap for post-processing
let timer = Instant::now();
let mut map = HashMap::new();
- let mut needles = vec![];
- let mut dna = get_reader(&args.dna)?;
+ let mut junctions = vec![];
+ let mut junctions_file = get_reader(&args.junctions)?;

- while let Some(rec) = dna.iter_record()? {
+ while let Some(rec) = junctions_file.iter_record()? {
match compress_seq(rec.seq()) {
Some(comp) => {
- needles.push(comp);
+ junctions.push(comp);
if map.get(&comp).is_some() {
eprintln!(
- r#"WARNING: "{}" ({}) duplicated"#,
+ r#"WARNING: Junction sequence "{}" ({}) duplicated"#,
rec.seq(),
rec.head()
);
@@ -80,7 +80,7 @@ fn run(args: Args) -> Result<()> {
}
}
_ => eprintln!(
- r#"DNA sequence "{}" ({}) rejected"#,
+ r#"Junction sequence "{}" ({}) rejected"#,
rec.seq(),
rec.head()
),
@@ -94,8 +94,8 @@ fn run(args: Args) -> Result<()> {
);
}

- if needles.is_empty() {
- bail!("No needles");
+ if junctions.is_empty() {
+ bail!("No junctions");
}

let outdir = Path::new(&args.outdir);
@@ -104,10 +104,10 @@ fn run(args: Args) -> Result<()> {
fs::create_dir_all(&outdir)?;
}

- args.rna
+ args.reads
.into_par_iter()
- .try_for_each(|filename| -> Result<()> {
- let basename = Path::new(&filename)
+ .try_for_each(|reads_file| -> Result<()> {
+ let basename = Path::new(&reads_file)
.file_name()
.ok_or(anyhow!("basename"))?;

@@ -119,24 +119,24 @@ fn run(args: Args) -> Result<()> {
// Search through each of the RNA sequences, reusing
// the sequence and search results instances.
let timer = Instant::now();
- let mut rna: kseq::Paths = get_reader(&filename)?;
- writeln!(out_file, "File: {}", &filename)?;
+ let mut reads: kseq::Paths = get_reader(&reads_file)?;
+ writeln!(out_file, "File: {}", &reads_file)?;

- let mut search: Search = Search::new(&needles)?;
- while let Some(rec) = rna.iter_record()? {
+ let mut search: Search = Search::new(&junctions)?;
+ while let Some(rec) = reads.iter_record()? {
search.search(rec.seq());
}

if args.verbose {
eprintln!(
- r#"Time to search "{filename}": {:?}"#,
+ r#"Time to search "{reads_file}": {:?}"#,
timer.elapsed()
);
}

- for (i, count) in search.needles.hits.into_iter().enumerate() {
+ for (i, count) in search.junctions.hits.into_iter().enumerate() {
if count > 0 {
- if let Some(name) = map.get(&search.needles.key[i]) {
+ if let Some(name) = map.get(&search.junctions.key[i]) {
writeln!(out_file, "{name}\t{count}")?;
}
}
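For context on the hunk above, the output naming and format are untouched by the rename: each reads file still yields <outdir>/<reads basename>.txt containing a "File:" header line followed by one tab-separated junction-name/count pair for every junction with a nonzero hit count. A hypothetical example (paths, names, and counts invented for illustration):

File: tests/inputs/reads1.fa
junction_A	42
junction_B	7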
src/search.rs: 34 changes (17 additions, 17 deletions)
@@ -7,24 +7,24 @@ pub struct Search {
haystack_index: usize,
haystack_size: usize,
haystack_window: u64,
- pub needles: Hash,
+ pub junctions: Hash,
start_index: usize,
}

impl Search {
- pub fn new(needles: &Vec<u64>) -> Result<Search> {
- let mut needles_hash =
- Hash::new(needles.len() * HASH_CAPACITY_MULTIPLE);
+ pub fn new(junctions: &Vec<u64>) -> Result<Search> {
+ let mut junction_hash =
+ Hash::new(junctions.len() * HASH_CAPACITY_MULTIPLE);

- for needle in needles {
- needles_hash.add(*needle)?;
+ for seq in junctions {
+ junction_hash.add(*seq)?;
}

Ok(Search {
haystack_index: 0,
haystack_size: 0,
haystack_window: 0,
- needles: needles_hash,
+ junctions: junction_hash,
start_index: 0,
})
}
@@ -65,7 +65,7 @@ impl Search {
// Bump the start index in order to slide the window one
// nucleotide to the right.
self.start_index += 1;
- let _ = self.needles.inc_hits(self.haystack_window);
+ let _ = self.junctions.inc_hits(self.haystack_window);
}
}
}
@@ -76,38 +76,38 @@ mod test {

#[test]
fn test_search() {
- let needles = vec![
+ let junctions = vec![
compress_seq(&"T".repeat(32)).unwrap(),
compress_seq(&"G".repeat(32)).unwrap(),
];
- let mut search = Search::new(&needles).unwrap();
+ let mut search = Search::new(&junctions).unwrap();
search.search("AAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAA");

- let res = search.needles.get_hits(needles[0]);
+ let res = search.junctions.get_hits(junctions[0]);
assert_eq!(res, Some(0));

- let res = search.needles.get_hits(needles[1]);
+ let res = search.junctions.get_hits(junctions[1]);
assert_eq!(res, Some(1));

let missing = compress_seq(&"C".repeat(32)).unwrap();
- let res = search.needles.get_hits(missing);
+ let res = search.junctions.get_hits(missing);
assert!(res.is_none());
}

#[test]
fn test_search_with_n() {
- let needles = vec![
+ let junctions = vec![
compress_seq(&"ACGT".repeat(8)).unwrap(),
compress_seq(&"G".repeat(32)).unwrap(),
];
- let mut search = Search::new(&needles).unwrap();
+ let mut search = Search::new(&junctions).unwrap();

search.search("AANACGTACGTACGTACGTACGTACGTACGTACGTNNAA");

- let res = search.needles.get_hits(needles[0]);
+ let res = search.junctions.get_hits(junctions[0]);
assert_eq!(res, Some(1));

- let res = search.needles.get_hits(needles[1]);
+ let res = search.junctions.get_hits(junctions[1]);
assert_eq!(res, Some(0));
}
}
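The tests above rely on compress_seq to pack each 32-base junction into a single u64 key; that function is not part of this diff. A minimal sketch of the assumed scheme (2-bit packing per base, returning None for any non-ACGT character, which is why sequences containing N are rejected):

// Sketch only: an assumption about compress_seq's behavior, not the crate's actual code.
fn compress_seq_sketch(seq: &str) -> Option<u64> {
    // Only a full 32-mer fits a single u64 at 2 bits per base.
    if seq.len() != 32 {
        return None;
    }
    let mut packed: u64 = 0;
    for base in seq.bytes() {
        let code = match base {
            b'A' | b'a' => 0,
            b'C' | b'c' => 1,
            b'G' | b'g' => 2,
            b'T' | b't' => 3,
            _ => return None, // e.g. 'N' makes the sequence unusable as a key
        };
        packed = (packed << 2) | code;
    }
    Some(packed)
}

fn main() {
    // 32 'A's pack to all-zero bits; a sequence containing 'N' yields no key.
    assert_eq!(compress_seq_sketch(&"A".repeat(32)), Some(0));
    assert!(compress_seq_sketch(&"N".repeat(32)).is_none());
}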
tests/cli.rs: 32 changes (18 additions, 14 deletions)
@@ -35,11 +35,11 @@ fn gen_bad_file() -> String {

// --------------------------------------------------
#[test]
- fn dies_bad_rna_file() -> Result<()> {
+ fn dies_bad_reads_file() -> Result<()> {
let bad = gen_bad_file();
let expected = format!("{bad}: .* [(]os error 2[)]");
Command::cargo_bin(PRG)?
- .args(["-r", &bad, "-d", DNA_FA])
+ .args(["-r", &bad, "-j", DNA_FA])
.assert()
.failure()
.stderr(predicate::str::is_match(expected)?);
@@ -48,43 +48,47 @@ fn dies_bad_rna_file() -> Result<()> {

// --------------------------------------------------
#[test]
- fn dies_bad_dna_file() -> Result<()> {
+ fn dies_bad_junction_file() -> Result<()> {
let bad = gen_bad_file();
let expected = format!("{bad}: .* [(]os error 2[)]");
Command::cargo_bin(PRG)?
- .args(["-d", &bad, "-r", RNA_FA_50K])
+ .args(["-j", &bad, "-r", RNA_FA_50K])
.assert()
.failure()
.stderr(predicate::str::is_match(expected)?);
Ok(())
}

// --------------------------------------------------
- fn run(rna_files: &[&str], dna: &str, expected_files: &[&str]) -> Result<()> {
+ fn run(
+ read_files: &[&str],
+ junction_file: &str,
+ expected_files: &[&str],
+ ) -> Result<()> {
// outdir will be removed when var leaves scope
let outdir = TempDir::new()?;
let mut args: Vec<String> = vec![
- "-d".to_string(),
- dna.to_string(),
+ "-j".to_string(),
+ junction_file.to_string(),
"-o".to_string(),
outdir.path().to_string_lossy().to_string(),
"-r".to_string(),
];

- for file in rna_files {
- args.push(file.to_string());
+ for read_file in read_files {
+ args.push(read_file.to_string());
}

Command::cargo_bin(PRG)?.args(&args).assert().success();

- for (rna_file, expected_file) in zip(rna_files, expected_files) {
- // Output file is RNA basename + ".txt"
- let mut rna_base = Path::new(&rna_file)
+ for (read_file, expected_file) in zip(read_files, expected_files) {
+ // Output file is read basename + ".txt"
+ let mut read_base = Path::new(&read_file)
.file_name()
.ok_or(anyhow!("No basename"))?
.to_os_string();
- rna_base.push(".txt");
- let outpath = &outdir.path().join(&rna_base);
+ read_base.push(".txt");
+ let outpath = &outdir.path().join(&read_base);
assert!(outpath.exists());

let expected = fs::read_to_string(expected_file)?;
