Skip to content

Commit

Permalink
Add mash_stream implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
Roderick Bovee committed Sep 28, 2017
1 parent 704ddcf commit e7d5c7c
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ readme = "./README.md"
[dependencies]
clap = "~2.26.0"
murmurhash3 = "~0.0.5"
needletail = "~0.1.2"
needletail = "~0.1.4"
serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0.2"
45 changes: 44 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ extern crate serde;
#[macro_use] extern crate serde_derive;
extern crate serde_json;

use std::io::{Read, Seek};
use std::path::Path;
use needletail::fastx::fastx_cli;
use needletail::fastx::{fastx_cli, fastx_stream};

use filtering::{FilterParams, filter_sketch};
use minhashes::MinHashKmers;
Expand Down Expand Up @@ -73,3 +74,45 @@ pub fn mash_files(filenames: Vec<&str>, n_hashes: usize, final_size: usize, kmer
sketches: sketches,
})
}


/// Builds one sketch from the sequence records that `fastx_stream` parses
/// out of `reader`.
///
/// Each record's kmers of length `kmer_length` are pushed into a
/// `MinHashKmers` collection created with `seed`. The collected hashes are
/// then passed through `filter_sketch` and cut down to at most `final_size`
/// entries.
///
/// The collection's size is chosen before any input is read: `final_size`
/// when `filters.filter_on` is `Some(false)`, otherwise `n_hashes`.
///
/// If `filters.filter_on` is still `None` when the sequence-type callback
/// runs, it is overwritten in place: `Some(false)` for `"FASTA"` and
/// `Some(true)` for `"FASTQ"`.
///
/// # Errors
///
/// Returns the stringified error from `fastx_stream`, or a "too few kmers"
/// message when `no_strict` is `false` and fewer than `final_size` hashes
/// remain after filtering.
///
/// # Panics
///
/// Panics if `filters.filter_on` is `None` and the reported sequence type is
/// neither `"FASTA"` nor `"FASTQ"`.
pub fn mash_stream<R>(reader: R, n_hashes: usize, final_size: usize, kmer_length: u8,
                      filters: &mut FilterParams, no_strict: bool, seed: u64) -> Result<JSONSketch, String> where
    R: Read + Seek,
{
    // Running totals reported in the resulting sketch.
    let mut total_bases = 0u64;
    let mut total_kmers = 0u64;

    // Only an explicit "filtering off" lets us collect just `final_size`
    // hashes; in every other case the larger `n_hashes` pool is used.
    let capacity = if filters.filter_on == Some(false) {
        final_size
    } else {
        n_hashes
    };
    let mut sketcher = MinHashKmers::new(capacity, seed);

    // Both closures are passed inline so their mutable borrows end as soon
    // as `fastx_stream` returns.
    fastx_stream(reader, |seq_type| {
        // Filtering defaults to off for FASTA and on for FASTQ, but only
        // when the caller did not specify it explicitly.
        if filters.filter_on.is_none() {
            filters.filter_on = Some(match seq_type {
                "FASTA" => false,
                "FASTQ" => true,
                _ => panic!("Unknown sequence type"),
            });
        }
    }, |seq| {
        total_bases += seq.seq.len() as u64;
        for (_, kmer, is_rev_complement) in seq.normalize(false).kmers(kmer_length, true) {
            total_kmers += 1;
            // The flag is stored as 1 for reverse-complement kmers, else 0.
            sketcher.push(kmer, is_rev_complement as u8);
        }
    }).map_err(|e| e.to_string())?;

    let raw_hashes = sketcher.into_vec();
    let (mut kept, stats) = filter_sketch(&raw_hashes, &filters);
    kept.truncate(final_size);

    // In strict mode a sketch smaller than requested is an error.
    let n_kept = kept.len();
    if n_kept < final_size && !no_strict {
        return Err(format!("Stream had too few kmers ({}) to sketch", n_kept));
    }

    // The first argument is an empty string (presumably the sketch name,
    // since a stream has no filename — confirm against `JSONSketch::new`).
    Ok(JSONSketch::new("", total_bases, total_kmers, kept, &stats))
}

0 comments on commit e7d5c7c

Please sign in to comment.