diff --git a/Cargo.lock b/Cargo.lock index 6d7ab45..483560e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -51,9 +51,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.4" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" dependencies = [ "anstyle", "anstyle-parse", @@ -111,11 +111,10 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edb738d83750ec705808f6d44046d165e6bb8623f64e29a4d53fcb136ab22dfb" +checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" dependencies = [ - "ahash", "arrow-arith", "arrow-array", "arrow-buffer", @@ -133,9 +132,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5c3d17fc5b006e7beeaebfb1d2edfc92398b981f82d9744130437909b72a468" +checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" dependencies = [ "arrow-array", "arrow-buffer", @@ -148,9 +147,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55705ada5cdde4cb0f202ffa6aa756637e33fea30e13d8d0d0fd6a24ffcee1e3" +checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" dependencies = [ "ahash", "arrow-buffer", @@ -164,9 +163,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a722f90a09b94f295ab7102542e97199d3500128843446ef63e410ad546c5333" +checksum = 
"69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" dependencies = [ "bytes", "half", @@ -175,15 +174,16 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af01fc1a06f6f2baf31a04776156d47f9f31ca5939fe6d00cd7a059f95a46ff1" +checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "base64", "chrono", "half", "lexical-core", @@ -192,9 +192,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83cbbfde86f9ecd3f875c42a73d8aeab3d95149cd80129b18d09e039ecf5391b" +checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" dependencies = [ "arrow-array", "arrow-buffer", @@ -211,9 +211,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a547195e607e625e7fafa1a7269b8df1a4a612c919efd9b26bd86e74538f3a" +checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" dependencies = [ "arrow-buffer", "arrow-schema", @@ -223,9 +223,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36bf091502ab7e37775ff448413ef1ffff28ff93789acb669fffdd51b394d51" +checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" dependencies = [ "arrow-array", "arrow-buffer", @@ -237,9 +237,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ac346bc84846ab425ab3c8c7b6721db90643bc218939677ed7e071ccbfb919d" +checksum = 
"8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" dependencies = [ "arrow-array", "arrow-buffer", @@ -257,9 +257,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4502123d2397319f3a13688432bc678c61cb1582f2daa01253186da650bf5841" +checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" dependencies = [ "arrow-array", "arrow-buffer", @@ -272,9 +272,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "249fc5a07906ab3f3536a6e9f118ec2883fbcde398a97a5ba70053f0276abda4" +checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" dependencies = [ "ahash", "arrow-array", @@ -287,15 +287,15 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7a8c3f97f5ef6abd862155a6f39aaba36b029322462d72bbcfa69782a50614" +checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" [[package]] name = "arrow-select" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f868f4a5001429e20f7c1994b5cd1aa68b82e3db8cf96c559cdb56dc8be21410" +checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" dependencies = [ "ahash", "arrow-array", @@ -307,9 +307,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "48.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a27fdf8fc70040a2dee78af2e217479cb5b263bd7ab8711c7999e74056eb688a" +checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" dependencies = [ "arrow-array", "arrow-buffer", @@ -338,6 +338,12 @@ version = "1.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "bio-types" version = "1.0.1" @@ -357,12 +363,6 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = "bitflags" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" - [[package]] name = "blake" version = "2.0.2" @@ -515,7 +515,7 @@ checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "ansi_term", "atty", - "bitflags 1.3.2", + "bitflags", "strsim 0.8.0", "textwrap", "unicode-width", @@ -611,7 +611,7 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cramino" -version = "0.13.1" +version = "0.14.0" dependencies = [ "arrow", "checksums", @@ -619,12 +619,14 @@ dependencies = [ "clap 4.4.7", "ctor", "env_logger", + "hts-sys", "itertools", "libz-sys", "log", "rayon", "rust-htslib", "unzip-n", + "url", ] [[package]] @@ -794,17 +796,27 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +[[package]] +name = "env_filter" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +dependencies = [ + "log", + "regex", +] + [[package]] name = "env_logger" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0" +checksum = "05e7cf40684ae96ade6232ed84582f40ce0a66efcd43a5117aef610534f8e0b8" dependencies = [ + "anstream", + "anstyle", + "env_filter", "humantime", - "is-terminal", "log", - "regex", - "termcolor", ] [[package]] @@ -813,31 +825,21 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" -[[package]] -name = "errno" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" -dependencies = [ - "libc", - "windows-sys", -] - [[package]] name = "flatbuffers" version = "23.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" dependencies = [ - "bitflags 1.3.2", + "bitflags", "rustc_version 0.4.0", ] [[package]] name = "form_urlencoded" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ "percent-encoding", ] @@ -979,9 +981,9 @@ dependencies = [ [[package]] name = "idna" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -1003,22 +1005,11 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "is-terminal" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" -dependencies = [ - "hermit-abi 0.3.3", - 
"rustix", - "windows-sys", -] - [[package]] name = "itertools" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] @@ -1148,12 +1139,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfae20f6b19ad527b550c223fddc3077a547fc70cda94b9b566575423fd303ee" -[[package]] -name = "linux-raw-sys" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" - [[package]] name = "log" version = "0.4.20" @@ -1350,9 +1335,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pkg-config" @@ -1471,9 +1456,9 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rust-htslib" -version = "0.44.1" +version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c7eb0f29fce64a4e22578905efef3d72389058016023279a58b282eb5c0c467" +checksum = "bea99084ac4f6f7caff758be2f0dd4718d3e582a735f75bd5957f9c49f5802f8" dependencies = [ "bio-types", "byteorder", @@ -1514,19 +1499,6 @@ dependencies = [ "semver 1.0.20", ] -[[package]] -name = "rustix" -version = "0.38.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ce50cb2e16c2903e30d1cbccfd8387a74b9d4c938b6a4c5ec6cc7556f7a8a0" -dependencies = [ - "bitflags 2.4.1", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - [[package]] name = "rustversion" version = 
"1.0.14" @@ -1675,15 +1647,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "termcolor" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6093bad37da69aab9d123a8091e4be0aa4a03e4d601ec641c327398315f62b64" -dependencies = [ - "winapi-util", -] - [[package]] name = "textwrap" version = "0.11.0" @@ -1783,9 +1746,9 @@ dependencies = [ [[package]] name = "url" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", "idna", diff --git a/Cargo.toml b/Cargo.toml index 422ace7..1c22111 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cramino" -version = "0.13.1" +version = "0.14.0" edition = "2021" authors = ["Wouter De Coster decosterwouter@gmail.com"] license = "MIT" @@ -15,17 +15,19 @@ categories = ["command-line-utilities", "science"] [dependencies] clap = { version = "4.4.7", features = ["derive"] } -rust-htslib = "0.44.1" +rust-htslib = "0.45.0" log = "0.4.0" -env_logger = "0.10.0" +env_logger = "0.11.1" checksums = "0.9.1" chrono = "0.4.22" rayon = "1.5.3" -arrow = "48.0.0" +arrow = "50.0.0" unzip-n = "0.1.2" -itertools = "0.11.0" +itertools = "0.12.1" libz-sys = "1.1.12" +url = "2.5.0" +hts-sys = "2.1.1" [dev-dependencies] ctor = "0.2.4" diff --git a/src/calculations.rs b/src/calculations.rs index 072bdbc..ee8e11d 100644 --- a/src/calculations.rs +++ b/src/calculations.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + pub fn get_n(lengths: &Vec, nb_bases_total: u128, percentile: f64) -> u128 { let mut acc = 0; for val in lengths.iter() { @@ -31,27 +33,6 @@ pub fn median_length(array: &[u128]) -> f64 { } } -/// Returns the median of an array of normalized read counts. 
/// Returns the most frequent read identity, binned to one decimal place.
///
/// Each value is multiplied by ten and truncated to an integer, because
/// `f64` implements neither `Eq` nor `Hash` and so cannot be a `HashMap`
/// key. The winning bin is divided by ten again, so the result carries a
/// single decimal of resolution.
///
/// When several bins share the highest count, the lowest bin wins. Without
/// that rule the answer would follow `HashMap` iteration order, which is
/// not stable between runs.
///
/// # Panics
///
/// Panics if `array` is empty.
pub fn modal_accuracy(array: &[f64]) -> f64 {
    // Bin width is 1 / INFLATE.
    const INFLATE: f64 = 10.0;
    let mut frequencies: HashMap<i32, usize> = HashMap::new();
    for identity in array {
        *frequencies.entry((identity * INFLATE) as i32).or_insert(0) += 1;
    }
    let (mode, _) = frequencies
        .into_iter()
        // Highest count wins; on equal counts the smaller bin ranks higher.
        .max_by(|a, b| a.1.cmp(&b.1).then_with(|| b.0.cmp(&a.0)))
        .expect("Failed getting the modal accuracy!");
    f64::from(mode) / INFLATE
}
-1,6 +1,8 @@ use bam::ext::BamRecordExtensions; use rust_htslib::bam::record::{Aux, Cigar}; use rust_htslib::{bam, bam::Read, htslib}; +use std::env; +use url::Url; pub struct Data { pub lengths: Option>, @@ -23,19 +25,41 @@ pub fn extract(args: &crate::Cli) -> (Data, rust_htslib::bam::Header) { let mut exons = vec![]; let mut bam = if args.input == "-" { bam::Reader::from_stdin().expect("\n\nError reading alignments from stdin.\nDid you include the file header with -h?\n\n\n\n") + } else if args.input.starts_with("s3") || args.input.starts_with("https://") { + if env::var("CURL_CA_BUNDLE").is_err() { + env::set_var("CURL_CA_BUNDLE", "/etc/ssl/certs/ca-certificates.crt"); + } + bam::Reader::from_url(&Url::parse(&args.input).expect("Failed to parse URL")) + .unwrap_or_else(|err| panic!("Error opening remote BAM: {err}")) } else { bam::Reader::from_path(&args.input) .expect("Error opening BAM/CRAM file.\nIs the input file correct?\n\n\n\n") }; + if args.input.ends_with(".cram") & args.reference.is_some() { + // bam.set_cram_option(htslib::CFR_REQUIRED_FIELDS, htslib::sam_fields_SAM_AUX as i32) + // .expect("Failed setting cram options"); + bam.set_reference( + args.reference + .as_ref() + .expect("Failed setting reference for CRAM file"), + ) + .expect("Failed setting reference for CRAM file"); + } + if args.input.ends_with(".cram") { + bam.set_cram_options( + hts_sys::hts_fmt_option_CRAM_OPT_REQUIRED_FIELDS, + hts_sys::sam_fields_SAM_AUX + | hts_sys::sam_fields_SAM_MAPQ + | hts_sys::sam_fields_SAM_CIGAR + | hts_sys::sam_fields_SAM_SEQ, + ) + .expect("Failed setting cram options"); + } let header = bam.header().clone(); let header = rust_htslib::bam::Header::from_template(&header); bam.set_threads(args.threads) .expect("Failure setting decompression threads"); - if let Some(s) = &args.reference { - bam.set_reference(s) - .expect("Failure setting bam/cram reference"); - } let min_read_len = args.min_read_len; // the match statement below is a bit ugly, but it is the 
only way to get a closure // that closure is used for filtering the reads diff --git a/src/file_info.rs b/src/file_info.rs index 1bc2caf..560823b 100644 --- a/src/file_info.rs +++ b/src/file_info.rs @@ -13,9 +13,9 @@ impl BamFile { pub fn file_name(&self) -> String { Path::new(&self.path) .file_name() - .unwrap() + .expect("Could not get file name") .to_str() - .unwrap() + .expect("Could not convert file name to string") .to_string() } pub fn checksum(&self) -> String { @@ -23,11 +23,18 @@ impl BamFile { } pub fn file_time(&self) -> String { - if self.path == "-" { + if self.path == "-" + || self.path.starts_with("http") + || self.path.starts_with("ftp") + || self.path.starts_with("s3") + { return "NA".to_string(); } let metadata = fs::metadata(&self.path); - if let Ok(time) = metadata.unwrap().created() { + if let Ok(time) = metadata + .expect("Failed to extract metadata from file") + .created() + { let datetime: DateTime = time.into(); format!("{}", datetime.format("%d/%m/%Y %T")) } else { diff --git a/src/main.rs b/src/main.rs index a1f670d..65162fe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -63,7 +63,11 @@ pub struct Cli { } pub fn is_file(pathname: &str) -> Result<(), String> { - if pathname == "-" { + if pathname == "-" + || pathname.starts_with("http") + || pathname.starts_with("ftp") + || pathname.starts_with("s3") + { return Ok(()); } let path = PathBuf::from(pathname); @@ -182,6 +186,11 @@ fn generate_main_output( "Mean identity\t{:.2}", identities.iter().sum::() / (num_reads as f64) ); + // modal accuracy has lower precision because it gets inflated and divided by 10, losing everything after the first decimal + println!( + "Modal identity\t{:.1}", + calculations::modal_accuracy(identities) + ); } } @@ -216,6 +225,27 @@ fn extract() { assert!(metrics_from_bam(metrics, args, header).is_ok()) } +// this test is ignored because it uses a local reference file +#[ignore] +#[test] +fn extract_cram() { + let args = Cli { + input: 
"test-data/small-test-phased.cram".to_string(), + threads: 8, + reference: Some("/home/wdecoster/reference/GRCh38.fa".to_string()), + min_read_len: 0, + hist: false, + checksum: false, + arrow: None, + karyotype: false, + phased: false, + spliced: false, + ubam: false, + }; + let (metrics, header) = extract_from_bam::extract(&args); + assert!(metrics_from_bam(metrics, args, header).is_ok()) +} + #[test] fn extract_ubam() { let args = Cli { @@ -234,3 +264,24 @@ fn extract_ubam() { let (metrics, header) = extract_from_bam::extract(&args); assert!(metrics_from_bam(metrics, args, header).is_ok()) } + +// this test is ignored because it uses a local reference file and takes a very long time +#[ignore] +#[test] +fn extract_url() { + let args = Cli { + input: "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1KG_ONT_VIENNA/hg38/HG00096.hg38.cram".to_string(), + threads: 8, + reference: Some("/home/wdecoster/local/1KG_ONT_VIENNA_hg38.fa.gz".to_string()), + min_read_len: 0, + hist: true, + checksum: false, + arrow: None, + karyotype: false, + phased: false, + spliced: false, + ubam: false, + }; + let (metrics, header) = extract_from_bam::extract(&args); + assert!(metrics_from_bam(metrics, args, header).is_ok()) +} diff --git a/test-data/small-test-phased.cram b/test-data/small-test-phased.cram new file mode 100644 index 0000000..9bd687f Binary files /dev/null and b/test-data/small-test-phased.cram differ