Skip to content

Commit

Permalink
[util] remove to_hashmap fixes #29.
Browse files Browse the repository at this point in the history
  • Loading branch information
ArtRand committed Jun 16, 2023
1 parent a30dc4c commit 7560f42
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 4 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [pileup, extract, sample-probs, summary] Allow narrowing of analysis to specific sites with `--include-bed`.
- [summary, sample-probs] Add `--only-mapped` flag that will only report on base modification probabilities if they are mapped to the reference.
- [pileup] Allow partitioning counts to separate bedMethyl files based on SAM tags with `--partition-tag` option.
### Fixes
- [adjust-mods, update-tags, call-mods] Panic when failure to parse SAM header, fixes #29.


## [v0.1.8]
Expand All @@ -21,7 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixes
- [call-mods] `--no-filtering` flag properly handled.
### Deprecated
- [adjust-mods] In order to allow `--edge-filter` without also performing an ignore or convert, in the next release `--ignore` will require a explicit argument (`h` is no longer the default). In this release, when no edge filter, conversion, or ignore is provided, `h` will be used for ignore, but this behavior will be removed in the next release.
- [adjust-mods] In order to allow `--edge-filter` without also performing an ignore or convert, in the next release `--ignore` will require an explicit argument (`h` is no longer the default). In this release, when no edge filter, conversion, or ignore is provided, `h` will be used for ignore, but this behavior will be removed in the next release.

## [v0.1.7]
### Changes
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ derive-new = "0.5.9"
histo_fp = "0.2.1"
prettytable-rs = "0.10.0"
rust-lapper = "1.1.0"
linear-map = "1.2.0"

[dev-dependencies]
similar-asserts = "1.4.2"
62 changes: 59 additions & 3 deletions src/util.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
use anyhow::{anyhow, bail};
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};

use std::string::FromUtf8Error;

use anyhow::Result as AnyhowResult;
use derive_new::new;
use indicatif::{ProgressBar, ProgressStyle};
use log::debug;
use linear_map::LinearMap;
use log::{debug, error};
use regex::Regex;
use rust_htslib::bam::{
self, ext::BamRecordExtensions, header::HeaderRecord, record::Aux,
HeaderView,
Expand Down Expand Up @@ -328,8 +330,62 @@ impl Region {
}
}

// shouldn't need this once it's fixed in rust-htslib or the repo moves to noodles..
fn header_to_hashmap(
header: &bam::Header,
) -> anyhow::Result<HashMap<String, Vec<LinearMap<String, String>>>> {
let mut header_map = HashMap::default();
let record_type_regex = Regex::new(r"@([A-Z][A-Z])").unwrap();
let tag_regex = Regex::new(r"([A-Za-z][A-Za-z0-9]):([ -~]*)").unwrap();

if let Ok(header_string) = String::from_utf8(header.to_bytes()) {
for line in header_string.split('\n').filter(|x| !x.is_empty()) {
let parts: Vec<_> =
line.split('\t').filter(|x| !x.is_empty()).collect();
if parts.is_empty() {
continue;
}
let record_type = record_type_regex
.captures(parts[0])
.and_then(|captures| captures.get(1))
.map(|m| m.as_str().to_owned());

if let Some(record_type) = record_type {
if record_type == "CO" {
continue;
}
let mut field = LinearMap::default();
for part in parts.iter().skip(1) {
if let Some(cap) = tag_regex.captures(part) {
let tag = cap.get(1).unwrap().as_str().to_owned();
let value = cap.get(2).unwrap().as_str().to_owned();
field.insert(tag, value);
} else {
debug!("encounted illegal record line {line}");
}
}
header_map
.entry(record_type)
.or_insert_with(Vec::new)
.push(field);
} else {
debug!("encountered illegal record type in line {line}");
}
}
Ok(header_map)
} else {
bail!("failed to parse header string")
}
}

pub fn add_modkit_pg_records(header: &mut bam::Header) {
let header_map = header.to_hashmap();
let header_map = match header_to_hashmap(&header) {
Ok(hm) => hm,
Err(_) => {
error!("failed to parse input BAM header, not adding PG header record for modkit");
return;
}
};
let (id, pp) = if let Some(pg_tags) = header_map.get("PG") {
let modkit_invocations = pg_tags.iter().filter_map(|tags| {
tags.get("ID").and_then(|v| {
Expand Down

0 comments on commit 7560f42

Please sign in to comment.