-
Notifications
You must be signed in to change notification settings - Fork 53
/
bam_fastq.rs
126 lines (100 loc) · 2.9 KB
/
bam_fastq.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
//! Converts a BAM file to the FASTQ format.
//!
//! This example only supports single segment reads.
//!
//! The result matches the output of `samtools fastq <src>`.
use std::{
env,
io::{self, BufWriter, Write},
};
use noodles_bam as bam;
use noodles_sam::alignment::record::Flags;
const FILTERS: Flags = Flags::SECONDARY.union(Flags::SUPPLEMENTARY);
fn main() -> io::Result<()> {
let src = env::args().nth(1).expect("missing src");
let mut reader = bam::io::reader::Builder.build_from_path(src)?;
reader.read_header()?;
let stdout = io::stdout().lock();
let mut writer = BufWriter::new(stdout);
for result in reader.records() {
let record = result?;
let flags = record.flags();
if flags.is_segmented() {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"this example only supports single segment reads",
));
} else if flags.intersects(FILTERS) {
continue;
}
write_record(&mut writer, &record)?;
}
Ok(())
}
fn write_record<W>(writer: &mut W, record: &bam::Record) -> io::Result<()>
where
W: Write,
{
const DEFINITION_PREFIX: u8 = b'@';
const MISSING_NAME: &[u8] = b"*";
const SEPARATOR: u8 = b'+';
const LINE_FEED: u8 = b'\n';
writer.write_all(&[DEFINITION_PREFIX])?;
let name = record
.name()
.map(|name| name.as_ref())
.unwrap_or(MISSING_NAME);
writer.write_all(name)?;
writer.write_all(&[LINE_FEED])?;
let is_reversed_complemented = record.flags().is_reverse_complemented();
let bases: Vec<_> = record.sequence().iter().collect();
if is_reversed_complemented {
for base in bases.into_iter().rev().map(complement_base) {
writer.write_all(&[base])?;
}
} else {
for base in bases {
writer.write_all(&[base])?;
}
}
writer.write_all(&[LINE_FEED])?;
writer.write_all(&[SEPARATOR, LINE_FEED])?;
let quality_scores = record.quality_scores();
let scores = quality_scores.as_ref().iter().copied().map(encode_score);
if is_reversed_complemented {
for n in scores.rev() {
writer.write_all(&[n])?;
}
} else {
for n in scores {
writer.write_all(&[n])?;
}
}
writer.write_all(&[LINE_FEED])?;
Ok(())
}
fn complement_base(b: u8) -> u8 {
match b {
b'A' => b'T',
b'C' => b'G',
b'G' => b'C',
b'T' => b'A',
b'U' => b'A',
b'W' => b'W',
b'S' => b'S',
b'M' => b'K',
b'K' => b'M',
b'R' => b'Y',
b'Y' => b'R',
b'B' => b'V',
b'D' => b'H',
b'H' => b'D',
b'V' => b'B',
b'N' => b'N',
_ => unreachable!(),
}
}
fn encode_score(n: u8) -> u8 {
const OFFSET: u8 = b'!';
n.checked_add(OFFSET).expect("attempt to add with overflow")
}