Skip to content

Commit

Permalink
bam/io/reader/header: Add reader
Browse files Browse the repository at this point in the history
  • Loading branch information
zaeleus committed Dec 10, 2024
1 parent e429eb7 commit 376d609
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 41 deletions.
98 changes: 66 additions & 32 deletions noodles-bam/src/io/reader/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,35 +9,49 @@ use noodles_sam::{self as sam, header::ReferenceSequences};
use self::reference_sequences::read_reference_sequences;
use crate::MAGIC_NUMBER;

pub(super) fn read_header<R>(reader: &mut R) -> io::Result<sam::Header>
/// A header reader that wraps a mutable borrow of an underlying reader.
///
/// The borrow lasts for `'r`; the wrapped reader itself is not owned.
struct Reader<'r, R> {
// The underlying reader that all header reads are performed on.
inner: &'r mut R,
}

// NOTE(review): this text is a rendered diff without +/- markers. The bare
// statements sitting between the methods below look like the removed body of
// the previous `read_header` — confirm against the repository before compiling.
impl<'r, R> Reader<'r, R>
where
R: Read,
{
read_magic(reader)?;
/// Creates a header reader that borrows `inner` for `'r`.
fn new(inner: &'r mut R) -> Self {
Self { inner }
}

let mut header = read_header_inner(reader)?;
let reference_sequences = read_reference_sequences(reader)?;
/// Reads exactly `MAGIC_NUMBER.len()` bytes from the underlying reader and
/// returns them as-is; no comparison against `MAGIC_NUMBER` happens here.
///
/// # Errors
///
/// Propagates the error from `read_exact`, e.g. when the input is too short.
fn read_magic_number(&mut self) -> io::Result<[u8; MAGIC_NUMBER.len()]> {
let mut buf = [0; MAGIC_NUMBER.len()];
self.inner.read_exact(&mut buf)?;
Ok(buf)
}

if header.reference_sequences().is_empty() {
*header.reference_sequences_mut() = reference_sequences;
} else if !reference_sequences_eq(header.reference_sequences(), &reference_sequences) {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"SAM header and binary reference sequence dictionaries mismatch",
));
/// Reads a little-endian `u32` length from the underlying reader, widens it
/// to `u64`, and returns a `sam_header::Reader` constructed over the same
/// underlying reader with that length.
///
/// # Errors
///
/// Propagates the I/O error from reading the length prefix.
fn raw_sam_header_reader(&mut self) -> io::Result<sam_header::Reader<R>> {
let len = self.inner.read_u32::<LittleEndian>().map(u64::from)?;
Ok(sam_header::Reader::new(self.inner, len))
}

Ok(header)
/// Reads the reference sequences by delegating to the module-level
/// `read_reference_sequences` function on the underlying reader.
fn read_reference_sequences(&mut self) -> io::Result<ReferenceSequences> {
read_reference_sequences(self.inner)
}
}

/// Reads a header from `reader`.
///
/// The stream is wrapped in a header `Reader` for the duration of the call and
/// passed to `read_header_inner`, whose result is returned unchanged.
///
/// # Errors
///
/// Returns whatever error `read_header_inner` reports.
pub(super) fn read_header<R>(reader: &mut R) -> io::Result<sam::Header>
where
    R: Read,
{
    // The wrapper only borrows `reader`, so a temporary is sufficient here.
    read_header_inner(&mut Reader::new(reader))
}

fn read_magic<R>(reader: &mut R) -> io::Result<()>
fn read_magic_number<R>(reader: &mut Reader<R>) -> io::Result<()>
where
R: Read,
{
let mut magic = [0; 4];
reader.read_exact(&mut magic)?;
let magic_number = reader.read_magic_number()?;

if magic == MAGIC_NUMBER {
if magic_number == MAGIC_NUMBER {
Ok(())
} else {
Err(io::Error::new(
Expand All @@ -47,24 +61,44 @@ where
}
}

fn read_header_inner<R>(reader: &mut R) -> io::Result<sam::Header>
/// Reads a complete header through the header `Reader`.
///
/// Steps, in stream order:
/// 1. check the magic number via `read_magic_number`;
/// 2. obtain the raw SAM header reader and parse it with `read_sam_header`;
/// 3. read the reference sequences that follow;
/// 4. reconcile the two reference sequence dictionaries.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidData` when the parsed header already has
/// reference sequences and `reference_sequences_eq` reports that they differ
/// from the ones read in step 3. Errors from the individual read steps are
/// propagated.
// NOTE(review): this text is a rendered diff without +/- markers. The lines
// that use `reader.read_u32` and `sam_header_reader`, and the first
// `let mut parser`, look like removed pre-change lines — confirm.
fn read_header_inner<R>(reader: &mut Reader<R>) -> io::Result<sam::Header>
where
R: Read,
{
let mut parser = sam::header::Parser::default();
read_magic_number(reader)?;

let l_text = reader.read_u32::<LittleEndian>().map(u64::from)?;
let mut sam_header_reader = sam_header::Reader::new(reader, l_text);
let mut raw_sam_header_reader = reader.raw_sam_header_reader()?;
let mut header = read_sam_header(&mut raw_sam_header_reader)?;

let reference_sequences = reader.read_reference_sequences()?;

// The parsed header carries no reference sequences: adopt the ones just read.
if header.reference_sequences().is_empty() {
*header.reference_sequences_mut() = reference_sequences;
// Both sources supplied a dictionary, so they must agree.
} else if !reference_sequences_eq(header.reference_sequences(), &reference_sequences) {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"SAM header and binary reference sequence dictionaries mismatch",
));
}

Ok(header)
}

/// Parses a SAM header from a `sam_header::Reader`.
///
/// Lines are pulled with `read_line` until it reports `0`, and each one is fed
/// to a `sam::header::Parser` via `parse_partial`. Afterwards the rest of the
/// reader is consumed with `discard_to_end` and the parser is finished.
///
/// # Errors
///
/// A `parse_partial` failure is wrapped as `io::ErrorKind::InvalidData`.
/// Errors from `read_line` and `discard_to_end` are propagated.
// NOTE(review): this text is a rendered diff without +/- markers. The two
// lines that mention `sam_header_reader` (a name not bound in this function)
// look like removed pre-change lines — confirm.
fn read_sam_header<R>(reader: &mut sam_header::Reader<R>) -> io::Result<sam::Header>
where
R: Read,
{
let mut parser = sam::header::Parser::default();

// Line buffer handed to `read_line` on every iteration.
let mut buf = Vec::new();

while read_line(&mut sam_header_reader, &mut buf)? != 0 {
while read_line(reader, &mut buf)? != 0 {
parser
.parse_partial(&buf)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
}

sam_header_reader.discard_to_end()?;
// Consume whatever is left in the reader after the last line.
reader.discard_to_end()?;

Ok(parser.finish())
}
Expand Down Expand Up @@ -128,22 +162,22 @@ mod tests {
};

#[test]
fn test_read_magic() -> io::Result<()> {
let data = b"BAM\x01";
let mut reader = &data[..];
assert!(read_magic(&mut reader).is_ok());
fn test_read_magic_number() -> io::Result<()> {
let mut src = &b"BAM\x01"[..];
let mut reader = Reader::new(&mut src);
assert!(read_magic_number(&mut reader).is_ok());

let data = [];
let mut reader = &data[..];
let mut src = &[][..];
let mut reader = Reader::new(&mut src);
assert!(matches!(
read_magic(&mut reader),
read_magic_number(&mut reader),
Err(ref e) if e.kind() == io::ErrorKind::UnexpectedEof
));

let data = b"MThd";
let mut reader = &data[..];
let mut src = &b"MThd"[..];
let mut reader = Reader::new(&mut src);
assert!(matches!(
read_magic(&mut reader),
read_magic_number(&mut reader),
Err(ref e) if e.kind() == io::ErrorKind::InvalidData
));

Expand Down
19 changes: 10 additions & 9 deletions noodles-bam/src/io/reader/header/sam_header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@ use std::io::{self, BufRead, BufReader, Read, Take};

use bstr::ByteSlice;

pub(super) struct Reader<R> {
inner: BufReader<Take<R>>,
/// A buffered, length-limited reader over a mutably borrowed underlying reader.
///
/// `new` builds `inner` as `BufReader::new(inner.take(len))` and starts with
/// `is_eol` set to `true`.
pub(super) struct Reader<'r, R> {
// Buffered view over at most `len` bytes of the borrowed reader.
inner: BufReader<Take<&'r mut R>>,
// NOTE(review): presumably tracks whether the previous read ended at a line
// boundary — the code that updates it is not visible here; confirm.
is_eol: bool,
}

impl<R> Reader<R>
impl<'r, R> Reader<'r, R>
where
R: Read,
{
pub(super) fn new(inner: R, len: u64) -> Self {
pub(super) fn new(inner: &'r mut R, len: u64) -> Self {
Self {
inner: BufReader::new(inner.take(len)),
is_eol: true,
Expand All @@ -37,7 +37,7 @@ where
}
}

impl<R> Read for Reader<R>
impl<R> Read for Reader<'_, R>
where
R: Read,
{
Expand All @@ -49,7 +49,7 @@ where
}
}

impl<R> BufRead for Reader<R>
impl<R> BufRead for Reader<'_, R>
where
R: Read,
{
Expand Down Expand Up @@ -83,10 +83,11 @@ mod tests {
fn test_read_with_trailing_nul_padding() -> io::Result<()> {
const DATA: &[u8] = b"@HD\tVN:1.6\n";

let mut src = DATA.to_vec();
src.resize(1 << 10, 0);
let mut buf = DATA.to_vec();
buf.resize(1 << 10, 0);

let mut reader = Reader::new(&src[..], 1 << 10);
let mut src = &buf[..];
let mut reader = Reader::new(&mut src, 1 << 10);

let mut buf = Vec::new();
reader.read_to_end(&mut buf)?;
Expand Down

0 comments on commit 376d609

Please sign in to comment.