diff --git a/noodles-bam/src/io/reader/header.rs b/noodles-bam/src/io/reader/header.rs index 0331d14fb..c9fd80508 100644 --- a/noodles-bam/src/io/reader/header.rs +++ b/noodles-bam/src/io/reader/header.rs @@ -9,35 +9,49 @@ use noodles_sam::{self as sam, header::ReferenceSequences}; use self::reference_sequences::read_reference_sequences; use crate::MAGIC_NUMBER; -pub(super) fn read_header(reader: &mut R) -> io::Result +struct Reader<'r, R> { + inner: &'r mut R, +} + +impl<'r, R> Reader<'r, R> where R: Read, { - read_magic(reader)?; + fn new(inner: &'r mut R) -> Self { + Self { inner } + } - let mut header = read_header_inner(reader)?; - let reference_sequences = read_reference_sequences(reader)?; + fn read_magic_number(&mut self) -> io::Result<[u8; MAGIC_NUMBER.len()]> { + let mut buf = [0; MAGIC_NUMBER.len()]; + self.inner.read_exact(&mut buf)?; + Ok(buf) + } - if header.reference_sequences().is_empty() { - *header.reference_sequences_mut() = reference_sequences; - } else if !reference_sequences_eq(header.reference_sequences(), &reference_sequences) { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - "SAM header and binary reference sequence dictionaries mismatch", - )); + fn raw_sam_header_reader(&mut self) -> io::Result> { + let len = self.inner.read_u32::().map(u64::from)?; + Ok(sam_header::Reader::new(self.inner, len)) } - Ok(header) + fn read_reference_sequences(&mut self) -> io::Result { + read_reference_sequences(self.inner) + } +} + +pub(super) fn read_header(reader: &mut R) -> io::Result +where + R: Read, +{ + let mut header_reader = Reader::new(reader); + read_header_inner(&mut header_reader) } -fn read_magic(reader: &mut R) -> io::Result<()> +fn read_magic_number(reader: &mut Reader) -> io::Result<()> where R: Read, { - let mut magic = [0; 4]; - reader.read_exact(&mut magic)?; + let magic_number = reader.read_magic_number()?; - if magic == MAGIC_NUMBER { + if magic_number == MAGIC_NUMBER { Ok(()) } else { Err(io::Error::new( @@ -47,24 +61,44 @@ where } } -fn read_header_inner(reader: &mut R) -> io::Result +fn read_header_inner(reader: &mut Reader) -> io::Result where R: Read, { - let mut parser = sam::header::Parser::default(); + read_magic_number(reader)?; - let l_text = reader.read_u32::().map(u64::from)?; - let mut sam_header_reader = sam_header::Reader::new(reader, l_text); + let mut raw_sam_header_reader = reader.raw_sam_header_reader()?; + let mut header = read_sam_header(&mut raw_sam_header_reader)?; + + let reference_sequences = reader.read_reference_sequences()?; + + if header.reference_sequences().is_empty() { + *header.reference_sequences_mut() = reference_sequences; + } else if !reference_sequences_eq(header.reference_sequences(), &reference_sequences) { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "SAM header and binary reference sequence dictionaries mismatch", + )); + } + + Ok(header) +} + +fn read_sam_header(reader: &mut sam_header::Reader) -> io::Result +where + R: Read, +{ + let mut parser = sam::header::Parser::default(); let mut buf = Vec::new(); - while read_line(&mut sam_header_reader, &mut buf)? != 0 { + while read_line(reader, &mut buf)? != 0 { parser .parse_partial(&buf) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; } - sam_header_reader.discard_to_end()?; + reader.discard_to_end()?; Ok(parser.finish()) } @@ -128,22 +162,22 @@ mod tests { }; #[test] - fn test_read_magic() -> io::Result<()> { - let data = b"BAM\x01"; - let mut reader = &data[..]; - assert!(read_magic(&mut reader).is_ok()); + fn test_read_magic_number() -> io::Result<()> { + let mut src = &b"BAM\x01"[..]; + let mut reader = Reader::new(&mut src); + assert!(read_magic_number(&mut reader).is_ok()); - let data = []; - let mut reader = &data[..]; + let mut src = &[][..]; + let mut reader = Reader::new(&mut src); assert!(matches!( - read_magic(&mut reader), + read_magic_number(&mut reader), Err(ref e) if e.kind() == io::ErrorKind::UnexpectedEof )); - let data = b"MThd"; - let mut reader = &data[..]; + let mut src = &b"MThd"[..]; + let mut reader = Reader::new(&mut src); assert!(matches!( - read_magic(&mut reader), + read_magic_number(&mut reader), Err(ref e) if e.kind() == io::ErrorKind::InvalidData )); diff --git a/noodles-bam/src/io/reader/header/sam_header.rs b/noodles-bam/src/io/reader/header/sam_header.rs index db551232a..ea341969b 100644 --- a/noodles-bam/src/io/reader/header/sam_header.rs +++ b/noodles-bam/src/io/reader/header/sam_header.rs @@ -2,16 +2,16 @@ use std::io::{self, BufRead, BufReader, Read, Take}; use bstr::ByteSlice; -pub(super) struct Reader { - inner: BufReader>, +pub(super) struct Reader<'r, R> { + inner: BufReader>, is_eol: bool, } -impl Reader +impl<'r, R> Reader<'r, R> where R: Read, { - pub(super) fn new(inner: R, len: u64) -> Self { + pub(super) fn new(inner: &'r mut R, len: u64) -> Self { Self { inner: BufReader::new(inner.take(len)), is_eol: true, @@ -37,7 +37,7 @@ where } } -impl Read for Reader +impl Read for Reader<'_, R> where R: Read, { @@ -49,7 +49,7 @@ where } } -impl BufRead for Reader +impl BufRead for Reader<'_, R> where R: Read, { @@ -83,10 +83,11 @@ mod tests { fn test_read_with_trailing_nul_padding() -> io::Result<()> { const DATA: &[u8] = b"@HD\tVN:1.6\n"; - let mut src = DATA.to_vec(); - src.resize(1 << 10, 0); + let mut buf = DATA.to_vec(); + buf.resize(1 << 10, 0); - let mut reader = Reader::new(&src[..], 1 << 10); + let mut src = &buf[..]; + let mut reader = Reader::new(&mut src, 1 << 10); let mut buf = Vec::new(); reader.read_to_end(&mut buf)?;