Skip to content

Commit

Permalink
bgzf/gzi/index: Remove first entry
Browse files Browse the repository at this point in the history
The first entry is now implicitly `(0, 0)`. As a result, the number of
entries corresponds to n - 1 blocks. This now follows the same layout as
the physical index.
  • Loading branch information
zaeleus committed Jan 16, 2025
1 parent 69b1994 commit 3d0fa23
Show file tree
Hide file tree
Showing 13 changed files with 50 additions and 57 deletions.
6 changes: 6 additions & 0 deletions noodles-bgzf/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@

* bgzf/gzi: Move convenience `read` function to `fs` module.

* bgzf/gzi/index: Remove first entry.

The first entry is now implicitly `(0, 0)`. As a result, the number of
entries corresponds to n - 1 blocks. This now follows the same layout as
the physical index.

### Deprecated

* bgzf: Deprecate async re-exports (`AsyncReader` and `AsyncWriter`).
Expand Down
4 changes: 2 additions & 2 deletions noodles-bgzf/src/async/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ where
///
/// let mut reader = bgzf::r#async::Reader::new(io::empty());
///
/// let index = gzi::Index::from(vec![(0, 0)]);
/// let index = gzi::Index::default();
/// reader.seek_by_uncompressed_position(&index, 0).await?;
/// # Ok(())
/// # }
Expand All @@ -207,7 +207,7 @@ where
index: &gzi::Index,
pos: u64,
) -> io::Result<u64> {
let record = index.query(pos).expect("invalid index");
let record = index.query(pos);

let cpos = record.0;
let upos = u16::try_from(pos - record.1)
Expand Down
2 changes: 1 addition & 1 deletion noodles-bgzf/src/gzi/async/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ where
/// # #[tokio::main]
/// # async fn main() -> tokio::io::Result<()> {
/// use noodles_bgzf::gzi;
/// let index = gzi::Index::from(vec![(0, 0)]);
/// let index = gzi::Index::default();
/// gzi::r#async::fs::write("in.gz.gzi", &index).await?;
/// # Ok(())
/// # }
Expand Down
7 changes: 3 additions & 4 deletions noodles-bgzf/src/gzi/async/io/reader/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ where
usize::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
})?;

let mut offsets = vec![(0, 0)];
offsets.reserve(len);
let mut offsets = Vec::with_capacity(len);

for _ in 0..len {
let compressed = reader.read_u64_le().await?;
Expand Down Expand Up @@ -47,7 +46,7 @@ mod tests {

assert_eq!(
read_index(&mut reader).await?,
Index::from(vec![(0, 0), (4668, 21294), (23810, 86529)])
Index::from(vec![(4668, 21294), (23810, 86529)])
);

Ok(())
Expand All @@ -57,7 +56,7 @@ mod tests {
async fn test_read_index_with_no_entries() -> io::Result<()> {
let src = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; // len = 0
let mut reader = &src[..];
assert_eq!(read_index(&mut reader).await?, Index::from(vec![(0, 0)]));
assert_eq!(read_index(&mut reader).await?, Index::default());
Ok(())
}

Expand Down
2 changes: 1 addition & 1 deletion noodles-bgzf/src/gzi/async/io/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ where
/// # async fn main() -> tokio::io::Result<()> {
/// use noodles_bgzf::gzi;
/// use tokio::io;
/// let index = gzi::Index::from(vec![(0, 0)]);
/// let index = gzi::Index::default();
/// let mut writer = gzi::r#async::io::Writer::new(io::sink());
/// writer.write_index(&index).await?;
/// # Ok(())
Expand Down
18 changes: 5 additions & 13 deletions noodles-bgzf/src/gzi/async/io/writer/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,12 @@ where
{
let index = index.as_ref();

if index.is_empty() {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"index is empty",
));
}

// SAFETY: `index` is nonempty.
let len = u64::try_from(index.len() - 1)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
let len =
u64::try_from(index.len()).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;

writer.write_u64_le(len).await?;

for (compressed_pos, uncompressed_pos) in index.iter().skip(1).copied() {
for &(compressed_pos, uncompressed_pos) in index {
writer.write_u64_le(compressed_pos).await?;
writer.write_u64_le(uncompressed_pos).await?;
}
Expand All @@ -37,7 +29,7 @@ mod tests {
async fn test_write_index() -> io::Result<()> {
let mut buf = Vec::new();

let index = Index::from(vec![(0, 0), (4668, 21294), (23810, 86529)]);
let index = Index::from(vec![(4668, 21294), (23810, 86529)]);
write_index(&mut buf, &index).await?;

let expected = [
Expand All @@ -56,7 +48,7 @@ mod tests {
#[tokio::test]
async fn test_write_index_with_no_entries() -> io::Result<()> {
let mut buf = Vec::new();
let index = Index::from(vec![(0, 0)]);
let index = Index::default();
write_index(&mut buf, &index).await?;
assert_eq!(buf, [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);
Ok(())
Expand Down
2 changes: 1 addition & 1 deletion noodles-bgzf/src/gzi/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ where
///
/// ```no_run
/// use noodles_bgzf::gzi;
/// let index = gzi::Index::from(vec![(0, 0)]);
/// let index = gzi::Index::default();
/// gzi::fs::write("in.gz.gzi", &index)?;
/// # Ok::<(), std::io::Error>(())
/// ```
Expand Down
31 changes: 18 additions & 13 deletions noodles-bgzf/src/gzi/index.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
/// A gzip index.
/// A gzip index (GZI).
///
/// A gzip index holds compressed-uncompressed position pairs.
///
/// Like the physical index, this does _not_ include the position of the first block, which is
/// implicitly at 0.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Index(Vec<(u64, u64)>);

Expand All @@ -11,21 +16,21 @@ impl Index {
/// use noodles_bgzf::gzi;
///
/// let index = gzi::Index::default();
/// assert!(index.query(0).is_none());
/// assert_eq!(index.query(0), (0, 0));
///
/// let index = gzi::Index::from(vec![(0, 0), (8, 21), (13, 55)]);
/// assert_eq!(index.query(0), Some((0, 0)));
/// assert_eq!(index.query(13), Some((0, 0)));
/// assert_eq!(index.query(34), Some((8, 21)));
/// assert_eq!(index.query(89), Some((13, 55)));
/// let index = gzi::Index::from(vec![(8, 21), (13, 55)]);
/// assert_eq!(index.query(0), (0, 0));
/// assert_eq!(index.query(13), (0, 0));
/// assert_eq!(index.query(34), (8, 21));
/// assert_eq!(index.query(89), (13, 55));
/// ```
pub fn query(&self, pos: u64) -> Option<(u64, u64)> {
if self.0.is_empty() {
None
pub fn query(&self, pos: u64) -> (u64, u64) {
let i = self.0.partition_point(|r| r.1 <= pos);

if i == 0 {
(0, 0)
} else {
let i = self.0.partition_point(|r| r.1 <= pos);
// SAFETY: `i` is > 0.
Some(self.0[i - 1])
self.0[i - 1]
}
}
}
Expand Down
7 changes: 3 additions & 4 deletions noodles-bgzf/src/gzi/io/reader/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ where
usize::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
})?;

let mut offsets = vec![(0, 0)];
offsets.reserve(len);
let mut offsets = Vec::with_capacity(len);

for _ in 0..len {
let compressed = reader.read_u64::<LittleEndian>()?;
Expand Down Expand Up @@ -48,7 +47,7 @@ mod tests {
let mut reader = &src[..];
assert_eq!(
read_index(&mut reader)?,
Index::from(vec![(0, 0), (4668, 21294), (23810, 86529)])
Index::from(vec![(4668, 21294), (23810, 86529)])
);

Ok(())
Expand All @@ -58,7 +57,7 @@ mod tests {
fn test_read_index_with_no_entries() -> io::Result<()> {
let src = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; // len = 0
let mut reader = &src[..];
assert_eq!(read_index(&mut reader)?, Index::from(vec![(0, 0)]));
assert_eq!(read_index(&mut reader)?, Index::default());
Ok(())
}

Expand Down
2 changes: 1 addition & 1 deletion noodles-bgzf/src/gzi/io/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ where
/// ```
/// # use std::io;
/// use noodles_bgzf::gzi;
/// let index = gzi::Index::from(vec![(0, 0)]);
/// let index = gzi::Index::default();
/// let mut writer = gzi::io::Writer::new(io::sink());
/// writer.write_index(&index)?;
/// # Ok::<_, std::io::Error>(())
Expand Down
18 changes: 5 additions & 13 deletions noodles-bgzf/src/gzi/io/writer/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,12 @@ where
{
let index = index.as_ref();

if index.is_empty() {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"index is empty",
));
}

// SAFETY: `index` is nonempty.
let len = u64::try_from(index.len() - 1)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
let len =
u64::try_from(index.len()).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;

writer.write_u64::<LittleEndian>(len)?;

for (compressed_pos, uncompressed_pos) in index.iter().skip(1).copied() {
for &(compressed_pos, uncompressed_pos) in index {
writer.write_u64::<LittleEndian>(compressed_pos)?;
writer.write_u64::<LittleEndian>(uncompressed_pos)?;
}
Expand All @@ -39,7 +31,7 @@ mod tests {
fn test_write_index() -> io::Result<()> {
let mut buf = Vec::new();

let index = Index::from(vec![(0, 0), (4668, 21294), (23810, 86529)]);
let index = Index::from(vec![(4668, 21294), (23810, 86529)]);
write_index(&mut buf, &index)?;

let expected = [
Expand All @@ -58,7 +50,7 @@ mod tests {
#[test]
fn test_write_index_with_no_entries() -> io::Result<()> {
let mut buf = Vec::new();
let index = Index::from(vec![(0, 0)]);
let index = Index::default();
write_index(&mut buf, &index)?;
assert_eq!(buf, [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);
Ok(())
Expand Down
2 changes: 1 addition & 1 deletion noodles-bgzf/src/multithreaded_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ where
unimplemented!();
};

let record = index.query(pos).expect("invalid index");
let record = index.query(pos);

let cpos = record.0;
self.get_mut().seek(SeekFrom::Start(cpos))?;
Expand Down
6 changes: 3 additions & 3 deletions noodles-bgzf/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ where
///
/// let mut reader = bgzf::Reader::new(io::empty());
///
/// let index = gzi::Index::from(vec![(0, 0)]);
/// let index = gzi::Index::default();
/// reader.seek_by_uncompressed_position(&index, 0)?;
/// # Ok::<_, io::Error>(())
/// ```
Expand All @@ -203,7 +203,7 @@ where
index: &gzi::Index,
pos: u64,
) -> io::Result<u64> {
let record = index.query(pos).expect("invalid index");
let record = index.query(pos);

let cpos = record.0;
self.inner.seek(SeekFrom::Start(cpos))?;
Expand Down Expand Up @@ -400,7 +400,7 @@ mod tests {
0x02, 0x00, 0x1b, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];

let index = gzi::Index::from(vec![(0, 0), (35, 7)]);
let index = gzi::Index::from(vec![(35, 7)]);

let mut reader = Reader::new(Cursor::new(&data));

Expand Down

0 comments on commit 3d0fa23

Please sign in to comment.