Skip to content

Commit

Permalink
Improve ChecksummedBytes::extend and clarify data integrity guarantee (
Browse files Browse the repository at this point in the history
…#575)

* Move ChecksummedBytes into a new module

Signed-off-by: Alessandro Passaro <[email protected]>

* Improve ChecksummedBytes::extend and clarify data integrity guarantee

ChecksummedBytes maintains a data buffer and a checksum and guarantees that only validated data can be accessed. Transformations such as `split_off`, `extend`, or `slice` (introduced in this change), may trigger a validation (and return an IntegrityError on failure), or propagate existing checksum(s) if possible, allowing for later validation.

This change clarifies the data integrity guarantee in the docs and optimizes the extend method to avoid re-validation when the checksums for both slices can be combined. It also avoid a redundant buffer allocation.

Signed-off-by: Alessandro Passaro <[email protected]>

* Add split_off tests

Signed-off-by: Alessandro Passaro <[email protected]>

* Make clearer that shrink_to_fit does not copy data

Signed-off-by: Alessandro Passaro <[email protected]>

---------

Signed-off-by: Alessandro Passaro <[email protected]>
  • Loading branch information
passaro authored Oct 26, 2023
1 parent 71277e7 commit 524eac6
Show file tree
Hide file tree
Showing 10 changed files with 469 additions and 299 deletions.
453 changes: 453 additions & 0 deletions mountpoint-s3/src/checksums.rs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion mountpoint-s3/src/data_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::ops::RangeBounds;

use thiserror::Error;

pub use crate::prefetch::checksummed_bytes::ChecksummedBytes;
pub use crate::checksums::ChecksummedBytes;

/// Indexes blocks within a given object.
pub type BlockIndex = u64;
Expand Down
16 changes: 7 additions & 9 deletions mountpoint-s3/src/data_cache/in_memory_data_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,20 +64,18 @@ mod tests {

use bytes::Bytes;

use mountpoint_s3_crt::checksums::crc32c;

type TestCacheKey = String;

#[test]
fn test_put_get() {
let data_1 = Bytes::from_static(b"Hello world");
let data_1 = ChecksummedBytes::new(data_1.clone(), crc32c::checksum(&data_1));
let data_1 = ChecksummedBytes::from_bytes(data_1.clone());
let data_2 = Bytes::from_static(b"Foo bar");
let data_2 = ChecksummedBytes::new(data_2.clone(), crc32c::checksum(&data_2));
let data_2 = ChecksummedBytes::from_bytes(data_2.clone());
let data_3 = Bytes::from_static(b"Baz");
let data_3 = ChecksummedBytes::new(data_3.clone(), crc32c::checksum(&data_3));
let data_3 = ChecksummedBytes::from_bytes(data_3.clone());

let mut cache = InMemoryDataCache::new(8 * 1024 * 1024);
let cache = InMemoryDataCache::new(8 * 1024 * 1024);
let cache_key_1: TestCacheKey = String::from("a");
let cache_key_2: TestCacheKey = String::from("b");

Expand Down Expand Up @@ -141,11 +139,11 @@ mod tests {
#[test]
fn test_cached_indices() {
let data_1 = Bytes::from_static(b"Hello world");
let data_1 = ChecksummedBytes::new(data_1.clone(), crc32c::checksum(&data_1));
let data_1 = ChecksummedBytes::from_bytes(data_1.clone());
let data_2 = Bytes::from_static(b"Foo bar");
let data_2 = ChecksummedBytes::new(data_2.clone(), crc32c::checksum(&data_2));
let data_2 = ChecksummedBytes::from_bytes(data_2.clone());

let mut cache = InMemoryDataCache::new(8 * 1024 * 1024);
let cache = InMemoryDataCache::new(8 * 1024 * 1024);
let cache_key_1: TestCacheKey = String::from("a");
let cache_key_2: TestCacheKey = String::from("b");

Expand Down
1 change: 1 addition & 0 deletions mountpoint-s3/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod checksums;
mod data_cache;
pub mod fs;
pub mod fuse;
Expand Down
3 changes: 1 addition & 2 deletions mountpoint-s3/src/prefetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
//! we increase the size of the GetObject requests up to some maximum. If the reader ever makes a
//! non-sequential read, we abandon the prefetching and start again with the minimum request size.
pub mod checksummed_bytes;
mod feed;
mod part;
mod part_queue;
Expand All @@ -26,7 +25,7 @@ use mountpoint_s3_client::ObjectClient;
use thiserror::Error;
use tracing::{debug_span, error, trace, Instrument};

use crate::prefetch::checksummed_bytes::{ChecksummedBytes, IntegrityError};
use crate::checksums::{ChecksummedBytes, IntegrityError};
use crate::prefetch::feed::{ClientPartFeed, ObjectPartFeed};
use crate::prefetch::part::Part;
use crate::prefetch::part_queue::{unbounded_part_queue, PartQueue};
Expand Down
265 changes: 0 additions & 265 deletions mountpoint-s3/src/prefetch/checksummed_bytes.rs

This file was deleted.

3 changes: 2 additions & 1 deletion mountpoint-s3/src/prefetch/feed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ use mountpoint_s3_client::{
use mountpoint_s3_crt::checksums::crc32c;
use tracing::{error, trace};

use crate::prefetch::{checksummed_bytes::ChecksummedBytes, part::Part, part_queue::PartQueueProducer};
use crate::checksums::ChecksummedBytes;
use crate::prefetch::{part::Part, part_queue::PartQueueProducer};

/// A generic interface to retrieve data from objects in a S3-like store.
#[async_trait]
Expand Down
2 changes: 1 addition & 1 deletion mountpoint-s3/src/prefetch/part.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use thiserror::Error;

use super::checksummed_bytes::ChecksummedBytes;
use crate::checksums::ChecksummedBytes;

/// A self-identifying part of an S3 object. Users can only retrieve the bytes from this part if
/// they can prove they have the correct offset and key.
Expand Down
2 changes: 1 addition & 1 deletion mountpoint-s3/src/prefetch/part_queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ impl<E: std::error::Error + Send + Sync> PartQueueProducer<E> {

#[cfg(test)]
mod tests {
use crate::prefetch::checksummed_bytes::ChecksummedBytes;
use crate::checksums::ChecksummedBytes;

use super::*;

Expand Down
Loading

0 comments on commit 524eac6

Please sign in to comment.