From cd81122998d7f8b7dad6adb66a62842f57a6892d Mon Sep 17 00:00:00 2001 From: Daniel Carl Jones Date: Fri, 3 Nov 2023 13:29:03 +0000 Subject: [PATCH 1/3] Move ETag into DiskDataCache hashed block path Signed-off-by: Daniel Carl Jones --- .../src/data_cache/disk_data_cache.rs | 47 +++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/mountpoint-s3/src/data_cache/disk_data_cache.rs b/mountpoint-s3/src/data_cache/disk_data_cache.rs index 864db62e1..07c96d0c8 100644 --- a/mountpoint-s3/src/data_cache/disk_data_cache.rs +++ b/mountpoint-s3/src/data_cache/disk_data_cache.rs @@ -158,13 +158,13 @@ impl DiskDataCache { let mut path = self.cache_directory.join(CACHE_VERSION); // An S3 key may be up to 1024 UTF-8 bytes long, which exceeds the maximum UNIX file name length. - // Instead, we hash the key. - // The risk of collisions is mitigated as we will ignore blocks read that contain the wrong S3 key, etc.. - let encoded_s3_key = hex::encode(Sha256::digest(cache_key.s3_key.as_bytes())); + // Instead, the path contains a hash of the S3 key and ETag. + // The risk of collisions is mitigated as we ignore blocks read that contain the wrong S3 key, etc.. + let hashed_cache_key = hash_cache_key(cache_key); + // TODO: Split directory into subdirectories. // Take the first few chars of hash to avoid hitting any FS-specific maximum number of directory entries. - path.push(encoded_s3_key); - path.push(cache_key.etag.as_str()); + path.push(hashed_cache_key); path } @@ -176,6 +176,16 @@ impl DiskDataCache { } } +/// Hash the cache key using its fields, returning the hash encoded as hexadecimal in a new [String]. +fn hash_cache_key(cache_key: &CacheKey) -> String { + let CacheKey { s3_key, etag } = cache_key; + + let mut hasher = Sha256::new(); + hasher.update(s3_key.as_bytes()); + hasher.update(etag.as_str().as_bytes()); + hex::encode(hasher.finalize()) +} + impl DataCache for DiskDataCache { fn get_block(&self, cache_key: &CacheKey, block_idx: BlockIndex) -> DataCacheResult> { let path = self.get_path_for_block(cache_key, block_idx); @@ -254,7 +264,6 @@ mod tests { use super::*; use mountpoint_s3_client::types::ETag; - use test_case::test_case; #[test] fn test_block_format_version_requires_update() { @@ -276,30 +285,30 @@ mod tests { ); } - #[test_case("hello"; "simple string")] - #[test_case("foo/bar/baz"; "with forward slashes")] - #[test_case("hello+world"; "with plus char")] - #[test_case("hello\\ world"; "backslash")] - #[test_case("hello=world"; "equals")] - #[test_case("lookšŸŽ”emoji"; "emoji")] - fn test_get_path_for_key(s3_key: &str) { + #[test] + fn test_get_path_for_key() { let cache_dir = PathBuf::from("mountpoint-cache/"); let data_cache = DiskDataCache::new(cache_dir, 1024); - let encoded_s3_key = hex::encode(Sha256::digest(s3_key.as_bytes())); + let s3_key = "a".repeat(266); + let etag = ETag::for_tests(); let key = CacheKey { etag, s3_key: s3_key.to_owned(), }; - let expected = vec!["mountpoint-cache", CACHE_VERSION, &encoded_s3_key, key.etag.as_str()]; + let expected = vec![ + "mountpoint-cache", + CACHE_VERSION, + "5931fd6bf1fe4eb26db321dda8c5a8917750d8e3a8a984fdbf028b3df59e89ae", + ]; let path = data_cache.get_path_for_key(&key); let results: Vec = path.iter().map(ToOwned::to_owned).collect(); assert_eq!(expected, results); } #[test] - fn test_get_path_for_key_very_long_key() { + fn test_get_path_for_block() { let cache_dir = PathBuf::from("mountpoint-cache/"); let data_cache = DiskDataCache::new(cache_dir, 1024); @@ -313,10 +322,10 @@ mod tests { let expected = vec![ "mountpoint-cache", CACHE_VERSION, - "a7bf334bec6f17021671033b243b8689757212496cd525ba9873addde87b0c56", - key.etag.as_str(), + "5931fd6bf1fe4eb26db321dda8c5a8917750d8e3a8a984fdbf028b3df59e89ae", + "5.block", ]; - let path = data_cache.get_path_for_key(&key); + let path = data_cache.get_path_for_block(&key, 5); let results: Vec = path.iter().map(ToOwned::to_owned).collect(); assert_eq!(expected, results); } From 13da61b890e45b3a68ec004500ec0148c5a1c81a Mon Sep 17 00:00:00 2001 From: Daniel Carl Jones Date: Fri, 10 Nov 2023 14:40:23 +0000 Subject: [PATCH 2/3] Test hash_cache_key separately from get_path_for_key/get_path_for_block Signed-off-by: Daniel Carl Jones --- .../src/data_cache/disk_data_cache.rs | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/mountpoint-s3/src/data_cache/disk_data_cache.rs b/mountpoint-s3/src/data_cache/disk_data_cache.rs index 07c96d0c8..044047dfe 100644 --- a/mountpoint-s3/src/data_cache/disk_data_cache.rs +++ b/mountpoint-s3/src/data_cache/disk_data_cache.rs @@ -285,6 +285,18 @@ mod tests { ); } + #[test] + fn test_hash_cache_key() { + let s3_key = "a".repeat(266); + let etag = ETag::for_tests(); + let key = CacheKey { + etag, + s3_key: s3_key.to_owned(), + }; + let expected_hash = "5931fd6bf1fe4eb26db321dda8c5a8917750d8e3a8a984fdbf028b3df59e89ae"; + assert_eq!(expected_hash, hash_cache_key(&key)); + } + #[test] fn test_get_path_for_key() { let cache_dir = PathBuf::from("mountpoint-cache/"); @@ -297,11 +309,8 @@ mod tests { etag, s3_key: s3_key.to_owned(), }; - let expected = vec![ - "mountpoint-cache", - CACHE_VERSION, - "5931fd6bf1fe4eb26db321dda8c5a8917750d8e3a8a984fdbf028b3df59e89ae", - ]; + let hashed_cache_key = hash_cache_key(&key); + let expected = vec!["mountpoint-cache", CACHE_VERSION, hashed_cache_key.as_str()]; let path = data_cache.get_path_for_key(&key); let results: Vec = path.iter().map(ToOwned::to_owned).collect(); assert_eq!(expected, results); @@ -319,12 +328,8 @@ mod tests { etag, s3_key: s3_key.to_owned(), }; - let expected = vec![ - "mountpoint-cache", - CACHE_VERSION, - "5931fd6bf1fe4eb26db321dda8c5a8917750d8e3a8a984fdbf028b3df59e89ae", - "5.block", - ]; + let hashed_cache_key = hash_cache_key(&key); + let expected = vec!["mountpoint-cache", CACHE_VERSION, hashed_cache_key.as_str(), "5.block"]; let path = data_cache.get_path_for_block(&key, 5); let results: Vec = path.iter().map(ToOwned::to_owned).collect(); assert_eq!(expected, results); From 677ef0870956d6a755a0f0ea026572e0292f289e Mon Sep 17 00:00:00 2001 From: Daniel Carl Jones Date: Fri, 10 Nov 2023 14:52:22 +0000 Subject: [PATCH 3/3] Add CACHE_VERSION to hash computed on cache_key Signed-off-by: Daniel Carl Jones --- mountpoint-s3/src/data_cache/disk_data_cache.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mountpoint-s3/src/data_cache/disk_data_cache.rs b/mountpoint-s3/src/data_cache/disk_data_cache.rs index 044047dfe..04b76ca33 100644 --- a/mountpoint-s3/src/data_cache/disk_data_cache.rs +++ b/mountpoint-s3/src/data_cache/disk_data_cache.rs @@ -176,11 +176,13 @@ impl DiskDataCache { } } -/// Hash the cache key using its fields, returning the hash encoded as hexadecimal in a new [String]. +/// Hash the cache key using its fields as well as the [CACHE_VERSION], +/// returning the hash encoded as hexadecimal in a new [String]. fn hash_cache_key(cache_key: &CacheKey) -> String { let CacheKey { s3_key, etag } = cache_key; let mut hasher = Sha256::new(); + hasher.update(CACHE_VERSION.as_bytes()); hasher.update(s3_key.as_bytes()); hasher.update(etag.as_str().as_bytes()); hex::encode(hasher.finalize()) @@ -293,7 +295,7 @@ mod tests { etag, s3_key: s3_key.to_owned(), }; - let expected_hash = "5931fd6bf1fe4eb26db321dda8c5a8917750d8e3a8a984fdbf028b3df59e89ae"; + let expected_hash = "b717d5a78ed63238b0778e7295d83e963758aa54db6e969a822f2b13ce9a3067"; assert_eq!(expected_hash, hash_cache_key(&key)); }