diff --git a/mountpoint-s3/src/data_cache/disk_data_cache.rs b/mountpoint-s3/src/data_cache/disk_data_cache.rs index 864db62e1..04b76ca33 100644 --- a/mountpoint-s3/src/data_cache/disk_data_cache.rs +++ b/mountpoint-s3/src/data_cache/disk_data_cache.rs @@ -158,13 +158,13 @@ impl DiskDataCache { let mut path = self.cache_directory.join(CACHE_VERSION); // An S3 key may be up to 1024 UTF-8 bytes long, which exceeds the maximum UNIX file name length. - // Instead, we hash the key. - // The risk of collisions is mitigated as we will ignore blocks read that contain the wrong S3 key, etc.. - let encoded_s3_key = hex::encode(Sha256::digest(cache_key.s3_key.as_bytes())); + // Instead, the path contains a hash of the S3 key and ETag. + // The risk of collisions is mitigated as we ignore blocks read that contain the wrong S3 key, etc.. + let hashed_cache_key = hash_cache_key(cache_key); + // TODO: Split directory into subdirectories. // Take the first few chars of hash to avoid hitting any FS-specific maximum number of directory entries. - path.push(encoded_s3_key); - path.push(cache_key.etag.as_str()); + path.push(hashed_cache_key); path } @@ -176,6 +176,18 @@ impl DiskDataCache { } } +/// Hash the cache key using its fields as well as the [CACHE_VERSION], +/// returning the hash encoded as hexadecimal in a new [String]. +fn hash_cache_key(cache_key: &CacheKey) -> String { + let CacheKey { s3_key, etag } = cache_key; + + let mut hasher = Sha256::new(); + hasher.update(CACHE_VERSION.as_bytes()); + hasher.update(s3_key.as_bytes()); + hasher.update(etag.as_str().as_bytes()); + hex::encode(hasher.finalize()) +} + impl DataCache for DiskDataCache { fn get_block(&self, cache_key: &CacheKey, block_idx: BlockIndex) -> DataCacheResult> { let path = self.get_path_for_block(cache_key, block_idx); @@ -254,7 +266,6 @@ mod tests { use super::*; use mountpoint_s3_client::types::ETag; - use test_case::test_case; #[test] fn test_block_format_version_requires_update() { @@ -276,30 +287,39 @@ mod tests { ); } - #[test_case("hello"; "simple string")] - #[test_case("foo/bar/baz"; "with forward slashes")] - #[test_case("hello+world"; "with plus char")] - #[test_case("hello\\ world"; "backslash")] - #[test_case("hello=world"; "equals")] - #[test_case("lookšŸŽ”emoji"; "emoji")] - fn test_get_path_for_key(s3_key: &str) { + #[test] + fn test_hash_cache_key() { + let s3_key = "a".repeat(266); + let etag = ETag::for_tests(); + let key = CacheKey { + etag, + s3_key: s3_key.to_owned(), + }; + let expected_hash = "b717d5a78ed63238b0778e7295d83e963758aa54db6e969a822f2b13ce9a3067"; + assert_eq!(expected_hash, hash_cache_key(&key)); + } + + #[test] + fn test_get_path_for_key() { let cache_dir = PathBuf::from("mountpoint-cache/"); let data_cache = DiskDataCache::new(cache_dir, 1024); - let encoded_s3_key = hex::encode(Sha256::digest(s3_key.as_bytes())); + let s3_key = "a".repeat(266); + let etag = ETag::for_tests(); let key = CacheKey { etag, s3_key: s3_key.to_owned(), }; - let expected = vec!["mountpoint-cache", CACHE_VERSION, &encoded_s3_key, key.etag.as_str()]; + let hashed_cache_key = hash_cache_key(&key); + let expected = vec!["mountpoint-cache", CACHE_VERSION, hashed_cache_key.as_str()]; let path = data_cache.get_path_for_key(&key); let results: Vec = path.iter().map(ToOwned::to_owned).collect(); assert_eq!(expected, results); } #[test] - fn test_get_path_for_key_very_long_key() { + fn test_get_path_for_block() { let cache_dir = PathBuf::from("mountpoint-cache/"); let data_cache = DiskDataCache::new(cache_dir, 1024); @@ -310,13 +330,9 @@ mod tests { etag, s3_key: s3_key.to_owned(), }; - let expected = vec![ - "mountpoint-cache", - CACHE_VERSION, - "a7bf334bec6f17021671033b243b8689757212496cd525ba9873addde87b0c56", - key.etag.as_str(), - ]; - let path = data_cache.get_path_for_key(&key); + let hashed_cache_key = hash_cache_key(&key); + let expected = vec!["mountpoint-cache", CACHE_VERSION, hashed_cache_key.as_str(), "5.block"]; + let path = data_cache.get_path_for_block(&key, 5); let results: Vec = path.iter().map(ToOwned::to_owned).collect(); assert_eq!(expected, results); }