From 2d3418dbebefb8b34ddf42cf7dba8b6ec7013c57 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 30 Jun 2024 15:37:13 +0300 Subject: [PATCH 01/46] feat(fs-index): add benchmarks for index update_all() Signed-off-by: Tarek --- README.md | 6 +- fs-index/Cargo.toml | 5 +- fs-index/benches/index_build_benchmark.rs | 43 ------ fs-index/benches/resource_index_benchmark.rs | 146 +++++++++++++++++++ 4 files changed, 152 insertions(+), 48 deletions(-) delete mode 100644 fs-index/benches/index_build_benchmark.rs create mode 100644 fs-index/benches/resource_index_benchmark.rs diff --git a/README.md b/README.md index 6cf0436d..8a472b50 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ cargo bench This command runs all benchmarks and generates a report in HTML format located at `target/criterion/report`. If you wish to run a specific benchmark, you can specify its name as an argument as in: ```bash -cargo bench index_build +cargo bench resource_index ``` ### Benchmarking Local Files @@ -97,10 +97,10 @@ To install `flamegraph`, run: cargo install flamegraph ``` -To generate a flame graph for `index_build_benchmark`, use the following command: +To generate a flame graph for `resource_index_benchmark`, use the following command: ```bash -cargo flamegraph --bench index_build_benchmark -o index_build_benchmark.svg -- --bench +cargo flamegraph --bench resource_index_benchmark -o resource_index_benchmark.svg -- --bench ``` > [!NOTE] diff --git a/fs-index/Cargo.toml b/fs-index/Cargo.toml index 49d0e763..0ae00b3f 100644 --- a/fs-index/Cargo.toml +++ b/fs-index/Cargo.toml @@ -26,11 +26,12 @@ data-resource = { path = "../data-resource" } uuid = { version = "1.6.1", features = ["v4"] } # benchmarking criterion = { version = "0.5", features = ["html_reports"] } +tempfile = "3.10" # Depending on `dev-hash` for testing dev-hash = { path = "../dev-hash" } fs-atomic-versions = { path = "../fs-atomic-versions" } [[bench]] -name = "index_build_benchmark" +name = "resource_index_benchmark" 
harness = false -path = "benches/index_build_benchmark.rs" +path = "benches/resource_index_benchmark.rs" diff --git a/fs-index/benches/index_build_benchmark.rs b/fs-index/benches/index_build_benchmark.rs deleted file mode 100644 index 1de0ef3f..00000000 --- a/fs-index/benches/index_build_benchmark.rs +++ /dev/null @@ -1,43 +0,0 @@ -use criterion::{ - black_box, criterion_group, criterion_main, BenchmarkId, Criterion, -}; -use dev_hash::Crc32; -use fs_index::index::ResourceIndex; - -const DIR_PATH: &str = "../test-assets/"; // Set the path to the directory containing the resources here - -fn index_build_benchmark(c: &mut Criterion) { - // assert the path exists and is a directory - assert!( - std::path::Path::new(DIR_PATH).is_dir(), - "The path: {} does not exist or is not a directory", - DIR_PATH - ); - - let mut group = c.benchmark_group("index_build"); - group.measurement_time(std::time::Duration::from_secs(20)); // Set the measurement time here - - let mut collisions_size = 0; - - group.bench_with_input( - BenchmarkId::new("index_build", DIR_PATH), - &DIR_PATH, - |b, path| { - b.iter(|| { - let index: ResourceIndex = - ResourceIndex::build(black_box(path.to_string())); - collisions_size = index.collisions.len(); - }); - }, - ); - group.finish(); - - println!("Collisions: {}", collisions_size); -} - -criterion_group! 
{ - name = benches; - config = Criterion::default(); - targets = index_build_benchmark -} -criterion_main!(benches); diff --git a/fs-index/benches/resource_index_benchmark.rs b/fs-index/benches/resource_index_benchmark.rs new file mode 100644 index 00000000..c762eebd --- /dev/null +++ b/fs-index/benches/resource_index_benchmark.rs @@ -0,0 +1,146 @@ +use std::path::PathBuf; + +use criterion::{ + black_box, criterion_group, criterion_main, BenchmarkId, Criterion, +}; +use tempfile::TempDir; + +use dev_hash::Crc32; +use fs_index::index::ResourceIndex; + +// The path to the test assets directory +const DIR_PATH: &str = "../test-assets/"; + +fn resource_index_benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("resource_index"); + group.measurement_time(std::time::Duration::from_secs(20)); // Set the measurement time here + + let benchmarks_dir = setup_temp_dir(); + let benchmarks_dir = benchmarks_dir.path(); + let benchmarks_dir_str = benchmarks_dir.to_str().unwrap(); + + // Benchmark `ResourceIndex::build()` + + let mut collisions_size = 0; + group.bench_with_input( + BenchmarkId::new("index_build", benchmarks_dir_str), + &benchmarks_dir, + |b, path| { + b.iter(|| { + let index: ResourceIndex = + ResourceIndex::build(black_box(path)); + collisions_size = index.collisions.len(); + }); + }, + ); + println!("Collisions: {}", collisions_size); + + // TODO: Benchmark `ResourceIndex::get_resource_by_id()` + + // TODO: Benchmark `ResourceIndex::get_resource_by_path()` + + // TODO: Benchmark `ResourceIndex::track_addition()` + + // TODO: Benchmark `ResourceIndex::track_deletion()` + + // TODO: Benchmark `ResourceIndex::track_update()` + + // Benchmark `ResourceIndex::update_all()` + + // First, create a new temp directory specifically for the update_all benchmark + // since we will be creating new files, removing files, and modifying files + let update_all_benchmarks_dir = + TempDir::with_prefix("ark-fs-index-benchmarks-update-all").unwrap(); + let 
update_all_benchmarks_dir = update_all_benchmarks_dir.path(); + + group.bench_function("index_update_all", |b| { + b.iter(|| { + // Clear the directory + std::fs::remove_dir_all(&update_all_benchmarks_dir).unwrap(); + std::fs::create_dir(&update_all_benchmarks_dir).unwrap(); + + // Create 50 new files + for i in 0..50 { + let new_file = + update_all_benchmarks_dir.join(format!("file_{}.txt", i)); + std::fs::File::create(&new_file).unwrap(); + std::fs::write(&new_file, format!("Hello, World! {}", i)) + .unwrap(); + } + let mut index: ResourceIndex = + ResourceIndex::build(black_box(&update_all_benchmarks_dir)); + + update_all_files(&update_all_benchmarks_dir.to_path_buf()); + let _update_result = index.update_all().unwrap(); + }); + }); + + group.finish(); +} + +criterion_group! { + name = benches; + config = Criterion::default(); + targets = resource_index_benchmark +} +criterion_main!(benches); + +/// A helper function to setup a temp directory for the benchmarks using the test assets directory +fn setup_temp_dir() -> TempDir { + // assert the path exists and is a directory + assert!( + std::path::Path::new(DIR_PATH).is_dir(), + "The path: {} does not exist or is not a directory", + DIR_PATH + ); + + // Create a temp directory + let temp_dir = TempDir::with_prefix("ark-fs-index-benchmarks").unwrap(); + let benchmarks_dir = temp_dir.path(); + let benchmarks_dir_str = benchmarks_dir.to_str().unwrap(); + log::info!("Temp directory for benchmarks: {}", benchmarks_dir_str); + + // Copy the test assets to the temp directory + let source = std::path::Path::new(DIR_PATH); + // Can't use fs::copy because the source is a directory + let output = std::process::Command::new("cp") + .arg("-r") + .arg(source) + .arg(benchmarks_dir_str) + .output() + .expect("Failed to copy test assets to temp directory"); + if !output.status.success() { + panic!( + "Failed to copy test assets to temp directory: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + + temp_dir +} + +/// A 
helper function that takes a directory and creates 50 new files, removes 30 files, and modifies 10 files +/// +/// Note: The function assumes that the directory already contains 50 files named `file_0.txt` to `file_49.txt` +fn update_all_files(dir: &PathBuf) { + // Create 50 new files + for i in 51..101 { + let new_file = dir.join(format!("file_{}.txt", i)); + std::fs::File::create(&new_file).unwrap(); + // We add the index `i` to the file content to make sure the content is unique + // This is to avoid collisions in the index + std::fs::write(&new_file, format!("Hello, World! {}", i)).unwrap(); + } + + // Remove 30 files + for i in 0..30 { + let removed_file = dir.join(format!("file_{}.txt", i)); + std::fs::remove_file(&removed_file).unwrap(); + } + + // Modify 10 files + for i in 40..50 { + let modified_file = dir.join(format!("file_{}.txt", i)); + std::fs::write(&modified_file, "Hello, World!").unwrap(); + } +} From ea84d283d597ba88bb853b319972d43bb1426cb5 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 30 Jun 2024 15:41:46 +0300 Subject: [PATCH 02/46] feat(fs-index): move tests to a separate module Signed-off-by: Tarek --- fs-index/src/index.rs | 446 +----------------------------------------- fs-index/src/lib.rs | 3 + fs-index/src/tests.rs | 426 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 431 insertions(+), 444 deletions(-) create mode 100644 fs-index/src/tests.rs diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index a60287ea..9bbb863d 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -27,7 +27,7 @@ pub struct ResourceIndex { pub path2id: HashMap>, pub collisions: HashMap, - root: PathBuf, + pub root: PathBuf, } #[derive(PartialEq, Debug)] @@ -578,7 +578,7 @@ impl ResourceIndex { } } -fn discover_paths>( +pub(crate) fn discover_paths>( root_path: P, ) -> HashMap { log::debug!( @@ -676,445 +676,3 @@ fn is_hidden(entry: &DirEntry) -> bool { .map(|s| s.starts_with('.')) .unwrap_or(false) } - -#[cfg(test)] -mod tests { - use 
crate::index::{discover_paths, IndexEntry}; - use crate::ResourceIndex; - use canonical_path::CanonicalPathBuf; - use dev_hash::Crc32; - use fs_atomic_versions::initialize; - use std::fs::File; - #[cfg(target_family = "unix")] - use std::fs::Permissions; - #[cfg(target_family = "unix")] - use std::os::unix::fs::PermissionsExt; - - use std::path::PathBuf; - use std::time::SystemTime; - use uuid::Uuid; - - const FILE_SIZE_1: u64 = 10; - const FILE_SIZE_2: u64 = 11; - - const FILE_NAME_1: &str = "test1.txt"; - const FILE_NAME_2: &str = "test2.txt"; - const FILE_NAME_3: &str = "test3.txt"; - - const CRC32_1: Crc32 = Crc32(3817498742); - const CRC32_2: Crc32 = Crc32(1804055020); - - fn get_temp_dir() -> PathBuf { - create_dir_at(std::env::temp_dir()) - } - - fn create_dir_at(path: PathBuf) -> PathBuf { - let mut dir_path = path.clone(); - dir_path.push(Uuid::new_v4().to_string()); - std::fs::create_dir(&dir_path).expect("Could not create temp dir"); - dir_path - } - - fn create_file_at( - path: PathBuf, - size: Option, - name: Option<&str>, - ) -> (File, PathBuf) { - let mut file_path = path.clone(); - if let Some(file_name) = name { - file_path.push(file_name); - } else { - file_path.push(Uuid::new_v4().to_string()); - } - let file = File::create(file_path.clone()) - .expect("Could not create temp file"); - file.set_len(size.unwrap_or(0)) - .expect("Could not set file size"); - (file, file_path) - } - - fn run_test_and_clean_up( - test: impl FnOnce(PathBuf) + std::panic::UnwindSafe, - ) { - initialize(); - - let path = get_temp_dir(); - let result = std::panic::catch_unwind(|| test(path.clone())); - std::fs::remove_dir_all(path.clone()) - .expect("Could not clean up after test"); - if result.is_err() { - panic!("{}", result.err().map(|_| "Test panicked").unwrap()) - } - assert!(result.is_ok()); - } - - // resource index build - - #[test] - fn index_build_should_process_1_file_successfully() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), 
Some(FILE_SIZE_1), None); - - let actual: ResourceIndex = - ResourceIndex::build(path.clone()); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 1); - assert_eq!(actual.id2path.len(), 1); - assert!(actual.id2path.contains_key(&CRC32_1)); - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 1); - }) - } - - #[test] - fn index_build_should_process_colliding_files_correctly() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), None); - create_file_at(path.clone(), Some(FILE_SIZE_1), None); - - let actual: ResourceIndex = - ResourceIndex::build(path.clone()); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 2); - assert_eq!(actual.id2path.len(), 1); - assert!(actual.id2path.contains_key(&CRC32_1)); - assert_eq!(actual.collisions.len(), 1); - assert_eq!(actual.size(), 2); - }) - } - - // resource index update - - #[test] - fn update_all_should_handle_renamed_file_correctly() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), Some(FILE_NAME_1)); - create_file_at(path.clone(), Some(FILE_SIZE_2), Some(FILE_NAME_2)); - - let mut actual: ResourceIndex = - ResourceIndex::build(path.clone()); - - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 2); - - // rename test2.txt to test3.txt - let mut name_from = path.clone(); - name_from.push(FILE_NAME_2); - let mut name_to = path.clone(); - name_to.push(FILE_NAME_3); - std::fs::rename(name_from, name_to) - .expect("Should rename file successfully"); - - let update = actual - .update_all() - .expect("Should update index correctly"); - - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 2); - assert_eq!(update.deleted.len(), 1); - assert_eq!(update.added.len(), 1); - }) - } - - #[test] - fn update_all_should_index_new_file_successfully() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), None); - - let mut actual: 
ResourceIndex = - ResourceIndex::build(path.clone()); - - let (_, expected_path) = - create_file_at(path.clone(), Some(FILE_SIZE_2), None); - - let update = actual - .update_all() - .expect("Should update index correctly"); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 2); - assert_eq!(actual.id2path.len(), 2); - assert!(actual.id2path.contains_key(&CRC32_1)); - assert!(actual.id2path.contains_key(&CRC32_2)); - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 2); - assert_eq!(update.deleted.len(), 0); - assert_eq!(update.added.len(), 1); - - let added_key = - CanonicalPathBuf::canonicalize(expected_path.clone()) - .expect("CanonicalPathBuf should be fine"); - assert_eq!( - update - .added - .get(&added_key) - .expect("Key exists") - .clone(), - CRC32_2 - ) - }) - } - - #[test] - fn index_new_should_index_new_file_successfully() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), None); - let mut index: ResourceIndex = - ResourceIndex::build(path.clone()); - - let (_, new_path) = - create_file_at(path.clone(), Some(FILE_SIZE_2), None); - - let update = index - .index_new(&new_path) - .expect("Should update index correctly"); - - assert_eq!(index.root, path.clone()); - assert_eq!(index.path2id.len(), 2); - assert_eq!(index.id2path.len(), 2); - assert!(index.id2path.contains_key(&CRC32_1)); - assert!(index.id2path.contains_key(&CRC32_2)); - assert_eq!(index.collisions.len(), 0); - assert_eq!(index.size(), 2); - assert_eq!(update.deleted.len(), 0); - assert_eq!(update.added.len(), 1); - - let added_key = CanonicalPathBuf::canonicalize(new_path.clone()) - .expect("CanonicalPathBuf should be fine"); - assert_eq!( - update - .added - .get(&added_key) - .expect("Key exists") - .clone(), - CRC32_2 - ) - }) - } - - #[test] - fn update_one_should_error_on_new_file() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), None); - let mut index = 
ResourceIndex::build(path.clone()); - - let (_, new_path) = - create_file_at(path.clone(), Some(FILE_SIZE_2), None); - - let update = index.update_one(&new_path, CRC32_2); - - assert!(update.is_err()) - }) - } - - #[test] - fn update_one_should_index_delete_file_successfully() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), Some(FILE_NAME_1)); - - let mut actual = ResourceIndex::build(path.clone()); - - let mut file_path = path.clone(); - file_path.push(FILE_NAME_1); - std::fs::remove_file(file_path.clone()) - .expect("Should remove file successfully"); - - let update = actual - .update_one(&file_path.clone(), CRC32_1) - .expect("Should update index successfully"); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 0); - assert_eq!(actual.id2path.len(), 0); - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 0); - assert_eq!(update.deleted.len(), 1); - assert_eq!(update.added.len(), 0); - - assert!(update.deleted.contains(&CRC32_1)) - }) - } - - #[test] - fn update_all_should_error_on_files_without_permissions() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), Some(FILE_NAME_1)); - let (file, _) = create_file_at( - path.clone(), - Some(FILE_SIZE_2), - Some(FILE_NAME_2), - ); - - let mut actual: ResourceIndex = - ResourceIndex::build(path.clone()); - - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 2); - #[cfg(target_family = "unix")] - file.set_permissions(Permissions::from_mode(0o222)) - .expect("Should be fine"); - - let update = actual - .update_all() - .expect("Should update index correctly"); - - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 2); - assert_eq!(update.deleted.len(), 0); - assert_eq!(update.added.len(), 0); - }) - } - - // error cases - - #[test] - fn update_one_should_not_update_absent_path() { - run_test_and_clean_up(|path| { - let mut missing_path = path.clone(); - 
missing_path.push("missing/directory"); - let mut actual = ResourceIndex::build(path.clone()); - let old_id = Crc32(2); - let result = actual - .update_one(&missing_path, old_id.clone()) - .map(|i| i.deleted.clone().take(&old_id)) - .ok() - .flatten(); - - assert_eq!(result, Some(Crc32(2))); - }) - } - - #[test] - fn update_one_should_index_new_path() { - run_test_and_clean_up(|path| { - let mut missing_path = path.clone(); - missing_path.push("missing/directory"); - let mut actual = ResourceIndex::build(path.clone()); - let old_id = Crc32(2); - let result = actual - .update_one(&missing_path, old_id.clone()) - .map(|i| i.deleted.clone().take(&old_id)) - .ok() - .flatten(); - - assert_eq!(result, Some(Crc32(2))); - }) - } - - #[test] - fn should_not_index_empty_file() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(0), None); - let actual: ResourceIndex = - ResourceIndex::build(path.clone()); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 0); - assert_eq!(actual.id2path.len(), 0); - assert_eq!(actual.collisions.len(), 0); - }) - } - - #[test] - fn should_not_index_hidden_file() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), Some(".hidden")); - let actual: ResourceIndex = - ResourceIndex::build(path.clone()); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 0); - assert_eq!(actual.id2path.len(), 0); - assert_eq!(actual.collisions.len(), 0); - }) - } - - #[test] - fn should_not_index_1_empty_directory() { - run_test_and_clean_up(|path| { - create_dir_at(path.clone()); - - let actual: ResourceIndex = - ResourceIndex::build(path.clone()); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 0); - assert_eq!(actual.id2path.len(), 0); - assert_eq!(actual.collisions.len(), 0); - }) - } - - #[test] - fn discover_paths_should_not_walk_on_invalid_path() { - run_test_and_clean_up(|path| { - let mut missing_path = path.clone(); - 
missing_path.push("missing/directory"); - let actual = discover_paths(missing_path); - assert_eq!(actual.len(), 0); - }) - } - - #[test] - fn index_entry_order() { - let old1 = IndexEntry { - id: Crc32(2), - modified: SystemTime::UNIX_EPOCH, - }; - let old2 = IndexEntry { - id: Crc32(1), - modified: SystemTime::UNIX_EPOCH, - }; - - let new1 = IndexEntry { - id: Crc32(1), - modified: SystemTime::now(), - }; - let new2 = IndexEntry { - id: Crc32(2), - modified: SystemTime::now(), - }; - - assert_eq!(new1, new1); - assert_eq!(new2, new2); - assert_eq!(old1, old1); - assert_eq!(old2, old2); - - assert_ne!(new1, new2); - assert_ne!(new1, old1); - - assert!(new1 > old1); - assert!(new1 > old2); - assert!(new2 > old1); - assert!(new2 > old2); - assert!(new2 > new1); - } - - /// Test the performance of `ResourceIndex::build` on a specific directory. - /// - /// This test evaluates the performance of building a resource - /// index using the `ResourceIndex::build` method on a given directory. - /// It measures the time taken to build the resource index and prints the - /// number of collisions detected. 
- #[test] - fn test_build_resource_index() { - use std::time::Instant; - - let path = "../test-assets/"; // The path to the directory to index - assert!( - std::path::Path::new(path).is_dir(), - "The provided path is not a directory or does not exist" - ); - - let start_time = Instant::now(); - let index: ResourceIndex = - ResourceIndex::build(path.to_string()); - let elapsed_time = start_time.elapsed(); - - println!("Number of paths: {}", index.id2path.len()); - println!("Number of resources: {}", index.id2path.len()); - println!("Number of collisions: {}", index.collisions.len()); - println!("Time taken: {:?}", elapsed_time); - } -} diff --git a/fs-index/src/lib.rs b/fs-index/src/lib.rs index f475f478..bf280e99 100644 --- a/fs-index/src/lib.rs +++ b/fs-index/src/lib.rs @@ -1,3 +1,6 @@ pub mod index; +#[cfg(test)] +mod tests; + pub use index::ResourceIndex; diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs new file mode 100644 index 00000000..76d13f4d --- /dev/null +++ b/fs-index/src/tests.rs @@ -0,0 +1,426 @@ +use crate::index::{discover_paths, IndexEntry}; +use crate::ResourceIndex; +use canonical_path::CanonicalPathBuf; +use dev_hash::Crc32; +use fs_atomic_versions::initialize; +use std::fs::File; +#[cfg(target_family = "unix")] +use std::fs::Permissions; +#[cfg(target_family = "unix")] +use std::os::unix::fs::PermissionsExt; + +use std::path::PathBuf; +use std::time::SystemTime; +use uuid::Uuid; + +const FILE_SIZE_1: u64 = 10; +const FILE_SIZE_2: u64 = 11; + +const FILE_NAME_1: &str = "test1.txt"; +const FILE_NAME_2: &str = "test2.txt"; +const FILE_NAME_3: &str = "test3.txt"; + +const CRC32_1: Crc32 = Crc32(3817498742); +const CRC32_2: Crc32 = Crc32(1804055020); + +fn get_temp_dir() -> PathBuf { + create_dir_at(std::env::temp_dir()) +} + +fn create_dir_at(path: PathBuf) -> PathBuf { + let mut dir_path = path.clone(); + dir_path.push(Uuid::new_v4().to_string()); + std::fs::create_dir(&dir_path).expect("Could not create temp dir"); + dir_path +} + +fn 
create_file_at( + path: PathBuf, + size: Option, + name: Option<&str>, +) -> (File, PathBuf) { + let mut file_path = path.clone(); + if let Some(file_name) = name { + file_path.push(file_name); + } else { + file_path.push(Uuid::new_v4().to_string()); + } + let file = + File::create(file_path.clone()).expect("Could not create temp file"); + file.set_len(size.unwrap_or(0)) + .expect("Could not set file size"); + (file, file_path) +} + +fn run_test_and_clean_up(test: impl FnOnce(PathBuf) + std::panic::UnwindSafe) { + initialize(); + + let path = get_temp_dir(); + let result = std::panic::catch_unwind(|| test(path.clone())); + std::fs::remove_dir_all(path.clone()) + .expect("Could not clean up after test"); + if result.is_err() { + panic!("{}", result.err().map(|_| "Test panicked").unwrap()) + } + assert!(result.is_ok()); +} + +// resource index build + +#[test] +fn index_build_should_process_1_file_successfully() { + run_test_and_clean_up(|path| { + create_file_at(path.clone(), Some(FILE_SIZE_1), None); + + let actual: ResourceIndex = ResourceIndex::build(path.clone()); + + assert_eq!(actual.root, path.clone()); + assert_eq!(actual.path2id.len(), 1); + assert_eq!(actual.id2path.len(), 1); + assert!(actual.id2path.contains_key(&CRC32_1)); + assert_eq!(actual.collisions.len(), 0); + assert_eq!(actual.size(), 1); + }) +} + +#[test] +fn index_build_should_process_colliding_files_correctly() { + run_test_and_clean_up(|path| { + create_file_at(path.clone(), Some(FILE_SIZE_1), None); + create_file_at(path.clone(), Some(FILE_SIZE_1), None); + + let actual: ResourceIndex = ResourceIndex::build(path.clone()); + + assert_eq!(actual.root, path.clone()); + assert_eq!(actual.path2id.len(), 2); + assert_eq!(actual.id2path.len(), 1); + assert!(actual.id2path.contains_key(&CRC32_1)); + assert_eq!(actual.collisions.len(), 1); + assert_eq!(actual.size(), 2); + }) +} + +// resource index update + +#[test] +fn update_all_should_handle_renamed_file_correctly() { + 
run_test_and_clean_up(|path| { + create_file_at(path.clone(), Some(FILE_SIZE_1), Some(FILE_NAME_1)); + create_file_at(path.clone(), Some(FILE_SIZE_2), Some(FILE_NAME_2)); + + let mut actual: ResourceIndex = + ResourceIndex::build(path.clone()); + + assert_eq!(actual.collisions.len(), 0); + assert_eq!(actual.size(), 2); + + // rename test2.txt to test3.txt + let mut name_from = path.clone(); + name_from.push(FILE_NAME_2); + let mut name_to = path.clone(); + name_to.push(FILE_NAME_3); + std::fs::rename(name_from, name_to) + .expect("Should rename file successfully"); + + let update = actual + .update_all() + .expect("Should update index correctly"); + + assert_eq!(actual.collisions.len(), 0); + assert_eq!(actual.size(), 2); + assert_eq!(update.deleted.len(), 1); + assert_eq!(update.added.len(), 1); + }) +} + +#[test] +fn update_all_should_index_new_file_successfully() { + run_test_and_clean_up(|path| { + create_file_at(path.clone(), Some(FILE_SIZE_1), None); + + let mut actual: ResourceIndex = + ResourceIndex::build(path.clone()); + + let (_, expected_path) = + create_file_at(path.clone(), Some(FILE_SIZE_2), None); + + let update = actual + .update_all() + .expect("Should update index correctly"); + + assert_eq!(actual.root, path.clone()); + assert_eq!(actual.path2id.len(), 2); + assert_eq!(actual.id2path.len(), 2); + assert!(actual.id2path.contains_key(&CRC32_1)); + assert!(actual.id2path.contains_key(&CRC32_2)); + assert_eq!(actual.collisions.len(), 0); + assert_eq!(actual.size(), 2); + assert_eq!(update.deleted.len(), 0); + assert_eq!(update.added.len(), 1); + + let added_key = CanonicalPathBuf::canonicalize(expected_path.clone()) + .expect("CanonicalPathBuf should be fine"); + assert_eq!( + update + .added + .get(&added_key) + .expect("Key exists") + .clone(), + CRC32_2 + ) + }) +} + +#[test] +fn index_new_should_index_new_file_successfully() { + run_test_and_clean_up(|path| { + create_file_at(path.clone(), Some(FILE_SIZE_1), None); + let mut index: ResourceIndex 
= + ResourceIndex::build(path.clone()); + + let (_, new_path) = + create_file_at(path.clone(), Some(FILE_SIZE_2), None); + + let update = index + .index_new(&new_path) + .expect("Should update index correctly"); + + assert_eq!(index.root, path.clone()); + assert_eq!(index.path2id.len(), 2); + assert_eq!(index.id2path.len(), 2); + assert!(index.id2path.contains_key(&CRC32_1)); + assert!(index.id2path.contains_key(&CRC32_2)); + assert_eq!(index.collisions.len(), 0); + assert_eq!(index.size(), 2); + assert_eq!(update.deleted.len(), 0); + assert_eq!(update.added.len(), 1); + + let added_key = CanonicalPathBuf::canonicalize(new_path.clone()) + .expect("CanonicalPathBuf should be fine"); + assert_eq!( + update + .added + .get(&added_key) + .expect("Key exists") + .clone(), + CRC32_2 + ) + }) +} + +#[test] +fn update_one_should_error_on_new_file() { + run_test_and_clean_up(|path| { + create_file_at(path.clone(), Some(FILE_SIZE_1), None); + let mut index = ResourceIndex::build(path.clone()); + + let (_, new_path) = + create_file_at(path.clone(), Some(FILE_SIZE_2), None); + + let update = index.update_one(&new_path, CRC32_2); + + assert!(update.is_err()) + }) +} + +#[test] +fn update_one_should_index_delete_file_successfully() { + run_test_and_clean_up(|path| { + create_file_at(path.clone(), Some(FILE_SIZE_1), Some(FILE_NAME_1)); + + let mut actual = ResourceIndex::build(path.clone()); + + let mut file_path = path.clone(); + file_path.push(FILE_NAME_1); + std::fs::remove_file(file_path.clone()) + .expect("Should remove file successfully"); + + let update = actual + .update_one(&file_path.clone(), CRC32_1) + .expect("Should update index successfully"); + + assert_eq!(actual.root, path.clone()); + assert_eq!(actual.path2id.len(), 0); + assert_eq!(actual.id2path.len(), 0); + assert_eq!(actual.collisions.len(), 0); + assert_eq!(actual.size(), 0); + assert_eq!(update.deleted.len(), 1); + assert_eq!(update.added.len(), 0); + + assert!(update.deleted.contains(&CRC32_1)) + }) +} + 
+#[test] +fn update_all_should_error_on_files_without_permissions() { + run_test_and_clean_up(|path| { + create_file_at(path.clone(), Some(FILE_SIZE_1), Some(FILE_NAME_1)); + let (file, _) = + create_file_at(path.clone(), Some(FILE_SIZE_2), Some(FILE_NAME_2)); + + let mut actual: ResourceIndex = + ResourceIndex::build(path.clone()); + + assert_eq!(actual.collisions.len(), 0); + assert_eq!(actual.size(), 2); + #[cfg(target_family = "unix")] + file.set_permissions(Permissions::from_mode(0o222)) + .expect("Should be fine"); + + let update = actual + .update_all() + .expect("Should update index correctly"); + + assert_eq!(actual.collisions.len(), 0); + assert_eq!(actual.size(), 2); + assert_eq!(update.deleted.len(), 0); + assert_eq!(update.added.len(), 0); + }) +} + +// error cases + +#[test] +fn update_one_should_not_update_absent_path() { + run_test_and_clean_up(|path| { + let mut missing_path = path.clone(); + missing_path.push("missing/directory"); + let mut actual = ResourceIndex::build(path.clone()); + let old_id = Crc32(2); + let result = actual + .update_one(&missing_path, old_id.clone()) + .map(|i| i.deleted.clone().take(&old_id)) + .ok() + .flatten(); + + assert_eq!(result, Some(Crc32(2))); + }) +} + +#[test] +fn update_one_should_index_new_path() { + run_test_and_clean_up(|path| { + let mut missing_path = path.clone(); + missing_path.push("missing/directory"); + let mut actual = ResourceIndex::build(path.clone()); + let old_id = Crc32(2); + let result = actual + .update_one(&missing_path, old_id.clone()) + .map(|i| i.deleted.clone().take(&old_id)) + .ok() + .flatten(); + + assert_eq!(result, Some(Crc32(2))); + }) +} + +#[test] +fn should_not_index_empty_file() { + run_test_and_clean_up(|path| { + create_file_at(path.clone(), Some(0), None); + let actual: ResourceIndex = ResourceIndex::build(path.clone()); + + assert_eq!(actual.root, path.clone()); + assert_eq!(actual.path2id.len(), 0); + assert_eq!(actual.id2path.len(), 0); + 
assert_eq!(actual.collisions.len(), 0); + }) +} + +#[test] +fn should_not_index_hidden_file() { + run_test_and_clean_up(|path| { + create_file_at(path.clone(), Some(FILE_SIZE_1), Some(".hidden")); + let actual: ResourceIndex = ResourceIndex::build(path.clone()); + + assert_eq!(actual.root, path.clone()); + assert_eq!(actual.path2id.len(), 0); + assert_eq!(actual.id2path.len(), 0); + assert_eq!(actual.collisions.len(), 0); + }) +} + +#[test] +fn should_not_index_1_empty_directory() { + run_test_and_clean_up(|path| { + create_dir_at(path.clone()); + + let actual: ResourceIndex = ResourceIndex::build(path.clone()); + + assert_eq!(actual.root, path.clone()); + assert_eq!(actual.path2id.len(), 0); + assert_eq!(actual.id2path.len(), 0); + assert_eq!(actual.collisions.len(), 0); + }) +} + +#[test] +fn discover_paths_should_not_walk_on_invalid_path() { + run_test_and_clean_up(|path| { + let mut missing_path = path.clone(); + missing_path.push("missing/directory"); + let actual = discover_paths(missing_path); + assert_eq!(actual.len(), 0); + }) +} + +#[test] +fn index_entry_order() { + let old1 = IndexEntry { + id: Crc32(2), + modified: SystemTime::UNIX_EPOCH, + }; + let old2 = IndexEntry { + id: Crc32(1), + modified: SystemTime::UNIX_EPOCH, + }; + + let new1 = IndexEntry { + id: Crc32(1), + modified: SystemTime::now(), + }; + let new2 = IndexEntry { + id: Crc32(2), + modified: SystemTime::now(), + }; + + assert_eq!(new1, new1); + assert_eq!(new2, new2); + assert_eq!(old1, old1); + assert_eq!(old2, old2); + + assert_ne!(new1, new2); + assert_ne!(new1, old1); + + assert!(new1 > old1); + assert!(new1 > old2); + assert!(new2 > old1); + assert!(new2 > old2); + assert!(new2 > new1); +} + +/// Test the performance of `ResourceIndex::build` on a specific directory. +/// +/// This test evaluates the performance of building a resource +/// index using the `ResourceIndex::build` method on a given directory. 
+/// It measures the time taken to build the resource index and prints the +/// number of collisions detected. +#[test] +fn test_build_resource_index() { + use std::time::Instant; + + let path = "../test-assets/"; // The path to the directory to index + assert!( + std::path::Path::new(path).is_dir(), + "The provided path is not a directory or does not exist" + ); + + let start_time = Instant::now(); + let index: ResourceIndex = ResourceIndex::build(path.to_string()); + let elapsed_time = start_time.elapsed(); + + println!("Number of paths: {}", index.id2path.len()); + println!("Number of resources: {}", index.id2path.len()); + println!("Number of collisions: {}", index.collisions.len()); + println!("Time taken: {:?}", elapsed_time); +} From a59438822422dec47a5180d2930572b57ea91784 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 30 Jun 2024 15:56:59 +0300 Subject: [PATCH 03/46] organize rules for rustfmt Signed-off-by: Tarek --- .github/workflows/build.yml | 1 + ark-cli/src/commands/backup.rs | 3 +- ark-cli/src/commands/file/append.rs | 7 ++-- ark-cli/src/commands/file/insert.rs | 7 ++-- ark-cli/src/commands/file/read.rs | 7 ++-- ark-cli/src/commands/file/utils.rs | 7 ++-- ark-cli/src/commands/link/utils.rs | 3 +- ark-cli/src/commands/list.rs | 6 ++-- ark-cli/src/commands/render.rs | 3 +- ark-cli/src/commands/storage/list.rs | 4 +-- ark-cli/src/index_registrar.rs | 8 +++-- ark-cli/src/main.rs | 30 ++++++++-------- ark-cli/src/models/storage.rs | 3 +- ark-cli/src/util.rs | 26 +++++++------- data-json/src/lib.rs | 5 +-- data-link/src/lib.rs | 18 +++++----- data-resource/src/lib.rs | 10 +++--- dev-hash/benches/blake3.rs | 3 +- dev-hash/benches/crc32.rs | 3 +- fs-atomic-light/src/lib.rs | 5 +-- fs-atomic-versions/src/atomic/file.rs | 8 +++-- fs-atomic-versions/src/lib.rs | 7 ++-- fs-index/benches/resource_index_benchmark.rs | 18 ++++++---- fs-index/src/index.rs | 14 ++++---- fs-index/src/tests.rs | 9 ++--- fs-metadata/src/lib.rs | 4 +-- fs-properties/src/lib.rs | 4 +-- 
fs-storage/examples/cli.rs | 7 ++-- fs-storage/src/base_storage.rs | 20 +++++++---- fs-storage/src/file_storage.rs | 37 +++++++++++--------- fs-storage/src/jni/file_storage.rs | 7 ++-- fs-storage/src/monoid.rs | 10 +++--- fs-storage/src/utils.rs | 3 +- rustfmt.toml | 13 ++++--- 34 files changed, 167 insertions(+), 153 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0edf7006..a6a7d947 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -22,6 +22,7 @@ jobs: - name: Install Rust uses: dtolnay/rust-toolchain@stable with: + toolchain: nightly # nightly is required for fmt components: rustfmt, clippy - name: Check diff --git a/ark-cli/src/commands/backup.rs b/ark-cli/src/commands/backup.rs index 398220bd..40d5d46d 100644 --- a/ark-cli/src/commands/backup.rs +++ b/ark-cli/src/commands/backup.rs @@ -1,5 +1,4 @@ -use std::io::Write; -use std::path::PathBuf; +use std::{io::Write, path::PathBuf}; use crate::{ create_dir_all, dir, discover_roots, home_dir, storages_exists, timestamp, diff --git a/ark-cli/src/commands/file/append.rs b/ark-cli/src/commands/file/append.rs index 46d0b132..91969928 100644 --- a/ark-cli/src/commands/file/append.rs +++ b/ark-cli/src/commands/file/append.rs @@ -1,9 +1,8 @@ -use std::path::PathBuf; -use std::str::FromStr; +use std::{path::PathBuf, str::FromStr}; use crate::{ - models::storage::Storage, models::storage::StorageType, translate_storage, - AppError, Format, ResourceId, + models::storage::{Storage, StorageType}, + translate_storage, AppError, Format, ResourceId, }; use data_error::ArklibError; diff --git a/ark-cli/src/commands/file/insert.rs b/ark-cli/src/commands/file/insert.rs index ff9b1ac9..b60199e5 100644 --- a/ark-cli/src/commands/file/insert.rs +++ b/ark-cli/src/commands/file/insert.rs @@ -1,9 +1,8 @@ -use std::path::PathBuf; -use std::str::FromStr; +use std::{path::PathBuf, str::FromStr}; use crate::{ - models::storage::Storage, models::storage::StorageType, 
translate_storage, - AppError, Format, ResourceId, + models::storage::{Storage, StorageType}, + translate_storage, AppError, Format, ResourceId, }; use data_error::ArklibError; diff --git a/ark-cli/src/commands/file/read.rs b/ark-cli/src/commands/file/read.rs index 8387d011..7b47d719 100644 --- a/ark-cli/src/commands/file/read.rs +++ b/ark-cli/src/commands/file/read.rs @@ -1,9 +1,8 @@ -use std::path::PathBuf; -use std::str::FromStr; +use std::{path::PathBuf, str::FromStr}; use crate::{ - models::storage::Storage, models::storage::StorageType, translate_storage, - AppError, ResourceId, + models::storage::{Storage, StorageType}, + translate_storage, AppError, ResourceId, }; use data_error::ArklibError; diff --git a/ark-cli/src/commands/file/utils.rs b/ark-cli/src/commands/file/utils.rs index 8a3c4048..6cc993dd 100644 --- a/ark-cli/src/commands/file/utils.rs +++ b/ark-cli/src/commands/file/utils.rs @@ -1,6 +1,7 @@ -use crate::error::AppError; -use crate::models::key_value_to_str; -use crate::models::Format; +use crate::{ + error::AppError, + models::{key_value_to_str, Format}, +}; use data_error::Result as ArklibResult; use fs_atomic_versions::atomic::{modify, modify_json, AtomicFile}; diff --git a/ark-cli/src/commands/link/utils.rs b/ark-cli/src/commands/link/utils.rs index 122f7f1c..1b851966 100644 --- a/ark-cli/src/commands/link/utils.rs +++ b/ark-cli/src/commands/link/utils.rs @@ -3,8 +3,7 @@ use data_link::Link; use std::path::PathBuf; use url::Url; -use crate::error::AppError; -use crate::util::provide_index; // Import your custom AppError type +use crate::{error::AppError, util::provide_index}; // Import your custom AppError type pub async fn create_link( root: &PathBuf, diff --git a/ark-cli/src/commands/list.rs b/ark-cli/src/commands/list.rs index bc557b42..99725c65 100644 --- a/ark-cli/src/commands/list.rs +++ b/ark-cli/src/commands/list.rs @@ -1,5 +1,4 @@ -use std::io::Read; -use std::path::PathBuf; +use std::{io::Read, path::PathBuf}; use crate::{ 
provide_index, provide_root, read_storage_value, AppError, DateTime, @@ -137,7 +136,8 @@ impl List { let mut contents = String::new(); match file.read_to_string(&mut contents) { Ok(_) => { - // Check if the content of the file is a valid url + // Check if the content of the file is a + // valid url let url = contents.trim(); let url = url::Url::parse(url); match url { diff --git a/ark-cli/src/commands/render.rs b/ark-cli/src/commands/render.rs index 7f3fa9e6..82fde115 100644 --- a/ark-cli/src/commands/render.rs +++ b/ark-cli/src/commands/render.rs @@ -26,7 +26,8 @@ impl Render { let dest_path = filepath.with_file_name( filepath .file_stem() - // SAFETY: we know that the file stem is valid UTF-8 because it is a file name + // SAFETY: we know that the file stem is valid UTF-8 because it + // is a file name .unwrap() .to_str() .unwrap() diff --git a/ark-cli/src/commands/storage/list.rs b/ark-cli/src/commands/storage/list.rs index 6b0c6c4e..0fdcef7c 100644 --- a/ark-cli/src/commands/storage/list.rs +++ b/ark-cli/src/commands/storage/list.rs @@ -1,8 +1,8 @@ use std::path::PathBuf; use crate::{ - models::storage::Storage, models::storage::StorageType, translate_storage, - AppError, + models::storage::{Storage, StorageType}, + translate_storage, AppError, }; #[derive(Clone, Debug, clap::Args)] diff --git a/ark-cli/src/index_registrar.rs b/ark-cli/src/index_registrar.rs index fc6a2e5b..4d0ea6fd 100644 --- a/ark-cli/src/index_registrar.rs +++ b/ark-cli/src/index_registrar.rs @@ -4,9 +4,11 @@ extern crate canonical_path; use data_error::{ArklibError, Result}; use fs_index::ResourceIndex; -use std::collections::HashMap; -use std::path::Path; -use std::sync::{Arc, RwLock}; +use std::{ + collections::HashMap, + path::Path, + sync::{Arc, RwLock}, +}; use crate::ResourceId; diff --git a/ark-cli/src/main.rs b/ark-cli/src/main.rs index c8c718ca..e8029c18 100644 --- a/ark-cli/src/main.rs +++ b/ark-cli/src/main.rs @@ -1,5 +1,7 @@ -use std::fs::{create_dir_all, File}; -use 
std::path::PathBuf; +use std::{ + fs::{create_dir_all, File}, + path::PathBuf, +}; use crate::index_registrar::provide_index; use data_pdf::{render_preview_page, PDFQuality}; @@ -15,25 +17,23 @@ use fs_storage::ARK_FOLDER; use anyhow::Result; -use chrono::prelude::DateTime; -use chrono::Utc; +use chrono::{prelude::DateTime, Utc}; -use clap::CommandFactory; -use clap::FromArgMatches; +use clap::{CommandFactory, FromArgMatches}; use fs_extra::dir::{self, CopyOptions}; use home::home_dir; -use crate::cli::Cli; -use crate::commands::file::File::{Append, Insert, Read}; -use crate::commands::link::Link::{Create, Load}; -use crate::commands::Commands::Link; -use crate::commands::Commands::Storage; -use crate::commands::Commands::*; -use crate::models::EntryOutput; -use crate::models::Format; -use crate::models::Sort; +use crate::{ + cli::Cli, + commands::{ + file::File::{Append, Insert, Read}, + link::Link::{Create, Load}, + Commands::{Link, Storage, *}, + }, + models::{EntryOutput, Format, Sort}, +}; use crate::error::AppError; diff --git a/ark-cli/src/models/storage.rs b/ark-cli/src/models/storage.rs index 48fc0464..ef6f7f37 100644 --- a/ark-cli/src/models/storage.rs +++ b/ark-cli/src/models/storage.rs @@ -1,7 +1,6 @@ use crate::ResourceId; use fs_atomic_versions::atomic::AtomicFile; -use std::fmt::Write; -use std::path::PathBuf; +use std::{fmt::Write, path::PathBuf}; use crate::{ commands::{file_append, file_insert, format_file, format_line}, diff --git a/ark-cli/src/util.rs b/ark-cli/src/util.rs index 9a370167..d2b216ac 100644 --- a/ark-cli/src/util.rs +++ b/ark-cli/src/util.rs @@ -6,19 +6,21 @@ use fs_storage::{ ARK_FOLDER, PREVIEWS_STORAGE_FOLDER, SCORE_STORAGE_FILE, STATS_FOLDER, TAG_STORAGE_FILE, THUMBNAILS_STORAGE_FOLDER, }; -use std::env::current_dir; -use std::fs::{canonicalize, metadata}; -use std::io::BufRead; -use std::io::BufReader; -use std::path::Path; -use std::str::FromStr; -use std::thread; -use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; 
-use std::{fs::File, path::PathBuf}; +use std::{ + env::current_dir, + fs::{canonicalize, metadata, File}, + io::{BufRead, BufReader}, + path::{Path, PathBuf}, + str::FromStr, + thread, + time::{Duration, Instant, SystemTime, UNIX_EPOCH}, +}; -use crate::error::AppError; -use crate::models::storage::{Storage, StorageType}; -use crate::ARK_CONFIG; +use crate::{ + error::AppError, + models::storage::{Storage, StorageType}, + ARK_CONFIG, +}; pub fn discover_roots( roots_cfg: &Option, diff --git a/data-json/src/lib.rs b/data-json/src/lib.rs index cc663775..9d047f17 100644 --- a/data-json/src/lib.rs +++ b/data-json/src/lib.rs @@ -1,7 +1,4 @@ -use serde_json::json; -use serde_json::map::Entry; -use serde_json::Map; -use serde_json::Value; +use serde_json::{json, map::Entry, Map, Value}; pub fn merge(origin: Value, new_data: Value) -> Value { match (origin, new_data) { diff --git a/data-link/src/lib.rs b/data-link/src/lib.rs index 7e614a1d..ba925722 100644 --- a/data-link/src/lib.rs +++ b/data-link/src/lib.rs @@ -2,18 +2,20 @@ use data_error::Result; use data_resource::ResourceId; use fs_atomic_versions::atomic::AtomicFile; use fs_metadata::store_metadata; -use fs_properties::load_raw_properties; -use fs_properties::store_properties; -use fs_properties::PROPERTIES_STORAGE_FOLDER; +use fs_properties::{ + load_raw_properties, store_properties, PROPERTIES_STORAGE_FOLDER, +}; use fs_storage::{ARK_FOLDER, PREVIEWS_STORAGE_FOLDER}; use reqwest::header::HeaderValue; use scraper::{Html, Selector}; use serde::{Deserialize, Serialize}; -use std::fmt; -use std::marker::PhantomData; -use std::path::Path; -use std::str::{self, FromStr}; -use std::{io::Write, path::PathBuf}; +use std::{ + fmt, + io::Write, + marker::PhantomData, + path::{Path, PathBuf}, + str::{self, FromStr}, +}; use url::Url; #[derive(Debug, Deserialize, Serialize)] diff --git a/data-resource/src/lib.rs b/data-resource/src/lib.rs index 049b4dbb..a8ac7774 100644 --- a/data-resource/src/lib.rs +++ 
b/data-resource/src/lib.rs @@ -3,16 +3,16 @@ //! `data-resource` is a crate for managing resource identifiers. use core::{fmt::Display, str::FromStr}; use data_error::Result; -use serde::de::DeserializeOwned; -use serde::Serialize; +use serde::{de::DeserializeOwned, Serialize}; use std::{fmt::Debug, hash::Hash, path::Path}; /// This trait defines a generic type representing a resource identifier. /// -/// Resources are identified by a hash value, which is computed from the resource's data. -/// The hash value is used to uniquely identify the resource. +/// Resources are identified by a hash value, which is computed from the +/// resource's data. The hash value is used to uniquely identify the resource. /// -/// Implementors of this trait must provide a way to compute the hash value from the resource's data. +/// Implementors of this trait must provide a way to compute the hash value from +/// the resource's data. pub trait ResourceId: Debug + Display diff --git a/dev-hash/benches/blake3.rs b/dev-hash/benches/blake3.rs index cf95233b..f434cb36 100644 --- a/dev-hash/benches/blake3.rs +++ b/dev-hash/benches/blake3.rs @@ -17,7 +17,8 @@ fn generate_random_data(size: usize) -> Vec { (0..size).map(|_| rng.gen()).collect() } -/// Benchmarks the performance of resource ID creation from file paths and random data. +/// Benchmarks the performance of resource ID creation from file paths and +/// random data. /// /// - Measures the time taken to create a resource ID from file paths. /// - Measures the time taken to create a resource ID from random data. diff --git a/dev-hash/benches/crc32.rs b/dev-hash/benches/crc32.rs index b462035b..c85c4dc7 100644 --- a/dev-hash/benches/crc32.rs +++ b/dev-hash/benches/crc32.rs @@ -17,7 +17,8 @@ fn generate_random_data(size: usize) -> Vec { (0..size).map(|_| rng.gen()).collect() } -/// Benchmarks the performance of resource ID creation from file paths and random data. 
+/// Benchmarks the performance of resource ID creation from file paths and +/// random data. /// /// - Measures the time taken to create a resource ID from file paths. /// - Measures the time taken to create a resource ID from random data. diff --git a/fs-atomic-light/src/lib.rs b/fs-atomic-light/src/lib.rs index 25288f6d..fb4bf028 100644 --- a/fs-atomic-light/src/lib.rs +++ b/fs-atomic-light/src/lib.rs @@ -1,9 +1,6 @@ use data_error::Result; -use std::env; -use std::fs; -use std::path::Path; -use std::str; +use std::{env, fs, path::Path, str}; /// Write data to a tempory file and move that written file to destination /// diff --git a/fs-atomic-versions/src/atomic/file.rs b/fs-atomic-versions/src/atomic/file.rs index bd3f2571..1fe9549d 100644 --- a/fs-atomic-versions/src/atomic/file.rs +++ b/fs-atomic-versions/src/atomic/file.rs @@ -1,8 +1,10 @@ -use std::fs::{self, File}; -use std::io::{Error, ErrorKind, Read, Result}; #[cfg(target_os = "unix")] use std::os::unix::fs::MetadataExt; -use std::path::{Path, PathBuf}; +use std::{ + fs::{self, File}, + io::{Error, ErrorKind, Read, Result}, + path::{Path, PathBuf}, +}; use crate::app_id; diff --git a/fs-atomic-versions/src/lib.rs b/fs-atomic-versions/src/lib.rs index 1ce1f0b2..62935039 100644 --- a/fs-atomic-versions/src/lib.rs +++ b/fs-atomic-versions/src/lib.rs @@ -1,7 +1,8 @@ use lazy_static::lazy_static; -use std::path::PathBuf; -use std::sync::Once; -use std::sync::RwLock; +use std::{ + path::PathBuf, + sync::{Once, RwLock}, +}; pub mod app_id; pub mod atomic; diff --git a/fs-index/benches/resource_index_benchmark.rs b/fs-index/benches/resource_index_benchmark.rs index c762eebd..be113dca 100644 --- a/fs-index/benches/resource_index_benchmark.rs +++ b/fs-index/benches/resource_index_benchmark.rs @@ -47,8 +47,9 @@ fn resource_index_benchmark(c: &mut Criterion) { // Benchmark `ResourceIndex::update_all()` - // First, create a new temp directory specifically for the update_all benchmark - // since we will be creating 
new files, removing files, and modifying files + // First, create a new temp directory specifically for the update_all + // benchmark since we will be creating new files, removing files, and + // modifying files let update_all_benchmarks_dir = TempDir::with_prefix("ark-fs-index-benchmarks-update-all").unwrap(); let update_all_benchmarks_dir = update_all_benchmarks_dir.path(); @@ -85,7 +86,8 @@ criterion_group! { } criterion_main!(benches); -/// A helper function to setup a temp directory for the benchmarks using the test assets directory +/// A helper function to setup a temp directory for the benchmarks using the +/// test assets directory fn setup_temp_dir() -> TempDir { // assert the path exists and is a directory assert!( @@ -119,16 +121,18 @@ fn setup_temp_dir() -> TempDir { temp_dir } -/// A helper function that takes a directory and creates 50 new files, removes 30 files, and modifies 10 files +/// A helper function that takes a directory and creates 50 new files, removes +/// 30 files, and modifies 10 files /// -/// Note: The function assumes that the directory already contains 50 files named `file_0.txt` to `file_49.txt` +/// Note: The function assumes that the directory already contains 50 files +/// named `file_0.txt` to `file_49.txt` fn update_all_files(dir: &PathBuf) { // Create 50 new files for i in 51..101 { let new_file = dir.join(format!("file_{}.txt", i)); std::fs::File::create(&new_file).unwrap(); - // We add the index `i` to the file content to make sure the content is unique - // This is to avoid collisions in the index + // We add the index `i` to the file content to make sure the content is + // unique This is to avoid collisions in the index std::fs::write(&new_file, format!("Hello, World! 
{}", i)).unwrap(); } diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 9bbb863d..22aa7c43 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -1,12 +1,14 @@ use anyhow::anyhow; use canonical_path::{CanonicalPath, CanonicalPathBuf}; use itertools::Itertools; -use std::collections::{HashMap, HashSet}; -use std::fs::{self, File, Metadata}; -use std::io::{BufRead, BufReader, Write}; -use std::ops::Add; -use std::path::{Path, PathBuf}; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::{ + collections::{HashMap, HashSet}, + fs::{self, File, Metadata}, + io::{BufRead, BufReader, Write}, + ops::Add, + path::{Path, PathBuf}, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; use walkdir::{DirEntry, WalkDir}; use log; diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 76d13f4d..9eed0158 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -1,5 +1,7 @@ -use crate::index::{discover_paths, IndexEntry}; -use crate::ResourceIndex; +use crate::{ + index::{discover_paths, IndexEntry}, + ResourceIndex, +}; use canonical_path::CanonicalPathBuf; use dev_hash::Crc32; use fs_atomic_versions::initialize; @@ -9,8 +11,7 @@ use std::fs::Permissions; #[cfg(target_family = "unix")] use std::os::unix::fs::PermissionsExt; -use std::path::PathBuf; -use std::time::SystemTime; +use std::{path::PathBuf, time::SystemTime}; use uuid::Uuid; const FILE_SIZE_1: u64 = 10; diff --git a/fs-metadata/src/lib.rs b/fs-metadata/src/lib.rs index df258ae5..881ddeab 100644 --- a/fs-metadata/src/lib.rs +++ b/fs-metadata/src/lib.rs @@ -1,9 +1,7 @@ use data_error::Result; use fs_atomic_versions::atomic::{modify_json, AtomicFile}; use serde::{de::DeserializeOwned, Serialize}; -use std::fmt::Debug; -use std::io::Read; -use std::path::Path; +use std::{fmt::Debug, io::Read, path::Path}; use data_resource::ResourceId; use fs_storage::ARK_FOLDER; diff --git a/fs-properties/src/lib.rs b/fs-properties/src/lib.rs index 81b5b04f..0c28aede 100644 --- 
a/fs-properties/src/lib.rs +++ b/fs-properties/src/lib.rs @@ -1,8 +1,6 @@ use serde::{de::DeserializeOwned, Serialize}; use serde_json::Value; -use std::fmt::Debug; -use std::io::Read; -use std::path::Path; +use std::{fmt::Debug, io::Read, path::Path}; use data_error::Result; use data_json::merge; diff --git a/fs-storage/examples/cli.rs b/fs-storage/examples/cli.rs index 7b2ab23d..8214f258 100644 --- a/fs-storage/examples/cli.rs +++ b/fs-storage/examples/cli.rs @@ -1,10 +1,7 @@ use anyhow::{Context, Result}; -use fs_storage::base_storage::BaseStorage; -use fs_storage::file_storage::FileStorage; +use fs_storage::{base_storage::BaseStorage, file_storage::FileStorage}; use serde_json::Value; -use std::env; -use std::fs; -use std::path::Path; +use std::{env, fs, path::Path}; fn main() { if let Err(e) = run() { diff --git a/fs-storage/src/base_storage.rs b/fs-storage/src/base_storage.rs index 576b5a8f..4eba03b0 100644 --- a/fs-storage/src/base_storage.rs +++ b/fs-storage/src/base_storage.rs @@ -30,16 +30,22 @@ impl std::fmt::Display for SyncStatus { } } -/// The `BaseStorage` trait represents a key-value mapping that is written to the file system. +/// The `BaseStorage` trait represents a key-value mapping that is written to +/// the file system. /// -/// This trait provides methods to create or update entries in the internal mapping, remove entries from the internal mapping, -/// determine if the in-memory model or the underlying storage requires syncing, scan and load the mapping from the filesystem, -/// write the mapping to the filesystem, and remove all stored data. +/// This trait provides methods to create or update entries in the internal +/// mapping, remove entries from the internal mapping, determine if the +/// in-memory model or the underlying storage requires syncing, scan and load +/// the mapping from the filesystem, write the mapping to the filesystem, and +/// remove all stored data. 
/// -/// The trait also includes a method to merge values from another key-value mapping. +/// The trait also includes a method to merge values from another key-value +/// mapping. /// -/// Note: The trait does not write to storage by default. It is up to the implementor to decide when to read or write to storage -/// based on `SyncStatus`. This is to allow for trading off between performance and consistency. +/// Note: The trait does not write to storage by default. It is up to the +/// implementor to decide when to read or write to storage +/// based on `SyncStatus`. This is to allow for trading off between performance +/// and consistency. pub trait BaseStorage: AsRef> { /// Create or update an entry in the internal mapping. fn set(&mut self, id: K, value: V); diff --git a/fs-storage/src/file_storage.rs b/fs-storage/src/file_storage.rs index 0e27766f..9b762bbd 100644 --- a/fs-storage/src/file_storage.rs +++ b/fs-storage/src/file_storage.rs @@ -1,15 +1,17 @@ use serde::{Deserialize, Serialize}; -use std::fs::{self, File}; -use std::io::Write; -use std::time::SystemTime; use std::{ collections::BTreeMap, + fs::{self, File}, + io::Write, path::{Path, PathBuf}, + time::SystemTime, }; -use crate::base_storage::{BaseStorage, SyncStatus}; -use crate::monoid::Monoid; -use crate::utils::read_version_2_fs; +use crate::{ + base_storage::{BaseStorage, SyncStatus}, + monoid::Monoid, + utils::read_version_2_fs, +}; use data_error::{ArklibError, Result}; /* @@ -80,8 +82,8 @@ where /// Create a new file storage with a diagnostic label and file path /// The storage will be initialized using the disk data, if the path exists /// - /// Note: if the file storage already exists, the data will be read from the file - /// without overwriting it. + /// Note: if the file storage already exists, the data will be read from the + /// file without overwriting it. 
pub fn new(label: String, path: &Path) -> Result { let time = SystemTime::now(); let mut storage = Self { @@ -114,7 +116,8 @@ where // First check if the file starts with "version: 2" let file_content = std::fs::read_to_string(&self.path)?; if file_content.starts_with("version: 2") { - // Attempt to parse the file using the legacy version 2 storage format of FileStorage. + // Attempt to parse the file using the legacy version 2 storage + // format of FileStorage. match read_version_2_fs(&self.path) { Ok(data) => { log::info!( @@ -193,14 +196,14 @@ where // Determine the synchronization status based on the modification times // Conditions: - // 1. If both the in-memory storage and the storage on disk have been modified - // since the last write, then the storage is diverged. - // 2. If only the in-memory storage has been modified since the last write, - // then the storage on disk is stale. - // 3. If only the storage on disk has been modified since the last write, - // then the in-memory storage is stale. - // 4. If neither the in-memory storage nor the storage on disk has been modified - // since the last write, then the storage is in sync. + // 1. If both the in-memory storage and the storage on disk have been + // modified since the last write, then the storage is diverged. + // 2. If only the in-memory storage has been modified since the last + // write, then the storage on disk is stale. + // 3. If only the storage on disk has been modified since the last + // write, then the in-memory storage is stale. + // 4. If neither the in-memory storage nor the storage on disk has been + // modified since the last write, then the storage is in sync. 
let status = match ( self.modified > self.written_to_disk, file_updated > self.written_to_disk, diff --git a/fs-storage/src/jni/file_storage.rs b/fs-storage/src/jni/file_storage.rs index 20820cd0..d693a04b 100644 --- a/fs-storage/src/jni/file_storage.rs +++ b/fs-storage/src/jni/file_storage.rs @@ -1,7 +1,6 @@ use crate::base_storage::SyncStatus; use jni::signature::ReturnType; -use std::collections::BTreeMap; -use std::path::Path; +use std::{collections::BTreeMap, path::Path}; // This is the interface to the JVM that we'll call the majority of our // methods on. use jni::JNIEnv; @@ -82,8 +81,8 @@ pub extern "system" fn Java_dev_arkbuilders_core_FileStorage_remove<'local>( }); } -// A JNI function called from Java that creates a `MyData` Rust type, converts it to a Java -// type and returns it. +// A JNI function called from Java that creates a `MyData` Rust type, converts +// it to a Java type and returns it. #[no_mangle] #[allow(non_snake_case)] pub extern "system" fn Java_dev_arkbuilders_core_FileStorage_syncStatus< diff --git a/fs-storage/src/monoid.rs b/fs-storage/src/monoid.rs index b2acf546..1c483201 100644 --- a/fs-storage/src/monoid.rs +++ b/fs-storage/src/monoid.rs @@ -1,10 +1,11 @@ -// Currently, we have three structures: Tags (HashSet), Properties (HashSet), Score (int). -// In fact, HashSet already implements a union function, +// Currently, we have three structures: Tags (HashSet), Properties (HashSet), +// Score (int). In fact, HashSet already implements a union function, // so only a special function for integers is needed. // CRDTs can be considered later when we need to add structures that require // more powerful combine semantics. -// Trait defining a Monoid, which represents a mathematical structure with an identity element and an associative binary operation. +// Trait defining a Monoid, which represents a mathematical structure with an +// identity element and an associative binary operation. 
pub trait Monoid { // Returns the neutral element of the monoid. fn neutral() -> V; @@ -13,7 +14,8 @@ pub trait Monoid { fn combine(a: &V, b: &V) -> V; // Combines multiple elements of the monoid into a single element. - // Default implementation uses `neutral()` as the initial accumulator and `combine()` for folding. + // Default implementation uses `neutral()` as the initial accumulator and + // `combine()` for folding. fn combine_all>(values: I) -> V { values .into_iter() diff --git a/fs-storage/src/utils.rs b/fs-storage/src/utils.rs index b5b6830a..d1e818bd 100644 --- a/fs-storage/src/utils.rs +++ b/fs-storage/src/utils.rs @@ -1,6 +1,5 @@ use data_error::Result; -use std::collections::BTreeMap; -use std::path::Path; +use std::{collections::BTreeMap, path::Path}; /// Parses version 2 `FileStorage` format and returns the data as a BTreeMap /// diff --git a/rustfmt.toml b/rustfmt.toml index 0a869692..5b415646 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,15 +1,18 @@ +# General settings +imports_granularity = 'Crate' verbose = "Verbose" tab_spaces = 4 - max_width = 80 +newline_style = "Unix" + +# Code style settings chain_width = 50 single_line_if_else_max_width = 30 - force_explicit_abi = true - +# Import settings reorder_imports = true +# Comment settings wrap_comments = true - -newline_style = "Unix" +comment_width = 80 From fdb2f362d6b5e8389837ea498c4b888c37e87b15 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 30 Jun 2024 18:12:42 +0300 Subject: [PATCH 04/46] fix: change target_os to target_family Signed-off-by: Tarek --- fs-atomic-versions/src/atomic/file.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs-atomic-versions/src/atomic/file.rs b/fs-atomic-versions/src/atomic/file.rs index 1fe9549d..a2b708ab 100644 --- a/fs-atomic-versions/src/atomic/file.rs +++ b/fs-atomic-versions/src/atomic/file.rs @@ -1,4 +1,4 @@ -#[cfg(target_os = "unix")] +#[cfg(target_family = "unix")] use std::os::unix::fs::MetadataExt; use std::{ fs::{self, 
File}, @@ -231,7 +231,7 @@ impl AtomicFile { // May return `EEXIST`. let res = std::fs::hard_link(&new.path, new_path); if let Err(err) = res { - #[cfg(target_os = "unix")] + #[cfg(target_family = "unix")] // From open(2) manual page: // // "[...] create a unique file on the same filesystem (e.g., @@ -243,7 +243,7 @@ impl AtomicFile { if new.path.metadata()?.nlink() != 2 { Err(err)?; } - #[cfg(not(target_os = "unix"))] + #[cfg(not(target_family = "unix"))] Err(err)?; } From 07b84b2ec1b74be128a654dbbfd6585a6ab9b6eb Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 30 Jun 2024 19:51:00 +0300 Subject: [PATCH 05/46] refactor(fs-index): refactor ResourceIndex and store relative paths Signed-off-by: Tarek --- fs-index/Cargo.toml | 6 +- fs-index/src/index.rs | 970 ++++++++++++++++-------------------------- fs-index/src/lib.rs | 4 + fs-index/src/serde.rs | 123 ++++++ fs-index/src/tests.rs | 918 +++++++++++++++++++++++---------------- fs-index/src/utils.rs | 69 +++ 6 files changed, 1105 insertions(+), 985 deletions(-) create mode 100644 fs-index/src/serde.rs create mode 100644 fs-index/src/utils.rs diff --git a/fs-index/Cargo.toml b/fs-index/Cargo.toml index 0ae00b3f..0ded09dd 100644 --- a/fs-index/Cargo.toml +++ b/fs-index/Cargo.toml @@ -12,9 +12,8 @@ bench = false log = { version = "0.4.17", features = ["release_max_level_off"] } walkdir = "2.3.2" anyhow = "1.0.58" -canonical-path = "2.0.2" -pathdiff = "0.2.1" -itertools = "0.10.5" +serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } fs-storage = { path = "../fs-storage" } @@ -29,7 +28,6 @@ criterion = { version = "0.5", features = ["html_reports"] } tempfile = "3.10" # Depending on `dev-hash` for testing dev-hash = { path = "../dev-hash" } -fs-atomic-versions = { path = "../fs-atomic-versions" } [[bench]] name = "resource_index_benchmark" diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 22aa7c43..1026e557 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -1,680 +1,430 @@ 
-use anyhow::anyhow; -use canonical_path::{CanonicalPath, CanonicalPathBuf}; -use itertools::Itertools; use std::{ - collections::{HashMap, HashSet}, - fs::{self, File, Metadata}, - io::{BufRead, BufReader, Write}, - ops::Add, + collections::HashMap, + fs, + hash::Hash, path::{Path, PathBuf}, - time::{Duration, SystemTime, UNIX_EPOCH}, + time::SystemTime, }; -use walkdir::{DirEntry, WalkDir}; +use anyhow::anyhow; use log; +use serde::{Deserialize, Serialize}; +use walkdir::WalkDir; use data_error::{ArklibError, Result}; use data_resource::ResourceId; use fs_storage::{ARK_FOLDER, INDEX_PATH}; -#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Clone, Debug)] -pub struct IndexEntry { - pub modified: SystemTime, +use crate::utils::should_index; + +/// Represents a resource in the index +#[derive( + PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Debug, Serialize, Deserialize, +)] +pub struct IndexedResource { + /// The unique identifier of the resource pub id: Id, + /// The path of the resource, relative to the root path + pub path: PathBuf, + /// The last modified time of the resource (from the file system metadata) + pub last_modified: SystemTime, } -#[derive(PartialEq, Clone, Debug)] -pub struct ResourceIndex { - pub id2path: HashMap, - pub path2id: HashMap>, - - pub collisions: HashMap, - pub root: PathBuf, +/// Represents the index of resources in a directory. +/// +/// [`ResourceIndex`] provides functionality for managing a directory index, +/// including tracking changes, and querying resources. +/// +/// #### Reactive API +/// - [`ResourceIndex::update_all`]: Method to update the index by rescanning +/// files and returning changes (additions/deletions/updates). +/// +/// #### Snapshot API +/// - [`ResourceIndex::get_resources_by_id`]: Query resources from the index by +/// ID. +/// - [`ResourceIndex::get_resource_by_path`]: Query a resource from the index +/// by its path. 
+/// +/// #### Track API +/// Allows for fine-grained control over tracking changes in the index +/// - [`ResourceIndex::track_addition`]: Track a newly added file (checks if the +/// file exists in the file system). +/// - [`ResourceIndex::track_removal`]: Track the deletion of a file (checks if +/// the file was actually deleted). +/// - [`ResourceIndex::track_modification`]: Track an update on a single file. +/// +/// ## Examples +/// ```no_run +/// use std::path::Path; +/// use fs_index::{ResourceIndex, load_or_build_index}; +/// use dev_hash::Crc32; +/// +/// // Define the root path +/// let root_path = Path::new("animals"); +/// +/// // Build the index +/// let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); +/// // Store the index +/// index.store().expect("Failed to store index"); +/// +/// // Load the stored index +/// let mut loaded_index: ResourceIndex = load_or_build_index(root_path, false).expect("Failed to load index"); +/// +/// // Update the index +/// loaded_index.update_all().expect("Failed to update index"); +/// +/// // Get a resource by path +/// let _resource = loaded_index +/// .get_resource_by_path("cat.txt") +/// .expect("Resource not found"); +/// +/// // Track the removal of a file +/// loaded_index +/// .track_removal(Path::new("cat.txt")) +/// .expect("Failed to track removal"); +/// +/// // Track the addition of a new file +/// loaded_index +/// .track_addition(Path::new("dog.txt")) +/// .expect("Failed to track addition"); +/// +/// // Track the modification of a file +/// loaded_index +/// .track_modification(Path::new("dog.txt")) +/// .expect("Failed to track modification"); +/// ``` +#[derive(Clone, Debug)] +pub struct ResourceIndex +where + Id: Eq + Hash, +{ + /// The root path of the index (canonicalized) + pub(crate) root: PathBuf, + /// A map from resource IDs to resources + /// + /// Multiple resources can have the same ID (e.g., due to hash collisions + /// or files with the same 
content) + pub(crate) id_to_resources: HashMap>>, + /// A map from resource paths to resources + pub(crate) path_to_resource: HashMap>, } +/// Represents the result of an update operation on the ResourceIndex #[derive(PartialEq, Debug)] pub struct IndexUpdate { - pub deleted: HashSet, - pub added: HashMap, + /// Resources that were added during the update + pub added: Vec>, + /// Resources that were modified during the update + pub modified: Vec>, + /// Resources that were removed during the update + pub removed: Vec>, } -pub const RESOURCE_UPDATED_THRESHOLD: Duration = Duration::from_millis(1); - -pub type Paths = HashSet; - impl ResourceIndex { - pub fn size(&self) -> usize { - //the actual size is lower in presence of collisions - self.path2id.len() + /// Return the number of resources in the index + pub fn len(&self) -> usize { + self.path_to_resource.len() } - pub fn build>(root_path: P) -> Self { - log::info!("Building the index from scratch"); - let root_path: PathBuf = root_path.as_ref().to_owned(); - - let entries = discover_paths(&root_path); - let entries = scan_entries(entries); - - let mut index = ResourceIndex { - id2path: HashMap::new(), - path2id: HashMap::new(), - collisions: HashMap::new(), - root: root_path, - }; - - for (path, entry) in entries { - index.insert_entry(path, entry); - } - - log::info!("Index built"); - index + /// Return true if the index is empty + pub fn is_empty(&self) -> bool { + self.path_to_resource.is_empty() } - pub fn load>(root_path: P) -> Result { - let root_path: PathBuf = root_path.as_ref().to_owned(); - - let index_path: PathBuf = root_path.join(ARK_FOLDER).join(INDEX_PATH); - log::info!("Loading the index from file {}", index_path.display()); - let file = File::open(&index_path)?; - let mut index = ResourceIndex { - id2path: HashMap::new(), - path2id: HashMap::new(), - collisions: HashMap::new(), - root: root_path.clone(), - }; - - // We should not return early in case of missing files - let lines = 
BufReader::new(file).lines(); - for line in lines { - let line = line?; - - let mut parts = line.split(' '); - - let modified = { - let str = parts.next().ok_or(ArklibError::Parse)?; - UNIX_EPOCH.add(Duration::from_millis( - str.parse().map_err(|_| ArklibError::Parse)?, - )) - }; - - let id = { - let str = parts.next().ok_or(ArklibError::Parse)?; - Id::from_str(str).map_err(|_| ArklibError::Parse)? - }; + /// Return the root path of the index + pub fn root(&self) -> &Path { + &self.root + } - let path: String = - itertools::Itertools::intersperse(parts, " ").collect(); - let path: PathBuf = root_path.join(Path::new(&path)); - match CanonicalPathBuf::canonicalize(&path) { - Ok(path) => { - log::trace!("[load] {} -> {}", id, path.display()); - index.insert_entry(path, IndexEntry { modified, id }); - } - Err(_) => { - log::warn!("File {} not found", path.display()); - continue; - } - } - } + /// Return the resources in the index + pub fn resources(&self) -> Vec> { + // Iterate over path_to_resource so that each indexed path is returned exactly once + self.path_to_resource.values().cloned().collect() + } - Ok(index) + /// Return the ID collisions + pub fn collisions(&self) -> HashMap>> { + // Filter out IDs with only one resource + self.id_to_resources + .iter() + .filter(|(_, resources)| resources.len() > 1) + .map(|(id, resources)| (id.clone(), resources.clone())) + .collect() } + /// Save the index to the file system (as a JSON file at + /// ARK_FOLDER/INDEX_PATH under the index root) pub fn store(&self) -> Result<()> { - log::info!("Storing the index to file"); - - let start = SystemTime::now(); + let ark_folder = self.root.join(ARK_FOLDER); + let index_path = ark_folder.join(INDEX_PATH); + log::debug!("Storing index at: {:?}", index_path); - let index_path = self - .root - .to_owned() - .join(ARK_FOLDER) - .join(INDEX_PATH); + fs::create_dir_all(&ark_folder)?; + let index_file = fs::File::create(index_path)?; + serde_json::to_writer_pretty(index_file, self)?; - let ark_dir = 
index_path.parent().unwrap(); - fs::create_dir_all(ark_dir)?; - - let mut file = File::create(index_path)?; + Ok(()) + } - let mut path2id: Vec<(&CanonicalPathBuf, &IndexEntry)> = - self.path2id.iter().collect(); - path2id.sort_by_key(|(_, entry)| *entry); + /// Get resources by their ID + /// + /// Returns None if there is no resource with the given ID + /// + /// **Note**: This can return multiple resources with the same ID in case of + /// hash collisions or files with the same content + pub fn get_resources_by_id( + &self, + id: Id, + ) -> Option<&Vec>> { + self.id_to_resources.get(&id) + } - for (path, entry) in path2id.iter() { - log::trace!("[store] {} by path {}", entry.id, path.display()); + /// Get a resource by its path + /// + /// Returns None if the resource does not exist + /// + /// **Note**: The path should be relative to the root path + pub fn get_resource_by_path>( + &self, + path: P, + ) -> Option<&IndexedResource> { + self.path_to_resource.get(path.as_ref()) + } - let timestamp = entry - .modified - .duration_since(UNIX_EPOCH) - .map_err(|_| { - ArklibError::Other(anyhow!("Error using duration since")) - })? 
- .as_millis(); + /// Build a new index from the given root path + pub fn build>(root_path: P) -> Result { + log::debug!("Building index at root path: {:?}", root_path.as_ref()); - let path = - pathdiff::diff_paths(path.to_str().unwrap(), self.root.clone()) - .ok_or(ArklibError::Path( - "Couldn't calculate path diff".into(), - ))?; + // Canonicalize the root path + let root = fs::canonicalize(&root_path)?; + let mut id_to_resources = HashMap::new(); + let mut path_to_resource = HashMap::new(); - writeln!(file, "{} {} {}", timestamp, entry.id, path.display())?; + // Loop through the root path and add resources to the index + let walker = WalkDir::new(&root) + .min_depth(1) // Skip the root directory + .into_iter() + .filter_entry(should_index); // Skip hidden files + for entry in walker { + let entry = entry.map_err(|e| { + ArklibError::Path(format!("Error walking directory: {}", e)) + })?; + // Ignore directories + if !entry.file_type().is_file() { + continue; + } + let path = entry.path(); + let metadata = fs::metadata(path)?; + let last_modified = metadata.modified()?; + let id = Id::from_path(path)?; + // Path is relative to the root + let path = path.strip_prefix(&root).map_err(|_| { + ArklibError::Path("Error stripping prefix".to_string()) + })?; + + // Create the resource and add it to the index + let resource = IndexedResource { + id: id.clone(), + path: path.to_path_buf(), + last_modified, + }; + path_to_resource.insert(resource.path.clone(), resource.clone()); + id_to_resources + .entry(id) + .or_insert_with(Vec::new) + .push(resource); } - log::trace!( - "Storing the index took {:?}", - start - .elapsed() - .map_err(|_| ArklibError::Other(anyhow!("SystemTime error"))) - ); - Ok(()) + Ok(ResourceIndex { + root, + id_to_resources, + path_to_resource, + }) } - pub fn provide>(root_path: P) -> Result { - match Self::load(&root_path) { - Ok(mut index) => { - log::debug!("Index loaded: {} entries", index.path2id.len()); - - match index.update_all() { - 
Ok(update) => { - log::debug!( - "Index updated: {} added, {} deleted", - update.added.len(), - update.deleted.len() - ); - } - Err(e) => { - log::error!( - "Failed to update index: {}", - e.to_string() - ); - } - } - - if let Err(e) = index.store() { - log::error!("{}", e.to_string()); + /// Update the index with the latest information from the file system + pub fn update_all(&mut self) -> Result> { + log::debug!("Updating index at root path: {:?}", self.root); + + let mut added = Vec::new(); + let mut modified = Vec::new(); + let mut removed = Vec::new(); + + let new_index = ResourceIndex::build(&self.root)?; + + // Compare the new index with the old index + let current_resources = self.resources(); + let new_resources = new_index.resources(); + for resource in new_resources.clone() { + // If the resource is in the old index, check if it has been + // modified + if let Some(current_resource) = + self.get_resource_by_path(&resource.path) + { + if current_resource != &resource { + modified.push(resource.clone()); } - Ok(index) } - Err(e) => { - log::warn!("{}", e.to_string()); - Ok(Self::build(root_path)) + // If the resource is not in the old index, it has been added + else { + added.push(resource.clone()); } } - } - - pub fn update_all(&mut self) -> Result> { - log::debug!("Updating the index"); - log::trace!("[update] known paths: {:?}", self.path2id.keys()); - - let curr_entries = discover_paths(self.root.clone()); - - //assuming that collections manipulation is - // quicker than asking `path.exists()` for every path - let curr_paths: Paths = curr_entries.keys().cloned().collect(); - let prev_paths: Paths = self.path2id.keys().cloned().collect(); - let preserved_paths: Paths = curr_paths - .intersection(&prev_paths) - .cloned() - .collect(); - - let created_paths: HashMap = curr_entries - .iter() - .filter_map(|(path, entry)| { - if !preserved_paths.contains(path.as_canonical_path()) { - Some((path.clone(), entry.clone())) - } else { - None - } - }) - 
.collect(); - - log::debug!("Checking updated paths"); - let updated_paths: HashMap = curr_entries - .into_iter() - .filter(|(path, dir_entry)| { - if !preserved_paths.contains(path.as_canonical_path()) { - false - } else { - let our_entry = &self.path2id[path]; - let prev_modified = our_entry.modified; - - let result = dir_entry.metadata(); - match result { - Err(msg) => { - log::error!( - "Couldn't retrieve metadata for {}: {}", - &path.display(), - msg - ); - false - } - Ok(metadata) => match metadata.modified() { - Err(msg) => { - log::error!( - "Couldn't retrieve timestamp for {}: {}", - &path.display(), - msg - ); - false - } - Ok(curr_modified) => { - let elapsed = curr_modified - .duration_since(prev_modified) - .unwrap(); - - let was_updated = - elapsed >= RESOURCE_UPDATED_THRESHOLD; - if was_updated { - log::trace!( - "[update] modified {} by path {} - \twas {:?} - \tnow {:?} - \telapsed {:?}", - our_entry.id, - path.display(), - prev_modified, - curr_modified, - elapsed - ); - } - - was_updated - } - }, - } - } - }) - .collect(); - - let mut deleted: HashSet = HashSet::new(); - - // treating both deleted and updated paths as deletions - prev_paths - .difference(&preserved_paths) - .cloned() - .chain(updated_paths.keys().cloned()) - .for_each(|path| { - if let Some(entry) = - self.path2id.remove(path.as_canonical_path()) - { - let k = self.collisions.remove(&entry.id).unwrap_or(1); - if k > 1 { - self.collisions.insert(entry.id, k - 1); - } else { - log::trace!( - "[delete] {} by path {}", - entry.id, - path.display() - ); - self.id2path.remove(&entry.id); - deleted.insert(entry.id); - } - } else { - log::warn!("Path {} was not known", path.display()); - } - }); - - let added: HashMap> = - scan_entries(updated_paths) - .into_iter() - .chain({ - log::debug!("Checking added paths"); - scan_entries(created_paths).into_iter() - }) - .filter(|(_, entry)| !self.id2path.contains_key(&entry.id)) - .collect(); - - for (path, entry) in added.iter() { - if 
deleted.contains(&entry.id) { - // emitting the resource as both deleted and added - // (renaming a duplicate might remain undetected) - log::trace!( - "[update] moved {} to path {}", - entry.id, - path.display() - ); + for resource in current_resources { + // If the resource is not in the new index, it has been removed + if !new_resources.contains(&resource) { + removed.push(resource.clone()); } - - self.insert_entry(path.clone(), entry.clone()); } - let added: HashMap = added - .into_iter() - .map(|(path, entry)| (path, entry.id)) - .collect(); - - Ok(IndexUpdate { deleted, added }) + // Update the index with the new index and return the result + *self = new_index; + Ok(IndexUpdate { + added, + modified, + removed, + }) } - // the caller must ensure that: - // * the index is up-to-date except this single path - // * the path hasn't been indexed before - pub fn index_new( + /// Track the addition of a newly added file to the resource index. + /// + /// This method checks if the file exists in the file system. + /// + /// # Arguments + /// * `relative_path` - The path of the file to be added (relative to the + /// root path of the index). + /// + /// # Returns + /// Returns `Ok(resource)` if the file was successfully added to the index. + /// + /// # Errors + /// - If the file does not exist in the file system. + /// - If there was an error calculating the checksum of the file. 
+ pub fn track_addition>( &mut self, - path: &dyn AsRef, - ) -> Result> { - log::debug!("Indexing a new path"); - - if !path.as_ref().exists() { - return Err(ArklibError::Path( - "Absent paths cannot be indexed".into(), - )); + relative_path: P, + ) -> Result> { + log::debug!("Tracking addition of file: {:?}", relative_path.as_ref()); + + let path = relative_path.as_ref(); + let full_path = self.root.join(path); + if !full_path.exists() { + return Err(ArklibError::Path(format!( + "File does not exist: {:?}", + full_path + )) + .into()); } - - let path_buf = CanonicalPathBuf::canonicalize(path)?; - let path = path_buf.as_canonical_path(); - - return match fs::metadata(path) { - Err(_) => { - return Err(ArklibError::Path( - "Couldn't to retrieve file metadata".into(), - )); - } - Ok(metadata) => match scan_entry(path, metadata) { - Err(_) => { - return Err(ArklibError::Path( - "The path points to a directory or empty file".into(), - )); - } - Ok(new_entry) => { - let id = new_entry.clone().id; - - if let Some(nonempty) = self.collisions.get_mut(&id) { - *nonempty += 1; - } - - let mut added = HashMap::new(); - added.insert(path_buf.clone(), id.clone()); - - self.id2path.insert(id, path_buf.clone()); - self.path2id.insert(path_buf, new_entry); - - Ok(IndexUpdate { - added, - deleted: HashSet::new(), - }) - } - }, + let metadata = fs::metadata(&full_path)?; + let last_modified = metadata.modified()?; + let id = Id::from_path(&full_path)?; + + let resource = IndexedResource { + id: id.clone(), + path: path.to_path_buf(), + last_modified, }; + self.path_to_resource + .insert(resource.path.clone(), resource.clone()); + self.id_to_resources + .entry(id) + .or_default() + .push(resource.clone()); + + Ok(resource) } - // the caller must ensure that: - // * the index is up-to-date except this single path - // * the path has been indexed before - // * the path maps into `old_id` - // * the content by the path has been modified - pub fn update_one( + /// Track the removal of a 
file from the resource index. + /// + /// This method checks that the file no longer exists in the file system. + /// + /// # Arguments + /// * `relative_path` - The path of the file to be removed (relative to the + /// root path of the index). + /// + /// # Returns + /// Returns `Ok(resource)` if the resource was successfully removed from the + /// index. + /// + /// # Errors + /// - If the file still exists in the file system. + /// - If the resource does not exist in the index. + pub fn track_removal>( &mut self, - path: &dyn AsRef, - old_id: Id, - ) -> Result> { - log::debug!("Updating a single entry in the index"); - - if !path.as_ref().exists() { - return self.forget_id(old_id); + relative_path: P, + ) -> Result> { + log::debug!("Tracking removal of file: {:?}", relative_path.as_ref()); + + let path = relative_path.as_ref(); + let full_path = self.root.join(path); + if full_path.exists() { + return Err(ArklibError::Path(format!( + "File still exists: {:?}", + full_path + )) + .into()); } - let path_buf = CanonicalPathBuf::canonicalize(path)?; - let path = path_buf.as_canonical_path(); - - log::trace!( - "[update] paths {:?} has id {:?}", - path, - self.path2id[path] - ); - - return match fs::metadata(path) { - Err(_) => { - // updating the index after resource removal - // is a correct scenario - self.forget_path(path, old_id) - } - Ok(metadata) => { - match scan_entry(path, metadata) { - Err(_) => { - // a directory or empty file exists by the path - self.forget_path(path, old_id) - } - Ok(new_entry) => { - // valid resource exists by the path - - let curr_entry = &self.path2id.get(path); - if curr_entry.is_none() { - // if the path is not indexed, then we can't have - // `old_id` if you want - // to index new path, use `index_new` method - return Err(ArklibError::Path( - "Couldn't find the path in the index".into(), - )); - } - let curr_entry = curr_entry.unwrap(); - - if curr_entry.id == new_entry.id { - // in rare cases we are here due to hash collision - if 
curr_entry.modified == new_entry.modified { - log::warn!("path {:?} was not modified", &path); - } else { - log::warn!("path {:?} was modified but not its content", &path); - } - - // the caller must have ensured that the path was - // indeed update - return Err(ArklibError::Collision( - "New content has the same id".into(), - )); - } - - // new resource exists by the path - self.forget_path(path, old_id).map(|mut update| { - update - .added - .insert(path_buf.clone(), new_entry.clone().id); - self.insert_entry(path_buf, new_entry); - - update - }) - } - } + // Remove the resource from the index + let resource = self + .path_to_resource + .remove(path) + .ok_or_else(|| anyhow!("Resource not found: {}", path.display()))?; + + // Remove the resource from the id_to_resources map + if let Some(resources) = self.id_to_resources.get_mut(&resource.id) { + resources.retain(|r| r.path != resource.path); + if resources.is_empty() { + self.id_to_resources.remove(&resource.id); } - }; - } - - pub fn forget_id(&mut self, old_id: Id) -> Result> { - let old_path = self - .path2id - .drain() - .filter_map(|(k, v)| { - if v.id == old_id { - Some(k) - } else { - None - } - }) - .collect_vec(); - for p in old_path { - self.path2id.remove(&p); - } - self.id2path.remove(&old_id); - let mut deleted = HashSet::new(); - deleted.insert(old_id); - - Ok(IndexUpdate { - added: HashMap::new(), - deleted, - }) - } - - fn insert_entry(&mut self, path: CanonicalPathBuf, entry: IndexEntry) { - log::trace!("[add] {} by path {}", entry.id, path.display()); - let id = entry.clone().id; - - if let std::collections::hash_map::Entry::Vacant(e) = - self.id2path.entry(id.clone()) - { - e.insert(path.clone()); - } else if let Some(nonempty) = self.collisions.get_mut(&id) { - *nonempty += 1; - } else { - self.collisions.insert(id, 2); } - self.path2id.insert(path, entry); + Ok(resource) } - fn forget_path( + /// Track the modification of a file in the resource index. 
+ /// + /// This method checks if the file exists in the file system and removes the + /// old resource from the index before adding the new resource to the + /// index. + /// + /// # Arguments + /// * `relative_path` - The relative path of the file to be modified. + /// + /// # Returns + /// Returns `Ok(new_resource)` if the resource was successfully modified in + /// the index. + /// + /// # Errors + /// - If there was a problem removing the old resource from the index. + /// - If there was a problem adding the new resource to the index. + pub fn track_modification>( &mut self, - path: &CanonicalPath, - old_id: Id, - ) -> Result> { - self.path2id.remove(path); - - if let Some(collisions) = self.collisions.get_mut(&old_id) { - debug_assert!( - *collisions > 1, - "Any collision must involve at least 2 resources" - ); - *collisions -= 1; - - if *collisions == 1 { - self.collisions.remove(&old_id); - } + relative_path: P, + ) -> Result> { + log::debug!( + "Tracking modification of file: {:?}", + relative_path.as_ref() + ); - // minor performance issue: - // we must find path of one of the collided - // resources and use it as new value - let maybe_collided_path = - self.path2id.iter().find_map(|(path, entry)| { - if entry.id == old_id { - Some(path) - } else { - None - } - }); - - if let Some(collided_path) = maybe_collided_path { - let old_path = self - .id2path - .insert(old_id.clone(), collided_path.clone()); - - debug_assert_eq!( - old_path.unwrap().as_canonical_path(), - path, - "Must forget the requested path" - ); - } else { - return Err(ArklibError::Collision( - "Illegal state of collision tracker".into(), - )); + let path = relative_path.as_ref(); + // Remove the resource from the index + let resource = self + .path_to_resource + .remove(path) + .ok_or_else(|| anyhow!("Resource not found: {}", path.display()))?; + + // Remove the resource from the id_to_resources map + if let Some(resources) = self.id_to_resources.get_mut(&resource.id) { + 
resources.retain(|r| r.path != resource.path); + if resources.is_empty() { + self.id_to_resources.remove(&resource.id); } - } else { - self.id2path.remove(&old_id.clone()); } - let mut deleted = HashSet::new(); - deleted.insert(old_id); - - Ok(IndexUpdate { - added: HashMap::new(), - deleted, - }) - } -} - -pub(crate) fn discover_paths>( - root_path: P, -) -> HashMap { - log::debug!( - "Discovering all files under path {}", - root_path.as_ref().display() - ); - - WalkDir::new(root_path) - .into_iter() - .filter_entry(|entry| !is_hidden(entry)) - .filter_map(|result| match result { - Ok(entry) => { - let path = entry.path(); - if !entry.file_type().is_dir() { - match CanonicalPathBuf::canonicalize(path) { - Ok(canonical_path) => Some((canonical_path, entry)), - Err(msg) => { - log::warn!( - "Couldn't canonicalize {}:\n{}", - path.display(), - msg - ); - None - } - } - } else { - None - } - } - Err(msg) => { - log::error!("Error during walking: {}", msg); - None - } - }) - .collect() -} - -fn scan_entry( - path: &CanonicalPath, - metadata: Metadata, -) -> Result> -where - Id: ResourceId, -{ - if metadata.is_dir() { - return Err(ArklibError::Path("Path is expected to be a file".into())); - } + // Add the new resource to the index + let new_resource = self.track_addition(path)?; - let size = metadata.len(); - if size == 0 { - Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "Empty resource", - ))?; + Ok(new_resource) } - - let id = Id::from_path(path)?; - let modified = metadata.modified()?; - - Ok(IndexEntry { modified, id }) -} - -fn scan_entries( - entries: HashMap, -) -> HashMap> -where - Id: ResourceId, -{ - entries - .into_iter() - .filter_map(|(path_buf, entry)| { - let metadata = entry.metadata().ok()?; - - let path = path_buf.as_canonical_path(); - let result = scan_entry(path, metadata); - match result { - Err(msg) => { - log::error!( - "Couldn't retrieve metadata for {}:\n{}", - path.display(), - msg - ); - None - } - Ok(entry) => 
Some((path_buf, entry)), - } - }) - .collect() -} - -fn is_hidden(entry: &DirEntry) -> bool { - entry - .file_name() - .to_str() - .map(|s| s.starts_with('.')) - .unwrap_or(false) } diff --git a/fs-index/src/lib.rs b/fs-index/src/lib.rs index bf280e99..be68c43e 100644 --- a/fs-index/src/lib.rs +++ b/fs-index/src/lib.rs @@ -1,4 +1,8 @@ pub mod index; +mod serde; +mod utils; + +pub use utils::load_or_build_index; #[cfg(test)] mod tests; diff --git a/fs-index/src/serde.rs b/fs-index/src/serde.rs new file mode 100644 index 00000000..9d16e8f0 --- /dev/null +++ b/fs-index/src/serde.rs @@ -0,0 +1,123 @@ +use std::{collections::HashMap, path::PathBuf, time::SystemTime}; + +use anyhow::Result; +use serde::{ + ser::{SerializeStruct, Serializer}, + Deserialize, Serialize, +}; + +use data_resource::ResourceId; + +use crate::{index::IndexedResource, ResourceIndex}; + +/// Data structure for serializing and deserializing the index +#[derive(Serialize, Deserialize)] +struct ResourceIndexData { + root: PathBuf, + resources: HashMap>, +} + +#[derive(Serialize, Deserialize)] +struct IndexedResourceData { + id: Id, + last_modified: u64, +} + +/// Custom implementation of [`Serialize`] for [`ResourceIndex`] +/// +/// To avoid writing a large repetitive index file with double maps, +/// we are only serializing the root path, and path_to_resource. +/// +/// Other fields can be reconstructed from the path_to_resource map. +impl Serialize for ResourceIndex +where + Id: ResourceId, +{ + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut state = serializer.serialize_struct("ResourceIndex", 2)?; + state.serialize_field("root", &self.root)?; + + let mut resources = HashMap::new(); + for (path, resource) in &self.path_to_resource { + let id = resource.id.clone(); + let last_modified = resource + .last_modified + .duration_since(SystemTime::UNIX_EPOCH) + .map_err(|e| { + serde::ser::Error::custom(format!( + "Failed to serialize last_modified: {}", + e + )) + })? 
+ .as_nanos() as u64; + + let resource_data = IndexedResourceData { id, last_modified }; + resources.insert(path.clone(), resource_data); + } + + state.serialize_field("resources", &resources)?; + state.end() + } +} + +/// Custom implementation of [`Deserialize`] for [`ResourceIndex`] +/// +/// Deserializes the index from the root path and path_to_resource map. +/// Other fields are reconstructed from the path_to_resource map. +impl<'de, Id> Deserialize<'de> for ResourceIndex +where + Id: ResourceId, +{ + fn deserialize(deserializer: D) -> Result, D::Error> + where + D: serde::Deserializer<'de>, + { + let index_data: ResourceIndexData = + ResourceIndexData::deserialize(deserializer)?; + + let mut path_to_resource = HashMap::new(); + let mut id_to_resources = HashMap::new(); + for (path, resource_data) in index_data.resources { + let last_modified = SystemTime::UNIX_EPOCH + + std::time::Duration::from_nanos(resource_data.last_modified); + let resource = IndexedResource { + id: resource_data.id, + path: path.clone(), + last_modified, + }; + path_to_resource.insert(path, resource.clone()); + id_to_resources + .entry(resource.id.clone()) + .or_insert_with(Vec::new) + .push(resource); + } + + Ok(ResourceIndex { + root: index_data.root, + id_to_resources, + path_to_resource, + }) + } +} + +/// Custom implementation of [`PartialEq`] for [`ResourceIndex`] +/// +/// The order of items in hashmaps is not relevant. +/// we just need to compare [`ResourceIndex::resources`] to check if the two +/// indexes are equal. 
+impl PartialEq for ResourceIndex +where + Id: ResourceId, +{ + fn eq(&self, other: &Self) -> bool { + let mut resources1 = self.resources(); + let mut resources2 = other.resources(); + resources1.sort_by(|a, b| a.path.cmp(&b.path)); + resources2.sort_by(|a, b| a.path.cmp(&b.path)); + + resources1 == resources2 && self.root == other.root + } +} diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 9eed0158..ae9c20eb 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -1,427 +1,603 @@ -use crate::{ - index::{discover_paths, IndexEntry}, - ResourceIndex, -}; -use canonical_path::CanonicalPathBuf; +use std::{fs, path::Path}; + +use anyhow::{anyhow, Result}; +use tempfile::TempDir; + +use data_resource::ResourceId; use dev_hash::Crc32; -use fs_atomic_versions::initialize; -use std::fs::File; -#[cfg(target_family = "unix")] -use std::fs::Permissions; -#[cfg(target_family = "unix")] -use std::os::unix::fs::PermissionsExt; -use std::{path::PathBuf, time::SystemTime}; -use uuid::Uuid; +use super::*; +use crate::{index::IndexedResource, utils::load_or_build_index}; + +/// A helper function to get [`IndexedResource`] from a file path +fn get_indexed_resource_from_file>( + path: P, + parent_dir: P, +) -> Result> { + let id = Crc32::from_path(&path)?; + + let relative_path = path + .as_ref() + .strip_prefix(parent_dir) + .map_err(|_| anyhow!("Failed to get relative path"))?; + + Ok(IndexedResource { + id: id, + path: relative_path.into(), + last_modified: fs::metadata(&path)?.modified()?, + }) +} -const FILE_SIZE_1: u64 = 10; -const FILE_SIZE_2: u64 = 11; +/// Test storing and loading the resource index. +/// +/// ## Test scenario: +/// - Build a resource index in the temporary directory. +/// - Store the index. +/// - Load the stored index. +/// - Assert that the loaded index matches the original index. 
+#[test] +fn test_store_and_load_index() { + let temp_dir = TempDir::with_prefix("ark_test_store_and_load_index") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); -const FILE_NAME_1: &str = "test1.txt"; -const FILE_NAME_2: &str = "test2.txt"; -const FILE_NAME_3: &str = "test3.txt"; + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); -const CRC32_1: Crc32 = Crc32(3817498742); -const CRC32_2: Crc32 = Crc32(1804055020); + let index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); + index.store().expect("Failed to store index"); -fn get_temp_dir() -> PathBuf { - create_dir_at(std::env::temp_dir()) -} + let loaded_index = + load_or_build_index(root_path, false).expect("Failed to load index"); -fn create_dir_at(path: PathBuf) -> PathBuf { - let mut dir_path = path.clone(); - dir_path.push(Uuid::new_v4().to_string()); - std::fs::create_dir(&dir_path).expect("Could not create temp dir"); - dir_path + assert_eq!(index, loaded_index); } -fn create_file_at( - path: PathBuf, - size: Option, - name: Option<&str>, -) -> (File, PathBuf) { - let mut file_path = path.clone(); - if let Some(file_name) = name { - file_path.push(file_name); - } else { - file_path.push(Uuid::new_v4().to_string()); - } - let file = - File::create(file_path.clone()).expect("Could not create temp file"); - file.set_len(size.unwrap_or(0)) - .expect("Could not set file size"); - (file, file_path) -} +/// Test storing and loading the resource index with collisions. +/// +/// ## Test scenario: +/// - Build a resource index in the temporary directory. +/// - Write duplicate files with the same content. +/// - Store the index. +/// - Load the stored index. +/// - Assert that the loaded index matches the original index. 
+#[test] +fn test_store_and_load_index_with_collisions() { + let temp_dir = + TempDir::with_prefix("ark_test_store_and_load_index_with_collisions") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); -fn run_test_and_clean_up(test: impl FnOnce(PathBuf) + std::panic::UnwindSafe) { - initialize(); - - let path = get_temp_dir(); - let result = std::panic::catch_unwind(|| test(path.clone())); - std::fs::remove_dir_all(path.clone()) - .expect("Could not clean up after test"); - if result.is_err() { - panic!("{}", result.err().map(|_| "Test panicked").unwrap()) - } - assert!(result.is_ok()); -} + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); -// resource index build + let file_path2 = root_path.join("file2.txt"); + fs::write(&file_path2, "file content").expect("Failed to write to file"); -#[test] -fn index_build_should_process_1_file_successfully() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), None); - - let actual: ResourceIndex = ResourceIndex::build(path.clone()); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 1); - assert_eq!(actual.id2path.len(), 1); - assert!(actual.id2path.contains_key(&CRC32_1)); - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 1); - }) -} + let file_path3 = root_path.join("file3.txt"); + fs::write(&file_path3, "file content").expect("Failed to write to file"); -#[test] -fn index_build_should_process_colliding_files_correctly() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), None); - create_file_at(path.clone(), Some(FILE_SIZE_1), None); - - let actual: ResourceIndex = ResourceIndex::build(path.clone()); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 2); - assert_eq!(actual.id2path.len(), 1); - assert!(actual.id2path.contains_key(&CRC32_1)); - assert_eq!(actual.collisions.len(), 1); - 
assert_eq!(actual.size(), 2); - }) -} + let file_path4 = root_path.join("file4.txt"); + fs::write(&file_path4, "file content").expect("Failed to write to file"); -// resource index update + // Now we have 4 files with the same content (same checksum) -#[test] -fn update_all_should_handle_renamed_file_correctly() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), Some(FILE_NAME_1)); - create_file_at(path.clone(), Some(FILE_SIZE_2), Some(FILE_NAME_2)); - - let mut actual: ResourceIndex = - ResourceIndex::build(path.clone()); - - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 2); - - // rename test2.txt to test3.txt - let mut name_from = path.clone(); - name_from.push(FILE_NAME_2); - let mut name_to = path.clone(); - name_to.push(FILE_NAME_3); - std::fs::rename(name_from, name_to) - .expect("Should rename file successfully"); - - let update = actual - .update_all() - .expect("Should update index correctly"); - - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 2); - assert_eq!(update.deleted.len(), 1); - assert_eq!(update.added.len(), 1); - }) + let index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + let checksum = + Crc32::from_path(&file_path).expect("Failed to get checksum"); + assert_eq!(index.len(), 4); + assert_eq!(index.collisions().len(), 1); + assert_eq!(index.collisions()[&checksum].len(), 4); + index.store().expect("Failed to store index"); + + let loaded_index = + load_or_build_index(root_path, false).expect("Failed to load index"); + + assert_eq!(index, loaded_index); } +/// Test building an index with a file. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index contains one entry. +/// - Assert that the resource retrieved by path matches the expected resource. 
+/// - Assert that the resource retrieved by ID matches the expected resource. #[test] -fn update_all_should_index_new_file_successfully() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), None); - - let mut actual: ResourceIndex = - ResourceIndex::build(path.clone()); - - let (_, expected_path) = - create_file_at(path.clone(), Some(FILE_SIZE_2), None); - - let update = actual - .update_all() - .expect("Should update index correctly"); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 2); - assert_eq!(actual.id2path.len(), 2); - assert!(actual.id2path.contains_key(&CRC32_1)); - assert!(actual.id2path.contains_key(&CRC32_2)); - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 2); - assert_eq!(update.deleted.len(), 0); - assert_eq!(update.added.len(), 1); - - let added_key = CanonicalPathBuf::canonicalize(expected_path.clone()) - .expect("CanonicalPathBuf should be fine"); - assert_eq!( - update - .added - .get(&added_key) - .expect("Key exists") - .clone(), - CRC32_2 - ) - }) +fn test_build_index_with_file() { + let temp_dir = TempDir::with_prefix("ark_test_build_index_with_file") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + let expected_resource = + get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); + + let resource = index + .get_resource_by_path("file.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource); } +/// Test building an index with a directory. +/// +/// ## Test scenario: +/// - Create a subdirectory within the temporary directory. +/// - Create a file within the subdirectory. 
+/// - Build a resource index in the temporary directory. +/// - Assert that the index contains one entry. +/// - Assert that the resource retrieved by path matches the expected resource. +/// - Assert that the resource retrieved by ID matches the expected resource. #[test] -fn index_new_should_index_new_file_successfully() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), None); - let mut index: ResourceIndex = - ResourceIndex::build(path.clone()); - - let (_, new_path) = - create_file_at(path.clone(), Some(FILE_SIZE_2), None); - - let update = index - .index_new(&new_path) - .expect("Should update index correctly"); - - assert_eq!(index.root, path.clone()); - assert_eq!(index.path2id.len(), 2); - assert_eq!(index.id2path.len(), 2); - assert!(index.id2path.contains_key(&CRC32_1)); - assert!(index.id2path.contains_key(&CRC32_2)); - assert_eq!(index.collisions.len(), 0); - assert_eq!(index.size(), 2); - assert_eq!(update.deleted.len(), 0); - assert_eq!(update.added.len(), 1); - - let added_key = CanonicalPathBuf::canonicalize(new_path.clone()) - .expect("CanonicalPathBuf should be fine"); - assert_eq!( - update - .added - .get(&added_key) - .expect("Key exists") - .clone(), - CRC32_2 - ) - }) +fn test_build_index_with_directory() { + let temp_dir = TempDir::with_prefix("ark_test_build_index_with_directory") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let dir_path = root_path.join("dir"); + fs::create_dir(&dir_path).expect("Failed to create dir"); + let file_path = dir_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + let expected_resource = + get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); + + let resource = index + .get_resource_by_path("dir/file.txt") + .expect("Failed to 
get resource"); + assert_eq!(resource, &expected_resource); } +/// Test building an index with multiple files. +/// +/// ## Test scenario: +/// - Create multiple files within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index contains two entries. +/// - Assert that the resource retrieved by path for each file matches the +/// expected resource. #[test] -fn update_one_should_error_on_new_file() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), None); - let mut index = ResourceIndex::build(path.clone()); - - let (_, new_path) = - create_file_at(path.clone(), Some(FILE_SIZE_2), None); - - let update = index.update_one(&new_path, CRC32_2); - - assert!(update.is_err()) - }) +fn test_build_index_with_multiple_files() { + let temp_dir = + TempDir::with_prefix("ark_test_build_index_with_multiple_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file1_path = root_path.join("file1.txt"); + fs::write(&file1_path, "file1 content").expect("Failed to write to file"); + let file2_path = root_path.join("file2.txt"); + fs::write(&file2_path, "file2 content").expect("Failed to write to file"); + + let expected_resource1 = + get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + let expected_resource2 = + get_indexed_resource_from_file(&file2_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("file1.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource1); + + let resource = index + .get_resource_by_path("file2.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource2); } +/// Test building an index with multiple directories. 
+/// +/// ## Test scenario: +/// - Create multiple directories within the temporary directory, each +/// containing a file. +/// - Build a resource index in the temporary directory. +/// - Assert that the index contains two entries. +/// - Assert that the resources retrieved by path for each file match the +/// expected resources. #[test] -fn update_one_should_index_delete_file_successfully() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), Some(FILE_NAME_1)); - - let mut actual = ResourceIndex::build(path.clone()); - - let mut file_path = path.clone(); - file_path.push(FILE_NAME_1); - std::fs::remove_file(file_path.clone()) - .expect("Should remove file successfully"); - - let update = actual - .update_one(&file_path.clone(), CRC32_1) - .expect("Should update index successfully"); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 0); - assert_eq!(actual.id2path.len(), 0); - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 0); - assert_eq!(update.deleted.len(), 1); - assert_eq!(update.added.len(), 0); - - assert!(update.deleted.contains(&CRC32_1)) - }) +fn test_build_index_with_multiple_directories() { + let temp_dir = + TempDir::with_prefix("ark_test_build_index_with_multiple_directories") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let dir1_path = root_path.join("dir1"); + fs::create_dir(&dir1_path).expect("Failed to create dir"); + let file1_path = dir1_path.join("file1.txt"); + fs::write(&file1_path, "file1 content").expect("Failed to write to file"); + + let dir2_path = root_path.join("dir2"); + fs::create_dir(&dir2_path).expect("Failed to create dir"); + let file2_path = dir2_path.join("file2.txt"); + fs::write(&file2_path, "file2 content").expect("Failed to write to file"); + + let expected_resource1 = + get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + let expected_resource2 
= + get_indexed_resource_from_file(&file2_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("dir1/file1.txt") + .expect("Resource not found"); + assert_eq!(resource, &expected_resource1); + + let resource = index + .get_resource_by_path("dir2/file2.txt") + .expect("Resource not found"); + assert_eq!(resource, &expected_resource2); } +/// Test tracking the removal of a file from the index. +/// +/// ## Test scenario: +/// - Create two files within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index contains two entries. +/// - Remove one of the files. +/// - Track the removal of the file in the index. +/// - Assert that the index contains only one entry after removal. +/// - Assert that the removed file is no longer present in the index, while the +/// other file remains. 
#[test] -fn update_all_should_error_on_files_without_permissions() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), Some(FILE_NAME_1)); - let (file, _) = - create_file_at(path.clone(), Some(FILE_SIZE_2), Some(FILE_NAME_2)); - - let mut actual: ResourceIndex = - ResourceIndex::build(path.clone()); - - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 2); - #[cfg(target_family = "unix")] - file.set_permissions(Permissions::from_mode(0o222)) - .expect("Should be fine"); - - let update = actual - .update_all() - .expect("Should update index correctly"); - - assert_eq!(actual.collisions.len(), 0); - assert_eq!(actual.size(), 2); - assert_eq!(update.deleted.len(), 0); - assert_eq!(update.added.len(), 0); - }) +fn test_track_removal() { + let temp_dir = TempDir::with_prefix("ark_test_track_removal") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + let image_path = root_path.join("image.png"); + fs::write(&image_path, "image content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 2); + + fs::remove_file(&file_path).expect("Failed to remove file"); + + let file_relative_path = file_path + .strip_prefix(root_path) + .expect("Failed to get relative path"); + index + .track_removal(&file_relative_path) + .expect("Failed to track removal"); + + assert_eq!(index.len(), 1); + assert!(index.get_resource_by_path("file.txt").is_none()); + assert!(index.get_resource_by_path("image.png").is_some()); } -// error cases - +/// Test tracking the removal of a file that doesn't exist. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. 
+/// - Assert that the index contains only one entry. +/// - Track the removal of a file that doesn't exist in the index. +/// - Assert that the index still contains only one entry. #[test] -fn update_one_should_not_update_absent_path() { - run_test_and_clean_up(|path| { - let mut missing_path = path.clone(); - missing_path.push("missing/directory"); - let mut actual = ResourceIndex::build(path.clone()); - let old_id = Crc32(2); - let result = actual - .update_one(&missing_path, old_id.clone()) - .map(|i| i.deleted.clone().take(&old_id)) - .ok() - .flatten(); - - assert_eq!(result, Some(Crc32(2))); - }) +fn test_track_removal_non_existent() { + let temp_dir = TempDir::with_prefix("ark_test_track_removal_non_existent") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + + let new_file_relative_path = new_file_path + .strip_prefix(root_path) + .expect("Failed to get relative path"); + let removal_result = index.track_removal(&new_file_relative_path); + assert!(removal_result.is_err()); + assert_eq!(index.len(), 1); } +/// Test tracking the addition of a new file to the index. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains only one entry. +/// - Create a new file in the temporary directory. +/// - Track the addition of the new file in the index. +/// - Assert that the index contains two entries after addition. +/// - Assert that both files are present in the index. 
#[test] -fn update_one_should_index_new_path() { - run_test_and_clean_up(|path| { - let mut missing_path = path.clone(); - missing_path.push("missing/directory"); - let mut actual = ResourceIndex::build(path.clone()); - let old_id = Crc32(2); - let result = actual - .update_one(&missing_path, old_id.clone()) - .map(|i| i.deleted.clone().take(&old_id)) - .ok() - .flatten(); - - assert_eq!(result, Some(Crc32(2))); - }) +fn test_track_addition() { + let temp_dir = TempDir::with_prefix("ark_test_track_addition") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "new file content") + .expect("Failed to write to file"); + + let new_file_relative_path = new_file_path + .strip_prefix(root_path) + .expect("Failed to get relative path"); + index + .track_addition(&new_file_relative_path) + .expect("Failed to track addition"); + + assert_eq!(index.len(), 2); + assert!(index.get_resource_by_path("file.txt").is_some()); + assert!(index + .get_resource_by_path("new_file.txt") + .is_some()); } +/// Test for tracking addition of a file that doesn't exist +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains only one entry. +/// - Track the addition of a file that doesn't exist in the index. +/// - Assert that the index still contains only one entry. 
#[test] -fn should_not_index_empty_file() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(0), None); - let actual: ResourceIndex = ResourceIndex::build(path.clone()); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 0); - assert_eq!(actual.id2path.len(), 0); - assert_eq!(actual.collisions.len(), 0); - }) +fn test_track_addition_non_existent() { + let temp_dir = TempDir::with_prefix("ark_test_track_addition_non_existent") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + + let new_file_relative_path = new_file_path + .strip_prefix(root_path) + .expect("Failed to get relative path"); + let addition_result = index.track_addition(&new_file_relative_path); + assert!(addition_result.is_err()); + assert_eq!(index.len(), 1); } +/// Test tracking the modification of a file in the index. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains only one entry. +/// - Update the content of the file. +/// - Track the modification of the file in the index. +/// - Assert that the index still contains only one entry. +/// - Assert that the modification timestamp of the file in the index matches +/// the actual file's modification timestamp. 
#[test] -fn should_not_index_hidden_file() { - run_test_and_clean_up(|path| { - create_file_at(path.clone(), Some(FILE_SIZE_1), Some(".hidden")); - let actual: ResourceIndex = ResourceIndex::build(path.clone()); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 0); - assert_eq!(actual.id2path.len(), 0); - assert_eq!(actual.collisions.len(), 0); - }) +fn test_track_modification() { + let temp_dir = TempDir::with_prefix("ark_test_track_modification") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + fs::write(&file_path, "updated file content") + .expect("Failed to write to file"); + + let file_relative_path = file_path + .strip_prefix(root_path) + .expect("Failed to get relative path"); + index + .track_modification(&file_relative_path) + .expect("Failed to track modification"); + + assert_eq!(index.len(), 1); + let resource = index + .get_resource_by_path("file.txt") + .expect("Resource not found"); + assert_eq!( + resource.last_modified, + fs::metadata(&file_path) + .unwrap() + .modified() + .unwrap() + ); } +/// Test that track modification does not add a new file to the index. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains only one entry. +/// - Create a new file in the temporary directory. +/// - Track the modification of the new file in the index. +/// - Assert that the index still contains only one entry. 
#[test] -fn should_not_index_1_empty_directory() { - run_test_and_clean_up(|path| { - create_dir_at(path.clone()); - - let actual: ResourceIndex = ResourceIndex::build(path.clone()); - - assert_eq!(actual.root, path.clone()); - assert_eq!(actual.path2id.len(), 0); - assert_eq!(actual.id2path.len(), 0); - assert_eq!(actual.collisions.len(), 0); - }) +fn test_track_modification_does_not_add() { + let temp_dir = + TempDir::with_prefix("ark_test_track_modification_does_not_add") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "new file content") + .expect("Failed to write to file"); + + let new_file_relative_path = new_file_path + .strip_prefix(root_path) + .expect("Failed to get relative path"); + + let modification_result = index.track_modification(&new_file_relative_path); + assert!(modification_result.is_err()); } +/// Test updating the resource index. +/// +/// ## Test scenario: +/// - Create files within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains the expected number of entries. +/// - Create a new file, modify an existing file, and remove another file. +/// - Update the resource index. +/// - Assert that the index contains the expected number of entries after the +/// update. +/// - Assert that the entries in the index match the expected state after the +/// update. 
#[test] -fn discover_paths_should_not_walk_on_invalid_path() { - run_test_and_clean_up(|path| { - let mut missing_path = path.clone(); - missing_path.push("missing/directory"); - let actual = discover_paths(missing_path); - assert_eq!(actual.len(), 0); - }) +fn test_resource_index_update() { + let temp_dir = TempDir::with_prefix("ark_test_resource_index_update") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let image_path = root_path.join("image.png"); + fs::write(&image_path, "image content").expect("Failed to write to file"); + + let mut index = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 2); + + // create new file + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "new file content") + .expect("Failed to write to file"); + + // modify file + fs::write(&file_path, "updated file content") + .expect("Failed to write to file"); + + // remove file + fs::remove_file(&image_path).expect("Failed to remove file"); + + index + .update_all() + .expect("Failed to update index"); + // Index now contains 2 resources (file.txt and new_file.txt) + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("file.txt") + .expect("Resource not found"); + let expected_resource = + get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + assert_eq!(resource, &expected_resource); + + let _resource = index + .get_resource_by_path("new_file.txt") + .expect("Resource not found"); + + assert!(index.get_resource_by_path("image.png").is_none()); } +/// Test adding colliding files to the index. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. 
+/// - Assert that the index initially contains the expected number of entries. +/// - Create a new file with the same checksum as the existing file. +/// - Track the addition of the new file in the index. +/// - Assert that the index contains the expected number of entries after the +/// addition. +/// - Assert index.collisions contains the expected number of entries. #[test] -fn index_entry_order() { - let old1 = IndexEntry { - id: Crc32(2), - modified: SystemTime::UNIX_EPOCH, - }; - let old2 = IndexEntry { - id: Crc32(1), - modified: SystemTime::UNIX_EPOCH, - }; - - let new1 = IndexEntry { - id: Crc32(1), - modified: SystemTime::now(), - }; - let new2 = IndexEntry { - id: Crc32(2), - modified: SystemTime::now(), - }; - - assert_eq!(new1, new1); - assert_eq!(new2, new2); - assert_eq!(old1, old1); - assert_eq!(old2, old2); - - assert_ne!(new1, new2); - assert_ne!(new1, old1); - - assert!(new1 > old1); - assert!(new1 > old2); - assert!(new2 > old1); - assert!(new2 > old2); - assert!(new2 > new1); +fn test_add_colliding_files() { + let temp_dir = TempDir::with_prefix("ark_test_add_colliding_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "file content").expect("Failed to write to file"); + + let new_file_relative_path = new_file_path + .strip_prefix(root_path) + .expect("Failed to get relative path"); + index + .track_addition(&new_file_relative_path) + .expect("Failed to track addition"); + + assert_eq!(index.len(), 2); + assert_eq!(index.collisions().len(), 1); } -/// Test the performance of `ResourceIndex::build` on a specific directory. 
+/// Test that we don't index hidden files. /// -/// This test evaluates the performance of building a resource -/// index using the `ResourceIndex::build` method on a given directory. -/// It measures the time taken to build the resource index and prints the -/// number of collisions detected. +/// ## Test scenario: +/// - Create a hidden file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains the expected number of entries. +/// (0) #[test] -fn test_build_resource_index() { - use std::time::Instant; - - let path = "../test-assets/"; // The path to the directory to index - assert!( - std::path::Path::new(path).is_dir(), - "The provided path is not a directory or does not exist" - ); - - let start_time = Instant::now(); - let index: ResourceIndex = ResourceIndex::build(path.to_string()); - let elapsed_time = start_time.elapsed(); - - println!("Number of paths: {}", index.id2path.len()); - println!("Number of resources: {}", index.id2path.len()); - println!("Number of collisions: {}", index.collisions.len()); - println!("Time taken: {:?}", elapsed_time); +fn test_hidden_files() { + let temp_dir = TempDir::with_prefix("ark_test_hidden_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join(".hidden_file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 0); } diff --git a/fs-index/src/utils.rs b/fs-index/src/utils.rs new file mode 100644 index 00000000..71a16180 --- /dev/null +++ b/fs-index/src/utils.rs @@ -0,0 +1,69 @@ +use std::{fs, io::BufReader, path::Path}; + +use data_error::{ArklibError, Result}; +use data_resource::ResourceId; +use fs_storage::{ARK_FOLDER, INDEX_PATH}; + +use crate::ResourceIndex; + +/// A helper 
function to check if the entry should be indexed (not hidden) +pub fn should_index(entry: &walkdir::DirEntry) -> bool { + !entry + .file_name() + .to_string_lossy() + .starts_with('.') +} + +/// Load the index from the file system +fn load_index, Id: ResourceId>( + root_path: P, +) -> Result> { + let index_path = Path::new(ARK_FOLDER).join(INDEX_PATH); + let index_path = fs::canonicalize(root_path.as_ref())?.join(index_path); + let index_file = fs::File::open(index_path)?; + let reader = BufReader::new(index_file); + let index = serde_json::from_reader(reader)?; + + Ok(index) +} + +/// Load the index from the file system, or build a new index if it doesn't +/// exist +/// +/// If `update` is true, the index will be updated and stored after loading +/// it. +pub fn load_or_build_index, Id: ResourceId>( + root_path: P, + update: bool, +) -> Result> { + log::debug!( + "Attempting to load or build index at root path: {:?}", + root_path.as_ref() + ); + + let index_path = Path::new(ARK_FOLDER).join(INDEX_PATH); + let index_path = fs::canonicalize(root_path.as_ref())?.join(index_path); + log::trace!("Index path: {:?}", index_path); + + if index_path.exists() { + log::trace!("Index file exists, loading index"); + + let mut index = load_index(root_path)?; + if update { + log::trace!("Updating loaded index"); + + index.update_all()?; + index.store()?; + } + Ok(index) + } else { + log::trace!("Index file does not exist, building index"); + + // Build a new index if it doesn't exist and store it + let index = ResourceIndex::build(root_path.as_ref())?; + index.store().map_err(|e| { + ArklibError::Path(format!("Failed to store index: {}", e)) + })?; + Ok(index) + } +} From d90ff2ef7769d01d2161073a6cb5a0993dca6ccf Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 30 Jun 2024 19:51:31 +0300 Subject: [PATCH 06/46] feat(fs-index): implement new benchmark functions for fs-index Signed-off-by: Tarek --- fs-index/benches/resource_index_benchmark.rs | 83 ++++++++++++++++++-- 1 file 
changed, 75 insertions(+), 8 deletions(-) diff --git a/fs-index/benches/resource_index_benchmark.rs b/fs-index/benches/resource_index_benchmark.rs index be113dca..fba6d048 100644 --- a/fs-index/benches/resource_index_benchmark.rs +++ b/fs-index/benches/resource_index_benchmark.rs @@ -28,22 +28,88 @@ fn resource_index_benchmark(c: &mut Criterion) { |b, path| { b.iter(|| { let index: ResourceIndex = - ResourceIndex::build(black_box(path)); - collisions_size = index.collisions.len(); + ResourceIndex::build(black_box(path)).unwrap(); + collisions_size = index.collisions().len(); }); }, ); println!("Collisions: {}", collisions_size); - // TODO: Benchmark `ResourceIndex::get_resource_by_id()` + // Benchmark `ResourceIndex::get_resources_by_id()` + let index: ResourceIndex = + ResourceIndex::build(benchmarks_dir).unwrap(); + let resources = index.resources(); + let resource_id = &resources.clone()[0].id; + group.bench_function("index_get_resource_by_id", |b| { + b.iter(|| { + let _resource = + index.get_resources_by_id(black_box(resource_id.clone())); + }); + }); + + // Benchmark `ResourceIndex::get_resource_by_path()` + let resource_path = &resources.clone()[0].path; + group.bench_function("index_get_resource_by_path", |b| { + b.iter(|| { + let _resource = + index.get_resource_by_path(black_box(resource_path.clone())); + }); + }); - // TODO: Benchmark `ResourceIndex::get_resource_by_path()` + // Benchmark `ResourceIndex::track_addition()` + let new_file = benchmarks_dir.join("new_file.txt"); + group.bench_function("index_track_addition", |b| { + b.iter(|| { + std::fs::File::create(&new_file).unwrap(); + std::fs::write(&new_file, "Hello, World!").unwrap(); + let mut index: ResourceIndex = + ResourceIndex::build(black_box(benchmarks_dir)).unwrap(); + let _addition_result = index.track_addition(&new_file).unwrap(); - // TODO: Benchmark `ResourceIndex::track_addition()` + // Cleanup + std::fs::remove_file(&new_file).unwrap(); + }); + }); - // TODO: Benchmark 
`ResourceIndex::track_deletion()` + // Benchmark `ResourceIndex::track_removal()` + let removed_file = benchmarks_dir.join("new_file.txt"); + group.bench_function("index_track_removal", |b| { + b.iter(|| { + std::fs::File::create(&removed_file).unwrap(); + std::fs::write(&removed_file, "Hello, World!").unwrap(); + let mut index: ResourceIndex = + ResourceIndex::build(black_box(benchmarks_dir)).unwrap(); + std::fs::remove_file(&removed_file).unwrap(); + let relative_path = removed_file + .strip_prefix(benchmarks_dir) + .unwrap() + .to_str() + .unwrap(); + let _removal_result = index.track_removal(&relative_path).unwrap(); + }); + }); - // TODO: Benchmark `ResourceIndex::track_update()` + // Benchmark `ResourceIndex::track_modification()` + let modified_file = benchmarks_dir.join("new_file.txt"); + group.bench_function("index_track_modification", |b| { + b.iter(|| { + std::fs::File::create(&modified_file).unwrap(); + std::fs::write(&modified_file, "Hello, World!").unwrap(); + let mut index: ResourceIndex = + ResourceIndex::build(black_box(benchmarks_dir)).unwrap(); + std::fs::write(&modified_file, "Hello, World! 
Modified").unwrap(); + let relative_path = modified_file + .strip_prefix(benchmarks_dir) + .unwrap() + .to_str() + .unwrap(); + let _modification_result = + index.track_modification(&relative_path).unwrap(); + + // Cleanup + std::fs::remove_file(&modified_file).unwrap(); + }); + }); // Benchmark `ResourceIndex::update_all()` @@ -69,7 +135,8 @@ fn resource_index_benchmark(c: &mut Criterion) { .unwrap(); } let mut index: ResourceIndex = - ResourceIndex::build(black_box(&update_all_benchmarks_dir)); + ResourceIndex::build(black_box(&update_all_benchmarks_dir)) + .unwrap(); update_all_files(&update_all_benchmarks_dir.to_path_buf()); let _update_result = index.update_all().unwrap(); From aa6d50bb6b81a8e221d0da8a41426492d62f4dfa Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 2 Jul 2024 14:40:41 +0300 Subject: [PATCH 07/46] feat(fs-index): ignore empty files in resource index Signed-off-by: Tarek --- fs-index/src/index.rs | 12 +++++++++ fs-index/src/tests.rs | 57 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 1026e557..edd977f0 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -215,6 +215,10 @@ impl ResourceIndex { } let path = entry.path(); let metadata = fs::metadata(path)?; + // Ignore empty files + if metadata.len() == 0 { + continue; + } let last_modified = metadata.modified()?; let id = Id::from_path(path)?; // Path is relative to the root @@ -316,6 +320,14 @@ impl ResourceIndex { .into()); } let metadata = fs::metadata(&full_path)?; + // return an error if the file is empty + if metadata.len() == 0 { + return Err(ArklibError::Path(format!( + "File is empty: {:?}", + full_path + )) + .into()); + } let last_modified = metadata.modified()?; let id = Id::from_path(&full_path)?; diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index ae9c20eb..41941baf 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -128,6 +128,30 @@ fn 
test_build_index_with_file() { assert_eq!(resource, &expected_resource); } +/// Test building an index with an empty file. +/// +/// ## Test scenario: +/// - Create an empty file within the temporary directory. +/// - Create a file with content within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index contains one entries. +#[test] +fn test_build_index_with_empty_file() { + let temp_dir = TempDir::with_prefix("ark_test_build_index_with_empty_file") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let empty_file_path = root_path.join("empty_file.txt"); + fs::write(&empty_file_path, "").expect("Failed to write to file"); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); +} + /// Test building an index with a directory. /// /// ## Test scenario: @@ -363,6 +387,39 @@ fn test_track_addition() { .is_some()); } +/// Test tracking the addition of an empty file to the index. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains only one entry. +/// - Create a new empty file in the temporary directory. +/// - Track the addition of the new file in the index. +/// - Assert that it retuns an error. 
+#[test] +fn test_track_addition_empty_file() { + let temp_dir = TempDir::with_prefix("ark_test_track_addition_empty_file") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "").expect("Failed to write to file"); + + let new_file_relative_path = new_file_path + .strip_prefix(root_path) + .expect("Failed to get relative path"); + let addition_result = index.track_addition(&new_file_relative_path); + assert!(addition_result.is_err()); +} + /// Test for tracking addition of a file that doesn't exist /// /// ## Test scenario: From dc0fe6b0ff7afb7d0bd17a1742bad47d056b6cb0 Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 2 Jul 2024 14:47:49 +0300 Subject: [PATCH 08/46] feat(fs-index): add a method for collisions counting Signed-off-by: Tarek --- fs-index/src/index.rs | 19 +++++++++++++++++++ fs-index/src/tests.rs | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index edd977f0..7051743e 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -143,6 +143,11 @@ impl ResourceIndex { } /// Return the ID collisions + /// + /// **Note**: If you are using a cryptographic hash function, collisions + /// should be files with the same content. If you are using a + /// non-cryptographic hash function, collisions can be files with the + /// same content or files whose content hash to the same value. 
pub fn collisions(&self) -> HashMap>> { // Filter out IDs with only one resource self.id_to_resources @@ -152,6 +157,20 @@ impl ResourceIndex { .collect() } + /// Return the number of ID collisions + /// + /// **Note**: If you are using a cryptographic hash function, collisions + /// should be files with the same content. If you are using a + /// non-cryptographic hash function, collisions can be files with the + /// same content or files whose content hash to the same value. + pub fn num_collisions(&self) -> usize { + self.id_to_resources + .values() + .filter(|resources| resources.len() > 1) + .map(|resources| resources.len()) + .sum() + } + /// Save the index to the file system (as a JSON file in /// /ARK_FOLDER/INDEX_PATH) pub fn store(&self) -> Result<()> { diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 41941baf..2849049a 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -637,6 +637,45 @@ fn test_add_colliding_files() { assert_eq!(index.collisions().len(), 1); } +/// Test `ResourceIndex::num_collisions()` method. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains the expected number of entries. +/// - Create 2 new files with the same checksum as the existing file. +/// - Update the index. +/// - Assert that the index contains the expected number of entries after the +/// update. 
+#[test] +fn test_num_collisions() { + let temp_dir = TempDir::with_prefix("ark_test_num_collisions") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "file content").expect("Failed to write to file"); + + let new_file_path2 = root_path.join("new_file2.txt"); + fs::write(&new_file_path2, "file content") + .expect("Failed to write to file"); + + index + .update_all() + .expect("Failed to update index"); + + assert_eq!(index.len(), 3); + assert_eq!(index.num_collisions(), 3); +} + /// Test that we don't index hidden files. /// /// ## Test scenario: From 9a9ee6923ab13ff77faeda4baecd8b612d5b08bd Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 2 Jul 2024 15:18:26 +0300 Subject: [PATCH 09/46] feat(ark-cli): update ark-cli to resonate fs-index updates Signed-off-by: Tarek --- ark-cli/src/commands/link/utils.rs | 11 ++++++++++- ark-cli/src/commands/list.rs | 24 ++++++++++-------------- ark-cli/src/index_registrar.rs | 6 ++++-- ark-cli/src/util.rs | 17 ++++++++++++----- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/ark-cli/src/commands/link/utils.rs b/ark-cli/src/commands/link/utils.rs index 1b851966..1409834c 100644 --- a/ark-cli/src/commands/link/utils.rs +++ b/ark-cli/src/commands/link/utils.rs @@ -27,8 +27,17 @@ pub fn load_link( ) -> Result, AppError> { let path_from_index = id.clone().map(|id| { let index = provide_index(root); - index.id2path[&id].as_path().to_path_buf() + index + .get_resources_by_id(id.clone()) + .map(|r| r[0].path.clone()) + .ok_or_else(|| { + AppError::IndexError(format!( + "Resource with id {} not found", + id + )) + }) 
}); + let path_from_index = path_from_index.transpose()?; let path_from_user = file_path; let path = match (path_from_user, path_from_index) { diff --git a/ark-cli/src/commands/list.rs b/ark-cli/src/commands/list.rs index 99725c65..700ac7f9 100644 --- a/ark-cli/src/commands/list.rs +++ b/ark-cli/src/commands/list.rs @@ -75,15 +75,17 @@ impl List { .map_err(|_| { AppError::IndexError("Could not read index".to_owned()) })? - .path2id + .resources() .iter() - .filter_map(|(path, resource)| { + .filter_map(|indexed_resource| { + let path = indexed_resource.clone().path; + let id = indexed_resource.clone().id; let tags = if self.tags { Some( read_storage_value( &root, "tags", - &resource.id.to_string(), + &id.to_string(), &None, ) .map_or(vec![], |s| { @@ -101,7 +103,7 @@ impl List { read_storage_value( &root, "scores", - &resource.id.to_string(), + &id.to_string(), &None, ) .map_or(0, |s| s.parse::().unwrap_or(0)), @@ -113,7 +115,7 @@ impl List { let datetime = if self.modified { let format = "%b %e %H:%M %Y"; Some( - DateTime::::from(resource.modified) + DateTime::::from(indexed_resource.last_modified) .format(format) .to_string(), ) @@ -122,15 +124,9 @@ impl List { }; let (path, resource, content) = match entry_output { - EntryOutput::Both => ( - Some(path.to_owned().into_path_buf()), - Some(resource.clone().id), - None, - ), - EntryOutput::Path => { - (Some(path.to_owned().into_path_buf()), None, None) - } - EntryOutput::Id => (None, Some(resource.clone().id), None), + EntryOutput::Both => (Some(path), Some(id), None), + EntryOutput::Path => (Some(path), None, None), + EntryOutput::Id => (None, Some(id), None), EntryOutput::Link => match File::open(path) { Ok(mut file) => { let mut contents = String::new(); diff --git a/ark-cli/src/index_registrar.rs b/ark-cli/src/index_registrar.rs index 4d0ea6fd..34e4e599 100644 --- a/ark-cli/src/index_registrar.rs +++ b/ark-cli/src/index_registrar.rs @@ -2,7 +2,7 @@ use lazy_static::lazy_static; extern crate canonical_path; use 
data_error::{ArklibError, Result}; -use fs_index::ResourceIndex; +use fs_index::{load_or_build_index, ResourceIndex}; use std::{ collections::HashMap, @@ -36,7 +36,9 @@ pub fn provide_index>( } log::info!("Index has not been registered before"); - match ResourceIndex::provide(&root_path) { + // If the index has not been registered before, we need to load it, update + // it and register it + match load_or_build_index(&root_path, true) { Ok(index) => { let mut registrar = REGISTRAR.write().map_err(|_| { ArklibError::Other(anyhow::anyhow!("Failed to lock registrar")) diff --git a/ark-cli/src/util.rs b/ark-cli/src/util.rs index d2b216ac..ab478fa5 100644 --- a/ark-cli/src/util.rs +++ b/ark-cli/src/util.rs @@ -113,12 +113,15 @@ pub fn monitor_index( let duration = start.elapsed(); println!("Updating succeeded in {:?}\n", duration); - if !diff.deleted.is_empty() { - println!("Deleted: {:?}", diff.deleted); + if !diff.removed.is_empty() { + println!("Deleted: {:?}", diff.removed); } if !diff.added.is_empty() { println!("Added: {:?}", diff.added); } + if !diff.modified.is_empty() { + println!("Modified: {:?}", diff.modified); + } } } } @@ -129,10 +132,14 @@ pub fn monitor_index( ) })?; - println!("Here are {} entries in the index", index.size()); + println!("Here are {} entries in the index", index.len()); - for (key, count) in index.collisions.iter() { - println!("Id {:?} calculated {} times", key, count); + for (key, resources) in index.collisions().iter() { + println!( + "Id {:?} calculated {} times", + key, + resources.len() + ); } } } From efb6888e2ac55f734ffa56297bc258c09c418c53 Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 2 Jul 2024 15:37:45 +0300 Subject: [PATCH 10/46] feat(fs-index): write an example for resource index Signed-off-by: Tarek --- .gitignore | 3 +- fs-index/examples/resource_index.rs | 48 +++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 fs-index/examples/resource_index.rs diff --git a/.gitignore 
b/.gitignore index bc757308..00d8cf93 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ target Cargo.lock **/app_id -*.class \ No newline at end of file +*.class +**/.ark diff --git a/fs-index/examples/resource_index.rs b/fs-index/examples/resource_index.rs new file mode 100644 index 00000000..c73ae778 --- /dev/null +++ b/fs-index/examples/resource_index.rs @@ -0,0 +1,48 @@ +use std::path::Path; + +use anyhow::Result; + +use dev_hash::Blake3; +use fs_index::ResourceIndex; + +/// A simple example of how to use [`ResourceIndex`] to index a directory. +fn main() -> Result<()> { + // Create a new `ResourceIndex` from the directory "test-assets" using + // blake3 as the hashing algorithm. + let mut index: ResourceIndex = + ResourceIndex::build(Path::new("test-assets"))?; + + // Print the indexed resources. + for resource in index.resources() { + println!("{:?}", resource); + } + + // Save the index to a file. + index.store()?; + + // Get resources by their id. + let id = Blake3( + "172b4bf148e858b13dde0fc6613413bcb7552e5c4e5c45195ac6c80f20eb5ff5" + .to_string(), + ); + let resources = index + .get_resources_by_id(id.clone()) + .ok_or_else(|| { + anyhow::anyhow!("Resource with id {:?} not found", id) + })?; + for resource in resources { + println!("{:?}", resource); + } + + // Get resources by their path. + let path = Path::new("lena.jpg"); + let resource = index.get_resource_by_path(path).ok_or_else(|| { + anyhow::anyhow!("Resource with path {:?} not found", path) + })?; + println!("{:?}", resource); + + // Update the index. 
+ index.update_all()?; + + Ok(()) +} From 68f87666c45d8b4e600f2c86ecc87ef653997f39 Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 2 Jul 2024 15:41:24 +0300 Subject: [PATCH 11/46] fix: fix clippy warnings Signed-off-by: Tarek --- fs-index/src/index.rs | 11 ++++------- fs-storage/src/jni/file_storage.rs | 14 +++++++------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 7051743e..93b30b96 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -335,8 +335,7 @@ impl ResourceIndex { return Err(ArklibError::Path(format!( "File does not exist: {:?}", full_path - )) - .into()); + ))); } let metadata = fs::metadata(&full_path)?; // return an error if the file is empty @@ -344,8 +343,7 @@ impl ResourceIndex { return Err(ArklibError::Path(format!( "File is empty: {:?}", full_path - )) - .into()); + ))); } let last_modified = metadata.modified()?; let id = Id::from_path(&full_path)?; @@ -371,7 +369,7 @@ impl ResourceIndex { /// /// # Arguments /// * `relative_path` - The path of the file to be removed (relative to the - /// root path of the index). + /// root path of the index). 
/// /// # Returns /// Returns `Ok(resource)` if the resource was successfully removed from the @@ -392,8 +390,7 @@ impl ResourceIndex { return Err(ArklibError::Path(format!( "File still exists: {:?}", full_path - )) - .into()); + ))); } // Remove the resource from the index diff --git a/fs-storage/src/jni/file_storage.rs b/fs-storage/src/jni/file_storage.rs index d693a04b..b62ed5bb 100644 --- a/fs-storage/src/jni/file_storage.rs +++ b/fs-storage/src/jni/file_storage.rs @@ -44,7 +44,7 @@ pub extern "system" fn Java_dev_arkbuilders_core_FileStorage_create<'local>( let file_storage: FileStorage = FileStorage::new(label, Path::new(&path)).unwrap_or_else(|err| { - env.throw_new("java/lang/RuntimeException", &err.to_string()) + env.throw_new("java/lang/RuntimeException", err.to_string()) .expect("Failed to throw RuntimeException"); FileStorage::new("".to_string(), Path::new("")).unwrap() }); @@ -76,7 +76,7 @@ pub extern "system" fn Java_dev_arkbuilders_core_FileStorage_remove<'local>( FileStorage::from_jlong(file_storage_ptr) .remove(&id) .unwrap_or_else(|err| { - env.throw_new("java/lang/RuntimeException", &err.to_string()) + env.throw_new("java/lang/RuntimeException", err.to_string()) .unwrap(); }); } @@ -113,7 +113,7 @@ pub extern "system" fn Java_dev_arkbuilders_core_FileStorage_sync( FileStorage::from_jlong(file_storage_ptr) .sync() .unwrap_or_else(|err| { - env.throw_new("java/lang/RuntimeException", &err.to_string()) + env.throw_new("java/lang/RuntimeException", err.to_string()) .unwrap(); }); } @@ -128,7 +128,7 @@ pub extern "system" fn Java_dev_arkbuilders_core_FileStorage_readFS( match FileStorage::from_jlong(file_storage_ptr).read_fs() { Ok(data) => data.clone(), Err(err) => { - env.throw_new("java/lang/RuntimeException", &err.to_string()) + env.throw_new("java/lang/RuntimeException", err.to_string()) .expect("Failed to throw RuntimeException"); return JObject::null().into_raw(); } @@ -201,7 +201,7 @@ pub extern "system" fn 
Java_dev_arkbuilders_core_FileStorage_writeFS( FileStorage::from_jlong(file_storage_ptr) .write_fs() .unwrap_or_else(|err| { - env.throw_new("java/lang/RuntimeException", &err.to_string()) + env.throw_new("java/lang/RuntimeException", err.to_string()) .unwrap(); }); } @@ -218,7 +218,7 @@ pub extern "system" fn Java_dev_arkbuilders_core_FileStorage_erase( Box::from_raw(file_storage_ptr as *mut FileStorage) }; file_storage.erase().unwrap_or_else(|err| { - env.throw_new("java/lang/RuntimeException", &err.to_string()) + env.throw_new("java/lang/RuntimeException", err.to_string()) .unwrap(); }); } @@ -233,7 +233,7 @@ pub extern "system" fn Java_dev_arkbuilders_core_FileStorage_merge( FileStorage::from_jlong(file_storage_ptr) .merge_from(FileStorage::from_jlong(other_file_storage_ptr)) .unwrap_or_else(|err| { - env.throw_new("java/lang/RuntimeException", &err.to_string()) + env.throw_new("java/lang/RuntimeException", err.to_string()) .unwrap(); }); } From e25eeeb1bf48e1f76a4038ddbb28e899f63065c5 Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 2 Jul 2024 15:53:06 +0300 Subject: [PATCH 12/46] feat(fs-index): implement getters for IndexedResource Signed-off-by: Tarek --- ark-cli/src/commands/link/utils.rs | 4 +-- ark-cli/src/commands/list.rs | 14 +++++---- fs-index/benches/resource_index_benchmark.rs | 6 ++-- fs-index/src/index.rs | 32 ++++++++++++++++++-- fs-index/src/serde.rs | 18 +++++------ fs-index/src/tests.rs | 12 ++++---- 6 files changed, 57 insertions(+), 29 deletions(-) diff --git a/ark-cli/src/commands/link/utils.rs b/ark-cli/src/commands/link/utils.rs index 1409834c..7771c38c 100644 --- a/ark-cli/src/commands/link/utils.rs +++ b/ark-cli/src/commands/link/utils.rs @@ -29,7 +29,7 @@ pub fn load_link( let index = provide_index(root); index .get_resources_by_id(id.clone()) - .map(|r| r[0].path.clone()) + .map(|r| r[0].path().to_owned()) .ok_or_else(|| { AppError::IndexError(format!( "Resource with id {} not found", @@ -54,7 +54,7 @@ pub fn load_link( } } 
(Some(path), None) => Ok(path.to_path_buf()), - (None, Some(path)) => Ok(path), + (None, Some(path)) => Ok(path.to_path_buf()), (None, None) => Err(AppError::LinkLoadError( "Provide a path or id for request.".to_owned(), ))?, diff --git a/ark-cli/src/commands/list.rs b/ark-cli/src/commands/list.rs index 700ac7f9..6798c8fe 100644 --- a/ark-cli/src/commands/list.rs +++ b/ark-cli/src/commands/list.rs @@ -78,8 +78,8 @@ impl List { .resources() .iter() .filter_map(|indexed_resource| { - let path = indexed_resource.clone().path; - let id = indexed_resource.clone().id; + let path = indexed_resource.path(); + let id = indexed_resource.id(); let tags = if self.tags { Some( read_storage_value( @@ -115,7 +115,7 @@ impl List { let datetime = if self.modified { let format = "%b %e %H:%M %Y"; Some( - DateTime::::from(indexed_resource.last_modified) + DateTime::::from(indexed_resource.last_modified()) .format(format) .to_string(), ) @@ -124,9 +124,11 @@ impl List { }; let (path, resource, content) = match entry_output { - EntryOutput::Both => (Some(path), Some(id), None), - EntryOutput::Path => (Some(path), None, None), - EntryOutput::Id => (None, Some(id), None), + EntryOutput::Both => { + (Some(path.to_owned()), Some(id.to_owned()), None) + } + EntryOutput::Path => (Some(path.to_owned()), None, None), + EntryOutput::Id => (None, Some(id.to_owned()), None), EntryOutput::Link => match File::open(path) { Ok(mut file) => { let mut contents = String::new(); diff --git a/fs-index/benches/resource_index_benchmark.rs b/fs-index/benches/resource_index_benchmark.rs index fba6d048..cc7441e4 100644 --- a/fs-index/benches/resource_index_benchmark.rs +++ b/fs-index/benches/resource_index_benchmark.rs @@ -39,7 +39,7 @@ fn resource_index_benchmark(c: &mut Criterion) { let index: ResourceIndex = ResourceIndex::build(benchmarks_dir).unwrap(); let resources = index.resources(); - let resource_id = &resources.clone()[0].id; + let resource_id = resources[0].id(); 
group.bench_function("index_get_resource_by_id", |b| { b.iter(|| { let _resource = @@ -48,11 +48,11 @@ fn resource_index_benchmark(c: &mut Criterion) { }); // Benchmark `ResourceIndex::get_resource_by_path()` - let resource_path = &resources.clone()[0].path; + let resource_path = resources[0].path(); group.bench_function("index_get_resource_by_path", |b| { b.iter(|| { let _resource = - index.get_resource_by_path(black_box(resource_path.clone())); + index.get_resource_by_path(black_box(resource_path)); }); }); diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 93b30b96..ec02fbc7 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -23,11 +23,37 @@ use crate::utils::should_index; )] pub struct IndexedResource { /// The unique identifier of the resource - pub id: Id, + id: Id, /// The path of the resource, relative to the root path - pub path: PathBuf, + path: PathBuf, /// The last modified time of the resource (from the file system metadata) - pub last_modified: SystemTime, + last_modified: SystemTime, +} + +impl IndexedResource { + /// Create a new indexed resource + pub fn new(id: Id, path: PathBuf, last_modified: SystemTime) -> Self { + IndexedResource { + id, + path, + last_modified, + } + } + + /// Return the ID of the resource + pub fn id(&self) -> &Id { + &self.id + } + + /// Return the path of the resource + pub fn path(&self) -> &Path { + &self.path + } + + /// Return the last modified time of the resource + pub fn last_modified(&self) -> SystemTime { + self.last_modified + } } /// Represents the index of resources in a directory. 
diff --git a/fs-index/src/serde.rs b/fs-index/src/serde.rs index 9d16e8f0..b99b191f 100644 --- a/fs-index/src/serde.rs +++ b/fs-index/src/serde.rs @@ -42,9 +42,9 @@ where let mut resources = HashMap::new(); for (path, resource) in &self.path_to_resource { - let id = resource.id.clone(); + let id = resource.id().clone(); let last_modified = resource - .last_modified + .last_modified() .duration_since(SystemTime::UNIX_EPOCH) .map_err(|e| { serde::ser::Error::custom(format!( @@ -83,14 +83,14 @@ where for (path, resource_data) in index_data.resources { let last_modified = SystemTime::UNIX_EPOCH + std::time::Duration::from_nanos(resource_data.last_modified); - let resource = IndexedResource { - id: resource_data.id, - path: path.clone(), + let resource = IndexedResource::new( + resource_data.id, + path.clone(), last_modified, - }; + ); path_to_resource.insert(path, resource.clone()); id_to_resources - .entry(resource.id.clone()) + .entry(resource.id().clone()) .or_insert_with(Vec::new) .push(resource); } @@ -115,8 +115,8 @@ where fn eq(&self, other: &Self) -> bool { let mut resources1 = self.resources(); let mut resources2 = other.resources(); - resources1.sort_by(|a, b| a.path.cmp(&b.path)); - resources2.sort_by(|a, b| a.path.cmp(&b.path)); + resources1.sort_by(|a, b| a.path().cmp(b.path())); + resources2.sort_by(|a, b| a.path().cmp(b.path())); resources1 == resources2 && self.root == other.root } diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 2849049a..b9303991 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -21,11 +21,11 @@ fn get_indexed_resource_from_file>( .strip_prefix(parent_dir) .map_err(|_| anyhow!("Failed to get relative path"))?; - Ok(IndexedResource { - id: id, - path: relative_path.into(), - last_modified: fs::metadata(&path)?.modified()?, - }) + Ok(IndexedResource::new( + id, + relative_path.to_path_buf(), + fs::metadata(path)?.modified()?, + )) } /// Test storing and loading the resource index. 
@@ -492,7 +492,7 @@ fn test_track_modification() { .get_resource_by_path("file.txt") .expect("Resource not found"); assert_eq!( - resource.last_modified, + resource.last_modified(), fs::metadata(&file_path) .unwrap() .modified() From 7ba8bfabfedab4d022e5f0cbdd25425f327cd3bb Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 2 Jul 2024 15:55:03 +0300 Subject: [PATCH 13/46] feat(fs-index): implement getters for IndexUpdate Signed-off-by: Tarek --- ark-cli/src/util.rs | 12 ++++++------ fs-index/src/index.rs | 23 ++++++++++++++++++++--- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/ark-cli/src/util.rs b/ark-cli/src/util.rs index ab478fa5..bfa5d590 100644 --- a/ark-cli/src/util.rs +++ b/ark-cli/src/util.rs @@ -113,14 +113,14 @@ pub fn monitor_index( let duration = start.elapsed(); println!("Updating succeeded in {:?}\n", duration); - if !diff.removed.is_empty() { - println!("Deleted: {:?}", diff.removed); + if !diff.removed().is_empty() { + println!("Deleted: {:?}", diff.removed()); } - if !diff.added.is_empty() { - println!("Added: {:?}", diff.added); + if !diff.added().is_empty() { + println!("Added: {:?}", diff.added()); } - if !diff.modified.is_empty() { - println!("Modified: {:?}", diff.modified); + if !diff.modified().is_empty() { + println!("Modified: {:?}", diff.modified()); } } } diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index ec02fbc7..8d24d257 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -139,11 +139,28 @@ where #[derive(PartialEq, Debug)] pub struct IndexUpdate { /// Resources that were added during the update - pub added: Vec>, + added: Vec>, /// Resources that were modified during the update - pub modified: Vec>, + modified: Vec>, /// Resources that were removed during the update - pub removed: Vec>, + removed: Vec>, +} + +impl IndexUpdate { + /// Return the resources that were added during the update + pub fn added(&self) -> &Vec> { + &self.added + } + + /// Return the resources that were modified during 
the update + pub fn modified(&self) -> &Vec> { + &self.modified + } + + /// Return the resources that were removed during the update + pub fn removed(&self) -> &Vec> { + &self.removed + } } impl ResourceIndex { From e3222d9c1b2387b95672887341d3fa56cf978991 Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 2 Jul 2024 15:57:40 +0300 Subject: [PATCH 14/46] feat(fs-index): `get_resources_by_id` to take a reference of`Id` Signed-off-by: Tarek --- ark-cli/src/commands/link/utils.rs | 2 +- fs-index/benches/resource_index_benchmark.rs | 3 +-- fs-index/examples/resource_index.rs | 8 +++----- fs-index/src/index.rs | 4 ++-- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/ark-cli/src/commands/link/utils.rs b/ark-cli/src/commands/link/utils.rs index 7771c38c..2d27ca9a 100644 --- a/ark-cli/src/commands/link/utils.rs +++ b/ark-cli/src/commands/link/utils.rs @@ -28,7 +28,7 @@ pub fn load_link( let path_from_index = id.clone().map(|id| { let index = provide_index(root); index - .get_resources_by_id(id.clone()) + .get_resources_by_id(&id) .map(|r| r[0].path().to_owned()) .ok_or_else(|| { AppError::IndexError(format!( diff --git a/fs-index/benches/resource_index_benchmark.rs b/fs-index/benches/resource_index_benchmark.rs index cc7441e4..1a1a4660 100644 --- a/fs-index/benches/resource_index_benchmark.rs +++ b/fs-index/benches/resource_index_benchmark.rs @@ -42,8 +42,7 @@ fn resource_index_benchmark(c: &mut Criterion) { let resource_id = resources[0].id(); group.bench_function("index_get_resource_by_id", |b| { b.iter(|| { - let _resource = - index.get_resources_by_id(black_box(resource_id.clone())); + let _resource = index.get_resources_by_id(black_box(resource_id)); }); }); diff --git a/fs-index/examples/resource_index.rs b/fs-index/examples/resource_index.rs index c73ae778..3f5cb797 100644 --- a/fs-index/examples/resource_index.rs +++ b/fs-index/examples/resource_index.rs @@ -25,11 +25,9 @@ fn main() -> Result<()> { 
"172b4bf148e858b13dde0fc6613413bcb7552e5c4e5c45195ac6c80f20eb5ff5" .to_string(), ); - let resources = index - .get_resources_by_id(id.clone()) - .ok_or_else(|| { - anyhow::anyhow!("Resource with id {:?} not found", id) - })?; + let resources = index.get_resources_by_id(&id).ok_or_else(|| { + anyhow::anyhow!("Resource with id {:?} not found", id) + })?; for resource in resources { println!("{:?}", resource); } diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 8d24d257..2f8a7a07 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -236,9 +236,9 @@ impl ResourceIndex { /// hash collisions or files with the same content pub fn get_resources_by_id( &self, - id: Id, + id: &Id, ) -> Option<&Vec>> { - self.id_to_resources.get(&id) + self.id_to_resources.get(id) } /// Get a resource by its path From e19b55faf878c10ad5d0fe3eda0a86fcb0e71b9a Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 2 Jul 2024 17:15:49 +0300 Subject: [PATCH 15/46] docs(fs-index): include a simple readme for the crate Signed-off-by: Tarek --- fs-index/README.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 fs-index/README.md diff --git a/fs-index/README.md b/fs-index/README.md new file mode 100644 index 00000000..11365a2f --- /dev/null +++ b/fs-index/README.md @@ -0,0 +1,37 @@ +# fs-index + +The `fs-index` crate is part of the ARK framework, designed to help track resources in an index. This crate provides a robust system for managing a directory index, including tracking changes and querying resources. + +## Features + +The most important struct in this crate is `ResourceIndex` which comes with: + +- **Reactive API** + - `update_all`: Method to update the index by rescanning files and returning changes (additions/deletions/updates). +- **Snapshot API** + - `get_resources_by_id`: Query resources from the index by ID. + - `get_resource_by_path`: Query a resource from the index by its path. 
+- **Track API** + - `track_addition`: Track a newly added file (checks if the file exists in the file system). + - `track_removal`: Track the deletion of a file (checks if the file was actually deleted). + - `track_modification`: Track an update on a single file. + +## Custom Serialization + +The `ResourceIndex` struct includes a custom serialization implementation to avoid writing a large repetitive index file with double maps. + +## Tests and Benchmarks + +- Unit tests are located in `src/tests.rs`. +- The benchmarking suite is in `benches/resource_index_benchmark.rs`, benchmarking all methods of `ResourceIndex`. + - Run benchmarks with `cargo bench`. + +## Examples + +To get started, take a look at the examples in the `examples/` directory. + +To run a specific example: + +```shell +cargo run --example resource_index +``` From 4823ef0ecac26fd0ca0e7d9e192e39c6fd36b711 Mon Sep 17 00:00:00 2001 From: Tarek Date: Fri, 5 Jul 2024 15:08:17 +0300 Subject: [PATCH 16/46] fix(fmt): clean up doc comments Signed-off-by: Tarek --- ark-cli/src/commands/list.rs | 4 ++-- ark-cli/src/commands/render.rs | 4 ++-- ark-cli/src/index_registrar.rs | 4 ++-- data-resource/src/lib.rs | 4 ++-- dev-hash/benches/blake3.rs | 4 ++-- dev-hash/benches/crc32.rs | 4 ++-- fs-index/src/index.rs | 6 +++--- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/ark-cli/src/commands/list.rs b/ark-cli/src/commands/list.rs index 6798c8fe..ee86152b 100644 --- a/ark-cli/src/commands/list.rs +++ b/ark-cli/src/commands/list.rs @@ -134,8 +134,8 @@ impl List { let mut contents = String::new(); match file.read_to_string(&mut contents) { Ok(_) => { - // Check if the content of the file is a - // valid url + // Check if the content + // of the file is a valid url let url = contents.trim(); let url = url::Url::parse(url); match url { diff --git a/ark-cli/src/commands/render.rs b/ark-cli/src/commands/render.rs index 82fde115..19014831 100644 --- a/ark-cli/src/commands/render.rs +++ 
b/ark-cli/src/commands/render.rs @@ -26,8 +26,8 @@ impl Render { let dest_path = filepath.with_file_name( filepath .file_stem() - // SAFETY: we know that the file stem is valid UTF-8 because it - // is a file name + // SAFETY: we know that the file stem is valid UTF-8 + // because it is a file name .unwrap() .to_str() .unwrap() diff --git a/ark-cli/src/index_registrar.rs b/ark-cli/src/index_registrar.rs index 34e4e599..a75df080 100644 --- a/ark-cli/src/index_registrar.rs +++ b/ark-cli/src/index_registrar.rs @@ -36,8 +36,8 @@ pub fn provide_index>( } log::info!("Index has not been registered before"); - // If the index has not been registered before, we need to load it, update - // it and register it + // If the index has not been registered before, + // we need to load it, update it and register it match load_or_build_index(&root_path, true) { Ok(index) => { let mut registrar = REGISTRAR.write().map_err(|_| { diff --git a/data-resource/src/lib.rs b/data-resource/src/lib.rs index a8ac7774..ea7426c3 100644 --- a/data-resource/src/lib.rs +++ b/data-resource/src/lib.rs @@ -11,8 +11,8 @@ use std::{fmt::Debug, hash::Hash, path::Path}; /// Resources are identified by a hash value, which is computed from the /// resource's data. The hash value is used to uniquely identify the resource. /// -/// Implementors of this trait must provide a way to compute the hash value from -/// the resource's data. +/// Implementors of this trait must provide a way to compute +/// the hash value from the resource's data. pub trait ResourceId: Debug + Display diff --git a/dev-hash/benches/blake3.rs b/dev-hash/benches/blake3.rs index f434cb36..9b3c6541 100644 --- a/dev-hash/benches/blake3.rs +++ b/dev-hash/benches/blake3.rs @@ -17,8 +17,8 @@ fn generate_random_data(size: usize) -> Vec { (0..size).map(|_| rng.gen()).collect() } -/// Benchmarks the performance of resource ID creation from file paths and -/// random data. 
+/// Benchmarks the performance of resource ID creation +/// from file paths and random data. /// /// - Measures the time taken to create a resource ID from file paths. /// - Measures the time taken to create a resource ID from random data. diff --git a/dev-hash/benches/crc32.rs b/dev-hash/benches/crc32.rs index c85c4dc7..dab4f776 100644 --- a/dev-hash/benches/crc32.rs +++ b/dev-hash/benches/crc32.rs @@ -17,8 +17,8 @@ fn generate_random_data(size: usize) -> Vec { (0..size).map(|_| rng.gen()).collect() } -/// Benchmarks the performance of resource ID creation from file paths and -/// random data. +/// Benchmarks the performance of resource ID creation +/// from file paths and random data. /// /// - Measures the time taken to create a resource ID from file paths. /// - Measures the time taken to create a resource ID from random data. diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 2f8a7a07..6c36e542 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -322,8 +322,8 @@ impl ResourceIndex { let current_resources = self.resources(); let new_resources = new_index.resources(); for resource in new_resources.clone() { - // If the resource is in the old index, check if it has been - // modified + // If the resource is in the old index, + // check if it has been modified if let Some(current_resource) = self.get_resource_by_path(&resource.path) { @@ -381,7 +381,7 @@ impl ResourceIndex { ))); } let metadata = fs::metadata(&full_path)?; - // return an error if the file is empty + // empty files don't have content, so we can't compute id if metadata.len() == 0 { return Err(ArklibError::Path(format!( "File is empty: {:?}", From cd7e4a1f86e4378264553e098d612a9d211b1da9 Mon Sep 17 00:00:00 2001 From: Tarek Date: Fri, 5 Jul 2024 15:24:46 +0300 Subject: [PATCH 17/46] feat(fs-index): run unit tests on crc32 and blake3 Signed-off-by: Tarek --- .github/workflows/build.yml | 12 +++++++++--- fs-index/Cargo.toml | 6 ++++++ fs-index/src/tests.rs | 37 
++++++++++++++++++++----------------- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a6a7d947..e7659544 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -37,7 +37,9 @@ jobs: run: cargo build --verbose - name: Run tests - run: cargo test --verbose + run: | + cargo test --verbose + cargo test --verbose --features blake3 - name: Build Release run: cargo build --verbose --release @@ -69,7 +71,9 @@ jobs: run: cargo build --verbose --release - name: Run tests - run: cargo test --workspace --verbose + run: | + cargo test --workspace --verbose + cargo test --workspace --verbose --features blake3 - name: Install JDK uses: actions/setup-java@v4.2.1 @@ -98,7 +102,9 @@ jobs: run: cargo build --verbose --release - name: Run tests - run: cargo test --workspace --verbose + run: | + cargo test --workspace --verbose + cargo test --workspace --verbose --features blake3 - name: Install JDK uses: actions/setup-java@v4.2.1 diff --git a/fs-index/Cargo.toml b/fs-index/Cargo.toml index 0ded09dd..fa0d529a 100644 --- a/fs-index/Cargo.toml +++ b/fs-index/Cargo.toml @@ -33,3 +33,9 @@ dev-hash = { path = "../dev-hash" } name = "resource_index_benchmark" harness = false path = "benches/resource_index_benchmark.rs" + + +[features] +# This feature is only used for unit testing. It's a hack to allow +# running the same test suite with different hash functions. 
+blake3 = [] diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index b9303991..4c661482 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -4,7 +4,10 @@ use anyhow::{anyhow, Result}; use tempfile::TempDir; use data_resource::ResourceId; -use dev_hash::Crc32; +#[cfg(feature = "blake3")] +use dev_hash::Blake3 as HashType; +#[cfg(not(feature = "blake3"))] +use dev_hash::Crc32 as HashType; use super::*; use crate::{index::IndexedResource, utils::load_or_build_index}; @@ -13,8 +16,8 @@ use crate::{index::IndexedResource, utils::load_or_build_index}; fn get_indexed_resource_from_file>( path: P, parent_dir: P, -) -> Result> { - let id = Crc32::from_path(&path)?; +) -> Result> { + let id = HashType::from_path(&path)?; let relative_path = path .as_ref() @@ -44,7 +47,7 @@ fn test_store_and_load_index() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let index: ResourceIndex = + let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); assert_eq!(index.len(), 1); index.store().expect("Failed to store index"); @@ -84,10 +87,10 @@ fn test_store_and_load_index_with_collisions() { // Now we have 4 files with the same content (same checksum) - let index: ResourceIndex = + let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); let checksum = - Crc32::from_path(&file_path).expect("Failed to get checksum"); + HashType::from_path(&file_path).expect("Failed to get checksum"); assert_eq!(index.len(), 4); assert_eq!(index.collisions().len(), 1); assert_eq!(index.collisions()[&checksum].len(), 4); @@ -147,7 +150,7 @@ fn test_build_index_with_empty_file() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let index: ResourceIndex = + let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); assert_eq!(index.len(), 1); } @@ 
-294,7 +297,7 @@ fn test_track_removal() { let image_path = root_path.join("image.png"); fs::write(&image_path, "image content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 2); @@ -330,7 +333,7 @@ fn test_track_removal_non_existent() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 1); @@ -364,7 +367,7 @@ fn test_track_addition() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 1); @@ -405,7 +408,7 @@ fn test_track_addition_empty_file() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 1); @@ -437,7 +440,7 @@ fn test_track_addition_non_existent() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 1); @@ -472,7 +475,7 @@ fn test_track_modification() { let file_path = root_path.join("file.txt"); 
fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 1); @@ -519,7 +522,7 @@ fn test_track_modification_does_not_add() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 1); @@ -618,7 +621,7 @@ fn test_add_colliding_files() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 1); @@ -656,7 +659,7 @@ fn test_num_collisions() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 1); @@ -692,7 +695,7 @@ fn test_hidden_files() { let file_path = root_path.join(".hidden_file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let index: ResourceIndex = + let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 0); From a0f2769af45e4565d43bb621ef597af9964837af Mon Sep 17 00:00:00 2001 From: Tarek Date: Fri, 5 Jul 2024 17:35:41 +0300 Subject: [PATCH 18/46] refactor(fs-index): revert the changes for the track api Signed-off-by: Tarek 
--- fs-index/benches/resource_index_benchmark.rs | 55 ---- fs-index/src/index.rs | 163 ----------- fs-index/src/tests.rs | 271 +------------------ 3 files changed, 2 insertions(+), 487 deletions(-) diff --git a/fs-index/benches/resource_index_benchmark.rs b/fs-index/benches/resource_index_benchmark.rs index 1a1a4660..5f73f172 100644 --- a/fs-index/benches/resource_index_benchmark.rs +++ b/fs-index/benches/resource_index_benchmark.rs @@ -55,61 +55,6 @@ fn resource_index_benchmark(c: &mut Criterion) { }); }); - // Benchmark `ResourceIndex::track_addition()` - let new_file = benchmarks_dir.join("new_file.txt"); - group.bench_function("index_track_addition", |b| { - b.iter(|| { - std::fs::File::create(&new_file).unwrap(); - std::fs::write(&new_file, "Hello, World!").unwrap(); - let mut index: ResourceIndex = - ResourceIndex::build(black_box(benchmarks_dir)).unwrap(); - let _addition_result = index.track_addition(&new_file).unwrap(); - - // Cleanup - std::fs::remove_file(&new_file).unwrap(); - }); - }); - - // Benchmark `ResourceIndex::track_removal()` - let removed_file = benchmarks_dir.join("new_file.txt"); - group.bench_function("index_track_removal", |b| { - b.iter(|| { - std::fs::File::create(&removed_file).unwrap(); - std::fs::write(&removed_file, "Hello, World!").unwrap(); - let mut index: ResourceIndex = - ResourceIndex::build(black_box(benchmarks_dir)).unwrap(); - std::fs::remove_file(&removed_file).unwrap(); - let relative_path = removed_file - .strip_prefix(benchmarks_dir) - .unwrap() - .to_str() - .unwrap(); - let _removal_result = index.track_removal(&relative_path).unwrap(); - }); - }); - - // Benchmark `ResourceIndex::track_modification()` - let modified_file = benchmarks_dir.join("new_file.txt"); - group.bench_function("index_track_modification", |b| { - b.iter(|| { - std::fs::File::create(&modified_file).unwrap(); - std::fs::write(&modified_file, "Hello, World!").unwrap(); - let mut index: ResourceIndex = - 
ResourceIndex::build(black_box(benchmarks_dir)).unwrap(); - std::fs::write(&modified_file, "Hello, World! Modified").unwrap(); - let relative_path = modified_file - .strip_prefix(benchmarks_dir) - .unwrap() - .to_str() - .unwrap(); - let _modification_result = - index.track_modification(&relative_path).unwrap(); - - // Cleanup - std::fs::remove_file(&modified_file).unwrap(); - }); - }); - // Benchmark `ResourceIndex::update_all()` // First, create a new temp directory specifically for the update_all diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 6c36e542..385aa6e7 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -6,7 +6,6 @@ use std::{ time::SystemTime, }; -use anyhow::anyhow; use log; use serde::{Deserialize, Serialize}; use walkdir::WalkDir; @@ -103,21 +102,6 @@ impl IndexedResource { /// let _resource = loaded_index /// .get_resource_by_path("cat.txt") /// .expect("Resource not found"); -/// -/// // Track the removal of a file -/// loaded_index -/// .track_removal(Path::new("cat.txt")) -/// .expect("Failed to track removal"); -/// -/// // Track the addition of a new file -/// loaded_index -/// .track_addition(Path::new("dog.txt")) -/// .expect("Failed to track addition"); -/// -/// // Track the modification of a file -/// loaded_index -/// .track_modification(Path::new("dog.txt")) -/// .expect("Failed to track modification"); /// ``` #[derive(Clone, Debug)] pub struct ResourceIndex @@ -351,151 +335,4 @@ impl ResourceIndex { removed, }) } - - /// Track the addition of a newly added file to the resource index. - /// - /// This method checks if the file exists in the file system. - /// - /// # Arguments - /// * `relative_path` - The path of the file to be added (relative to the - /// root path of the index). - /// - /// # Returns - /// Returns `Ok(resource)` if the file was successfully added to the index. - /// - /// # Errors - /// - If the file does not exist in the file system. 
- /// - If there was an error calculating the checksum of the file. - pub fn track_addition>( - &mut self, - relative_path: P, - ) -> Result> { - log::debug!("Tracking addition of file: {:?}", relative_path.as_ref()); - - let path = relative_path.as_ref(); - let full_path = self.root.join(path); - if !full_path.exists() { - return Err(ArklibError::Path(format!( - "File does not exist: {:?}", - full_path - ))); - } - let metadata = fs::metadata(&full_path)?; - // empty files don't have content, so we can't compute id - if metadata.len() == 0 { - return Err(ArklibError::Path(format!( - "File is empty: {:?}", - full_path - ))); - } - let last_modified = metadata.modified()?; - let id = Id::from_path(&full_path)?; - - let resource = IndexedResource { - id: id.clone(), - path: path.to_path_buf(), - last_modified, - }; - self.path_to_resource - .insert(resource.path.clone(), resource.clone()); - self.id_to_resources - .entry(id) - .or_default() - .push(resource.clone()); - - Ok(resource) - } - - /// Track the removal of a file from the resource index. - /// - /// This method checks if the file exists in the file system - /// - /// # Arguments - /// * `relative_path` - The path of the file to be removed (relative to the - /// root path of the index). - /// - /// # Returns - /// Returns `Ok(resource)` if the resource was successfully removed from the - /// index. - /// - /// # Errors - /// - If the file still exists in the file system. - /// - If the resource does not exist in the index. 
- pub fn track_removal>( - &mut self, - relative_path: P, - ) -> Result> { - log::debug!("Tracking removal of file: {:?}", relative_path.as_ref()); - - let path = relative_path.as_ref(); - let full_path = self.root.join(path); - if full_path.exists() { - return Err(ArklibError::Path(format!( - "File still exists: {:?}", - full_path - ))); - } - - // Remove the resource from the index - let resource = self - .path_to_resource - .remove(path) - .ok_or_else(|| anyhow!("Resource not found: {}", path.display()))?; - - // Remove the resource from the id_to_resources map - if let Some(resources) = self.id_to_resources.get_mut(&resource.id) { - resources.retain(|r| r.path != resource.path); - if resources.is_empty() { - self.id_to_resources.remove(&resource.id); - } - } - - Ok(resource) - } - - /// Track the modification of a file in the resource index. - /// - /// This method checks if the file exists in the file system and removes the - /// old resource from the index before adding the new resource to the - /// index. - /// - /// # Arguments - /// * `relative_path` - The relative path of the file to be modified. - /// - /// # Returns - /// Returns `Ok(new_resource)` if the resource was successfully modified in - /// the index. - /// - /// # Errors - /// - If there was a problem removing the old resource from the index. - /// - If there was a problem adding the new resource to the index. 
- pub fn track_modification>( - &mut self, - relative_path: P, - ) -> Result> { - log::debug!( - "Tracking modification of file: {:?}", - relative_path.as_ref() - ); - - let path = relative_path.as_ref(); - // Remove the resource from the index - let resource = self - .path_to_resource - .remove(path) - .ok_or_else(|| anyhow!("Resource not found: {}", path.display()))?; - - // Remove the resource from the id_to_resources map - if let Some(resources) = self.id_to_resources.get_mut(&resource.id) { - resources.retain(|r| r.path != resource.path); - if resources.is_empty() { - self.id_to_resources.remove(&resource.id); - } - } - - // Add the new resource to the index - let new_resource = self.track_addition(path)?; - - Ok(new_resource) - } } diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 4c661482..b2601cd0 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -275,270 +275,6 @@ fn test_build_index_with_multiple_directories() { assert_eq!(resource, &expected_resource2); } -/// Test tracking the removal of a file from the index. -/// -/// ## Test scenario: -/// - Create two files within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index contains two entries. -/// - Remove one of the files. -/// - Track the removal of the file in the index. -/// - Assert that the index contains only one entry after removal. -/// - Assert that the removed file is no longer present in the index, while the -/// other file remains. 
-#[test] -fn test_track_removal() { - let temp_dir = TempDir::with_prefix("ark_test_track_removal") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - let image_path = root_path.join("image.png"); - fs::write(&image_path, "image content").expect("Failed to write to file"); - - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 2); - - fs::remove_file(&file_path).expect("Failed to remove file"); - - let file_relative_path = file_path - .strip_prefix(root_path) - .expect("Failed to get relative path"); - index - .track_removal(&file_relative_path) - .expect("Failed to track removal"); - - assert_eq!(index.len(), 1); - assert!(index.get_resource_by_path("file.txt").is_none()); - assert!(index.get_resource_by_path("image.png").is_some()); -} - -/// Test tracking the removal of a file that doesn't exist. -/// -/// ## Test scenario: -/// - Create a file within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index contains only one entry. -/// - Track the removal of a file that doesn't exist in the index. -/// - Assert that the index still contains only one entry. 
-#[test] -fn test_track_removal_non_existent() { - let temp_dir = TempDir::with_prefix("ark_test_track_removal_non_existent") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - let new_file_path = root_path.join("new_file.txt"); - - let new_file_relative_path = new_file_path - .strip_prefix(root_path) - .expect("Failed to get relative path"); - let removal_result = index.track_removal(&new_file_relative_path); - assert!(removal_result.is_err()); - assert_eq!(index.len(), 1); -} - -/// Test tracking the addition of a new file to the index. -/// -/// ## Test scenario: -/// - Create a file within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index initially contains only one entry. -/// - Create a new file in the temporary directory. -/// - Track the addition of the new file in the index. -/// - Assert that the index contains two entries after addition. -/// - Assert that both files are present in the index. 
-#[test] -fn test_track_addition() { - let temp_dir = TempDir::with_prefix("ark_test_track_addition") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "new file content") - .expect("Failed to write to file"); - - let new_file_relative_path = new_file_path - .strip_prefix(root_path) - .expect("Failed to get relative path"); - index - .track_addition(&new_file_relative_path) - .expect("Failed to track addition"); - - assert_eq!(index.len(), 2); - assert!(index.get_resource_by_path("file.txt").is_some()); - assert!(index - .get_resource_by_path("new_file.txt") - .is_some()); -} - -/// Test tracking the addition of an empty file to the index. -/// -/// ## Test scenario: -/// - Create a file within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index initially contains only one entry. -/// - Create a new empty file in the temporary directory. -/// - Track the addition of the new file in the index. -/// - Assert that it retuns an error. 
-#[test] -fn test_track_addition_empty_file() { - let temp_dir = TempDir::with_prefix("ark_test_track_addition_empty_file") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "").expect("Failed to write to file"); - - let new_file_relative_path = new_file_path - .strip_prefix(root_path) - .expect("Failed to get relative path"); - let addition_result = index.track_addition(&new_file_relative_path); - assert!(addition_result.is_err()); -} - -/// Test for tracking addition of a file that doesn't exist -/// -/// ## Test scenario: -/// - Create a file within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index initially contains only one entry. -/// - Track the addition of a file that doesn't exist in the index. -/// - Assert that the index still contains only one entry. 
-#[test] -fn test_track_addition_non_existent() { - let temp_dir = TempDir::with_prefix("ark_test_track_addition_non_existent") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - let new_file_path = root_path.join("new_file.txt"); - - let new_file_relative_path = new_file_path - .strip_prefix(root_path) - .expect("Failed to get relative path"); - let addition_result = index.track_addition(&new_file_relative_path); - assert!(addition_result.is_err()); - assert_eq!(index.len(), 1); -} - -/// Test tracking the modification of a file in the index. -/// -/// ## Test scenario: -/// - Create a file within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index initially contains only one entry. -/// - Update the content of the file. -/// - Track the modification of the file in the index. -/// - Assert that the index still contains only one entry. -/// - Assert that the modification timestamp of the file in the index matches -/// the actual file's modification timestamp. 
-#[test] -fn test_track_modification() { - let temp_dir = TempDir::with_prefix("ark_test_track_modification") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - fs::write(&file_path, "updated file content") - .expect("Failed to write to file"); - - let file_relative_path = file_path - .strip_prefix(root_path) - .expect("Failed to get relative path"); - index - .track_modification(&file_relative_path) - .expect("Failed to track modification"); - - assert_eq!(index.len(), 1); - let resource = index - .get_resource_by_path("file.txt") - .expect("Resource not found"); - assert_eq!( - resource.last_modified(), - fs::metadata(&file_path) - .unwrap() - .modified() - .unwrap() - ); -} - -/// Test that track modification does not add a new file to the index. -/// -/// ## Test scenario: -/// - Create a file within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index initially contains only one entry. -/// - Create a new file in the temporary directory. -/// - Track the modification of the new file in the index. -/// - Assert that the index still contains only one entry. 
-#[test] -fn test_track_modification_does_not_add() { - let temp_dir = - TempDir::with_prefix("ark_test_track_modification_does_not_add") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "new file content") - .expect("Failed to write to file"); - - let new_file_relative_path = new_file_path - .strip_prefix(root_path) - .expect("Failed to get relative path"); - - let modification_result = index.track_modification(&new_file_relative_path); - assert!(modification_result.is_err()); -} - /// Test updating the resource index. /// /// ## Test scenario: @@ -629,12 +365,9 @@ fn test_add_colliding_files() { let new_file_path = root_path.join("new_file.txt"); fs::write(&new_file_path, "file content").expect("Failed to write to file"); - let new_file_relative_path = new_file_path - .strip_prefix(root_path) - .expect("Failed to get relative path"); index - .track_addition(&new_file_relative_path) - .expect("Failed to track addition"); + .update_all() + .expect("Failed to update index"); assert_eq!(index.len(), 2); assert_eq!(index.collisions().len(), 1); From 41d2da44b376d4ef47fba04cdd5a934a36eeb9cc Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 6 Jul 2024 17:54:47 +0300 Subject: [PATCH 19/46] docs(fs-index): update README with other use cases Signed-off-by: Tarek --- fs-index/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs-index/README.md b/fs-index/README.md index 11365a2f..140013c3 100644 --- a/fs-index/README.md +++ b/fs-index/README.md @@ -1,6 +1,8 @@ # fs-index -The `fs-index` crate is part of the ARK framework, designed to help 
track resources in an index. This crate provides a robust system for managing a directory index, including tracking changes and querying resources. +`fs-index` is a Rust crate for managing and indexing file system resources. It provides a flexible and efficient way to track changes, query resources, and keep files in sync across multiple devices or locations. + +Originally developed for the Ark framework to support local-first applications, `fs-index` can also be used in various scenarios including backup systems, content management, and more. ## Features From 3aa2a04cd025378cf1b137e6e5a35a0d3517ada5 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 7 Jul 2024 10:54:27 +0300 Subject: [PATCH 20/46] test(fs-index): define a macro to run tests regardless of the hash Signed-off-by: Tarek --- ark-cli/src/util.rs | 2 +- fs-index/Cargo.toml | 6 - fs-index/README.md | 8 +- fs-index/benches/resource_index_benchmark.rs | 2 +- fs-index/src/index.rs | 1 - fs-index/src/lib.rs | 12 +- fs-index/src/test_blake3.rs | 5 + fs-index/src/test_crc32.rs | 5 + fs-index/src/test_utils.rs | 493 +++++++++++++++++++ fs-index/src/tests.rs | 435 ---------------- 10 files changed, 515 insertions(+), 454 deletions(-) create mode 100644 fs-index/src/test_blake3.rs create mode 100644 fs-index/src/test_crc32.rs create mode 100644 fs-index/src/test_utils.rs delete mode 100644 fs-index/src/tests.rs diff --git a/ark-cli/src/util.rs b/ark-cli/src/util.rs index bfa5d590..be5273fa 100644 --- a/ark-cli/src/util.rs +++ b/ark-cli/src/util.rs @@ -1,5 +1,5 @@ use crate::ResourceId; -use fs_index::index::ResourceIndex; +use fs_index::ResourceIndex; use fs_metadata::METADATA_STORAGE_FOLDER; use fs_properties::PROPERTIES_STORAGE_FOLDER; use fs_storage::{ diff --git a/fs-index/Cargo.toml b/fs-index/Cargo.toml index fa0d529a..0ded09dd 100644 --- a/fs-index/Cargo.toml +++ b/fs-index/Cargo.toml @@ -33,9 +33,3 @@ dev-hash = { path = "../dev-hash" } name = "resource_index_benchmark" harness = false path = 
"benches/resource_index_benchmark.rs" - - -[features] -# This feature is only used for unit testing. It's a hack to allow -# running the same test suite with different hash functions. -blake3 = [] diff --git a/fs-index/README.md b/fs-index/README.md index 140013c3..9253375a 100644 --- a/fs-index/README.md +++ b/fs-index/README.md @@ -2,21 +2,17 @@ `fs-index` is a Rust crate for managing and indexing file system resources. It provides a flexible and efficient way to track changes, query resources, and keep files in sync across multiple devices or locations. -Originally developed for the Ark framework to support local-first applications, `fs-index` can also be used in various scenarios including backup systems, content management, and more. +Originally developed for the ARK framework to support local-first applications, `fs-index` can also be used in various scenarios including backup systems, content management, and more. ## Features The most important struct in this crate is `ResourceIndex` which comes with: - **Reactive API** - - `update_all`: Method to update the index by rescanning files and returning changes (additions/deletions/updates). + - `update_all`: Method to update the index by rescanning files and returning changes (additions/deletions). - **Snapshot API** - `get_resources_by_id`: Query resources from the index by ID. - `get_resource_by_path`: Query a resource from the index by its path. -- **Track API** - - `track_addition`: Track a newly added file (checks if the file exists in the file system). - - `track_removal`: Track the deletion of a file (checks if the file was actually deleted). - - `track_modification`: Track an update on a single file. 
## Custom Serialization diff --git a/fs-index/benches/resource_index_benchmark.rs b/fs-index/benches/resource_index_benchmark.rs index 5f73f172..04ebc4c0 100644 --- a/fs-index/benches/resource_index_benchmark.rs +++ b/fs-index/benches/resource_index_benchmark.rs @@ -6,7 +6,7 @@ use criterion::{ use tempfile::TempDir; use dev_hash::Crc32; -use fs_index::index::ResourceIndex; +use fs_index::ResourceIndex; // The path to the test assets directory const DIR_PATH: &str = "../test-assets/"; diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 385aa6e7..16f1b41c 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -6,7 +6,6 @@ use std::{ time::SystemTime, }; -use log; use serde::{Deserialize, Serialize}; use walkdir::WalkDir; diff --git a/fs-index/src/lib.rs b/fs-index/src/lib.rs index be68c43e..2baa373f 100644 --- a/fs-index/src/lib.rs +++ b/fs-index/src/lib.rs @@ -1,10 +1,14 @@ -pub mod index; +mod index; mod serde; mod utils; pub use utils::load_or_build_index; -#[cfg(test)] -mod tests; - pub use index::ResourceIndex; + +#[cfg(test)] +mod test_blake3; +#[cfg(test)] +mod test_crc32; +#[cfg(test)] +mod test_utils; diff --git a/fs-index/src/test_blake3.rs b/fs-index/src/test_blake3.rs new file mode 100644 index 00000000..7cdd9f98 --- /dev/null +++ b/fs-index/src/test_blake3.rs @@ -0,0 +1,5 @@ +use dev_hash::Blake3; + +use crate::generate_tests; + +generate_tests!(Blake3); diff --git a/fs-index/src/test_crc32.rs b/fs-index/src/test_crc32.rs new file mode 100644 index 00000000..d05a7d3f --- /dev/null +++ b/fs-index/src/test_crc32.rs @@ -0,0 +1,5 @@ +use dev_hash::Crc32; + +use crate::generate_tests; + +generate_tests!(Crc32); diff --git a/fs-index/src/test_utils.rs b/fs-index/src/test_utils.rs new file mode 100644 index 00000000..b64e8ab5 --- /dev/null +++ b/fs-index/src/test_utils.rs @@ -0,0 +1,493 @@ +/// A macro to generate tests for the resource index. +/// +/// This macro generates tests for a given hash type. 
The hash type must +/// implement the `ResourceId` trait. +#[macro_export] +macro_rules! generate_tests { + ($hash_type:ty) => { + use std::{fs, path::Path}; + + use anyhow::{anyhow, Result}; + use tempfile::TempDir; + + use data_resource::ResourceId; + + use super::*; + use crate::{index::IndexedResource, utils::load_or_build_index}; + + /// A helper function to get [`IndexedResource`] from a file path + fn get_indexed_resource_from_file>( + path: P, + parent_dir: P, + ) -> Result> { + let id = <$hash_type>::from_path(&path)?; + + let relative_path = path + .as_ref() + .strip_prefix(parent_dir) + .map_err(|_| anyhow!("Failed to get relative path"))?; + + Ok(IndexedResource::new( + id, + relative_path.to_path_buf(), + fs::metadata(path)?.modified()?, + )) + } + + /// Test storing and loading the resource index. + /// + /// ## Test scenario: + /// - Build a resource index in the temporary directory. + /// - Store the index. + /// - Load the stored index. + /// - Assert that the loaded index matches the original index. + #[test] + fn test_store_and_load_index() { + let temp_dir = + TempDir::with_prefix("ark_test_store_and_load_index") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); + index.store().expect("Failed to store index"); + + let loaded_index = load_or_build_index(root_path, false) + .expect("Failed to load index"); + + assert_eq!(index, loaded_index); + } + + /// Test storing and loading the resource index with collisions. + /// + /// ## Test scenario: + /// - Build a resource index in the temporary directory. + /// - Write duplicate files with the same content. + /// - Store the index. + /// - Load the stored index. 
+ /// - Assert that the loaded index matches the original index. + #[test] + fn test_store_and_load_index_with_collisions() { + let temp_dir = TempDir::with_prefix( + "ark_test_store_and_load_index_with_collisions", + ) + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let file_path2 = root_path.join("file2.txt"); + fs::write(&file_path2, "file content") + .expect("Failed to write to file"); + + let file_path3 = root_path.join("file3.txt"); + fs::write(&file_path3, "file content") + .expect("Failed to write to file"); + + let file_path4 = root_path.join("file4.txt"); + fs::write(&file_path4, "file content") + .expect("Failed to write to file"); + + // Now we have 4 files with the same content (same checksum) + + let index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path).expect("Failed to build index"); + let checksum = <$hash_type>::from_path(&file_path) + .expect("Failed to get checksum"); + assert_eq!(index.len(), 4); + assert_eq!(index.collisions().len(), 1); + assert_eq!(index.collisions()[&checksum].len(), 4); + index.store().expect("Failed to store index"); + + let loaded_index = load_or_build_index(root_path, false) + .expect("Failed to load index"); + + assert_eq!(index, loaded_index); + } + + /// Test building an index with a file. + /// + /// ## Test scenario: + /// - Create a file within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index contains one entry. + /// - Assert that the resource retrieved by path matches the expected + /// resource. + /// - Assert that the resource retrieved by ID matches the expected + /// resource. 
+ #[test] + fn test_build_index_with_file() { + let temp_dir = + TempDir::with_prefix("ark_test_build_index_with_file") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + let expected_resource = get_indexed_resource_from_file( + &file_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + + let index = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); + + let resource = index + .get_resource_by_path("file.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource); + } + + /// Test building an index with an empty file. + /// + /// ## Test scenario: + /// - Create an empty file within the temporary directory. + /// - Create a file with content within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index contains one entry. + #[test] + fn test_build_index_with_empty_file() { + let temp_dir = + TempDir::with_prefix("ark_test_build_index_with_empty_file") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let empty_file_path = root_path.join("empty_file.txt"); + fs::write(&empty_file_path, "").expect("Failed to write to file"); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); + } + + /// Test building an index with a directory. + /// + /// ## Test scenario: + /// - Create a subdirectory within the temporary directory. + /// - Create a file within the subdirectory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index contains one entry. 
+ /// - Assert that the resource retrieved by path matches the expected + /// resource. + /// - Assert that the resource retrieved by ID matches the expected + /// resource. + #[test] + fn test_build_index_with_directory() { + let temp_dir = + TempDir::with_prefix("ark_test_build_index_with_directory") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let dir_path = root_path.join("dir"); + fs::create_dir(&dir_path).expect("Failed to create dir"); + let file_path = dir_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + let expected_resource = get_indexed_resource_from_file( + &file_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + + let index = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); + + let resource = index + .get_resource_by_path("dir/file.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource); + } + + /// Test building an index with multiple files. + /// + /// ## Test scenario: + /// - Create multiple files within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index contains two entries. + /// - Assert that the resource retrieved by path for each file matches + /// the expected resource. 
+ #[test] + fn test_build_index_with_multiple_files() { + let temp_dir = TempDir::with_prefix( + "ark_test_build_index_with_multiple_files", + ) + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file1_path = root_path.join("file1.txt"); + fs::write(&file1_path, "file1 content") + .expect("Failed to write to file"); + let file2_path = root_path.join("file2.txt"); + fs::write(&file2_path, "file2 content") + .expect("Failed to write to file"); + + let expected_resource1 = get_indexed_resource_from_file( + &file1_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + let expected_resource2 = get_indexed_resource_from_file( + &file2_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + + let index = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("file1.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource1); + + let resource = index + .get_resource_by_path("file2.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource2); + } + + /// Test building an index with multiple directories. + /// + /// ## Test scenario: + /// - Create multiple directories within the temporary directory, each + /// containing a file. + /// - Build a resource index in the temporary directory. + /// - Assert that the index contains two entries. + /// - Assert that the resources retrieved by path for each file match + /// the expected resources. 
+ #[test] + fn test_build_index_with_multiple_directories() { + let temp_dir = TempDir::with_prefix( + "ark_test_build_index_with_multiple_directories", + ) + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let dir1_path = root_path.join("dir1"); + fs::create_dir(&dir1_path).expect("Failed to create dir"); + let file1_path = dir1_path.join("file1.txt"); + fs::write(&file1_path, "file1 content") + .expect("Failed to write to file"); + + let dir2_path = root_path.join("dir2"); + fs::create_dir(&dir2_path).expect("Failed to create dir"); + let file2_path = dir2_path.join("file2.txt"); + fs::write(&file2_path, "file2 content") + .expect("Failed to write to file"); + + let expected_resource1 = get_indexed_resource_from_file( + &file1_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + let expected_resource2 = get_indexed_resource_from_file( + &file2_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + + let index = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("dir1/file1.txt") + .expect("Resource not found"); + assert_eq!(resource, &expected_resource1); + + let resource = index + .get_resource_by_path("dir2/file2.txt") + .expect("Resource not found"); + assert_eq!(resource, &expected_resource2); + } + + /// Test updating the resource index. + /// + /// ## Test scenario: + /// - Create files within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index initially contains the expected number of + /// entries. + /// - Create a new file, modify an existing file, and remove another + /// file. + /// - Update the resource index. + /// - Assert that the index contains the expected number of entries + /// after the update. + /// - Assert that the entries in the index match the expected state + /// after the update. 
+ #[test] + fn test_resource_index_update() { + let temp_dir = + TempDir::with_prefix("ark_test_resource_index_update") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let image_path = root_path.join("image.png"); + fs::write(&image_path, "image content") + .expect("Failed to write to file"); + + let mut index = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 2); + + // create new file + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "new file content") + .expect("Failed to write to file"); + + // modify file + fs::write(&file_path, "updated file content") + .expect("Failed to write to file"); + + // remove file + fs::remove_file(&image_path).expect("Failed to remove file"); + + index + .update_all() + .expect("Failed to update index"); + // Index now contains 2 resources (file.txt and new_file.txt) + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("file.txt") + .expect("Resource not found"); + let expected_resource = get_indexed_resource_from_file( + &file_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + assert_eq!(resource, &expected_resource); + + let _resource = index + .get_resource_by_path("new_file.txt") + .expect("Resource not found"); + + assert!(index.get_resource_by_path("image.png").is_none()); + } + + /// Test adding colliding files to the index. + /// + /// ## Test scenario: + /// - Create a file within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index initially contains the expected number of + /// entries. + /// - Create a new file with the same checksum as the existing file. + /// - Update the index to pick up the new file. 
+ /// - Assert that the index contains the expected number of entries + /// after the addition. + /// - Assert index.collisions contains the expected number of entries. + #[test] + fn test_add_colliding_files() { + let temp_dir = TempDir::with_prefix("ark_test_add_colliding_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let mut index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "file content") + .expect("Failed to write to file"); + + index + .update_all() + .expect("Failed to update index"); + + assert_eq!(index.len(), 2); + assert_eq!(index.collisions().len(), 1); + } + + /// Test `ResourceIndex::num_collisions()` method. + /// + /// ## Test scenario: + /// - Create a file within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index initially contains the expected number of + /// entries. + /// - Create 2 new files with the same checksum as the existing file. + /// - Update the index. + /// - Assert that the index contains the expected number of entries + /// after the update. 
+ #[test] + fn test_num_collisions() { + let temp_dir = TempDir::with_prefix("ark_test_num_collisions") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let mut index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "file content") + .expect("Failed to write to file"); + + let new_file_path2 = root_path.join("new_file2.txt"); + fs::write(&new_file_path2, "file content") + .expect("Failed to write to file"); + + index + .update_all() + .expect("Failed to update index"); + + assert_eq!(index.len(), 3); + assert_eq!(index.num_collisions(), 3); + } + + /// Test that we don't index hidden files. + /// + /// ## Test scenario: + /// - Create a hidden file within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index initially contains the expected number of + /// entries. 
(0) + #[test] + fn test_hidden_files() { + let temp_dir = TempDir::with_prefix("ark_test_hidden_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join(".hidden_file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 0); + } + }; +} diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs deleted file mode 100644 index b2601cd0..00000000 --- a/fs-index/src/tests.rs +++ /dev/null @@ -1,435 +0,0 @@ -use std::{fs, path::Path}; - -use anyhow::{anyhow, Result}; -use tempfile::TempDir; - -use data_resource::ResourceId; -#[cfg(feature = "blake3")] -use dev_hash::Blake3 as HashType; -#[cfg(not(feature = "blake3"))] -use dev_hash::Crc32 as HashType; - -use super::*; -use crate::{index::IndexedResource, utils::load_or_build_index}; - -/// A helper function to get [`IndexedResource`] from a file path -fn get_indexed_resource_from_file>( - path: P, - parent_dir: P, -) -> Result> { - let id = HashType::from_path(&path)?; - - let relative_path = path - .as_ref() - .strip_prefix(parent_dir) - .map_err(|_| anyhow!("Failed to get relative path"))?; - - Ok(IndexedResource::new( - id, - relative_path.to_path_buf(), - fs::metadata(path)?.modified()?, - )) -} - -/// Test storing and loading the resource index. -/// -/// ## Test scenario: -/// - Build a resource index in the temporary directory. -/// - Store the index. -/// - Load the stored index. -/// - Assert that the loaded index matches the original index. 
-#[test] -fn test_store_and_load_index() { - let temp_dir = TempDir::with_prefix("ark_test_store_and_load_index") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); - index.store().expect("Failed to store index"); - - let loaded_index = - load_or_build_index(root_path, false).expect("Failed to load index"); - - assert_eq!(index, loaded_index); -} - -/// Test storing and loading the resource index with collisions. -/// -/// ## Test scenario: -/// - Build a resource index in the temporary directory. -/// - Write duplicate files with the same content. -/// - Store the index. -/// - Load the stored index. -/// - Assert that the loaded index matches the original index. -#[test] -fn test_store_and_load_index_with_collisions() { - let temp_dir = - TempDir::with_prefix("ark_test_store_and_load_index_with_collisions") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let file_path2 = root_path.join("file2.txt"); - fs::write(&file_path2, "file content").expect("Failed to write to file"); - - let file_path3 = root_path.join("file3.txt"); - fs::write(&file_path3, "file content").expect("Failed to write to file"); - - let file_path4 = root_path.join("file4.txt"); - fs::write(&file_path4, "file content").expect("Failed to write to file"); - - // Now we have 4 files with the same content (same checksum) - - let index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - let checksum = - HashType::from_path(&file_path).expect("Failed to get checksum"); - assert_eq!(index.len(), 4); - assert_eq!(index.collisions().len(), 1); - 
assert_eq!(index.collisions()[&checksum].len(), 4); - index.store().expect("Failed to store index"); - - let loaded_index = - load_or_build_index(root_path, false).expect("Failed to load index"); - - assert_eq!(index, loaded_index); -} - -/// Test building an index with a file. -/// -/// ## Test scenario: -/// - Create a file within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index contains one entry. -/// - Assert that the resource retrieved by path matches the expected resource. -/// - Assert that the resource retrieved by ID matches the expected resource. -#[test] -fn test_build_index_with_file() { - let temp_dir = TempDir::with_prefix("ark_test_build_index_with_file") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - let expected_resource = - get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - - let index = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); - - let resource = index - .get_resource_by_path("file.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource); -} - -/// Test building an index with an empty file. -/// -/// ## Test scenario: -/// - Create an empty file within the temporary directory. -/// - Create a file with content within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index contains one entries. 
-#[test] -fn test_build_index_with_empty_file() { - let temp_dir = TempDir::with_prefix("ark_test_build_index_with_empty_file") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let empty_file_path = root_path.join("empty_file.txt"); - fs::write(&empty_file_path, "").expect("Failed to write to file"); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); -} - -/// Test building an index with a directory. -/// -/// ## Test scenario: -/// - Create a subdirectory within the temporary directory. -/// - Create a file within the subdirectory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index contains one entry. -/// - Assert that the resource retrieved by path matches the expected resource. -/// - Assert that the resource retrieved by ID matches the expected resource. -#[test] -fn test_build_index_with_directory() { - let temp_dir = TempDir::with_prefix("ark_test_build_index_with_directory") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let dir_path = root_path.join("dir"); - fs::create_dir(&dir_path).expect("Failed to create dir"); - let file_path = dir_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - let expected_resource = - get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - - let index = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); - - let resource = index - .get_resource_by_path("dir/file.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource); -} - -/// Test building an index with multiple files. 
-/// -/// ## Test scenario: -/// - Create multiple files within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index contains two entries. -/// - Assert that the resource retrieved by path for each file matches the -/// expected resource. -#[test] -fn test_build_index_with_multiple_files() { - let temp_dir = - TempDir::with_prefix("ark_test_build_index_with_multiple_files") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file1_path = root_path.join("file1.txt"); - fs::write(&file1_path, "file1 content").expect("Failed to write to file"); - let file2_path = root_path.join("file2.txt"); - fs::write(&file2_path, "file2 content").expect("Failed to write to file"); - - let expected_resource1 = - get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - let expected_resource2 = - get_indexed_resource_from_file(&file2_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - - let index = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 2); - - let resource = index - .get_resource_by_path("file1.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource1); - - let resource = index - .get_resource_by_path("file2.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource2); -} - -/// Test building an index with multiple directories. -/// -/// ## Test scenario: -/// - Create multiple directories within the temporary directory, each -/// containing a file. -/// - Build a resource index in the temporary directory. -/// - Assert that the index contains two entries. -/// - Assert that the resources retrieved by path for each file match the -/// expected resources. 
-#[test] -fn test_build_index_with_multiple_directories() { - let temp_dir = - TempDir::with_prefix("ark_test_build_index_with_multiple_directories") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let dir1_path = root_path.join("dir1"); - fs::create_dir(&dir1_path).expect("Failed to create dir"); - let file1_path = dir1_path.join("file1.txt"); - fs::write(&file1_path, "file1 content").expect("Failed to write to file"); - - let dir2_path = root_path.join("dir2"); - fs::create_dir(&dir2_path).expect("Failed to create dir"); - let file2_path = dir2_path.join("file2.txt"); - fs::write(&file2_path, "file2 content").expect("Failed to write to file"); - - let expected_resource1 = - get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - let expected_resource2 = - get_indexed_resource_from_file(&file2_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - - let index = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 2); - - let resource = index - .get_resource_by_path("dir1/file1.txt") - .expect("Resource not found"); - assert_eq!(resource, &expected_resource1); - - let resource = index - .get_resource_by_path("dir2/file2.txt") - .expect("Resource not found"); - assert_eq!(resource, &expected_resource2); -} - -/// Test updating the resource index. -/// -/// ## Test scenario: -/// - Create files within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index initially contains the expected number of entries. -/// - Create a new file, modify an existing file, and remove another file. -/// - Update the resource index. -/// - Assert that the index contains the expected number of entries after the -/// update. -/// - Assert that the entries in the index match the expected state after the -/// update. 
-#[test] -fn test_resource_index_update() { - let temp_dir = TempDir::with_prefix("ark_test_resource_index_update") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let image_path = root_path.join("image.png"); - fs::write(&image_path, "image content").expect("Failed to write to file"); - - let mut index = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 2); - - // create new file - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "new file content") - .expect("Failed to write to file"); - - // modify file - fs::write(&file_path, "updated file content") - .expect("Failed to write to file"); - - // remove file - fs::remove_file(&image_path).expect("Failed to remove file"); - - index - .update_all() - .expect("Failed to update index"); - // Index now contains 2 resources (file.txt and new_file.txt) - assert_eq!(index.len(), 2); - - let resource = index - .get_resource_by_path("file.txt") - .expect("Resource not found"); - let expected_resource = - get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - assert_eq!(resource, &expected_resource); - - let _resource = index - .get_resource_by_path("new_file.txt") - .expect("Resource not found"); - - assert!(index.get_resource_by_path("image.png").is_none()); -} - -/// Test adding colliding files to the index. -/// -/// ## Test scenario: -/// - Create a file within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index initially contains the expected number of entries. -/// - Create a new file with the same checksum as the existing file. -/// - Track the addition of the new file in the index. 
-/// - Assert that the index contains the expected number of entries after the -/// addition. -/// - Assert index.collisions contains the expected number of entries. -#[test] -fn test_add_colliding_files() { - let temp_dir = TempDir::with_prefix("ark_test_add_colliding_files") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "file content").expect("Failed to write to file"); - - index - .update_all() - .expect("Failed to update index"); - - assert_eq!(index.len(), 2); - assert_eq!(index.collisions().len(), 1); -} - -/// Test `ResourceIndex::num_collisions()` method. -/// -/// ## Test scenario: -/// - Create a file within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index initially contains the expected number of entries. -/// - Create 2 new files with the same checksum as the existing file. -/// - Update the index. -/// - Assert that the index contains the expected number of entries after the -/// update. 
-#[test] -fn test_num_collisions() { - let temp_dir = TempDir::with_prefix("ark_test_num_collisions") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "file content").expect("Failed to write to file"); - - let new_file_path2 = root_path.join("new_file2.txt"); - fs::write(&new_file_path2, "file content") - .expect("Failed to write to file"); - - index - .update_all() - .expect("Failed to update index"); - - assert_eq!(index.len(), 3); - assert_eq!(index.num_collisions(), 3); -} - -/// Test that we don't index hidden files. -/// -/// ## Test scenario: -/// - Create a hidden file within the temporary directory. -/// - Build a resource index in the temporary directory. -/// - Assert that the index initially contains the expected number of entries. 
-/// (0) -#[test] -fn test_hidden_files() { - let temp_dir = TempDir::with_prefix("ark_test_hidden_files") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join(".hidden_file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 0); -} From c54aaa2121a5b6f9cbf89f728a91e1307cd45ce3 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 7 Jul 2024 11:27:29 +0300 Subject: [PATCH 21/46] refactor(fs-index): refactor test modules into one file Signed-off-by: Tarek --- fs-index/src/lib.rs | 6 +- fs-index/src/test_blake3.rs | 5 - fs-index/src/test_crc32.rs | 5 - fs-index/src/test_utils.rs | 493 --------------------------------- fs-index/src/tests.rs | 539 ++++++++++++++++++++++++++++++++++++ 5 files changed, 540 insertions(+), 508 deletions(-) delete mode 100644 fs-index/src/test_blake3.rs delete mode 100644 fs-index/src/test_crc32.rs delete mode 100644 fs-index/src/test_utils.rs create mode 100644 fs-index/src/tests.rs diff --git a/fs-index/src/lib.rs b/fs-index/src/lib.rs index 2baa373f..59030cef 100644 --- a/fs-index/src/lib.rs +++ b/fs-index/src/lib.rs @@ -7,8 +7,4 @@ pub use utils::load_or_build_index; pub use index::ResourceIndex; #[cfg(test)] -mod test_blake3; -#[cfg(test)] -mod test_crc32; -#[cfg(test)] -mod test_utils; +mod tests; diff --git a/fs-index/src/test_blake3.rs b/fs-index/src/test_blake3.rs deleted file mode 100644 index 7cdd9f98..00000000 --- a/fs-index/src/test_blake3.rs +++ /dev/null @@ -1,5 +0,0 @@ -use dev_hash::Blake3; - -use crate::generate_tests; - -generate_tests!(Blake3); diff --git a/fs-index/src/test_crc32.rs b/fs-index/src/test_crc32.rs deleted file mode 100644 index d05a7d3f..00000000 --- a/fs-index/src/test_crc32.rs +++ /dev/null @@ -1,5 +0,0 @@ -use dev_hash::Crc32; - -use 
crate::generate_tests; - -generate_tests!(Crc32); diff --git a/fs-index/src/test_utils.rs b/fs-index/src/test_utils.rs deleted file mode 100644 index b64e8ab5..00000000 --- a/fs-index/src/test_utils.rs +++ /dev/null @@ -1,493 +0,0 @@ -/// A macro to generate tests for the resource index. -/// -/// This macro generates tests for a given hash type. The hash type must -/// implement the `ResourceId` trait. -#[macro_export] -macro_rules! generate_tests { - ($hash_type:ty) => { - use std::{fs, path::Path}; - - use anyhow::{anyhow, Result}; - use tempfile::TempDir; - - use data_resource::ResourceId; - - use super::*; - use crate::{index::IndexedResource, utils::load_or_build_index}; - - /// A helper function to get [`IndexedResource`] from a file path - fn get_indexed_resource_from_file>( - path: P, - parent_dir: P, - ) -> Result> { - let id = <$hash_type>::from_path(&path)?; - - let relative_path = path - .as_ref() - .strip_prefix(parent_dir) - .map_err(|_| anyhow!("Failed to get relative path"))?; - - Ok(IndexedResource::new( - id, - relative_path.to_path_buf(), - fs::metadata(path)?.modified()?, - )) - } - - /// Test storing and loading the resource index. - /// - /// ## Test scenario: - /// - Build a resource index in the temporary directory. - /// - Store the index. - /// - Load the stored index. - /// - Assert that the loaded index matches the original index. 
- #[test] - fn test_store_and_load_index() { - let temp_dir = - TempDir::with_prefix("ark_test_store_and_load_index") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); - index.store().expect("Failed to store index"); - - let loaded_index = load_or_build_index(root_path, false) - .expect("Failed to load index"); - - assert_eq!(index, loaded_index); - } - - /// Test storing and loading the resource index with collisions. - /// - /// ## Test scenario: - /// - Build a resource index in the temporary directory. - /// - Write duplicate files with the same content. - /// - Store the index. - /// - Load the stored index. - /// - Assert that the loaded index matches the original index. - #[test] - fn test_store_and_load_index_with_collisions() { - let temp_dir = TempDir::with_prefix( - "ark_test_store_and_load_index_with_collisions", - ) - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let file_path2 = root_path.join("file2.txt"); - fs::write(&file_path2, "file content") - .expect("Failed to write to file"); - - let file_path3 = root_path.join("file3.txt"); - fs::write(&file_path3, "file content") - .expect("Failed to write to file"); - - let file_path4 = root_path.join("file4.txt"); - fs::write(&file_path4, "file content") - .expect("Failed to write to file"); - - // Now we have 4 files with the same content (same checksum) - - let index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path).expect("Failed to build index"); - let checksum = <$hash_type>::from_path(&file_path) - .expect("Failed to get checksum"); - 
assert_eq!(index.len(), 4); - assert_eq!(index.collisions().len(), 1); - assert_eq!(index.collisions()[&checksum].len(), 4); - index.store().expect("Failed to store index"); - - let loaded_index = load_or_build_index(root_path, false) - .expect("Failed to load index"); - - assert_eq!(index, loaded_index); - } - - /// Test building an index with a file. - /// - /// ## Test scenario: - /// - Create a file within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index contains one entry. - /// - Assert that the resource retrieved by path matches the expected - /// resource. - /// - Assert that the resource retrieved by ID matches the expected - /// resource. - #[test] - fn test_build_index_with_file() { - let temp_dir = - TempDir::with_prefix("ark_test_build_index_with_file") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - let expected_resource = get_indexed_resource_from_file( - &file_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - - let index = - ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); - - let resource = index - .get_resource_by_path("file.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource); - } - - /// Test building an index with an empty file. - /// - /// ## Test scenario: - /// - Create an empty file within the temporary directory. - /// - Create a file with content within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index contains one entries. 
- #[test] - fn test_build_index_with_empty_file() { - let temp_dir = - TempDir::with_prefix("ark_test_build_index_with_empty_file") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let empty_file_path = root_path.join("empty_file.txt"); - fs::write(&empty_file_path, "").expect("Failed to write to file"); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); - } - - /// Test building an index with a directory. - /// - /// ## Test scenario: - /// - Create a subdirectory within the temporary directory. - /// - Create a file within the subdirectory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index contains one entry. - /// - Assert that the resource retrieved by path matches the expected - /// resource. - /// - Assert that the resource retrieved by ID matches the expected - /// resource. - #[test] - fn test_build_index_with_directory() { - let temp_dir = - TempDir::with_prefix("ark_test_build_index_with_directory") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let dir_path = root_path.join("dir"); - fs::create_dir(&dir_path).expect("Failed to create dir"); - let file_path = dir_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - let expected_resource = get_indexed_resource_from_file( - &file_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - - let index = - ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); - - let resource = index - .get_resource_by_path("dir/file.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource); - } - - /// Test building an index with multiple files. 
- /// - /// ## Test scenario: - /// - Create multiple files within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index contains two entries. - /// - Assert that the resource retrieved by path for each file matches - /// the expected resource. - #[test] - fn test_build_index_with_multiple_files() { - let temp_dir = TempDir::with_prefix( - "ark_test_build_index_with_multiple_files", - ) - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file1_path = root_path.join("file1.txt"); - fs::write(&file1_path, "file1 content") - .expect("Failed to write to file"); - let file2_path = root_path.join("file2.txt"); - fs::write(&file2_path, "file2 content") - .expect("Failed to write to file"); - - let expected_resource1 = get_indexed_resource_from_file( - &file1_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - let expected_resource2 = get_indexed_resource_from_file( - &file2_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - - let index = - ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 2); - - let resource = index - .get_resource_by_path("file1.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource1); - - let resource = index - .get_resource_by_path("file2.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource2); - } - - /// Test building an index with multiple directories. - /// - /// ## Test scenario: - /// - Create multiple directories within the temporary directory, each - /// containing a file. - /// - Build a resource index in the temporary directory. - /// - Assert that the index contains two entries. - /// - Assert that the resources retrieved by path for each file match - /// the expected resources. 
- #[test] - fn test_build_index_with_multiple_directories() { - let temp_dir = TempDir::with_prefix( - "ark_test_build_index_with_multiple_directories", - ) - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let dir1_path = root_path.join("dir1"); - fs::create_dir(&dir1_path).expect("Failed to create dir"); - let file1_path = dir1_path.join("file1.txt"); - fs::write(&file1_path, "file1 content") - .expect("Failed to write to file"); - - let dir2_path = root_path.join("dir2"); - fs::create_dir(&dir2_path).expect("Failed to create dir"); - let file2_path = dir2_path.join("file2.txt"); - fs::write(&file2_path, "file2 content") - .expect("Failed to write to file"); - - let expected_resource1 = get_indexed_resource_from_file( - &file1_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - let expected_resource2 = get_indexed_resource_from_file( - &file2_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - - let index = - ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 2); - - let resource = index - .get_resource_by_path("dir1/file1.txt") - .expect("Resource not found"); - assert_eq!(resource, &expected_resource1); - - let resource = index - .get_resource_by_path("dir2/file2.txt") - .expect("Resource not found"); - assert_eq!(resource, &expected_resource2); - } - - /// Test updating the resource index. - /// - /// ## Test scenario: - /// - Create files within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index initially contains the expected number of - /// entries. - /// - Create a new file, modify an existing file, and remove another - /// file. - /// - Update the resource index. - /// - Assert that the index contains the expected number of entries - /// after the update. - /// - Assert that the entries in the index match the expected state - /// after the update. 
- #[test] - fn test_resource_index_update() { - let temp_dir = - TempDir::with_prefix("ark_test_resource_index_update") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let image_path = root_path.join("image.png"); - fs::write(&image_path, "image content") - .expect("Failed to write to file"); - - let mut index = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 2); - - // create new file - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "new file content") - .expect("Failed to write to file"); - - // modify file - fs::write(&file_path, "updated file content") - .expect("Failed to write to file"); - - // remove file - fs::remove_file(&image_path).expect("Failed to remove file"); - - index - .update_all() - .expect("Failed to update index"); - // Index now contains 2 resources (file.txt and new_file.txt) - assert_eq!(index.len(), 2); - - let resource = index - .get_resource_by_path("file.txt") - .expect("Resource not found"); - let expected_resource = get_indexed_resource_from_file( - &file_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - assert_eq!(resource, &expected_resource); - - let _resource = index - .get_resource_by_path("new_file.txt") - .expect("Resource not found"); - - assert!(index.get_resource_by_path("image.png").is_none()); - } - - /// Test adding colliding files to the index. - /// - /// ## Test scenario: - /// - Create a file within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index initially contains the expected number of - /// entries. - /// - Create a new file with the same checksum as the existing file. - /// - Track the addition of the new file in the index. 
- /// - Assert that the index contains the expected number of entries - /// after the addition. - /// - Assert index.collisions contains the expected number of entries. - #[test] - fn test_add_colliding_files() { - let temp_dir = TempDir::with_prefix("ark_test_add_colliding_files") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let mut index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "file content") - .expect("Failed to write to file"); - - index - .update_all() - .expect("Failed to update index"); - - assert_eq!(index.len(), 2); - assert_eq!(index.collisions().len(), 1); - } - - /// Test `ResourceIndex::num_collisions()` method. - /// - /// ## Test scenario: - /// - Create a file within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index initially contains the expected number of - /// entries. - /// - Create 2 new files with the same checksum as the existing file. - /// - Update the index. - /// - Assert that the index contains the expected number of entries - /// after the update. 
- #[test] - fn test_num_collisions() { - let temp_dir = TempDir::with_prefix("ark_test_num_collisions") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let mut index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "file content") - .expect("Failed to write to file"); - - let new_file_path2 = root_path.join("new_file2.txt"); - fs::write(&new_file_path2, "file content") - .expect("Failed to write to file"); - - index - .update_all() - .expect("Failed to update index"); - - assert_eq!(index.len(), 3); - assert_eq!(index.num_collisions(), 3); - } - - /// Test that we don't index hidden files. - /// - /// ## Test scenario: - /// - Create a hidden file within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index initially contains the expected number of - /// entries. (0) - #[test] - fn test_hidden_files() { - let temp_dir = TempDir::with_prefix("ark_test_hidden_files") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join(".hidden_file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 0); - } - }; -} diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs new file mode 100644 index 00000000..bf0fd7b4 --- /dev/null +++ b/fs-index/src/tests.rs @@ -0,0 +1,539 @@ +/*! 
+# Test Structure for `fs-index` Crate + +This test suite is designed to validate the functionality of `fs-index` crate using different hash functions. +The crate is tested with both cryptographic and non-cryptographic hash functions +to ensure that the resource index works correctly regardless of the underlying hash function used. + +## Modules + +- `blake3`: This module contains tests using the `Blake3` hash function. +- `crc32`: This module contains tests using the `CRC32` hash function. +- `test_utils`: This module contains common helper macro to generate tests for different hash functions. + +*/ + +mod blake3 { + use dev_hash::Blake3; + + use crate::generate_tests_for_hash; + + generate_tests_for_hash!(Blake3); +} + +mod crc32 { + use dev_hash::Crc32; + + use crate::generate_tests_for_hash; + + generate_tests_for_hash!(Crc32); +} + +mod test_utils { + /// A macro to generate tests for the resource index. + /// + /// This macro generates tests for a given hash type. The hash type must + /// implement the `ResourceId` trait. + #[macro_export] + macro_rules! generate_tests_for_hash { + ($hash_type:ty) => { + use std::{fs, path::Path}; + + use anyhow::{anyhow, Result}; + use tempfile::TempDir; + + use data_resource::ResourceId; + + use crate::{ + index::IndexedResource, utils::load_or_build_index, + ResourceIndex, + }; + + /// A helper function to get [`IndexedResource`] from a file path + fn get_indexed_resource_from_file>( + path: P, + parent_dir: P, + ) -> Result> { + let id = <$hash_type>::from_path(&path)?; + + let relative_path = path + .as_ref() + .strip_prefix(parent_dir) + .map_err(|_| anyhow!("Failed to get relative path"))?; + + Ok(IndexedResource::new( + id, + relative_path.to_path_buf(), + fs::metadata(path)?.modified()?, + )) + } + + /// Test storing and loading the resource index. + /// + /// ## Test scenario: + /// - Build a resource index in the temporary directory. + /// - Store the index. + /// - Load the stored index. 
+ /// - Assert that the loaded index matches the original index. + #[test] + fn test_store_and_load_index() { + let temp_dir = + TempDir::with_prefix("ark_test_store_and_load_index") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path) + .expect("Failed to build index"); + assert_eq!(index.len(), 1); + index.store().expect("Failed to store index"); + + let loaded_index = load_or_build_index(root_path, false) + .expect("Failed to load index"); + + assert_eq!(index, loaded_index); + } + + /// Test storing and loading the resource index with collisions. + /// + /// ## Test scenario: + /// - Build a resource index in the temporary directory. + /// - Write duplicate files with the same content. + /// - Store the index. + /// - Load the stored index. + /// - Assert that the loaded index matches the original index. 
+ #[test] + fn test_store_and_load_index_with_collisions() { + let temp_dir = TempDir::with_prefix( + "ark_test_store_and_load_index_with_collisions", + ) + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let file_path2 = root_path.join("file2.txt"); + fs::write(&file_path2, "file content") + .expect("Failed to write to file"); + + let file_path3 = root_path.join("file3.txt"); + fs::write(&file_path3, "file content") + .expect("Failed to write to file"); + + let file_path4 = root_path.join("file4.txt"); + fs::write(&file_path4, "file content") + .expect("Failed to write to file"); + + // Now we have 4 files with the same content (same checksum) + + let index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path) + .expect("Failed to build index"); + let checksum = <$hash_type>::from_path(&file_path) + .expect("Failed to get checksum"); + assert_eq!(index.len(), 4); + assert_eq!(index.collisions().len(), 1); + assert_eq!(index.collisions()[&checksum].len(), 4); + index.store().expect("Failed to store index"); + + let loaded_index = load_or_build_index(root_path, false) + .expect("Failed to load index"); + + assert_eq!(index, loaded_index); + } + + /// Test building an index with a file. + /// + /// ## Test scenario: + /// - Create a file within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index contains one entry. + /// - Assert that the resource retrieved by path matches the + /// expected resource. + /// - Assert that the resource retrieved by ID matches the expected + /// resource. 
+ #[test] + fn test_build_index_with_file() { + let temp_dir = + TempDir::with_prefix("ark_test_build_index_with_file") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + let expected_resource = get_indexed_resource_from_file( + &file_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path) + .expect("Failed to build index"); + assert_eq!(index.len(), 1); + + let resource = index + .get_resource_by_path("file.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource); + } + + /// Test building an index with an empty file. + /// + /// ## Test scenario: + /// - Create an empty file within the temporary directory. + /// - Create a file with content within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index contains one entries. + #[test] + fn test_build_index_with_empty_file() { + let temp_dir = TempDir::with_prefix( + "ark_test_build_index_with_empty_file", + ) + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let empty_file_path = root_path.join("empty_file.txt"); + fs::write(&empty_file_path, "") + .expect("Failed to write to file"); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path) + .expect("Failed to build index"); + assert_eq!(index.len(), 1); + } + + /// Test building an index with a directory. + /// + /// ## Test scenario: + /// - Create a subdirectory within the temporary directory. + /// - Create a file within the subdirectory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index contains one entry. 
+ /// - Assert that the resource retrieved by path matches the + /// expected resource. + /// - Assert that the resource retrieved by ID matches the expected + /// resource. + #[test] + fn test_build_index_with_directory() { + let temp_dir = + TempDir::with_prefix("ark_test_build_index_with_directory") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let dir_path = root_path.join("dir"); + fs::create_dir(&dir_path).expect("Failed to create dir"); + let file_path = dir_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + let expected_resource = get_indexed_resource_from_file( + &file_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path) + .expect("Failed to build index"); + assert_eq!(index.len(), 1); + + let resource = index + .get_resource_by_path("dir/file.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource); + } + + /// Test building an index with multiple files. + /// + /// ## Test scenario: + /// - Create multiple files within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index contains two entries. + /// - Assert that the resource retrieved by path for each file + /// matches the expected resource. 
+ #[test] + fn test_build_index_with_multiple_files() { + let temp_dir = TempDir::with_prefix( + "ark_test_build_index_with_multiple_files", + ) + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file1_path = root_path.join("file1.txt"); + fs::write(&file1_path, "file1 content") + .expect("Failed to write to file"); + let file2_path = root_path.join("file2.txt"); + fs::write(&file2_path, "file2 content") + .expect("Failed to write to file"); + + let expected_resource1 = get_indexed_resource_from_file( + &file1_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + let expected_resource2 = get_indexed_resource_from_file( + &file2_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path) + .expect("Failed to build index"); + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("file1.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource1); + + let resource = index + .get_resource_by_path("file2.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource2); + } + + /// Test building an index with multiple directories. + /// + /// ## Test scenario: + /// - Create multiple directories within the temporary directory, + /// each containing a file. + /// - Build a resource index in the temporary directory. + /// - Assert that the index contains two entries. + /// - Assert that the resources retrieved by path for each file + /// match the expected resources. 
+ #[test] + fn test_build_index_with_multiple_directories() { + let temp_dir = TempDir::with_prefix( + "ark_test_build_index_with_multiple_directories", + ) + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let dir1_path = root_path.join("dir1"); + fs::create_dir(&dir1_path).expect("Failed to create dir"); + let file1_path = dir1_path.join("file1.txt"); + fs::write(&file1_path, "file1 content") + .expect("Failed to write to file"); + + let dir2_path = root_path.join("dir2"); + fs::create_dir(&dir2_path).expect("Failed to create dir"); + let file2_path = dir2_path.join("file2.txt"); + fs::write(&file2_path, "file2 content") + .expect("Failed to write to file"); + + let expected_resource1 = get_indexed_resource_from_file( + &file1_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + let expected_resource2 = get_indexed_resource_from_file( + &file2_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path) + .expect("Failed to build index"); + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("dir1/file1.txt") + .expect("Resource not found"); + assert_eq!(resource, &expected_resource1); + + let resource = index + .get_resource_by_path("dir2/file2.txt") + .expect("Resource not found"); + assert_eq!(resource, &expected_resource2); + } + + /// Test updating the resource index. + /// + /// ## Test scenario: + /// - Create files within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index initially contains the expected number + /// of entries. + /// - Create a new file, modify an existing file, and remove another + /// file. + /// - Update the resource index. + /// - Assert that the index contains the expected number of entries + /// after the update. + /// - Assert that the entries in the index match the expected state + /// after the update. 
+ #[test] + fn test_resource_index_update() { + let temp_dir = + TempDir::with_prefix("ark_test_resource_index_update") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let image_path = root_path.join("image.png"); + fs::write(&image_path, "image content") + .expect("Failed to write to file"); + + let mut index = ResourceIndex::build(root_path) + .expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 2); + + // create new file + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "new file content") + .expect("Failed to write to file"); + + // modify file + fs::write(&file_path, "updated file content") + .expect("Failed to write to file"); + + // remove file + fs::remove_file(&image_path).expect("Failed to remove file"); + + index + .update_all() + .expect("Failed to update index"); + // Index now contains 2 resources (file.txt and new_file.txt) + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("file.txt") + .expect("Resource not found"); + let expected_resource = get_indexed_resource_from_file( + &file_path, + &root_path.to_path_buf(), + ) + .expect("Failed to get indexed resource"); + assert_eq!(resource, &expected_resource); + + let _resource = index + .get_resource_by_path("new_file.txt") + .expect("Resource not found"); + + assert!(index.get_resource_by_path("image.png").is_none()); + } + + /// Test adding colliding files to the index. + /// + /// ## Test scenario: + /// - Create a file within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index initially contains the expected number + /// of entries. + /// - Create a new file with the same checksum as the existing file. + /// - Track the addition of the new file in the index. 
+ /// - Assert that the index contains the expected number of entries + /// after the addition. + /// - Assert index.collisions contains the expected number of + /// entries. + #[test] + fn test_add_colliding_files() { + let temp_dir = + TempDir::with_prefix("ark_test_add_colliding_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let mut index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path) + .expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "file content") + .expect("Failed to write to file"); + + index + .update_all() + .expect("Failed to update index"); + + assert_eq!(index.len(), 2); + assert_eq!(index.collisions().len(), 1); + } + + /// Test `ResourceIndex::num_collisions()` method. + /// + /// ## Test scenario: + /// - Create a file within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index initially contains the expected number + /// of entries. + /// - Create 2 new files with the same checksum as the existing + /// file. + /// - Update the index. + /// - Assert that the index contains the expected number of entries + /// after the update. 
+ #[test] + fn test_num_collisions() { + let temp_dir = TempDir::with_prefix("ark_test_num_collisions") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let mut index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path) + .expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "file content") + .expect("Failed to write to file"); + + let new_file_path2 = root_path.join("new_file2.txt"); + fs::write(&new_file_path2, "file content") + .expect("Failed to write to file"); + + index + .update_all() + .expect("Failed to update index"); + + assert_eq!(index.len(), 3); + assert_eq!(index.num_collisions(), 3); + } + + /// Test that we don't index hidden files. + /// + /// ## Test scenario: + /// - Create a hidden file within the temporary directory. + /// - Build a resource index in the temporary directory. + /// - Assert that the index initially contains the expected number + /// of entries. 
(0) + #[test] + fn test_hidden_files() { + let temp_dir = TempDir::with_prefix("ark_test_hidden_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join(".hidden_file.txt"); + fs::write(&file_path, "file content") + .expect("Failed to write to file"); + + let index: ResourceIndex<$hash_type> = + ResourceIndex::build(root_path) + .expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 0); + } + }; + } +} From 1a73b30bec90b2610c72dceb12cfde8b9be395d7 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 7 Jul 2024 11:37:27 +0300 Subject: [PATCH 22/46] fix(ci): remove blake3 feature flag from build workflow Signed-off-by: Tarek --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e7659544..94591f1c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,7 +39,7 @@ jobs: - name: Run tests run: | cargo test --verbose - cargo test --verbose --features blake3 + cargo test --verbose - name: Build Release run: cargo build --verbose --release @@ -73,7 +73,7 @@ jobs: - name: Run tests run: | cargo test --workspace --verbose - cargo test --workspace --verbose --features blake3 + cargo test --workspace --verbose - name: Install JDK uses: actions/setup-java@v4.2.1 @@ -104,7 +104,7 @@ jobs: - name: Run tests run: | cargo test --workspace --verbose - cargo test --workspace --verbose --features blake3 + cargo test --workspace --verbose - name: Install JDK uses: actions/setup-java@v4.2.1 From 4ab92a03bc280815999e4bb3d310ccde566e51eb Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 7 Jul 2024 17:17:51 +0300 Subject: [PATCH 23/46] feat(index): revise the logic for update_all() Signed-off-by: Tarek --- ark-cli/src/util.rs | 3 - fs-index/src/index.rs | 250 +++++++++++++++++++++++++++++------------- fs-index/src/utils.rs | 101 +++++++++++++++-- 
3 files changed, 263 insertions(+), 91 deletions(-) diff --git a/ark-cli/src/util.rs b/ark-cli/src/util.rs index be5273fa..40eff290 100644 --- a/ark-cli/src/util.rs +++ b/ark-cli/src/util.rs @@ -119,9 +119,6 @@ pub fn monitor_index( if !diff.added().is_empty() { println!("Added: {:?}", diff.added()); } - if !diff.modified().is_empty() { - println!("Modified: {:?}", diff.modified()); - } } } } diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 16f1b41c..720916c4 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -3,17 +3,19 @@ use std::{ fs, hash::Hash, path::{Path, PathBuf}, - time::SystemTime, + time::{Duration, SystemTime}, }; use serde::{Deserialize, Serialize}; -use walkdir::WalkDir; -use data_error::{ArklibError, Result}; +use data_error::Result; use data_resource::ResourceId; use fs_storage::{ARK_FOLDER, INDEX_PATH}; -use crate::utils::should_index; +use crate::utils::{discover_paths, scan_entries}; + +/// The threshold for considering a resource updated +pub const RESOURCE_UPDATED_THRESHOLD: Duration = Duration::from_millis(1); /// Represents a resource in the index #[derive( @@ -123,8 +125,6 @@ where pub struct IndexUpdate { /// Resources that were added during the update added: Vec>, - /// Resources that were modified during the update - modified: Vec>, /// Resources that were removed during the update removed: Vec>, } @@ -135,11 +135,6 @@ impl IndexUpdate { &self.added } - /// Return the resources that were modified during the update - pub fn modified(&self) -> &Vec> { - &self.modified - } - /// Return the resources that were removed during the update pub fn removed(&self) -> &Vec> { &self.removed @@ -245,93 +240,192 @@ impl ResourceIndex { let mut id_to_resources = HashMap::new(); let mut path_to_resource = HashMap::new(); - // Loop through the root path and add resources to the index - let walker = WalkDir::new(&root) - .min_depth(1) // Skip the root directory + // Discover paths in the root directory + let paths = 
discover_paths(&root)?; + let entries: HashMap> = + scan_entries(paths); + + // Strip the root path from the entries + let entries: HashMap> = entries .into_iter() - .filter_entry(should_index); // Skip hidden files - for entry in walker { - let entry = entry.map_err(|e| { - ArklibError::Path(format!("Error walking directory: {}", e)) - })?; - // Ignore directories - if !entry.file_type().is_file() { - continue; - } - let path = entry.path(); - let metadata = fs::metadata(path)?; - // Ignore empty files - if metadata.len() == 0 { - continue; - } - let last_modified = metadata.modified()?; - let id = Id::from_path(path)?; - // Path is relative to the root - let path = path.strip_prefix(&root).map_err(|_| { - ArklibError::Path("Error stripping prefix".to_string()) - })?; - - // Create the resource and add it to the index - let resource = IndexedResource { - id: id.clone(), - path: path.to_path_buf(), - last_modified, - }; - path_to_resource.insert(resource.path.clone(), resource.clone()); + .map(|(path, resource)| { + let relative_path = path.strip_prefix(&root).unwrap().to_path_buf(); + let resource = IndexedResource::new( + resource.id().clone(), + relative_path.clone(), + resource.last_modified(), + ); + (relative_path, resource) + }) + .collect(); + + // Update the path to resource map + path_to_resource.extend(entries.clone()); + + // Update the ID to resources map + for resource in entries.values() { + let id = resource.id().clone(); id_to_resources .entry(id) .or_insert_with(Vec::new) - .push(resource); + .push(resource.clone()); } - Ok(ResourceIndex { + let index = ResourceIndex { root, id_to_resources, path_to_resource, - }) + }; + Ok(index) } /// Update the index with the latest information from the file system pub fn update_all(&mut self) -> Result> { log::debug!("Updating index at root path: {:?}", self.root); + log::trace!("Current index: {:#?}", self); let mut added = Vec::new(); - let mut modified = Vec::new(); let mut removed = Vec::new(); - let 
new_index = ResourceIndex::build(&self.root)?; - - // Compare the new index with the old index - let current_resources = self.resources(); - let new_resources = new_index.resources(); - for resource in new_resources.clone() { - // If the resource is in the old index, - // check if it has been modified - if let Some(current_resource) = - self.get_resource_by_path(&resource.path) - { - if current_resource != &resource { - modified.push(resource.clone()); - } - } - // If the resource is not in the old index, it has been added - else { - added.push(resource.clone()); - } + let current_paths = discover_paths(&self.root)?; + + // Assuming that collection manipulation is faster than repeated + // lookups + let current_entries: HashMap> = + scan_entries(current_paths.clone()); + let previous_entries = self.path_to_resource.clone(); + // `preserved_entries` is the intersection of current_entries and + // previous_entries + let preserved_entries: HashMap> = + current_entries + .iter() + .filter_map(|(path, _resource)| { + previous_entries.get(path).map(|prev_resource| { + (path.clone(), prev_resource.clone()) + }) + }) + .collect(); + + // `created_entries` is the difference between current_entries and + // preserved_entries + let created_entries: HashMap> = + current_entries + .iter() + .filter_map(|(path, resource)| { + if preserved_entries.contains_key(path) { + None + } else { + Some((path.clone(), resource.clone())) + } + }) + .collect(); + + // `updated_entries` is the difference between preserved_entries and + // current_entries where the last modified time has changed + // significantly + let updated_entries: HashMap> = + preserved_entries + .iter() + .filter_map(|(path, resource)| { + if current_entries.contains_key(path) { + None + } else { + let our_entry = + self.path_to_resource.get(path).unwrap(); + let previous_modified = our_entry.last_modified(); + + let current_modified = resource.last_modified(); + + let elapsed_time = match current_modified + 
.duration_since(previous_modified) { + Ok(duration) => duration, + Err(err) => { + log::error!( + "Failed to calculate elapsed time: {:?}", + err + ); + return None; + }}; + + + if elapsed_time > RESOURCE_UPDATED_THRESHOLD { + log::trace!( + "Resource updated: {:?}, previous: {:?}, current: {:?}, elapsed: {:?}", + path, + previous_modified, + current_modified, + elapsed_time + ); + + Some((path.clone(), resource.clone())) + } else { + None + } + } + }) + .collect(); + + // Remove resources that are not in the current entries + let removed_entries: HashMap> = + previous_entries + .iter() + .filter_map(|(path, resource)| { + if preserved_entries.contains_key(path) { + None + } else { + Some((path.clone(), resource.clone())) + } + }) + .collect(); + for (path, resource) in removed_entries { + log::trace! + ("Resource removed: {:?}, last modified: {:?}", path, resource.last_modified()); + + self.path_to_resource.remove(&path); + self.id_to_resources + .get_mut(resource.id()) + .unwrap() + .retain(|r| r.path() != resource.path()); + let id = resource.id().clone(); + let resources = self.id_to_resources.get_mut(&id).unwrap(); + resources.retain(|r| r.path() != resource.path()); + removed.push(resource); } - for resource in current_resources { - // If the resource is not in the new index, it has been removed - if !new_resources.contains(&resource) { - removed.push(resource.clone()); - } + + let added_entries: HashMap> = + updated_entries + .iter() + .chain(created_entries.iter()) + .filter_map(|(path, resource)| { + if self.path_to_resource.contains_key(path) { + None + } else { + Some((path.clone(), resource.clone())) + } + }) + .collect(); + + for (path, resource) in added_entries { + log::trace!("Resource added: {:?}", path); + + // strip the root path from the path + let relative_path = path.strip_prefix(&self.root).unwrap().to_path_buf(); + let resource = IndexedResource::new( + resource.id().clone(), + relative_path.clone(), + resource.last_modified(), + ); + + 
self.path_to_resource.insert(relative_path.clone(), resource.clone()); + let id = resource.id().clone(); + self.id_to_resources + .entry(id) + .or_default() + .push(resource.clone()); + added.push(resource); } - // Update the index with the new index and return the result - *self = new_index; - Ok(IndexUpdate { - added, - modified, - removed, - }) + Ok(IndexUpdate { added, removed }) } } + diff --git a/fs-index/src/utils.rs b/fs-index/src/utils.rs index 71a16180..52f96bc4 100644 --- a/fs-index/src/utils.rs +++ b/fs-index/src/utils.rs @@ -1,18 +1,17 @@ -use std::{fs, io::BufReader, path::Path}; +use std::{ + collections::HashMap, + fs, + io::BufReader, + path::{Path, PathBuf}, +}; + +use walkdir::{DirEntry, WalkDir}; use data_error::{ArklibError, Result}; use data_resource::ResourceId; use fs_storage::{ARK_FOLDER, INDEX_PATH}; -use crate::ResourceIndex; - -/// A helper function to check if the entry should be indexed (not hidden) -pub fn should_index(entry: &walkdir::DirEntry) -> bool { - !entry - .file_name() - .to_string_lossy() - .starts_with('.') -} +use crate::{index::IndexedResource, ResourceIndex}; /// Load the index from the file system fn load_index, Id: ResourceId>( @@ -67,3 +66,85 @@ pub fn load_or_build_index, Id: ResourceId>( Ok(index) } } + +/// A helper function to discover paths in a directory +/// +/// This function walks the directory tree starting from the root path and +/// returns a list of file paths. +/// +/// Ignore hidden files and empty files. 
+pub(crate) fn discover_paths>( + root_path: P, +) -> Result> { + log::debug!("Discovering paths at root path: {:?}", root_path.as_ref()); + + let walker = WalkDir::new(&root_path) + .min_depth(1) // Skip the root directory + .into_iter() + .filter_entry(should_index); // Skip hidden files and empty files + + // Filter out directories + let paths = walker + .filter_map(|entry| { + let entry = entry.ok()?; + if entry.file_type().is_file() { + Some(entry) + } else { + None + } + }) + .collect(); + + Ok(paths) +} + +/// A helper function to scan entries and create indexed resources +pub(crate) fn scan_entries( + paths: Vec, +) -> HashMap> { + let mut path_to_resource = HashMap::new(); + for entry in paths { + let resource = scan_entry(entry); + path_to_resource.insert(resource.path().to_path_buf(), resource); + } + path_to_resource +} + +/// A helper function to scan one entry and create an indexed resource +pub(crate) fn scan_entry( + entry: DirEntry, +) -> IndexedResource { + let path = entry.path().to_path_buf(); + let metadata = entry.metadata().unwrap(); + let last_modified = metadata.modified().unwrap(); + + // Get the ID of the resource + let id = Id::from_path(&path).unwrap(); + + // Create the indexed resource + IndexedResource::new(id, path, last_modified) +} + +/// A helper function to check if the entry should be indexed (not hidden or +/// empty) +fn should_index(entry: &walkdir::DirEntry) -> bool { + // Check if the entry is hidden + if entry + .file_name() + .to_string_lossy() + .starts_with('.') + { + return false; + } + + // Check if the entry is empty + if entry + .metadata() + .map(|m| m.len() == 0) + .unwrap_or(false) + { + return false; + } + + true +} From b113f85309ff4047cf073f41ebdaf77660793eec Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 7 Jul 2024 17:39:52 +0300 Subject: [PATCH 24/46] fix: cargo fmt --all Signed-off-by: Tarek --- fs-index/src/index.rs | 21 +++++++++++++-------- fs-storage/src/btreemap_iter.rs | 4 +--- 
fs-storage/src/jni/btreemap_iter.rs | 3 +-- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 720916c4..ee9c2538 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -249,7 +249,8 @@ impl ResourceIndex { let entries: HashMap> = entries .into_iter() .map(|(path, resource)| { - let relative_path = path.strip_prefix(&root).unwrap().to_path_buf(); + let relative_path = + path.strip_prefix(&root).unwrap().to_path_buf(); let resource = IndexedResource::new( resource.id().clone(), relative_path.clone(), @@ -346,8 +347,6 @@ impl ResourceIndex { ); return None; }}; - - if elapsed_time > RESOURCE_UPDATED_THRESHOLD { log::trace!( "Resource updated: {:?}, previous: {:?}, current: {:?}, elapsed: {:?}", @@ -378,8 +377,11 @@ impl ResourceIndex { }) .collect(); for (path, resource) in removed_entries { - log::trace! - ("Resource removed: {:?}, last modified: {:?}", path, resource.last_modified()); + log::trace!( + "Resource removed: {:?}, last modified: {:?}", + path, + resource.last_modified() + ); self.path_to_resource.remove(&path); self.id_to_resources @@ -409,14 +411,18 @@ impl ResourceIndex { log::trace!("Resource added: {:?}", path); // strip the root path from the path - let relative_path = path.strip_prefix(&self.root).unwrap().to_path_buf(); + let relative_path = path + .strip_prefix(&self.root) + .unwrap() + .to_path_buf(); let resource = IndexedResource::new( resource.id().clone(), relative_path.clone(), resource.last_modified(), ); - self.path_to_resource.insert(relative_path.clone(), resource.clone()); + self.path_to_resource + .insert(relative_path.clone(), resource.clone()); let id = resource.id().clone(); self.id_to_resources .entry(id) @@ -428,4 +434,3 @@ impl ResourceIndex { Ok(IndexUpdate { added, removed }) } } - diff --git a/fs-storage/src/btreemap_iter.rs b/fs-storage/src/btreemap_iter.rs index b3d7a69f..bb52d6f1 100644 --- a/fs-storage/src/btreemap_iter.rs +++ 
b/fs-storage/src/btreemap_iter.rs @@ -1,7 +1,5 @@ use crate::base_storage::BaseStorage; -use std::cell::RefCell; -use std::collections::btree_map::Iter; -use std::rc::Rc; +use std::{cell::RefCell, collections::btree_map::Iter, rc::Rc}; pub struct BTreeMapIterator<'a, K, V> { iter: Rc>>, diff --git a/fs-storage/src/jni/btreemap_iter.rs b/fs-storage/src/jni/btreemap_iter.rs index 6ebd6d61..61573e09 100644 --- a/fs-storage/src/jni/btreemap_iter.rs +++ b/fs-storage/src/jni/btreemap_iter.rs @@ -1,5 +1,4 @@ -use crate::btreemap_iter::BTreeMapIterator; -use crate::file_storage::FileStorage; +use crate::{btreemap_iter::BTreeMapIterator, file_storage::FileStorage}; // This is the interface to the JVM that we'll call the majority of our // methods on. use jni::JNIEnv; From b503ebc4d82e5b7f9fa6ecf7787e4f05e8ac4fc3 Mon Sep 17 00:00:00 2001 From: Tarek Date: Thu, 18 Jul 2024 19:46:50 +0300 Subject: [PATCH 25/46] test(fs-index): add macro for parameterized hash function tests - Implement `hash_tests!` macro to generate tests for function and hash type pairs. - Implement test functions parameterized by hash type to validate `ResourceIndex` operations. - Add various tests for both `Blake3` and `Crc32` hash algorithms. Signed-off-by: Tarek --- fs-index/src/tests.rs | 997 ++++++++++++++++++++---------------------- 1 file changed, 474 insertions(+), 523 deletions(-) diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index bf0fd7b4..8b001649 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -1,539 +1,490 @@ -/*! -# Test Structure for `fs-index` Crate - -This test suite is designed to validate the functionality of `fs-index` crate using different hash functions. -The crate is tested with both cryptographic and non-cryptographic hash functions -to ensure that the resource index works correctly regardless of the underlying hash function used. - -## Modules - -- `blake3`: This module contains tests using the `Blake3` hash function. 
-- `crc32`: This module contains tests using the `CRC32` hash function. -- `test_utils`: This module contains common helper macro to generate tests for different hash functions. - -*/ - -mod blake3 { - use dev_hash::Blake3; - - use crate::generate_tests_for_hash; - - generate_tests_for_hash!(Blake3); +//! This module provides tests for the `ResourceIndex` functionality using +//! different hash algorithms. +//! +//! The tests are parameterized by various hash types, such as `Blake3` and +//! `Crc32`, to ensure the implementation works consistently across different +//! hashing algorithms. +//! +//! # Structure +//! +//! - **Macros**: +//! - `hash_tests!`: Generates test functions for pairs of test functions and +//! hash types. +//! +//! - **Test Functions**: +//! - Defined to test various aspects of `ResourceIndex`, parameterized by +//! hash type. +//! +//! - **Helper Functions**: +//! - `get_indexed_resource_from_file`: Helper to create `IndexedResource` +//! from a file path. +//! +//! # Usage +//! +//! To add a new test for a specific hash type, add a new entry in the +//! `hash_tests!` macro invocation with the appropriate function and hash type. + +use dev_hash::{Blake3, Crc32}; +use std::{fs, path::Path}; + +use anyhow::{anyhow, Result}; +use tempfile::TempDir; + +use data_resource::ResourceId; + +use crate::{ + index::IndexedResource, utils::load_or_build_index, ResourceIndex, +}; + +/// A macro to generate tests for function and hash type pairs. +#[macro_export] +macro_rules! hash_tests { + ($($name:ident: ($func:ident, $hash_type:ty),)*) => { + $( + #[test] + fn $name() { + $func::<$hash_type>(); + } + )* + }; } -mod crc32 { - use dev_hash::Crc32; - - use crate::generate_tests_for_hash; - - generate_tests_for_hash!(Crc32); +// Use the macro to generate tests for the specified function and hash type +// pairs +hash_tests! 
{ + // CRC32 + test_store_and_load_index_crc32: (test_store_and_load_index, Crc32), + test_store_and_load_index_with_collisions_crc32: (test_store_and_load_index_with_collisions, Crc32), + test_build_index_with_file_crc32: (test_build_index_with_file, Crc32), + test_build_index_with_empty_file_crc32: (test_build_index_with_empty_file, Crc32), + test_build_index_with_directory_crc32: (test_build_index_with_directory, Crc32), + test_build_index_with_multiple_files_crc32: (test_build_index_with_multiple_files, Crc32), + test_build_index_with_multiple_directories_crc32: (test_build_index_with_multiple_directories, Crc32), + test_resource_index_update_crc32: (test_resource_index_update, Crc32), + test_add_colliding_files_crc32: (test_add_colliding_files, Crc32), + test_num_collisions_crc32: (test_num_collisions, Crc32), + test_hidden_files_crc32: (test_hidden_files, Crc32), + + // Blake3 + test_store_and_load_index_blake3: (test_store_and_load_index, Blake3), + test_store_and_load_index_with_collisions_blake3: (test_store_and_load_index_with_collisions, Blake3), + test_build_index_with_file_blake3: (test_build_index_with_file, Blake3), + test_build_index_with_empty_file_blake3: (test_build_index_with_empty_file, Blake3), + test_build_index_with_directory_blake3: (test_build_index_with_directory, Blake3), + test_build_index_with_multiple_files_blake3: (test_build_index_with_multiple_files, Blake3), + test_build_index_with_multiple_directories_blake3: (test_build_index_with_multiple_directories, Blake3), + test_resource_index_update_blake3: (test_resource_index_update, Blake3), + test_add_colliding_files_blake3: (test_add_colliding_files, Blake3), + test_num_collisions_blake3: (test_num_collisions, Blake3), + test_hidden_files_blake3: (test_hidden_files, Blake3), } -mod test_utils { - /// A macro to generate tests for the resource index. - /// - /// This macro generates tests for a given hash type. The hash type must - /// implement the `ResourceId` trait. 
- #[macro_export] - macro_rules! generate_tests_for_hash { - ($hash_type:ty) => { - use std::{fs, path::Path}; - - use anyhow::{anyhow, Result}; - use tempfile::TempDir; - - use data_resource::ResourceId; - - use crate::{ - index::IndexedResource, utils::load_or_build_index, - ResourceIndex, - }; - - /// A helper function to get [`IndexedResource`] from a file path - fn get_indexed_resource_from_file>( - path: P, - parent_dir: P, - ) -> Result> { - let id = <$hash_type>::from_path(&path)?; - - let relative_path = path - .as_ref() - .strip_prefix(parent_dir) - .map_err(|_| anyhow!("Failed to get relative path"))?; - - Ok(IndexedResource::new( - id, - relative_path.to_path_buf(), - fs::metadata(path)?.modified()?, - )) - } +/// A helper function to get [`IndexedResource`] from a file path +fn get_indexed_resource_from_file>( + path: P, + parent_dir: P, +) -> Result> { + let id = H::from_path(&path)?; + + let relative_path = path + .as_ref() + .strip_prefix(parent_dir) + .map_err(|_| anyhow!("Failed to get relative path"))?; + + Ok(IndexedResource::new( + id, + relative_path.to_path_buf(), + fs::metadata(path)?.modified()?, + )) +} - /// Test storing and loading the resource index. - /// - /// ## Test scenario: - /// - Build a resource index in the temporary directory. - /// - Store the index. - /// - Load the stored index. - /// - Assert that the loaded index matches the original index. 
- #[test] - fn test_store_and_load_index() { - let temp_dir = - TempDir::with_prefix("ark_test_store_and_load_index") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path) - .expect("Failed to build index"); - assert_eq!(index.len(), 1); - index.store().expect("Failed to store index"); - - let loaded_index = load_or_build_index(root_path, false) - .expect("Failed to load index"); - - assert_eq!(index, loaded_index); - } +/// Test storing and loading the resource index. +/// +/// ## Test scenario: +/// - Build a resource index in the temporary directory. +/// - Store the index. +/// - Load the stored index. +/// - Assert that the loaded index matches the original index. +fn test_store_and_load_index() { + let temp_dir = TempDir::with_prefix("ark_test_store_and_load_index") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); + index.store().expect("Failed to store index"); + + let loaded_index = + load_or_build_index(root_path, false).expect("Failed to load index"); + + assert_eq!(index, loaded_index); +} - /// Test storing and loading the resource index with collisions. - /// - /// ## Test scenario: - /// - Build a resource index in the temporary directory. - /// - Write duplicate files with the same content. - /// - Store the index. - /// - Load the stored index. - /// - Assert that the loaded index matches the original index. 
- #[test] - fn test_store_and_load_index_with_collisions() { - let temp_dir = TempDir::with_prefix( - "ark_test_store_and_load_index_with_collisions", - ) - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let file_path2 = root_path.join("file2.txt"); - fs::write(&file_path2, "file content") - .expect("Failed to write to file"); - - let file_path3 = root_path.join("file3.txt"); - fs::write(&file_path3, "file content") - .expect("Failed to write to file"); - - let file_path4 = root_path.join("file4.txt"); - fs::write(&file_path4, "file content") - .expect("Failed to write to file"); - - // Now we have 4 files with the same content (same checksum) - - let index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path) - .expect("Failed to build index"); - let checksum = <$hash_type>::from_path(&file_path) - .expect("Failed to get checksum"); - assert_eq!(index.len(), 4); - assert_eq!(index.collisions().len(), 1); - assert_eq!(index.collisions()[&checksum].len(), 4); - index.store().expect("Failed to store index"); - - let loaded_index = load_or_build_index(root_path, false) - .expect("Failed to load index"); - - assert_eq!(index, loaded_index); - } +/// Test storing and loading the resource index with collisions. +/// +/// ## Test scenario: +/// - Build a resource index in the temporary directory. +/// - Write duplicate files with the same content. +/// - Store the index. +/// - Load the stored index. +/// - Assert that the loaded index matches the original index. 
+fn test_store_and_load_index_with_collisions() { + let temp_dir = + TempDir::with_prefix("ark_test_store_and_load_index_with_collisions") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let file_path2 = root_path.join("file2.txt"); + fs::write(&file_path2, "file content").expect("Failed to write to file"); + + let file_path3 = root_path.join("file3.txt"); + fs::write(&file_path3, "file content").expect("Failed to write to file"); + + let file_path4 = root_path.join("file4.txt"); + fs::write(&file_path4, "file content").expect("Failed to write to file"); + + // Now we have 4 files with the same content (same checksum) + + let index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + let checksum = H::from_path(&file_path).expect("Failed to get checksum"); + assert_eq!(index.len(), 4); + assert_eq!(index.collisions().len(), 1); + assert_eq!(index.collisions()[&checksum].len(), 4); + index.store().expect("Failed to store index"); + + let loaded_index = + load_or_build_index(root_path, false).expect("Failed to load index"); + + assert_eq!(index, loaded_index); +} - /// Test building an index with a file. - /// - /// ## Test scenario: - /// - Create a file within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index contains one entry. - /// - Assert that the resource retrieved by path matches the - /// expected resource. - /// - Assert that the resource retrieved by ID matches the expected - /// resource. 
- #[test] - fn test_build_index_with_file() { - let temp_dir = - TempDir::with_prefix("ark_test_build_index_with_file") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - let expected_resource = get_indexed_resource_from_file( - &file_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - - let index = ResourceIndex::build(root_path) - .expect("Failed to build index"); - assert_eq!(index.len(), 1); - - let resource = index - .get_resource_by_path("file.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource); - } +/// Test building an index with a file. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index contains one entry. +/// - Assert that the resource retrieved by path matches the expected resource. +/// - Assert that the resource retrieved by ID matches the expected resource. +fn test_build_index_with_file() { + let temp_dir = TempDir::with_prefix("ark_test_build_index_with_file") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + let expected_resource: IndexedResource = + get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); + + let resource = index + .get_resource_by_path("file.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource); +} - /// Test building an index with an empty file. - /// - /// ## Test scenario: - /// - Create an empty file within the temporary directory. 
- /// - Create a file with content within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index contains one entries. - #[test] - fn test_build_index_with_empty_file() { - let temp_dir = TempDir::with_prefix( - "ark_test_build_index_with_empty_file", - ) - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let empty_file_path = root_path.join("empty_file.txt"); - fs::write(&empty_file_path, "") - .expect("Failed to write to file"); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path) - .expect("Failed to build index"); - assert_eq!(index.len(), 1); - } +/// Test building an index with an empty file. +/// +/// ## Test scenario: +/// - Create an empty file within the temporary directory. +/// - Create a file with content within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index contains one entries. +fn test_build_index_with_empty_file() { + let temp_dir = TempDir::with_prefix("ark_test_build_index_with_empty_file") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let empty_file_path = root_path.join("empty_file.txt"); + fs::write(&empty_file_path, "").expect("Failed to write to file"); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); +} - /// Test building an index with a directory. - /// - /// ## Test scenario: - /// - Create a subdirectory within the temporary directory. - /// - Create a file within the subdirectory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index contains one entry. 
- /// - Assert that the resource retrieved by path matches the - /// expected resource. - /// - Assert that the resource retrieved by ID matches the expected - /// resource. - #[test] - fn test_build_index_with_directory() { - let temp_dir = - TempDir::with_prefix("ark_test_build_index_with_directory") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let dir_path = root_path.join("dir"); - fs::create_dir(&dir_path).expect("Failed to create dir"); - let file_path = dir_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - let expected_resource = get_indexed_resource_from_file( - &file_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - - let index = ResourceIndex::build(root_path) - .expect("Failed to build index"); - assert_eq!(index.len(), 1); - - let resource = index - .get_resource_by_path("dir/file.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource); - } +/// Test building an index with a directory. +/// +/// ## Test scenario: +/// - Create a subdirectory within the temporary directory. +/// - Create a file within the subdirectory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index contains one entry. +/// - Assert that the resource retrieved by path matches the expected resource. +/// - Assert that the resource retrieved by ID matches the expected resource. 
+fn test_build_index_with_directory() { + let temp_dir = TempDir::with_prefix("ark_test_build_index_with_directory") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let dir_path = root_path.join("dir"); + fs::create_dir(&dir_path).expect("Failed to create dir"); + let file_path = dir_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + let expected_resource: IndexedResource = + get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1); + + let resource = index + .get_resource_by_path("dir/file.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource); +} - /// Test building an index with multiple files. - /// - /// ## Test scenario: - /// - Create multiple files within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index contains two entries. - /// - Assert that the resource retrieved by path for each file - /// matches the expected resource. 
- #[test] - fn test_build_index_with_multiple_files() { - let temp_dir = TempDir::with_prefix( - "ark_test_build_index_with_multiple_files", - ) - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file1_path = root_path.join("file1.txt"); - fs::write(&file1_path, "file1 content") - .expect("Failed to write to file"); - let file2_path = root_path.join("file2.txt"); - fs::write(&file2_path, "file2 content") - .expect("Failed to write to file"); - - let expected_resource1 = get_indexed_resource_from_file( - &file1_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - let expected_resource2 = get_indexed_resource_from_file( - &file2_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - - let index = ResourceIndex::build(root_path) - .expect("Failed to build index"); - assert_eq!(index.len(), 2); - - let resource = index - .get_resource_by_path("file1.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource1); - - let resource = index - .get_resource_by_path("file2.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource2); - } +/// Test building an index with multiple files. +/// +/// ## Test scenario: +/// - Create multiple files within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index contains two entries. +/// - Assert that the resource retrieved by path for each file matches the +/// expected resource. 
+fn test_build_index_with_multiple_files() { + let temp_dir = + TempDir::with_prefix("ark_test_build_index_with_multiple_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file1_path = root_path.join("file1.txt"); + fs::write(&file1_path, "file1 content").expect("Failed to write to file"); + let file2_path = root_path.join("file2.txt"); + fs::write(&file2_path, "file2 content").expect("Failed to write to file"); + + let expected_resource1: IndexedResource = + get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + let expected_resource2 = + get_indexed_resource_from_file(&file2_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("file1.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource1); + + let resource = index + .get_resource_by_path("file2.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource2); +} - /// Test building an index with multiple directories. - /// - /// ## Test scenario: - /// - Create multiple directories within the temporary directory, - /// each containing a file. - /// - Build a resource index in the temporary directory. - /// - Assert that the index contains two entries. - /// - Assert that the resources retrieved by path for each file - /// match the expected resources. 
- #[test] - fn test_build_index_with_multiple_directories() { - let temp_dir = TempDir::with_prefix( - "ark_test_build_index_with_multiple_directories", - ) - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let dir1_path = root_path.join("dir1"); - fs::create_dir(&dir1_path).expect("Failed to create dir"); - let file1_path = dir1_path.join("file1.txt"); - fs::write(&file1_path, "file1 content") - .expect("Failed to write to file"); - - let dir2_path = root_path.join("dir2"); - fs::create_dir(&dir2_path).expect("Failed to create dir"); - let file2_path = dir2_path.join("file2.txt"); - fs::write(&file2_path, "file2 content") - .expect("Failed to write to file"); - - let expected_resource1 = get_indexed_resource_from_file( - &file1_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - let expected_resource2 = get_indexed_resource_from_file( - &file2_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - - let index = ResourceIndex::build(root_path) - .expect("Failed to build index"); - assert_eq!(index.len(), 2); - - let resource = index - .get_resource_by_path("dir1/file1.txt") - .expect("Resource not found"); - assert_eq!(resource, &expected_resource1); - - let resource = index - .get_resource_by_path("dir2/file2.txt") - .expect("Resource not found"); - assert_eq!(resource, &expected_resource2); - } +/// Test building an index with multiple directories. +/// +/// ## Test scenario: +/// - Create multiple directories within the temporary directory, each +/// containing a file. +/// - Build a resource index in the temporary directory. +/// - Assert that the index contains two entries. +/// - Assert that the resources retrieved by path for each file match the +/// expected resources. 
+fn test_build_index_with_multiple_directories() { + let temp_dir = + TempDir::with_prefix("ark_test_build_index_with_multiple_directories") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let dir1_path = root_path.join("dir1"); + fs::create_dir(&dir1_path).expect("Failed to create dir"); + let file1_path = dir1_path.join("file1.txt"); + fs::write(&file1_path, "file1 content").expect("Failed to write to file"); + + let dir2_path = root_path.join("dir2"); + fs::create_dir(&dir2_path).expect("Failed to create dir"); + let file2_path = dir2_path.join("file2.txt"); + fs::write(&file2_path, "file2 content").expect("Failed to write to file"); + + let expected_resource1: IndexedResource = + get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + let expected_resource2 = + get_indexed_resource_from_file(&file2_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("dir1/file1.txt") + .expect("Resource not found"); + assert_eq!(resource, &expected_resource1); + + let resource = index + .get_resource_by_path("dir2/file2.txt") + .expect("Resource not found"); + assert_eq!(resource, &expected_resource2); +} - /// Test updating the resource index. - /// - /// ## Test scenario: - /// - Create files within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index initially contains the expected number - /// of entries. - /// - Create a new file, modify an existing file, and remove another - /// file. - /// - Update the resource index. - /// - Assert that the index contains the expected number of entries - /// after the update. - /// - Assert that the entries in the index match the expected state - /// after the update. 
- #[test] - fn test_resource_index_update() { - let temp_dir = - TempDir::with_prefix("ark_test_resource_index_update") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let image_path = root_path.join("image.png"); - fs::write(&image_path, "image content") - .expect("Failed to write to file"); - - let mut index = ResourceIndex::build(root_path) - .expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 2); - - // create new file - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "new file content") - .expect("Failed to write to file"); - - // modify file - fs::write(&file_path, "updated file content") - .expect("Failed to write to file"); - - // remove file - fs::remove_file(&image_path).expect("Failed to remove file"); - - index - .update_all() - .expect("Failed to update index"); - // Index now contains 2 resources (file.txt and new_file.txt) - assert_eq!(index.len(), 2); - - let resource = index - .get_resource_by_path("file.txt") - .expect("Resource not found"); - let expected_resource = get_indexed_resource_from_file( - &file_path, - &root_path.to_path_buf(), - ) - .expect("Failed to get indexed resource"); - assert_eq!(resource, &expected_resource); - - let _resource = index - .get_resource_by_path("new_file.txt") - .expect("Resource not found"); - - assert!(index.get_resource_by_path("image.png").is_none()); - } +/// Test updating the resource index. +/// +/// ## Test scenario: +/// - Create files within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains the expected number of entries. +/// - Create a new file, modify an existing file, and remove another file. +/// - Update the resource index. 
+/// - Assert that the index contains the expected number of entries after the +/// update. +/// - Assert that the entries in the index match the expected state after the +/// update. +fn test_resource_index_update() { + let temp_dir = TempDir::with_prefix("ark_test_resource_index_update") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let image_path = root_path.join("image.png"); + fs::write(&image_path, "image content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 2); + + // create new file + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "new file content") + .expect("Failed to write to file"); + + // modify file + fs::write(&file_path, "updated file content") + .expect("Failed to write to file"); + + // remove file + fs::remove_file(&image_path).expect("Failed to remove file"); + + index + .update_all() + .expect("Failed to update index"); + // Index now contains 2 resources (file.txt and new_file.txt) + assert_eq!(index.len(), 2); + + let resource = index + .get_resource_by_path("file.txt") + .expect("Resource not found"); + let expected_resource = + get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + assert_eq!(resource, &expected_resource); + + let _resource = index + .get_resource_by_path("new_file.txt") + .expect("Resource not found"); + + assert!(index.get_resource_by_path("image.png").is_none()); +} - /// Test adding colliding files to the index. - /// - /// ## Test scenario: - /// - Create a file within the temporary directory. - /// - Build a resource index in the temporary directory. 
- /// - Assert that the index initially contains the expected number - /// of entries. - /// - Create a new file with the same checksum as the existing file. - /// - Track the addition of the new file in the index. - /// - Assert that the index contains the expected number of entries - /// after the addition. - /// - Assert index.collisions contains the expected number of - /// entries. - #[test] - fn test_add_colliding_files() { - let temp_dir = - TempDir::with_prefix("ark_test_add_colliding_files") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let mut index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path) - .expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "file content") - .expect("Failed to write to file"); - - index - .update_all() - .expect("Failed to update index"); - - assert_eq!(index.len(), 2); - assert_eq!(index.collisions().len(), 1); - } +/// Test adding colliding files to the index. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains the expected number of entries. +/// - Create a new file with the same checksum as the existing file. +/// - Track the addition of the new file in the index. +/// - Assert that the index contains the expected number of entries after the +/// addition. +/// - Assert index.collisions contains the expected number of entries. 
+fn test_add_colliding_files() { + let temp_dir = TempDir::with_prefix("ark_test_add_colliding_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "file content").expect("Failed to write to file"); + + index + .update_all() + .expect("Failed to update index"); + + assert_eq!(index.len(), 2); + assert_eq!(index.collisions().len(), 1); +} - /// Test `ResourceIndex::num_collisions()` method. - /// - /// ## Test scenario: - /// - Create a file within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index initially contains the expected number - /// of entries. - /// - Create 2 new files with the same checksum as the existing - /// file. - /// - Update the index. - /// - Assert that the index contains the expected number of entries - /// after the update. 
- #[test] - fn test_num_collisions() { - let temp_dir = TempDir::with_prefix("ark_test_num_collisions") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let mut index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path) - .expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); - - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "file content") - .expect("Failed to write to file"); - - let new_file_path2 = root_path.join("new_file2.txt"); - fs::write(&new_file_path2, "file content") - .expect("Failed to write to file"); - - index - .update_all() - .expect("Failed to update index"); - - assert_eq!(index.len(), 3); - assert_eq!(index.num_collisions(), 3); - } +/// Test `ResourceIndex::num_collisions()` method. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains the expected number of entries. +/// - Create 2 new files with the same checksum as the existing file. +/// - Update the index. +/// - Assert that the index contains the expected number of entries after the +/// update. 
+fn test_num_collisions() { + let temp_dir = TempDir::with_prefix("ark_test_num_collisions") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "file content").expect("Failed to write to file"); + + let new_file_path2 = root_path.join("new_file2.txt"); + fs::write(&new_file_path2, "file content") + .expect("Failed to write to file"); + + index + .update_all() + .expect("Failed to update index"); + + assert_eq!(index.len(), 3); + assert_eq!(index.num_collisions(), 3); +} - /// Test that we don't index hidden files. - /// - /// ## Test scenario: - /// - Create a hidden file within the temporary directory. - /// - Build a resource index in the temporary directory. - /// - Assert that the index initially contains the expected number - /// of entries. (0) - #[test] - fn test_hidden_files() { - let temp_dir = TempDir::with_prefix("ark_test_hidden_files") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join(".hidden_file.txt"); - fs::write(&file_path, "file content") - .expect("Failed to write to file"); - - let index: ResourceIndex<$hash_type> = - ResourceIndex::build(root_path) - .expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 0); - } - }; - } +/// Test that we don't index hidden files. +/// +/// ## Test scenario: +/// - Create a hidden file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Assert that the index initially contains the expected number of entries. 
+/// (0) +fn test_hidden_files() { + let temp_dir = TempDir::with_prefix("ark_test_hidden_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join(".hidden_file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 0); } From d22b9e24dff771b032a853d0415398f636c4ff65 Mon Sep 17 00:00:00 2001 From: Tarek Date: Thu, 18 Jul 2024 19:59:27 +0300 Subject: [PATCH 26/46] test(fs-index): print index in case of assert failure Signed-off-by: Tarek --- fs-index/src/tests.rs | 68 +++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 8b001649..465aa51e 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -115,13 +115,13 @@ fn test_store_and_load_index() { let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); + assert_eq!(index.len(), 1, "{:?}", index); index.store().expect("Failed to store index"); let loaded_index = load_or_build_index(root_path, false).expect("Failed to load index"); - assert_eq!(index, loaded_index); + assert_eq!(index, loaded_index, "{:?} != {:?}", index, loaded_index); } /// Test storing and loading the resource index with collisions. 
@@ -155,15 +155,15 @@ fn test_store_and_load_index_with_collisions() { let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); let checksum = H::from_path(&file_path).expect("Failed to get checksum"); - assert_eq!(index.len(), 4); - assert_eq!(index.collisions().len(), 1); - assert_eq!(index.collisions()[&checksum].len(), 4); + assert_eq!(index.len(), 4, "{:?}", index); + assert_eq!(index.collisions().len(), 1, "{:?}", index); + assert_eq!(index.collisions()[&checksum].len(), 4, "{:?}", index); index.store().expect("Failed to store index"); let loaded_index = load_or_build_index(root_path, false).expect("Failed to load index"); - assert_eq!(index, loaded_index); + assert_eq!(index, loaded_index, "{:?} != {:?}", index, loaded_index); } /// Test building an index with a file. @@ -186,12 +186,16 @@ fn test_build_index_with_file() { .expect("Failed to get indexed resource"); let index = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); + assert_eq!(index.len(), 1, "{:?}", index); let resource = index .get_resource_by_path("file.txt") .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource); + assert_eq!( + resource, &expected_resource, + "{:?} != {:?}", + resource, expected_resource + ); } /// Test building an index with an empty file. @@ -214,7 +218,7 @@ fn test_build_index_with_empty_file() { let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); + assert_eq!(index.len(), 1, "{:?}", index); } /// Test building an index with a directory. 
@@ -240,12 +244,16 @@ fn test_build_index_with_directory() { .expect("Failed to get indexed resource"); let index = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1); + assert_eq!(index.len(), 1, "{:?}", index); let resource = index .get_resource_by_path("dir/file.txt") .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource); + assert_eq!( + resource, &expected_resource, + "{:?} != {:?}", + resource, expected_resource + ); } /// Test building an index with multiple files. @@ -275,17 +283,17 @@ fn test_build_index_with_multiple_files() { .expect("Failed to get indexed resource"); let index = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 2); + assert_eq!(index.len(), 2, "{:?}", index); let resource = index .get_resource_by_path("file1.txt") .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource1); + assert_eq!(resource, &expected_resource1, "{:?}", resource); let resource = index .get_resource_by_path("file2.txt") .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource2); + assert_eq!(resource, &expected_resource2, "{:?}", resource); } /// Test building an index with multiple directories. @@ -321,17 +329,17 @@ fn test_build_index_with_multiple_directories() { .expect("Failed to get indexed resource"); let index = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 2); + assert_eq!(index.len(), 2, "{:?}", index); let resource = index .get_resource_by_path("dir1/file1.txt") .expect("Resource not found"); - assert_eq!(resource, &expected_resource1); + assert_eq!(resource, &expected_resource1, "{:?}", resource); let resource = index .get_resource_by_path("dir2/file2.txt") .expect("Resource not found"); - assert_eq!(resource, &expected_resource2); + assert_eq!(resource, &expected_resource2, "{:?}", resource); } /// Test updating the resource index. 
@@ -360,7 +368,7 @@ fn test_resource_index_update() { let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); - assert_eq!(index.len(), 2); + assert_eq!(index.len(), 2, "{:?}", index); // create new file let new_file_path = root_path.join("new_file.txt"); @@ -378,7 +386,7 @@ fn test_resource_index_update() { .update_all() .expect("Failed to update index"); // Index now contains 2 resources (file.txt and new_file.txt) - assert_eq!(index.len(), 2); + assert_eq!(index.len(), 2, "{:?}", index); let resource = index .get_resource_by_path("file.txt") @@ -386,13 +394,17 @@ fn test_resource_index_update() { let expected_resource = get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) .expect("Failed to get indexed resource"); - assert_eq!(resource, &expected_resource); + assert_eq!(resource, &expected_resource, "{:?}", resource); let _resource = index .get_resource_by_path("new_file.txt") .expect("Resource not found"); - assert!(index.get_resource_by_path("image.png").is_none()); + assert!( + index.get_resource_by_path("image.png").is_none(), + "{:?}", + index + ); } /// Test adding colliding files to the index. @@ -417,7 +429,7 @@ fn test_add_colliding_files() { let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); + assert_eq!(index.len(), 1, "{:?}", index); let new_file_path = root_path.join("new_file.txt"); fs::write(&new_file_path, "file content").expect("Failed to write to file"); @@ -426,8 +438,8 @@ fn test_add_colliding_files() { .update_all() .expect("Failed to update index"); - assert_eq!(index.len(), 2); - assert_eq!(index.collisions().len(), 1); + assert_eq!(index.len(), 2, "{:?}", index); + assert_eq!(index.collisions().len(), 1, "{:?}", index); } /// Test `ResourceIndex::num_collisions()` method. 
@@ -451,7 +463,7 @@ fn test_num_collisions() { let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1); + assert_eq!(index.len(), 1, "{:?}", index); let new_file_path = root_path.join("new_file.txt"); fs::write(&new_file_path, "file content").expect("Failed to write to file"); @@ -464,8 +476,8 @@ fn test_num_collisions() { .update_all() .expect("Failed to update index"); - assert_eq!(index.len(), 3); - assert_eq!(index.num_collisions(), 3); + assert_eq!(index.len(), 3, "{:?}", index); + assert_eq!(index.num_collisions(), 3, "{:?}", index); } /// Test that we don't index hidden files. @@ -486,5 +498,5 @@ fn test_hidden_files() { let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); - assert_eq!(index.len(), 0); + assert_eq!(index.len(), 0, "{:?}", index); } From d4d7e7392f0ccbdbaef4bb8db871abb48c80ea74 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 20 Jul 2024 14:44:23 +0300 Subject: [PATCH 27/46] fix(fs-index): revise the logic for updated_entries Signed-off-by: Tarek --- fs-index/src/index.rs | 84 ++++++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 33 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index ee9c2538..5a43ebed 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -321,44 +321,62 @@ impl ResourceIndex { }) .collect(); - // `updated_entries` is the difference between preserved_entries and - // current_entries where the last modified time has changed - // significantly + // `updated_entries` is the intersection of current_entries and + // preserved_entries where the last modified time has changed + // significantly (> RESOURCE_UPDATED_THRESHOLD) let updated_entries: HashMap> = - preserved_entries - .iter() - .filter_map(|(path, resource)| { - if current_entries.contains_key(path) { - None + 
current_entries + .into_iter() + .filter(|(path, entry)| { + if !preserved_entries.contains_key(path) { + false } else { - let our_entry = - self.path_to_resource.get(path).unwrap(); - let previous_modified = our_entry.last_modified(); - - let current_modified = resource.last_modified(); + let our_entry = &self.path_to_resource[path]; + let prev_modified = our_entry.last_modified(); - let elapsed_time = match current_modified - .duration_since(previous_modified) { - Ok(duration) => duration, - Err(err) => { + let result = entry.path().metadata(); + match result { + Err(msg) => { log::error!( - "Failed to calculate elapsed time: {:?}", - err + "Couldn't retrieve metadata for {}: {}", + &path.display(), + msg + ); + false + } + Ok(metadata) => match metadata.modified() { + Err(msg) => { + log::error!( + "Couldn't retrieve timestamp for {}: {}", + &path.display(), + msg ); - return None; - }}; - if elapsed_time > RESOURCE_UPDATED_THRESHOLD { - log::trace!( - "Resource updated: {:?}, previous: {:?}, current: {:?}, elapsed: {:?}", - path, - previous_modified, - current_modified, - elapsed_time - ); - - Some((path.clone(), resource.clone())) - } else { - None + false + } + Ok(curr_modified) => { + let elapsed = curr_modified + .duration_since(prev_modified) + .unwrap(); + + let was_updated = + elapsed >= RESOURCE_UPDATED_THRESHOLD; + if was_updated { + log::trace!( + "[update] modified {} by path {} + \twas {:?} + \tnow {:?} + \telapsed {:?}", + our_entry.id, + path.display(), + prev_modified, + curr_modified, + elapsed + ); + } + + was_updated + } + }, } } }) From d49f1ef23abd9f0b21e4e6c58580676b3f3f3061 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 20 Jul 2024 20:30:14 +0300 Subject: [PATCH 28/46] feat(fs-index): increase number of files in benchmarks Signed-off-by: Tarek --- fs-index/benches/resource_index_benchmark.rs | 63 +++++++------------- 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/fs-index/benches/resource_index_benchmark.rs 
b/fs-index/benches/resource_index_benchmark.rs index 04ebc4c0..f6d3f21a 100644 --- a/fs-index/benches/resource_index_benchmark.rs +++ b/fs-index/benches/resource_index_benchmark.rs @@ -8,9 +8,6 @@ use tempfile::TempDir; use dev_hash::Crc32; use fs_index::ResourceIndex; -// The path to the test assets directory -const DIR_PATH: &str = "../test-assets/"; - fn resource_index_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("resource_index"); group.measurement_time(std::time::Duration::from_secs(20)); // Set the measurement time here @@ -70,8 +67,8 @@ fn resource_index_benchmark(c: &mut Criterion) { std::fs::remove_dir_all(&update_all_benchmarks_dir).unwrap(); std::fs::create_dir(&update_all_benchmarks_dir).unwrap(); - // Create 50 new files - for i in 0..50 { + // Create 5000 new files + for i in 0..5000 { let new_file = update_all_benchmarks_dir.join(format!("file_{}.txt", i)); std::fs::File::create(&new_file).unwrap(); @@ -97,49 +94,33 @@ criterion_group! { } criterion_main!(benches); -/// A helper function to setup a temp directory for the benchmarks using the -/// test assets directory +/// A helper function to setup a temp directory for the benchmarks fn setup_temp_dir() -> TempDir { - // assert the path exists and is a directory - assert!( - std::path::Path::new(DIR_PATH).is_dir(), - "The path: {} does not exist or is not a directory", - DIR_PATH - ); - // Create a temp directory let temp_dir = TempDir::with_prefix("ark-fs-index-benchmarks").unwrap(); let benchmarks_dir = temp_dir.path(); - let benchmarks_dir_str = benchmarks_dir.to_str().unwrap(); - log::info!("Temp directory for benchmarks: {}", benchmarks_dir_str); - - // Copy the test assets to the temp directory - let source = std::path::Path::new(DIR_PATH); - // Can't use fs::copy because the source is a directory - let output = std::process::Command::new("cp") - .arg("-r") - .arg(source) - .arg(benchmarks_dir_str) - .output() - .expect("Failed to copy test assets to temp directory"); - if 
!output.status.success() { - panic!( - "Failed to copy test assets to temp directory: {}", - String::from_utf8_lossy(&output.stderr) - ); + log::info!("Temp directory for benchmarks: {:?}", benchmarks_dir); + + // Create 10,000 files in the temp directory + for i in 0..10000 { + let new_file = benchmarks_dir.join(format!("file_{}.txt", i)); + std::fs::File::create(&new_file).unwrap(); + // We add the index `i` to the file content to make sure the content is + // unique This is to avoid collisions in the index + std::fs::write(&new_file, format!("Hello, World! {}", i)).unwrap(); } temp_dir } -/// A helper function that takes a directory and creates 50 new files, removes -/// 30 files, and modifies 10 files +/// A helper function that takes a directory and creates 5000 new files, removes +/// 3000 files, and modifies 1000 files /// -/// Note: The function assumes that the directory already contains 50 files -/// named `file_0.txt` to `file_49.txt` +/// Note: The function assumes that the directory already contains 5000 files +/// with the names `file_0.txt` to `file_4999.txt` fn update_all_files(dir: &PathBuf) { - // Create 50 new files - for i in 51..101 { + // Create 5000 new files + for i in 5001..10001 { let new_file = dir.join(format!("file_{}.txt", i)); std::fs::File::create(&new_file).unwrap(); // We add the index `i` to the file content to make sure the content is @@ -147,14 +128,14 @@ fn update_all_files(dir: &PathBuf) { std::fs::write(&new_file, format!("Hello, World! 
{}", i)).unwrap(); } - // Remove 30 files - for i in 0..30 { + // Remove 3000 files + for i in 0..3000 { let removed_file = dir.join(format!("file_{}.txt", i)); std::fs::remove_file(&removed_file).unwrap(); } - // Modify 10 files - for i in 40..50 { + // Modify 1000 files + for i in 4000..5000 { let modified_file = dir.join(format!("file_{}.txt", i)); std::fs::write(&modified_file, "Hello, World!").unwrap(); } From 0b8d8601ffa158a5d323ff4f0c25a1f934edd7bc Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 20 Jul 2024 21:02:51 +0300 Subject: [PATCH 29/46] feat(fs-index): use hashmaps instead of vectors for index update Signed-off-by: Tarek --- fs-index/src/index.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 5a43ebed..1e6355a2 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -1,5 +1,5 @@ use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, fs, hash::Hash, path::{Path, PathBuf}, @@ -124,19 +124,19 @@ where #[derive(PartialEq, Debug)] pub struct IndexUpdate { /// Resources that were added during the update - added: Vec>, + added: HashMap>, /// Resources that were removed during the update - removed: Vec>, + removed: HashSet, } impl IndexUpdate { /// Return the resources that were added during the update - pub fn added(&self) -> &Vec> { + pub fn added(&self) -> &HashMap> { &self.added } /// Return the resources that were removed during the update - pub fn removed(&self) -> &Vec> { + pub fn removed(&self) -> &HashSet { &self.removed } } @@ -285,8 +285,8 @@ impl ResourceIndex { log::debug!("Updating index at root path: {:?}", self.root); log::trace!("Current index: {:#?}", self); - let mut added = Vec::new(); - let mut removed = Vec::new(); + let mut added = HashMap::new(); + let mut removed = HashSet::new(); let current_paths = discover_paths(&self.root)?; @@ -409,7 +409,7 @@ impl ResourceIndex { let id = resource.id().clone(); let resources = 
self.id_to_resources.get_mut(&id).unwrap(); resources.retain(|r| r.path() != resource.path()); - removed.push(resource); + removed.insert(id); } let added_entries: HashMap> = @@ -443,10 +443,10 @@ impl ResourceIndex { .insert(relative_path.clone(), resource.clone()); let id = resource.id().clone(); self.id_to_resources - .entry(id) + .entry(id.clone()) .or_default() .push(resource.clone()); - added.push(resource); + added.insert(id, resource); } Ok(IndexUpdate { added, removed }) From 27d8c1948aa9395f822f2c7ee25348cc86799f4f Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 20 Jul 2024 21:23:17 +0300 Subject: [PATCH 30/46] fix(fs-index): update index to include map from id to paths Signed-off-by: Tarek --- fs-index/src/index.rs | 56 ++++++++++++++++++++++++------------------- fs-index/src/serde.rs | 18 ++++++++------ 2 files changed, 43 insertions(+), 31 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 1e6355a2..91c8228b 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -111,11 +111,11 @@ where { /// The root path of the index (canonicalized) pub(crate) root: PathBuf, - /// A map from resource IDs to resources + /// A map from resource IDs to paths /// /// Multiple resources can have the same ID (e.g., due to hash collisions /// or files with the same content) - pub(crate) id_to_resources: HashMap>>, + pub(crate) id_to_paths: HashMap>, /// A map from resource paths to resources pub(crate) path_to_resource: HashMap>, } @@ -169,12 +169,12 @@ impl ResourceIndex { /// should be files with the same content. If you are using a /// non-cryptographic hash function, collisions can be files with the /// same content or files whose content hash to the same value. 
- pub fn collisions(&self) -> HashMap>> { + pub fn collisions(&self) -> HashMap> { // Filter out IDs with only one resource - self.id_to_resources + self.id_to_paths .iter() - .filter(|(_, resources)| resources.len() > 1) - .map(|(id, resources)| (id.clone(), resources.clone())) + .filter(|(_id, paths)| paths.len() > 1) + .map(|(id, paths)| (id.clone(), paths.clone())) .collect() } @@ -185,10 +185,11 @@ impl ResourceIndex { /// non-cryptographic hash function, collisions can be files with the /// same content or files whose content hash to the same value. pub fn num_collisions(&self) -> usize { - self.id_to_resources + // Aggregate the number of collisions for each ID + self.id_to_paths .values() - .filter(|resources| resources.len() > 1) - .map(|resources| resources.len()) + .filter(|paths| paths.len() > 1) + .map(|paths| paths.len()) .sum() } @@ -215,8 +216,13 @@ impl ResourceIndex { pub fn get_resources_by_id( &self, id: &Id, - ) -> Option<&Vec>> { - self.id_to_resources.get(id) + ) -> Option>> { + let paths = self.id_to_paths.get(id)?; + let resources = paths + .iter() + .filter_map(|path| self.path_to_resource.get(path)) + .collect(); + Some(resources) } /// Get a resource by its path @@ -237,7 +243,7 @@ impl ResourceIndex { // Canonicalize the root path let root = fs::canonicalize(&root_path)?; - let mut id_to_resources = HashMap::new(); + let mut id_to_paths: HashMap> = HashMap::new(); let mut path_to_resource = HashMap::new(); // Discover paths in the root directory @@ -263,18 +269,18 @@ impl ResourceIndex { // Update the path to resource map path_to_resource.extend(entries.clone()); - // Update the ID to resources map + // Update the ID to paths map for resource in entries.values() { let id = resource.id().clone(); - id_to_resources + id_to_paths .entry(id) - .or_insert_with(Vec::new) - .push(resource.clone()); + .or_default() + .insert(resource.path().to_path_buf()); } let index = ResourceIndex { root, - id_to_resources, + id_to_paths, path_to_resource, 
}; Ok(index) @@ -402,14 +408,16 @@ impl ResourceIndex { ); self.path_to_resource.remove(&path); - self.id_to_resources + self.id_to_paths .get_mut(resource.id()) .unwrap() - .retain(|r| r.path() != resource.path()); + .remove(&path); let id = resource.id().clone(); - let resources = self.id_to_resources.get_mut(&id).unwrap(); - resources.retain(|r| r.path() != resource.path()); - removed.insert(id); + // Only remove the ID if it has no paths + if self.id_to_paths[&id].is_empty() { + self.id_to_paths.remove(&id); + removed.insert(id); + } } let added_entries: HashMap> = @@ -442,10 +450,10 @@ impl ResourceIndex { self.path_to_resource .insert(relative_path.clone(), resource.clone()); let id = resource.id().clone(); - self.id_to_resources + self.id_to_paths .entry(id.clone()) .or_default() - .push(resource.clone()); + .insert(relative_path.clone()); added.insert(id, resource); } diff --git a/fs-index/src/serde.rs b/fs-index/src/serde.rs index b99b191f..a220d6a4 100644 --- a/fs-index/src/serde.rs +++ b/fs-index/src/serde.rs @@ -1,4 +1,8 @@ -use std::{collections::HashMap, path::PathBuf, time::SystemTime}; +use std::{ + collections::{HashMap, HashSet}, + path::PathBuf, + time::SystemTime, +}; use anyhow::Result; use serde::{ @@ -79,7 +83,7 @@ where ResourceIndexData::deserialize(deserializer)?; let mut path_to_resource = HashMap::new(); - let mut id_to_resources = HashMap::new(); + let mut id_to_paths = HashMap::new(); for (path, resource_data) in index_data.resources { let last_modified = SystemTime::UNIX_EPOCH + std::time::Duration::from_nanos(resource_data.last_modified); @@ -88,16 +92,16 @@ where path.clone(), last_modified, ); - path_to_resource.insert(path, resource.clone()); - id_to_resources + path_to_resource.insert(path.clone(), resource.clone()); + id_to_paths .entry(resource.id().clone()) - .or_insert_with(Vec::new) - .push(resource); + .or_insert_with(HashSet::new) + .insert(path); } Ok(ResourceIndex { root: index_data.root, - id_to_resources, + 
id_to_paths, path_to_resource, }) } From d076f345ceed0430eccbb5147b4495504202fb64 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 21 Jul 2024 12:03:29 +0300 Subject: [PATCH 31/46] feat(fs-index): define a macro for_each_hash!() for tests Signed-off-by: Tarek --- fs-index/src/tests.rs | 605 +++++++++++++++++++++--------------------- 1 file changed, 306 insertions(+), 299 deletions(-) diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 465aa51e..1da0e7d7 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -8,8 +8,8 @@ //! # Structure //! //! - **Macros**: -//! - `hash_tests!`: Generates test functions for pairs of test functions and -//! hash types. +//! - `for_each_hash!`: A macro that takes a list of hash function types and a +//! block of code to execute for each hash type. //! //! - **Test Functions**: //! - Defined to test various aspects of `ResourceIndex`, parameterized by @@ -21,8 +21,11 @@ //! //! # Usage //! -//! To add a new test for a specific hash type, add a new entry in the -//! `hash_tests!` macro invocation with the appropriate function and hash type. +//! To add a new test for a specific hash type: +//! 1. Write a block of code generic over the hash type (a Type implementing +//! ResourceId trait). +//! 2. Use the `for_each_hash!` macro to execute the block of code for each +//! desired hash type. use dev_hash::{Blake3, Crc32}; use std::{fs, path::Path}; @@ -36,49 +39,20 @@ use crate::{ index::IndexedResource, utils::load_or_build_index, ResourceIndex, }; -/// A macro to generate tests for function and hash type pairs. +/// A macro that takes a list of hash function types and a block of code to +/// execute for each hash type. #[macro_export] -macro_rules! hash_tests { - ($($name:ident: ($func:ident, $hash_type:ty),)*) => { +macro_rules! 
for_each_hash { + ($($hash_type:ty),+ => $body:block) => { $( - #[test] - fn $name() { - $func::<$hash_type>(); + { + type H = $hash_type; + $body } - )* + )+ }; } -// Use the macro to generate tests for the specified function and hash type -// pairs -hash_tests! { - // CRC32 - test_store_and_load_index_crc32: (test_store_and_load_index, Crc32), - test_store_and_load_index_with_collisions_crc32: (test_store_and_load_index_with_collisions, Crc32), - test_build_index_with_file_crc32: (test_build_index_with_file, Crc32), - test_build_index_with_empty_file_crc32: (test_build_index_with_empty_file, Crc32), - test_build_index_with_directory_crc32: (test_build_index_with_directory, Crc32), - test_build_index_with_multiple_files_crc32: (test_build_index_with_multiple_files, Crc32), - test_build_index_with_multiple_directories_crc32: (test_build_index_with_multiple_directories, Crc32), - test_resource_index_update_crc32: (test_resource_index_update, Crc32), - test_add_colliding_files_crc32: (test_add_colliding_files, Crc32), - test_num_collisions_crc32: (test_num_collisions, Crc32), - test_hidden_files_crc32: (test_hidden_files, Crc32), - - // Blake3 - test_store_and_load_index_blake3: (test_store_and_load_index, Blake3), - test_store_and_load_index_with_collisions_blake3: (test_store_and_load_index_with_collisions, Blake3), - test_build_index_with_file_blake3: (test_build_index_with_file, Blake3), - test_build_index_with_empty_file_blake3: (test_build_index_with_empty_file, Blake3), - test_build_index_with_directory_blake3: (test_build_index_with_directory, Blake3), - test_build_index_with_multiple_files_blake3: (test_build_index_with_multiple_files, Blake3), - test_build_index_with_multiple_directories_blake3: (test_build_index_with_multiple_directories, Blake3), - test_resource_index_update_blake3: (test_resource_index_update, Blake3), - test_add_colliding_files_blake3: (test_add_colliding_files, Blake3), - test_num_collisions_blake3: (test_num_collisions, Blake3), - 
test_hidden_files_blake3: (test_hidden_files, Blake3), -} - /// A helper function to get [`IndexedResource`] from a file path fn get_indexed_resource_from_file>( path: P, @@ -105,23 +79,26 @@ fn get_indexed_resource_from_file>( /// - Store the index. /// - Load the stored index. /// - Assert that the loaded index matches the original index. -fn test_store_and_load_index() { - let temp_dir = TempDir::with_prefix("ark_test_store_and_load_index") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); +#[test] +fn test_store_and_load_index() { + for_each_hash!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_store_and_load_index") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); - let index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1, "{:?}", index); - index.store().expect("Failed to store index"); + let index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1, "{:?}", index); + index.store().expect("Failed to store index"); - let loaded_index = - load_or_build_index(root_path, false).expect("Failed to load index"); + let loaded_index = + load_or_build_index(root_path, false).expect("Failed to load index"); - assert_eq!(index, loaded_index, "{:?} != {:?}", index, loaded_index); + assert_eq!(index, loaded_index, "{:?} != {:?}", index, loaded_index); + }); } /// Test storing and loading the resource index with collisions. @@ -132,38 +109,41 @@ fn test_store_and_load_index() { /// - Store the index. /// - Load the stored index. /// - Assert that the loaded index matches the original index. 
-fn test_store_and_load_index_with_collisions() { - let temp_dir = - TempDir::with_prefix("ark_test_store_and_load_index_with_collisions") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); +#[test] +fn test_store_and_load_index_with_collisions() { + for_each_hash!(Crc32, Blake3 => { + let temp_dir = + TempDir::with_prefix("ark_test_store_and_load_index_with_collisions") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); - let file_path2 = root_path.join("file2.txt"); - fs::write(&file_path2, "file content").expect("Failed to write to file"); + let file_path2 = root_path.join("file2.txt"); + fs::write(&file_path2, "file content").expect("Failed to write to file"); - let file_path3 = root_path.join("file3.txt"); - fs::write(&file_path3, "file content").expect("Failed to write to file"); + let file_path3 = root_path.join("file3.txt"); + fs::write(&file_path3, "file content").expect("Failed to write to file"); - let file_path4 = root_path.join("file4.txt"); - fs::write(&file_path4, "file content").expect("Failed to write to file"); + let file_path4 = root_path.join("file4.txt"); + fs::write(&file_path4, "file content").expect("Failed to write to file"); - // Now we have 4 files with the same content (same checksum) + // Now we have 4 files with the same content (same checksum) - let index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - let checksum = H::from_path(&file_path).expect("Failed to get checksum"); - assert_eq!(index.len(), 4, "{:?}", index); - assert_eq!(index.collisions().len(), 1, "{:?}", index); - assert_eq!(index.collisions()[&checksum].len(), 4, "{:?}", index); - index.store().expect("Failed to store index"); + let index: 
ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + let checksum = H::from_path(&file_path).expect("Failed to get checksum"); + assert_eq!(index.len(), 4, "{:?}", index); + assert_eq!(index.collisions().len(), 1, "{:?}", index); + assert_eq!(index.collisions()[&checksum].len(), 4, "{:?}", index); + index.store().expect("Failed to store index"); - let loaded_index = - load_or_build_index(root_path, false).expect("Failed to load index"); + let loaded_index = + load_or_build_index(root_path, false).expect("Failed to load index"); - assert_eq!(index, loaded_index, "{:?} != {:?}", index, loaded_index); + assert_eq!(index, loaded_index, "{:?} != {:?}", index, loaded_index); + }); } /// Test building an index with a file. @@ -174,28 +154,31 @@ fn test_store_and_load_index_with_collisions() { /// - Assert that the index contains one entry. /// - Assert that the resource retrieved by path matches the expected resource. /// - Assert that the resource retrieved by ID matches the expected resource. 
-fn test_build_index_with_file() { - let temp_dir = TempDir::with_prefix("ark_test_build_index_with_file") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - let expected_resource: IndexedResource = - get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - - let index = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1, "{:?}", index); - - let resource = index - .get_resource_by_path("file.txt") - .expect("Failed to get resource"); - assert_eq!( - resource, &expected_resource, - "{:?} != {:?}", - resource, expected_resource - ); +#[test] +fn test_build_index_with_file() { + for_each_hash!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_build_index_with_file") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + let expected_resource: IndexedResource = + get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1, "{:?}", index); + + let resource = index + .get_resource_by_path("file.txt") + .expect("Failed to get resource"); + assert_eq!( + resource, &expected_resource, + "{:?} != {:?}", + resource, expected_resource + ); + }); } /// Test building an index with an empty file. @@ -205,20 +188,23 @@ fn test_build_index_with_file() { /// - Create a file with content within the temporary directory. /// - Build a resource index in the temporary directory. /// - Assert that the index contains one entries. 
-fn test_build_index_with_empty_file() { - let temp_dir = TempDir::with_prefix("ark_test_build_index_with_empty_file") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); +#[test] +fn test_build_index_with_empty_file() { + for_each_hash!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_build_index_with_empty_file") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); - let empty_file_path = root_path.join("empty_file.txt"); - fs::write(&empty_file_path, "").expect("Failed to write to file"); + let empty_file_path = root_path.join("empty_file.txt"); + fs::write(&empty_file_path, "").expect("Failed to write to file"); - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); - let index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1, "{:?}", index); + let index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1, "{:?}", index); + }); } /// Test building an index with a directory. @@ -230,30 +216,33 @@ fn test_build_index_with_empty_file() { /// - Assert that the index contains one entry. /// - Assert that the resource retrieved by path matches the expected resource. /// - Assert that the resource retrieved by ID matches the expected resource. 
-fn test_build_index_with_directory() { - let temp_dir = TempDir::with_prefix("ark_test_build_index_with_directory") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let dir_path = root_path.join("dir"); - fs::create_dir(&dir_path).expect("Failed to create dir"); - let file_path = dir_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - let expected_resource: IndexedResource = - get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - - let index = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 1, "{:?}", index); - - let resource = index - .get_resource_by_path("dir/file.txt") - .expect("Failed to get resource"); - assert_eq!( - resource, &expected_resource, - "{:?} != {:?}", - resource, expected_resource - ); +#[test] +fn test_build_index_with_directory() { + for_each_hash!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_build_index_with_directory") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let dir_path = root_path.join("dir"); + fs::create_dir(&dir_path).expect("Failed to create dir"); + let file_path = dir_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + let expected_resource: IndexedResource = + get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 1, "{:?}", index); + + let resource = index + .get_resource_by_path("dir/file.txt") + .expect("Failed to get resource"); + assert_eq!( + resource, &expected_resource, + "{:?} != {:?}", + resource, expected_resource + ); + }); } /// Test building an index with multiple files. 
@@ -264,36 +253,39 @@ fn test_build_index_with_directory() { /// - Assert that the index contains two entries. /// - Assert that the resource retrieved by path for each file matches the /// expected resource. -fn test_build_index_with_multiple_files() { - let temp_dir = - TempDir::with_prefix("ark_test_build_index_with_multiple_files") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file1_path = root_path.join("file1.txt"); - fs::write(&file1_path, "file1 content").expect("Failed to write to file"); - let file2_path = root_path.join("file2.txt"); - fs::write(&file2_path, "file2 content").expect("Failed to write to file"); - - let expected_resource1: IndexedResource = - get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - let expected_resource2 = - get_indexed_resource_from_file(&file2_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - - let index = ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 2, "{:?}", index); - - let resource = index - .get_resource_by_path("file1.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource1, "{:?}", resource); - - let resource = index - .get_resource_by_path("file2.txt") - .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource2, "{:?}", resource); +#[test] +fn test_build_index_with_multiple_files() { + for_each_hash!(Crc32, Blake3 => { + let temp_dir = + TempDir::with_prefix("ark_test_build_index_with_multiple_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file1_path = root_path.join("file1.txt"); + fs::write(&file1_path, "file1 content").expect("Failed to write to file"); + let file2_path = root_path.join("file2.txt"); + fs::write(&file2_path, "file2 content").expect("Failed to write to file"); + + let expected_resource1: IndexedResource = + 
get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + let expected_resource2 = + get_indexed_resource_from_file(&file2_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 2, "{:?}", index); + + let resource = index + .get_resource_by_path("file1.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource1, "{:?}", resource); + + let resource = index + .get_resource_by_path("file2.txt") + .expect("Failed to get resource"); + assert_eq!(resource, &expected_resource2, "{:?}", resource); + }); } /// Test building an index with multiple directories. @@ -305,41 +297,44 @@ fn test_build_index_with_multiple_files() { /// - Assert that the index contains two entries. /// - Assert that the resources retrieved by path for each file match the /// expected resources. -fn test_build_index_with_multiple_directories() { - let temp_dir = - TempDir::with_prefix("ark_test_build_index_with_multiple_directories") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let dir1_path = root_path.join("dir1"); - fs::create_dir(&dir1_path).expect("Failed to create dir"); - let file1_path = dir1_path.join("file1.txt"); - fs::write(&file1_path, "file1 content").expect("Failed to write to file"); - - let dir2_path = root_path.join("dir2"); - fs::create_dir(&dir2_path).expect("Failed to create dir"); - let file2_path = dir2_path.join("file2.txt"); - fs::write(&file2_path, "file2 content").expect("Failed to write to file"); - - let expected_resource1: IndexedResource = - get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - let expected_resource2 = - get_indexed_resource_from_file(&file2_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - - let index = 
ResourceIndex::build(root_path).expect("Failed to build index"); - assert_eq!(index.len(), 2, "{:?}", index); - - let resource = index - .get_resource_by_path("dir1/file1.txt") - .expect("Resource not found"); - assert_eq!(resource, &expected_resource1, "{:?}", resource); - - let resource = index - .get_resource_by_path("dir2/file2.txt") - .expect("Resource not found"); - assert_eq!(resource, &expected_resource2, "{:?}", resource); +#[test] +fn test_build_index_with_multiple_directories() { + for_each_hash!(Crc32, Blake3 => { + let temp_dir = + TempDir::with_prefix("ark_test_build_index_with_multiple_directories") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let dir1_path = root_path.join("dir1"); + fs::create_dir(&dir1_path).expect("Failed to create dir"); + let file1_path = dir1_path.join("file1.txt"); + fs::write(&file1_path, "file1 content").expect("Failed to write to file"); + + let dir2_path = root_path.join("dir2"); + fs::create_dir(&dir2_path).expect("Failed to create dir"); + let file2_path = dir2_path.join("file2.txt"); + fs::write(&file2_path, "file2 content").expect("Failed to write to file"); + + let expected_resource1: IndexedResource = + get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + let expected_resource2 = + get_indexed_resource_from_file(&file2_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + + let index = ResourceIndex::build(root_path).expect("Failed to build index"); + assert_eq!(index.len(), 2, "{:?}", index); + + let resource = index + .get_resource_by_path("dir1/file1.txt") + .expect("Resource not found"); + assert_eq!(resource, &expected_resource1, "{:?}", resource); + + let resource = index + .get_resource_by_path("dir2/file2.txt") + .expect("Resource not found"); + assert_eq!(resource, &expected_resource2, "{:?}", resource); + }); } /// Test updating the resource index. 
@@ -354,57 +349,60 @@ fn test_build_index_with_multiple_directories() { /// update. /// - Assert that the entries in the index match the expected state after the /// update. -fn test_resource_index_update() { - let temp_dir = TempDir::with_prefix("ark_test_resource_index_update") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let image_path = root_path.join("image.png"); - fs::write(&image_path, "image content").expect("Failed to write to file"); - - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 2, "{:?}", index); - - // create new file - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "new file content") - .expect("Failed to write to file"); - - // modify file - fs::write(&file_path, "updated file content") - .expect("Failed to write to file"); - - // remove file - fs::remove_file(&image_path).expect("Failed to remove file"); - - index - .update_all() - .expect("Failed to update index"); - // Index now contains 2 resources (file.txt and new_file.txt) - assert_eq!(index.len(), 2, "{:?}", index); - - let resource = index - .get_resource_by_path("file.txt") - .expect("Resource not found"); - let expected_resource = - get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) - .expect("Failed to get indexed resource"); - assert_eq!(resource, &expected_resource, "{:?}", resource); - - let _resource = index - .get_resource_by_path("new_file.txt") - .expect("Resource not found"); - - assert!( - index.get_resource_by_path("image.png").is_none(), - "{:?}", +#[test] +fn test_resource_index_update() { + for_each_hash!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_resource_index_update") + .expect("Failed to create temp dir"); + let 
root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let image_path = root_path.join("image.png"); + fs::write(&image_path, "image content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 2, "{:?}", index); + + // create new file + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "new file content") + .expect("Failed to write to file"); + + // modify file + fs::write(&file_path, "updated file content") + .expect("Failed to write to file"); + + // remove file + fs::remove_file(&image_path).expect("Failed to remove file"); + index - ); + .update_all() + .expect("Failed to update index"); + // Index now contains 2 resources (file.txt and new_file.txt) + assert_eq!(index.len(), 2, "{:?}", index); + + let resource = index + .get_resource_by_path("file.txt") + .expect("Resource not found"); + let expected_resource = + get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) + .expect("Failed to get indexed resource"); + assert_eq!(resource, &expected_resource, "{:?}", resource); + + let _resource = index + .get_resource_by_path("new_file.txt") + .expect("Resource not found"); + + assert!( + index.get_resource_by_path("image.png").is_none(), + "{:?}", + index + ); + }); } /// Test adding colliding files to the index. @@ -418,28 +416,31 @@ fn test_resource_index_update() { /// - Assert that the index contains the expected number of entries after the /// addition. /// - Assert index.collisions contains the expected number of entries. 
-fn test_add_colliding_files() { - let temp_dir = TempDir::with_prefix("ark_test_add_colliding_files") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); +#[test] +fn test_add_colliding_files() { + for_each_hash!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_add_colliding_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1, "{:?}", index); + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1, "{:?}", index); - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "file content").expect("Failed to write to file"); + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "file content").expect("Failed to write to file"); - index - .update_all() - .expect("Failed to update index"); + index + .update_all() + .expect("Failed to update index"); - assert_eq!(index.len(), 2, "{:?}", index); - assert_eq!(index.collisions().len(), 1, "{:?}", index); + assert_eq!(index.len(), 2, "{:?}", index); + assert_eq!(index.collisions().len(), 1, "{:?}", index); + }); } /// Test `ResourceIndex::num_collisions()` method. @@ -452,32 +453,35 @@ fn test_add_colliding_files() { /// - Update the index. /// - Assert that the index contains the expected number of entries after the /// update. 
-fn test_num_collisions() { - let temp_dir = TempDir::with_prefix("ark_test_num_collisions") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); +#[test] +fn test_num_collisions() { + for_each_hash!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_num_collisions") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); - let file_path = root_path.join("file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 1, "{:?}", index); + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 1, "{:?}", index); - let new_file_path = root_path.join("new_file.txt"); - fs::write(&new_file_path, "file content").expect("Failed to write to file"); + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "file content").expect("Failed to write to file"); - let new_file_path2 = root_path.join("new_file2.txt"); - fs::write(&new_file_path2, "file content") - .expect("Failed to write to file"); + let new_file_path2 = root_path.join("new_file2.txt"); + fs::write(&new_file_path2, "file content") + .expect("Failed to write to file"); - index - .update_all() - .expect("Failed to update index"); + index + .update_all() + .expect("Failed to update index"); - assert_eq!(index.len(), 3, "{:?}", index); - assert_eq!(index.num_collisions(), 3, "{:?}", index); + assert_eq!(index.len(), 3, "{:?}", index); + assert_eq!(index.num_collisions(), 3, "{:?}", index); + }); } /// Test that we don't index hidden files. 
@@ -487,16 +491,19 @@ fn test_num_collisions() { /// - Build a resource index in the temporary directory. /// - Assert that the index initially contains the expected number of entries. /// (0) -fn test_hidden_files() { - let temp_dir = TempDir::with_prefix("ark_test_hidden_files") - .expect("Failed to create temp dir"); - let root_path = temp_dir.path(); - - let file_path = root_path.join(".hidden_file.txt"); - fs::write(&file_path, "file content").expect("Failed to write to file"); - - let index: ResourceIndex = - ResourceIndex::build(root_path).expect("Failed to build index"); - index.store().expect("Failed to store index"); - assert_eq!(index.len(), 0, "{:?}", index); +#[test] +fn test_hidden_files() { + for_each_hash!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_hidden_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join(".hidden_file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + index.store().expect("Failed to store index"); + assert_eq!(index.len(), 0, "{:?}", index); + }); } From 5ef64948b82b1bdec7defb2266f6629de4a1292b Mon Sep 17 00:00:00 2001 From: Tarek Date: Sun, 21 Jul 2024 14:12:13 +0300 Subject: [PATCH 32/46] feat(fs-index): use IndexEntry in path_to_resource map Signed-off-by: Tarek --- fs-index/src/index.rs | 99 ++++++++++++++++++++++++++++--------------- fs-index/src/serde.rs | 15 +++---- fs-index/src/tests.rs | 16 +++---- 3 files changed, 80 insertions(+), 50 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 91c8228b..2d2857c6 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -56,6 +56,14 @@ impl IndexedResource { } } +#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Clone, Debug)] +pub struct IndexEntry { + /// The unique identifier of the resource + pub(crate) id: Id, + /// The last 
modified time of the resource (from the file system metadata) + pub(crate) last_modified: SystemTime, +} + /// Represents the index of resources in a directory. /// /// [`ResourceIndex`] provides functionality for managing a directory index, @@ -117,7 +125,7 @@ where /// or files with the same content) pub(crate) id_to_paths: HashMap>, /// A map from resource paths to resources - pub(crate) path_to_resource: HashMap>, + pub(crate) path_to_resource: HashMap>, } /// Represents the result of an update operation on the ResourceIndex @@ -160,7 +168,15 @@ impl ResourceIndex { /// Return the resources in the index pub fn resources(&self) -> Vec> { // Using path_to_resource so to avoid not collecting duplicates - self.path_to_resource.values().cloned().collect() + let mut resources = vec![]; + for (path, resource) in self.path_to_resource.iter() { + resources.push(IndexedResource::new( + resource.id.clone(), + path.clone(), + resource.last_modified, + )); + } + resources } /// Return the ID collisions @@ -216,12 +232,20 @@ impl ResourceIndex { pub fn get_resources_by_id( &self, id: &Id, - ) -> Option>> { + ) -> Option>> { + let mut resources = vec![]; + let paths = self.id_to_paths.get(id)?; - let resources = paths - .iter() - .filter_map(|path| self.path_to_resource.get(path)) - .collect(); + for path in paths { + let resource = self.path_to_resource.get(path)?; + let resource = IndexedResource::new( + resource.id.clone(), + path.clone(), + resource.last_modified, + ); + resources.push(resource); + } + Some(resources) } @@ -233,8 +257,14 @@ impl ResourceIndex { pub fn get_resource_by_path>( &self, path: P, - ) -> Option<&IndexedResource> { - self.path_to_resource.get(path.as_ref()) + ) -> Option> { + let resource = self.path_to_resource.get(path.as_ref())?; + let resource = IndexedResource::new( + resource.id.clone(), + path.as_ref().to_path_buf(), + resource.last_modified, + ); + Some(resource) } /// Build a new index from the given root path @@ -252,16 +282,22 @@ impl 
ResourceIndex { scan_entries(paths); // Strip the root path from the entries - let entries: HashMap> = entries + let entries: HashMap> = entries .into_iter() .map(|(path, resource)| { let relative_path = path.strip_prefix(&root).unwrap().to_path_buf(); - let resource = IndexedResource::new( - resource.id().clone(), - relative_path.clone(), - resource.last_modified(), - ); + let resource = IndexEntry { + id: resource.id().clone(), + last_modified: resource.last_modified(), + }; + + // Update the ID to paths map + id_to_paths + .entry(resource.id.clone()) + .or_default() + .insert(relative_path.clone()); + (relative_path, resource) }) .collect(); @@ -269,15 +305,6 @@ impl ResourceIndex { // Update the path to resource map path_to_resource.extend(entries.clone()); - // Update the ID to paths map - for resource in entries.values() { - let id = resource.id().clone(); - id_to_paths - .entry(id) - .or_default() - .insert(resource.path().to_path_buf()); - } - let index = ResourceIndex { root, id_to_paths, @@ -291,8 +318,8 @@ impl ResourceIndex { log::debug!("Updating index at root path: {:?}", self.root); log::trace!("Current index: {:#?}", self); - let mut added = HashMap::new(); - let mut removed = HashSet::new(); + let mut added: HashMap> = HashMap::new(); + let mut removed: HashSet = HashSet::new(); let current_paths = discover_paths(&self.root)?; @@ -303,7 +330,7 @@ impl ResourceIndex { let previous_entries = self.path_to_resource.clone(); // `preserved_entries` is the intersection of current_entries and // previous_entries - let preserved_entries: HashMap> = + let preserved_entries: HashMap> = current_entries .iter() .filter_map(|(path, _resource)| { @@ -338,7 +365,7 @@ impl ResourceIndex { false } else { let our_entry = &self.path_to_resource[path]; - let prev_modified = our_entry.last_modified(); + let prev_modified = our_entry.last_modified; let result = entry.path().metadata(); match result { @@ -389,7 +416,7 @@ impl ResourceIndex { .collect(); // Remove 
resources that are not in the current entries - let removed_entries: HashMap> = + let removed_entries: HashMap> = previous_entries .iter() .filter_map(|(path, resource)| { @@ -404,15 +431,15 @@ impl ResourceIndex { log::trace!( "Resource removed: {:?}, last modified: {:?}", path, - resource.last_modified() + resource.last_modified ); self.path_to_resource.remove(&path); self.id_to_paths - .get_mut(resource.id()) + .get_mut(&resource.id) .unwrap() .remove(&path); - let id = resource.id().clone(); + let id = resource.id.clone(); // Only remove the ID if it has no paths if self.id_to_paths[&id].is_empty() { self.id_to_paths.remove(&id); @@ -446,10 +473,14 @@ impl ResourceIndex { relative_path.clone(), resource.last_modified(), ); + let index_entry_resource = IndexEntry { + id: resource.id().clone(), + last_modified: resource.last_modified(), + }; self.path_to_resource - .insert(relative_path.clone(), resource.clone()); - let id = resource.id().clone(); + .insert(relative_path.clone(), index_entry_resource.clone()); + let id = resource.id.clone(); self.id_to_paths .entry(id.clone()) .or_default() diff --git a/fs-index/src/serde.rs b/fs-index/src/serde.rs index a220d6a4..600d94f8 100644 --- a/fs-index/src/serde.rs +++ b/fs-index/src/serde.rs @@ -12,7 +12,7 @@ use serde::{ use data_resource::ResourceId; -use crate::{index::IndexedResource, ResourceIndex}; +use crate::{index::IndexEntry, ResourceIndex}; /// Data structure for serializing and deserializing the index #[derive(Serialize, Deserialize)] @@ -46,9 +46,9 @@ where let mut resources = HashMap::new(); for (path, resource) in &self.path_to_resource { - let id = resource.id().clone(); + let id = resource.id.clone(); let last_modified = resource - .last_modified() + .last_modified .duration_since(SystemTime::UNIX_EPOCH) .map_err(|e| { serde::ser::Error::custom(format!( @@ -87,14 +87,13 @@ where for (path, resource_data) in index_data.resources { let last_modified = SystemTime::UNIX_EPOCH + 
std::time::Duration::from_nanos(resource_data.last_modified); - let resource = IndexedResource::new( - resource_data.id, - path.clone(), + let resource = IndexEntry { + id: resource_data.id, last_modified, - ); + }; path_to_resource.insert(path.clone(), resource.clone()); id_to_paths - .entry(resource.id().clone()) + .entry(resource.id.clone()) .or_insert_with(HashSet::new) .insert(path); } diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 1da0e7d7..65a21728 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -9,7 +9,7 @@ //! //! - **Macros**: //! - `for_each_hash!`: A macro that takes a list of hash function types and a -//! block of code to execute for each hash type. +//! block of code to execute for each hash type. //! //! - **Test Functions**: //! - Defined to test various aspects of `ResourceIndex`, parameterized by @@ -174,7 +174,7 @@ fn test_build_index_with_file() { .get_resource_by_path("file.txt") .expect("Failed to get resource"); assert_eq!( - resource, &expected_resource, + resource, expected_resource, "{:?} != {:?}", resource, expected_resource ); @@ -238,7 +238,7 @@ fn test_build_index_with_directory() { .get_resource_by_path("dir/file.txt") .expect("Failed to get resource"); assert_eq!( - resource, &expected_resource, + resource, expected_resource, "{:?} != {:?}", resource, expected_resource ); @@ -279,12 +279,12 @@ fn test_build_index_with_multiple_files() { let resource = index .get_resource_by_path("file1.txt") .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource1, "{:?}", resource); + assert_eq!(resource, expected_resource1, "{:?}", resource); let resource = index .get_resource_by_path("file2.txt") .expect("Failed to get resource"); - assert_eq!(resource, &expected_resource2, "{:?}", resource); + assert_eq!(resource, expected_resource2, "{:?}", resource); }); } @@ -328,12 +328,12 @@ fn test_build_index_with_multiple_directories() { let resource = index .get_resource_by_path("dir1/file1.txt") 
.expect("Resource not found"); - assert_eq!(resource, &expected_resource1, "{:?}", resource); + assert_eq!(resource, expected_resource1, "{:?}", resource); let resource = index .get_resource_by_path("dir2/file2.txt") .expect("Resource not found"); - assert_eq!(resource, &expected_resource2, "{:?}", resource); + assert_eq!(resource, expected_resource2, "{:?}", resource); }); } @@ -391,7 +391,7 @@ fn test_resource_index_update() { let expected_resource = get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) .expect("Failed to get indexed resource"); - assert_eq!(resource, &expected_resource, "{:?}", resource); + assert_eq!(resource, expected_resource, "{:?}", resource); let _resource = index .get_resource_by_path("new_file.txt") From 07fd50fe37af2fa1a04e6f21ad5b04ea57d86fa8 Mon Sep 17 00:00:00 2001 From: Kirill Taran Date: Mon, 22 Jul 2024 14:47:23 +0900 Subject: [PATCH 33/46] Renamed the macro and its type parameter --- fs-index/src/tests.rs | 60 +++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 65a21728..8b166e24 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -8,7 +8,7 @@ //! # Structure //! //! - **Macros**: -//! - `for_each_hash!`: A macro that takes a list of hash function types and a +//! - `for_each_type!`: A macro that takes a list of hash function types and a //! block of code to execute for each hash type. //! //! - **Test Functions**: @@ -24,7 +24,7 @@ //! To add a new test for a specific hash type: //! 1. Write a block of code generic over the hash type (a Type implementing //! ResourceId trait). -//! 2. Use the `for_each_hash!` macro to execute the block of code for each +//! 2. Use the `for_each_type!` macro to execute the block of code for each //! desired hash type. 
use dev_hash::{Blake3, Crc32}; @@ -42,11 +42,11 @@ use crate::{ /// A macro that takes a list of hash function types and a block of code to /// execute for each hash type. #[macro_export] -macro_rules! for_each_hash { +macro_rules! for_each_type { ($($hash_type:ty),+ => $body:block) => { $( { - type H = $hash_type; + type Id = $hash_type; $body } )+ @@ -54,11 +54,11 @@ macro_rules! for_each_hash { } /// A helper function to get [`IndexedResource`] from a file path -fn get_indexed_resource_from_file>( +fn get_indexed_resource_from_file>( path: P, parent_dir: P, -) -> Result> { - let id = H::from_path(&path)?; +) -> Result> { + let id = Id::from_path(&path)?; let relative_path = path .as_ref() @@ -81,7 +81,7 @@ fn get_indexed_resource_from_file>( /// - Assert that the loaded index matches the original index. #[test] fn test_store_and_load_index() { - for_each_hash!(Crc32, Blake3 => { + for_each_type!(Crc32, Blake3 => { let temp_dir = TempDir::with_prefix("ark_test_store_and_load_index") .expect("Failed to create temp dir"); let root_path = temp_dir.path(); @@ -89,7 +89,7 @@ fn test_store_and_load_index() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let index: ResourceIndex = + let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); assert_eq!(index.len(), 1, "{:?}", index); index.store().expect("Failed to store index"); @@ -111,7 +111,7 @@ fn test_store_and_load_index() { /// - Assert that the loaded index matches the original index. 
#[test] fn test_store_and_load_index_with_collisions() { - for_each_hash!(Crc32, Blake3 => { + for_each_type!(Crc32, Blake3 => { let temp_dir = TempDir::with_prefix("ark_test_store_and_load_index_with_collisions") .expect("Failed to create temp dir"); @@ -131,9 +131,9 @@ fn test_store_and_load_index_with_collisions() { // Now we have 4 files with the same content (same checksum) - let index: ResourceIndex = + let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); - let checksum = H::from_path(&file_path).expect("Failed to get checksum"); + let checksum = Id::from_path(&file_path).expect("Failed to get checksum"); assert_eq!(index.len(), 4, "{:?}", index); assert_eq!(index.collisions().len(), 1, "{:?}", index); assert_eq!(index.collisions()[&checksum].len(), 4, "{:?}", index); @@ -156,14 +156,14 @@ fn test_store_and_load_index_with_collisions() { /// - Assert that the resource retrieved by ID matches the expected resource. #[test] fn test_build_index_with_file() { - for_each_hash!(Crc32, Blake3 => { + for_each_type!(Crc32, Blake3 => { let temp_dir = TempDir::with_prefix("ark_test_build_index_with_file") .expect("Failed to create temp dir"); let root_path = temp_dir.path(); let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let expected_resource: IndexedResource = + let expected_resource: IndexedResource = get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) .expect("Failed to get indexed resource"); @@ -190,7 +190,7 @@ fn test_build_index_with_file() { /// - Assert that the index contains one entries. 
#[test] fn test_build_index_with_empty_file() { - for_each_hash!(Crc32, Blake3 => { + for_each_type!(Crc32, Blake3 => { let temp_dir = TempDir::with_prefix("ark_test_build_index_with_empty_file") .expect("Failed to create temp dir"); let root_path = temp_dir.path(); @@ -201,7 +201,7 @@ fn test_build_index_with_empty_file() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let index: ResourceIndex = + let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); assert_eq!(index.len(), 1, "{:?}", index); }); @@ -218,7 +218,7 @@ fn test_build_index_with_empty_file() { /// - Assert that the resource retrieved by ID matches the expected resource. #[test] fn test_build_index_with_directory() { - for_each_hash!(Crc32, Blake3 => { + for_each_type!(Crc32, Blake3 => { let temp_dir = TempDir::with_prefix("ark_test_build_index_with_directory") .expect("Failed to create temp dir"); let root_path = temp_dir.path(); @@ -227,7 +227,7 @@ fn test_build_index_with_directory() { fs::create_dir(&dir_path).expect("Failed to create dir"); let file_path = dir_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let expected_resource: IndexedResource = + let expected_resource: IndexedResource = get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) .expect("Failed to get indexed resource"); @@ -255,7 +255,7 @@ fn test_build_index_with_directory() { /// expected resource. 
#[test] fn test_build_index_with_multiple_files() { - for_each_hash!(Crc32, Blake3 => { + for_each_type!(Crc32, Blake3 => { let temp_dir = TempDir::with_prefix("ark_test_build_index_with_multiple_files") .expect("Failed to create temp dir"); @@ -266,7 +266,7 @@ fn test_build_index_with_multiple_files() { let file2_path = root_path.join("file2.txt"); fs::write(&file2_path, "file2 content").expect("Failed to write to file"); - let expected_resource1: IndexedResource = + let expected_resource1: IndexedResource = get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) .expect("Failed to get indexed resource"); let expected_resource2 = @@ -299,7 +299,7 @@ fn test_build_index_with_multiple_files() { /// expected resources. #[test] fn test_build_index_with_multiple_directories() { - for_each_hash!(Crc32, Blake3 => { + for_each_type!(Crc32, Blake3 => { let temp_dir = TempDir::with_prefix("ark_test_build_index_with_multiple_directories") .expect("Failed to create temp dir"); @@ -315,7 +315,7 @@ fn test_build_index_with_multiple_directories() { let file2_path = dir2_path.join("file2.txt"); fs::write(&file2_path, "file2 content").expect("Failed to write to file"); - let expected_resource1: IndexedResource = + let expected_resource1: IndexedResource = get_indexed_resource_from_file(&file1_path, &root_path.to_path_buf()) .expect("Failed to get indexed resource"); let expected_resource2 = @@ -351,7 +351,7 @@ fn test_build_index_with_multiple_directories() { /// update. 
#[test] fn test_resource_index_update() { - for_each_hash!(Crc32, Blake3 => { + for_each_type!(Crc32, Blake3 => { let temp_dir = TempDir::with_prefix("ark_test_resource_index_update") .expect("Failed to create temp dir"); let root_path = temp_dir.path(); @@ -362,7 +362,7 @@ fn test_resource_index_update() { let image_path = root_path.join("image.png"); fs::write(&image_path, "image content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 2, "{:?}", index); @@ -418,7 +418,7 @@ fn test_resource_index_update() { /// - Assert index.collisions contains the expected number of entries. #[test] fn test_add_colliding_files() { - for_each_hash!(Crc32, Blake3 => { + for_each_type!(Crc32, Blake3 => { let temp_dir = TempDir::with_prefix("ark_test_add_colliding_files") .expect("Failed to create temp dir"); let root_path = temp_dir.path(); @@ -426,7 +426,7 @@ fn test_add_colliding_files() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 1, "{:?}", index); @@ -455,7 +455,7 @@ fn test_add_colliding_files() { /// update. 
#[test] fn test_num_collisions() { - for_each_hash!(Crc32, Blake3 => { + for_each_type!(Crc32, Blake3 => { let temp_dir = TempDir::with_prefix("ark_test_num_collisions") .expect("Failed to create temp dir"); let root_path = temp_dir.path(); @@ -463,7 +463,7 @@ fn test_num_collisions() { let file_path = root_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let mut index: ResourceIndex = + let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 1, "{:?}", index); @@ -493,7 +493,7 @@ fn test_num_collisions() { /// (0) #[test] fn test_hidden_files() { - for_each_hash!(Crc32, Blake3 => { + for_each_type!(Crc32, Blake3 => { let temp_dir = TempDir::with_prefix("ark_test_hidden_files") .expect("Failed to create temp dir"); let root_path = temp_dir.path(); @@ -501,7 +501,7 @@ fn test_hidden_files() { let file_path = root_path.join(".hidden_file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); - let index: ResourceIndex = + let index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); index.store().expect("Failed to store index"); assert_eq!(index.len(), 0, "{:?}", index); From 7a83988d1a17427a82908498db89e926f8c7bff4 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 3 Aug 2024 13:01:49 +0300 Subject: [PATCH 34/46] test(fs-index): add tests for update_all() Signed-off-by: Tarek --- fs-index/src/tests.rs | 93 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 8b166e24..bd67a498 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -507,3 +507,96 @@ fn test_hidden_files() { assert_eq!(index.len(), 0, "{:?}", index); }); } + +/// Test that we detect added files in `update_all`. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. 
+/// - Build a resource index in the temporary directory. +/// - Create a new file. +/// - Update the resource index. +/// - Assert that the return from `update_all` is that `added` includes the +/// new file. +#[test] +fn test_update_all_added_files() { + for_each_type!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_added_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "new file content") + .expect("Failed to write to file"); + + let update_result = index.update_all().expect("Failed to update index"); + assert_eq!(update_result.added().len(), 1, "{:?}", update_result); + }); +} + +/// Test that we detect updated files using the last modified time. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Sleep for a second to ensure the last modified time is different. +/// - Update the file. +/// - Update the resource index. +/// - Assert that the return from `update_all` is that `added` includes the +/// updated file. 
+#[test] +fn test_update_all_updated_files() { + for_each_type!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_updated_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + + std::thread::sleep(std::time::Duration::from_secs(1)); + + fs::write(&file_path, "updated file content") + .expect("Failed to write to file"); + + let update_result = index.update_all().expect("Failed to update index"); + assert_eq!(update_result.added().len(), 1, "{:?}", update_result); + }); +} + +/// Test that we detect deleted files in `update_all`. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Remove the file. +/// - Update the resource index. +/// - Assert that the return from `update_all` is that `removed` includes the +/// deleted file. 
+#[test] +fn test_update_all_deleted_files() { + for_each_type!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_deleted_files") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + + fs::remove_file(&file_path).expect("Failed to remove file"); + + let update_result = index.update_all().expect("Failed to update index"); + assert_eq!(update_result.removed().len(), 1, "{:?}", update_result); + }); +} From 649e49bae17308f652e95a6ed45a8b77ca3412de Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 3 Aug 2024 15:18:56 +0300 Subject: [PATCH 35/46] fix(fs-index): strip root path prefix from entries Signed-off-by: Tarek --- fs-index/src/index.rs | 26 ++++++-------------------- fs-index/src/utils.rs | 22 ++++++++++++++++------ 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 2d2857c6..0175b688 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -279,14 +279,12 @@ impl ResourceIndex { // Discover paths in the root directory let paths = discover_paths(&root)?; let entries: HashMap> = - scan_entries(paths); + scan_entries(&root, paths); // Strip the root path from the entries let entries: HashMap> = entries .into_iter() .map(|(path, resource)| { - let relative_path = - path.strip_prefix(&root).unwrap().to_path_buf(); let resource = IndexEntry { id: resource.id().clone(), last_modified: resource.last_modified(), @@ -296,9 +294,9 @@ impl ResourceIndex { id_to_paths .entry(resource.id.clone()) .or_default() - .insert(relative_path.clone()); + .insert(path.clone()); - (relative_path, resource) + (path, resource) }) .collect(); @@ -326,7 +324,7 @@ impl ResourceIndex { // Assuming that collection manipulation is faster than repeated // lookups 
let current_entries: HashMap> = - scan_entries(current_paths.clone()); + scan_entries(self.root(), current_paths); let previous_entries = self.path_to_resource.clone(); // `preserved_entries` is the intersection of current_entries and // previous_entries @@ -462,29 +460,17 @@ impl ResourceIndex { for (path, resource) in added_entries { log::trace!("Resource added: {:?}", path); - - // strip the root path from the path - let relative_path = path - .strip_prefix(&self.root) - .unwrap() - .to_path_buf(); - let resource = IndexedResource::new( - resource.id().clone(), - relative_path.clone(), - resource.last_modified(), - ); let index_entry_resource = IndexEntry { id: resource.id().clone(), last_modified: resource.last_modified(), }; - self.path_to_resource - .insert(relative_path.clone(), index_entry_resource.clone()); + .insert(path.clone(), index_entry_resource.clone()); let id = resource.id.clone(); self.id_to_paths .entry(id.clone()) .or_default() - .insert(relative_path.clone()); + .insert(path.clone()); added.insert(id, resource); } diff --git a/fs-index/src/utils.rs b/fs-index/src/utils.rs index 52f96bc4..626a0fbe 100644 --- a/fs-index/src/utils.rs +++ b/fs-index/src/utils.rs @@ -99,27 +99,37 @@ pub(crate) fn discover_paths>( } /// A helper function to scan entries and create indexed resources -pub(crate) fn scan_entries( +pub(crate) fn scan_entries, Id: ResourceId>( + root_path: P, paths: Vec, ) -> HashMap> { let mut path_to_resource = HashMap::new(); for entry in paths { - let resource = scan_entry(entry); + let resource = scan_entry(root_path.as_ref(), entry); path_to_resource.insert(resource.path().to_path_buf(), resource); } path_to_resource } /// A helper function to scan one entry and create an indexed resource -pub(crate) fn scan_entry( +pub(crate) fn scan_entry, Id: ResourceId>( + root_path: P, entry: DirEntry, ) -> IndexedResource { let path = entry.path().to_path_buf(); - let metadata = entry.metadata().unwrap(); - let last_modified = 
metadata.modified().unwrap(); + // Strip the root path from the entry path + let path = path + .strip_prefix(root_path.as_ref()) + .expect("Failed to strip prefix"); + let path = path.to_path_buf(); + + let metadata = entry.metadata().expect("Failed to get metadata"); + let last_modified = metadata + .modified() + .expect("Failed to get modified"); // Get the ID of the resource - let id = Id::from_path(&path).unwrap(); + let id = Id::from_path(entry.path()).expect("Failed to get ID from path"); // Create the indexed resource IndexedResource::new(id, path, last_modified) From 8437eafbd9bdf012b697824a6ea364f5f943b591 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 3 Aug 2024 15:22:03 +0300 Subject: [PATCH 36/46] refactor(fs-index): rename IndexEntry to avoid confusion Signed-off-by: Tarek --- fs-index/src/index.rs | 24 +++++++++++++++++------- fs-index/src/serde.rs | 4 ++-- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 0175b688..3ffa5c9c 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -56,14 +56,24 @@ impl IndexedResource { } } +/// Represents a resource ID with its last modified time #[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Clone, Debug)] -pub struct IndexEntry { +pub struct ResourceIdWithTimestamp { /// The unique identifier of the resource pub(crate) id: Id, /// The last modified time of the resource (from the file system metadata) pub(crate) last_modified: SystemTime, } +/// Represents a resource path with its last modified time +#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Clone, Debug)] +pub struct ResourcePathWithTimestamp { + /// The path of the resource, relative to the root path + pub(crate) path: PathBuf, + /// The last modified time of the resource (from the file system metadata) + pub(crate) last_modified: SystemTime, +} + /// Represents the index of resources in a directory. 
/// /// [`ResourceIndex`] provides functionality for managing a directory index, @@ -125,7 +135,7 @@ where /// or files with the same content) pub(crate) id_to_paths: HashMap>, /// A map from resource paths to resources - pub(crate) path_to_resource: HashMap>, + pub(crate) path_to_resource: HashMap>, } /// Represents the result of an update operation on the ResourceIndex @@ -282,10 +292,10 @@ impl ResourceIndex { scan_entries(&root, paths); // Strip the root path from the entries - let entries: HashMap> = entries + let entries: HashMap> = entries .into_iter() .map(|(path, resource)| { - let resource = IndexEntry { + let resource = ResourceIdWithTimestamp { id: resource.id().clone(), last_modified: resource.last_modified(), }; @@ -328,7 +338,7 @@ impl ResourceIndex { let previous_entries = self.path_to_resource.clone(); // `preserved_entries` is the intersection of current_entries and // previous_entries - let preserved_entries: HashMap> = + let preserved_entries: HashMap> = current_entries .iter() .filter_map(|(path, _resource)| { @@ -414,7 +424,7 @@ impl ResourceIndex { .collect(); // Remove resources that are not in the current entries - let removed_entries: HashMap> = + let removed_entries: HashMap> = previous_entries .iter() .filter_map(|(path, resource)| { @@ -460,7 +470,7 @@ impl ResourceIndex { for (path, resource) in added_entries { log::trace!("Resource added: {:?}", path); - let index_entry_resource = IndexEntry { + let index_entry_resource = ResourceIdWithTimestamp { id: resource.id().clone(), last_modified: resource.last_modified(), }; diff --git a/fs-index/src/serde.rs b/fs-index/src/serde.rs index 600d94f8..8c575127 100644 --- a/fs-index/src/serde.rs +++ b/fs-index/src/serde.rs @@ -12,7 +12,7 @@ use serde::{ use data_resource::ResourceId; -use crate::{index::IndexEntry, ResourceIndex}; +use crate::{index::ResourceIdWithTimestamp, ResourceIndex}; /// Data structure for serializing and deserializing the index #[derive(Serialize, Deserialize)] @@ -87,7 
+87,7 @@ where for (path, resource_data) in index_data.resources { let last_modified = SystemTime::UNIX_EPOCH + std::time::Duration::from_nanos(resource_data.last_modified); - let resource = IndexEntry { + let resource = ResourceIdWithTimestamp { id: resource_data.id, last_modified, }; From cd5bb601e3016ccf657a20dbcefaae1e74b286b7 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 3 Aug 2024 16:00:06 +0300 Subject: [PATCH 37/46] fix(fs-index): IndexUpdate to include a hashset of resources added Signed-off-by: Tarek --- fs-index/src/index.rs | 59 ++++++++++++++++++++----------------------- fs-index/src/tests.rs | 47 ++++++++++++++++++++++++++++++++++ fs-index/src/utils.rs | 31 +++++++++++------------ 3 files changed, 90 insertions(+), 47 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 3ffa5c9c..6d6335f5 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -142,14 +142,14 @@ where #[derive(PartialEq, Debug)] pub struct IndexUpdate { /// Resources that were added during the update - added: HashMap>, + added: HashMap>, /// Resources that were removed during the update removed: HashSet, } impl IndexUpdate { /// Return the resources that were added during the update - pub fn added(&self) -> &HashMap> { + pub fn added(&self) -> &HashMap> { &self.added } @@ -288,18 +288,13 @@ impl ResourceIndex { // Discover paths in the root directory let paths = discover_paths(&root)?; - let entries: HashMap> = + let entries: HashMap> = scan_entries(&root, paths); // Strip the root path from the entries let entries: HashMap> = entries .into_iter() .map(|(path, resource)| { - let resource = ResourceIdWithTimestamp { - id: resource.id().clone(), - last_modified: resource.last_modified(), - }; - // Update the ID to paths map id_to_paths .entry(resource.id.clone()) @@ -326,14 +321,15 @@ impl ResourceIndex { log::debug!("Updating index at root path: {:?}", self.root); log::trace!("Current index: {:#?}", self); - let mut added: HashMap> = HashMap::new(); 
+ let mut added: HashMap> = + HashMap::new(); let mut removed: HashSet = HashSet::new(); let current_paths = discover_paths(&self.root)?; // Assuming that collection manipulation is faster than repeated // lookups - let current_entries: HashMap> = + let current_entries: HashMap> = scan_entries(self.root(), current_paths); let previous_entries = self.path_to_resource.clone(); // `preserved_entries` is the intersection of current_entries and @@ -350,7 +346,7 @@ impl ResourceIndex { // `created_entries` is the difference between current_entries and // preserved_entries - let created_entries: HashMap> = + let created_entries: HashMap> = current_entries .iter() .filter_map(|(path, resource)| { @@ -365,17 +361,18 @@ impl ResourceIndex { // `updated_entries` is the intersection of current_entries and // preserved_entries where the last modified time has changed // significantly (> RESOURCE_UPDATED_THRESHOLD) - let updated_entries: HashMap> = + let updated_entries: HashMap> = current_entries .into_iter() - .filter(|(path, entry)| { + .filter(|(path, _entry)| { if !preserved_entries.contains_key(path) { false } else { let our_entry = &self.path_to_resource[path]; let prev_modified = our_entry.last_modified; - let result = entry.path().metadata(); + let entry_path = self.root.join(path); + let result = fs::metadata(&entry_path); match result { Err(msg) => { log::error!( @@ -455,33 +452,33 @@ impl ResourceIndex { } } - let added_entries: HashMap> = - updated_entries - .iter() - .chain(created_entries.iter()) - .filter_map(|(path, resource)| { - if self.path_to_resource.contains_key(path) { - None - } else { - Some((path.clone(), resource.clone())) - } - }) + // added_entries = created_entries + updated_entries + let added_entries: HashMap> = + created_entries + .into_iter() + .chain(updated_entries.into_iter()) .collect(); for (path, resource) in added_entries { log::trace!("Resource added: {:?}", path); - let index_entry_resource = ResourceIdWithTimestamp { - id: 
resource.id().clone(), - last_modified: resource.last_modified(), - }; self.path_to_resource - .insert(path.clone(), index_entry_resource.clone()); + .insert(path.clone(), resource.clone()); let id = resource.id.clone(); self.id_to_paths .entry(id.clone()) .or_default() .insert(path.clone()); - added.insert(id, resource); + + let resource_path_with_timestamp = ResourcePathWithTimestamp { + path: path.clone(), + last_modified: resource.last_modified, + }; + // If the ID is not in the added map, add it + // If the ID is in the added map, add the path to the set + added + .entry(id) + .or_default() + .insert(resource_path_with_timestamp); } Ok(IndexUpdate { added, removed }) diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index bd67a498..c1b8b065 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -600,3 +600,50 @@ fn test_update_all_deleted_files() { assert_eq!(update_result.removed().len(), 1, "{:?}", update_result); }); } + +/// Test that we detect files with the same content (and thus the same hash) in +/// `update_all`. +/// +/// ## Test scenario: +/// - Create a file within the temporary directory. +/// - Build a resource index in the temporary directory. +/// - Modify the file. +/// - Create a new file with the same content but different name (path). +/// - Update the resource index. +/// - Assert that the return from `update_all` is that `added` includes both +/// files.
+#[test] +fn test_update_all_files_with_same_hash() { + for_each_type!(Crc32, Blake3 => { + let temp_dir = TempDir::with_prefix("ark_test_files_with_same_hash") + .expect("Failed to create temp dir"); + let root_path = temp_dir.path(); + + let file_path = root_path.join("file.txt"); + fs::write(&file_path, "file content").expect("Failed to write to file"); + + let mut index: ResourceIndex = + ResourceIndex::build(root_path).expect("Failed to build index"); + + std::thread::sleep(std::time::Duration::from_secs(1)); + + fs::write(&file_path, "updated file content") + .expect("Failed to write to file"); + + let new_file_path = root_path.join("new_file.txt"); + fs::write(&new_file_path, "updated file content") + .expect("Failed to write to file"); + + let update_result = index.update_all().expect("Failed to update index"); + // The length of `added` should be 1 because the new file has the same + // content as the updated file. + assert_eq!(update_result.added().len(), 1, "{:?}", update_result); + + // The length of `added`'s first element should be 2 + assert_eq!(update_result.added().values().next().unwrap().len(), 2); + + // The length of `collisions` should be 1 because the new file has the + // same content as the updated file.
+ assert_eq!(index.collisions().len(), 1, "{:?}", index); + }); +} diff --git a/fs-index/src/utils.rs b/fs-index/src/utils.rs index 626a0fbe..1f2f5988 100644 --- a/fs-index/src/utils.rs +++ b/fs-index/src/utils.rs @@ -11,7 +11,7 @@ use data_error::{ArklibError, Result}; use data_resource::ResourceId; use fs_storage::{ARK_FOLDER, INDEX_PATH}; -use crate::{index::IndexedResource, ResourceIndex}; +use crate::{index::ResourceIdWithTimestamp, ResourceIndex}; /// Load the index from the file system fn load_index, Id: ResourceId>( @@ -102,27 +102,27 @@ pub(crate) fn discover_paths>( pub(crate) fn scan_entries, Id: ResourceId>( root_path: P, paths: Vec, -) -> HashMap> { +) -> HashMap> { let mut path_to_resource = HashMap::new(); for entry in paths { - let resource = scan_entry(root_path.as_ref(), entry); - path_to_resource.insert(resource.path().to_path_buf(), resource); + let resource = scan_entry(entry.clone()); + + let path = entry.path().to_path_buf(); + // Strip the root path from the entry path + let path = path + .strip_prefix(root_path.as_ref()) + .expect("Failed to strip prefix"); + let path = path.to_path_buf(); + + path_to_resource.insert(path, resource); } path_to_resource } /// A helper function to scan one entry and create an indexed resource -pub(crate) fn scan_entry, Id: ResourceId>( - root_path: P, +pub(crate) fn scan_entry( entry: DirEntry, -) -> IndexedResource { - let path = entry.path().to_path_buf(); - // Strip the root path from the entry path - let path = path - .strip_prefix(root_path.as_ref()) - .expect("Failed to strip prefix"); - let path = path.to_path_buf(); - +) -> ResourceIdWithTimestamp { let metadata = entry.metadata().expect("Failed to get metadata"); let last_modified = metadata .modified() @@ -131,8 +131,7 @@ pub(crate) fn scan_entry, Id: ResourceId>( // Get the ID of the resource let id = Id::from_path(entry.path()).expect("Failed to get ID from path"); - // Create the indexed resource - IndexedResource::new(id, path, last_modified) + 
ResourceIdWithTimestamp { id, last_modified } } /// A helper function to check if the entry should be indexed (not hidden or From 5248aaf5aef81762652c97d7388c5a73254635fa Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 3 Aug 2024 16:01:18 +0300 Subject: [PATCH 38/46] fix: cargo clippy Signed-off-by: Tarek --- fs-index/src/index.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 6d6335f5..0221d7ae 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -456,7 +456,7 @@ impl ResourceIndex { let added_entries: HashMap> = created_entries .into_iter() - .chain(updated_entries.into_iter()) + .chain(updated_entries) .collect(); for (path, resource) in added_entries { From 511e718fa2c33ede4c22ed9a02ce596832357567 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 3 Aug 2024 16:44:45 +0300 Subject: [PATCH 39/46] test(fs-index): add delay before rewriting the content Signed-off-by: Tarek --- fs-index/src/tests.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index c1b8b065..7b3b0b3d 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -364,6 +364,7 @@ fn test_resource_index_update() { let mut index: ResourceIndex = ResourceIndex::build(root_path).expect("Failed to build index"); + std::thread::sleep(std::time::Duration::from_secs(1)); index.store().expect("Failed to store index"); assert_eq!(index.len(), 2, "{:?}", index); From c9626779972087bd045a677d93755552f0b554a1 Mon Sep 17 00:00:00 2001 From: Tarek Date: Sat, 3 Aug 2024 17:07:59 +0300 Subject: [PATCH 40/46] test(fs-index): avoid canonicalization on windows Signed-off-by: Tarek --- fs-index/src/index.rs | 15 ++++++++++----- fs-index/src/tests.rs | 2 -- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 0221d7ae..9854bbaa 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -281,15 +281,20 @@ impl 
ResourceIndex { pub fn build>(root_path: P) -> Result { log::debug!("Building index at root path: {:?}", root_path.as_ref()); - // Canonicalize the root path - let root = fs::canonicalize(&root_path)?; + let root_path = root_path.as_ref(); + // Canonicalize the root path (only for unix-like systems) + // Note: On windows, canonicalization adds a prefix to the path + // which breaks the path comparison in the tests + #[cfg(target_family = "unix")] + let root_path = root_path.canonicalize()?; + let mut id_to_paths: HashMap> = HashMap::new(); let mut path_to_resource = HashMap::new(); // Discover paths in the root directory - let paths = discover_paths(&root)?; + let paths = discover_paths(&root_path)?; let entries: HashMap> = - scan_entries(&root, paths); + scan_entries(&root_path, paths); // Strip the root path from the entries let entries: HashMap> = entries @@ -309,7 +314,7 @@ impl ResourceIndex { path_to_resource.extend(entries.clone()); let index = ResourceIndex { - root, + root: root_path.to_path_buf(), id_to_paths, path_to_resource, }; diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 7b3b0b3d..89749cc8 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -239,8 +239,6 @@ fn test_build_index_with_directory() { .expect("Failed to get resource"); assert_eq!( resource, expected_resource, - "{:?} != {:?}", - resource, expected_resource ); }); } From 17393df37b74b8ce01d67a36d9242b47d49d3895 Mon Sep 17 00:00:00 2001 From: Kirill Taran Date: Sun, 4 Aug 2024 11:35:16 +0900 Subject: [PATCH 41/46] Minor restructuring --- fs-index/src/index.rs | 125 +++++++++++++++++++----------------------- fs-index/src/serde.rs | 8 +-- fs-index/src/utils.rs | 10 ++-- 3 files changed, 65 insertions(+), 78 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 9854bbaa..f33ea139 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -56,23 +56,16 @@ impl IndexedResource { } } -/// Represents a resource ID with its last 
modified time +/// Represents an item with its last modified time #[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Clone, Debug)] -pub struct ResourceIdWithTimestamp { +pub struct Timestamped { /// The unique identifier of the resource - pub(crate) id: Id, + pub(crate) item: Item, /// The last modified time of the resource (from the file system metadata) pub(crate) last_modified: SystemTime, } -/// Represents a resource path with its last modified time -#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Clone, Debug)] -pub struct ResourcePathWithTimestamp { - /// The path of the resource, relative to the root path - pub(crate) path: PathBuf, - /// The last modified time of the resource (from the file system metadata) - pub(crate) last_modified: SystemTime, -} +type IndexedPaths = HashSet>; /// Represents the index of resources in a directory. /// @@ -135,21 +128,21 @@ where /// or files with the same content) pub(crate) id_to_paths: HashMap>, /// A map from resource paths to resources - pub(crate) path_to_resource: HashMap>, + pub(crate) path_to_id: HashMap>, } /// Represents the result of an update operation on the ResourceIndex #[derive(PartialEq, Debug)] pub struct IndexUpdate { /// Resources that were added during the update - added: HashMap>, + added: HashMap, /// Resources that were removed during the update removed: HashSet, } impl IndexUpdate { /// Return the resources that were added during the update - pub fn added(&self) -> &HashMap> { + pub fn added(&self) -> &HashMap { &self.added } @@ -162,12 +155,12 @@ impl IndexUpdate { impl ResourceIndex { /// Return the number of resources in the index pub fn len(&self) -> usize { - self.path_to_resource.len() + self.path_to_id.len() } /// Return true if the index is empty pub fn is_empty(&self) -> bool { - self.path_to_resource.is_empty() + self.path_to_id.is_empty() } /// Return the root path of the index @@ -179,11 +172,11 @@ impl ResourceIndex { pub fn resources(&self) -> Vec> { // Using path_to_resource so to avoid 
not collecting duplicates let mut resources = vec![]; - for (path, resource) in self.path_to_resource.iter() { + for (path, id) in self.path_to_id.iter() { resources.push(IndexedResource::new( - resource.id.clone(), + id.item.clone(), path.clone(), - resource.last_modified, + id.last_modified, )); } resources @@ -247,11 +240,11 @@ impl ResourceIndex { let paths = self.id_to_paths.get(id)?; for path in paths { - let resource = self.path_to_resource.get(path)?; + let id = self.path_to_id.get(path)?; let resource = IndexedResource::new( - resource.id.clone(), + id.item.clone(), path.clone(), - resource.last_modified, + id.last_modified, ); resources.push(resource); } @@ -268,11 +261,11 @@ impl ResourceIndex { &self, path: P, ) -> Option> { - let resource = self.path_to_resource.get(path.as_ref())?; + let id = self.path_to_id.get(path.as_ref())?; let resource = IndexedResource::new( - resource.id.clone(), + id.item.clone(), path.as_ref().to_path_buf(), - resource.last_modified, + id.last_modified, ); Some(resource) } @@ -293,20 +286,20 @@ impl ResourceIndex { // Discover paths in the root directory let paths = discover_paths(&root_path)?; - let entries: HashMap> = + let entries: HashMap> = scan_entries(&root_path, paths); // Strip the root path from the entries - let entries: HashMap> = entries + let entries: HashMap> = entries .into_iter() - .map(|(path, resource)| { + .map(|(path, id)| { // Update the ID to paths map id_to_paths - .entry(resource.id.clone()) + .entry(id.item.clone()) .or_default() .insert(path.clone()); - (path, resource) + (path, id) }) .collect(); @@ -316,7 +309,7 @@ impl ResourceIndex { let index = ResourceIndex { root: root_path.to_path_buf(), id_to_paths, - path_to_resource, + path_to_id: path_to_resource, }; Ok(index) } @@ -326,20 +319,19 @@ impl ResourceIndex { log::debug!("Updating index at root path: {:?}", self.root); log::trace!("Current index: {:#?}", self); - let mut added: HashMap> = - HashMap::new(); + let mut added: HashMap = 
HashMap::new(); let mut removed: HashSet = HashSet::new(); let current_paths = discover_paths(&self.root)?; - // Assuming that collection manipulation is faster than repeated - // lookups - let current_entries: HashMap> = + // Assuming that collection manipulation + // is faster than repeated lookups + let current_entries: HashMap> = scan_entries(self.root(), current_paths); - let previous_entries = self.path_to_resource.clone(); - // `preserved_entries` is the intersection of current_entries and - // previous_entries - let preserved_entries: HashMap> = + let previous_entries = self.path_to_id.clone(); + // `preserved_entries` is the intersection of + // current_entries and previous_entries + let preserved_entries: HashMap> = current_entries .iter() .filter_map(|(path, _resource)| { @@ -349,9 +341,9 @@ impl ResourceIndex { }) .collect(); - // `created_entries` is the difference between current_entries and - // preserved_entries - let created_entries: HashMap> = + // `created_entries` is the difference + // between current_entries and preserved_entries + let created_entries: HashMap> = current_entries .iter() .filter_map(|(path, resource)| { @@ -366,14 +358,14 @@ impl ResourceIndex { // `updated_entries` is the intersection of current_entries and // preserved_entries where the last modified time has changed // significantly (> RESOURCE_UPDATED_THRESHOLD) - let updated_entries: HashMap> = + let updated_entries: HashMap> = current_entries .into_iter() .filter(|(path, _entry)| { if !preserved_entries.contains_key(path) { false } else { - let our_entry = &self.path_to_resource[path]; + let our_entry = &self.path_to_id[path]; let prev_modified = our_entry.last_modified; let entry_path = self.root.join(path); @@ -409,7 +401,7 @@ impl ResourceIndex { \twas {:?} \tnow {:?} \telapsed {:?}", - our_entry.id, + our_entry.item, path.display(), prev_modified, curr_modified, @@ -426,7 +418,7 @@ impl ResourceIndex { .collect(); // Remove resources that are not in the current 
entries - let removed_entries: HashMap> = + let removed_entries: HashMap> = previous_entries .iter() .filter_map(|(path, resource)| { @@ -437,19 +429,19 @@ impl ResourceIndex { } }) .collect(); - for (path, resource) in removed_entries { + for (path, id) in removed_entries { log::trace!( "Resource removed: {:?}, last modified: {:?}", path, - resource.last_modified + id.last_modified ); - self.path_to_resource.remove(&path); + self.path_to_id.remove(&path); self.id_to_paths - .get_mut(&resource.id) + .get_mut(&id.item) .unwrap() .remove(&path); - let id = resource.id.clone(); + let id = id.item.clone(); // Only remove the ID if it has no paths if self.id_to_paths[&id].is_empty() { self.id_to_paths.remove(&id); @@ -458,32 +450,29 @@ impl ResourceIndex { } // added_entries = created_entries + updated_entries - let added_entries: HashMap> = - created_entries - .into_iter() - .chain(updated_entries) - .collect(); + let added_entries: HashMap> = created_entries + .into_iter() + .chain(updated_entries) + .collect(); - for (path, resource) in added_entries { + for (path, id) in added_entries { log::trace!("Resource added: {:?}", path); - self.path_to_resource - .insert(path.clone(), resource.clone()); - let id = resource.id.clone(); + self.path_to_id.insert(path.clone(), id.clone()); + + let last_modified = id.last_modified; + let id = id.item.clone(); self.id_to_paths .entry(id.clone()) .or_default() .insert(path.clone()); - let resource_path_with_timestamp = ResourcePathWithTimestamp { - path: path.clone(), - last_modified: resource.last_modified, + let resource_path: Timestamped = Timestamped { + item: path.clone(), + last_modified, }; // If the ID is not in the added map, add it // If the ID is in the added map, add the path to the set - added - .entry(id) - .or_default() - .insert(resource_path_with_timestamp); + added.entry(id).or_default().insert(resource_path); } Ok(IndexUpdate { added, removed }) diff --git a/fs-index/src/serde.rs b/fs-index/src/serde.rs index 
8c575127..a0c70db8 100644 --- a/fs-index/src/serde.rs +++ b/fs-index/src/serde.rs @@ -12,7 +12,7 @@ use serde::{ use data_resource::ResourceId; -use crate::{index::ResourceIdWithTimestamp, ResourceIndex}; +use crate::{index::TimestampedId, ResourceIndex}; /// Data structure for serializing and deserializing the index #[derive(Serialize, Deserialize)] @@ -45,7 +45,7 @@ where state.serialize_field("root", &self.root)?; let mut resources = HashMap::new(); - for (path, resource) in &self.path_to_resource { + for (path, resource) in &self.path_to_id { let id = resource.id.clone(); let last_modified = resource .last_modified @@ -87,7 +87,7 @@ where for (path, resource_data) in index_data.resources { let last_modified = SystemTime::UNIX_EPOCH + std::time::Duration::from_nanos(resource_data.last_modified); - let resource = ResourceIdWithTimestamp { + let resource = TimestampedId { id: resource_data.id, last_modified, }; @@ -101,7 +101,7 @@ where Ok(ResourceIndex { root: index_data.root, id_to_paths, - path_to_resource, + path_to_id: path_to_resource, }) } } diff --git a/fs-index/src/utils.rs b/fs-index/src/utils.rs index 1f2f5988..8fe0c2a7 100644 --- a/fs-index/src/utils.rs +++ b/fs-index/src/utils.rs @@ -11,7 +11,7 @@ use data_error::{ArklibError, Result}; use data_resource::ResourceId; use fs_storage::{ARK_FOLDER, INDEX_PATH}; -use crate::{index::ResourceIdWithTimestamp, ResourceIndex}; +use crate::{index::TimestampedId, ResourceIndex}; /// Load the index from the file system fn load_index, Id: ResourceId>( @@ -102,7 +102,7 @@ pub(crate) fn discover_paths>( pub(crate) fn scan_entries, Id: ResourceId>( root_path: P, paths: Vec, -) -> HashMap> { +) -> HashMap> { let mut path_to_resource = HashMap::new(); for entry in paths { let resource = scan_entry(entry.clone()); @@ -120,9 +120,7 @@ pub(crate) fn scan_entries, Id: ResourceId>( } /// A helper function to scan one entry and create an indexed resource -pub(crate) fn scan_entry( - entry: DirEntry, -) -> 
ResourceIdWithTimestamp { +pub(crate) fn scan_entry(entry: DirEntry) -> TimestampedId { let metadata = entry.metadata().expect("Failed to get metadata"); let last_modified = metadata .modified() @@ -131,7 +129,7 @@ pub(crate) fn scan_entry( // Get the ID of the resource let id = Id::from_path(entry.path()).expect("Failed to get ID from path"); - ResourceIdWithTimestamp { id, last_modified } + TimestampedId { id, last_modified } } /// A helper function to check if the entry should be indexed (not hidden or From 1951fc92522c3c7cfd577317bd4cd7f9653fed87 Mon Sep 17 00:00:00 2001 From: Kirill Taran Date: Sun, 4 Aug 2024 11:42:01 +0900 Subject: [PATCH 42/46] Fix serde.rs --- fs-index/src/serde.rs | 21 ++++++++++++--------- fs-index/src/utils.rs | 11 +++++++---- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/fs-index/src/serde.rs b/fs-index/src/serde.rs index a0c70db8..176598a4 100644 --- a/fs-index/src/serde.rs +++ b/fs-index/src/serde.rs @@ -12,7 +12,7 @@ use serde::{ use data_resource::ResourceId; -use crate::{index::TimestampedId, ResourceIndex}; +use crate::{index::Timestamped, ResourceIndex}; /// Data structure for serializing and deserializing the index #[derive(Serialize, Deserialize)] @@ -45,9 +45,8 @@ where state.serialize_field("root", &self.root)?; let mut resources = HashMap::new(); - for (path, resource) in &self.path_to_id { - let id = resource.id.clone(); - let last_modified = resource + for (path, id) in &self.path_to_id { + let last_modified = id .last_modified .duration_since(SystemTime::UNIX_EPOCH) .map_err(|e| { @@ -58,7 +57,10 @@ where })? 
.as_nanos() as u64; - let resource_data = IndexedResourceData { id, last_modified }; + let resource_data = IndexedResourceData { + id: id.item.clone(), + last_modified, + }; resources.insert(path.clone(), resource_data); } @@ -87,13 +89,14 @@ where for (path, resource_data) in index_data.resources { let last_modified = SystemTime::UNIX_EPOCH + std::time::Duration::from_nanos(resource_data.last_modified); - let resource = TimestampedId { - id: resource_data.id, + + let id: Timestamped = Timestamped { + item: resource_data.id, last_modified, }; - path_to_resource.insert(path.clone(), resource.clone()); + path_to_resource.insert(path.clone(), id.clone()); id_to_paths - .entry(resource.id.clone()) + .entry(id.item.clone()) .or_insert_with(HashSet::new) .insert(path); } diff --git a/fs-index/src/utils.rs b/fs-index/src/utils.rs index 8fe0c2a7..745a196b 100644 --- a/fs-index/src/utils.rs +++ b/fs-index/src/utils.rs @@ -11,7 +11,7 @@ use data_error::{ArklibError, Result}; use data_resource::ResourceId; use fs_storage::{ARK_FOLDER, INDEX_PATH}; -use crate::{index::TimestampedId, ResourceIndex}; +use crate::{index::Timestamped, ResourceIndex}; /// Load the index from the file system fn load_index, Id: ResourceId>( @@ -102,7 +102,7 @@ pub(crate) fn discover_paths>( pub(crate) fn scan_entries, Id: ResourceId>( root_path: P, paths: Vec, -) -> HashMap> { +) -> HashMap> { let mut path_to_resource = HashMap::new(); for entry in paths { let resource = scan_entry(entry.clone()); @@ -120,7 +120,7 @@ pub(crate) fn scan_entries, Id: ResourceId>( } /// A helper function to scan one entry and create an indexed resource -pub(crate) fn scan_entry(entry: DirEntry) -> TimestampedId { +pub(crate) fn scan_entry(entry: DirEntry) -> Timestamped { let metadata = entry.metadata().expect("Failed to get metadata"); let last_modified = metadata .modified() @@ -129,7 +129,10 @@ pub(crate) fn scan_entry(entry: DirEntry) -> TimestampedId { // Get the ID of the resource let id = 
Id::from_path(entry.path()).expect("Failed to get ID from path"); - TimestampedId { id, last_modified } + Timestamped { + item: id, + last_modified, + } } /// A helper function to check if the entry should be indexed (not hidden or From 8ece25e6edd885fcfb527ebdca12b96ff5ed0752 Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 6 Aug 2024 17:38:01 +0300 Subject: [PATCH 43/46] doc(fs-index): comment on Timestamped::item Signed-off-by: Tarek --- fs-index/src/index.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index f33ea139..98bcafb8 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -59,7 +59,7 @@ impl IndexedResource { /// Represents an item with its last modified time #[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Clone, Debug)] pub struct Timestamped { - /// The unique identifier of the resource + /// The item to be timestamped pub(crate) item: Item, /// The last modified time of the resource (from the file system metadata) pub(crate) last_modified: SystemTime, @@ -275,10 +275,7 @@ impl ResourceIndex { log::debug!("Building index at root path: {:?}", root_path.as_ref()); let root_path = root_path.as_ref(); - // Canonicalize the root path (only for unix-like systems) - // Note: On windows, canonicalization adds a prefix to the path - // which breaks the path comparison in the tests - #[cfg(target_family = "unix")] + // Canonicalize the root path let root_path = root_path.canonicalize()?; let mut id_to_paths: HashMap> = HashMap::new(); From 0e3cac8146796daedf1a1ab372eebf43f290fb8c Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 6 Aug 2024 17:42:03 +0300 Subject: [PATCH 44/46] test(fs-index): some debugging for windows ci Signed-off-by: Tarek --- fs-index/src/tests.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs-index/src/tests.rs b/fs-index/src/tests.rs index 89749cc8..678042ed 100644 --- a/fs-index/src/tests.rs +++ b/fs-index/src/tests.rs @@ -223,14 
+223,34 @@ fn test_build_index_with_directory() { .expect("Failed to create temp dir"); let root_path = temp_dir.path(); + // print path + println!("Root path: {:?}", root_path); + // assert it exists + assert!(root_path.exists(), "Root path does not exist"); + let dir_path = root_path.join("dir"); fs::create_dir(&dir_path).expect("Failed to create dir"); + + // print dir path + println!("Dir path: {:?}", dir_path); + // assert it exists + assert!(dir_path.exists(), "Dir path does not exist"); + let file_path = dir_path.join("file.txt"); fs::write(&file_path, "file content").expect("Failed to write to file"); + + // print file path + println!("File path: {:?}", file_path); + // assert it exists + assert!(file_path.exists(), "File path does not exist"); + let expected_resource: IndexedResource = get_indexed_resource_from_file(&file_path, &root_path.to_path_buf()) .expect("Failed to get indexed resource"); + // print expected resource + println!("Expected resource: {:?}", expected_resource); + let index = ResourceIndex::build(root_path).expect("Failed to build index"); assert_eq!(index.len(), 1, "{:?}", index); From e0f6e7559d714bdf26bc0604a2a6f3f4ce93efd2 Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 6 Aug 2024 22:40:08 +0300 Subject: [PATCH 45/46] fix(fs-index): refactor should_index Signed-off-by: Tarek --- fs-index/src/utils.rs | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/fs-index/src/utils.rs b/fs-index/src/utils.rs index 745a196b..6c8cae72 100644 --- a/fs-index/src/utils.rs +++ b/fs-index/src/utils.rs @@ -78,21 +78,11 @@ pub(crate) fn discover_paths>( ) -> Result> { log::debug!("Discovering paths at root path: {:?}", root_path.as_ref()); - let walker = WalkDir::new(&root_path) - .min_depth(1) // Skip the root directory + let paths = WalkDir::new(root_path) + .min_depth(1) .into_iter() - .filter_entry(should_index); // Skip hidden files and empty files - - // Filter out directories - let paths = walker - 
.filter_map(|entry| { - let entry = entry.ok()?; - if entry.file_type().is_file() { - Some(entry) - } else { - None - } - }) + .filter_map(|e| e.ok()) + .filter(|e| should_index(e)) .collect(); Ok(paths) @@ -144,6 +134,7 @@ fn should_index(entry: &walkdir::DirEntry) -> bool { .to_string_lossy() .starts_with('.') { + log::trace!("Ignoring hidden file: {:?}", entry.path()); return false; } @@ -153,6 +144,19 @@ fn should_index(entry: &walkdir::DirEntry) -> bool { .map(|m| m.len() == 0) .unwrap_or(false) { + log::trace!("Ignoring empty file: {:?}", entry.path()); + return false; + } + + // Check if the entry isn't a file + if !entry.file_type().is_file() { + log::trace!("Ignoring non-file: {:?}", entry.path()); + return false; + } + + // Check if it's the index file + if entry.file_name() == INDEX_PATH { + log::trace!("Ignoring index file: {:?}", entry.path()); return false; } From f4ad8dfeda09b50e3f76a7fc8c14fad6d2f32d0f Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 6 Aug 2024 22:43:17 +0300 Subject: [PATCH 46/46] fix: cargo clippy Signed-off-by: Tarek --- fs-index/src/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs-index/src/utils.rs b/fs-index/src/utils.rs index 6c8cae72..8e6a9f12 100644 --- a/fs-index/src/utils.rs +++ b/fs-index/src/utils.rs @@ -82,7 +82,7 @@ pub(crate) fn discover_paths>( .min_depth(1) .into_iter() .filter_map(|e| e.ok()) - .filter(|e| should_index(e)) + .filter(should_index) .collect(); Ok(paths)