Skip to content

Commit

Permalink
fs-index: Track API (ARK-Builders#85)
Browse files Browse the repository at this point in the history
* feat(fs-index): implement method update_one() for track api

Signed-off-by: Tarek <[email protected]>

* test(fs-index): unit tests for resource index track API

Signed-off-by: Tarek <[email protected]>

* feat(fs-index): add benchmarks for ResourceIndex::update_one

Signed-off-by: Tarek <[email protected]>

* feat(fs-index): update_one() to be its own type of API

Signed-off-by: Tarek <[email protected]>

* docs(fs-index): add a note that selective API is experimental

Signed-off-by: Tarek <[email protected]>

* benchmark(fs-index): increase number of updates for update_one()

Signed-off-by: Tarek <[email protected]>

* feat(fs-index): debug_assert that the path already exists in index

Signed-off-by: Tarek <[email protected]>

---------

Signed-off-by: Tarek <[email protected]>
  • Loading branch information
tareknaser authored Sep 3, 2024
1 parent 6c4d8a0 commit 56343b4
Show file tree
Hide file tree
Showing 5 changed files with 394 additions and 12 deletions.
2 changes: 2 additions & 0 deletions fs-index/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ The most important struct in this crate is `ResourceIndex` which comes with:
- **Snapshot API**
- `get_resources_by_id`: Query resources from the index by ID.
- `get_resource_by_path`: Query a resource from the index by its path.
- **Selective API**
- `update_one`: Method to manually update a specific resource by selectively rescanning a single file.

## Custom Serialization

Expand Down
60 changes: 60 additions & 0 deletions fs-index/benches/resource_index_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,66 @@ fn resource_index_benchmark(c: &mut Criterion) {
});
});

// Benchmark `ResourceIndex::update_one()`

// First, create a new temp directory specifically for the update_one
// benchmark since we will be creating new files, removing files, and
// modifying files

let update_one_benchmarks_dir =
TempDir::with_prefix("ark-fs-index-benchmarks-update-one").unwrap();
let update_one_benchmarks_dir = update_one_benchmarks_dir.path();

group.bench_function("index_update_one", |b| {
b.iter(|| {
// Clear the directory
std::fs::remove_dir_all(&update_one_benchmarks_dir).unwrap();
std::fs::create_dir(&update_one_benchmarks_dir).unwrap();

// Create 5000 new files
for i in 0..5000 {
let new_file =
update_one_benchmarks_dir.join(format!("file_{}.txt", i));
std::fs::File::create(&new_file).unwrap();
std::fs::write(&new_file, format!("Hello, World! {}", i))
.unwrap();
}
let mut index: ResourceIndex<Crc32> =
ResourceIndex::build(black_box(&update_one_benchmarks_dir))
.unwrap();

// Create 1000 new files
for i in 5000..6000 {
let new_file =
update_one_benchmarks_dir.join(format!("file_{}.txt", i));
std::fs::File::create(&new_file).unwrap();
std::fs::write(&new_file, format!("Hello, World! {}", i))
.unwrap();
}

// Modify 1000 files
for i in 4000..5000 {
let modified_file =
update_one_benchmarks_dir.join(format!("file_{}.txt", i));
std::fs::write(&modified_file, format!("Bye, World! {}", i))
.unwrap();
}

// Remove 1000 files
for i in 3000..4000 {
let removed_file =
update_one_benchmarks_dir.join(format!("file_{}.txt", i));
std::fs::remove_file(&removed_file).unwrap();
}

// Call update_one for each of the 3000 files
for i in 3000..6000 {
let file_path = format!("file_{}.txt", i);
let _update_result = index.update_one(&file_path).unwrap();
}
});
});

group.finish();
}

Expand Down
94 changes: 86 additions & 8 deletions fs-index/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,20 +75,20 @@ type IndexedPaths = HashSet<Timestamped<PathBuf>>;
/// #### Reactive API
/// - [`ResourceIndex::update_all`]: Method to update the index by rescanning
/// files and returning changes (additions/deletions/updates).
///
/// #### Snapshot API
/// - [`ResourceIndex::get_resources_by_id`]: Query resources from the index by
/// ID.
/// - [`ResourceIndex::get_resource_by_path`]: Query a resource from the index
/// by its path.
///
/// #### Track API
/// Allows for fine-grained control over tracking changes in the index
/// - [`ResourceIndex::track_addition`]: Track a newly added file (checks if the
/// file exists in the file system).
/// - [`ResourceIndex::track_removal`]: Track the deletion of a file (checks if
/// the file was actually deleted).
/// - [`ResourceIndex::track_modification`]: Track an update on a single file.
/// #### Selective API
/// - [`ResourceIndex::update_one`]: An experimental method to manually update a
/// specific resource by rescanning a single file. It provides targeted
/// control but is less dynamic than the reactive `update_all()`. The reactive
/// API is typically preferred for broader updates.
///
///
/// ## Examples
/// ```no_run
Expand All @@ -97,7 +97,7 @@ type IndexedPaths = HashSet<Timestamped<PathBuf>>;
/// use dev_hash::Crc32;
///
/// // Define the root path
/// let root_path = Path::new("animals");
/// let root_path = Path::new("path/to/animals");
///
/// // Build the index
/// let index: ResourceIndex<Crc32> = ResourceIndex::build(root_path).expect("Failed to build index");
Expand Down Expand Up @@ -474,4 +474,82 @@ impl<Id: ResourceId> ResourceIndex<Id> {

Ok(IndexUpdate { added, removed })
}

/// Update the index with the latest information from the file system
/// for a single resource.
///
/// This method accepts the path of a single resource, relative to the
/// index root, and updates the index regardless of whether the resource
/// was added, removed, or modified:
/// - If the path no longer exists on disk, the resource is removed from
///   both the path-to-ID map and the ID-to-paths map.
/// - Otherwise the resource ID and modification time are recomputed and
///   both maps are updated. When the path was previously indexed, it is
///   first detached from its previous ID.
///
/// In both cases an ID that is left without any path is dropped from the
/// ID-to-paths map, so the index never keeps IDs that point to no file.
///
/// **Note**: The caller must ensure that:
/// - The index is up-to-date with the file system except for the updated
///   resource
/// - In case of an addition, the resource was not already in the index
/// - In case of a modification or removal, the resource was already in the
///   index
///
/// # Errors
///
/// Returns an error if the resource ID cannot be computed from the file,
/// or if the file metadata or its modification time cannot be read.
///
/// # Panics
///
/// - In debug builds, if the path does not exist on disk and is not in the
///   index (caller contract violation). Release builds log a warning and
///   leave the index untouched instead.
/// - If the path-to-ID map and the ID-to-paths map disagree with each
///   other, which indicates a corrupted index.
pub fn update_one<P: AsRef<Path>>(
    &mut self,
    relative_path: P,
) -> Result<()> {
    let path = relative_path.as_ref();
    let entry_path = self.root.join(path);

    // Check if the entry exists in the file system
    if !entry_path.exists() {
        // If the entry does not exist in the file system, it's a removal
        debug_assert!(
            self.path_to_id.contains_key(path),
            "Caller must ensure that the resource exists in the index: {:?}",
            path
        );

        // Remove the resource from the path to ID map
        if let Some(removed) = self.path_to_id.remove(path) {
            let paths = self
                .id_to_paths
                .get_mut(&removed.item)
                .expect("indexed path must be listed under its ID");
            paths.remove(path);
            // If the ID has no paths, remove it from the ID to paths map
            if paths.is_empty() {
                self.id_to_paths.remove(&removed.item);
            }

            log::trace!("Resource removed: {:?}", path);
        } else {
            // Only reachable in release builds: nothing is indexed under
            // this path, so there is nothing to remove.
            log::warn!(
                "Resource is neither on disk nor in the index: {:?}",
                path
            );
        }
    } else {
        // If the entry exists in the file system, it's an addition or
        // update. In either case, we need to update the index
        // with the latest information about the resource

        let id = Id::from_path(entry_path.clone())?;
        let metadata = fs::metadata(&entry_path)?;
        let last_modified = metadata.modified()?;
        let resource_path = Timestamped {
            item: id.clone(),
            last_modified,
        };

        // In case of modification, we need to remove the old path from
        // the ID to paths map, and drop the old ID entirely if this was
        // its last path (mirrors the clean-up done on removal)
        if let Some(prev_id) = self.path_to_id.get(path) {
            let paths = self
                .id_to_paths
                .get_mut(&prev_id.item)
                .expect("indexed path must be listed under its ID");
            paths.remove(path);
            if paths.is_empty() {
                self.id_to_paths.remove(&prev_id.item);
            }
        }

        // Update the path to resource map
        self.path_to_id
            .insert(path.to_path_buf(), resource_path);
        // Update the ID to paths map; `id` is no longer needed afterwards,
        // so it is moved instead of cloned
        self.id_to_paths
            .entry(id)
            .or_default()
            .insert(path.to_path_buf());

        log::trace!("Resource added/updated: {:?}", path);
    }

    Ok(())
}
}
Loading

0 comments on commit 56343b4

Please sign in to comment.