diff --git a/Cargo.toml b/Cargo.toml index 5327f12d..46e25867 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,10 @@ [workspace] -members = ["fs-atomic-versions", "fs-index", "fs-utils"] -default-members = ["fs-atomic-versions", "fs-index", "fs-utils"] +members = ["data-resource", "fs-atomic-versions", "fs-index", "fs-utils"] +default-members = [ + "data-resource", + "fs-atomic-versions", + "fs-index", + "fs-utils", +] resolver = "2" diff --git a/README.md b/README.md index ef13bee9..5f3499a6 100644 --- a/README.md +++ b/README.md @@ -12,11 +12,12 @@ The purpose of the library is to manage _resource index_ of folders with various
-| Package | Description | -| -------------------- | ----------------------------------------- | -| `fs-index` | Resource Index construction and updating | -| `fs-atomic-versions` | Version-based preventing of dirty writes | -| `fs-utils` | Utility functions and common code | +| Package | Description | +| -------------------- | ---------------------------------------- | +| `fs-index` | Resource Index construction and updating | +| `data-resource` | Resource hashing and ID construction | +| `fs-atomic-versions` | Version-based prevention of dirty writes | +| `fs-utils` | Utility functions and common code |
@@ -58,7 +59,7 @@ cargo bench index_build ### Benchmarking Local Files -Our benchmark suite includes tests on local files and directories. These benchmarks are located in the [`benches/`](/benches) directory. Each benchmark sets a time limit using `group.measurement_time()`, which you can adjust manually based on your requirements. +Our benchmark suite includes tests on local files and directories. These benchmarks are located in the `benches/` directory of each crate that provides them (currently `data-resource` and `fs-index`). Each benchmark sets a time limit using `group.measurement_time()`, which you can adjust manually based on your requirements. You have the flexibility to benchmark specific files or folders by modifying the variables within the benchmark files. By default, the benchmarks operate on the [`testdata/`](../testdata/) directory and its contents. You can change the directory/files by setting the `DIR_PATH` and `FILE_PATHS` variables to the desired values. diff --git a/data-resource/Cargo.toml b/data-resource/Cargo.toml new file mode 100644 index 00000000..87b7c5d3 --- /dev/null +++ b/data-resource/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "data-resource" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +fs-utils = { path = "../fs-utils" } +fs-atomic-versions = { path = "../fs-atomic-versions" } + +log = { version = "0.4.17", features = ["release_max_level_off"] } +serde = { version = "1.0.138", features = ["derive"] } +crc32fast = "1.3.2" +anyhow = "1" + + +[dev-dependencies] +# benchmarking +criterion = { version = "0.5", features = ["html_reports"] } +pprof = { version = "0.13", features = ["criterion", "flamegraph"] } +rand = "0.8" + +[[bench]] +name = "compute_bytes_benchmark" +harness = false +path = "benches/compute_bytes_benchmark.rs" diff --git a/fs-index/benches/compute_bytes_benchmark.rs b/data-resource/benches/compute_bytes_benchmark.rs similarity index 98% rename from 
fs-index/benches/compute_bytes_benchmark.rs rename to data-resource/benches/compute_bytes_benchmark.rs index a0a6313c..6789a32f 100644 --- a/fs-index/benches/compute_bytes_benchmark.rs +++ b/data-resource/benches/compute_bytes_benchmark.rs @@ -1,5 +1,5 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use fs_index::id::ResourceId; +use data_resource::ResourceId; use pprof::criterion::{Output, PProfProfiler}; use rand::prelude::*; use std::fs; diff --git a/fs-index/src/id.rs b/data-resource/src/id.rs similarity index 98% rename from fs-index/src/id.rs rename to data-resource/src/id.rs index 77c44cdc..606f8c5d 100644 --- a/fs-index/src/id.rs +++ b/data-resource/src/id.rs @@ -9,7 +9,7 @@ use std::io::{BufRead, BufReader}; use std::path::Path; use std::str::FromStr; -use crate::{ArklibError, Result}; +use fs_utils::errors::{ArklibError, Result}; #[derive( Eq, diff --git a/data-resource/src/lib.rs b/data-resource/src/lib.rs new file mode 100644 index 00000000..5fbec495 --- /dev/null +++ b/data-resource/src/lib.rs @@ -0,0 +1,3 @@ +mod id; + +pub use id::ResourceId; diff --git a/fs-index/Cargo.toml b/fs-index/Cargo.toml index 7c49514f..24b2d33f 100644 --- a/fs-index/Cargo.toml +++ b/fs-index/Cargo.toml @@ -11,9 +11,9 @@ bench = false [dependencies] fs-utils = { path = "../fs-utils" } fs-atomic-versions = { path = "../fs-atomic-versions" } +data-resource = { path = "../data-resource" } log = { version = "0.4.17", features = ["release_max_level_off"] } -crc32fast = "1.3.2" walkdir = "2.3.2" anyhow = "1.0.58" lazy_static = "1.4.0" @@ -51,11 +51,6 @@ target-lexicon = "0.12.4" ureq = "2.4.0" ring = "=0.17.5" -[[bench]] -name = "compute_bytes_benchmark" -harness = false -path = "benches/compute_bytes_benchmark.rs" - [[bench]] name = "index_build_benchmark" harness = false diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index f27bb7e9..81a41c6b 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -12,8 +12,8 @@ use 
walkdir::{DirEntry, WalkDir}; use log; -use crate::id::ResourceId; use crate::{ArklibError, Result, ARK_FOLDER, INDEX_PATH}; +use data_resource::ResourceId; #[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Clone, Debug)] pub struct IndexEntry { @@ -666,10 +666,10 @@ fn is_hidden(entry: &DirEntry) -> bool { #[cfg(test)] mod tests { - use crate::id::ResourceId; use crate::index::{discover_paths, IndexEntry}; use crate::ResourceIndex; use canonical_path::CanonicalPathBuf; + use data_resource::ResourceId; use fs_atomic_versions::initialize; use std::fs::File; #[cfg(target_os = "linux")] diff --git a/fs-index/src/lib.rs b/fs-index/src/lib.rs index 3ce4f7b0..ef174829 100644 --- a/fs-index/src/lib.rs +++ b/fs-index/src/lib.rs @@ -5,7 +5,6 @@ extern crate canonical_path; use fs_utils::errors::{ArklibError, Result}; -pub mod id; pub mod index; pub mod link; pub mod pdf; diff --git a/fs-index/src/link.rs b/fs-index/src/link.rs index d6f97e46..ea5dc738 100644 --- a/fs-index/src/link.rs +++ b/fs-index/src/link.rs @@ -1,10 +1,10 @@ -use crate::id::ResourceId; use crate::storage::meta::store_metadata; use crate::storage::prop::store_properties; use crate::{ storage::prop::load_raw_properties, AtomicFile, Result, ARK_FOLDER, PREVIEWS_STORAGE_FOLDER, PROPERTIES_STORAGE_FOLDER, }; +use data_resource::ResourceId; use reqwest::header::HeaderValue; use scraper::{Html, Selector}; use serde::{Deserialize, Serialize}; diff --git a/fs-index/src/storage/meta.rs b/fs-index/src/storage/meta.rs index f819076d..8591bf5a 100644 --- a/fs-index/src/storage/meta.rs +++ b/fs-index/src/storage/meta.rs @@ -4,8 +4,8 @@ use std::fmt::Debug; use std::io::Read; use std::path::Path; -use crate::id::ResourceId; use crate::{Result, ARK_FOLDER, METADATA_STORAGE_FOLDER}; +use data_resource::ResourceId; pub fn store_metadata< S: Serialize + DeserializeOwned + Clone + Debug, diff --git a/fs-index/src/storage/prop.rs b/fs-index/src/storage/prop.rs index a894ae68..43654075 100644 --- a/fs-index/src/storage/prop.rs 
+++ b/fs-index/src/storage/prop.rs @@ -6,8 +6,8 @@ use std::fmt::Debug; use std::io::Read; use std::path::Path; -use crate::id::ResourceId; use crate::{Result, ARK_FOLDER, PROPERTIES_STORAGE_FOLDER}; +use data_resource::ResourceId; pub fn store_properties< S: Serialize + DeserializeOwned + Clone + Debug,