From bf940de92eae97a149bca2a1f090b80c59583ba6 Mon Sep 17 00:00:00 2001
From: Ishan Bhanuka
Date: Mon, 25 Mar 2024 16:39:37 -0400
Subject: [PATCH] Add FileStorage logic, example and documentation

Co-Authored-by: Ishan Bhanuka
Co-Authored-by: Pushkar Mishra
Co-Authored-by: Tarek
Co-Authored-by: Kirill Taran
---
 data-error/src/lib.rs          |  17 ++-
 data-resource/src/lib.rs       |   1 -
 fs-storage/Cargo.toml          |  10 ++
 fs-storage/README.md           |  29 ++++
 fs-storage/examples/cli.rs     |  83 +++++++++++
 fs-storage/src/file_storage.rs | 284 +++++++++++++++++++++++++++++++++
 fs-storage/src/lib.rs          |   1 +
 rust-toolchain.toml            |   3 +
 8 files changed, 426 insertions(+), 2 deletions(-)
 create mode 100644 fs-storage/README.md
 create mode 100644 fs-storage/examples/cli.rs
 create mode 100644 fs-storage/src/file_storage.rs
 create mode 100644 rust-toolchain.toml

diff --git a/data-error/src/lib.rs b/data-error/src/lib.rs
index d8a1dd74..1852afe4 100644
--- a/data-error/src/lib.rs
+++ b/data-error/src/lib.rs
@@ -1,4 +1,4 @@
-use std::str::Utf8Error;
+use std::{convert::Infallible, str::Utf8Error};
 use thiserror::Error;
 
 pub type Result<T> = std::result::Result<T, ArklibError>;
@@ -15,6 +15,9 @@ pub enum ArklibError {
     Parse,
     #[error("Networking error")]
     Network,
+    /// Storage error shows label and error message
+    #[error("Storage error: {0} {1}")]
+    Storage(String, String),
     #[error(transparent)]
     Other(#[from] anyhow::Error),
 }
@@ -48,3 +51,15 @@ impl From> for ArklibError {
         Self::Other(anyhow::anyhow!(e.to_string()))
     }
 }
+
+impl From<&str> for ArklibError {
+    fn from(e: &str) -> Self {
+        Self::Other(anyhow::anyhow!(e.to_string()))
+    }
+}
+
+impl From<Infallible> for ArklibError {
+    fn from(_: Infallible) -> Self {
+        Self::Parse
+    }
+}
diff --git a/data-resource/src/lib.rs b/data-resource/src/lib.rs
index 21701636..4d7cfb16 100644
--- a/data-resource/src/lib.rs
+++ b/data-resource/src/lib.rs
@@ -1,6 +1,5 @@
 use anyhow::anyhow;
 use crc32fast::Hasher;
-use log;
 use serde::{Deserialize, Serialize};
 use std::fmt::{self, Display, Formatter};
 use std::fs;
diff --git a/fs-storage/Cargo.toml b/fs-storage/Cargo.toml
index 09fc9687..6ef7efc2 100644
--- a/fs-storage/Cargo.toml
+++ b/fs-storage/Cargo.toml
@@ -8,4 +8,14 @@ name = "fs_storage"
 crate-type = ["rlib"]
 bench = false
 
+[[example]]
+name = "cli"
+
 [dependencies]
+data-error = { path = "../data-error" }
+log = { version = "0.4.17", features = ["release_max_level_off"] }
+
+[dev-dependencies]
+log = { version = "0.4.17", features = ["release_max_level_off"] }
+tempdir = "0.3.7"
+serde_json = "1.0.82"
diff --git a/fs-storage/README.md b/fs-storage/README.md
new file mode 100644
index 00000000..8426decb
--- /dev/null
+++ b/fs-storage/README.md
@@ -0,0 +1,29 @@
+# Ark file system storage
+
+File system storage implementation for writing key-value pairs to disk.
+
+## Steps to use CLI
+- Create a test.json file of key-value pairs you want to store.
+```json
+{
+    "key1": "value1",
+    "key2": "value2",
+    "key3": "value3"
+}
+```
+
+- Run Write Command
+```bash
+cargo run --example cli write /tmp/z test.json
+```
+
+- Run Read Command
+```bash
+cargo run --example cli read /tmp/z key1,key2
+```
+
+- Get Output
+```bash
+key1: value1
+key2: value2
+```
diff --git a/fs-storage/examples/cli.rs b/fs-storage/examples/cli.rs
new file mode 100644
index 00000000..af96f9d7
--- /dev/null
+++ b/fs-storage/examples/cli.rs
@@ -0,0 +1,83 @@
+use fs_storage::file_storage::FileStorage;
+use serde_json::Value;
+use std::collections::BTreeMap;
+use std::env;
+use std::fs;
+use std::path::Path;
+
+/// Read or write a key-value storage file from the command line
+fn main() {
+    let args: Vec<String> = env::args().collect();
+    if args.len() < 3 {
+        println!("Usage:");
+        println!("  cargo run --example cli write <path> <json_file>");
+        println!("  cargo run --example cli read <path> [key1,key2,...]");
+        return;
+    }
+
+    let command = &args[1];
+    let path = &args[2];
+
+    match command.as_str() {
+        "read" => {
+            let keys = if args.len() > 3 {
+                args[3]
+                    .split(',')
+                    .map(|s| s.to_string())
+                    .collect::<Vec<String>>()
+            } else {
+                vec![]
+            };
+            let mut fs = FileStorage::new("cli".to_string(), Path::new(path));
+            let map: BTreeMap<String, String> = fs.read_file().unwrap();
+            if keys.is_empty() {
+                for (key, value) in map {
+                    println!("{}: {}", key, value);
+                }
+            } else {
+                for key in &keys {
+                    if let Some(value) = map.get(key) {
+                        println!("{}: {}", key, value);
+                    } else {
+                        println!("Key '{}' not found", key);
+                    }
+                }
+            }
+        }
+        "write" => {
+            if args.len() < 4 {
+                println!(
+                    "Usage: cargo run --example cli write <path> <json_file>"
+                );
+                return;
+            }
+
+            let json_file = &args[3];
+            let json_contents = fs::read_to_string(json_file)
+                .expect("Failed to read JSON file");
+            let json_value: Value =
+                serde_json::from_str(&json_contents).expect("Invalid JSON");
+
+            let mut kv_pairs = BTreeMap::new();
+            if let Value::Object(object) = json_value {
+                for (key, value) in object {
+                    if let Value::String(value_str) = value {
+                        kv_pairs.insert(key, value_str);
+                    } else {
+                        println!(
+                            "Warning: Skipping non-string value for key '{}'",
+                            key
+                        );
+                    }
+                }
+            } else {
+                println!("JSON value is not an object");
+                return;
+            }
+
+            let mut fs = FileStorage::new("cli".to_string(), Path::new(path));
+            fs.write_file(&kv_pairs).unwrap();
+        }
+        _ => eprintln!("Invalid command. Use 'read' or 'write'."),
+    }
+}
diff --git a/fs-storage/src/file_storage.rs b/fs-storage/src/file_storage.rs
new file mode 100644
index 00000000..c787d19f
--- /dev/null
+++ b/fs-storage/src/file_storage.rs
@@ -0,0 +1,284 @@
+use std::fmt::Debug;
+use std::fs::{self, File};
+use std::io::{BufRead, BufReader, BufWriter, Write};
+use std::str::FromStr;
+use std::time::SystemTime;
+use std::{
+    collections::BTreeMap,
+    fmt::Display,
+    path::{Path, PathBuf},
+};
+
+use data_error::{ArklibError, Result};
+
+const STORAGE_VERSION: i32 = 2;
+const STORAGE_VERSION_PREFIX: &str = "version ";
+const KEY_VALUE_SEPARATOR: char = ':';
+
+/// Key-value storage backed by a single text file
+///
+/// The first line of the file is a `version N` header; every following
+/// non-empty line holds one `key:value` pair.
+pub struct FileStorage {
+    /// Diagnostic label used in log and error messages
+    label: String,
+    /// Location of the backing file
+    path: PathBuf,
+    /// Timestamp of the last read or write (initially the creation time)
+    timestamp: SystemTime,
+}
+
+impl FileStorage {
+    /// Create a new file storage with a diagnostic label and file path
+    ///
+    /// The file is not touched; the timestamp starts at the current time.
+    pub fn new(label: String, path: &Path) -> Self {
+        Self {
+            label,
+            path: PathBuf::from(path),
+            timestamp: SystemTime::now(),
+        }
+    }
+
+    /// Check if underlying file has been updated
+    ///
+    /// Returns `true` when the file's modification time is newer than the
+    /// timestamp recorded by the last read or write. This check can be
+    /// used before reading the file.
+    pub fn is_file_updated(&self) -> Result<bool> {
+        let file_timestamp = fs::metadata(&self.path)?.modified()?;
+        Ok(self.timestamp < file_timestamp)
+    }
+
+    /// Read data from disk
+    ///
+    /// Data is read as key-value pairs, one per line, split at the first
+    /// separator and stored in a [BTreeMap] with a generic key `K` and
+    /// value `V`. Empty lines are skipped.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the file cannot be read, the version header is missing or
+    /// unsupported, a line has no separator, or a key or value cannot be
+    /// parsed.
+    pub fn read_file<K, V>(&mut self) -> Result<BTreeMap<K, V>>
+    where
+        K: FromStr + std::hash::Hash + std::cmp::Eq + Debug + std::cmp::Ord,
+        V: FromStr + Debug,
+        ArklibError: From<<K as FromStr>::Err>,
+        ArklibError: From<<V as FromStr>::Err>,
+    {
+        let file = fs::File::open(&self.path)?;
+        let reader = BufReader::new(file);
+        let mut lines = reader.lines();
+
+        let new_timestamp = fs::metadata(&self.path)?.modified()?;
+        match lines.next() {
+            Some(header) => {
+                let header = header?;
+                self.verify_version(&header)?;
+
+                let mut value_by_id = BTreeMap::new();
+                for line in lines {
+                    let line = line?;
+                    if line.is_empty() {
+                        continue;
+                    }
+
+                    // Split at the first separator only, so values may
+                    // contain the separator; a line without one is an
+                    // error rather than an out-of-bounds panic.
+                    let (id, value) = line
+                        .split_once(KEY_VALUE_SEPARATOR)
+                        .ok_or_else(|| {
+                            ArklibError::Storage(
+                                self.label.clone(),
+                                format!("Malformed entry: {}", line),
+                            )
+                        })?;
+                    let id = K::from_str(id)?;
+                    let value = V::from_str(value)?;
+                    value_by_id.insert(id, value);
+                }
+
+                self.timestamp = new_timestamp;
+                Ok(value_by_id)
+            }
+            None => Err(ArklibError::Storage(
+                self.label.clone(),
+                "Storage file is missing header".to_owned(),
+            )),
+        }
+    }
+
+    /// Write data to file
+    ///
+    /// Data is a key-value mapping with generic key `K` and value `V`,
+    /// written one pair per line after the version header. Missing parent
+    /// directories are created.
+    ///
+    /// # Errors
+    ///
+    /// Fails on any I/O error, or if the file's modification time equals
+    /// the previously recorded timestamp after the write.
+    pub fn write_file<K, V>(
+        &mut self,
+        value_by_id: &BTreeMap<K, V>,
+    ) -> Result<()>
+    where
+        K: Display,
+        V: Display,
+    {
+        // `parent()` is `None` for a root or empty path; let
+        // `File::create` report that instead of panicking here.
+        if let Some(parent) = self.path.parent() {
+            fs::create_dir_all(parent)?;
+        }
+        let file = File::create(&self.path)?;
+        let mut writer = BufWriter::new(file);
+
+        writeln!(writer, "{}{}", STORAGE_VERSION_PREFIX, STORAGE_VERSION)?;
+        for (id, value) in value_by_id {
+            writeln!(writer, "{}{}{}", id, KEY_VALUE_SEPARATOR, value)?;
+        }
+
+        // Flush before reading the modification time: buffered data
+        // written on drop would bump it past the recorded timestamp, and
+        // write errors on drop are silently ignored.
+        writer.flush()?;
+
+        let new_timestamp = fs::metadata(&self.path)?.modified()?;
+        if new_timestamp == self.timestamp {
+            return Err("Timestamp didn't update".into());
+        }
+        self.timestamp = new_timestamp;
+
+        log::info!(
+            "{} {} entries has been written",
+            self.label,
+            value_by_id.len()
+        );
+        Ok(())
+    }
+
+    /// Remove the underlying file
+    ///
+    /// Failure to delete is logged, not returned.
+    pub fn erase(&self) {
+        if let Err(e) = fs::remove_file(&self.path) {
+            log::error!(
+                "{} Failed to delete file because of error: {}",
+                self.label,
+                e
+            )
+        }
+    }
+
+    /// Verify the version stored in the file header
+    ///
+    /// The header must be the version prefix followed by the supported
+    /// version number.
+    fn verify_version(&self, header: &str) -> Result<()> {
+        let Some(version) = header.strip_prefix(STORAGE_VERSION_PREFIX) else {
+            return Err(ArklibError::Storage(
+                self.label.clone(),
+                "Unknown storage version prefix".to_owned(),
+            ));
+        };
+
+        let version = version.parse::<i32>().map_err(|_err| {
+            ArklibError::from("Unable to parse storage version")
+        })?;
+
+        if version != STORAGE_VERSION {
+            return Err(ArklibError::Storage(
+                self.label.clone(),
+                format!(
+                    "Storage version mismatch: expected {}, found {}",
+                    STORAGE_VERSION, version
+                ),
+            ));
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::BTreeMap;
+    use tempdir::TempDir;
+
+    use crate::file_storage::FileStorage;
+
+    #[test]
+    fn test_file_storage_write_read() {
+        let temp_dir =
+            TempDir::new("tmp").expect("Failed to create temporary directory");
+        let storage_path = temp_dir.path().join("test_storage.txt");
+
+        let mut file_storage =
+            FileStorage::new("TestStorage".to_string(), &storage_path);
+
+        let mut data_to_write = BTreeMap::new();
+        data_to_write.insert("key1".to_string(), "value1".to_string());
+        data_to_write.insert("key2".to_string(), "value2".to_string());
+
+        file_storage
+            .write_file(&data_to_write)
+            .expect("Failed to write data to disk");
+
+        let data_read: BTreeMap<_, _> = file_storage
+            .read_file()
+            .expect("Failed to read data from disk");
+
+        assert_eq!(data_read, data_to_write);
+    }
+
+    #[test]
+    fn test_file_storage_value_with_separator() {
+        let temp_dir =
+            TempDir::new("tmp").expect("Failed to create temporary directory");
+        let storage_path = temp_dir.path().join("test_storage.txt");
+
+        let mut file_storage =
+            FileStorage::new("TestStorage".to_string(), &storage_path);
+
+        let mut data_to_write = BTreeMap::new();
+        data_to_write.insert("key1".to_string(), "a:b:c".to_string());
+
+        file_storage
+            .write_file(&data_to_write)
+            .expect("Failed to write data to disk");
+
+        let data_read: BTreeMap<_, _> = file_storage
+            .read_file()
+            .expect("Failed to read data from disk");
+
+        assert_eq!(data_read, data_to_write);
+    }
+
+    #[test]
+    fn test_file_storage_auto_delete() {
+        let temp_dir =
+            TempDir::new("tmp").expect("Failed to create temporary directory");
+        let storage_path = temp_dir.path().join("test_storage.txt");
+
+        let mut file_storage =
+            FileStorage::new("TestStorage".to_string(), &storage_path);
+
+        let mut data_to_write = BTreeMap::new();
+        data_to_write.insert("key1".to_string(), "value1".to_string());
+        data_to_write.insert("key2".to_string(), "value2".to_string());
+
+        file_storage
+            .write_file(&data_to_write)
+            .expect("Failed to write data to disk");
+
+        assert!(storage_path.exists());
+
+        file_storage.erase();
+
+        assert!(!storage_path.exists());
+    }
+}
diff --git a/fs-storage/src/lib.rs b/fs-storage/src/lib.rs
index 6e7af127..adeb750b 100644
--- a/fs-storage/src/lib.rs
+++ b/fs-storage/src/lib.rs
@@ -1,3 +1,4 @@
+pub mod file_storage;
 pub const ARK_FOLDER: &str = ".ark";
 
 // Should not be lost if possible
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
new file mode 100644
index 00000000..6a8344b5
--- /dev/null
+++ b/rust-toolchain.toml
@@ -0,0 +1,3 @@
+[toolchain]
+version = "1.75.0"
+channel = "stable"