Skip to content

Commit

Permalink
Add cache support to similar music files (#558)
Browse files Browse the repository at this point in the history
* Simplify cache code

* Better saving/loading.
Add support for loading/saving json files in release mode

* Broken files cache

* Finally same music cache
  • Loading branch information
qarmin authored Jan 5, 2022
1 parent db3b1f5 commit aaa5885
Show file tree
Hide file tree
Showing 17 changed files with 528 additions and 427 deletions.
5 changes: 3 additions & 2 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
## Version 4.0.0 - ?
- Multithreading support for collecting files to check(2/3x speedup on 4 thread processor and SSD) - [#502](https://github.com/qarmin/czkawka/pull/502), [#504](https://github.com/qarmin/czkawka/pull/504)
- Add Polish, German and Italian translation - [#469](https://github.com/qarmin/czkawka/pull/469), [#508](https://github.com/qarmin/czkawka/pull/508), [5be](https://github.com/qarmin/czkawka/commit/5be801e76395855f07ab1da43cdbb8bd0b843834)
- Add multiple translations - Polish, Italian, French, German, Russian ... - [#469](https://github.com/qarmin/czkawka/pull/469), [#508](https://github.com/qarmin/czkawka/pull/508), [5be](https://github.com/qarmin/czkawka/commit/5be801e76395855f07ab1da43cdbb8bd0b843834)
- Add support for finding similar videos - [#460](https://github.com/qarmin/czkawka/pull/460)
- GUI code refactoring(could fix some bugs) - [#462](https://github.com/qarmin/czkawka/pull/462)
- GUI code refactoring and search code unification - [#462](https://github.com/qarmin/czkawka/pull/462), [#531](https://github.com/qarmin/czkawka/pull/531)
- Fixed crash when trying to hard/symlink 0 files - [#462](https://github.com/qarmin/czkawka/pull/462)
- GTK 4 compatibility improvements for future change of toolkit - [#467](https://github.com/qarmin/czkawka/pull/467), [#468](https://github.com/qarmin/czkawka/pull/468), [#473](https://github.com/qarmin/czkawka/pull/473), [#474](https://github.com/qarmin/czkawka/pull/474), [#503](https://github.com/qarmin/czkawka/pull/503), [#505](https://github.com/qarmin/czkawka/pull/505)
- Change minimum supported OS to Ubuntu 20.04 (needed by GTK) - [#468](https://github.com/qarmin/czkawka/pull/468)
Expand All @@ -22,6 +22,7 @@
- Image compare performance and usability improvements - [#529](https://github.com/qarmin/czkawka/pull/529), [#528](https://github.com/qarmin/czkawka/pull/528), [#530](https://github.com/qarmin/czkawka/pull/530), [#525](https://github.com/qarmin/czkawka/pull/525)
- Reorganize(unify) saving/loading data from file - [#524](https://github.com/qarmin/czkawka/pull/524)
- Add "reference folders" - [#516](https://github.com/qarmin/czkawka/pull/516)
- Add cache for similar music files - [#558](https://github.com/qarmin/czkawka/pull/558)

## Version 3.3.1 - 22.11.2021r
- Fix crash when moving buttons [#457](https://github.com/qarmin/czkawka/pull/457)
Expand Down
183 changes: 68 additions & 115 deletions czkawka_core/src/broken_files.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::collections::BTreeMap;
use std::fs::{File, Metadata, OpenOptions};
use std::fs::{File, Metadata};
use std::io::prelude::*;
use std::io::{BufReader, BufWriter};
use std::path::{Path, PathBuf};
Expand All @@ -10,10 +10,10 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH};
use std::{fs, mem, panic, thread};

use crossbeam_channel::Receiver;
use directories_next::ProjectDirs;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};

use crate::common::Common;
use crate::common::{open_cache_folder, Common};
use crate::common_directory::Directories;
use crate::common_extensions::Extensions;
use crate::common_items::ExcludedItems;
Expand All @@ -23,8 +23,6 @@ use crate::fl;
use crate::localizer::generate_translation_hashmap;
use crate::similar_images::{AUDIO_FILES_EXTENSIONS, IMAGE_RS_BROKEN_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS};

const CACHE_FILE_NAME: &str = "cache_broken_files.txt";

#[derive(Debug)]
pub struct ProgressData {
pub current_stage: u8,
Expand All @@ -39,7 +37,7 @@ pub enum DeleteMethod {
Delete,
}

#[derive(Clone)]
#[derive(Clone, Serialize, Deserialize)]
pub struct FileEntry {
pub path: PathBuf,
pub modified_date: u64,
Expand All @@ -48,7 +46,7 @@ pub struct FileEntry {
pub error_string: String,
}

#[derive(Copy, Clone, PartialEq, Eq)]
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum TypeOfFile {
Unknown = -1,
Image = 0,
Expand Down Expand Up @@ -82,6 +80,8 @@ pub struct BrokenFiles {
delete_method: DeleteMethod,
stopped_search: bool,
use_cache: bool,
delete_outdated_cache: bool, // TODO add this to GUI
save_also_as_json: bool,
}

impl BrokenFiles {
Expand All @@ -98,6 +98,8 @@ impl BrokenFiles {
stopped_search: false,
broken_files: Default::default(),
use_cache: true,
delete_outdated_cache: true,
save_also_as_json: false,
}
}

Expand Down Expand Up @@ -135,6 +137,10 @@ impl BrokenFiles {
self.delete_method = delete_method;
}

/// Sets the `save_also_as_json` flag.
///
/// The flag is forwarded to `save_cache_to_file`, which additionally writes the cache
/// through `serde_json` when it is `true`.
pub fn set_save_also_as_json(&mut self, save_also_as_json: bool) {
self.save_also_as_json = save_also_as_json;
}

/// Sets the `use_cache` flag.
///
/// When the flag is `true`, previously cached results are loaded with
/// `load_cache_from_file` before files are checked.
// NOTE(review): the cache is presumably also only saved when this flag is set - confirm in the scanning code.
pub fn set_use_cache(&mut self, use_cache: bool) {
self.use_cache = use_cache;
}
Expand Down Expand Up @@ -350,7 +356,7 @@ impl BrokenFiles {
let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();

if self.use_cache {
loaded_hash_map = match load_cache_from_file(&mut self.text_messages) {
loaded_hash_map = match load_cache_from_file(&mut self.text_messages, self.delete_outdated_cache) {
Some(t) => t,
None => Default::default(),
};
Expand Down Expand Up @@ -501,7 +507,7 @@ impl BrokenFiles {
for (_name, file_entry) in loaded_hash_map {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
save_cache_to_file(&all_results, &mut self.text_messages);
save_cache_to_file(&all_results, &mut self.text_messages, self.save_also_as_json);
}

self.information.number_of_broken_files = self.broken_files.len();
Expand Down Expand Up @@ -620,137 +626,84 @@ impl PrintResults for BrokenFiles {
}
}

fn save_cache_to_file(hashmap_file_entry: &BTreeMap<String, FileEntry>, text_messages: &mut Messages) {
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
// Lin: /home/username/.cache/czkawka
// Win: C:\Users\Username\AppData\Local\Qarmin\Czkawka\cache
// Mac: /Users/Username/Library/Caches/pl.Qarmin.Czkawka

let cache_dir = PathBuf::from(proj_dirs.cache_dir());
if cache_dir.exists() {
if !cache_dir.is_dir() {
text_messages.messages.push(format!("Config dir {} is a file!", cache_dir.display()));
return;
}
} else if let Err(e) = fs::create_dir_all(&cache_dir) {
text_messages.messages.push(format!("Cannot create config dir {}, reason {}", cache_dir.display(), e));
return;
fn save_cache_to_file(old_hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, save_also_as_json: bool) {
let mut hashmap: BTreeMap<String, FileEntry> = Default::default();
for (path, fe) in old_hashmap {
if fe.size > 1024 {
hashmap.insert(path.clone(), fe.clone());
}
let cache_file = cache_dir.join(CACHE_FILE_NAME);
let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
Ok(t) => t,
Err(e) => {
}
let hashmap = &hashmap;

if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(&get_cache_file(), true, save_also_as_json, &mut text_messages.warnings) {
{
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
if let Err(e) = bincode::serialize_into(writer, hashmap) {
text_messages
.messages
.push(format!("Cannot create or open cache file {}, reason {}", cache_file.display(), e));
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
return;
}
};
let mut writer = BufWriter::new(file_handler);

for file_entry in hashmap_file_entry.values() {
// Only save to cache files which have more than 1KB
if file_entry.size > 1024 {
let string: String = format!(
"{}//{}//{}//{}",
file_entry.path.display(),
file_entry.size,
file_entry.modified_date,
file_entry.error_string
);

if let Err(e) = writeln!(writer, "{}", string) {
}
if save_also_as_json {
if let Some(file_handler_json) = file_handler_json {
let writer = BufWriter::new(file_handler_json);
if let Err(e) = serde_json::to_writer(writer, hashmap) {
text_messages
.messages
.push(format!("Failed to save some data to cache file {}, reason {}", cache_file.display(), e));
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
return;
};
}
}
}

text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
}
}

fn load_cache_from_file(text_messages: &mut Messages) -> Option<BTreeMap<String, FileEntry>> {
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
let cache_dir = PathBuf::from(proj_dirs.cache_dir());
let cache_file = cache_dir.join(CACHE_FILE_NAME);
// TODO add before checking if cache exists(if not just return) but if exists then enable error
let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
Ok(t) => t,
Err(_inspected) => {
// text_messages.messages.push(format!("Cannot find or open cache file {}", cache_file.display())); // This shouldn't be write to output
return None;
}
};

let reader = BufReader::new(file_handler);

let mut hashmap_loaded_entries: BTreeMap<String, FileEntry> = Default::default();

// Read the file line by line using the lines() iterator from std::io::BufRead.
for (index, line) in reader.lines().enumerate() {
let line = match line {
fn load_cache_from_file(text_messages: &mut Messages, delete_outdated_cache: bool) -> Option<BTreeMap<String, FileEntry>> {
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(&get_cache_file(), false, true, &mut text_messages.warnings) {
let mut hashmap_loaded_entries: BTreeMap<String, FileEntry>;
if let Some(file_handler) = file_handler {
let reader = BufReader::new(file_handler);
hashmap_loaded_entries = match bincode::deserialize_from(reader) {
Ok(t) => t,
Err(e) => {
text_messages
.warnings
.push(format!("Failed to load line number {} from cache file {}, reason {}", index + 1, cache_file.display(), e));
.push(format!("Failed to load data from cache file {}, reason {}", cache_file.display(), e));
return None;
}
};
} else {
let reader = BufReader::new(file_handler_json.unwrap()); // Unwrap cannot fail, because at least one file must be valid
hashmap_loaded_entries = match serde_json::from_reader(reader) {
Ok(t) => t,
Err(e) => {
text_messages
.warnings
.push(format!("Failed to load data from cache file {}, reason {}", cache_file_json.display(), e));
return None;
}
};
let uuu = line.split("//").collect::<Vec<&str>>();
if uuu.len() != 4 {
text_messages
.warnings
.push(format!("Found invalid data in line {} - ({}) in cache file {}", index + 1, line, cache_file.display()));
continue;
}
// Don't load cache data if destination file not exists
if Path::new(uuu[0]).exists() {
hashmap_loaded_entries.insert(
uuu[0].to_string(),
FileEntry {
path: PathBuf::from(uuu[0]),
size: match uuu[1].parse::<u64>() {
Ok(t) => t,
Err(e) => {
text_messages.warnings.push(format!(
"Found invalid size value in line {} - ({}) in cache file {}, reason {}",
index + 1,
line,
cache_file.display(),
e
));
continue;
}
},
modified_date: match uuu[2].parse::<u64>() {
Ok(t) => t,
Err(e) => {
text_messages.warnings.push(format!(
"Found invalid modified date value in line {} - ({}) in cache file {}, reason {}",
index + 1,
line,
cache_file.display(),
e
));
continue;
}
},
type_of_file: check_extension_avaibility(&uuu[0].to_lowercase()),
error_string: uuu[3].to_string(),
},
);
}
}

// Drop cached entries whose file no longer exists on disk
if delete_outdated_cache {
hashmap_loaded_entries.retain(|src_path, _file_entry| Path::new(src_path).exists());
}

text_messages.messages.push(format!("Properly loaded {} cache entries.", hashmap_loaded_entries.len()));

return Some(hashmap_loaded_entries);
}

text_messages.messages.push("Cannot find or open system config dir to save cache file".to_string());
None
}

/// Returns the file name of the binary cache that is handed to `open_cache_folder`
/// when the broken-files cache is saved or loaded.
fn get_cache_file() -> String {
    String::from("cache_broken_files.bin")
}

fn check_extension_avaibility(file_name_lowercase: &str) -> TypeOfFile {
if IMAGE_RS_BROKEN_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) {
TypeOfFile::Image
Expand Down
59 changes: 58 additions & 1 deletion czkawka_core/src/common.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use directories_next::ProjectDirs;
use image::{DynamicImage, ImageBuffer, Rgb};
use imagepipe::{ImageSource, Pipeline};
use std::ffi::OsString;
use std::fs;
use std::fs::OpenOptions;
use std::fs::{File, OpenOptions};
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
Expand All @@ -11,6 +12,62 @@ use std::time::SystemTime;

pub struct Common();

pub fn open_cache_folder(cache_file_name: &str, save_to_cache: bool, use_json: bool, warnings: &mut Vec<String>) -> Option<((Option<File>, PathBuf), (Option<File>, PathBuf))> {
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
let cache_dir = PathBuf::from(proj_dirs.cache_dir());
let cache_file = cache_dir.join(cache_file_name);
let cache_file_json = cache_dir.join(cache_file_name.replace(".bin", ".json"));

let mut file_handler_default = None;
let mut file_handler_json = None;

if save_to_cache {
if cache_dir.exists() {
if !cache_dir.is_dir() {
warnings.push(format!("Config dir {} is a file!", cache_dir.display()));
return None;
}
} else if let Err(e) = fs::create_dir_all(&cache_dir) {
warnings.push(format!("Cannot create config dir {}, reason {}", cache_dir.display(), e));
return None;
}

file_handler_default = Some(match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
Ok(t) => t,
Err(e) => {
warnings.push(format!("Cannot create or open cache file {}, reason {}", cache_file.display(), e));
return None;
}
});
if use_json {
file_handler_json = Some(match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file_json) {
Ok(t) => t,
Err(e) => {
warnings.push(format!("Cannot create or open cache file {}, reason {}", cache_file_json.display(), e));
return None;
}
});
}
} else {
if let Ok(t) = OpenOptions::new().read(true).open(&cache_file) {
file_handler_default = Some(t);
} else {
if use_json {
file_handler_json = Some(match OpenOptions::new().read(true).open(&cache_file_json) {
Ok(t) => t,
Err(_) => return None,
});
} else {
// messages.push(format!("Cannot find or open cache file {}", cache_file.display())); // No error or warning
return None;
}
}
};
return Some(((file_handler_default, cache_file), (file_handler_json, cache_file_json)));
}
None
}

pub fn get_dynamic_image_from_raw_image(path: impl AsRef<Path> + std::fmt::Debug) -> Option<DynamicImage> {
let file_handler = match OpenOptions::new().read(true).open(&path) {
Ok(t) => t,
Expand Down
Loading

0 comments on commit aaa5885

Please sign in to comment.