From 31e35576ec4d3ddce9524bc3434f40a473a9a260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mikrut?= <41945903+qarmin@users.noreply.github.com> Date: Sun, 8 Nov 2020 09:10:49 +0100 Subject: [PATCH] Add support for multithreading to finding same music (#99) --- README.md | 3 ++ czkawka_core/src/same_music.rs | 95 ++++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index dbce3d8e4..7d0acc153 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,9 @@ cargo run --bin czkawka_cli ## Benchmarks Since Czkawka is written in Rust and aims to be a faster alternative to FSlint (written in Python), we need to compare the speed of these tools. +Currently, I'm working on multithreading support in Czkawka so benchmarks should be updated in versions 1.4.0+. +Also Dupeguru probably will have new 4.0.5 release soon. + I prepared a directory and performed a test without any folder exceptions(I removed all directories from FSlint and Czkawka from other tabs than Include Directory) which contained 320004 files and 36902 folders and 108844 duplicates files in 34475 groups which took 4.53 GB. 
Minimum file size to check I set to 1 KB on all programs diff --git a/czkawka_core/src/same_music.rs b/czkawka_core/src/same_music.rs index dd473e5b7..2ff3ea104 100644 --- a/czkawka_core/src/same_music.rs +++ b/czkawka_core/src/same_music.rs @@ -11,6 +11,7 @@ use crate::common_messages::Messages; use crate::common_traits::*; use audiotags::Tag; use crossbeam_channel::Receiver; +use rayon::prelude::*; use std::collections::HashMap; #[derive(Eq, PartialEq, Clone, Debug)] @@ -73,6 +74,7 @@ impl Info { pub struct SameMusic { text_messages: Messages, information: Info, + music_to_check: Vec<FileEntry>, music_entries: Vec<FileEntry>, duplicated_music_entries: Vec<Vec<FileEntry>>, directories: Directories, @@ -92,12 +94,13 @@ impl SameMusic { recursive_search: true, directories: Directories::new(), excluded_items: ExcludedItems::new(), - music_entries: vec![], + music_entries: Vec::with_capacity(2048), delete_method: DeleteMethod::None, music_similarity: MusicSimilarity::NONE, stopped_search: false, minimal_file_size: 1024, duplicated_music_entries: vec![], + music_to_check: Vec::with_capacity(2048), } } @@ -107,6 +110,10 @@ impl SameMusic { self.stopped_search = true; return; } + if !self.check_records_multithreaded(stop_receiver) { + self.stopped_search = true; + return; + } if !self.check_for_duplicates(stop_receiver) { self.stopped_search = true; return; @@ -229,10 +236,8 @@ impl SameMusic { continue 'dir; } - let tag = Tag::new().read_from_path(&current_file_name).unwrap(); - // Creating new file entry - let fe: FileEntry = FileEntry { + let file_entry: FileEntry = FileEntry { size: metadata.len(), path: current_file_name.clone(), modified_date: match metadata.modified() { @@ -248,30 +253,16 @@ impl SameMusic { continue 'dir; } // Permissions Denied }, - title: match tag.title() { - Some(t) => t.to_string(), - None => "".to_string(), - }, - artist: match tag.artist() { - Some(t) => t.to_string(), - None => "".to_string(), - }, - album_title: match tag.album_title() { - Some(t) => t.to_string(), - None => 
"".to_string(), - }, - album_artist: match tag.album_artist() { - Some(t) => t.to_string(), - None => "".to_string(), - }, - year: match tag.year() { - Some(t) => t, - None => 0, - }, + title: "".to_string(), + + artist: "".to_string(), + album_title: "".to_string(), + album_artist: "".to_string(), + year: 0, }; // Adding files to Vector - self.music_entries.push(fe); + self.music_to_check.push(file_entry); self.information.number_of_checked_files += 1; } else { @@ -285,10 +276,62 @@ impl SameMusic { } self.information.number_of_music_entries = self.music_entries.len(); - Common::print_time(start_time, SystemTime::now(), "check_files_size".to_string()); + Common::print_time(start_time, SystemTime::now(), "check_files".to_string()); true } + fn check_records_multithreaded(&mut self, stop_receiver: Option<&Receiver<()>>) -> bool { + let start_time: SystemTime = SystemTime::now(); + + let vec_file_entry = self + .music_to_check + .par_iter() + .map(|file_entry| { + if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { + // This will not break + return None; + } + let mut file_entry = file_entry.clone(); + + let tag = match Tag::new().read_from_path(&file_entry.path) { + Ok(t) => t, + Err(_) => return Option::from((file_entry, false)), // Data not in utf-8, etc. 
+ }; + + file_entry.title = match tag.title() { + Some(t) => t.to_string(), + None => "".to_string(), + }; + file_entry.artist = match tag.artist() { + Some(t) => t.to_string(), + None => "".to_string(), + }; + file_entry.album_title = match tag.album_title() { + Some(t) => t.to_string(), + None => "".to_string(), + }; + file_entry.album_artist = match tag.album_artist() { + Some(t) => t.to_string(), + None => "".to_string(), + }; + file_entry.year = match tag.year() { + Some(t) => t, + None => 0, + }; + + Option::from((file_entry, true)) + }) + .while_some() + .filter(|file_entry| file_entry.1) + .map(|file_entry| file_entry.0) + .collect::<Vec<_>>(); + + // Adding files to Vector + self.music_entries = vec_file_entry; + + Common::print_time(start_time, SystemTime::now(), "check_records_multithreaded".to_string()); + true + } fn check_for_duplicates(&mut self, stop_receiver: Option<&Receiver<()>>) -> bool { if MusicSimilarity::NONE == self.music_similarity { panic!("This can't be none");