diff --git a/README.md b/README.md new file mode 100644 index 0000000..9f200bb --- /dev/null +++ b/README.md @@ -0,0 +1,101 @@ +Adding New Language +=================== +Add the new language's locale code and name to the `LOCALES` and `SUPPORTED_LANGS` maps in `src/langs.rs`. + + +Update the language wordlist +============================ +``` +$ cargo run -- --lang ta --update-langs --wordlist-url https://raw.githubusercontent.com/arcturusannamalai/wordlists/main + Finished dev [unoptimized + debuginfo] target(s) in 0.17s + Running `target/debug/dym --lang ta --update-langs --wordlist-url 'https://raw.githubusercontent.com/arcturusannamalai/wordlists/main'` +Downloading English word list... +Accessing URL: https://raw.githubusercontent.com/arcturusannamalai/wordlists/main/en +[00:00:00] [############################################################################################################################] 4.12MiB/4.12MiB (0s) +Downloading Tamil word list... +Accessing URL: https://raw.githubusercontent.com/arcturusannamalai/wordlists/main/ta +[00:00:00] [############################################################################################################################] 1.73MiB/1.73MiB (0s) +``` + +Run did you mean: +================ +e.g. +``` +cargo run -- --lang ta கலஅ + Compiling didyoumean v1.1.4 (/Users/user/devel/rust-in-action/didyoumean) + Finished dev [unoptimized + debuginfo] target(s) in 1.18s + Running `target/debug/dym --lang ta 'கலஅ'` +Did you mean? +1. கலி +2. கலை +3. கல் +4. அ +5. 
அகல் + +``` + + +For more info see the help text and options, +``` +$ cargo run -- --help + +didyoumean user$ cargo run -- --help + Compiling didyoumean v1.1.4 (/Users/user/devel/rust-in-action/didyoumean) + Finished dev [unoptimized + debuginfo] target(s) in 2.24s + Running `target/debug/dym --help` +didyoumean 1.1.4 +Hisbaan Noorani +Did You Mean: A cli spelling corrector + +USAGE: + dym [OPTIONS] [SEARCH_TERM] + +ARGS: + + + +OPTIONS: + -c, --clean-output + Print a clean version of the output without the title, numbers or colour. + + -h, --help + Print help information + + -l, --lang + Select the desired language using its locale code. For example, English would have the + locale code en and French would have the locale code fr. See --print-langs for a list of + locale codes and the corresponding languages. + + [default: en] + + -n, --number + Change the number of words the program will print. The default value is five. + + [default: 5] + + --print-langs + Display a list of supported languages and their respective locale codes. + + --update-langs + Update all language files from the repository specified by CLI @wordlist-url@. + + -v, --verbose + Print verbose output including the edit distance of the found word to the queried word. + + -V, --version + Print version information + + -w, --wordlist-url + Wordlist repository URL. The default value is + 'https://raw.githubusercontent.com/hisbaan/wordlists/main' + + [default: https://raw.githubusercontent.com/hisbaan/wordlists/main] + + -y, --yank + Yank (copy) the selected word to the system clipboard. If no word is selected, the + clipboard will not be altered. 
+ +``` + + + diff --git a/src/cli.rs b/src/cli.rs index eaacca9..0cca4e8 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -4,54 +4,62 @@ use clap::Parser; #[derive(Parser)] #[clap(author = "Hisbaan Noorani", version = "1.1.4", about = "Did You Mean: A cli spelling corrector", long_about = None)] pub struct Cli { - pub search_term: Option, - #[clap( - short = 'n', - long = "number", - default_value_t = 5, - help = "Change the number of matches printed", - long_help = "Change the number of words the program will print. The default value is five." - )] - pub number: usize, - #[clap( - short = 'c', - long = "clean-output", - help = "Print clean output", - long_help = "Print a clean version of the output without the title, numbers or colour." - )] - pub clean_output: bool, - #[clap( - short = 'v', - long = "verbose", - help = "Print verbose output", - long_help = "Print verbose output including the edit distance of the found word to the queried word." - )] - pub verbose: bool, - #[clap( - short = 'y', - long = "yank", - help = "Yank (copy) to the system cliboard", - long_help = "Yank (copy) the selected word to the system clipboard. If no word is selected, the clipboard will not be altered." - )] - pub yank: bool, - #[clap( - short = 'l', - long = "lang", - help = "Select the desired language using the locale code (en, fr, sp, etc.)", - long_help = "Select the desired language using its locale code. For example, English would have the locale code en and French would have the locale code fr. See --print-langs for a list of locale codes and the corresponding languages.", - default_value = "en" - )] - pub lang: String, - #[clap( - long = "print-langs", - help = "Display a list of supported languages", - long_help = "Display a list of supported languages and their respective locale codes." 
- )] - pub print_langs: bool, - #[clap( - long = "update-langs", - help = "Update all language files", - long_help = "Update all language files from the repository https://github.com/hisbaan/wordlists." - )] - pub update_langs: bool, + pub search_term: Option, + #[clap( + short = 'n', + long = "number", + default_value_t = 5, + help = "Change the number of matches printed", + long_help = "Change the number of words the program will print. The default value is five." + )] + pub number: usize, + #[clap( + short = 'c', + long = "clean-output", + help = "Print clean output", + long_help = "Print a clean version of the output without the title, numbers or colour." + )] + pub clean_output: bool, + #[clap( + short = 'v', + long = "verbose", + help = "Print verbose output", + long_help = "Print verbose output including the edit distance of the found word to the queried word." + )] + pub verbose: bool, + #[clap( + short = 'y', + long = "yank", + help = "Yank (copy) to the system cliboard", + long_help = "Yank (copy) the selected word to the system clipboard. If no word is selected, the clipboard will not be altered." + )] + pub yank: bool, + #[clap( + short = 'l', + long = "lang", + help = "Select the desired language using the locale code (en, fr, sp, etc.)", + long_help = "Select the desired language using its locale code. For example, English would have the locale code en and French would have the locale code fr. See --print-langs for a list of locale codes and the corresponding languages.", + default_value = "en" + )] + pub lang: String, + #[clap( + long = "print-langs", + help = "Display a list of supported languages", + long_help = "Display a list of supported languages and their respective locale codes." + )] + pub print_langs: bool, + #[clap( + long = "update-langs", + help = "Update all language files", + long_help = "Update all language files from the repository specified by CLI @wordlist-url@." 
+ )] + pub update_langs: bool, + #[clap( + short = 'w', + long = "wordlist-url", + help = "Wordlist repository URL", + long_help = "Wordlist repository URL. The default value is 'https://raw.githubusercontent.com/hisbaan/wordlists/main'", + default_value = "https://raw.githubusercontent.com/hisbaan/wordlists/main" + )] + pub wordlist_url: String, } diff --git a/src/langs.rs b/src/langs.rs index 4114c03..ed5b73d 100644 --- a/src/langs.rs +++ b/src/langs.rs @@ -64,6 +64,7 @@ pub static LOCALES: phf::Map<&'static str, &'static str> = phf_map! { "st" => "Sesotho", "sv" => "Swedish", "sw" => "Swahili", + "ta" => "Tamil", "tg" => "Tajik", "th" => "Thai", "tk" => "Turkmen", @@ -129,6 +130,7 @@ pub static SUPPORTED_LANGS: phf::Map<&'static str, &'static str> = phf_map! { "st" => "Sesotho", "sv" => "Swedish", "sw" => "Swahili", + "ta" => "Tamil", "tg" => "Tajik", "tk" => "Turkmen", "tl" => "Tagalog", diff --git a/src/lib.rs b/src/lib.rs index 45f1000..d7afd0d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,52 +11,53 @@ use nix::unistd::{fork, ForkResult}; /// /// * `string` - the string to be copied. pub fn yank(string: &str) { - let platform = std::env::consts::OS; - if vec![ - "linux", - "freebsd", - "netbsd", - "dragonfly", - "netbsd", - "openbsd", - "solaris", - ] - .contains(&platform) - { - // The platform is linux/*bsd and is likely using X11 or Wayland. - // There is a fix needed for clipboard use in cases like these. - // The clipboard is cleared on X11/Wayland after the process that set it exist. - // To combat this, we will fork and keep a process aroudn until the clipboard - // is cleared. - // Ideally, this wouldn't be an issue but it was a conscious design decision - // on X11/Wayland - #[cfg(unix)] - match unsafe { fork() } { - Ok(ForkResult::Child) => { - let mut ctx: ClipboardContext = ClipboardProvider::new().unwrap(); - ctx.set_contents(string.to_owned()).unwrap(); - - // Keep the process running until the clipboard changes. 
- loop { - let clipboard = ctx.get_contents().unwrap(); - std::thread::sleep(std::time::Duration::from_secs(1)); - if clipboard != string { - std::process::exit(0); - } - } - } - Err(_) => { - println!("{}", "Error: Clipboard fork failed".red()); - std::process::exit(1); - } - _ => {} - } - } else { - // The platform is NOT running X11/Wayland and thus, we don't have to handle - // the clipboard clearing behaviour. + let platform = std::env::consts::OS; + if vec![ + "linux", + "freebsd", + "netbsd", + "dragonfly", + "mac osx", + "netbsd", + "openbsd", + "solaris", + ] + .contains(&platform) + { + // The platform is linux/*bsd and is likely using X11 or Wayland. + // There is a fix needed for clipboard use in cases like these. + // The clipboard is cleared on X11/Wayland after the process that set it exist. + // To combat this, we will fork and keep a process aroudn until the clipboard + // is cleared. + // Ideally, this wouldn't be an issue but it was a conscious design decision + // on X11/Wayland + #[cfg(unix)] + match unsafe { fork() } { + Ok(ForkResult::Child) => { let mut ctx: ClipboardContext = ClipboardProvider::new().unwrap(); ctx.set_contents(string.to_owned()).unwrap(); + + // Keep the process running until the clipboard changes. + loop { + let clipboard = ctx.get_contents().unwrap(); + std::thread::sleep(std::time::Duration::from_secs(1)); + if clipboard != string { + std::process::exit(0); + } + } + } + Err(_) => { + println!("{}", "Error: Clipboard fork failed".red()); + std::process::exit(1); + } + _ => {} } + } else { + // The platform is NOT running X11/Wayland and thus, we don't have to handle + // the clipboard clearing behaviour. + let mut ctx: ClipboardContext = ClipboardProvider::new().unwrap(); + ctx.set_contents(string.to_owned()).unwrap(); + } } /// Insert `element` at `index` preserving length. 
@@ -77,12 +78,12 @@ pub fn yank(string: &str) { /// assert_eq!(to_shift, vec![0, 1, 11, 2, 3]); /// ``` pub fn insert_and_shift(list: &mut Vec, index: usize, element: T) { - if index > list.len() - 1 { - return; - } + if index > list.len() - 1 { + return; + } - list.insert(index, element); - list.truncate(list.len() - 1); + list.insert(index, element); + list.truncate(list.len() - 1); } /// Return the edit distance between `search_term` and `known_term`. @@ -109,53 +110,49 @@ pub fn insert_and_shift(list: &mut Vec, index: usize, element: T) { /// ``` #[allow(clippy::iter_count, clippy::needless_range_loop)] pub fn edit_distance(search_chars: &[char], known_term: &str) -> usize { - // Set local constants for repeated use later. - let known_chars: Vec = known_term.chars().collect(); - let n = search_chars.iter().count() + 1; - let m = known_chars.iter().count() + 1; + // Set local constants for repeated use later. + let known_chars: Vec = known_term.chars().collect(); + let n = search_chars.iter().count() + 1; + let m = known_chars.iter().count() + 1; - // Setup matrix 2D vector. - let mut mat = vec![0; m * n]; + // Setup matrix 2D vector. + let mut mat = vec![0; m * n]; - // Initialize values of the matrix. - for i in 1..n { - mat[i * m] = i; - } - for i in 1..m { - mat[i] = i; - } + // Initialize values of the matrix. + for i in 1..n { + mat[i * m] = i; + } + for i in 1..m { + mat[i] = i; + } - // Run the algorithm. - for i in 1..n { - // let search_char_i_minus_one = search_chars[i - 1]; - // let search_char_i_minus_two = if i > 1 { search_chars[i - 2] } else { ' ' }; - for j in 1..m { - let sub_cost = if search_chars[i - 1] == known_chars[j - 1] { - 0 - } else { - 1 - }; + // Run the algorithm. 
+ for i in 1..n { + // let search_char_i_minus_one = search_chars[i - 1]; + // let search_char_i_minus_two = if i > 1 { search_chars[i - 2] } else { ' ' }; + for j in 1..m { + let sub_cost = if search_chars[i - 1] == known_chars[j - 1] { + 0 + } else { + 1 + }; - mat[i * m + j] = min( - mat[(i - 1) * m + j - 1] + sub_cost, // substitution cost - min( - mat[(i - 1) * m + j] + 1, // deletion cost - mat[i * m + j - 1] + 1, // insertion cost - ), - ); - if i > 1 - && j > 1 - && search_chars[i - 1] == known_chars[j - 2] - && search_chars[i - 2] == known_chars[j - 1] - { - mat[i * m + j] = min( - mat[i * m + j], - mat[(i - 2) * m + j - 2] + 1, // transposition cost - ); - } - } + mat[i * m + j] = min( + mat[(i - 1) * m + j - 1] + sub_cost, // substitution cost + min( + mat[(i - 1) * m + j] + 1, // deletion cost + mat[i * m + j - 1] + 1, // insertion cost + ), + ); + if i > 1 && j > 1 && search_chars[i - 1] == known_chars[j - 2] && search_chars[i - 2] == known_chars[j - 1] { + mat[i * m + j] = min( + mat[i * m + j], + mat[(i - 2) * m + j - 2] + 1, // transposition cost + ); + } } + } - // Return the bottom left corner of the matrix. - mat[m * n - 1] + // Return the bottom left corner of the matrix. + mat[m * n - 1] } diff --git a/src/main.rs b/src/main.rs index f976982..30bfaf1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -58,7 +58,8 @@ fn run_app() -> std::result::Result<(), Error> { // Update all downloaded languages. if args.update_langs { - update_langs(); + assert!(!args.wordlist_url.ends_with("/"),"URL should end with branch name in github without trailing /"); + update_langs(args.wordlist_url); std::process::exit(0); } @@ -88,7 +89,7 @@ fn run_app() -> std::result::Result<(), Error> { } if SUPPORTED_LANGS.contains_key(args.lang.as_str()) { - fetch_word_list(args.lang.to_owned()); + fetch_word_list(args.lang.to_owned(),args.wordlist_url.to_owned()); } else { // Not supported // Initialize new command. 
@@ -110,14 +111,13 @@ fn run_app() -> std::result::Result<(), Error> { // Exit with error. clap::Error::exit(&error); } - // Get word list. The program will only get here if/when this is a valid word list. let word_list = read_to_string(dirs::data_dir().unwrap().join("didyoumean").join(args.lang)) .expect("Error reading file"); // Get dictionary of words from words.txt. let dictionary = word_list.split('\n'); - + //assert!(dictionary.clone().count()>20,"Size of wordlist > 20 words"); // Create mutable vecs for storing the top n words. let mut top_n_words = vec![""; args.number]; let mut top_n_dists = vec![search_term.len() * 10; args.number]; @@ -224,7 +224,7 @@ fn run_app() -> std::result::Result<(), Error> { /// /// * `lang` - A locale code string to define the word list file to fetch. #[tokio::main] -async fn fetch_word_list(lang: String) { +async fn fetch_word_list(lang: String,wordlist_url: String) { // Get data directory. let data_dir = dirs::data_dir().unwrap().join("didyoumean"); @@ -244,10 +244,11 @@ async fn fetch_word_list(lang: String) { ); let url = format!( - "https://raw.githubusercontent.com/hisbaan/wordlists/main/{}", + "{}/{}", + wordlist_url, &lang ); - + println!("Accessing URL: {}",url); // Setup reqwest. let response = get(&url).await.expect("Request failed"); let total_size = response.content_length().unwrap(); @@ -280,7 +281,7 @@ async fn fetch_word_list(lang: String) { } /// Update the word list files by deleting and downloading the files from the repository. -fn update_langs() { +fn update_langs(wordlist_url : String) { let data = data_dir().unwrap().join("didyoumean"); // Create data directory if it doesn't exist. @@ -299,7 +300,7 @@ fn update_langs() { // Only delete and download if the language is supported. if SUPPORTED_LANGS.contains_key(string) { remove_file(data.join(&string)).expect("Failed to update file (deletion failed)"); - fetch_word_list(string.to_string()); + fetch_word_list(string.to_string(),wordlist_url.to_string()); } } }