From 7e050f65b4cb91432c0a504a1956c03e5eb7a390 Mon Sep 17 00:00:00 2001 From: Cody Bloemhard Date: Tue, 16 Aug 2022 23:20:21 +0200 Subject: [PATCH] added: language report; refactored: report module for common report stuff; --- src/japanese.rs | 6 ++-- src/language.rs | 62 +++++++++++++++++++++++++++++++++++++++ src/main.rs | 78 +++++++++++++++++++++++++++++++------------------ src/report.rs | 64 ++++++++++++++++++++++++++++++++++++++++ src/stats.rs | 55 +++++----------------------------- 5 files changed, 186 insertions(+), 79 deletions(-) create mode 100644 src/language.rs create mode 100644 src/report.rs diff --git a/src/japanese.rs b/src/japanese.rs index c2a87bd..9ff35eb 100644 --- a/src/japanese.rs +++ b/src/japanese.rs @@ -152,13 +152,11 @@ pub fn is_latin(c: char) -> bool{ } pub fn is_hiragana(c: char) -> bool{ - "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろ - わをんがぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽゐゃゅょっ".contains(c) + "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをんがぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽゐゃゅょっ".contains(c) } pub fn is_katakana(c: char) -> bool{ - "アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロ - ワヲンガギグゲゴザジズゼゾダヂヅデドバビブベボパピプペポャュョッ".contains(c) + "アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワヲンガギグゲゴザジズゼゾダヂヅデドバビブベボパピプペポャュョッ".contains(c) } pub fn is_punctuation(c: char) -> bool{ diff --git a/src/language.rs b/src/language.rs new file mode 100644 index 0000000..e82f7b0 --- /dev/null +++ b/src/language.rs @@ -0,0 +1,62 @@ +use crate::structure::*; +use crate::japanese::*; +use crate::report::*; + +use std::fmt::Write; +use std::collections::HashMap; + +#[derive(Debug, Clone, Default)] +pub struct LangStats{ + rp: ReportHeader, + kanji: HashMap, + other: HashMap, +} + +pub fn lang_stats_report(mut s: LangStats, doc: &mut String){ + write_header(&mut s.rp, doc); + + write_list(s.other, "Hiragana/Katakana frequencies:", doc); + write_list(s.kanji, "Kanji frequencies:", doc); +} + +pub fn accumulate_lang_stats(chapter: Chapter, stats: &mut LangStats, log: &mut String){ + set_current_manga(&mut stats.rp.manga, chapter.manga, log); + stats.rp.volumes.push(chapter.volume); + stats.rp.chapters.push(chapter.chapter); + for picture in chapter.pic{ + stats.rp.pictures += 1; + + if let Some(texts) = picture.text{ + for text in texts{ + let replacements = if let Some(kmap) = &text.kmap{ + for [kanji, mapping] in kmap{ + let key = format!("{}: {}", kanji, mapping); + update(&mut stats.kanji, &key, 1, |a, b| a + b); + } + map_kanjis(&text.lines, kmap.as_slice()) + } else { + text.lines.clone() + }; + if could_contain_kanji(&replacements){ + let _ = writeln!( + log, + "Warning: lines {:#?} contain kanji or untranslateable characters. + Every kanji is counted as one (1) mora.", + replacements + ); + } + let morae = replacements.iter().flat_map(|line| line.chars()) + .fold(0, |acc, c| acc + to_mora(c)); + for line in text.lines{ + for c in line.chars(){ + if is_hiragana(c) || is_katakana(c) { + update(&mut stats.other, &c.to_string(), 1, |a, b| a + b); + } + } + } + stats.rp.morae += morae; + } + }; + } +} + diff --git a/src/main.rs b/src/main.rs index b8fa446..706923a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,11 +1,14 @@ mod structure; mod japanese; -mod stats; mod transcribe; +mod language; +mod stats; +mod report; use structure::*; -use stats::*; use transcribe::*; +use language::*; +use stats::*; use clap::Parser; @@ -29,7 +32,7 @@ struct Args{ } #[derive(Debug, Default, Clone, Copy, PartialEq, clap::ValueEnum)] -enum Mode { #[default] Transcribe, Stats } +enum Mode { #[default] Transcribe, Stats, Language } #[derive(Debug, Default, Clone, Copy, PartialEq, clap::ValueEnum)] enum OutputMode { #[default] Stdout, File } @@ -42,38 +45,57 @@ fn main() { } let mut log = String::new(); let mut doc = String::new(); - let mut stats = Stats::default(); - let mut fileroot = args.inputfiles[0].clone(); - for file in args.inputfiles{ - let contents = match fs::read_to_string(&file){ - Ok(contents) => contents, - Err(error) => { - println!("Could not read file: \"{}\".\n\tError: {}", file.display(), error); - continue; + match args.mode{ + Mode::Transcribe => { + for file in args.inputfiles{ + let chapter = if let Some(c) = get_chapter(&file) { c } else { continue; }; + doc.clear(); + write_transcription(chapter, &mut doc, &mut log); + write_output(args.outputmode, &args.outputdir, file, &doc); } - }; - let chapter = match toml::from_str::(&contents){ - Ok(chapter) => chapter, - Err(error) => panic!("{} (error position is an estimation!)", error), - }; - if args.mode == Mode::Transcribe{ - doc.clear(); - write_transcription(chapter, &mut doc, &mut log); - write_output(args.outputmode, &args.outputdir, file, &doc); - } else { - accumulate_stats(chapter, &mut stats, &mut log); - } - } - if args.mode == Mode::Stats{ - fileroot.set_file_name("stats"); - stats_report(stats, &mut doc); - write_output(args.outputmode, &args.outputdir, fileroot, &doc); + }, + Mode::Stats => { + let mut stats = Stats::default(); + let mut fileroot = args.inputfiles[0].clone(); + for file in args.inputfiles{ + let chapter = if let Some(c) = get_chapter(&file) { c } else { continue; }; + accumulate_stats(chapter, &mut stats, &mut log); + } + fileroot.set_file_name("stats"); + stats_report(stats, &mut doc); + write_output(args.outputmode, &args.outputdir, fileroot, &doc); + }, + Mode::Language => { + let mut stats = LangStats::default(); + let mut fileroot = args.inputfiles[0].clone(); + for file in args.inputfiles{ + let chapter = if let Some(c) = get_chapter(&file) { c } else { continue; }; + accumulate_lang_stats(chapter, &mut stats, &mut log); + } + fileroot.set_file_name("stats"); + lang_stats_report(stats, &mut doc); + write_output(args.outputmode, &args.outputdir, fileroot, &doc); + }, } if args.log { println!("{}", log); } } +fn get_chapter(file: &PathBuf) -> Option{ + let contents = match fs::read_to_string(&file){ + Ok(contents) => contents, + Err(error) => { + println!("Could not read file: \"{}\".\n\tError: {}", file.display(), error); + return None; + } + }; + match toml::from_str::(&contents){ + Ok(chapter) => Some(chapter), + Err(error) => panic!("{} (error position is an estimation!)", error), + } +} + fn write_output(outputmode: OutputMode, outputdir: &Option, mut file: PathBuf, doc: &str){ if outputmode == OutputMode::File{ if let Some(outdir) = outputdir{ diff --git a/src/report.rs b/src/report.rs new file mode 100644 index 0000000..8eccc73 --- /dev/null +++ b/src/report.rs @@ -0,0 +1,64 @@ +use std::fmt::Write; +use std::collections::HashMap; + +#[derive(Debug, Clone, Default)] +pub struct ReportHeader{ + pub manga: String, + pub volumes: Vec, + pub chapters: Vec, + pub pictures: usize, + pub morae: usize, +} + +pub fn write_header(h: &mut ReportHeader, doc: &mut String){ + let _ = writeln!(doc, "Manga: {}", h.manga); + let _ = write!(doc, "Volumes: "); + + h.volumes.sort(); + h.volumes.dedup(); + for vol in &h.volumes{ + let _ = write!(doc, "{}, ", vol); + } + doc.pop(); + doc.pop(); + let _ = writeln!(doc); + + let _ = write!(doc, "Chapters: "); + h.chapters.sort(); + h.chapters.dedup(); + for chap in &h.chapters{ + let _ = write!(doc, "{}, ", chap); + } + doc.pop(); + doc.pop(); + let _ = writeln!(doc); + + let _ = writeln!(doc, "Pictures: {}", h.pictures); + let _ = writeln!(doc, "Morae spoken: {}", h.morae); +} + + +pub fn write_list(hmap: HashMap, title: &str, doc: &mut String){ + let _ = writeln!(doc, "{}", title); + let mut list = hmap.into_iter().collect::>(); + list.sort_unstable_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap()); + for (name, count) in list{ + let _ = writeln!(doc, "\t{}: {}", name, count); + } +} + +pub fn update(map: &mut HashMap, key: &str, val: T, fun: fn(T, T) -> T){ + if let Some(x) = map.get_mut(key){ + *x = fun(*x, val); + } else { + map.insert(key.to_string(), val); + } +} + +pub fn set_current_manga(current: &mut String, mut chapter: String, log: &mut String){ + if current.is_empty(){ + *current = chapter; + } else if current != &mut chapter{ + let _ = writeln!(log, "Different manga found: {}. Current manga is: {}.", chapter, current); + } +} diff --git a/src/stats.rs b/src/stats.rs index 99bf593..941a837 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -1,16 +1,13 @@ use crate::structure::*; use crate::japanese::*; +use crate::report::*; use std::fmt::Write; use std::collections::HashMap; #[derive(Debug, Clone, Default)] pub struct Stats{ - manga: String, - volumes: Vec, - chapters: Vec, - pictures: usize, - morae: usize, + rp: ReportHeader, locations: HashMap, characters: HashMap, speaks: HashMap, @@ -19,30 +16,7 @@ pub struct Stats{ } pub fn stats_report(mut s: Stats, doc: &mut String){ - let _ = writeln!(doc, "Manga: {}", s.manga); - let _ = write!(doc, "Volumes: "); - - s.volumes.sort(); - s.volumes.dedup(); - for vol in s.volumes{ - let _ = write!(doc, "{}, ", vol); - } - doc.pop(); - doc.pop(); - let _ = writeln!(doc); - - let _ = write!(doc, "Chapters: "); - s.chapters.sort(); - s.chapters.dedup(); - for chap in s.chapters{ - let _ = write!(doc, "{}, ", chap); - } - doc.pop(); - doc.pop(); - let _ = writeln!(doc); - - let _ = writeln!(doc, "Pictures: {}", s.pictures); - let _ = writeln!(doc, "Morae spoken: {}", s.morae); + write_header(&mut s.rp, doc); let _ = writeln!(doc, "Locations: "); let mut locs = s.locations.into_iter().collect::>(); @@ -67,24 +41,11 @@ pub fn stats_report(mut s: Stats, doc: &mut String){ } pub fn accumulate_stats(chapter: Chapter, stats: &mut Stats, log: &mut String){ - fn update(map: &mut HashMap, key: &str, val: T, fun: fn(T, T) -> T){ - if let Some(x) = map.get_mut(key){ - *x = fun(*x, val); - } else { - map.insert(key.to_string(), val); - } - } - - if stats.manga.is_empty(){ - stats.manga = chapter.manga; - } else if stats.manga != chapter.manga{ - let _ = writeln!(log, "Different manga found: {}. Current manga is: {}.", - chapter.manga, stats.manga); - } - stats.volumes.push(chapter.volume); - stats.chapters.push(chapter.chapter); + set_current_manga(&mut stats.rp.manga, chapter.manga, log); + stats.rp.volumes.push(chapter.volume); + stats.rp.chapters.push(chapter.chapter); for picture in chapter.pic{ - stats.pictures += 1; + stats.rp.pictures += 1; let location = picture.location.unwrap_or_default(); let mut pic_morae = 0; if let Some(characters) = picture.characters{ @@ -109,7 +70,7 @@ pub fn accumulate_stats(chapter: Chapter, stats: &mut Stats, log: &mut String){ } let morae = replacements.iter().flat_map(|line| line.chars()) .fold(0, |acc, c| acc + to_mora(c)); - stats.morae += morae; + stats.rp.morae += morae; pic_morae += morae; update(&mut stats.speaks, &text.from, morae, |a, b| a + b); if let Some(receiver) = text.to{