Skip to content

Commit

Permalink
added: language report; refactored: report module for common report s…
Browse files Browse the repository at this point in the history
…tuff;
  • Loading branch information
codybloemhard committed Aug 16, 2022
1 parent d665f3b commit 7e050f6
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 79 deletions.
6 changes: 2 additions & 4 deletions src/japanese.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,11 @@ pub fn is_latin(c: char) -> bool{
}

/// Returns `true` when `c` is one of the hiragana in the explicit list below.
///
/// Membership is decided purely by this list; any character that is not
/// spelled out in it (including whitespace and line breaks) yields `false`.
pub fn is_hiragana(c: char) -> bool{
    // Kept as a single-line literal so that no line break or indentation
    // accidentally becomes part of the accepted character set.
    "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをんがぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽゐゃゅょっ".contains(c)
}

/// Returns `true` when `c` is one of the katakana in the explicit list below.
///
/// Membership is decided purely by this list; any character that is not
/// spelled out in it (including whitespace and line breaks) yields `false`.
pub fn is_katakana(c: char) -> bool{
    // Kept as a single-line literal so that no line break or indentation
    // accidentally becomes part of the accepted character set.
    "アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワヲンガギグゲゴザジズゼゾダヂヅデドバビブベボパピプペポャュョッ".contains(c)
}

pub fn is_punctuation(c: char) -> bool{
Expand Down
62 changes: 62 additions & 0 deletions src/language.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
use crate::structure::*;
use crate::japanese::*;
use crate::report::*;

use std::fmt::Write;
use std::collections::HashMap;

/// Accumulated language statistics over one or more chapters.
///
/// Filled by `accumulate_lang_stats` and rendered by `lang_stats_report`.
#[derive(Debug, Clone, Default)]
pub struct LangStats{
/// Common report header data (manga name, volumes, chapters, picture and morae counts).
rp: ReportHeader,
/// Occurrence count per kanji mapping; keys are formatted as `"<kanji>: <mapping>"`.
kanji: HashMap<String, usize>,
/// Occurrence count per single hiragana or katakana character.
other: HashMap<String, usize>,
}

pub fn lang_stats_report(mut s: LangStats, doc: &mut String){
write_header(&mut s.rp, doc);

write_list(s.other, "Hiragana/Katakana frequencies:", doc);
write_list(s.kanji, "Kanji frequencies:", doc);
}

/// Folds one chapter into the running language statistics.
///
/// Updates the report header (manga, volume, chapter, picture count, morae),
/// counts every kanji mapping of every text and counts every hiragana or
/// katakana character that appears in the original text lines. Warnings are
/// appended to `log`.
pub fn accumulate_lang_stats(chapter: Chapter, stats: &mut LangStats, log: &mut String){
    set_current_manga(&mut stats.rp.manga, chapter.manga, log);
    stats.rp.volumes.push(chapter.volume);
    stats.rp.chapters.push(chapter.chapter);

    for picture in chapter.pic{
        stats.rp.pictures += 1;

        // A picture without text contributes nothing beyond the picture count.
        for text in picture.text.into_iter().flatten(){
            // Lines with the kanji replaced through the text's mapping, when it has one.
            let spoken = match &text.kmap{
                Some(kmap) => {
                    for [kanji, mapping] in kmap{
                        let entry = format!("{}: {}", kanji, mapping);
                        update(&mut stats.kanji, &entry, 1, |a, b| a + b);
                    }
                    map_kanjis(&text.lines, kmap.as_slice())
                },
                None => text.lines.clone(),
            };

            if could_contain_kanji(&spoken){
                let _ = writeln!(
                    log,
                    "Warning: lines {:#?} contain kanji or untranslateable characters.
Every kanji is counted as one (1) mora.",
                    spoken
                );
            }

            // Morae are counted on the replaced lines.
            let morae = spoken.iter().flat_map(|line| line.chars())
                .fold(0, |total, c| total + to_mora(c));
            stats.rp.morae += morae;

            // Kana frequencies are counted on the original, unreplaced lines.
            let kana = text.lines.iter()
                .flat_map(|line| line.chars())
                .filter(|&c| is_hiragana(c) || is_katakana(c));
            for c in kana{
                update(&mut stats.other, &c.to_string(), 1, |a, b| a + b);
            }
        }
    }
}

78 changes: 50 additions & 28 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
mod structure;
mod japanese;
mod stats;
mod transcribe;
mod language;
mod stats;
mod report;

use structure::*;
use stats::*;
use transcribe::*;
use language::*;
use stats::*;

use clap::Parser;

Expand All @@ -29,7 +32,7 @@ struct Args{
}

#[derive(Debug, Default, Clone, Copy, PartialEq, clap::ValueEnum)]
enum Mode { #[default] Transcribe, Stats }
enum Mode { #[default] Transcribe, Stats, Language }

// Selects where generated documents go: standard output (the default) or a file.
// (Plain comments on purpose: doc comments on a `ValueEnum` may end up in the CLI help.)
#[derive(Debug, Default, Clone, Copy, PartialEq, clap::ValueEnum)]
enum OutputMode { #[default] Stdout, File }
Expand All @@ -42,38 +45,57 @@ fn main() {
}
let mut log = String::new();
let mut doc = String::new();
let mut stats = Stats::default();
let mut fileroot = args.inputfiles[0].clone();
for file in args.inputfiles{
let contents = match fs::read_to_string(&file){
Ok(contents) => contents,
Err(error) => {
println!("Could not read file: \"{}\".\n\tError: {}", file.display(), error);
continue;
match args.mode{
Mode::Transcribe => {
for file in args.inputfiles{
let chapter = if let Some(c) = get_chapter(&file) { c } else { continue; };
doc.clear();
write_transcription(chapter, &mut doc, &mut log);
write_output(args.outputmode, &args.outputdir, file, &doc);
}
};
let chapter = match toml::from_str::<Chapter>(&contents){
Ok(chapter) => chapter,
Err(error) => panic!("{} (error position is an estimation!)", error),
};
if args.mode == Mode::Transcribe{
doc.clear();
write_transcription(chapter, &mut doc, &mut log);
write_output(args.outputmode, &args.outputdir, file, &doc);
} else {
accumulate_stats(chapter, &mut stats, &mut log);
}
}
if args.mode == Mode::Stats{
fileroot.set_file_name("stats");
stats_report(stats, &mut doc);
write_output(args.outputmode, &args.outputdir, fileroot, &doc);
},
Mode::Stats => {
let mut stats = Stats::default();
let mut fileroot = args.inputfiles[0].clone();
for file in args.inputfiles{
let chapter = if let Some(c) = get_chapter(&file) { c } else { continue; };
accumulate_stats(chapter, &mut stats, &mut log);
}
fileroot.set_file_name("stats");
stats_report(stats, &mut doc);
write_output(args.outputmode, &args.outputdir, fileroot, &doc);
},
Mode::Language => {
let mut stats = LangStats::default();
let mut fileroot = args.inputfiles[0].clone();
for file in args.inputfiles{
let chapter = if let Some(c) = get_chapter(&file) { c } else { continue; };
accumulate_lang_stats(chapter, &mut stats, &mut log);
}
fileroot.set_file_name("stats");
lang_stats_report(stats, &mut doc);
write_output(args.outputmode, &args.outputdir, fileroot, &doc);
},
}
if args.log {
println!("{}", log);
}
}

/// Reads `file` and parses it as a TOML-encoded `Chapter`.
///
/// Returns `None` (after printing a message to stdout) when the file cannot
/// be read, so the caller can move on to the next file.
///
/// # Panics
///
/// Panics when the file was read but its contents are not a valid chapter.
fn get_chapter(file: &PathBuf) -> Option<Chapter>{
    let read = fs::read_to_string(&file);
    if let Err(error) = &read{
        println!("Could not read file: \"{}\".\n\tError: {}", file.display(), error);
    }
    let contents = read.ok()?;

    // A malformed chapter file is treated as fatal rather than skipped.
    let chapter = toml::from_str::<Chapter>(&contents)
        .unwrap_or_else(|error| panic!("{} (error position is an estimation!)", error));
    Some(chapter)
}

fn write_output(outputmode: OutputMode, outputdir: &Option<PathBuf>, mut file: PathBuf, doc: &str){
if outputmode == OutputMode::File{
if let Some(outdir) = outputdir{
Expand Down
64 changes: 64 additions & 0 deletions src/report.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
use std::fmt::Write;
use std::collections::HashMap;

/// Data shared by every report: what was analysed and how much of it.
///
/// Rendered by `write_header`.
#[derive(Debug, Clone, Default)]
pub struct ReportHeader{
/// Name of the manga; empty until the first chapter has been seen.
pub manga: String,
/// Volume numbers of the analysed chapters; may hold duplicates until the header is written.
pub volumes: Vec<usize>,
/// Chapter numbers of the analysed chapters; may hold duplicates until the header is written.
pub chapters: Vec<usize>,
/// Number of pictures counted.
pub pictures: usize,
/// Total number of morae, printed as "Morae spoken".
pub morae: usize,
}

/// Appends the common report header to `doc`.
///
/// Emits the manga name, the sorted and de-duplicated volume and chapter
/// numbers (comma separated), the picture count and the morae count, one per
/// line. `h.volumes` and `h.chapters` are sorted and de-duplicated in place.
///
/// An empty volume or chapter list produces a line with just the label
/// (`"Volumes: "`), leaving the label itself intact.
pub fn write_header(h: &mut ReportHeader, doc: &mut String){
    /// Sorts and de-duplicates `numbers`, then writes them as `"<label>: a, b, c"`.
    fn write_number_line(label: &str, numbers: &mut Vec<usize>, doc: &mut String){
        numbers.sort_unstable();
        numbers.dedup();
        // Joining avoids having to strip a trailing separator afterwards, which
        // would eat into the label when the list is empty.
        let joined = numbers.iter()
            .map(|number| number.to_string())
            .collect::<Vec<_>>()
            .join(", ");
        let _ = writeln!(doc, "{}: {}", label, joined);
    }

    let _ = writeln!(doc, "Manga: {}", h.manga);
    write_number_line("Volumes", &mut h.volumes, doc);
    write_number_line("Chapters", &mut h.chapters, doc);
    let _ = writeln!(doc, "Pictures: {}", h.pictures);
    let _ = writeln!(doc, "Morae spoken: {}", h.morae);
}


/// Appends a titled frequency list to `doc`.
///
/// Writes `title` on its own line, followed by one tab-indented
/// `"<name>: <count>"` line per entry. Entries are ordered by descending
/// count; entries with equal counts are ordered by name so that the output
/// does not depend on the hash map's iteration order.
pub fn write_list(hmap: HashMap<String, usize>, title: &str, doc: &mut String){
    let _ = writeln!(doc, "{}", title);
    let mut list = hmap.into_iter().collect::<Vec<_>>();
    list.sort_unstable_by(|(name_a, count_a), (name_b, count_b)| {
        count_b.cmp(count_a).then_with(|| name_a.cmp(name_b))
    });
    for (name, count) in list{
        let _ = writeln!(doc, "\t{}: {}", name, count);
    }
}

/// Inserts `val` under `key`, or combines it with the value already stored.
///
/// When `key` is present the stored value becomes `fun(stored, val)`;
/// otherwise `val` is inserted as-is and `fun` is not called.
pub fn update<T: Copy>(map: &mut HashMap<String, T>, key: &str, val: T, fun: fn(T, T) -> T){
    match map.get_mut(key){
        Some(slot) => *slot = fun(*slot, val),
        None => {
            map.insert(key.to_string(), val);
        },
    }
}

/// Records the manga a chapter belongs to, or logs a mismatch.
///
/// When `current` is still empty it is set to `chapter`. Otherwise `current`
/// is left untouched, and a line is appended to `log` if `chapter` names a
/// different manga.
pub fn set_current_manga(current: &mut String, chapter: String, log: &mut String){
    if current.is_empty(){
        *current = chapter;
        return;
    }
    if *current != chapter{
        let _ = writeln!(log, "Different manga found: {}. Current manga is: {}.", chapter, current);
    }
}
55 changes: 8 additions & 47 deletions src/stats.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
use crate::structure::*;
use crate::japanese::*;
use crate::report::*;

use std::fmt::Write;
use std::collections::HashMap;

#[derive(Debug, Clone, Default)]
pub struct Stats{
manga: String,
volumes: Vec<usize>,
chapters: Vec<usize>,
pictures: usize,
morae: usize,
rp: ReportHeader,
locations: HashMap<String, (usize, usize)>,
characters: HashMap<String, usize>,
speaks: HashMap<String, usize>,
Expand All @@ -19,30 +16,7 @@ pub struct Stats{
}

pub fn stats_report(mut s: Stats, doc: &mut String){
let _ = writeln!(doc, "Manga: {}", s.manga);
let _ = write!(doc, "Volumes: ");

s.volumes.sort();
s.volumes.dedup();
for vol in s.volumes{
let _ = write!(doc, "{}, ", vol);
}
doc.pop();
doc.pop();
let _ = writeln!(doc);

let _ = write!(doc, "Chapters: ");
s.chapters.sort();
s.chapters.dedup();
for chap in s.chapters{
let _ = write!(doc, "{}, ", chap);
}
doc.pop();
doc.pop();
let _ = writeln!(doc);

let _ = writeln!(doc, "Pictures: {}", s.pictures);
let _ = writeln!(doc, "Morae spoken: {}", s.morae);
write_header(&mut s.rp, doc);

let _ = writeln!(doc, "Locations: ");
let mut locs = s.locations.into_iter().collect::<Vec<_>>();
Expand All @@ -67,24 +41,11 @@ pub fn stats_report(mut s: Stats, doc: &mut String){
}

pub fn accumulate_stats(chapter: Chapter, stats: &mut Stats, log: &mut String){
fn update<T: Copy>(map: &mut HashMap<String, T>, key: &str, val: T, fun: fn(T, T) -> T){
if let Some(x) = map.get_mut(key){
*x = fun(*x, val);
} else {
map.insert(key.to_string(), val);
}
}

if stats.manga.is_empty(){
stats.manga = chapter.manga;
} else if stats.manga != chapter.manga{
let _ = writeln!(log, "Different manga found: {}. Current manga is: {}.",
chapter.manga, stats.manga);
}
stats.volumes.push(chapter.volume);
stats.chapters.push(chapter.chapter);
set_current_manga(&mut stats.rp.manga, chapter.manga, log);
stats.rp.volumes.push(chapter.volume);
stats.rp.chapters.push(chapter.chapter);
for picture in chapter.pic{
stats.pictures += 1;
stats.rp.pictures += 1;
let location = picture.location.unwrap_or_default();
let mut pic_morae = 0;
if let Some(characters) = picture.characters{
Expand All @@ -109,7 +70,7 @@ pub fn accumulate_stats(chapter: Chapter, stats: &mut Stats, log: &mut String){
}
let morae = replacements.iter().flat_map(|line| line.chars())
.fold(0, |acc, c| acc + to_mora(c));
stats.morae += morae;
stats.rp.morae += morae;
pic_morae += morae;
update(&mut stats.speaks, &text.from, morae, |a, b| a + b);
if let Some(receiver) = text.to{
Expand Down

0 comments on commit 7e050f6

Please sign in to comment.