From 3509c8a4b2403b94437d27bdec7958bf05cb5129 Mon Sep 17 00:00:00 2001
From: Koichi Akabe
Date: Fri, 19 Nov 2021 19:36:58 +0900
Subject: [PATCH 01/60] Use Option for dict_pma

---
 vaporetto/src/predictor.rs | 72 +++++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs
index a2a6f245..c50cf54e 100644
--- a/vaporetto/src/predictor.rs
+++ b/vaporetto/src/predictor.rs
@@ -20,7 +20,7 @@ use daachorse::DoubleArrayAhoCorasick;
 /// Predictor.
 pub struct Predictor {
     word_pma: DoubleArrayAhoCorasick,
-    dict_pma: DoubleArrayAhoCorasick,
+    dict_pma: Option<DoubleArrayAhoCorasick>,
     word_weights: Vec<Vec<ScoreValue>>,
     dict_weights: Vec<DictWeight>,
     dict_word_wise: bool,
@@ -79,7 +79,11 @@ impl Predictor {
 
         let word_pma = DoubleArrayAhoCorasick::new(words).unwrap();
         let type_pma = DoubleArrayAhoCorasick::new(model.types).unwrap();
-        let dict_pma = DoubleArrayAhoCorasick::new(dict).unwrap();
+        let dict_pma = if dict.is_empty() {
+            None
+        } else {
+            Some(DoubleArrayAhoCorasick::new(dict).unwrap())
+        };
 
         let type_scorer = TypeScorer::new(type_pma, type_weights, model.type_window_size);
 
@@ -213,40 +217,42 @@ impl Predictor {
     }
 
     fn add_dict_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) {
-        let char_start = if start >= self.dict_window_size {
-            start + 1 - self.dict_window_size
-        } else {
-            0
-        };
-        let text_start = sentence.char_to_str_pos[char_start];
-        let char_end = std::cmp::min(
-            start + ys.len() + self.dict_window_size,
-            sentence.char_to_str_pos.len() - 1,
-        );
-        let text_end = sentence.char_to_str_pos[char_end];
-        let text = &sentence.text[text_start..text_end];
-        let padding = start - char_start + 1;
-        for m in self.dict_pma.find_overlapping_iter(&text) {
-            let m_start = sentence.str_to_char_pos[m.start() + text_start] - char_start;
-            let m_end = sentence.str_to_char_pos[m.end() + text_start] - char_start;
-            let idx = if self.dict_word_wise {
-                m.pattern()
+        if let Some(dict_pma) = self.dict_pma.as_ref() {
+            let char_start = if start >= self.dict_window_size {
+                start + 1 - self.dict_window_size
             } else {
-                std::cmp::min(m_end - m_start, self.dict_weights.len()) - 1
+                0
             };
-            let dict_weight = self.dict_weights[idx];
-            if m_start >= padding && m_start < padding + ys.len() {
-                ys[m_start - padding] += dict_weight.right;
-            }
-            let range_start = std::cmp::max(0, m_start as isize - padding as isize + 1);
-            let range_end = std::cmp::min(m_end as isize - padding as isize, ys.len() as isize);
-            if range_start < range_end {
-                for y in &mut ys[range_start as usize..range_end as usize] {
-                    *y += dict_weight.inner;
+            let text_start = sentence.char_to_str_pos[char_start];
+            let char_end = std::cmp::min(
+                start + ys.len() + self.dict_window_size,
+                sentence.char_to_str_pos.len() - 1,
+            );
+            let text_end = sentence.char_to_str_pos[char_end];
+            let text = &sentence.text[text_start..text_end];
+            let padding = start - char_start + 1;
+            for m in dict_pma.find_overlapping_iter(&text) {
+                let m_start = sentence.str_to_char_pos[m.start() + text_start] - char_start;
+                let m_end = sentence.str_to_char_pos[m.end() + text_start] - char_start;
+                let idx = if self.dict_word_wise {
+                    m.pattern()
+                } else {
+                    std::cmp::min(m_end - m_start, self.dict_weights.len()) - 1
+                };
+                let dict_weight = self.dict_weights[idx];
+                if m_start >= padding && m_start < padding + ys.len() {
+                    ys[m_start - padding] += dict_weight.right;
+                }
+                let range_start = std::cmp::max(0, m_start as isize - padding as isize + 1);
+                let range_end = std::cmp::min(m_end as isize - padding as isize, ys.len() as isize);
+                if range_start < range_end {
+                    for y in &mut ys[range_start as usize..range_end as usize] {
+                        *y += dict_weight.inner;
+                    }
+                }
+                if m_end >= padding && m_end < ys.len() + padding {
+                    ys[m_end - padding] += dict_weight.left;
                 }
-            }
-            if m_end >= padding && m_end < ys.len() + padding {
-                ys[m_end - padding] += dict_weight.left;
             }
         }
     }
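Editorial note: wrapping `dict_pma` in `Option` lets a model without dictionary entries skip automaton construction entirely, presumably because daachorse cannot build an automaton from an empty pattern set. A minimal sketch of the pattern this patch applies, assuming daachorse 0.2 as pinned in Cargo.toml (the helper name is ours, not part of the crate):

```rust
use daachorse::DoubleArrayAhoCorasick;

// Hypothetical helper mirroring the patch: build the automaton only when
// patterns exist; scoring code then guards on the Option and becomes a no-op.
fn build_optional_pma(patterns: Vec<Vec<u8>>) -> Option<DoubleArrayAhoCorasick> {
    if patterns.is_empty() {
        None
    } else {
        Some(DoubleArrayAhoCorasick::new(patterns).unwrap())
    }
}
```
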
From d7439b05412147f21452ececeaca0ef368d75058 Mon Sep 17 00:00:00 2001
From: Koichi Akabe
Date: Thu, 25 Nov 2021 10:57:36 +0900
Subject: [PATCH 02/60] Remove multi-threading feature

---
 vaporetto/Cargo.toml       |   2 -
 vaporetto/src/lib.rs       |   3 -
 vaporetto/src/predictor.rs | 182 -------------------------------------
 3 files changed, 187 deletions(-)

diff --git a/vaporetto/Cargo.toml b/vaporetto/Cargo.toml
index 18f1baf0..ca1531c6 100644
--- a/vaporetto/Cargo.toml
+++ b/vaporetto/Cargo.toml
@@ -19,14 +19,12 @@ daachorse = "0.2.0" # MIT or Apache-2.0
 serde = { version = "1.0", features = ["derive"] } # MIT or Apache-2.0
 
 byteorder = { version = "1.4", optional = true } # Unlicense or MIT
-crossbeam-channel = { version = "0.5", optional = true } # MIT or Apache-2.0
 liblinear = { version = "1", optional = true } # MIT
 
 [features]
 default = ["model-quantize"]
 kytea = ["byteorder"]
 model-quantize = []
-multithreading = ["crossbeam-channel"]
 train = ["liblinear"]
 
 [package.metadata.docs.rs]
diff --git a/vaporetto/src/lib.rs b/vaporetto/src/lib.rs
index d7107e66..662f7b8c 100644
--- a/vaporetto/src/lib.rs
+++ b/vaporetto/src/lib.rs
@@ -46,9 +46,6 @@ pub use model::Model;
 pub use predictor::Predictor;
 pub use sentence::{BoundaryType, CharacterType, Sentence};
 
-#[cfg(feature = "multithreading")]
-pub use predictor::MultithreadPredictor;
-
 #[cfg(feature = "train")]
 pub use trainer::{Dataset, SolverType, Trainer};
 
diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs
index a2a6f245..91aadd6a 100644
--- a/vaporetto/src/predictor.rs
+++ b/vaporetto/src/predictor.rs
@@ -1,16 +1,6 @@
 use std::collections::HashMap;
 use std::ops::Range;
 
-#[cfg(feature = "multithreading")]
-use std::cell::RefCell;
-#[cfg(feature = "multithreading")]
-use std::sync::Arc;
-#[cfg(feature = "multithreading")]
-use std::thread;
-
-#[cfg(feature = "multithreading")]
-use crossbeam_channel::{Receiver, Sender};
-
 use crate::model::{DictWeight, Model, ScoreValue};
 use crate::sentence::{BoundaryType, Sentence};
 use crate::type_scorer::TypeScorer;
@@ -377,178 +367,6 @@ impl Predictor {
         self.dict_window_size = std::cmp::max(size, 1);
         self
     }
-
-    /// Creates a multithreading predictor. This function is the alias of
-    /// [`MultithreadPredictor::new()`].
-    ///
-    /// # Arguments
-    ///
-    /// * `n_threads` - The number of threads.
-    /// * `chunk_size` - The chunk size of each thread.
-    ///
-    /// # Returns
-    ///
-    /// A multithread predictor.
-    #[cfg(feature = "multithreading")]
-    #[cfg_attr(docsrs, doc(cfg(feature = "multithreading")))]
-    pub fn multithreading(self, n_threads: usize, chunk_size: usize) -> MultithreadPredictor {
-        MultithreadPredictor::new(self, n_threads, chunk_size)
-    }
-}
-
-/// Predictor for multithreading.
-#[cfg(feature = "multithreading")]
-#[cfg_attr(docsrs, doc(cfg(feature = "multithreading")))]
-pub struct MultithreadPredictor {
-    task_tx: Sender<(Arc<Sentence>, Range<usize>, Vec<ScoreValue>)>,
-    result_rx: Receiver<(Vec<ScoreValue>, Range<usize>)>,
-    chunk_size: usize,
-    ys_pool: RefCell<Vec<Vec<ScoreValue>>>,
-
-    #[cfg(feature = "model-quantize")]
-    quantize_multiplier: f64,
-}
-
-#[cfg(feature = "multithreading")]
-impl MultithreadPredictor {
-    /// Creates a multithreading predictor.
-    ///
-    /// # Arguments
-    ///
-    /// * `predictor` - A normal predictor.
-    /// * `n_threads` - The number of threads.
-    /// * `chunk_size` - The chunk size of each thread.
-    ///
-    /// # Returns
-    ///
-    /// A multithread predictor.
-    pub fn new(predictor: Predictor, n_threads: usize, chunk_size: usize) -> Self {
-        let predictor = Arc::new(predictor);
-
-        let (result_tx, result_rx) = crossbeam_channel::unbounded();
-        let (task_tx, task_rx) =
-            crossbeam_channel::unbounded::<(Arc<Sentence>, Range<usize>, Vec<ScoreValue>)>();
-        for _ in 0..n_threads {
-            let predictor = Arc::clone(&predictor);
-            let result_tx = result_tx.clone();
-            let task_rx = task_rx.clone();
-            thread::spawn(move || {
-                for (sentence, range, mut ys) in task_rx {
-                    predictor.predict_partial_impl(
-                        &sentence,
-                        range.clone(),
-                        &mut ys[..range.len()],
-                    );
-                    std::mem::drop(sentence);
-                    result_tx.send((ys, range)).unwrap();
-                }
-            });
-        }
-
-        Self {
-            task_tx,
-            result_rx,
-            chunk_size,
-            ys_pool: RefCell::new(vec![]),
-
-            #[cfg(feature = "model-quantize")]
-            quantize_multiplier: predictor.quantize_multiplier,
-        }
-    }
-
-    /// Predicts word boundaries.
-    ///
-    /// # Arguments
-    ///
-    /// * `sentence` - A sentence.
-    ///
-    /// # Returns
-    ///
-    /// A sentence with predicted boundary information.
-    pub fn predict(&self, sentence: Sentence) -> Sentence {
-        let sentence = Arc::new(sentence);
-
-        let mut n_chunks = 0;
-        let mut ys_pool = self.ys_pool.borrow_mut();
-        for start in (0..sentence.boundaries.len()).step_by(self.chunk_size) {
-            let ys = ys_pool
-                .pop()
-                .unwrap_or_else(|| vec![ScoreValue::default(); self.chunk_size]);
-            let sentence = Arc::clone(&sentence);
-            let end = std::cmp::min(start + self.chunk_size, sentence.boundaries.len());
-            self.task_tx.send((sentence, start..end, ys)).unwrap();
-            n_chunks += 1;
-        }
-        let mut boundaries = vec![BoundaryType::Unknown; sentence.boundaries.len()];
-        for _ in 0..n_chunks {
-            let (ys, range) = self.result_rx.recv().unwrap();
-            for (&y, b) in ys.iter().zip(&mut boundaries[range]) {
-                *b = if y >= ScoreValue::default() {
-                    BoundaryType::WordBoundary
-                } else {
-                    BoundaryType::NotWordBoundary
-                };
-            }
-            ys_pool.push(ys);
-        }
-
-        let mut sentence = Arc::try_unwrap(sentence).unwrap();
-        sentence.boundaries = boundaries;
-        sentence
-    }
-
-    /// Predicts word boundaries. This function inserts scores.
-    ///
-    /// # Arguments
-    ///
-    /// * `sentence` - A sentence.
-    ///
-    /// # Returns
-    ///
-    /// A sentence with predicted boundary information.
-    pub fn predict_with_score(&self, mut sentence: Sentence) -> Sentence {
-        let mut scores = sentence
-            .boundary_scores
-            .take()
-            .unwrap_or_else(|| vec![0.; sentence.boundaries.len()]);
-        let sentence = Arc::new(sentence);
-        let mut n_chunks = 0;
-        let mut ys_pool = self.ys_pool.borrow_mut();
-        for start in (0..sentence.boundaries.len()).step_by(self.chunk_size) {
-            let ys = ys_pool
-                .pop()
-                .unwrap_or_else(|| vec![ScoreValue::default(); self.chunk_size]);
-            let sentence = Arc::clone(&sentence);
-            let end = std::cmp::min(start + self.chunk_size, sentence.boundaries.len());
-            self.task_tx.send((sentence, start..end, ys)).unwrap();
-            n_chunks += 1;
-        }
-        let mut boundaries = vec![BoundaryType::Unknown; sentence.boundaries.len()];
-        for _ in 0..n_chunks {
-            let (ys, range) = self.result_rx.recv().unwrap();
-            for (&y, (b, s)) in ys
-                .iter()
-                .zip(boundaries[range.clone()].iter_mut().zip(&mut scores[range]))
-            {
-                *b = if y >= ScoreValue::default() {
-                    BoundaryType::WordBoundary
-                } else {
-                    BoundaryType::NotWordBoundary
-                };
-
-                #[cfg(feature = "model-quantize")]
-                let y = y as f64 * self.quantize_multiplier;
-
-                *s = y;
-            }
-            ys_pool.push(ys);
-        }
-
-        let mut sentence = Arc::try_unwrap(sentence).unwrap();
-        sentence.boundaries = boundaries;
-        sentence.boundary_scores.replace(scores);
-        sentence
-    }
 }
 
 #[cfg(test)]
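Editorial note: with the built-in wrapper gone, callers that want parallel prediction can reproduce the removed pattern themselves; the deleted code shared one `Predictor` across worker threads via `Arc`, so it relied on `Predictor` being `Send + Sync`. A minimal sketch under that same assumption (the model path and per-line threading are placeholders, not the crate's API):

```rust
use std::sync::Arc;
use std::thread;

use vaporetto::{Model, Predictor, Sentence};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // "model.bin" is a placeholder path for a serialized vaporetto model.
    let model = Model::read(&mut std::fs::File::open("model.bin")?)?;
    let predictor = Arc::new(Predictor::new(model));

    let lines = vec!["まぁ社長は火星猫だ".to_string(), "火星猫の生態".to_string()];
    let handles: Vec<_> = lines
        .into_iter()
        .map(|line| {
            // Share the predictor across threads, as the removed code did.
            let predictor = Arc::clone(&predictor);
            thread::spawn(move || predictor.predict(Sentence::from_raw(line).unwrap()))
        })
        .collect();
    for handle in handles {
        let s = handle.join().unwrap();
        println!("{}", s.to_tokenized_vec().unwrap().join(" "));
    }
    Ok(())
}
```
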
From d4f06e71ce67ee96f7f42d5ce5c32077141786f6 Mon Sep 17 00:00:00 2001
From: Koichi Akabe
Date: Thu, 25 Nov 2021 13:03:17 +0900
Subject: [PATCH 03/60] Add scorer modules for char_ngrams and dict

---
 vaporetto/src/char_scorer.rs |  73 +++++++++++++++++++++
 vaporetto/src/dict_scorer.rs |  94 ++++++++++++++++++++++++++
 vaporetto/src/lib.rs         |   2 +
 vaporetto/src/predictor.rs   | 123 +++++++----------------------------
 4 files changed, 194 insertions(+), 98 deletions(-)
 create mode 100644 vaporetto/src/char_scorer.rs
 create mode 100644 vaporetto/src/dict_scorer.rs

diff --git a/vaporetto/src/char_scorer.rs b/vaporetto/src/char_scorer.rs
new file mode 100644
index 00000000..4256fa34
--- /dev/null
+++ b/vaporetto/src/char_scorer.rs
@@ -0,0 +1,73 @@
+use crate::model::ScoreValue;
+use crate::sentence::Sentence;
+use daachorse::DoubleArrayAhoCorasick;
+
+pub enum CharScorer {
+    Pma(CharScorerPma),
+}
+
+impl CharScorer {
+    pub fn new(
+        pma: DoubleArrayAhoCorasick,
+        weights: Vec<Vec<ScoreValue>>,
+        window_size: usize,
+    ) -> Self {
+        Self::Pma(CharScorerPma::new(pma, weights, window_size))
+    }
+
+    pub fn add_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) {
+        match self {
+            CharScorer::Pma(pma) => pma.add_scores(sentence, start, ys),
+        }
+    }
+}
+
+pub struct CharScorerPma {
+    pma: DoubleArrayAhoCorasick,
+    weights: Vec<Vec<ScoreValue>>,
+    window_size: usize,
+}
+
+impl CharScorerPma {
+    pub fn new(
+        pma: DoubleArrayAhoCorasick,
+        weights: Vec<Vec<ScoreValue>>,
+        window_size: usize,
+    ) -> Self {
+        Self {
+            pma,
+            weights,
+            window_size,
+        }
+    }
+
+    pub fn add_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) {
+        let char_start = if start >= self.window_size {
+            start + 1 - self.window_size
+        } else {
+            0
+        };
+        let text_start = sentence.char_to_str_pos[char_start];
+        let char_end = std::cmp::min(
+            start + ys.len() + self.window_size,
+            sentence.char_to_str_pos.len() - 1,
+        );
+        let text_end = sentence.char_to_str_pos[char_end];
+        let text = &sentence.text[text_start..text_end];
+        let padding = start - char_start + 1;
+        for m in self.pma.find_overlapping_no_suffix_iter(&text) {
+            let m_end = sentence.str_to_char_pos[m.end() + text_start] - char_start;
+            let offset = m_end as isize - self.window_size as isize - padding as isize;
+            let weights = &self.weights[m.pattern()];
+            if offset >= 0 {
+                for (w, y) in weights.iter().zip(&mut ys[offset as usize..]) {
+                    *y += w;
+                }
+            } else {
+                for (w, y) in weights[-offset as usize..].iter().zip(ys.iter_mut()) {
+                    *y += w;
+                }
+            }
+        }
+    }
+}
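Editorial note: the offset arithmetic in `add_scores` is easiest to see on a toy example. Each n-gram's weight vector covers the boundaries starting `window_size` characters before the match end, and `padding` re-bases character positions onto the `ys` slice; a negative offset means the leading weights fall off the left edge and are clipped. A standalone illustration (our own helper on plain integers, not crate code):

```rust
// Mirrors the loop body of CharScorerPma::add_scores.
fn apply_weights(ys: &mut [i32], weights: &[i32], m_end: usize, window_size: usize, padding: usize) {
    let offset = m_end as isize - window_size as isize - padding as isize;
    if offset >= 0 {
        for (w, y) in weights.iter().zip(&mut ys[offset as usize..]) {
            *y += w;
        }
    } else {
        for (w, y) in weights[-offset as usize..].iter().zip(ys.iter_mut()) {
            *y += w;
        }
    }
}

fn main() {
    // A match ending at char position 2 with window 2 and padding 1 starts one
    // slot left of the slice, so its first weight is clipped away.
    let mut ys = [0; 4];
    apply_weights(&mut ys, &[10, 20, 30], 2, 2, 1);
    assert_eq!(ys, [20, 30, 0, 0]);

    // A match ending further right lands fully inside the slice.
    let mut ys = [0; 4];
    apply_weights(&mut ys, &[10, 20, 30], 4, 2, 1);
    assert_eq!(ys, [0, 10, 20, 30]);
}
```
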
diff --git a/vaporetto/src/dict_scorer.rs b/vaporetto/src/dict_scorer.rs
new file mode 100644
index 00000000..40dae9cb
--- /dev/null
+++ b/vaporetto/src/dict_scorer.rs
@@ -0,0 +1,94 @@
+use crate::model::{DictWeight, ScoreValue};
+use crate::sentence::Sentence;
+use daachorse::DoubleArrayAhoCorasick;
+
+pub enum DictScorer {
+    Pma(DictScorerPma),
+}
+
+impl DictScorer {
+    pub fn new(
+        pma: DoubleArrayAhoCorasick,
+        weights: Vec<DictWeight>,
+        word_wise_score: bool,
+    ) -> Self {
+        Self::Pma(DictScorerPma::new(pma, weights, word_wise_score))
+    }
+
+    pub fn add_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) {
+        match self {
+            DictScorer::Pma(pma) => pma.add_scores(sentence, start, ys),
+        }
+    }
+
+    pub fn window_size(&mut self, size: usize) {
+        match self {
+            DictScorer::Pma(pma) => pma.window_size(size),
+        }
+    }
+}
+
+pub struct DictScorerPma {
+    pma: DoubleArrayAhoCorasick,
+    weights: Vec<DictWeight>,
+    window_size: usize,
+    word_wise_score: bool,
+}
+
+impl DictScorerPma {
+    pub fn new(
+        pma: DoubleArrayAhoCorasick,
+        weights: Vec<DictWeight>,
+        word_wise_score: bool,
+    ) -> Self {
+        Self {
+            pma,
+            weights,
+            window_size: 1,
+            word_wise_score,
+        }
+    }
+
+    pub fn add_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) {
+        let char_start = if start >= self.window_size {
+            start + 1 - self.window_size
+        } else {
+            0
+        };
+        let text_start = sentence.char_to_str_pos[char_start];
+        let char_end = std::cmp::min(
+            start + ys.len() + self.window_size,
+            sentence.char_to_str_pos.len() - 1,
+        );
+        let text_end = sentence.char_to_str_pos[char_end];
+        let text = &sentence.text[text_start..text_end];
+        let padding = start - char_start + 1;
+        for m in self.pma.find_overlapping_iter(&text) {
+            let m_start = sentence.str_to_char_pos[m.start() + text_start] - char_start;
+            let m_end = sentence.str_to_char_pos[m.end() + text_start] - char_start;
+            let idx = if self.word_wise_score {
+                m.pattern()
+            } else {
+                std::cmp::min(m_end - m_start, self.weights.len()) - 1
+            };
+            let dict_weight = self.weights[idx];
+            if m_start >= padding && m_start < padding + ys.len() {
+                ys[m_start - padding] += dict_weight.right;
+            }
+            let range_start = std::cmp::max(0, m_start as isize - padding as isize + 1);
+            let range_end = std::cmp::min(m_end as isize - padding as isize, ys.len() as isize);
+            if range_start < range_end {
+                for y in &mut ys[range_start as usize..range_end as usize] {
+                    *y += dict_weight.inner;
+                }
+            }
+            if m_end >= padding && m_end < ys.len() + padding {
+                ys[m_end - padding] += dict_weight.left;
+            }
+        }
+    }
+
+    pub fn window_size(&mut self, size: usize) {
+        self.window_size = std::cmp::max(size, 1);
+    }
+}
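Editorial note: in the non-word-wise mode above, a dictionary match selects its `DictWeight` by character length, and every word at least as long as the weight table falls into the last bucket. A quick standalone check of that indexing (hypothetical helper, not crate code):

```rust
// min(len, n_buckets) - 1, as in DictScorerPma::add_scores.
fn weight_index(match_len: usize, n_buckets: usize) -> usize {
    std::cmp::min(match_len, n_buckets) - 1
}

fn main() {
    assert_eq!(weight_index(1, 4), 0); // single-character words use bucket 0
    assert_eq!(weight_index(4, 4), 3);
    assert_eq!(weight_index(9, 4), 3); // longer words share the last bucket
}
```
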
diff --git a/vaporetto/src/lib.rs b/vaporetto/src/lib.rs
index d7107e66..7b545359 100644
--- a/vaporetto/src/lib.rs
+++ b/vaporetto/src/lib.rs
@@ -33,6 +33,8 @@ mod model;
 mod predictor;
 mod sentence;
 mod type_scorer;
+mod char_scorer;
+mod dict_scorer;
 
 #[cfg(feature = "train")]
 mod feature;
diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs
index c50cf54e..0480c888 100644
--- a/vaporetto/src/predictor.rs
+++ b/vaporetto/src/predictor.rs
@@ -13,22 +13,19 @@ use crossbeam_channel::{Receiver, Sender};
 
 use crate::model::{DictWeight, Model, ScoreValue};
 use crate::sentence::{BoundaryType, Sentence};
+use crate::char_scorer::CharScorer;
 use crate::type_scorer::TypeScorer;
+use crate::dict_scorer::DictScorer;
 
 use daachorse::DoubleArrayAhoCorasick;
 
 /// Predictor.
 pub struct Predictor {
-    word_pma: DoubleArrayAhoCorasick,
-    dict_pma: Option<DoubleArrayAhoCorasick>,
-    word_weights: Vec<Vec<ScoreValue>>,
-    dict_weights: Vec<DictWeight>,
-    dict_word_wise: bool,
     bias: ScoreValue,
 
-    char_window_size: usize,
-    dict_window_size: usize,
+    char_scorer: CharScorer,
     type_scorer: TypeScorer,
+    dict_scorer: Option<DictScorer>,
 
     #[cfg(feature = "model-quantize")]
     quantize_multiplier: f64,
@@ -47,11 +44,11 @@ impl Predictor {
     pub fn new(model: Model) -> Self {
         let bias = model.bias;
 
-        let words = model.words;
+        let chars = model.words;
         let dict = model.dict;
         let dict_weights = model.dict_weights;
 
-        let mut word_weights: Vec<_> = model
+        let mut char_weights: Vec<_> = model
             .word_weights
             .into_iter()
             .map(|ws| ws.into_iter().map(|w| w as ScoreValue).collect())
             .collect();
@@ -65,39 +62,36 @@ impl Predictor {
         let (dict, dict_weights) = Self::merge_dict_weights(
             dict,
             dict_weights,
-            &words,
-            &mut word_weights,
+            &chars,
+            &mut char_weights,
             model.char_window_size,
             model.dict_word_wise,
         );
 
-        let word_weights = Self::merge_weights(&words, &word_weights);
+        let char_weights = Self::merge_weights(&chars, &char_weights);
         let type_weights = Self::merge_weights(&model.types, &type_weights);
 
         #[cfg(feature = "model-quantize")]
         let bias = bias as i32;
 
-        let word_pma = DoubleArrayAhoCorasick::new(words).unwrap();
+        let char_pma = DoubleArrayAhoCorasick::new(chars).unwrap();
         let type_pma = DoubleArrayAhoCorasick::new(model.types).unwrap();
-        let dict_pma = if dict.is_empty() {
+
+        let char_scorer = CharScorer::new(char_pma, char_weights, model.char_window_size);
+        let type_scorer = TypeScorer::new(type_pma, type_weights, model.type_window_size);
+        let dict_scorer = if dict.is_empty() {
             None
         } else {
-            Some(DoubleArrayAhoCorasick::new(dict).unwrap())
+            let dict_pma = DoubleArrayAhoCorasick::new(dict).unwrap();
+            Some(DictScorer::new(dict_pma, dict_weights, model.dict_word_wise))
        };
 
-        let type_scorer = TypeScorer::new(type_pma, type_weights, model.type_window_size);
-
         Self {
-            word_pma,
-            dict_pma,
-            word_weights,
-            dict_weights,
-            dict_word_wise: model.dict_word_wise,
             bias,
-            char_window_size: model.char_window_size,
-            dict_window_size: 1,
+            char_scorer,
             type_scorer,
+            dict_scorer,
 
             #[cfg(feature = "model-quantize")]
             quantize_multiplier: model.quantize_multiplier,
@@ -186,77 +180,6 @@ impl Predictor {
         result
     }
 
-    fn add_word_ngram_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) {
-        let char_start = if start >= self.char_window_size {
-            start + 1 - self.char_window_size
-        } else {
-            0
-        };
-        let text_start = sentence.char_to_str_pos[char_start];
-        let char_end = std::cmp::min(
-            start + ys.len() + self.char_window_size,
-            sentence.char_to_str_pos.len() - 1,
-        );
-        let text_end = sentence.char_to_str_pos[char_end];
-        let text = &sentence.text[text_start..text_end];
-        let padding = start - char_start + 1;
-        for m in self.word_pma.find_overlapping_no_suffix_iter(&text) {
-            let m_end = sentence.str_to_char_pos[m.end() + text_start] - char_start;
-            let offset = m_end as isize - self.char_window_size as isize - padding as isize;
-            let weights = &self.word_weights[m.pattern()];
-            if offset >= 0 {
-                for (w, y) in weights.iter().zip(&mut ys[offset as usize..]) {
-                    *y += w;
-                }
-            } else {
-                for (w, y) in weights[-offset as usize..].iter().zip(ys.iter_mut()) {
-                    *y += w;
-                }
-            }
-        }
-    }
-
-    fn add_dict_scores(&self, sentence: &Sentence,
start: usize, ys: &mut [ScoreValue]) { - if let Some(dict_pma) = self.dict_pma.as_ref() { - let char_start = if start >= self.dict_window_size { - start + 1 - self.dict_window_size - } else { - 0 - }; - let text_start = sentence.char_to_str_pos[char_start]; - let char_end = std::cmp::min( - start + ys.len() + self.dict_window_size, - sentence.char_to_str_pos.len() - 1, - ); - let text_end = sentence.char_to_str_pos[char_end]; - let text = &sentence.text[text_start..text_end]; - let padding = start - char_start + 1; - for m in dict_pma.find_overlapping_iter(&text) { - let m_start = sentence.str_to_char_pos[m.start() + text_start] - char_start; - let m_end = sentence.str_to_char_pos[m.end() + text_start] - char_start; - let idx = if self.dict_word_wise { - m.pattern() - } else { - std::cmp::min(m_end - m_start, self.dict_weights.len()) - 1 - }; - let dict_weight = self.dict_weights[idx]; - if m_start >= padding && m_start < padding + ys.len() { - ys[m_start - padding] += dict_weight.right; - } - let range_start = std::cmp::max(0, m_start as isize - padding as isize + 1); - let range_end = std::cmp::min(m_end as isize - padding as isize, ys.len() as isize); - if range_start < range_end { - for y in &mut ys[range_start as usize..range_end as usize] { - *y += dict_weight.inner; - } - } - if m_end >= padding && m_end < ys.len() + padding { - ys[m_end - padding] += dict_weight.left; - } - } - } - } - fn predict_partial_impl( &self, sentence: &Sentence, @@ -264,9 +187,11 @@ impl Predictor { ys: &mut [ScoreValue], ) { ys.fill(self.bias); - self.add_word_ngram_scores(sentence, range.start, ys); + self.char_scorer.add_scores(sentence, range.start, ys); self.type_scorer.add_scores(sentence, range.start, ys); - self.add_dict_scores(sentence, range.start, ys); + if let Some(dict_scorer) = self.dict_scorer.as_ref() { + dict_scorer.add_scores(sentence, range.start, ys); + } } /// Predicts word boundaries of the specified range of a sentence. @@ -380,7 +305,9 @@ impl Predictor { /// /// A predictor with the specified window size. 
pub fn dict_window_size(mut self, size: usize) -> Self {
-        self.dict_window_size = std::cmp::max(size, 1);
+        if let Some(dict_scorer) = self.dict_scorer.as_mut() {
+            dict_scorer.window_size(size);
+        }
         self
     }
 
From 1c4370bb6078c83dd5b3bad5801b3a06b8b9f5c8 Mon Sep 17 00:00:00 2001
From: Koichi Akabe
Date: Thu, 25 Nov 2021 13:05:07 +0900
Subject: [PATCH 04/60] Rename some variables

---
 vaporetto/src/kytea_model.rs | 44 +++++++++++++++++----------
 vaporetto/src/lib.rs         |  4 +--
 vaporetto/src/model.rs       | 58 +++++++++++++++++++-----------------
 vaporetto/src/predictor.rs   | 36 ++++++++++++----------
 4 files changed, 80 insertions(+), 62 deletions(-)

diff --git a/vaporetto/src/kytea_model.rs b/vaporetto/src/kytea_model.rs
index 78b407c9..60585291 100644
--- a/vaporetto/src/kytea_model.rs
+++ b/vaporetto/src/kytea_model.rs
@@ -409,20 +409,32 @@ impl TryFrom<KyteaModel> for Model {
             .type_dict
             .ok_or_else(|| anyhow!("no type dictionary."))?;
 
-        let mut words: Vec<Vec<u8>> = vec![];
-        let mut word_weights = vec![];
-        for (word, v) in char_dict.dump_items() {
-            let weight_size = config.char_w as usize * 2 - word.len() + 1;
-            words.push(word.into_iter().collect::<String>().as_bytes().to_vec());
-            word_weights.push(v[..weight_size].to_vec());
+        let mut char_ngrams: Vec<Vec<u8>> = vec![];
+        let mut char_ngram_weights = vec![];
+        for (char_ngram, v) in char_dict.dump_items() {
+            let weight_size = config.char_w as usize * 2 - char_ngram.len() + 1;
+            char_ngrams.push(
+                char_ngram
+                    .into_iter()
+                    .collect::<String>()
+                    .as_bytes()
+                    .to_vec(),
+            );
+            char_ngram_weights.push(v[..weight_size].to_vec());
         }
 
-        let mut types: Vec<Vec<u8>> = vec![];
-        let mut type_weights = vec![];
-        for (word, v) in type_dict.dump_items() {
-            let weight_size = config.type_w as usize * 2 - word.len() + 1;
-            types.push(word.into_iter().collect::<String>().as_bytes().to_vec());
-            type_weights.push(v[..weight_size].to_vec());
+        let mut type_ngrams: Vec<Vec<u8>> = vec![];
+        let mut type_ngram_weights = vec![];
+        for (type_ngram, v) in type_dict.dump_items() {
+            let weight_size = config.type_w as usize * 2 - type_ngram.len() + 1;
+            type_ngrams.push(
+                type_ngram
+                    .into_iter()
+                    .collect::<String>()
+                    .as_bytes()
+                    .to_vec(),
+            );
+            type_ngram_weights.push(v[..weight_size].to_vec());
         }
 
         let mut dict: Vec<Vec<u8>> = vec![];
@@ -445,15 +457,15 @@ impl TryFrom<KyteaModel> for Model {
         }
 
         Ok(Self {
-            words,
-            types,
+            char_ngrams,
+            type_ngrams,
             dict,
 
             #[cfg(feature = "model-quantize")]
             quantize_multiplier,
 
-            word_weights,
-            type_weights,
+            char_ngram_weights,
+            type_ngram_weights,
             dict_weights,
             dict_word_wise: true,
             bias,
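Editorial note: the `weight_size` expression in the KyTea conversion above encodes how many boundary positions an n-gram can influence: with `w` characters of context on each side, an n-gram of length `len` overlaps `w * 2 - len + 1` distinct boundaries. A tiny standalone check (our helper, not crate code):

```rust
// Number of relative positions at which an n-gram affects a boundary.
fn weight_size(w: usize, ngram_len: usize) -> usize {
    w * 2 - ngram_len + 1
}

fn main() {
    // With a 3-character window, a unigram touches 6 boundary positions,
    // while a trigram only touches 4.
    assert_eq!(weight_size(3, 1), 6);
    assert_eq!(weight_size(3, 3), 4);
}
```
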
diff --git a/vaporetto/src/lib.rs b/vaporetto/src/lib.rs
index 7b545359..8daa5986 100644
--- a/vaporetto/src/lib.rs
+++ b/vaporetto/src/lib.rs
@@ -29,12 +29,12 @@
 #[macro_use]
 mod utils;
 
+mod char_scorer;
+mod dict_scorer;
 mod model;
 mod predictor;
 mod sentence;
 mod type_scorer;
-mod char_scorer;
-mod dict_scorer;
 
 #[cfg(feature = "train")]
 mod feature;
diff --git a/vaporetto/src/model.rs b/vaporetto/src/model.rs
index b2465346..35747b0e 100644
--- a/vaporetto/src/model.rs
+++ b/vaporetto/src/model.rs
@@ -33,12 +33,12 @@ pub struct DictWeight {
 /// Model data.
 #[derive(Serialize, Deserialize)]
 pub struct Model {
-    pub(crate) words: Vec<Vec<u8>>,
-    pub(crate) types: Vec<Vec<u8>>,
+    pub(crate) char_ngrams: Vec<Vec<u8>>,
+    pub(crate) type_ngrams: Vec<Vec<u8>>,
     pub(crate) dict: Vec<Vec<u8>>,
 
-    pub(crate) word_weights: Vec<Vec<WeightValue>>,
-    pub(crate) type_weights: Vec<Vec<WeightValue>>,
+    pub(crate) char_ngram_weights: Vec<Vec<WeightValue>>,
+    pub(crate) type_ngram_weights: Vec<Vec<WeightValue>>,
     pub(crate) dict_weights: Vec<DictWeight>,
 
     #[cfg(feature = "model-quantize")]
@@ -105,13 +105,13 @@ impl Model {
             .unwrap() as i32;
         let bias = model.label_bias(wb_idx);
 
-        let mut words = vec![];
-        let mut types = vec![];
-        let mut word_weights = vec![];
-        let mut type_weights = vec![];
+        let mut char_ngrams = vec![];
+        let mut type_ngrams = vec![];
+        let mut char_ngram_weights = vec![];
+        let mut type_ngram_weights = vec![];
         let mut dict_weights = vec![DictWeight::default(); dict_word_max_size];
-        let mut word_ids = StringIdManager::new();
-        let mut type_ids = StringIdManager::new();
+        let mut char_ngram_ids = StringIdManager::new();
+        let mut type_ngram_ids = StringIdManager::new();
 
         #[cfg(feature = "model-quantize")]
         let quantize_multiplier = {
@@ -138,27 +138,29 @@ impl Model {
                 let weight = weight / quantize_multiplier;
 
                 match feature.feature {
-                    FeatureContent::CharacterNgram(word) => {
-                        let id = word_ids.get_id(word.as_bytes());
-                        if id == word_weights.len() {
-                            words.push(word.as_bytes().to_vec());
-                            word_weights.push(vec![
+                    FeatureContent::CharacterNgram(char_ngram) => {
+                        let id = char_ngram_ids.get_id(char_ngram.as_bytes());
+                        if id == char_ngram_weights.len() {
+                            char_ngrams.push(char_ngram.as_bytes().to_vec());
+                            char_ngram_weights.push(vec![
                                 WeightValue::default();
-                                char_window_size * 2 - word.chars().count() + 1
+                                char_window_size * 2
+                                    - char_ngram.chars().count()
+                                    + 1
                             ]);
                         }
-                        word_weights[id][feature.rel_position] = weight as WeightValue;
+                        char_ngram_weights[id][feature.rel_position] = weight as WeightValue;
                     }
-                    FeatureContent::CharacterTypeNgram(word) => {
-                        let id = type_ids.get_id(word) as usize;
-                        if id == type_weights.len() {
-                            types.push(word.to_vec());
-                            type_weights.push(vec![
+                    FeatureContent::CharacterTypeNgram(type_ngram) => {
+                        let id = type_ngram_ids.get_id(type_ngram) as usize;
+                        if id == type_ngram_weights.len() {
+                            type_ngrams.push(type_ngram.to_vec());
+                            type_ngram_weights.push(vec![
                                 WeightValue::default();
-                                type_window_size * 2 - word.len() + 1
+                                type_window_size * 2 - type_ngram.len() + 1
                             ]);
                         }
-                        type_weights[id][feature.rel_position] = weight as WeightValue;
+                        type_ngram_weights[id][feature.rel_position] = weight as WeightValue;
                     }
                     FeatureContent::DictionaryWord(size) => match feature.rel_position {
                         0 => dict_weights[size - 1].right = weight as ScoreValue,
...
             };
         }
         Self {
-            words,
-            types,
+            char_ngrams,
+            type_ngrams,
             dict,
 
             #[cfg(feature = "model-quantize")]
             quantize_multiplier,
 
-            word_weights,
-            type_weights,
+            char_ngram_weights,
+            type_ngram_weights,
             dict_weights,
             dict_word_wise: false,
             bias,
diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs
index 0480c888..61169c0c 100644
--- a/vaporetto/src/predictor.rs
+++ b/vaporetto/src/predictor.rs
@@ -11,11 +11,11 @@ use std::thread;
 #[cfg(feature = "multithreading")]
 use crossbeam_channel::{Receiver, Sender};
 
+use crate::char_scorer::CharScorer;
+use crate::dict_scorer::DictScorer;
 use crate::model::{DictWeight, Model, ScoreValue};
 use crate::sentence::{BoundaryType, Sentence};
-use crate::char_scorer::CharScorer;
 use crate::type_scorer::TypeScorer;
-use crate::dict_scorer::DictScorer;
 
 use daachorse::DoubleArrayAhoCorasick;
 
 /// Predictor.
 pub struct Predictor {
@@ -44,17 +44,17 
@@ impl Predictor { pub fn new(model: Model) -> Self { let bias = model.bias; - let chars = model.words; + let char_ngrams = model.char_ngrams; let dict = model.dict; let dict_weights = model.dict_weights; - let mut char_weights: Vec<_> = model - .word_weights + let mut char_ngram_weights: Vec<_> = model + .char_ngram_weights .into_iter() .map(|ws| ws.into_iter().map(|w| w as ScoreValue).collect()) .collect(); - let type_weights: Vec<_> = model - .type_weights + let type_ngram_weights: Vec<_> = model + .type_ngram_weights .into_iter() .map(|ws| ws.into_iter().map(|w| w as ScoreValue).collect()) .collect(); @@ -62,28 +62,32 @@ impl Predictor { let (dict, dict_weights) = Self::merge_dict_weights( dict, dict_weights, - &chars, - &mut char_weights, + &char_ngrams, + &mut char_ngram_weights, model.char_window_size, model.dict_word_wise, ); - let char_weights = Self::merge_weights(&chars, &char_weights); - let type_weights = Self::merge_weights(&model.types, &type_weights); + let char_ngram_weights = Self::merge_weights(&char_ngrams, &char_ngram_weights); + let type_ngram_weights = Self::merge_weights(&model.type_ngrams, &type_ngram_weights); #[cfg(feature = "model-quantize")] let bias = bias as i32; - let char_pma = DoubleArrayAhoCorasick::new(chars).unwrap(); - let type_pma = DoubleArrayAhoCorasick::new(model.types).unwrap(); + let char_pma = DoubleArrayAhoCorasick::new(char_ngrams).unwrap(); + let type_pma = DoubleArrayAhoCorasick::new(model.type_ngrams).unwrap(); - let char_scorer = CharScorer::new(char_pma, char_weights, model.char_window_size); - let type_scorer = TypeScorer::new(type_pma, type_weights, model.type_window_size); + let char_scorer = CharScorer::new(char_pma, char_ngram_weights, model.char_window_size); + let type_scorer = TypeScorer::new(type_pma, type_ngram_weights, model.type_window_size); let dict_scorer = if dict.is_empty() { None } else { let dict_pma = DoubleArrayAhoCorasick::new(dict).unwrap(); - Some(DictScorer::new(dict_pma, dict_weights, model.dict_word_wise)) + Some(DictScorer::new( + dict_pma, + dict_weights, + model.dict_word_wise, + )) }; Self { From e8b664d46fbbe681be1aab323523940e20ff2f6e Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Thu, 25 Nov 2021 14:37:00 +0900 Subject: [PATCH 05/60] Fix var names --- vaporetto/src/predictor.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs index 61169c0c..3d6b5bc0 100644 --- a/vaporetto/src/predictor.rs +++ b/vaporetto/src/predictor.rs @@ -520,21 +520,21 @@ mod tests { /// 世: 40 42 fn generate_model_1() -> Model { Model { - words: vec![ + char_ngrams: vec![ "我ら".as_bytes().to_vec(), "全世界".as_bytes().to_vec(), "国民".as_bytes().to_vec(), "世界".as_bytes().to_vec(), "界".as_bytes().to_vec(), ], - types: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], + type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], dict: vec![ "全世界".as_bytes().to_vec(), "世界".as_bytes().to_vec(), "世".as_bytes().to_vec(), ], #[cfg(not(feature = "model-quantize"))] - word_weights: vec![ + char_ngram_weights: vec![ vec![0.5, 1.0, 1.5, 2.0, 2.5], vec![3.0, 3.5, 4.0, 4.5], vec![5.0, 5.5, 6.0, 6.5, 7.0], @@ -542,7 +542,7 @@ mod tests { vec![10.0, 10.5, 11.0, 11.5, 12.0, 12.5], ], #[cfg(feature = "model-quantize")] - word_weights: vec![ + char_ngram_weights: vec![ vec![1, 2, 3, 4, 5], vec![6, 7, 8, 9], vec![10, 11, 12, 13, 14], @@ -550,14 +550,14 @@ mod tests { vec![20, 21, 22, 23, 24, 25], ], 
#[cfg(not(feature = "model-quantize"))] - type_weights: vec![ + type_ngram_weights: vec![ vec![13.0, 13.5, 14.0, 14.5], vec![15.0, 15.5, 16.0, 16.5], vec![17.0, 17.5, 18.0], vec![18.5, 19.0, 19.5], ], #[cfg(feature = "model-quantize")] - type_weights: vec![ + type_ngram_weights: vec![ vec![26, 27, 28, 29], vec![30, 31, 32, 33], vec![34, 35, 36], @@ -629,21 +629,21 @@ mod tests { /// 世: 38 40 fn generate_model_2() -> Model { Model { - words: vec![ + char_ngrams: vec![ "我ら".as_bytes().to_vec(), "全世界".as_bytes().to_vec(), "国民".as_bytes().to_vec(), "世界".as_bytes().to_vec(), "界".as_bytes().to_vec(), ], - types: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], + type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], dict: vec![ "全世界".as_bytes().to_vec(), "世界".as_bytes().to_vec(), "世".as_bytes().to_vec(), ], #[cfg(not(feature = "model-quantize"))] - word_weights: vec![ + char_ngram_weights: vec![ vec![0.25, 0.5, 0.75], vec![1.0, 1.25], vec![1.5, 1.75, 2.0], @@ -651,7 +651,7 @@ mod tests { vec![3.0, 3.25, 3.5, 3.75], ], #[cfg(feature = "model-quantize")] - word_weights: vec![ + char_ngram_weights: vec![ vec![1, 2, 3], vec![4, 5], vec![6, 7, 8], @@ -659,14 +659,14 @@ mod tests { vec![12, 13, 14, 15], ], #[cfg(not(feature = "model-quantize"))] - type_weights: vec![ + type_ngram_weights: vec![ vec![4.0, 4.25, 4.5, 4.75, 5.0, 5.25], vec![5.5, 5.75, 6.0, 6.25, 6.5, 6.75], vec![7.0, 7.25, 7.5, 7.75, 8.0], vec![8.25, 8.5, 8.75, 9.0, 9.25], ], #[cfg(feature = "model-quantize")] - type_weights: vec![ + type_ngram_weights: vec![ vec![16, 17, 18, 19, 20, 21], vec![22, 23, 24, 25, 26, 27], vec![28, 29, 30, 31, 32], @@ -748,21 +748,21 @@ mod tests { /// 世: 44 46 fn generate_model_3() -> Model { Model { - words: vec![ + char_ngrams: vec![ "我ら".as_bytes().to_vec(), "全世界".as_bytes().to_vec(), "国民".as_bytes().to_vec(), "世界".as_bytes().to_vec(), "界".as_bytes().to_vec(), ], - types: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], + type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], dict: vec![ "国民".as_bytes().to_vec(), "世界".as_bytes().to_vec(), "世".as_bytes().to_vec(), ], #[cfg(not(feature = "model-quantize"))] - word_weights: vec![ + char_ngram_weights: vec![ vec![0.25, 0.5, 0.75], vec![1.0, 1.25], vec![1.5, 1.75, 2.0], @@ -770,7 +770,7 @@ mod tests { vec![3.0, 3.25, 3.5, 3.75], ], #[cfg(feature = "model-quantize")] - word_weights: vec![ + char_ngram_weights: vec![ vec![1, 2, 3], vec![4, 5], vec![6, 7, 8], @@ -778,14 +778,14 @@ mod tests { vec![12, 13, 14, 15], ], #[cfg(not(feature = "model-quantize"))] - type_weights: vec![ + type_ngram_weights: vec![ vec![4.0, 4.25, 4.5, 4.75, 5.0, 5.25], vec![5.5, 5.75, 6.0, 6.25, 6.5, 6.75], vec![7.0, 7.25, 7.5, 7.75, 8.0], vec![8.25, 8.5, 8.75, 9.0, 9.25], ], #[cfg(feature = "model-quantize")] - type_weights: vec![ + type_ngram_weights: vec![ vec![16, 17, 18, 19, 20, 21], vec![22, 23, 24, 25, 26, 27], vec![28, 29, 30, 31, 32], From da9b5bfbf58825360b7ce1f5b8f773624586eba3 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Thu, 25 Nov 2021 19:15:08 +0900 Subject: [PATCH 06/60] Expand enum --- vaporetto/src/char_scorer.rs | 24 ++---------------------- vaporetto/src/dict_scorer.rs | 30 ++---------------------------- 2 files changed, 4 insertions(+), 50 deletions(-) diff --git a/vaporetto/src/char_scorer.rs b/vaporetto/src/char_scorer.rs index 4256fa34..31cc0911 100644 --- a/vaporetto/src/char_scorer.rs +++ b/vaporetto/src/char_scorer.rs @@ -2,33 +2,13 @@ use crate::model::ScoreValue; use 
crate::sentence::Sentence;
 use daachorse::DoubleArrayAhoCorasick;
 
-pub enum CharScorer {
-    Pma(CharScorerPma),
-}
-
-impl CharScorer {
-    pub fn new(
-        pma: DoubleArrayAhoCorasick,
-        weights: Vec<Vec<ScoreValue>>,
-        window_size: usize,
-    ) -> Self {
-        Self::Pma(CharScorerPma::new(pma, weights, window_size))
-    }
-
-    pub fn add_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) {
-        match self {
-            CharScorer::Pma(pma) => pma.add_scores(sentence, start, ys),
-        }
-    }
-}
-
-pub struct CharScorerPma {
+pub struct CharScorer {
     pma: DoubleArrayAhoCorasick,
     weights: Vec<Vec<ScoreValue>>,
     window_size: usize,
 }
 
-impl CharScorerPma {
+impl CharScorer {
     pub fn new(
         pma: DoubleArrayAhoCorasick,
         weights: Vec<Vec<ScoreValue>>,
diff --git a/vaporetto/src/dict_scorer.rs b/vaporetto/src/dict_scorer.rs
index 40dae9cb..2c9e9326 100644
--- a/vaporetto/src/dict_scorer.rs
+++ b/vaporetto/src/dict_scorer.rs
@@ -2,40 +2,14 @@ use crate::model::{DictWeight, ScoreValue};
 use crate::sentence::Sentence;
 use daachorse::DoubleArrayAhoCorasick;
 
-pub enum DictScorer {
-    Pma(DictScorerPma),
-}
-
-impl DictScorer {
-    pub fn new(
-        pma: DoubleArrayAhoCorasick,
-        weights: Vec<DictWeight>,
-        word_wise_score: bool,
-    ) -> Self {
-        Self::Pma(DictScorerPma::new(pma, weights, word_wise_score))
-    }
-
-    pub fn add_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) {
-        match self {
-            DictScorer::Pma(pma) => pma.add_scores(sentence, start, ys),
-        }
-    }
-
-    pub fn window_size(&mut self, size: usize) {
-        match self {
-            DictScorer::Pma(pma) => pma.window_size(size),
-        }
-    }
-}
-
-pub struct DictScorerPma {
+pub struct DictScorer {
     pma: DoubleArrayAhoCorasick,
     weights: Vec<DictWeight>,
     window_size: usize,
     word_wise_score: bool,
 }
 
-impl DictScorerPma {
+impl DictScorer {
     pub fn new(
         pma: DoubleArrayAhoCorasick,
         weights: Vec<DictWeight>,

From 058fa971856e2ceccf7fa2a335560b676dd4c054 Mon Sep 17 00:00:00 2001
From: Koichi Akabe
Date: Fri, 26 Nov 2021 10:34:59 +0900
Subject: [PATCH 07/60] Remove bench directory (#4)

* Remove bench directory

* Update README
---
 .gitmodules                                   |  12 --
 README.md                                     |   2 +
 bench/README.md                               |  16 ---
 bench/compile_all.sh                          |  54 ---------
 bench/download_resources.sh                   |  23 ----
 bench/elapsed_time.patch                      | 114 ------------------
 bench/kuromoji/pom.xml                        |  72 -----------
 .../src/main/java/kuromoji_bench/App.java     |  28 -----
 bench/kytea                                   |   1 -
 bench/lindera                                 |   1 -
 bench/mecab                                   |   1 -
 bench/run_all.sh                              |  31 -----
 bench/stats.py                                |  46 -------
 bench/sudachi.rs                              |   1 -
 bench/sudachi/pom.xml                         |  72 -----------
 .../src/main/java/sudachi_bench/App.java      |  36 ------
 bench/sudachi/sudachi.json                    |  25 ----
 17 files changed, 2 insertions(+), 533 deletions(-)
 delete mode 100644 .gitmodules
 delete mode 100644 bench/README.md
 delete mode 100755 bench/compile_all.sh
 delete mode 100755 bench/download_resources.sh
 delete mode 100644 bench/elapsed_time.patch
 delete mode 100644 bench/kuromoji/pom.xml
 delete mode 100644 bench/kuromoji/src/main/java/kuromoji_bench/App.java
 delete mode 160000 bench/kytea
 delete mode 160000 bench/lindera
 delete mode 160000 bench/mecab
 delete mode 100755 bench/run_all.sh
 delete mode 100755 bench/stats.py
 delete mode 160000 bench/sudachi.rs
 delete mode 100644 bench/sudachi/pom.xml
 delete mode 100644 bench/sudachi/src/main/java/sudachi_bench/App.java
 delete mode 100644 bench/sudachi/sudachi.json

diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index c2e05dab..00000000
--- a/.gitmodules
+++ /dev/null
@@ -1,12 +0,0 @@
-[submodule "bench/kytea"]
-	path = bench/kytea
-	url = https://github.com/neubig/kytea.git
-[submodule "bench/lindera"]
-	path = bench/lindera
-	url = 
https://github.com/lindera-morphology/lindera.git -[submodule "bench/mecab"] - path = bench/mecab - url = https://github.com/taku910/mecab.git -[submodule "bench/sudachi.rs"] - path = bench/sudachi.rs - url = https://github.com/WorksApplications/sudachi.rs.git diff --git a/README.md b/README.md index c4f49838..97a925d1 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,8 @@ You can specify all arguments above multiple times. ## Speed Comparison of Various Tokenizers +You can find the comparison script at [here](https://github.com/legalforce-research/tokenizer-speed-bench). + ### Experimental Setup * Document: Japanese training data of Kyoto Free Translation Task diff --git a/bench/README.md b/bench/README.md deleted file mode 100644 index f66549b2..00000000 --- a/bench/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Benchmarking of various tokenizers - -## Preparation - -``` -% git submodule update --init -% ./download_resources.sh -% ./compile_all.sh -``` - -## Measurement - -``` -% ./run_all.sh 2>&1 | tee ./results -% ./stats.py < ./results -``` diff --git a/bench/compile_all.sh b/bench/compile_all.sh deleted file mode 100755 index d9e3fabe..00000000 --- a/bench/compile_all.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -set -eux - -which patch -which cargo -which autoreconf -which libtool -which make -which mvn - -set +e - -patch -p1 -N < ./elapsed_time.patch - -set -e - -pushd .. -cargo build --release -./target/release/convert_kytea_model --model-in "./bench/kytea/jp-0.4.7-6.mod" --model-out "./jp-0.4.7-6.tokenize.mod" -popd - -pushd ./kytea -autoreconf -i -./configure -make -popd - -pushd ./mecab/mecab -./configure --prefix=$(cd .. && pwd)/tmpusr -make -make install -popd -pushd ./mecab/mecab-ipadic -./configure --with-charset=utf8 --prefix=$(cd .. 
&& pwd)/tmpusr --with-mecab-config=../mecab/mecab-config -make -make install -popd - -pushd ./kuromoji -mvn compile -popd - -pushd ./lindera -cargo build --release -popd - -pushd ./sudachi -mvn compile -popd - -pushd ./sudachi.rs -cargo build --release -popd diff --git a/bench/download_resources.sh b/bench/download_resources.sh deleted file mode 100755 index 4f5e91df..00000000 --- a/bench/download_resources.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -set -eux - -which wget -which gunzip -which unzip -which tar - -pushd ./kytea -wget "http://www.phontron.com/kytea/download/model/jp-0.4.7-6.mod.gz" -gunzip "./jp-0.4.7-6.mod.gz" -popd -pushd ./sudachi -wget "http://sudachi.s3-website-ap-northeast-1.amazonaws.com/sudachidict/sudachi-dictionary-20210802-core.zip" -unzip "./sudachi-dictionary-20210802-core.zip" -popd -pushd ./sudachi.rs -./fetch_dictionary.sh -popd - -wget "http://www.phontron.com/kftt/download/kftt-data-1.0.tar.gz" -tar xf "./kftt-data-1.0.tar.gz" diff --git a/bench/elapsed_time.patch b/bench/elapsed_time.patch deleted file mode 100644 index 9f5211a1..00000000 --- a/bench/elapsed_time.patch +++ /dev/null @@ -1,114 +0,0 @@ ---- a/kytea/src/lib/kytea.cpp -+++ b/kytea/src/lib/kytea.cpp -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1206,6 +1207,8 @@ void Kytea::analyze() { - for(int i = 0; i < config_->getNumTags(); i++) - out->setDoTag(i,config_->getDoTag(i)); - -+ chrono::steady_clock::time_point begin = chrono::steady_clock::now(); -+ - KyteaSentence* next; - while((next = in->readSentence()) != 0) { - if(config_->getDoWS()) -@@ -1218,6 +1221,9 @@ void Kytea::analyze() { - delete next; - } - -+ chrono::steady_clock::time_point end = chrono::steady_clock::now(); -+ cerr << "Elapsed-kytea: " << (double) chrono::duration_cast(end - begin).count() / 1000 << " [sec]" << endl; -+ - delete in; - delete out; - if(inStr) delete inStr; ---- a/mecab/mecab/src/tagger.cpp -+++ b/mecab/mecab/src/tagger.cpp -@@ -6,6 +6,7 @@ - #include - #include - #include -+#include - #include "common.h" - #include "connector.h" - #include "mecab.h" -@@ -1229,6 +1230,8 @@ int mecab_do(int argc, char **argv) { - WHAT_ERROR("cannot create tagger"); - } - -+ std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); -+ - for (size_t i = 0; i < rest.size(); ++i) { - MeCab::istream_wrapper ifs(rest[i].c_str()); - if (!*ifs) { -@@ -1255,6 +1258,8 @@ int mecab_do(int argc, char **argv) { - std::strncpy(ibuf, sentence.c_str(), ibufsize); - } - if (ifs->eof() && !ibuf[0]) { -+ std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); -+ std::cerr << "Elapsed-mecab: " << (double) std::chrono::duration_cast(end - begin).count() / 1000 << " [sec]" << std::endl; - return false; - } - if (ifs->fail()) { ---- a/lindera/lindera-cli/src/main.rs -+++ b/lindera/lindera-cli/src/main.rs -@@ -2,6 +2,7 @@ use std::fs; - use std::io; - use std::io::{BufRead, BufReader}; - use std::path::Path; -+use std::time::Instant; - - use clap::{crate_authors, crate_description, crate_version, App, AppSettings, Arg}; - -@@ -123,6 +124,8 @@ fn main() -> LinderaResult<()> { - Box::new(BufReader::new(io::stdin())) - }; - -+ let start = Instant::now(); -+ - loop { - // read the text to be tokenized from stdin - let mut text = String::new(); -@@ -145,5 +148,8 @@ fn main() -> LinderaResult<()> { - }; - } - -+ let duration = start.elapsed(); -+ eprintln!("Elapsed-lindera: {} [sec]", duration.as_secs_f64()); -+ - Ok(()) - } ---- 
a/sudachi.rs/sudachi-cli/src/main.rs -+++ b/sudachi.rs/sudachi-cli/src/main.rs -@@ -20,6 +20,7 @@ use std::fs::File; - use std::io::{self, BufRead, BufReader, BufWriter, Write}; - use std::path::PathBuf; - use std::process; -+use std::time::Instant; - - use structopt::StructOpt; - -@@ -132,6 +133,8 @@ fn main() { - - let format = make_output::<&JapaneseDictionary>(&args); - -+ let start = Instant::now(); -+ - // tokenize and output results - for line in reader.lines() { - let input = line.expect("Failed to read line"); -@@ -157,6 +160,9 @@ fn main() { - } - // it is recommended to call write before dropping BufWriter - writer.flush().expect("flush failed"); -+ -+ let duration = start.elapsed(); -+ eprintln!("Elapsed-sudachi.rs: {} [sec]", duration.as_secs_f64()); - } - - fn make_output(cli: &Cli) -> Box> { diff --git a/bench/kuromoji/pom.xml b/bench/kuromoji/pom.xml deleted file mode 100644 index 5f88e1a3..00000000 --- a/bench/kuromoji/pom.xml +++ /dev/null @@ -1,72 +0,0 @@ - - - - 4.0.0 - - kuromoji_bench - kuromoji_bench - 1.0-SNAPSHOT - - kuromoji_bench - - - UTF-8 - 1.7 - 1.7 - - - - - com.atilika.kuromoji - kuromoji-ipadic - 0.9.0 - - - - - - - - - maven-clean-plugin - 3.1.0 - - - - maven-resources-plugin - 3.0.2 - - - maven-compiler-plugin - 3.8.0 - - - maven-surefire-plugin - 2.22.1 - - - maven-jar-plugin - 3.0.2 - - - maven-install-plugin - 2.5.2 - - - maven-deploy-plugin - 2.8.2 - - - - maven-site-plugin - 3.7.1 - - - maven-project-info-reports-plugin - 3.0.0 - - - - - diff --git a/bench/kuromoji/src/main/java/kuromoji_bench/App.java b/bench/kuromoji/src/main/java/kuromoji_bench/App.java deleted file mode 100644 index 7f347d38..00000000 --- a/bench/kuromoji/src/main/java/kuromoji_bench/App.java +++ /dev/null @@ -1,28 +0,0 @@ -package kuromoji_bench; - -import com.atilika.kuromoji.ipadic.Token; -import com.atilika.kuromoji.ipadic.Tokenizer; -import java.util.List; -import java.util.ArrayList; -import java.util.Scanner; -import java.time.Instant; -import java.time.Duration; - -public class App { - public static void main(String[] args) { - Tokenizer tokenizer = new Tokenizer(); - Scanner input = new Scanner(System.in); - Instant start = Instant.now(); - while (input.hasNext()) { - List tokens = tokenizer.tokenize(input.nextLine()); - List words = new ArrayList(); - for (Token token : tokens) { - words.add(token.getSurface()); - } - System.out.println(String.join(" ", words)); - } - Instant finish = Instant.now(); - double timeElapsed = (double) Duration.between(start, finish).toMillis() / 1000; - System.err.println("Elapsed-kuromoji: " + timeElapsed + " [sec]"); - } -} diff --git a/bench/kytea b/bench/kytea deleted file mode 160000 index 73a94c4a..00000000 --- a/bench/kytea +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 73a94c4a3045087a7e90f27700f3b870a72625e7 diff --git a/bench/lindera b/bench/lindera deleted file mode 160000 index 0f500336..00000000 --- a/bench/lindera +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0f50033653631261a290ae4ac94cc16bfe63f3bb diff --git a/bench/mecab b/bench/mecab deleted file mode 160000 index 046fa78b..00000000 --- a/bench/mecab +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 046fa78b2ed56fbd4fac312040f6d62fc1bc31e3 diff --git a/bench/run_all.sh b/bench/run_all.sh deleted file mode 100755 index 6b6f2365..00000000 --- a/bench/run_all.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -set -eux - -INPUT_DATA="./kftt-data-1.0/data/orig/kyoto-train.ja" - -for i in 0 1 2 3 4 5 6 7 8 9 -do - for j in 0 1 2 3 4 5 6 7 8 9 - do - echo "iter" $i $j - - 
./kytea/src/bin/kytea -model "./kytea/jp-0.4.7-6.mod" -notags < $INPUT_DATA > /dev/null - - ../target/release/predict --model "../jp-0.4.7-6.tokenize.mod" < $INPUT_DATA > /dev/null - - ./mecab/tmpusr/bin/mecab -Owakati < $INPUT_DATA > /dev/null - - pushd ./kuromoji - mvn exec:java -Dexec.mainClass=kuromoji_bench.App < ../$INPUT_DATA > /dev/null - popd - - ./lindera/target/release/lindera -O wakati < $INPUT_DATA > /dev/null - - pushd ./sudachi - mvn exec:java -Dexec.mainClass=sudachi_bench.App < ../$INPUT_DATA > /dev/null - popd - - ./sudachi.rs/target/release/sudachi -w -m C < $INPUT_DATA > /dev/null - done -done diff --git a/bench/stats.py b/bench/stats.py deleted file mode 100755 index 9004493f..00000000 --- a/bench/stats.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import collections -import math -import re -import sys - - -RE_DICT = [ - ('kytea', re.compile(r'Elapsed-kytea: ([0-9\.]+) \[sec\]')), - ('vaporetto', re.compile(r'Elapsed: ([0-9\.]+) \[sec\]')), - ('mecab', re.compile(r'Elapsed-mecab: ([0-9\.]+) \[sec\]')), - ('kuromoji', re.compile(r'Elapsed-kuromoji: ([0-9\.]+) \[sec\]')), - ('lindera', re.compile(r'Elapsed-lindera: ([0-9\.]+) \[sec\]')), - ('sudachi', re.compile(r'Elapsed-sudachi: ([0-9\.]+) \[sec\]')), - ('sudachi.rs', re.compile(r'Elapsed-sudachi.rs: ([0-9\.]+) \[sec\]')), -] - -N_CHARS = 16318893 - - -def mean_std(times: list[float]) -> (float, float): - speeds = [N_CHARS / time for time in times] - mean = sum(speeds) / len(speeds) - dist = sum((speed - mean) ** 2 for speed in speeds) / len(speeds) - return mean, math.sqrt(dist) - - -def _main(): - times = collections.defaultdict(list) - for line in sys.stdin: - for name, r in RE_DICT: - m = r.match(line) - if m is not None: - times[name].append(float(m.group(1))) - break - - for name, _ in RE_DICT: - mean, std = mean_std(times[name]) - print(f'{name} {mean} {std}') - - -if __name__ == '__main__': - _main() diff --git a/bench/sudachi.rs b/bench/sudachi.rs deleted file mode 160000 index 1cf62ec2..00000000 --- a/bench/sudachi.rs +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1cf62ec2d6949db76e5aa2625c9b76f747960ac1 diff --git a/bench/sudachi/pom.xml b/bench/sudachi/pom.xml deleted file mode 100644 index 26d7f26d..00000000 --- a/bench/sudachi/pom.xml +++ /dev/null @@ -1,72 +0,0 @@ - - - - 4.0.0 - - sudachi_bench - sudachi_bench - 1.0-SNAPSHOT - - sudachi_bench - - - UTF-8 - 1.7 - 1.7 - - - - - com.worksap.nlp - sudachi - 0.5.2 - - - - - - - - - maven-clean-plugin - 3.1.0 - - - - maven-resources-plugin - 3.0.2 - - - maven-compiler-plugin - 3.8.0 - - - maven-surefire-plugin - 2.22.1 - - - maven-jar-plugin - 3.0.2 - - - maven-install-plugin - 2.5.2 - - - maven-deploy-plugin - 2.8.2 - - - - maven-site-plugin - 3.7.1 - - - maven-project-info-reports-plugin - 3.0.0 - - - - - diff --git a/bench/sudachi/src/main/java/sudachi_bench/App.java b/bench/sudachi/src/main/java/sudachi_bench/App.java deleted file mode 100644 index ac249c98..00000000 --- a/bench/sudachi/src/main/java/sudachi_bench/App.java +++ /dev/null @@ -1,36 +0,0 @@ -package sudachi_bench; - -import java.io.IOException; -import com.worksap.nlp.sudachi.Tokenizer; -import com.worksap.nlp.sudachi.Dictionary; -import com.worksap.nlp.sudachi.DictionaryFactory; -import com.worksap.nlp.sudachi.Morpheme; -import java.util.List; -import java.util.ArrayList; -import java.util.Scanner; -import java.time.Instant; -import java.time.Duration; -import java.nio.file.Paths; -import java.nio.file.Files; - -public class App { - public 
static void main(String[] args) throws IOException { - String settings = Files.readString(Paths.get("sudachi.json")); - Scanner input = new Scanner(System.in); - try (Dictionary dict = new DictionaryFactory().create(settings)) { - Tokenizer tokenizer = dict.create(); - Instant start = Instant.now(); - while (input.hasNext()) { - List tokens = tokenizer.tokenize(Tokenizer.SplitMode.C, input.nextLine()); - List words = new ArrayList(); - for (Morpheme token : tokens) { - words.add(token.surface()); - } - System.out.println(String.join(" ", words)); - } - Instant finish = Instant.now(); - double timeElapsed = (double) Duration.between(start, finish).toMillis() / 1000; - System.err.println("Elapsed-sudachi: " + timeElapsed + " [sec]"); - } - } -} diff --git a/bench/sudachi/sudachi.json b/bench/sudachi/sudachi.json deleted file mode 100644 index 9a94c67c..00000000 --- a/bench/sudachi/sudachi.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "systemDict" : "sudachi-dictionary-20210802/system_core.dic", - "inputTextPlugin" : [ - { "class" : "com.worksap.nlp.sudachi.DefaultInputTextPlugin" }, - { "class" : "com.worksap.nlp.sudachi.ProlongedSoundMarkInputTextPlugin", - "prolongedSoundMarks": ["ー", "-", "⁓", "〜", "〰"], - "replacementSymbol": "ー"} - ], - "oovProviderPlugin" : [ - { "class" : "com.worksap.nlp.sudachi.MeCabOovProviderPlugin" }, - { "class" : "com.worksap.nlp.sudachi.SimpleOovProviderPlugin", - "oovPOS" : [ "補助記号", "一般", "*", "*", "*", "*" ], - "leftId" : 5968, - "rightId" : 5968, - "cost" : 3857 } - ], - "pathRewritePlugin" : [ - { "class" : "com.worksap.nlp.sudachi.JoinNumericPlugin", - "joinKanjiNumeric" : true }, - { "class" : "com.worksap.nlp.sudachi.JoinKatakanaOovPlugin", - "oovPOS" : [ "名詞", "普通名詞", "一般", "*", "*", "*" ], - "minLength" : 3 - } - ] -} From 1fcf6c04b6871177695d450e86ae8b6b981472ad Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Fri, 26 Nov 2021 10:56:02 +0900 Subject: [PATCH 08/60] Add JS file generator and simplify example script (#5) * Add JS file generator and simplify example script * Update README.md * Update build_portable_js.sh * Update build_portable_js.sh --- Cargo.toml | 3 + model/model.zstd | Bin 258 -> 0 bytes vaporetto_wasm/Cargo.toml | 6 + vaporetto_wasm/README.md | 38 +++++-- vaporetto_wasm/build_portable_js.sh | 16 +++ vaporetto_wasm/src/lib.rs | 109 ++++++++++++++---- vaporetto_wasm/www/index.html | 8 +- vaporetto_wasm/www/index.js | 170 ++++------------------------ 8 files changed, 163 insertions(+), 187 deletions(-) delete mode 100644 model/model.zstd create mode 100755 vaporetto_wasm/build_portable_js.sh diff --git a/Cargo.toml b/Cargo.toml index a3ba8aa0..09c8e826 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,5 +7,8 @@ members = [ "train", "evaluate", "convert_kytea_model", +] + +exclude = [ "vaporetto_wasm", ] diff --git a/model/model.zstd b/model/model.zstd deleted file mode 100644 index 8d409665268e8889a50d295df613be9091e92485..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 258 zcmdPcs{fZE<19PF6m|xF=EqGt*&a8yHSK)d+|jrm$eR4Psi$%MWX{Kp>wAFWJ)A%> zAZIcsqlbq_NJxkW6Jv-6gWYjK=@*~0E7+V@S*sq1+`p26m36T*8v_uqumUmXvv*2D z_8Pm@>n{ARZ(w9=U}R-sSR?|H2LfiB1rx4x2E7RAy0GFbqpnclgAR-9=L7niS(z?Z zOf@;d`;xQy_A2XlD}|=_9~y2fbZX}NFvsP)<@vQY4;Dmi z()L(ls1kSoD(jTLkGfSDJR^-O9vs@twe4N>m1hsEA8q=kr}4o5pfYdQu^+o ``` -4. Open http://localhost:8000/www +3. 
You can use the generated JS file like the following code:
+   ```html
+
+
+
+
+
+
+
+
+
+   ```
diff --git a/vaporetto_wasm/build_portable_js.sh b/vaporetto_wasm/build_portable_js.sh
new file mode 100755
index 00000000..d4584f11
--- /dev/null
+++ b/vaporetto_wasm/build_portable_js.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+set -eu
+
+DIRNAME="$(dirname $0)"
+MODEL="$(realpath $1)"
+IDENT="$2"
+OUTPUT="$3"
+pushd "$DIRNAME"
+VAPORETTO_MODEL_PATH="$MODEL" wasm-pack build --release --target no-modules
+popd
+encoded_wasm=$(base64 < "${DIRNAME}/pkg/vaporetto_wasm_bg.wasm")
+cat \
+    <(sed "s/wasm_bindgen/__vaporetto_${IDENT}_wbg/g" < "${DIRNAME}/pkg/vaporetto_wasm.js") \
+    <(echo "async function vaporetto_${IDENT}(){await __vaporetto_${IDENT}_wbg(fetch('data:application/wasm;base64,${encoded_wasm}'));return __vaporetto_${IDENT}_wbg.Vaporetto;}") \
+    > "$OUTPUT"
diff --git a/vaporetto_wasm/src/lib.rs b/vaporetto_wasm/src/lib.rs
index e7a9189d..3d75ef72 100644
--- a/vaporetto_wasm/src/lib.rs
+++ b/vaporetto_wasm/src/lib.rs
@@ -4,68 +4,127 @@ use js_sys::{Array, Object};
 use vaporetto::{BoundaryType, CharacterType, Model, Predictor, Sentence};
 use vaporetto_rules::{
     sentence_filters::{ConcatGraphemeClustersFilter, KyteaWsConstFilter},
-    SentenceFilter,
+    string_filters::KyteaFullwidthFilter,
+    SentenceFilter, StringFilter,
 };
 use wasm_bindgen::{prelude::*, JsValue};
 
+#[global_allocator]
+static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;
+
 #[wasm_bindgen]
 pub struct Vaporetto {
     predictor: Predictor,
+    fullwidth_filter: KyteaFullwidthFilter,
     post_filters: Vec<Box<dyn SentenceFilter>>,
 }
 
 #[wasm_bindgen]
 impl Vaporetto {
     #[wasm_bindgen]
-    pub fn new() -> Self {
-        let mut f = Cursor::new(include_bytes!("../../model/model.zstd"));
+    pub fn new(filters: &str) -> Self {
+        let mut f = Cursor::new(include_bytes!(env!("VAPORETTO_MODEL_PATH")));
         let mut decoder = ruzstd::StreamingDecoder::new(&mut f).unwrap();
         let mut buff = vec![];
         decoder.read_to_end(&mut buff).unwrap();
         let model = Model::read(&mut buff.as_slice()).unwrap();
         let predictor = Predictor::new(model);
-        let post_filters: Vec<Box<dyn SentenceFilter>> = vec![
-            Box::new(ConcatGraphemeClustersFilter::new()),
-            Box::new(KyteaWsConstFilter::new(CharacterType::Digit)),
-        ];
+        let post_filters: Vec<_> = filters
+            .chars()
+            .map(|c| {
+                let b: Box<dyn SentenceFilter> = match c {
+                    'D' => Box::new(KyteaWsConstFilter::new(CharacterType::Digit)),
+                    'R' => Box::new(KyteaWsConstFilter::new(CharacterType::Roman)),
+                    'H' => Box::new(KyteaWsConstFilter::new(CharacterType::Hiragana)),
+                    'T' => Box::new(KyteaWsConstFilter::new(CharacterType::Katakana)),
+                    'K' => Box::new(KyteaWsConstFilter::new(CharacterType::Kanji)),
+                    'O' => Box::new(KyteaWsConstFilter::new(CharacterType::Other)),
+                    'G' => Box::new(ConcatGraphemeClustersFilter::new()),
+                    _ => panic!("invalid filter: {}", c),
+                };
+                b
+            })
+            .collect();
         Self {
             predictor,
+            fullwidth_filter: KyteaFullwidthFilter::new(),
             post_filters,
         }
     }
 
     #[wasm_bindgen]
-    pub fn predict_partial(&self, text: &str, start: usize, end: usize) -> Object {
-        let s = if let Ok(s) = Sentence::from_raw(text) {
+    pub fn tokenize(&self, text: &str) -> Object {
+        let result = Array::new();
+        let mut s = if let Ok(s) = Sentence::from_raw(text) {
             s
         } else {
-            return JsValue::NULL.into();
+            return result.into();
         };
-        if start >= end {
-            return JsValue::NULL.into();
-        }
-        let s = self.predictor.predict_partial_with_score(s, start..end);
+        let norm = self.fullwidth_filter.filter(text);
+        let s_norm = if let Ok(s) = Sentence::from_raw(norm) {
+            s
+        } else {
+            return result.into();
+        };
+        let s_norm = 
self.predictor.predict(s_norm); + s.boundaries_mut().clone_from_slice(s_norm.boundaries()); let s = self .post_filters .iter() .fold(s, |s, filter| filter.filter(s)); + if let Ok(words) = s.to_tokenized_vec() { + for word in words { + result.push(&JsValue::from_str(word)); + } + } + result.into() + } + + #[wasm_bindgen] + pub fn predict(&self, text: &str) -> Object { let result = Array::new(); - for (&score, &b) in s.boundary_scores().unwrap()[start..end] + let text = self.fullwidth_filter.filter(text); + let s = if let Ok(s) = Sentence::from_raw(text) { + s + } else { + return result.into(); + }; + let s = self.predictor.predict(s); + let s = self + .post_filters .iter() - .zip(&s.boundaries()[start..end]) - { - let boundary = Array::new(); - boundary.push(&JsValue::from_bool(b == BoundaryType::WordBoundary)); - boundary.push(&JsValue::from_f64(score)); - result.push(&boundary); + .fold(s, |s, filter| filter.filter(s)); + + for &b in s.boundaries() { + result.push(&JsValue::from_bool(b == BoundaryType::WordBoundary)); } result.into() } -} -impl Default for Vaporetto { - fn default() -> Self { - Self::new() + #[wasm_bindgen] + pub fn predict_with_score(&self, text: &str) -> Object { + let result = Array::new(); + let text = self.fullwidth_filter.filter(text); + let s = if let Ok(s) = Sentence::from_raw(text) { + s + } else { + return result.into(); + }; + let s = self.predictor.predict_with_score(s); + let s = self + .post_filters + .iter() + .fold(s, |s, filter| filter.filter(s)); + + if let Some(boundaries) = s.boundary_scores() { + for (&score, &b) in boundaries.iter().zip(s.boundaries()) { + let boundary = Array::new(); + boundary.push(&JsValue::from_bool(b == BoundaryType::WordBoundary)); + boundary.push(&JsValue::from_f64(score)); + result.push(&boundary); + } + } + result.into() } } diff --git a/vaporetto_wasm/www/index.html b/vaporetto_wasm/www/index.html index a6a43b8c..8434232c 100644 --- a/vaporetto_wasm/www/index.html +++ b/vaporetto_wasm/www/index.html @@ -2,9 +2,10 @@ - Vaporetto Real-time Tokenization + Vaporetto Demo - + +
@@ -17,9 +18,8 @@
Output:
-
+

             
-
Loading...
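The `tokenize` method added to `lib.rs` above predicts on a fullwidth-normalized copy of the input and then copies the predicted boundaries back onto the original sentence, so the returned tokens keep the user's original characters. Below is a minimal native-Rust sketch of that flow, assuming only the `vaporetto` and `vaporetto_rules` APIs shown in this patch; the helper name `tokenize_str` is illustrative, not part of the crate:

```rust
use vaporetto::{Predictor, Sentence};
use vaporetto_rules::{string_filters::KyteaFullwidthFilter, StringFilter};

// Sketch of the normalize-then-restore flow: predict boundaries on the
// fullwidth-normalized text, then transfer them to the original sentence.
fn tokenize_str(predictor: &Predictor, text: &str) -> Vec<String> {
    let mut s = Sentence::from_raw(text).unwrap();
    // Normalization preserves the character count, so the boundary
    // slices of the two sentences line up one-to-one.
    let norm = KyteaFullwidthFilter::new().filter(text);
    let s_norm = predictor.predict(Sentence::from_raw(norm).unwrap());
    s.boundaries_mut().clone_from_slice(s_norm.boundaries());
    s.to_tokenized_vec()
        .unwrap()
        .into_iter()
        .map(|w| w.to_string())
        .collect()
}
```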
diff --git a/vaporetto_wasm/www/index.js b/vaporetto_wasm/www/index.js index e7f0deb2..212e5985 100644 --- a/vaporetto_wasm/www/index.js +++ b/vaporetto_wasm/www/index.js @@ -1,152 +1,30 @@ -import init from '../pkg/vaporetto_wasm.js'; -import * as wasm from '../pkg/vaporetto_wasm.js'; - -const loading = document.getElementById("loading"); -loading.style.display = "block"; - -function run() { - const predictor = wasm.Vaporetto.new(); - - loading.style.display = "none"; - - function createTextSpan(text) { - const span = document.createElement("span"); - const textnode = document.createTextNode(text); - span.appendChild(textnode); - return span; +function createTextSpan(text, isBoundary, score) { + const span = document.createElement("span"); + const textnode = document.createTextNode(text); + span.appendChild(textnode); + if (isBoundary) { + span.style.borderLeft = "5pt solid rgba(0, 0, 0, " + Math.atan(score / 2) + ")"; } + return span; +} - function replace_text(elem, prev_text, text, range_from, range_to, boundaries, window_size) { - const prev_boundary_start = Math.max(range_from[0] - window_size, 0); - const prev_boundary_end = Math.min(range_from[1] + window_size - 1, prev_text.length - 1); - const node_end_idx = prev_boundary_end + 1; - let node_end = elem.childNodes[0]; - if (prev_text.length != 0) { - node_end = elem.childNodes[node_end_idx]; - if (range_from[0] == 0) { - node_end.previousSibling.remove(); - } - for (let i = prev_boundary_end - prev_boundary_start; i > 0; --i) { - node_end.previousSibling.remove(); - } - } - const next_boundary_start = Math.max(range_to[0] - window_size, 0); - const next_boundary_end = Math.min(range_to[1] + window_size - 1, text.length - 1); - if (text.length != 0) { - if (range_to[0] == 0) { - node_end.before(createTextSpan(text[next_boundary_start])); - } - for (let i = 0; i < next_boundary_end - next_boundary_start; ++i) { - const elem = createTextSpan(text[next_boundary_start + i + 1]); - if (boundaries[i][0]) { - elem.style.borderLeft = '5pt solid rgba(0, 0, 0, ' + Math.atan(boundaries[i][1] / 2) + ')'; - } - node_end.before(elem); - } - } - } - - const input_text = document.getElementById('input_text'); - input_text.value = ""; - - const window_size = 3; - - let input_data = null; - let prev_range = [0, 0]; - let prev_chars = []; - let chars_pos_map = [0]; - - let composition_start = null; - input_text.addEventListener('compositionstart', function (e) { - composition_start = chars_pos_map[e.target.selectionStart]; - }); - - input_text.addEventListener('compositionend', function (e) { - composition_start = null; - }); - - input_text.addEventListener('beforeinput', function (e) { - input_data = e.data; - if (composition_start != null) { - prev_range = [composition_start, chars_pos_map[e.target.selectionEnd]]; - } else { - prev_range = [chars_pos_map[e.target.selectionStart], chars_pos_map[e.target.selectionEnd]]; - } - }); - - input_text.addEventListener('input', function (e) { - const t0 = performance.now(); +vaporetto_bccwj_suw_small().then((Vaporetto) => { + const vaporetto_suw = Vaporetto.new("DG"); - const cur_text = e.target.value; - const cur_chars = Array.from(cur_text); - chars_pos_map = new Array(cur_text.length); - let utf16_pos = 0; - for (let i = 0; i < cur_chars.length; ++i) { - chars_pos_map[utf16_pos] = i; - utf16_pos += cur_chars[i].length; + input_text.addEventListener("input", (e) => { + const text = input_text.value; + const scores = vaporetto_suw.predict_with_score(text); + let i = -1; + while (tokenized.firstChild) { + 
tokenized.removeChild(tokenized.firstChild); } - chars_pos_map.push(cur_chars.length); - - let range_from = null; - let range_to = null; - switch (e.inputType) { - case 'insertText': - case 'insertLineBreak': - case 'insertParagraph': - case 'insertFromPaste': - case 'insertCompositionText': - range_from = prev_range; - range_to = [prev_range[0], prev_range[1] + cur_chars.length - prev_chars.length]; - break; - case 'deleteWordBackward': - case 'deleteWordForward': - case 'deleteSoftLineBackward': - case 'deleteSoftLineForward': - case 'deleteEntireSoftLine': - case 'deleteHardLineBackward': - case 'deleteHardLineForward': - case 'deleteByCut': - case 'deleteContent': - case 'deleteContentBackward': - case 'deleteContentForward': - const start = chars_pos_map[e.target.selectionStart]; - const right_length = cur_chars.length - start; - const prev_end = prev_chars.length - right_length; - range_from = [start, prev_end]; - range_to = [start, start]; - break; - default: - range_from = [0, prev_chars.length]; - range_to = [0, cur_chars.length]; + for (let c of text) { + if (i >= 0) { + tokenized.appendChild(createTextSpan(c, scores[i][0], scores[i][1])); + } else { + tokenized.appendChild(createTextSpan(c, false, 0)); + } + ++i; } - - const tokenized = document.getElementById("tokenized"); - - const predict_chars_start = Math.max(range_to[0] - window_size * 2 + 1, 0); - const predict_chars_end = Math.min(range_to[1] + window_size * 2 - 1, cur_chars.length); - const predict_chars = cur_chars.slice(predict_chars_start, predict_chars_end); - - const boundary_start = Math.max(range_to[0] - window_size, 0); - const boundary_end = Math.min(range_to[1] + window_size - 1, cur_chars.length - 1); - - const predict_boundary_start = boundary_start - predict_chars_start; - const predict_boundary_end = boundary_end - predict_chars_start; - - const boundaries = predictor.predict_partial(predict_chars.join(""), predict_boundary_start, predict_boundary_end); - - console.log("input with window:", predict_chars); - console.log("prediction range:", [predict_boundary_start, predict_boundary_end]); - console.log("boundaries:", boundaries); - - replace_text(tokenized, prev_chars, cur_chars, range_from, range_to, boundaries, window_size); - - const t1 = performance.now(); - - console.log("Elapsed:", t1 - t0, "[ms]"); - console.log("-----"); - - prev_chars = cur_chars; }); -} - -init().then(run); +}); From fb79a8081bc2c4618e33592d80e5a2b3fa18c6ea Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Fri, 26 Nov 2021 16:15:04 +0900 Subject: [PATCH 09/60] Remove Predictor::predict_partial() (#6) * Remove Predictor::predict_partial() * Format * Remove unnecessary dict_window_size() * Update vaporetto/src/dict_scorer.rs Co-authored-by: Shunsuke Kanda Co-authored-by: Shunsuke Kanda --- vaporetto/src/char_scorer.rs | 21 +-- vaporetto/src/dict_scorer.rs | 43 ++---- vaporetto/src/predictor.rs | 280 +++++------------------------------ vaporetto/src/type_scorer.rs | 45 ++---- 4 files changed, 68 insertions(+), 321 deletions(-) diff --git a/vaporetto/src/char_scorer.rs b/vaporetto/src/char_scorer.rs index 31cc0911..da28bb1a 100644 --- a/vaporetto/src/char_scorer.rs +++ b/vaporetto/src/char_scorer.rs @@ -21,23 +21,10 @@ impl CharScorer { } } - pub fn add_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) { - let char_start = if start >= self.window_size { - start + 1 - self.window_size - } else { - 0 - }; - let text_start = sentence.char_to_str_pos[char_start]; - let char_end = std::cmp::min( - start + ys.len() + 
self.window_size, - sentence.char_to_str_pos.len() - 1, - ); - let text_end = sentence.char_to_str_pos[char_end]; - let text = &sentence.text[text_start..text_end]; - let padding = start - char_start + 1; - for m in self.pma.find_overlapping_no_suffix_iter(&text) { - let m_end = sentence.str_to_char_pos[m.end() + text_start] - char_start; - let offset = m_end as isize - self.window_size as isize - padding as isize; + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { + for m in self.pma.find_overlapping_no_suffix_iter(&sentence.text) { + let m_end = sentence.str_to_char_pos[m.end()]; + let offset = m_end as isize - self.window_size as isize - 1; let weights = &self.weights[m.pattern()]; if offset >= 0 { for (w, y) in weights.iter().zip(&mut ys[offset as usize..]) { diff --git a/vaporetto/src/dict_scorer.rs b/vaporetto/src/dict_scorer.rs index 2c9e9326..668a9d30 100644 --- a/vaporetto/src/dict_scorer.rs +++ b/vaporetto/src/dict_scorer.rs @@ -5,7 +5,6 @@ use daachorse::DoubleArrayAhoCorasick; pub struct DictScorer { pma: DoubleArrayAhoCorasick, weights: Vec, - window_size: usize, word_wise_score: bool, } @@ -18,51 +17,29 @@ impl DictScorer { Self { pma, weights, - window_size: 1, word_wise_score, } } - pub fn add_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) { - let char_start = if start >= self.window_size { - start + 1 - self.window_size - } else { - 0 - }; - let text_start = sentence.char_to_str_pos[char_start]; - let char_end = std::cmp::min( - start + ys.len() + self.window_size, - sentence.char_to_str_pos.len() - 1, - ); - let text_end = sentence.char_to_str_pos[char_end]; - let text = &sentence.text[text_start..text_end]; - let padding = start - char_start + 1; - for m in self.pma.find_overlapping_iter(&text) { - let m_start = sentence.str_to_char_pos[m.start() + text_start] - char_start; - let m_end = sentence.str_to_char_pos[m.end() + text_start] - char_start; + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { + for m in self.pma.find_overlapping_iter(&sentence.text) { + let m_start = sentence.str_to_char_pos[m.start()]; + let m_end = sentence.str_to_char_pos[m.end()]; let idx = if self.word_wise_score { m.pattern() } else { std::cmp::min(m_end - m_start, self.weights.len()) - 1 }; let dict_weight = self.weights[idx]; - if m_start >= padding && m_start < padding + ys.len() { - ys[m_start - padding] += dict_weight.right; + if m_start != 0 { + ys[m_start - 1] += dict_weight.right; } - let range_start = std::cmp::max(0, m_start as isize - padding as isize + 1); - let range_end = std::cmp::min(m_end as isize - padding as isize, ys.len() as isize); - if range_start < range_end { - for y in &mut ys[range_start as usize..range_end as usize] { - *y += dict_weight.inner; - } + for y in &mut ys[m_start..m_end - 1] { + *y += dict_weight.inner; } - if m_end >= padding && m_end < ys.len() + padding { - ys[m_end - padding] += dict_weight.left; + if m_end <= ys.len() { + ys[m_end - 1] += dict_weight.left; } } } - - pub fn window_size(&mut self, size: usize) { - self.window_size = std::cmp::max(size, 1); - } } diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs index 8829140f..ee4d7af8 100644 --- a/vaporetto/src/predictor.rs +++ b/vaporetto/src/predictor.rs @@ -1,5 +1,4 @@ use std::collections::HashMap; -use std::ops::Range; use crate::char_scorer::CharScorer; use crate::dict_scorer::DictScorer; @@ -174,85 +173,15 @@ impl Predictor { result } - fn predict_partial_impl( - &self, - sentence: &Sentence, - range: 
Range, - ys: &mut [ScoreValue], - ) { + fn predict_impl(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { ys.fill(self.bias); - self.char_scorer.add_scores(sentence, range.start, ys); - self.type_scorer.add_scores(sentence, range.start, ys); + self.char_scorer.add_scores(sentence, ys); + self.type_scorer.add_scores(sentence, ys); if let Some(dict_scorer) = self.dict_scorer.as_ref() { - dict_scorer.add_scores(sentence, range.start, ys); + dict_scorer.add_scores(sentence, ys); } } - /// Predicts word boundaries of the specified range of a sentence. - /// - /// # Arguments - /// - /// * `sentence` - A sentence. - /// * `range` - The range of the sentence. - /// - /// # Returns - /// - /// A sentence with predicted boundary information. - pub fn predict_partial(&self, mut sentence: Sentence, range: Range) -> Sentence { - let mut ys = vec![ScoreValue::default(); range.len()]; - self.predict_partial_impl(&sentence, range.clone(), &mut ys); - for (y, b) in ys.into_iter().zip(sentence.boundaries[range].iter_mut()) { - *b = if y >= ScoreValue::default() { - BoundaryType::WordBoundary - } else { - BoundaryType::NotWordBoundary - }; - } - sentence - } - - /// Predicts word boundaries of the specified range of a sentence. This function inserts - /// scores. - /// - /// # Arguments - /// - /// * `sentence` - A sentence. - /// * `range` - The range of the sentence. - /// - /// # Returns - /// - /// A sentence with predicted boundary information. - pub fn predict_partial_with_score( - &self, - mut sentence: Sentence, - range: Range, - ) -> Sentence { - let mut ys = vec![ScoreValue::default(); range.len()]; - self.predict_partial_impl(&sentence, range.clone(), &mut ys); - let mut scores = sentence - .boundary_scores - .take() - .unwrap_or_else(|| vec![0.; sentence.boundaries.len()]); - for (y, (b, s)) in ys.into_iter().zip( - sentence.boundaries[range.clone()] - .iter_mut() - .zip(scores[range].iter_mut()), - ) { - *b = if y >= ScoreValue::default() { - BoundaryType::WordBoundary - } else { - BoundaryType::NotWordBoundary - }; - - #[cfg(feature = "model-quantize")] - let y = y as f64 * self.quantize_multiplier; - - *s = y; - } - sentence.boundary_scores.replace(scores); - sentence - } - /// Predicts word boundaries. /// /// # Arguments @@ -262,13 +191,20 @@ impl Predictor { /// # Returns /// /// A sentence with predicted boundary information. - pub fn predict(&self, sentence: Sentence) -> Sentence { + pub fn predict(&self, mut sentence: Sentence) -> Sentence { let boundaries_size = sentence.boundaries.len(); - if boundaries_size == 0 { - sentence - } else { - self.predict_partial(sentence, 0..boundaries_size) + if boundaries_size != 0 { + let mut ys = vec![ScoreValue::default(); boundaries_size]; + self.predict_impl(&sentence, &mut ys); + for (y, b) in ys.into_iter().zip(sentence.boundaries.iter_mut()) { + *b = if y >= ScoreValue::default() { + BoundaryType::WordBoundary + } else { + BoundaryType::NotWordBoundary + }; + } } + sentence } /// Predicts word boundaries. This function inserts scores. @@ -280,29 +216,33 @@ impl Predictor { /// # Returns /// /// A sentence with predicted boundary information. - pub fn predict_with_score(&self, sentence: Sentence) -> Sentence { + pub fn predict_with_score(&self, mut sentence: Sentence) -> Sentence { let boundaries_size = sentence.boundaries.len(); - if boundaries_size == 0 { - sentence - } else { - self.predict_partial_with_score(sentence, 0..boundaries_size) - } - } - - /// Sets the window size of words in the dictionary. 
- /// - /// # Arguments - /// - /// * `size` - The window size. - /// - /// # Returns - /// - /// A predictor with the specified window size. - pub fn dict_window_size(mut self, size: usize) -> Self { - if let Some(dict_scorer) = self.dict_scorer.as_mut() { - dict_scorer.window_size(size); + if boundaries_size != 0 { + let mut ys = vec![ScoreValue::default(); boundaries_size]; + self.predict_impl(&sentence, &mut ys); + let mut scores = sentence + .boundary_scores + .take() + .unwrap_or_else(|| vec![0.; boundaries_size]); + for (y, (b, s)) in ys + .into_iter() + .zip(sentence.boundaries.iter_mut().zip(scores.iter_mut())) + { + *b = if y >= ScoreValue::default() { + BoundaryType::WordBoundary + } else { + BoundaryType::NotWordBoundary + }; + + #[cfg(feature = "model-quantize")] + let y = y as f64 * self.quantize_multiplier; + + *s = y; + } + sentence.boundary_scores.replace(scores); } - self + sentence } } @@ -794,142 +734,4 @@ mod tests { s.boundary_scores().unwrap(), ); } - - #[test] - fn test_predict_partial_1() { - let model = generate_model_1(); - let p = Predictor::new(model); - let s = Sentence::from_raw("我らは全世界の国民").unwrap(); - let s = p.predict_partial(s, 1..5); - assert_eq!( - &[ - BoundaryType::Unknown, - BoundaryType::NotWordBoundary, - BoundaryType::WordBoundary, - BoundaryType::WordBoundary, - BoundaryType::WordBoundary, - BoundaryType::Unknown, - BoundaryType::Unknown, - BoundaryType::Unknown, - ], - s.boundaries(), - ); - } - - #[test] - fn test_predict_partial_2() { - let model = generate_model_2(); - let p = Predictor::new(model); - let s = Sentence::from_raw("我らは全世界の国民").unwrap(); - let s = p.predict_partial(s, 2..7); - assert_eq!( - &[ - BoundaryType::Unknown, - BoundaryType::Unknown, - BoundaryType::NotWordBoundary, - BoundaryType::WordBoundary, - BoundaryType::WordBoundary, - BoundaryType::WordBoundary, - BoundaryType::NotWordBoundary, - BoundaryType::Unknown, - ], - s.boundaries(), - ); - } - - #[test] - fn test_predict_partial_3() { - let model = generate_model_3(); - let p = Predictor::new(model); - let s = Sentence::from_raw("我らは全世界の国民").unwrap(); - let s = p.predict_partial(s, 2..6); - assert_eq!( - &[ - BoundaryType::Unknown, - BoundaryType::Unknown, - BoundaryType::NotWordBoundary, - BoundaryType::WordBoundary, - BoundaryType::WordBoundary, - BoundaryType::NotWordBoundary, - BoundaryType::Unknown, - BoundaryType::Unknown, - ], - s.boundaries(), - ); - } - - #[test] - fn test_predict_partial_with_score_1() { - let model = generate_model_1(); - let p = Predictor::new(model); - let s = Sentence::from_raw("我らは全世界の国民").unwrap(); - let s = p.predict_partial_with_score(s, 1..5); - assert_eq!( - &[ - BoundaryType::Unknown, - BoundaryType::NotWordBoundary, - BoundaryType::WordBoundary, - BoundaryType::WordBoundary, - BoundaryType::WordBoundary, - BoundaryType::Unknown, - BoundaryType::Unknown, - BoundaryType::Unknown, - ], - s.boundaries(), - ); - assert_eq!( - &[0.0, -2.5, 22.5, 66.0, 66.5, 0.0, 0.0, 0.0], - s.boundary_scores().unwrap(), - ); - } - - #[test] - fn test_predict_partial_with_score_2() { - let model = generate_model_2(); - let p = Predictor::new(model); - let s = Sentence::from_raw("我らは全世界の国民").unwrap(); - let s = p.predict_partial_with_score(s, 2..7); - assert_eq!( - &[ - BoundaryType::Unknown, - BoundaryType::Unknown, - BoundaryType::NotWordBoundary, - BoundaryType::WordBoundary, - BoundaryType::WordBoundary, - BoundaryType::WordBoundary, - BoundaryType::NotWordBoundary, - BoundaryType::Unknown, - ], - s.boundaries(), - ); - assert_eq!( - &[0.0, 0.0, 
-9.75, 14.25, 26.0, 8.5, -19.75, 0.0], - s.boundary_scores().unwrap(), - ); - } - - #[test] - fn test_predict_partial_with_score_3() { - let model = generate_model_3(); - let p = Predictor::new(model); - let s = Sentence::from_raw("我らは全世界の国民").unwrap(); - let s = p.predict_partial_with_score(s, 2..6); - assert_eq!( - &[ - BoundaryType::Unknown, - BoundaryType::Unknown, - BoundaryType::NotWordBoundary, - BoundaryType::WordBoundary, - BoundaryType::WordBoundary, - BoundaryType::NotWordBoundary, - BoundaryType::Unknown, - BoundaryType::Unknown, - ], - s.boundaries(), - ); - assert_eq!( - &[0.0, 0.0, -20.75, 4.5, 16.25, -3.0, 0.0, 0.0], - s.boundary_scores().unwrap(), - ); - } } diff --git a/vaporetto/src/type_scorer.rs b/vaporetto/src/type_scorer.rs index 0254d663..2e4b81dd 100644 --- a/vaporetto/src/type_scorer.rs +++ b/vaporetto/src/type_scorer.rs @@ -20,10 +20,10 @@ impl TypeScorer { } } - pub fn add_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) { + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { match self { - TypeScorer::Pma(pma) => pma.add_scores(sentence, start, ys), - TypeScorer::Cache(cache) => cache.add_scores(sentence, start, ys), + TypeScorer::Pma(pma) => pma.add_scores(sentence, ys), + TypeScorer::Cache(cache) => cache.add_scores(sentence, ys), } } } @@ -47,20 +47,12 @@ impl TypeScorerPma { } } - pub fn add_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) { - let type_start = if start >= self.window_size { - start + 1 - self.window_size - } else { - 0 - }; - let type_end = std::cmp::min( - start + ys.len() + self.window_size, - sentence.char_type.len(), - ); - let char_type = &sentence.char_type[type_start..type_end]; - let padding = start - type_start + 1; - for m in self.pma.find_overlapping_no_suffix_iter(&char_type) { - let offset = m.end() as isize - self.window_size as isize - padding as isize; + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { + for m in self + .pma + .find_overlapping_no_suffix_iter(&sentence.char_type) + { + let offset = m.end() as isize - self.window_size as isize - 1; let weights = &self.weights[m.pattern()]; if offset >= 0 { for (w, y) in weights.iter().zip(&mut ys[offset as usize..]) { @@ -111,28 +103,17 @@ impl TypeScorerCache { } } - pub fn add_scores(&self, sentence: &Sentence, start: usize, ys: &mut [ScoreValue]) { - let type_start = if start >= self.window_size { - start + 1 - self.window_size - } else { - 0 - }; - let type_end = std::cmp::min( - start + ys.len() + self.window_size, - sentence.char_type.len(), - ); - let char_type = &sentence.char_type[type_start..type_end]; - let offset = self.window_size + start; + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { let mut seqid = 0; - for i in 0..offset { - if let Some(ct) = char_type.get(i) { + for i in 0..self.window_size { + if let Some(ct) = sentence.char_type.get(i) { seqid = self.increment_seqid(seqid, *ct); } else { seqid = self.increment_seqid_without_char(seqid); }; } for (i, y) in ys.iter_mut().enumerate() { - if let Some(ct) = char_type.get(i + offset) { + if let Some(ct) = sentence.char_type.get(i + self.window_size) { seqid = self.increment_seqid(seqid, *ct); } else { seqid = self.increment_seqid_without_char(seqid); From 23a9a7df8afea3ac96eceaed5b93856c4161676e Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Mon, 29 Nov 2021 00:49:35 +0900 Subject: [PATCH 10/60] Update rust.yml (#2) --- .github/workflows/rust.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 0e735a18..79e78881 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -1,8 +1,8 @@ on: push: - branches: [ main ] + branches: [ main, develop ] pull_request: - branches: [ main ] + branches: [ main, develop ] name: build From 3ce7d7211683b1a721d4334ca68d16cea3635e84 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Mon, 29 Nov 2021 16:04:53 +0900 Subject: [PATCH 11/60] Support SIMD (#1) * Add simd feature * Use cfg_attr * Disable simd in stable Rust * Use std::simd * CharScorerVector -> CharScorerNaive * Fix var name * Remove unnecessary checking * Fix job name of CI * Add simd_len() function to CharScorerSimd * Remove unnecessary switch --- .github/workflows/rust.yml | 4 +- vaporetto/Cargo.toml | 1 + vaporetto/src/char_scorer.rs | 85 +++++++++++++++++++++++++++++++++++- vaporetto/src/lib.rs | 1 + vaporetto/src/predictor.rs | 71 +++++++++++++++++++++++++++--- 5 files changed, 152 insertions(+), 10 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 79e78881..3e0dbc1b 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -48,11 +48,11 @@ jobs: command: test args: --release -p vaporetto --no-default-features - - name: Run cargo test (vaporetto / all-features) + - name: Run cargo test (vaporetto / features kytea+train) uses: actions-rs/cargo@v1 with: command: test - args: --release -p vaporetto --all-features + args: --release -p vaporetto --features kytea,train nightly: name: Nightly diff --git a/vaporetto/Cargo.toml b/vaporetto/Cargo.toml index ca1531c6..ab056251 100644 --- a/vaporetto/Cargo.toml +++ b/vaporetto/Cargo.toml @@ -26,6 +26,7 @@ default = ["model-quantize"] kytea = ["byteorder"] model-quantize = [] train = ["liblinear"] +simd = [] [package.metadata.docs.rs] all-features = true diff --git a/vaporetto/src/char_scorer.rs b/vaporetto/src/char_scorer.rs index da28bb1a..987ecc60 100644 --- a/vaporetto/src/char_scorer.rs +++ b/vaporetto/src/char_scorer.rs @@ -2,13 +2,52 @@ use crate::model::ScoreValue; use crate::sentence::Sentence; use daachorse::DoubleArrayAhoCorasick; -pub struct CharScorer { +#[cfg(feature = "simd")] +use std::simd::i32x8; + +pub enum CharScorer { + Naive(CharScorerNaive), + + #[cfg(feature = "simd")] + Simd(CharScorerSimd), +} + +impl CharScorer { + pub fn new( + pma: DoubleArrayAhoCorasick, + weights: Vec>, + window_size: usize, + ) -> Self { + #[cfg(not(feature = "simd"))] + { + Self::Naive(CharScorerNaive::new(pma, weights, window_size)) + } + + #[cfg(feature = "simd")] + if window_size <= 4 { + Self::Simd(CharScorerSimd::new(pma, weights, window_size)) + } else { + Self::Naive(CharScorerNaive::new(pma, weights, window_size)) + } + } + + pub fn add_scores(&self, sentence: &Sentence, padding: usize, ys: &mut [ScoreValue]) { + match self { + CharScorer::Naive(naive) => naive.add_scores(sentence, &mut ys[padding..]), + + #[cfg(feature = "simd")] + CharScorer::Simd(simd) => simd.add_scores(sentence, padding, ys), + } + } +} + +pub struct CharScorerNaive { pma: DoubleArrayAhoCorasick, weights: Vec>, window_size: usize, } -impl CharScorer { +impl CharScorerNaive { pub fn new( pma: DoubleArrayAhoCorasick, weights: Vec>, @@ -38,3 +77,45 @@ impl CharScorer { } } } + +#[cfg(feature = "simd")] +pub struct CharScorerSimd { + pma: DoubleArrayAhoCorasick, + weights: Vec, + window_size: usize, +} + +#[cfg(feature = "simd")] +impl CharScorerSimd { + pub fn new(pma: DoubleArrayAhoCorasick, weights: Vec>, 
window_size: usize) -> Self { + let weights: Vec<_> = weights + .iter() + .map(|w| { + let mut s = [0i32; 8]; + s[..w.len()].copy_from_slice(&w); + i32x8::from_array(s) + }) + .collect(); + Self { + pma, + weights, + window_size, + } + } + + pub fn add_scores(&self, sentence: &Sentence, padding: usize, ys: &mut [ScoreValue]) { + for m in self.pma.find_overlapping_no_suffix_iter(&sentence.text) { + let m_end = sentence.str_to_char_pos[m.end()]; + let offset = padding as isize + m_end as isize - self.window_size as isize - 1; + let weights = &self.weights[m.pattern()]; + let ys_slice = &mut ys[offset as usize..offset as usize + 8]; + let mut target = i32x8::from_slice(ys_slice); + target += weights; + ys_slice.copy_from_slice(target.as_array()); + } + } + + pub const fn simd_len() -> usize { + 8 + } +} diff --git a/vaporetto/src/lib.rs b/vaporetto/src/lib.rs index aead06a6..b59cd2aa 100644 --- a/vaporetto/src/lib.rs +++ b/vaporetto/src/lib.rs @@ -1,4 +1,5 @@ #![cfg_attr(docsrs, feature(doc_cfg))] +#![cfg_attr(feature = "simd", feature(portable_simd))] //! # Vaporetto //! diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs index ee4d7af8..b83d6f16 100644 --- a/vaporetto/src/predictor.rs +++ b/vaporetto/src/predictor.rs @@ -6,6 +6,9 @@ use crate::model::{DictWeight, Model, ScoreValue}; use crate::sentence::{BoundaryType, Sentence}; use crate::type_scorer::TypeScorer; +#[cfg(feature = "simd")] +use crate::char_scorer::CharScorerSimd; + use daachorse::DoubleArrayAhoCorasick; /// Predictor. @@ -18,6 +21,9 @@ pub struct Predictor { #[cfg(feature = "model-quantize")] quantize_multiplier: f64, + + #[cfg(feature = "simd")] + padding: usize, } impl Predictor { @@ -88,6 +94,9 @@ impl Predictor { #[cfg(feature = "model-quantize")] quantize_multiplier: model.quantize_multiplier, + + #[cfg(feature = "simd")] + padding: model.char_window_size.max(model.type_window_size), } } @@ -173,12 +182,12 @@ impl Predictor { result } - fn predict_impl(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { + fn predict_impl(&self, sentence: &Sentence, padding: usize, ys: &mut [ScoreValue]) { ys.fill(self.bias); - self.char_scorer.add_scores(sentence, ys); - self.type_scorer.add_scores(sentence, ys); + self.char_scorer.add_scores(sentence, padding, ys); + self.type_scorer.add_scores(sentence, &mut ys[padding..]); if let Some(dict_scorer) = self.dict_scorer.as_ref() { - dict_scorer.add_scores(sentence, ys); + dict_scorer.add_scores(sentence, &mut ys[padding..]); } } @@ -193,9 +202,11 @@ impl Predictor { /// A sentence with predicted boundary information. pub fn predict(&self, mut sentence: Sentence) -> Sentence { let boundaries_size = sentence.boundaries.len(); + + #[cfg(not(feature = "simd"))] if boundaries_size != 0 { let mut ys = vec![ScoreValue::default(); boundaries_size]; - self.predict_impl(&sentence, &mut ys); + self.predict_impl(&sentence, 0, &mut ys); for (y, b) in ys.into_iter().zip(sentence.boundaries.iter_mut()) { *b = if y >= ScoreValue::default() { BoundaryType::WordBoundary @@ -204,6 +215,24 @@ impl Predictor { }; } } + + #[cfg(feature = "simd")] + if boundaries_size != 0 { + let ys_size = boundaries_size + self.padding + CharScorerSimd::simd_len() - 1; + let mut ys = vec![ScoreValue::default(); ys_size]; + self.predict_impl(&sentence, self.padding, &mut ys); + for (&y, b) in ys[self.padding..] 
+ .into_iter() + .zip(sentence.boundaries.iter_mut()) + { + *b = if y >= ScoreValue::default() { + BoundaryType::WordBoundary + } else { + BoundaryType::NotWordBoundary + }; + } + } + sentence } @@ -218,9 +247,11 @@ impl Predictor { /// A sentence with predicted boundary information. pub fn predict_with_score(&self, mut sentence: Sentence) -> Sentence { let boundaries_size = sentence.boundaries.len(); + + #[cfg(not(feature = "simd"))] if boundaries_size != 0 { let mut ys = vec![ScoreValue::default(); boundaries_size]; - self.predict_impl(&sentence, &mut ys); + self.predict_impl(&sentence, 0, &mut ys); let mut scores = sentence .boundary_scores .take() @@ -242,6 +273,34 @@ impl Predictor { } sentence.boundary_scores.replace(scores); } + + #[cfg(feature = "simd")] + if boundaries_size != 0 { + let ys_size = boundaries_size + self.padding + CharScorerSimd::simd_len() - 1; + let mut ys = vec![ScoreValue::default(); ys_size]; + self.predict_impl(&sentence, self.padding, &mut ys); + let mut scores = sentence + .boundary_scores + .take() + .unwrap_or_else(|| vec![0.; boundaries_size]); + for (&y, (b, s)) in ys[self.padding..] + .into_iter() + .zip(sentence.boundaries.iter_mut().zip(scores.iter_mut())) + { + *b = if y >= ScoreValue::default() { + BoundaryType::WordBoundary + } else { + BoundaryType::NotWordBoundary + }; + + #[cfg(feature = "model-quantize")] + let y = y as f64 * self.quantize_multiplier; + + *s = y; + } + sentence.boundary_scores.replace(scores); + } + sentence } } From 6d329163f46e9fce585f0c3bd1968dd7ed3834be Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Mon, 29 Nov 2021 20:50:13 +0900 Subject: [PATCH 12/60] Add descriptions of features (#3) * Update README.md * Update lib.rs * Update README.md --- vaporetto/README.md | 17 +++++++++++------ vaporetto/src/lib.rs | 10 ++++------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/vaporetto/README.md b/vaporetto/README.md index 9e309661..6b774112 100644 --- a/vaporetto/README.md +++ b/vaporetto/README.md @@ -14,14 +14,19 @@ let mut f = BufReader::new(File::open("model.raw").unwrap()); let model = Model::read(&mut f).unwrap(); let predictor = Predictor::new(model); -for line in stdin().lock().lines() { - let s = Sentence::from_raw(line.unwrap()).unwrap(); - let s = predictor.predict(s); - let toks = s.to_tokenized_string().unwrap(); - println!("{}", toks); -} +let s = Sentence::from_raw("火星猫の生態").unwrap(); +let s = predictor.predict(s); + +println!("{:?}", s.to_tokenized_vec().unwrap()); +// ["火星", "猫", "の", "生態"] ``` +## Feature flags + +* `kytea` - Enables the reader for models generated by KyTea. +* `train` - Enables the trainer. +* `simd` - Use the SIMD operations for prediction. (Nightly version of Rust is required.) + ## License Licensed under either of diff --git a/vaporetto/src/lib.rs b/vaporetto/src/lib.rs index b59cd2aa..2d537fc8 100644 --- a/vaporetto/src/lib.rs +++ b/vaporetto/src/lib.rs @@ -17,12 +17,10 @@ //! let model = Model::read(&mut f).unwrap(); //! let predictor = Predictor::new(model); //! -//! for line in stdin().lock().lines() { -//! let s = Sentence::from_raw(line.unwrap()).unwrap(); -//! let s = predictor.predict(s); -//! let toks = s.to_tokenized_string().unwrap(); -//! println!("{}", toks); -//! } +//! let s = Sentence::from_raw("火星猫の生態").unwrap(); +//! let s = predictor.predict(s); +//! +//! println!("{:?}", s.to_tokenized_vec().unwrap()); //! ``` //! //! Training requires **crate feature** `train`. For more details, see [`Trainer`]. 
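The README example above stops at `to_tokenized_vec`; the per-boundary scores that these patches repeatedly touch are exposed through `predict_with_score`. A minimal sketch of reading them, under the same assumption as the README example (a model file at the placeholder path `model.raw`):

```rust
use std::fs::File;
use std::io::BufReader;

use vaporetto::{BoundaryType, Model, Predictor, Sentence};

fn main() {
    let mut f = BufReader::new(File::open("model.raw").unwrap());
    let predictor = Predictor::new(Model::read(&mut f).unwrap());

    let s = predictor.predict_with_score(Sentence::from_raw("火星猫の生態").unwrap());

    // predict_with_score() fills boundary_scores(); a non-negative score at a
    // position corresponds to BoundaryType::WordBoundary at that position.
    for (b, score) in s.boundaries().iter().zip(s.boundary_scores().unwrap()) {
        println!("{}\t{}", score, *b == BoundaryType::WordBoundary);
    }
}
```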
From 5eaa397faaa81325559d5e90df6ac121282cbe4f Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Tue, 30 Nov 2021 13:30:39 +0900 Subject: [PATCH 13/60] Validate length of patterns and weights (#4) * Validate length of patterns and weights * Format * Fix bugs * Fix format * Add comments --- vaporetto/src/char_scorer.rs | 39 +++++++++------- vaporetto/src/dict_scorer.rs | 14 +++--- vaporetto/src/kytea_model.rs | 14 ++---- vaporetto/src/model.rs | 10 ++--- vaporetto/src/predictor.rs | 87 +++++++++++++++--------------------- vaporetto/src/trainer.rs | 4 +- vaporetto/src/type_scorer.rs | 41 +++++++++-------- vaporetto/src/utils.rs | 6 ++- 8 files changed, 106 insertions(+), 109 deletions(-) diff --git a/vaporetto/src/char_scorer.rs b/vaporetto/src/char_scorer.rs index 987ecc60..07c6652e 100644 --- a/vaporetto/src/char_scorer.rs +++ b/vaporetto/src/char_scorer.rs @@ -13,21 +13,20 @@ pub enum CharScorer { } impl CharScorer { - pub fn new( - pma: DoubleArrayAhoCorasick, - weights: Vec>, - window_size: usize, - ) -> Self { + /// # Panics + /// + /// `ngrams` and `weights` must have same number of entries. + pub fn new(ngrams: &[String], weights: Vec>, window_size: usize) -> Self { #[cfg(not(feature = "simd"))] { - Self::Naive(CharScorerNaive::new(pma, weights, window_size)) + Self::Naive(CharScorerNaive::new(ngrams, weights, window_size)) } #[cfg(feature = "simd")] if window_size <= 4 { - Self::Simd(CharScorerSimd::new(pma, weights, window_size)) + Self::Simd(CharScorerSimd::new(ngrams, weights, window_size)) } else { - Self::Naive(CharScorerNaive::new(pma, weights, window_size)) + Self::Naive(CharScorerNaive::new(ngrams, weights, window_size)) } } @@ -48,13 +47,15 @@ pub struct CharScorerNaive { } impl CharScorerNaive { - pub fn new( - pma: DoubleArrayAhoCorasick, - weights: Vec>, - window_size: usize, - ) -> Self { + /// # Panics + /// + /// `ngrams` and `weights` must have same number of entries. + pub fn new(ngrams: &[String], weights: Vec>, window_size: usize) -> Self { + if ngrams.len() != weights.len() { + panic!("ngrams.len() != weights.len()"); + } Self { - pma, + pma: DoubleArrayAhoCorasick::new(ngrams).unwrap(), weights, window_size, } @@ -87,7 +88,13 @@ pub struct CharScorerSimd { #[cfg(feature = "simd")] impl CharScorerSimd { - pub fn new(pma: DoubleArrayAhoCorasick, weights: Vec>, window_size: usize) -> Self { + /// # Panics + /// + /// `ngrams` and `weights` must have same number of entries. + pub fn new(ngrams: &[String], weights: Vec>, window_size: usize) -> Self { + if ngrams.len() != weights.len() { + panic!("ngrams.len() != weights.len()"); + } let weights: Vec<_> = weights .iter() .map(|w| { @@ -97,7 +104,7 @@ impl CharScorerSimd { }) .collect(); Self { - pma, + pma: DoubleArrayAhoCorasick::new(ngrams).unwrap(), weights, window_size, } diff --git a/vaporetto/src/dict_scorer.rs b/vaporetto/src/dict_scorer.rs index 668a9d30..f8ae1604 100644 --- a/vaporetto/src/dict_scorer.rs +++ b/vaporetto/src/dict_scorer.rs @@ -9,13 +9,15 @@ pub struct DictScorer { } impl DictScorer { - pub fn new( - pma: DoubleArrayAhoCorasick, - weights: Vec, - word_wise_score: bool, - ) -> Self { + /// # Panics + /// + /// `ngrams` and `weights` must have same number of entries. 
+ pub fn new(words: &[String], weights: Vec, word_wise_score: bool) -> Self { + if word_wise_score && words.len() != weights.len() { + panic!("word_wise_score == true && words.len() != weights.len()"); + } Self { - pma, + pma: DoubleArrayAhoCorasick::new(words).unwrap(), weights, word_wise_score, } diff --git a/vaporetto/src/kytea_model.rs b/vaporetto/src/kytea_model.rs index 60585291..bc721177 100644 --- a/vaporetto/src/kytea_model.rs +++ b/vaporetto/src/kytea_model.rs @@ -409,17 +409,11 @@ impl TryFrom for Model { .type_dict .ok_or_else(|| anyhow!("no type dictionary."))?; - let mut char_ngrams: Vec> = vec![]; + let mut char_ngrams: Vec = vec![]; let mut char_ngram_weights = vec![]; for (char_ngram, v) in char_dict.dump_items() { let weight_size = config.char_w as usize * 2 - char_ngram.len() + 1; - char_ngrams.push( - char_ngram - .into_iter() - .collect::() - .as_bytes() - .to_vec(), - ); + char_ngrams.push(char_ngram.into_iter().collect::()); char_ngram_weights.push(v[..weight_size].to_vec()); } @@ -437,7 +431,7 @@ impl TryFrom for Model { type_ngram_weights.push(v[..weight_size].to_vec()); } - let mut dict: Vec> = vec![]; + let mut dict: Vec = vec![]; let mut dict_weights = vec![]; if let Some(kytea_dict) = model.dict { for (w, data) in kytea_dict.dump_items() { @@ -452,7 +446,7 @@ impl TryFrom for Model { } } dict_weights.push(weights); - dict.push(w.into_iter().collect::().as_bytes().to_vec()); + dict.push(w.into_iter().collect()); } } diff --git a/vaporetto/src/model.rs b/vaporetto/src/model.rs index 35747b0e..6a500ee9 100644 --- a/vaporetto/src/model.rs +++ b/vaporetto/src/model.rs @@ -33,9 +33,9 @@ pub struct DictWeight { /// Model data. #[derive(Serialize, Deserialize)] pub struct Model { - pub(crate) char_ngrams: Vec>, + pub(crate) char_ngrams: Vec, pub(crate) type_ngrams: Vec>, - pub(crate) dict: Vec>, + pub(crate) dict: Vec, pub(crate) char_ngram_weights: Vec>, pub(crate) type_ngram_weights: Vec>, @@ -93,7 +93,7 @@ impl Model { pub(crate) fn from_liblinear_model( model: impl LibLinearModel, fid_manager: FeatureIDManager, - dict: Vec>, + dict: Vec, char_window_size: usize, type_window_size: usize, dict_word_max_size: usize, @@ -139,9 +139,9 @@ impl Model { match feature.feature { FeatureContent::CharacterNgram(char_ngram) => { - let id = char_ngram_ids.get_id(char_ngram.as_bytes()); + let id = char_ngram_ids.get_id(&char_ngram); if id == char_ngram_weights.len() { - char_ngrams.push(char_ngram.as_bytes().to_vec()); + char_ngrams.push(char_ngram.to_string()); char_ngram_weights.push(vec![ WeightValue::default(); char_window_size * 2 diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs index b83d6f16..89e5088e 100644 --- a/vaporetto/src/predictor.rs +++ b/vaporetto/src/predictor.rs @@ -9,8 +9,6 @@ use crate::type_scorer::TypeScorer; #[cfg(feature = "simd")] use crate::char_scorer::CharScorerSimd; -use daachorse::DoubleArrayAhoCorasick; - /// Predictor. 
pub struct Predictor { bias: ScoreValue, @@ -69,20 +67,16 @@ impl Predictor { #[cfg(feature = "model-quantize")] let bias = bias as i32; - let char_pma = DoubleArrayAhoCorasick::new(char_ngrams).unwrap(); - let type_pma = DoubleArrayAhoCorasick::new(model.type_ngrams).unwrap(); - - let char_scorer = CharScorer::new(char_pma, char_ngram_weights, model.char_window_size); - let type_scorer = TypeScorer::new(type_pma, type_ngram_weights, model.type_window_size); + let char_scorer = CharScorer::new(&char_ngrams, char_ngram_weights, model.char_window_size); + let type_scorer = TypeScorer::new( + &model.type_ngrams, + type_ngram_weights, + model.type_window_size, + ); let dict_scorer = if dict.is_empty() { None } else { - let dict_pma = DoubleArrayAhoCorasick::new(dict).unwrap(); - Some(DictScorer::new( - dict_pma, - dict_weights, - model.dict_word_wise, - )) + Some(DictScorer::new(&dict, dict_weights, model.dict_word_wise)) }; Self { @@ -101,13 +95,13 @@ impl Predictor { } fn merge_dict_weights( - dict: Vec>, + dict: Vec, dict_weights: Vec, - words: &[Vec], + words: &[String], word_weights: &mut Vec>, char_window_size: usize, dict_word_wise: bool, - ) -> (Vec>, Vec) { + ) -> (Vec, Vec) { let mut word_map = HashMap::new(); for (i, word) in words.iter().cloned().enumerate() { word_map.insert(word, i); @@ -116,7 +110,7 @@ impl Predictor { if dict_word_wise { let mut new_dict_weights = vec![]; for (word, weight) in dict.into_iter().zip(dict_weights) { - let word_size = std::str::from_utf8(&word).unwrap().chars().count(); + let word_size = word.chars().count(); match word_map.get(&word) { Some(&idx) if char_window_size >= word_size => { let start = char_window_size - word_size; @@ -136,7 +130,7 @@ impl Predictor { (new_dict, new_dict_weights) } else { for word in dict { - let word_size = std::str::from_utf8(&word).unwrap().chars().count(); + let word_size = word.chars().count(); match word_map.get(&word) { Some(&idx) if char_window_size >= word_size => { let start = char_window_size - word_size; @@ -156,15 +150,18 @@ impl Predictor { } } - fn merge_weights(words: &[Vec], weights: &[Vec]) -> Vec> { + fn merge_weights
<P>
(words: &[P], weights: &[Vec]) -> Vec> + where + P: AsRef<[u8]>, + { let mut result = vec![]; let word_ids = words .iter() - .cloned() .enumerate() - .map(|(i, w)| (w, i)) + .map(|(i, w)| (w.as_ref().to_vec(), i)) .collect::, usize>>(); for seq in words { + let seq = seq.as_ref(); let mut new_weights: Option> = None; for st in (0..seq.len()).rev() { if let Some(&idx) = word_ids.get(&seq[st..]) { @@ -338,18 +335,14 @@ mod tests { fn generate_model_1() -> Model { Model { char_ngrams: vec![ - "我ら".as_bytes().to_vec(), - "全世界".as_bytes().to_vec(), - "国民".as_bytes().to_vec(), - "世界".as_bytes().to_vec(), - "界".as_bytes().to_vec(), + "我ら".to_string(), + "全世界".to_string(), + "国民".to_string(), + "世界".to_string(), + "界".to_string(), ], type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], - dict: vec![ - "全世界".as_bytes().to_vec(), - "世界".as_bytes().to_vec(), - "世".as_bytes().to_vec(), - ], + dict: vec!["全世界".to_string(), "世界".to_string(), "世".to_string()], #[cfg(not(feature = "model-quantize"))] char_ngram_weights: vec![ vec![0.5, 1.0, 1.5, 2.0, 2.5], @@ -447,18 +440,14 @@ mod tests { fn generate_model_2() -> Model { Model { char_ngrams: vec![ - "我ら".as_bytes().to_vec(), - "全世界".as_bytes().to_vec(), - "国民".as_bytes().to_vec(), - "世界".as_bytes().to_vec(), - "界".as_bytes().to_vec(), + "我ら".to_string(), + "全世界".to_string(), + "国民".to_string(), + "世界".to_string(), + "界".to_string(), ], type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], - dict: vec![ - "全世界".as_bytes().to_vec(), - "世界".as_bytes().to_vec(), - "世".as_bytes().to_vec(), - ], + dict: vec!["全世界".to_string(), "世界".to_string(), "世".to_string()], #[cfg(not(feature = "model-quantize"))] char_ngram_weights: vec![ vec![0.25, 0.5, 0.75], @@ -566,18 +555,14 @@ mod tests { fn generate_model_3() -> Model { Model { char_ngrams: vec![ - "我ら".as_bytes().to_vec(), - "全世界".as_bytes().to_vec(), - "国民".as_bytes().to_vec(), - "世界".as_bytes().to_vec(), - "界".as_bytes().to_vec(), + "我ら".to_string(), + "全世界".to_string(), + "国民".to_string(), + "世界".to_string(), + "界".to_string(), ], type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], - dict: vec![ - "国民".as_bytes().to_vec(), - "世界".as_bytes().to_vec(), - "世".as_bytes().to_vec(), - ], + dict: vec!["国民".to_string(), "世界".to_string(), "世".to_string()], #[cfg(not(feature = "model-quantize"))] char_ngram_weights: vec![ vec![0.25, 0.5, 0.75], diff --git a/vaporetto/src/trainer.rs b/vaporetto/src/trainer.rs index b81de851..50a5f6a9 100644 --- a/vaporetto/src/trainer.rs +++ b/vaporetto/src/trainer.rs @@ -72,7 +72,7 @@ impl From for liblinear::SolverType { /// Dataset manager. 
#[cfg_attr(docsrs, doc(cfg(feature = "train")))] pub struct Dataset<'a> { - dictionary: Vec>, + dictionary: Vec, feature_extractor: FeatureExtractor, example_generator: ExampleGenerator, char_window_size: usize, @@ -118,7 +118,7 @@ impl<'a> Dataset<'a> { dictionary: dictionary .as_ref() .iter() - .map(|word| (word.as_ref() as &[u8]).to_vec()) + .map(|word| (word.as_ref() as &str).to_string()) .collect(), feature_extractor: FeatureExtractor::new( char_ngram_size, diff --git a/vaporetto/src/type_scorer.rs b/vaporetto/src/type_scorer.rs index 2e4b81dd..f2d2b6da 100644 --- a/vaporetto/src/type_scorer.rs +++ b/vaporetto/src/type_scorer.rs @@ -8,15 +8,14 @@ pub enum TypeScorer { } impl TypeScorer { - pub fn new( - pma: DoubleArrayAhoCorasick, - weights: Vec>, - window_size: usize, - ) -> Self { + /// # Panics + /// + /// `ngrams` and `weights` must have same number of entries. + pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { if window_size <= 3 { - Self::Cache(TypeScorerCache::new(pma, weights, window_size)) + Self::Cache(TypeScorerCache::new(ngrams, weights, window_size)) } else { - Self::Pma(TypeScorerPma::new(pma, weights, window_size)) + Self::Pma(TypeScorerPma::new(ngrams, weights, window_size)) } } @@ -35,13 +34,15 @@ pub struct TypeScorerPma { } impl TypeScorerPma { - pub fn new( - pma: DoubleArrayAhoCorasick, - weights: Vec>, - window_size: usize, - ) -> Self { + /// # Panics + /// + /// `ngrams` and `weights` must have same number of entries. + pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { + if ngrams.len() != weights.len() { + panic!("ngrams.len() != weights.len()"); + } Self { - pma, + pma: DoubleArrayAhoCorasick::new(ngrams).unwrap(), weights, window_size, } @@ -74,11 +75,15 @@ pub struct TypeScorerCache { } impl TypeScorerCache { - pub fn new( - pma: DoubleArrayAhoCorasick, - weights: Vec>, - window_size: usize, - ) -> Self { + /// # Panics + /// + /// `ngrams` and `weights` must have same number of entries. 
+ pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { + if ngrams.len() != weights.len() { + panic!("ngrams.len() != weights.len()"); + } + let pma = DoubleArrayAhoCorasick::new(ngrams).unwrap(); + let sequence_size = window_size * 2; let all_sequences = ALPHABET_SIZE.pow(sequence_size as u32); diff --git a/vaporetto/src/utils.rs b/vaporetto/src/utils.rs index 47b51b80..c8f3c422 100644 --- a/vaporetto/src/utils.rs +++ b/vaporetto/src/utils.rs @@ -46,7 +46,11 @@ impl StringIdManager { } } - pub fn get_id(&mut self, key: &[u8]) -> usize { + pub fn get_id(&mut self, key: K) -> usize + where + K: AsRef<[u8]>, + { + let key = key.as_ref(); self.map.get(key).copied().unwrap_or_else(|| { let new_id = self.map.len(); self.map.insert(key.into(), new_id); From 9d3ca6b09280bdc43c2cced229632d00ad627516 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Wed, 1 Dec 2021 00:29:52 +0900 Subject: [PATCH 14/60] Remove FP support and use i32 in model file (#5) * Use i32 for holding quantized weights * Remove model-quantize feature and remove supporting FP numbers * Use 24bit for quantization * 24bit -> 16bit * Fix a bug * Add a comment * Rename BIT_DEPTH -> QUANTIZE_BIT_DEPTH --- vaporetto/Cargo.toml | 3 +- vaporetto/src/char_scorer.rs | 13 ++- vaporetto/src/dict_scorer.rs | 4 +- vaporetto/src/kytea_model.rs | 7 +- vaporetto/src/model.rs | 65 ++++++-------- vaporetto/src/predictor.rs | 168 ++++------------------------------- vaporetto/src/type_scorer.rs | 23 +++-- 7 files changed, 66 insertions(+), 217 deletions(-) diff --git a/vaporetto/Cargo.toml b/vaporetto/Cargo.toml index ab056251..44161ed3 100644 --- a/vaporetto/Cargo.toml +++ b/vaporetto/Cargo.toml @@ -22,9 +22,8 @@ byteorder = { version = "1.4", optional = true } # Unlicense or MIT liblinear = { version = "1", optional = true } # MIT [features] -default = ["model-quantize"] +default = [] kytea = ["byteorder"] -model-quantize = [] train = ["liblinear"] simd = [] diff --git a/vaporetto/src/char_scorer.rs b/vaporetto/src/char_scorer.rs index 07c6652e..205cbe64 100644 --- a/vaporetto/src/char_scorer.rs +++ b/vaporetto/src/char_scorer.rs @@ -1,4 +1,3 @@ -use crate::model::ScoreValue; use crate::sentence::Sentence; use daachorse::DoubleArrayAhoCorasick; @@ -16,7 +15,7 @@ impl CharScorer { /// # Panics /// /// `ngrams` and `weights` must have same number of entries. - pub fn new(ngrams: &[String], weights: Vec>, window_size: usize) -> Self { + pub fn new(ngrams: &[String], weights: Vec>, window_size: usize) -> Self { #[cfg(not(feature = "simd"))] { Self::Naive(CharScorerNaive::new(ngrams, weights, window_size)) @@ -30,7 +29,7 @@ impl CharScorer { } } - pub fn add_scores(&self, sentence: &Sentence, padding: usize, ys: &mut [ScoreValue]) { + pub fn add_scores(&self, sentence: &Sentence, padding: usize, ys: &mut [i32]) { match self { CharScorer::Naive(naive) => naive.add_scores(sentence, &mut ys[padding..]), @@ -42,7 +41,7 @@ impl CharScorer { pub struct CharScorerNaive { pma: DoubleArrayAhoCorasick, - weights: Vec>, + weights: Vec>, window_size: usize, } @@ -50,7 +49,7 @@ impl CharScorerNaive { /// # Panics /// /// `ngrams` and `weights` must have same number of entries. 
- pub fn new(ngrams: &[String], weights: Vec>, window_size: usize) -> Self { + pub fn new(ngrams: &[String], weights: Vec>, window_size: usize) -> Self { if ngrams.len() != weights.len() { panic!("ngrams.len() != weights.len()"); } @@ -61,7 +60,7 @@ impl CharScorerNaive { } } - pub fn add_scores(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) { for m in self.pma.find_overlapping_no_suffix_iter(&sentence.text) { let m_end = sentence.str_to_char_pos[m.end()]; let offset = m_end as isize - self.window_size as isize - 1; @@ -110,7 +109,7 @@ impl CharScorerSimd { } } - pub fn add_scores(&self, sentence: &Sentence, padding: usize, ys: &mut [ScoreValue]) { + pub fn add_scores(&self, sentence: &Sentence, padding: usize, ys: &mut [i32]) { for m in self.pma.find_overlapping_no_suffix_iter(&sentence.text) { let m_end = sentence.str_to_char_pos[m.end()]; let offset = padding as isize + m_end as isize - self.window_size as isize - 1; diff --git a/vaporetto/src/dict_scorer.rs b/vaporetto/src/dict_scorer.rs index f8ae1604..5afc62f5 100644 --- a/vaporetto/src/dict_scorer.rs +++ b/vaporetto/src/dict_scorer.rs @@ -1,4 +1,4 @@ -use crate::model::{DictWeight, ScoreValue}; +use crate::model::DictWeight; use crate::sentence::Sentence; use daachorse::DoubleArrayAhoCorasick; @@ -23,7 +23,7 @@ impl DictScorer { } } - pub fn add_scores(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) { for m in self.pma.find_overlapping_iter(&sentence.text) { let m_start = sentence.str_to_char_pos[m.start()]; let m_end = sentence.str_to_char_pos[m.end()]; diff --git a/vaporetto/src/kytea_model.rs b/vaporetto/src/kytea_model.rs index bc721177..a486e0d9 100644 --- a/vaporetto/src/kytea_model.rs +++ b/vaporetto/src/kytea_model.rs @@ -401,7 +401,7 @@ impl TryFrom for Model { let feature_lookup = wordseg_model .feature_lookup .ok_or_else(|| anyhow!("no lookup data."))?; - let bias = feature_lookup.biases[0]; + let bias = feature_lookup.biases[0] as i32; let char_dict = feature_lookup .char_dict .ok_or_else(|| anyhow!("no character dictionary."))?; @@ -414,7 +414,7 @@ impl TryFrom for Model { for (char_ngram, v) in char_dict.dump_items() { let weight_size = config.char_w as usize * 2 - char_ngram.len() + 1; char_ngrams.push(char_ngram.into_iter().collect::()); - char_ngram_weights.push(v[..weight_size].to_vec()); + char_ngram_weights.push(v[..weight_size].iter().map(|&w| w as i32).collect()); } let mut type_ngrams: Vec> = vec![]; @@ -428,7 +428,7 @@ impl TryFrom for Model { .as_bytes() .to_vec(), ); - type_ngram_weights.push(v[..weight_size].to_vec()); + type_ngram_weights.push(v[..weight_size].iter().map(|&w| w as i32).collect()); } let mut dict: Vec = vec![]; @@ -455,7 +455,6 @@ impl TryFrom for Model { type_ngrams, dict, - #[cfg(feature = "model-quantize")] quantize_multiplier, char_ngram_weights, diff --git a/vaporetto/src/model.rs b/vaporetto/src/model.rs index 6a500ee9..211eafd3 100644 --- a/vaporetto/src/model.rs +++ b/vaporetto/src/model.rs @@ -11,23 +11,19 @@ use crate::sentence::BoundaryType; use crate::utils::{FeatureIDManager, StringIdManager}; #[cfg(feature = "train")] use liblinear::LibLinearModel; + #[cfg(feature = "train")] const EPSILON: f64 = 1e-6; -#[cfg(not(feature = "model-quantize"))] -pub type WeightValue = f64; -#[cfg(feature = "model-quantize")] -pub type WeightValue = i16; -#[cfg(not(feature = "model-quantize"))] -pub type ScoreValue = f64; -#[cfg(feature = "model-quantize")] 
-pub type ScoreValue = i32; +// Bit depth for weight quantization. +#[cfg(feature = "train")] +const QUANTIZE_BIT_DEPTH: u8 = 16; #[derive(Clone, Copy, Default, Serialize, Deserialize)] pub struct DictWeight { - pub right: ScoreValue, - pub inner: ScoreValue, - pub left: ScoreValue, + pub right: i32, + pub inner: i32, + pub left: i32, } /// Model data. @@ -37,16 +33,15 @@ pub struct Model { pub(crate) type_ngrams: Vec>, pub(crate) dict: Vec, - pub(crate) char_ngram_weights: Vec>, - pub(crate) type_ngram_weights: Vec>, + pub(crate) char_ngram_weights: Vec>, + pub(crate) type_ngram_weights: Vec>, pub(crate) dict_weights: Vec, - #[cfg(feature = "model-quantize")] pub(crate) quantize_multiplier: f64, pub(crate) dict_word_wise: bool, - pub(crate) bias: WeightValue, + pub(crate) bias: i32, pub(crate) char_window_size: usize, pub(crate) type_window_size: usize, } @@ -113,20 +108,16 @@ impl Model { let mut char_ngram_ids = StringIdManager::new(); let mut type_ngram_ids = StringIdManager::new(); - #[cfg(feature = "model-quantize")] - let quantize_multiplier = { - let mut weight_max = bias.abs(); - for fid in 0..model.num_features() { - let weight = model.feature_coefficient(fid as i32, wb_idx).abs(); - if weight > weight_max { - weight_max = weight; - } + let mut weight_max = bias.abs(); + for fid in 0..model.num_features() { + let weight = model.feature_coefficient(fid as i32, wb_idx).abs(); + if weight > weight_max { + weight_max = weight; } - weight_max / 32767. - }; + } + let quantize_multiplier = weight_max / ((1 << (QUANTIZE_BIT_DEPTH - 1)) - 1) as f64; - #[cfg(feature = "model-quantize")] - let bias = (bias / quantize_multiplier) as i16; + let bias = (bias / quantize_multiplier) as i32; for (feature, fid) in fid_manager.map { let weight = model.feature_coefficient(fid as i32 + 1, wb_idx); @@ -134,7 +125,6 @@ impl Model { continue; } - #[cfg(feature = "model-quantize")] let weight = weight / quantize_multiplier; match feature.feature { @@ -143,29 +133,27 @@ impl Model { if id == char_ngram_weights.len() { char_ngrams.push(char_ngram.to_string()); char_ngram_weights.push(vec![ - WeightValue::default(); + 0; char_window_size * 2 - char_ngram.chars().count() + 1 ]); } - char_ngram_weights[id][feature.rel_position] = weight as WeightValue; + char_ngram_weights[id][feature.rel_position] = weight as i32; } FeatureContent::CharacterTypeNgram(type_ngram) => { let id = type_ngram_ids.get_id(type_ngram) as usize; if id == type_ngram_weights.len() { type_ngrams.push(type_ngram.to_vec()); - type_ngram_weights.push(vec![ - WeightValue::default(); - type_window_size * 2 - type_ngram.len() + 1 - ]); + type_ngram_weights + .push(vec![0; type_window_size * 2 - type_ngram.len() + 1]); } - type_ngram_weights[id][feature.rel_position] = weight as WeightValue; + type_ngram_weights[id][feature.rel_position] = weight as i32; } FeatureContent::DictionaryWord(size) => match feature.rel_position { - 0 => dict_weights[size - 1].right = weight as ScoreValue, - 1 => dict_weights[size - 1].inner = weight as ScoreValue, - 2 => dict_weights[size - 1].left = weight as ScoreValue, + 0 => dict_weights[size - 1].right = weight as i32, + 1 => dict_weights[size - 1].inner = weight as i32, + 2 => dict_weights[size - 1].left = weight as i32, _ => panic!("Invalid rel_position"), }, }; @@ -175,7 +163,6 @@ impl Model { type_ngrams, dict, - #[cfg(feature = "model-quantize")] quantize_multiplier, char_ngram_weights, diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs index 89e5088e..e9c04e19 100644 --- 
a/vaporetto/src/predictor.rs +++ b/vaporetto/src/predictor.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use crate::char_scorer::CharScorer; use crate::dict_scorer::DictScorer; -use crate::model::{DictWeight, Model, ScoreValue}; +use crate::model::{DictWeight, Model}; use crate::sentence::{BoundaryType, Sentence}; use crate::type_scorer::TypeScorer; @@ -11,13 +11,12 @@ use crate::char_scorer::CharScorerSimd; /// Predictor. pub struct Predictor { - bias: ScoreValue, + bias: i32, char_scorer: CharScorer, type_scorer: TypeScorer, dict_scorer: Option, - #[cfg(feature = "model-quantize")] quantize_multiplier: f64, #[cfg(feature = "simd")] @@ -41,16 +40,8 @@ impl Predictor { let dict = model.dict; let dict_weights = model.dict_weights; - let mut char_ngram_weights: Vec<_> = model - .char_ngram_weights - .into_iter() - .map(|ws| ws.into_iter().map(|w| w as ScoreValue).collect()) - .collect(); - let type_ngram_weights: Vec<_> = model - .type_ngram_weights - .into_iter() - .map(|ws| ws.into_iter().map(|w| w as ScoreValue).collect()) - .collect(); + let mut char_ngram_weights = model.char_ngram_weights; + let type_ngram_weights = model.type_ngram_weights; let (dict, dict_weights) = Self::merge_dict_weights( dict, @@ -64,9 +55,6 @@ impl Predictor { let char_ngram_weights = Self::merge_weights(&char_ngrams, &char_ngram_weights); let type_ngram_weights = Self::merge_weights(&model.type_ngrams, &type_ngram_weights); - #[cfg(feature = "model-quantize")] - let bias = bias as i32; - let char_scorer = CharScorer::new(&char_ngrams, char_ngram_weights, model.char_window_size); let type_scorer = TypeScorer::new( &model.type_ngrams, @@ -86,7 +74,6 @@ impl Predictor { type_scorer, dict_scorer, - #[cfg(feature = "model-quantize")] quantize_multiplier: model.quantize_multiplier, #[cfg(feature = "simd")] @@ -98,7 +85,7 @@ impl Predictor { dict: Vec, dict_weights: Vec, words: &[String], - word_weights: &mut Vec>, + word_weights: &mut Vec>, char_window_size: usize, dict_word_wise: bool, ) -> (Vec, Vec) { @@ -150,7 +137,7 @@ impl Predictor { } } - fn merge_weights
<P>(words: &[P], weights: &[Vec<ScoreValue>]) -> Vec<Vec<ScoreValue>>
+ fn merge_weights<P>
(words: &[P], weights: &[Vec]) -> Vec> where P: AsRef<[u8]>, { @@ -179,7 +166,7 @@ impl Predictor { result } - fn predict_impl(&self, sentence: &Sentence, padding: usize, ys: &mut [ScoreValue]) { + fn predict_impl(&self, sentence: &Sentence, padding: usize, ys: &mut [i32]) { ys.fill(self.bias); self.char_scorer.add_scores(sentence, padding, ys); self.type_scorer.add_scores(sentence, &mut ys[padding..]); @@ -202,10 +189,10 @@ impl Predictor { #[cfg(not(feature = "simd"))] if boundaries_size != 0 { - let mut ys = vec![ScoreValue::default(); boundaries_size]; + let mut ys = vec![0; boundaries_size]; self.predict_impl(&sentence, 0, &mut ys); for (y, b) in ys.into_iter().zip(sentence.boundaries.iter_mut()) { - *b = if y >= ScoreValue::default() { + *b = if y >= 0 { BoundaryType::WordBoundary } else { BoundaryType::NotWordBoundary @@ -216,13 +203,13 @@ impl Predictor { #[cfg(feature = "simd")] if boundaries_size != 0 { let ys_size = boundaries_size + self.padding + CharScorerSimd::simd_len() - 1; - let mut ys = vec![ScoreValue::default(); ys_size]; + let mut ys = vec![0; ys_size]; self.predict_impl(&sentence, self.padding, &mut ys); for (&y, b) in ys[self.padding..] .into_iter() .zip(sentence.boundaries.iter_mut()) { - *b = if y >= ScoreValue::default() { + *b = if y >= 0 { BoundaryType::WordBoundary } else { BoundaryType::NotWordBoundary @@ -247,7 +234,7 @@ impl Predictor { #[cfg(not(feature = "simd"))] if boundaries_size != 0 { - let mut ys = vec![ScoreValue::default(); boundaries_size]; + let mut ys = vec![0; boundaries_size]; self.predict_impl(&sentence, 0, &mut ys); let mut scores = sentence .boundary_scores @@ -257,16 +244,13 @@ impl Predictor { .into_iter() .zip(sentence.boundaries.iter_mut().zip(scores.iter_mut())) { - *b = if y >= ScoreValue::default() { + *b = if y >= 0 { BoundaryType::WordBoundary } else { BoundaryType::NotWordBoundary }; - #[cfg(feature = "model-quantize")] - let y = y as f64 * self.quantize_multiplier; - - *s = y; + *s = y as f64 * self.quantize_multiplier; } sentence.boundary_scores.replace(scores); } @@ -274,7 +258,7 @@ impl Predictor { #[cfg(feature = "simd")] if boundaries_size != 0 { let ys_size = boundaries_size + self.padding + CharScorerSimd::simd_len() - 1; - let mut ys = vec![ScoreValue::default(); ys_size]; + let mut ys = vec![0; ys_size]; self.predict_impl(&sentence, self.padding, &mut ys); let mut scores = sentence .boundary_scores @@ -284,16 +268,13 @@ impl Predictor { .into_iter() .zip(sentence.boundaries.iter_mut().zip(scores.iter_mut())) { - *b = if y >= ScoreValue::default() { + *b = if y >= 0 { BoundaryType::WordBoundary } else { BoundaryType::NotWordBoundary }; - #[cfg(feature = "model-quantize")] - let y = y as f64 * self.quantize_multiplier; - - *s = y; + *s = y as f64 * self.quantize_multiplier; } sentence.boundary_scores.replace(scores); } @@ -343,15 +324,6 @@ mod tests { ], type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], dict: vec!["全世界".to_string(), "世界".to_string(), "世".to_string()], - #[cfg(not(feature = "model-quantize"))] - char_ngram_weights: vec![ - vec![0.5, 1.0, 1.5, 2.0, 2.5], - vec![3.0, 3.5, 4.0, 4.5], - vec![5.0, 5.5, 6.0, 6.5, 7.0], - vec![7.5, 8.0, 8.5, 9.0, 9.5], - vec![10.0, 10.5, 11.0, 11.5, 12.0, 12.5], - ], - #[cfg(feature = "model-quantize")] char_ngram_weights: vec![ vec![1, 2, 3, 4, 5], vec![6, 7, 8, 9], @@ -359,34 +331,12 @@ mod tests { vec![15, 16, 17, 18, 19], vec![20, 21, 22, 23, 24, 25], ], - #[cfg(not(feature = "model-quantize"))] - type_ngram_weights: vec![ - vec![13.0, 13.5, 
14.0, 14.5], - vec![15.0, 15.5, 16.0, 16.5], - vec![17.0, 17.5, 18.0], - vec![18.5, 19.0, 19.5], - ], - #[cfg(feature = "model-quantize")] type_ngram_weights: vec![ vec![26, 27, 28, 29], vec![30, 31, 32, 33], vec![34, 35, 36], vec![37, 38, 39], ], - #[cfg(not(feature = "model-quantize"))] - dict_weights: vec![ - DictWeight { - right: 20.0, - inner: 20.5, - left: 21.0, - }, - DictWeight { - right: 21.5, - inner: 22.0, - left: 22.5, - }, - ], - #[cfg(feature = "model-quantize")] dict_weights: vec![ DictWeight { right: 40, @@ -399,12 +349,8 @@ mod tests { left: 45, }, ], - #[cfg(feature = "model-quantize")] quantize_multiplier: 0.5, dict_word_wise: false, - #[cfg(not(feature = "model-quantize"))] - bias: -100.0, - #[cfg(feature = "model-quantize")] bias: -200, char_window_size: 3, type_window_size: 2, @@ -448,15 +394,6 @@ mod tests { ], type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], dict: vec!["全世界".to_string(), "世界".to_string(), "世".to_string()], - #[cfg(not(feature = "model-quantize"))] - char_ngram_weights: vec![ - vec![0.25, 0.5, 0.75], - vec![1.0, 1.25], - vec![1.5, 1.75, 2.0], - vec![2.25, 2.5, 2.75], - vec![3.0, 3.25, 3.5, 3.75], - ], - #[cfg(feature = "model-quantize")] char_ngram_weights: vec![ vec![1, 2, 3], vec![4, 5], @@ -464,39 +401,12 @@ mod tests { vec![9, 10, 11], vec![12, 13, 14, 15], ], - #[cfg(not(feature = "model-quantize"))] - type_ngram_weights: vec![ - vec![4.0, 4.25, 4.5, 4.75, 5.0, 5.25], - vec![5.5, 5.75, 6.0, 6.25, 6.5, 6.75], - vec![7.0, 7.25, 7.5, 7.75, 8.0], - vec![8.25, 8.5, 8.75, 9.0, 9.25], - ], - #[cfg(feature = "model-quantize")] type_ngram_weights: vec![ vec![16, 17, 18, 19, 20, 21], vec![22, 23, 24, 25, 26, 27], vec![28, 29, 30, 31, 32], vec![33, 34, 35, 36, 37], ], - #[cfg(not(feature = "model-quantize"))] - dict_weights: vec![ - DictWeight { - right: 9.5, - inner: 9.75, - left: 10.0, - }, - DictWeight { - right: 10.25, - inner: 10.5, - left: 10.75, - }, - DictWeight { - right: 11.0, - inner: 11.25, - left: 11.5, - }, - ], - #[cfg(feature = "model-quantize")] dict_weights: vec![ DictWeight { right: 38, @@ -514,12 +424,8 @@ mod tests { left: 46, }, ], - #[cfg(feature = "model-quantize")] quantize_multiplier: 0.25, dict_word_wise: false, - #[cfg(not(feature = "model-quantize"))] - bias: -71.25, - #[cfg(feature = "model-quantize")] bias: -285, char_window_size: 2, type_window_size: 3, @@ -563,15 +469,6 @@ mod tests { ], type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], dict: vec!["国民".to_string(), "世界".to_string(), "世".to_string()], - #[cfg(not(feature = "model-quantize"))] - char_ngram_weights: vec![ - vec![0.25, 0.5, 0.75], - vec![1.0, 1.25], - vec![1.5, 1.75, 2.0], - vec![2.25, 2.5, 2.75], - vec![3.0, 3.25, 3.5, 3.75], - ], - #[cfg(feature = "model-quantize")] char_ngram_weights: vec![ vec![1, 2, 3], vec![4, 5], @@ -579,39 +476,12 @@ mod tests { vec![9, 10, 11], vec![12, 13, 14, 15], ], - #[cfg(not(feature = "model-quantize"))] - type_ngram_weights: vec![ - vec![4.0, 4.25, 4.5, 4.75, 5.0, 5.25], - vec![5.5, 5.75, 6.0, 6.25, 6.5, 6.75], - vec![7.0, 7.25, 7.5, 7.75, 8.0], - vec![8.25, 8.5, 8.75, 9.0, 9.25], - ], - #[cfg(feature = "model-quantize")] type_ngram_weights: vec![ vec![16, 17, 18, 19, 20, 21], vec![22, 23, 24, 25, 26, 27], vec![28, 29, 30, 31, 32], vec![33, 34, 35, 36, 37], ], - #[cfg(not(feature = "model-quantize"))] - dict_weights: vec![ - DictWeight { - right: 9.5, - inner: 9.75, - left: 11.0, - }, - DictWeight { - right: 10.25, - inner: 10.5, - left: 10.75, - }, - DictWeight { - 
right: 11.0, - inner: 11.25, - left: 11.5, - }, - ], - #[cfg(feature = "model-quantize")] dict_weights: vec![ DictWeight { right: 38, @@ -629,12 +499,8 @@ mod tests { left: 46, }, ], - #[cfg(feature = "model-quantize")] quantize_multiplier: 0.25, dict_word_wise: true, - #[cfg(not(feature = "model-quantize"))] - bias: -71.25, - #[cfg(feature = "model-quantize")] bias: -285, char_window_size: 2, type_window_size: 3, diff --git a/vaporetto/src/type_scorer.rs b/vaporetto/src/type_scorer.rs index f2d2b6da..696068fe 100644 --- a/vaporetto/src/type_scorer.rs +++ b/vaporetto/src/type_scorer.rs @@ -1,4 +1,3 @@ -use crate::model::ScoreValue; use crate::sentence::Sentence; use daachorse::DoubleArrayAhoCorasick; @@ -11,7 +10,7 @@ impl TypeScorer { /// # Panics /// /// `ngrams` and `weights` must have same number of entries. - pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { + pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { if window_size <= 3 { Self::Cache(TypeScorerCache::new(ngrams, weights, window_size)) } else { @@ -19,7 +18,7 @@ impl TypeScorer { } } - pub fn add_scores(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) { match self { TypeScorer::Pma(pma) => pma.add_scores(sentence, ys), TypeScorer::Cache(cache) => cache.add_scores(sentence, ys), @@ -29,7 +28,7 @@ impl TypeScorer { pub struct TypeScorerPma { pma: DoubleArrayAhoCorasick, - weights: Vec>, + weights: Vec>, window_size: usize, } @@ -37,7 +36,7 @@ impl TypeScorerPma { /// # Panics /// /// `ngrams` and `weights` must have same number of entries. - pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { + pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { if ngrams.len() != weights.len() { panic!("ngrams.len() != weights.len()"); } @@ -48,7 +47,7 @@ impl TypeScorerPma { } } - pub fn add_scores(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) { for m in self .pma .find_overlapping_no_suffix_iter(&sentence.char_type) @@ -69,7 +68,7 @@ impl TypeScorerPma { } pub struct TypeScorerCache { - scores: Vec, + scores: Vec, window_size: usize, sequence_mask: usize, } @@ -78,7 +77,7 @@ impl TypeScorerCache { /// # Panics /// /// `ngrams` and `weights` must have same number of entries. 
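TypeScorer::new above switches to the precomputed table only for window_size <= 3 because the table holds ALPHABET_SIZE.pow(2 * window_size) scores. A rough illustration of that growth; the alphabet size of 6 (one id per character type) is an assumption for the example:

    fn cache_entries(alphabet_size: usize, window_size: u32) -> usize {
        alphabet_size.pow(2 * window_size)
    }
    // cache_entries(6, 3) == 46_656 i32 scores (~182 KiB), cheap to
    // build and hold; cache_entries(6, 4) == 1_679_616 (~6.4 MiB), and
    // each further window step multiplies the table by 36, so wider
    // windows fall back to TypeScorerPma.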
- pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { + pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { if ngrams.len() != weights.len() { panic!("ngrams.len() != weights.len()"); } @@ -88,13 +87,13 @@ impl TypeScorerCache { let all_sequences = ALPHABET_SIZE.pow(sequence_size as u32); let mut sequence = vec![0u8; sequence_size]; - let mut scores = vec![0 as ScoreValue; all_sequences]; + let mut scores = vec![0; all_sequences]; for (i, score) in scores.iter_mut().enumerate() { if !Self::seqid_to_seq(i, &mut sequence) { continue; } - let mut y = ScoreValue::default(); + let mut y = 0; for m in pma.find_overlapping_no_suffix_iter(&sequence) { y += weights[m.pattern()][sequence_size - m.end()]; } @@ -108,7 +107,7 @@ impl TypeScorerCache { } } - pub fn add_scores(&self, sentence: &Sentence, ys: &mut [ScoreValue]) { + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) { let mut seqid = 0; for i in 0..self.window_size { if let Some(ct) = sentence.char_type.get(i) { @@ -141,7 +140,7 @@ impl TypeScorerCache { } #[inline(always)] - fn get_score(&self, seqid: usize) -> ScoreValue { + fn get_score(&self, seqid: usize) -> i32 { self.scores[seqid] } From 06782f612685bb5fd69e60771923cf427a7af2ed Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Wed, 1 Dec 2021 11:30:30 +0900 Subject: [PATCH 15/60] Separate n-gram feature management (#6) * Use i32 for holding quantized weights * Remove model-quantize feature and remove supporting FP numbers * Use 24bit for quantization * 24bit -> 16bit * Fix a bug * Add a comment * Rename BIT_DEPTH -> QUANTIZE_BIT_DEPTH * Add NgramModel * Fix a bug --- vaporetto/src/char_scorer.rs | 49 +++---- vaporetto/src/kytea_model.rs | 27 ++-- vaporetto/src/lib.rs | 1 + vaporetto/src/model.rs | 45 +++--- vaporetto/src/ngram_model.rs | 63 +++++++++ vaporetto/src/predictor.rs | 257 +++++++++++++++++++---------------- vaporetto/src/type_scorer.rs | 38 ++---- 7 files changed, 272 insertions(+), 208 deletions(-) create mode 100644 vaporetto/src/ngram_model.rs diff --git a/vaporetto/src/char_scorer.rs b/vaporetto/src/char_scorer.rs index 205cbe64..a69e7f41 100644 --- a/vaporetto/src/char_scorer.rs +++ b/vaporetto/src/char_scorer.rs @@ -1,6 +1,8 @@ -use crate::sentence::Sentence; use daachorse::DoubleArrayAhoCorasick; +use crate::ngram_model::NgramModel; +use crate::sentence::Sentence; + #[cfg(feature = "simd")] use std::simd::i32x8; @@ -12,20 +14,17 @@ pub enum CharScorer { } impl CharScorer { - /// # Panics - /// - /// `ngrams` and `weights` must have same number of entries. - pub fn new(ngrams: &[String], weights: Vec>, window_size: usize) -> Self { + pub fn new(model: NgramModel, window_size: usize) -> Self { #[cfg(not(feature = "simd"))] { - Self::Naive(CharScorerNaive::new(ngrams, weights, window_size)) + Self::Naive(CharScorerNaive::new(model, window_size)) } #[cfg(feature = "simd")] if window_size <= 4 { - Self::Simd(CharScorerSimd::new(ngrams, weights, window_size)) + Self::Simd(CharScorerSimd::new(model, window_size)) } else { - Self::Naive(CharScorerNaive::new(ngrams, weights, window_size)) + Self::Naive(CharScorerNaive::new(model, window_size)) } } @@ -46,16 +45,11 @@ pub struct CharScorerNaive { } impl CharScorerNaive { - /// # Panics - /// - /// `ngrams` and `weights` must have same number of entries. 
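TypeScorerCache::new above enumerates every type sequence of length 2 * window_size, runs the automaton once per sequence, and stores the summed score, so add_scores becomes one table read per character. The table key is a rolling integer id; a minimal sketch of that update, where the bit width is an assumption and the crate keeps an equivalent sequence_mask:

    // Pack each character-type id into a fixed number of bits and keep
    // the id of the current window updated in O(1) per character.
    const TYPE_BITS: u32 = 3; // assumed width, enough for one type id

    fn next_seqid(seqid: usize, type_id: usize, mask: usize) -> usize {
        ((seqid << TYPE_BITS) | type_id) & mask
    }

    // Per character: seqid = next_seqid(seqid, type_id, mask);
    //                ys[pos] += scores[seqid];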
- pub fn new(ngrams: &[String], weights: Vec>, window_size: usize) -> Self { - if ngrams.len() != weights.len() { - panic!("ngrams.len() != weights.len()"); - } + pub fn new(mut model: NgramModel, window_size: usize) -> Self { + model.merge_weights(); Self { - pma: DoubleArrayAhoCorasick::new(ngrams).unwrap(), - weights, + pma: DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram)).unwrap(), + weights: model.data.into_iter().map(|d| d.weights).collect(), window_size, } } @@ -87,23 +81,20 @@ pub struct CharScorerSimd { #[cfg(feature = "simd")] impl CharScorerSimd { - /// # Panics - /// - /// `ngrams` and `weights` must have same number of entries. - pub fn new(ngrams: &[String], weights: Vec>, window_size: usize) -> Self { - if ngrams.len() != weights.len() { - panic!("ngrams.len() != weights.len()"); - } - let weights: Vec<_> = weights - .iter() - .map(|w| { + pub fn new(mut model: NgramModel, window_size: usize) -> Self { + model.merge_weights(); + let pma = DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram)).unwrap(); + let weights = model + .data + .into_iter() + .map(|d| { let mut s = [0i32; 8]; - s[..w.len()].copy_from_slice(&w); + s[..d.weights.len()].copy_from_slice(&d.weights); i32x8::from_array(s) }) .collect(); Self { - pma: DoubleArrayAhoCorasick::new(ngrams).unwrap(), + pma, weights, window_size, } diff --git a/vaporetto/src/kytea_model.rs b/vaporetto/src/kytea_model.rs index a486e0d9..13fdaa8a 100644 --- a/vaporetto/src/kytea_model.rs +++ b/vaporetto/src/kytea_model.rs @@ -5,6 +5,7 @@ use anyhow::{anyhow, Result}; use byteorder::{LittleEndian, ReadBytesExt}; use crate::model::{DictWeight, Model}; +use crate::ngram_model::{NgramData, NgramModel}; struct KyteaConfig { _model_tag: String, @@ -409,26 +410,26 @@ impl TryFrom for Model { .type_dict .ok_or_else(|| anyhow!("no type dictionary."))?; - let mut char_ngrams: Vec = vec![]; - let mut char_ngram_weights = vec![]; + let mut char_ngrams = vec![]; for (char_ngram, v) in char_dict.dump_items() { let weight_size = config.char_w as usize * 2 - char_ngram.len() + 1; - char_ngrams.push(char_ngram.into_iter().collect::()); - char_ngram_weights.push(v[..weight_size].iter().map(|&w| w as i32).collect()); + char_ngrams.push(NgramData { + ngram: char_ngram.into_iter().collect(), + weights: v[..weight_size].iter().map(|&w| w as i32).collect(), + }); } - let mut type_ngrams: Vec> = vec![]; - let mut type_ngram_weights = vec![]; + let mut type_ngrams = vec![]; for (type_ngram, v) in type_dict.dump_items() { let weight_size = config.type_w as usize * 2 - type_ngram.len() + 1; - type_ngrams.push( - type_ngram + type_ngrams.push(NgramData { + ngram: type_ngram .into_iter() .collect::() .as_bytes() .to_vec(), - ); - type_ngram_weights.push(v[..weight_size].iter().map(|&w| w as i32).collect()); + weights: v[..weight_size].iter().map(|&w| w as i32).collect(), + }); } let mut dict: Vec = vec![]; @@ -451,14 +452,12 @@ impl TryFrom for Model { } Ok(Self { - char_ngrams, - type_ngrams, + char_ngram_model: NgramModel::new(char_ngrams), + type_ngram_model: NgramModel::new(type_ngrams), dict, quantize_multiplier, - char_ngram_weights, - type_ngram_weights, dict_weights, dict_word_wise: true, bias, diff --git a/vaporetto/src/lib.rs b/vaporetto/src/lib.rs index 2d537fc8..c81fce84 100644 --- a/vaporetto/src/lib.rs +++ b/vaporetto/src/lib.rs @@ -31,6 +31,7 @@ mod utils; mod char_scorer; mod dict_scorer; mod model; +mod ngram_model; mod predictor; mod sentence; mod type_scorer; diff --git a/vaporetto/src/model.rs 
b/vaporetto/src/model.rs index 211eafd3..2d038892 100644 --- a/vaporetto/src/model.rs +++ b/vaporetto/src/model.rs @@ -3,9 +3,13 @@ use std::io::{Read, Write}; use anyhow::Result; use serde::{Deserialize, Serialize}; +use crate::ngram_model::NgramModel; + #[cfg(feature = "train")] use crate::feature::FeatureContent; #[cfg(feature = "train")] +use crate::ngram_model::NgramData; +#[cfg(feature = "train")] use crate::sentence::BoundaryType; #[cfg(feature = "train")] use crate::utils::{FeatureIDManager, StringIdManager}; @@ -29,12 +33,9 @@ pub struct DictWeight { /// Model data. #[derive(Serialize, Deserialize)] pub struct Model { - pub(crate) char_ngrams: Vec, - pub(crate) type_ngrams: Vec>, + pub(crate) char_ngram_model: NgramModel, + pub(crate) type_ngram_model: NgramModel>, pub(crate) dict: Vec, - - pub(crate) char_ngram_weights: Vec>, - pub(crate) type_ngram_weights: Vec>, pub(crate) dict_weights: Vec, pub(crate) quantize_multiplier: f64, @@ -102,8 +103,6 @@ impl Model { let bias = model.label_bias(wb_idx); let mut char_ngrams = vec![]; let mut type_ngrams = vec![]; - let mut char_ngram_weights = vec![]; - let mut type_ngram_weights = vec![]; let mut dict_weights = vec![DictWeight::default(); dict_word_max_size]; let mut char_ngram_ids = StringIdManager::new(); let mut type_ngram_ids = StringIdManager::new(); @@ -130,25 +129,23 @@ impl Model { match feature.feature { FeatureContent::CharacterNgram(char_ngram) => { let id = char_ngram_ids.get_id(&char_ngram); - if id == char_ngram_weights.len() { - char_ngrams.push(char_ngram.to_string()); - char_ngram_weights.push(vec![ - 0; - char_window_size * 2 - - char_ngram.chars().count() - + 1 - ]); + if id == char_ngrams.len() { + char_ngrams.push(NgramData { + ngram: char_ngram.to_string(), + weights: vec![0; char_window_size * 2 - char_ngram.chars().count() + 1], + }); } - char_ngram_weights[id][feature.rel_position] = weight as i32; + char_ngrams[id].weights[feature.rel_position] = weight as i32; } FeatureContent::CharacterTypeNgram(type_ngram) => { let id = type_ngram_ids.get_id(type_ngram) as usize; - if id == type_ngram_weights.len() { - type_ngrams.push(type_ngram.to_vec()); - type_ngram_weights - .push(vec![0; type_window_size * 2 - type_ngram.len() + 1]); + if id == type_ngrams.len() { + type_ngrams.push(NgramData { + ngram: type_ngram.to_vec(), + weights: vec![0; type_window_size * 2 - type_ngram.len() + 1], + }); } - type_ngram_weights[id][feature.rel_position] = weight as i32; + type_ngrams[id].weights[feature.rel_position] = weight as i32; } FeatureContent::DictionaryWord(size) => match feature.rel_position { 0 => dict_weights[size - 1].right = weight as i32, @@ -159,14 +156,12 @@ impl Model { }; } Self { - char_ngrams, - type_ngrams, + char_ngram_model: NgramModel::new(char_ngrams), + type_ngram_model: NgramModel::new(type_ngrams), dict, quantize_multiplier, - char_ngram_weights, - type_ngram_weights, dict_weights, dict_word_wise: false, bias, diff --git a/vaporetto/src/ngram_model.rs b/vaporetto/src/ngram_model.rs new file mode 100644 index 00000000..dbff7a91 --- /dev/null +++ b/vaporetto/src/ngram_model.rs @@ -0,0 +1,63 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Serialize, Deserialize)] +pub struct NgramData +where + T: Clone, +{ + pub(crate) ngram: T, + pub(crate) weights: Vec, +} + +#[derive(Serialize, Deserialize)] +pub struct NgramModel +where + T: Clone, +{ + pub(crate) data: Vec>, + merged: bool, +} + +impl NgramModel +where + T: AsRef<[u8]> + Clone, +{ + #[cfg(any(feature = 
"train", test))] + pub fn new(data: Vec>) -> Self { + Self { + data, + merged: false, + } + } + + pub fn merge_weights(&mut self) { + if self.merged { + return; + } + self.merged = true; + let ngrams = self + .data + .iter() + .cloned() + .map(|d| (d.ngram.as_ref().to_vec(), d.weights)) + .collect::>(); + for NgramData { ngram, weights } in &mut self.data { + let ngram = ngram.as_ref(); + let mut new_weights: Option> = None; + for st in (0..ngram.len()).rev() { + if let Some(weights) = ngrams.get(&ngram[st..]) { + if let Some(new_weights) = new_weights.as_mut() { + for (w_new, w) in new_weights.iter_mut().zip(weights) { + *w_new += *w; + } + } else { + new_weights.replace(weights.clone()); + } + } + } + *weights = new_weights.unwrap(); + } + } +} diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs index e9c04e19..89d61836 100644 --- a/vaporetto/src/predictor.rs +++ b/vaporetto/src/predictor.rs @@ -3,6 +3,7 @@ use std::collections::HashMap; use crate::char_scorer::CharScorer; use crate::dict_scorer::DictScorer; use crate::model::{DictWeight, Model}; +use crate::ngram_model::NgramModel; use crate::sentence::{BoundaryType, Sentence}; use crate::type_scorer::TypeScorer; @@ -36,31 +37,21 @@ impl Predictor { pub fn new(model: Model) -> Self { let bias = model.bias; - let char_ngrams = model.char_ngrams; + let mut char_ngram_model = model.char_ngram_model; + let type_ngram_model = model.type_ngram_model; let dict = model.dict; let dict_weights = model.dict_weights; - let mut char_ngram_weights = model.char_ngram_weights; - let type_ngram_weights = model.type_ngram_weights; - let (dict, dict_weights) = Self::merge_dict_weights( dict, dict_weights, - &char_ngrams, - &mut char_ngram_weights, + &mut char_ngram_model, model.char_window_size, model.dict_word_wise, ); - let char_ngram_weights = Self::merge_weights(&char_ngrams, &char_ngram_weights); - let type_ngram_weights = Self::merge_weights(&model.type_ngrams, &type_ngram_weights); - - let char_scorer = CharScorer::new(&char_ngrams, char_ngram_weights, model.char_window_size); - let type_scorer = TypeScorer::new( - &model.type_ngrams, - type_ngram_weights, - model.type_window_size, - ); + let char_scorer = CharScorer::new(char_ngram_model, model.char_window_size); + let type_scorer = TypeScorer::new(type_ngram_model, model.type_window_size); let dict_scorer = if dict.is_empty() { None } else { @@ -84,13 +75,17 @@ impl Predictor { fn merge_dict_weights( dict: Vec, dict_weights: Vec, - words: &[String], - word_weights: &mut Vec>, + char_ngram_model: &mut NgramModel, char_window_size: usize, dict_word_wise: bool, ) -> (Vec, Vec) { let mut word_map = HashMap::new(); - for (i, word) in words.iter().cloned().enumerate() { + for (i, word) in char_ngram_model + .data + .iter() + .map(|d| d.ngram.clone()) + .enumerate() + { word_map.insert(word, i); } let mut new_dict = vec![]; @@ -102,11 +97,11 @@ impl Predictor { Some(&idx) if char_window_size >= word_size => { let start = char_window_size - word_size; let end = start + word_size; - word_weights[idx][start] += weight.right; + char_ngram_model.data[idx].weights[start] += weight.right; for i in start + 1..end { - word_weights[idx][i] += weight.inner; + char_ngram_model.data[idx].weights[i] += weight.inner; } - word_weights[idx][end] += weight.left; + char_ngram_model.data[idx].weights[end] += weight.left; } _ => { new_dict.push(word); @@ -124,11 +119,11 @@ impl Predictor { let end = start + word_size; let word_size_idx = std::cmp::min(word_size, dict_weights.len()) - 1; let weight = 
&dict_weights[word_size_idx]; - word_weights[idx][start] += weight.right; + char_ngram_model.data[idx].weights[start] += weight.right; for i in start + 1..end { - word_weights[idx][i] += weight.inner; + char_ngram_model.data[idx].weights[i] += weight.inner; } - word_weights[idx][end] += weight.left; + char_ngram_model.data[idx].weights[end] += weight.left; } _ => new_dict.push(word), } @@ -137,35 +132,6 @@ impl Predictor { } } - fn merge_weights
<P>
(words: &[P], weights: &[Vec]) -> Vec> - where - P: AsRef<[u8]>, - { - let mut result = vec![]; - let word_ids = words - .iter() - .enumerate() - .map(|(i, w)| (w.as_ref().to_vec(), i)) - .collect::, usize>>(); - for seq in words { - let seq = seq.as_ref(); - let mut new_weights: Option> = None; - for st in (0..seq.len()).rev() { - if let Some(&idx) = word_ids.get(&seq[st..]) { - if let Some(new_weights) = new_weights.as_mut() { - for (w_new, w) in new_weights.iter_mut().zip(&weights[idx]) { - *w_new += *w; - } - } else { - new_weights.replace(weights[idx].clone()); - } - } - } - result.push(new_weights.unwrap()); - } - result - } - fn predict_impl(&self, sentence: &Sentence, padding: usize, ys: &mut [i32]) { ys.fill(self.bias); self.char_scorer.add_scores(sentence, padding, ys); @@ -287,6 +253,8 @@ impl Predictor { mod tests { use super::*; + use crate::ngram_model::NgramData; + /// Input: 我 ら は 全 世 界 の 国 民 /// bias: -200 .. .. .. .. .. .. .. /// words: @@ -315,28 +283,47 @@ mod tests { /// 世: 40 42 fn generate_model_1() -> Model { Model { - char_ngrams: vec![ - "我ら".to_string(), - "全世界".to_string(), - "国民".to_string(), - "世界".to_string(), - "界".to_string(), - ], - type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], + char_ngram_model: NgramModel::new(vec![ + NgramData { + ngram: "我ら".to_string(), + weights: vec![1, 2, 3, 4, 5], + }, + NgramData { + ngram: "全世界".to_string(), + weights: vec![6, 7, 8, 9], + }, + NgramData { + ngram: "国民".to_string(), + weights: vec![10, 11, 12, 13, 14], + }, + NgramData { + ngram: "世界".to_string(), + weights: vec![15, 16, 17, 18, 19], + }, + NgramData { + ngram: "界".to_string(), + weights: vec![20, 21, 22, 23, 24, 25], + }, + ]), + type_ngram_model: NgramModel::new(vec![ + NgramData { + ngram: b"H".to_vec(), + weights: vec![26, 27, 28, 29], + }, + NgramData { + ngram: b"K".to_vec(), + weights: vec![30, 31, 32, 33], + }, + NgramData { + ngram: b"KH".to_vec(), + weights: vec![34, 35, 36], + }, + NgramData { + ngram: b"HK".to_vec(), + weights: vec![37, 38, 39], + }, + ]), dict: vec!["全世界".to_string(), "世界".to_string(), "世".to_string()], - char_ngram_weights: vec![ - vec![1, 2, 3, 4, 5], - vec![6, 7, 8, 9], - vec![10, 11, 12, 13, 14], - vec![15, 16, 17, 18, 19], - vec![20, 21, 22, 23, 24, 25], - ], - type_ngram_weights: vec![ - vec![26, 27, 28, 29], - vec![30, 31, 32, 33], - vec![34, 35, 36], - vec![37, 38, 39], - ], dict_weights: vec![ DictWeight { right: 40, @@ -385,28 +372,47 @@ mod tests { /// 世: 38 40 fn generate_model_2() -> Model { Model { - char_ngrams: vec![ - "我ら".to_string(), - "全世界".to_string(), - "国民".to_string(), - "世界".to_string(), - "界".to_string(), - ], - type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], + char_ngram_model: NgramModel::new(vec![ + NgramData { + ngram: "我ら".to_string(), + weights: vec![1, 2, 3], + }, + NgramData { + ngram: "全世界".to_string(), + weights: vec![4, 5], + }, + NgramData { + ngram: "国民".to_string(), + weights: vec![6, 7, 8], + }, + NgramData { + ngram: "世界".to_string(), + weights: vec![9, 10, 11], + }, + NgramData { + ngram: "界".to_string(), + weights: vec![12, 13, 14, 15], + }, + ]), + type_ngram_model: NgramModel::new(vec![ + NgramData { + ngram: b"H".to_vec(), + weights: vec![16, 17, 18, 19, 20, 21], + }, + NgramData { + ngram: b"K".to_vec(), + weights: vec![22, 23, 24, 25, 26, 27], + }, + NgramData { + ngram: b"KH".to_vec(), + weights: vec![28, 29, 30, 31, 32], + }, + NgramData { + ngram: b"HK".to_vec(), + weights: vec![33, 34, 35, 36, 37], + }, + ]), 
dict: vec!["全世界".to_string(), "世界".to_string(), "世".to_string()], - char_ngram_weights: vec![ - vec![1, 2, 3], - vec![4, 5], - vec![6, 7, 8], - vec![9, 10, 11], - vec![12, 13, 14, 15], - ], - type_ngram_weights: vec![ - vec![16, 17, 18, 19, 20, 21], - vec![22, 23, 24, 25, 26, 27], - vec![28, 29, 30, 31, 32], - vec![33, 34, 35, 36, 37], - ], dict_weights: vec![ DictWeight { right: 38, @@ -460,28 +466,47 @@ mod tests { /// 世: 44 46 fn generate_model_3() -> Model { Model { - char_ngrams: vec![ - "我ら".to_string(), - "全世界".to_string(), - "国民".to_string(), - "世界".to_string(), - "界".to_string(), - ], - type_ngrams: vec![b"H".to_vec(), b"K".to_vec(), b"KH".to_vec(), b"HK".to_vec()], + char_ngram_model: NgramModel::new(vec![ + NgramData { + ngram: "我ら".to_string(), + weights: vec![1, 2, 3], + }, + NgramData { + ngram: "全世界".to_string(), + weights: vec![4, 5], + }, + NgramData { + ngram: "国民".to_string(), + weights: vec![6, 7, 8], + }, + NgramData { + ngram: "世界".to_string(), + weights: vec![9, 10, 11], + }, + NgramData { + ngram: "界".to_string(), + weights: vec![12, 13, 14, 15], + }, + ]), + type_ngram_model: NgramModel::new(vec![ + NgramData { + ngram: b"H".to_vec(), + weights: vec![16, 17, 18, 19, 20, 21], + }, + NgramData { + ngram: b"K".to_vec(), + weights: vec![22, 23, 24, 25, 26, 27], + }, + NgramData { + ngram: b"KH".to_vec(), + weights: vec![28, 29, 30, 31, 32], + }, + NgramData { + ngram: b"HK".to_vec(), + weights: vec![33, 34, 35, 36, 37], + }, + ]), dict: vec!["国民".to_string(), "世界".to_string(), "世".to_string()], - char_ngram_weights: vec![ - vec![1, 2, 3], - vec![4, 5], - vec![6, 7, 8], - vec![9, 10, 11], - vec![12, 13, 14, 15], - ], - type_ngram_weights: vec![ - vec![16, 17, 18, 19, 20, 21], - vec![22, 23, 24, 25, 26, 27], - vec![28, 29, 30, 31, 32], - vec![33, 34, 35, 36, 37], - ], dict_weights: vec![ DictWeight { right: 38, diff --git a/vaporetto/src/type_scorer.rs b/vaporetto/src/type_scorer.rs index 696068fe..5bc9299d 100644 --- a/vaporetto/src/type_scorer.rs +++ b/vaporetto/src/type_scorer.rs @@ -1,20 +1,19 @@ -use crate::sentence::Sentence; use daachorse::DoubleArrayAhoCorasick; +use crate::ngram_model::NgramModel; +use crate::sentence::Sentence; + pub enum TypeScorer { Pma(TypeScorerPma), Cache(TypeScorerCache), } impl TypeScorer { - /// # Panics - /// - /// `ngrams` and `weights` must have same number of entries. - pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { + pub fn new(model: NgramModel>, window_size: usize) -> Self { if window_size <= 3 { - Self::Cache(TypeScorerCache::new(ngrams, weights, window_size)) + Self::Cache(TypeScorerCache::new(model, window_size)) } else { - Self::Pma(TypeScorerPma::new(ngrams, weights, window_size)) + Self::Pma(TypeScorerPma::new(model, window_size)) } } @@ -33,16 +32,11 @@ pub struct TypeScorerPma { } impl TypeScorerPma { - /// # Panics - /// - /// `ngrams` and `weights` must have same number of entries. 
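The merge_weights pass that NgramModel now provides (see ngram_model.rs above; the scorers below call it before building their automata) is what makes find_overlapping_no_suffix_iter sufficient: if pattern q is a suffix of pattern p, every occurrence of p ends exactly where an occurrence of q ends, so q's weights can be folded into p up front and only the longest match needs reporting. A self-contained restatement over byte strings:

    use std::collections::HashMap;

    fn merge_suffix_weights(patterns: &[Vec<u8>], weights: &mut Vec<Vec<i32>>) {
        let index: HashMap<&[u8], usize> = patterns
            .iter()
            .enumerate()
            .map(|(i, p)| (p.as_slice(), i))
            .collect();
        let originals = weights.clone();
        for (i, p) in patterns.iter().enumerate() {
            let mut merged: Option<Vec<i32>> = None;
            for st in (0..p.len()).rev() {
                if let Some(&j) = index.get(&p[st..]) {
                    if let Some(m) = merged.as_mut() {
                        // Index 0 of each weight vector refers to the
                        // same boundary, since all suffix matches end
                        // at the same position; `zip` stops at the
                        // shorter vector.
                        for (mw, w) in m.iter_mut().zip(&originals[j]) {
                            *mw += *w;
                        }
                    } else {
                        merged = Some(originals[j].clone());
                    }
                }
            }
            // `p` is a suffix of itself, so `merged` is always set.
            weights[i] = merged.unwrap();
        }
    }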
- pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { - if ngrams.len() != weights.len() { - panic!("ngrams.len() != weights.len()"); - } + pub fn new(mut model: NgramModel>, window_size: usize) -> Self { + model.merge_weights(); Self { - pma: DoubleArrayAhoCorasick::new(ngrams).unwrap(), - weights, + pma: DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram)).unwrap(), + weights: model.data.into_iter().map(|d| d.weights).collect(), window_size, } } @@ -74,14 +68,10 @@ pub struct TypeScorerCache { } impl TypeScorerCache { - /// # Panics - /// - /// `ngrams` and `weights` must have same number of entries. - pub fn new(ngrams: &[Vec], weights: Vec>, window_size: usize) -> Self { - if ngrams.len() != weights.len() { - panic!("ngrams.len() != weights.len()"); - } - let pma = DoubleArrayAhoCorasick::new(ngrams).unwrap(); + pub fn new(mut model: NgramModel>, window_size: usize) -> Self { + model.merge_weights(); + let pma = DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram)).unwrap(); + let weights: Vec> = model.data.into_iter().map(|d| d.weights).collect(); let sequence_size = window_size * 2; let all_sequences = ALPHABET_SIZE.pow(sequence_size as u32); From ed4849ccbfde12d562c297c3913140eb76317d31 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Wed, 1 Dec 2021 16:58:31 +0900 Subject: [PATCH 16/60] Add DictModel (#7) * Add DictModel * Model DictWeight into dict_model * Fix bugs * Fix --- vaporetto/src/dict_model.rs | 137 ++++++++++++++++++++++++ vaporetto/src/dict_scorer.rs | 82 +++++++++++--- vaporetto/src/kytea_model.rs | 16 +-- vaporetto/src/lib.rs | 1 + vaporetto/src/model.rs | 22 ++-- vaporetto/src/ngram_model.rs | 2 +- vaporetto/src/predictor.rs | 202 +++++++++++++---------------------- 7 files changed, 293 insertions(+), 169 deletions(-) create mode 100644 vaporetto/src/dict_model.rs diff --git a/vaporetto/src/dict_model.rs b/vaporetto/src/dict_model.rs new file mode 100644 index 00000000..b84cd4ac --- /dev/null +++ b/vaporetto/src/dict_model.rs @@ -0,0 +1,137 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use crate::ngram_model::NgramModel; + +#[derive(Clone, Copy, Default, Serialize, Deserialize)] +pub struct DictWeight { + pub right: i32, + pub inner: i32, + pub left: i32, +} + +#[derive(Serialize, Deserialize)] +pub enum DictModel { + Wordwise(DictModelWordwise), + Lengthwise(DictModelLengthwise), +} + +impl DictModel { + pub fn merge_dict_weights( + &mut self, + char_ngram_model: &mut NgramModel, + char_window_size: usize, + ) { + match self { + Self::Wordwise(model) => model.merge_dict_weights(char_ngram_model, char_window_size), + Self::Lengthwise(model) => model.merge_dict_weights(char_ngram_model, char_window_size), + } + } + + pub fn is_empty(&self) -> bool { + match self { + Self::Wordwise(model) => model.is_empty(), + Self::Lengthwise(model) => model.is_empty(), + } + } +} + +#[derive(Clone, Serialize, Deserialize)] +pub struct WordwiseDictData { + pub(crate) word: String, + pub(crate) weights: DictWeight, +} + +#[derive(Serialize, Deserialize)] +pub struct DictModelWordwise { + pub(crate) data: Vec, +} + +impl DictModelWordwise { + pub fn merge_dict_weights( + &mut self, + char_ngram_model: &mut NgramModel, + char_window_size: usize, + ) { + let mut word_map = HashMap::new(); + for (i, word) in char_ngram_model + .data + .iter() + .map(|d| d.ngram.clone()) + .enumerate() + { + word_map.insert(word, i); + } + let mut new_data = vec![]; + for data in self.data.drain(..) 
{ + let word_size = data.word.chars().count(); + match word_map.get(&data.word) { + Some(&idx) if char_window_size >= word_size => { + let start = char_window_size - word_size; + let end = start + word_size; + char_ngram_model.data[idx].weights[start] += data.weights.right; + for i in start + 1..end { + char_ngram_model.data[idx].weights[i] += data.weights.inner; + } + char_ngram_model.data[idx].weights[end] += data.weights.left; + } + _ => { + new_data.push(data); + } + } + } + self.data = new_data; + } + + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } +} + +#[derive(Serialize, Deserialize)] +pub struct DictModelLengthwise { + pub(crate) words: Vec, + pub(crate) weights: Vec, +} + +impl DictModelLengthwise { + pub fn merge_dict_weights( + &mut self, + char_ngram_model: &mut NgramModel, + char_window_size: usize, + ) { + let mut word_map = HashMap::new(); + for (i, word) in char_ngram_model + .data + .iter() + .map(|d| d.ngram.clone()) + .enumerate() + { + word_map.insert(word, i); + } + let mut new_words = vec![]; + for word in self.words.drain(..) { + let word_size = word.chars().count(); + match word_map.get(&word) { + Some(&idx) if char_window_size >= word_size => { + let start = char_window_size - word_size; + let end = start + word_size; + let word_size_idx = word_size.min(self.weights.len()) - 1; + let weight = &self.weights[word_size_idx]; + char_ngram_model.data[idx].weights[start] += weight.right; + for i in start + 1..end { + char_ngram_model.data[idx].weights[i] += weight.inner; + } + char_ngram_model.data[idx].weights[end] += weight.left; + } + _ => new_words.push(word), + } + } + self.words = new_words; + } + + pub fn is_empty(&self) -> bool { + self.words.is_empty() + } +} diff --git a/vaporetto/src/dict_scorer.rs b/vaporetto/src/dict_scorer.rs index 5afc62f5..b489f76d 100644 --- a/vaporetto/src/dict_scorer.rs +++ b/vaporetto/src/dict_scorer.rs @@ -1,25 +1,45 @@ -use crate::model::DictWeight; -use crate::sentence::Sentence; use daachorse::DoubleArrayAhoCorasick; -pub struct DictScorer { +use crate::dict_model::{DictModel, DictModelLengthwise, DictModelWordwise, DictWeight}; +use crate::sentence::Sentence; + +pub enum DictScorer { + Wordwise(DictScorerWordwise), + Lengthwise(DictScorerLengthwise), +} + +impl DictScorer { + pub fn new(model: DictModel) -> Self { + match model { + DictModel::Wordwise(model) => Self::Wordwise(DictScorerWordwise::new(model)), + DictModel::Lengthwise(model) => Self::Lengthwise(DictScorerLengthwise::new(model)), + } + } + + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) { + match self { + Self::Wordwise(model) => model.add_scores(sentence, ys), + Self::Lengthwise(model) => model.add_scores(sentence, ys), + } + } +} + +pub struct DictScorerWordwise { pma: DoubleArrayAhoCorasick, weights: Vec, - word_wise_score: bool, } -impl DictScorer { - /// # Panics - /// - /// `ngrams` and `weights` must have same number of entries. 
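The DictWeight triple scores the three regions around one dictionary hit. Writing ys[k] for the boundary between characters k and k + 1, a match covering characters [m_start, m_end) adds right to the boundary at the word's start, inner to every boundary inside the word, and left to the boundary at its end, exactly as the two add_scores implementations that follow do (the field names appear to denote which side of the boundary the word lies on). A compact restatement:

    fn add_dict_match(w: &DictWeight, m_start: usize, m_end: usize, ys: &mut [i32]) {
        if m_start != 0 {
            ys[m_start - 1] += w.right; // boundary just before the word
        }
        for y in &mut ys[m_start..m_end - 1] {
            *y += w.inner; // boundaries inside the word
        }
        if m_end <= ys.len() {
            ys[m_end - 1] += w.left; // boundary just after the word
        }
    }
    // E.g. a match on characters 3..5 of a 9-character sentence
    // (ys.len() == 8) touches ys[2] (right), ys[3] (inner), ys[4] (left).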
- pub fn new(words: &[String], weights: Vec, word_wise_score: bool) -> Self { - if word_wise_score && words.len() != weights.len() { - panic!("word_wise_score == true && words.len() != weights.len()"); +impl DictScorerWordwise { + pub fn new(model: DictModelWordwise) -> Self { + let mut words = vec![]; + let mut weights = vec![]; + for pair in model.data { + words.push(pair.word); + weights.push(pair.weights); } Self { pma: DoubleArrayAhoCorasick::new(words).unwrap(), weights, - word_wise_score, } } @@ -27,11 +47,39 @@ impl DictScorer { for m in self.pma.find_overlapping_iter(&sentence.text) { let m_start = sentence.str_to_char_pos[m.start()]; let m_end = sentence.str_to_char_pos[m.end()]; - let idx = if self.word_wise_score { - m.pattern() - } else { - std::cmp::min(m_end - m_start, self.weights.len()) - 1 - }; + let idx = m.pattern(); + let dict_weight = self.weights[idx]; + if m_start != 0 { + ys[m_start - 1] += dict_weight.right; + } + for y in &mut ys[m_start..m_end - 1] { + *y += dict_weight.inner; + } + if m_end <= ys.len() { + ys[m_end - 1] += dict_weight.left; + } + } + } +} + +pub struct DictScorerLengthwise { + pma: DoubleArrayAhoCorasick, + weights: Vec, +} + +impl DictScorerLengthwise { + pub fn new(model: DictModelLengthwise) -> Self { + Self { + pma: DoubleArrayAhoCorasick::new(model.words).unwrap(), + weights: model.weights, + } + } + + pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) { + for m in self.pma.find_overlapping_iter(&sentence.text) { + let m_start = sentence.str_to_char_pos[m.start()]; + let m_end = sentence.str_to_char_pos[m.end()]; + let idx = (m_end - m_start).min(self.weights.len()) - 1; let dict_weight = self.weights[idx]; if m_start != 0 { ys[m_start - 1] += dict_weight.right; diff --git a/vaporetto/src/kytea_model.rs b/vaporetto/src/kytea_model.rs index 13fdaa8a..12696ef2 100644 --- a/vaporetto/src/kytea_model.rs +++ b/vaporetto/src/kytea_model.rs @@ -4,7 +4,8 @@ use std::io::BufRead; use anyhow::{anyhow, Result}; use byteorder::{LittleEndian, ReadBytesExt}; -use crate::model::{DictWeight, Model}; +use crate::dict_model::{DictModel, DictModelWordwise, DictWeight, WordwiseDictData}; +use crate::model::Model; use crate::ngram_model::{NgramData, NgramModel}; struct KyteaConfig { @@ -432,8 +433,7 @@ impl TryFrom for Model { }); } - let mut dict: Vec = vec![]; - let mut dict_weights = vec![]; + let mut dict_data = vec![]; if let Some(kytea_dict) = model.dict { for (w, data) in kytea_dict.dump_items() { let word_len = std::cmp::min(w.len(), config.dict_n as usize) - 1; @@ -446,20 +446,20 @@ impl TryFrom for Model { weights.left += feature_lookup.dict_vec[offset + 2] as i32; } } - dict_weights.push(weights); - dict.push(w.into_iter().collect()); + dict_data.push(WordwiseDictData { + word: w.into_iter().collect(), + weights, + }); } } Ok(Self { char_ngram_model: NgramModel::new(char_ngrams), type_ngram_model: NgramModel::new(type_ngrams), - dict, + dict_model: DictModel::Wordwise(DictModelWordwise { data: dict_data }), quantize_multiplier, - dict_weights, - dict_word_wise: true, bias, char_window_size: config.char_w as usize, type_window_size: config.type_w as usize, diff --git a/vaporetto/src/lib.rs b/vaporetto/src/lib.rs index c81fce84..15fecd47 100644 --- a/vaporetto/src/lib.rs +++ b/vaporetto/src/lib.rs @@ -29,6 +29,7 @@ mod utils; mod char_scorer; +mod dict_model; mod dict_scorer; mod model; mod ngram_model; diff --git a/vaporetto/src/model.rs b/vaporetto/src/model.rs index 2d038892..085e8cbd 100644 --- a/vaporetto/src/model.rs +++ 
b/vaporetto/src/model.rs @@ -3,8 +3,11 @@ use std::io::{Read, Write}; use anyhow::Result; use serde::{Deserialize, Serialize}; +use crate::dict_model::DictModel; use crate::ngram_model::NgramModel; +#[cfg(feature = "train")] +use crate::dict_model::{DictModelLengthwise, DictWeight}; #[cfg(feature = "train")] use crate::feature::FeatureContent; #[cfg(feature = "train")] @@ -23,25 +26,15 @@ const EPSILON: f64 = 1e-6; #[cfg(feature = "train")] const QUANTIZE_BIT_DEPTH: u8 = 16; -#[derive(Clone, Copy, Default, Serialize, Deserialize)] -pub struct DictWeight { - pub right: i32, - pub inner: i32, - pub left: i32, -} - /// Model data. #[derive(Serialize, Deserialize)] pub struct Model { pub(crate) char_ngram_model: NgramModel, pub(crate) type_ngram_model: NgramModel>, - pub(crate) dict: Vec, - pub(crate) dict_weights: Vec, + pub(crate) dict_model: DictModel, pub(crate) quantize_multiplier: f64, - pub(crate) dict_word_wise: bool, - pub(crate) bias: i32, pub(crate) char_window_size: usize, pub(crate) type_window_size: usize, @@ -158,12 +151,13 @@ impl Model { Self { char_ngram_model: NgramModel::new(char_ngrams), type_ngram_model: NgramModel::new(type_ngrams), - dict, + dict_model: DictModel::Lengthwise(DictModelLengthwise { + words: dict, + weights: dict_weights, + }), quantize_multiplier, - dict_weights, - dict_word_wise: false, bias, char_window_size, type_window_size, diff --git a/vaporetto/src/ngram_model.rs b/vaporetto/src/ngram_model.rs index dbff7a91..28ce97e6 100644 --- a/vaporetto/src/ngram_model.rs +++ b/vaporetto/src/ngram_model.rs @@ -24,7 +24,7 @@ impl NgramModel where T: AsRef<[u8]> + Clone, { - #[cfg(any(feature = "train", test))] + #[cfg(any(feature = "train", feature = "kytea", test))] pub fn new(data: Vec>) -> Self { Self { data, diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs index 89d61836..24e670cb 100644 --- a/vaporetto/src/predictor.rs +++ b/vaporetto/src/predictor.rs @@ -1,9 +1,6 @@ -use std::collections::HashMap; - use crate::char_scorer::CharScorer; use crate::dict_scorer::DictScorer; -use crate::model::{DictWeight, Model}; -use crate::ngram_model::NgramModel; +use crate::model::Model; use crate::sentence::{BoundaryType, Sentence}; use crate::type_scorer::TypeScorer; @@ -39,23 +36,16 @@ impl Predictor { let mut char_ngram_model = model.char_ngram_model; let type_ngram_model = model.type_ngram_model; - let dict = model.dict; - let dict_weights = model.dict_weights; - - let (dict, dict_weights) = Self::merge_dict_weights( - dict, - dict_weights, - &mut char_ngram_model, - model.char_window_size, - model.dict_word_wise, - ); + let mut dict_model = model.dict_model; + + dict_model.merge_dict_weights(&mut char_ngram_model, model.char_window_size); let char_scorer = CharScorer::new(char_ngram_model, model.char_window_size); let type_scorer = TypeScorer::new(type_ngram_model, model.type_window_size); - let dict_scorer = if dict.is_empty() { + let dict_scorer = if dict_model.is_empty() { None } else { - Some(DictScorer::new(&dict, dict_weights, model.dict_word_wise)) + Some(DictScorer::new(dict_model)) }; Self { @@ -72,66 +62,6 @@ impl Predictor { } } - fn merge_dict_weights( - dict: Vec, - dict_weights: Vec, - char_ngram_model: &mut NgramModel, - char_window_size: usize, - dict_word_wise: bool, - ) -> (Vec, Vec) { - let mut word_map = HashMap::new(); - for (i, word) in char_ngram_model - .data - .iter() - .map(|d| d.ngram.clone()) - .enumerate() - { - word_map.insert(word, i); - } - let mut new_dict = vec![]; - if dict_word_wise { - let mut new_dict_weights 
= vec![]; - for (word, weight) in dict.into_iter().zip(dict_weights) { - let word_size = word.chars().count(); - match word_map.get(&word) { - Some(&idx) if char_window_size >= word_size => { - let start = char_window_size - word_size; - let end = start + word_size; - char_ngram_model.data[idx].weights[start] += weight.right; - for i in start + 1..end { - char_ngram_model.data[idx].weights[i] += weight.inner; - } - char_ngram_model.data[idx].weights[end] += weight.left; - } - _ => { - new_dict.push(word); - new_dict_weights.push(weight); - } - } - } - (new_dict, new_dict_weights) - } else { - for word in dict { - let word_size = word.chars().count(); - match word_map.get(&word) { - Some(&idx) if char_window_size >= word_size => { - let start = char_window_size - word_size; - let end = start + word_size; - let word_size_idx = std::cmp::min(word_size, dict_weights.len()) - 1; - let weight = &dict_weights[word_size_idx]; - char_ngram_model.data[idx].weights[start] += weight.right; - for i in start + 1..end { - char_ngram_model.data[idx].weights[i] += weight.inner; - } - char_ngram_model.data[idx].weights[end] += weight.left; - } - _ => new_dict.push(word), - } - } - (new_dict, dict_weights) - } - } - fn predict_impl(&self, sentence: &Sentence, padding: usize, ys: &mut [i32]) { ys.fill(self.bias); self.char_scorer.add_scores(sentence, padding, ys); @@ -253,7 +183,10 @@ impl Predictor { mod tests { use super::*; - use crate::ngram_model::NgramData; + use crate::dict_model::{ + DictModel, DictModelLengthwise, DictModelWordwise, DictWeight, WordwiseDictData, + }; + use crate::ngram_model::{NgramData, NgramModel}; /// Input: 我 ら は 全 世 界 の 国 民 /// bias: -200 .. .. .. .. .. .. .. @@ -323,21 +256,22 @@ mod tests { weights: vec![37, 38, 39], }, ]), - dict: vec!["全世界".to_string(), "世界".to_string(), "世".to_string()], - dict_weights: vec![ - DictWeight { - right: 40, - inner: 41, - left: 42, - }, - DictWeight { - right: 43, - inner: 44, - left: 45, - }, - ], + dict_model: DictModel::Lengthwise(DictModelLengthwise { + words: vec!["全世界".to_string(), "世界".to_string(), "世".to_string()], + weights: vec![ + DictWeight { + right: 40, + inner: 41, + left: 42, + }, + DictWeight { + right: 43, + inner: 44, + left: 45, + }, + ], + }), quantize_multiplier: 0.5, - dict_word_wise: false, bias: -200, char_window_size: 3, type_window_size: 2, @@ -412,26 +346,27 @@ mod tests { weights: vec![33, 34, 35, 36, 37], }, ]), - dict: vec!["全世界".to_string(), "世界".to_string(), "世".to_string()], - dict_weights: vec![ - DictWeight { - right: 38, - inner: 39, - left: 40, - }, - DictWeight { - right: 41, - inner: 42, - left: 43, - }, - DictWeight { - right: 44, - inner: 45, - left: 46, - }, - ], + dict_model: DictModel::Lengthwise(DictModelLengthwise { + words: vec!["全世界".to_string(), "世界".to_string(), "世".to_string()], + weights: vec![ + DictWeight { + right: 38, + inner: 39, + left: 40, + }, + DictWeight { + right: 41, + inner: 42, + left: 43, + }, + DictWeight { + right: 44, + inner: 45, + left: 46, + }, + ], + }), quantize_multiplier: 0.25, - dict_word_wise: false, bias: -285, char_window_size: 2, type_window_size: 3, @@ -506,26 +441,35 @@ mod tests { weights: vec![33, 34, 35, 36, 37], }, ]), - dict: vec!["国民".to_string(), "世界".to_string(), "世".to_string()], - dict_weights: vec![ - DictWeight { - right: 38, - inner: 39, - left: 40, - }, - DictWeight { - right: 41, - inner: 42, - left: 43, - }, - DictWeight { - right: 44, - inner: 45, - left: 46, - }, - ], + dict_model: DictModel::Wordwise(DictModelWordwise { + data: vec![ + 
WordwiseDictData { + word: "国民".to_string(), + weights: DictWeight { + right: 38, + inner: 39, + left: 40, + }, + }, + WordwiseDictData { + word: "世界".to_string(), + weights: DictWeight { + right: 41, + inner: 42, + left: 43, + }, + }, + WordwiseDictData { + word: "世".to_string(), + weights: DictWeight { + right: 44, + inner: 45, + left: 46, + }, + }, + ], + }), quantize_multiplier: 0.25, - dict_word_wise: true, bias: -285, char_window_size: 2, type_window_size: 3, From 35a6decaa04d3aaf6c89c754f1ffc3aef2360ec3 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Wed, 1 Dec 2021 20:03:42 +0900 Subject: [PATCH 17/60] Add errors module and remove anyhow from deps (#8) * Add errors module and remove anyhow from deps * Add a missing file * impl Error * Fix apis * Fix doc of vaporetto_rules --- evaluate/src/main.rs | 2 +- predict/src/main.rs | 2 +- vaporetto/Cargo.toml | 1 - vaporetto/src/dict_scorer.rs | 20 +++--- vaporetto/src/errors.rs | 115 +++++++++++++++++++++++++++++++++++ vaporetto/src/feature.rs | 9 ++- vaporetto/src/kytea_model.rs | 12 ++-- vaporetto/src/lib.rs | 4 +- vaporetto/src/model.rs | 10 ++- vaporetto/src/predictor.rs | 21 ++++--- vaporetto/src/sentence.rs | 78 ++++++++++++++++-------- vaporetto/src/trainer.rs | 9 +-- vaporetto_rules/src/lib.rs | 2 +- vaporetto_wasm/src/lib.rs | 2 +- 14 files changed, 221 insertions(+), 66 deletions(-) create mode 100644 vaporetto/src/errors.rs diff --git a/evaluate/src/main.rs b/evaluate/src/main.rs index 8dfeb791..3e96ecf0 100644 --- a/evaluate/src/main.rs +++ b/evaluate/src/main.rs @@ -72,7 +72,7 @@ fn main() -> Result<(), Box> { eprintln!("Loading model file..."); let mut f = zstd::Decoder::new(File::open(opt.model)?)?; let model = Model::read(&mut f)?; - let predictor = Predictor::new(model); + let predictor = Predictor::new(model)?; eprintln!("Start tokenization"); let mut n_true_positive = 0; diff --git a/predict/src/main.rs b/predict/src/main.rs index e6210f11..d8201bce 100644 --- a/predict/src/main.rs +++ b/predict/src/main.rs @@ -70,7 +70,7 @@ fn main() -> Result<(), Box> { eprintln!("Loading model file..."); let mut f = zstd::Decoder::new(File::open(opt.model)?)?; let model = Model::read(&mut f)?; - let predictor = Predictor::new(model); + let predictor = Predictor::new(model)?; eprintln!("Start tokenization"); let mut n_boundaries = 0; diff --git a/vaporetto/Cargo.toml b/vaporetto/Cargo.toml index 44161ed3..b25d7528 100644 --- a/vaporetto/Cargo.toml +++ b/vaporetto/Cargo.toml @@ -13,7 +13,6 @@ categories = ["text-processing"] autotests = false [dependencies] -anyhow = "1.0" # MIT or Apache-2.0 bincode = "1.3.3" # MIT daachorse = "0.2.0" # MIT or Apache-2.0 serde = { version = "1.0", features = ["derive"] } # MIT or Apache-2.0 diff --git a/vaporetto/src/dict_scorer.rs b/vaporetto/src/dict_scorer.rs index b489f76d..8d3ab03d 100644 --- a/vaporetto/src/dict_scorer.rs +++ b/vaporetto/src/dict_scorer.rs @@ -1,6 +1,7 @@ use daachorse::DoubleArrayAhoCorasick; use crate::dict_model::{DictModel, DictModelLengthwise, DictModelWordwise, DictWeight}; +use crate::errors::{Result, VaporettoError}; use crate::sentence::Sentence; pub enum DictScorer { @@ -9,11 +10,11 @@ pub enum DictScorer { } impl DictScorer { - pub fn new(model: DictModel) -> Self { - match model { + pub fn new(model: DictModel) -> Result { + Ok(match model { DictModel::Wordwise(model) => Self::Wordwise(DictScorerWordwise::new(model)), - DictModel::Lengthwise(model) => Self::Lengthwise(DictScorerLengthwise::new(model)), - } + DictModel::Lengthwise(model) => 
Self::Lengthwise(DictScorerLengthwise::new(model)?), + }) } pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) { @@ -68,11 +69,16 @@ pub struct DictScorerLengthwise { } impl DictScorerLengthwise { - pub fn new(model: DictModelLengthwise) -> Self { - Self { + pub fn new(model: DictModelLengthwise) -> Result { + if model.weights.is_empty() { + return Err(VaporettoError::invalid_model( + "dict_word_max_size must be >= 1", + )); + } + Ok(Self { pma: DoubleArrayAhoCorasick::new(model.words).unwrap(), weights: model.weights, - } + }) } pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) { diff --git a/vaporetto/src/errors.rs b/vaporetto/src/errors.rs new file mode 100644 index 00000000..863da6cf --- /dev/null +++ b/vaporetto/src/errors.rs @@ -0,0 +1,115 @@ +//! Definition of errors. + +use std::error::Error; +use std::fmt; + +#[derive(Debug)] +pub enum VaporettoError { + InvalidModel(InvalidModelError), + InvalidSentence(InvalidSentenceError), + InvalidArgument(InvalidArgumentError), + IOError(std::io::Error), + UTF8Error(std::string::FromUtf8Error), +} + +impl VaporettoError { + pub(crate) fn invalid_model(msg: S) -> Self + where + S: Into, + { + Self::InvalidModel(InvalidModelError { msg: msg.into() }) + } + + pub(crate) fn invalid_sentence(msg: S) -> Self + where + S: Into, + { + Self::InvalidSentence(InvalidSentenceError { msg: msg.into() }) + } + + pub(crate) fn invalid_argument(arg: &'static str, msg: S) -> Self + where + S: Into, + { + Self::InvalidArgument(InvalidArgumentError { + arg, + msg: msg.into(), + }) + } +} + +impl fmt::Display for VaporettoError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::InvalidModel(e) => e.fmt(f), + Self::InvalidSentence(e) => e.fmt(f), + Self::InvalidArgument(e) => e.fmt(f), + Self::IOError(e) => e.fmt(f), + Self::UTF8Error(e) => e.fmt(f), + } + } +} + +impl Error for VaporettoError {} + +pub type Result = std::result::Result; + +/// Error used when the model is invalid. +#[derive(Debug)] +pub struct InvalidModelError { + /// Error message. + pub(crate) msg: String, +} + +impl fmt::Display for InvalidModelError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "InvalidModelError: {}", self.msg) + } +} + +impl Error for InvalidModelError {} + +/// Error used when the sentence is invalid. +#[derive(Debug)] +pub struct InvalidSentenceError { + /// Error message. + pub(crate) msg: String, +} + +impl fmt::Display for InvalidSentenceError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "InvalidSentenceError: {}", self.msg) + } +} + +impl Error for InvalidSentenceError {} + +/// Error used when the argument is invalid. +#[derive(Debug)] +pub struct InvalidArgumentError { + /// Name of the argument. + pub(crate) arg: &'static str, + + /// Error message. 
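With the errors module in place, fallible construction and parsing compose with the ? operator. A minimal usage sketch built only from APIs visible in this patch; both calls already return the crate's Result, so ? simply propagates InvalidModelError or InvalidSentenceError:

    use vaporetto::errors::Result;
    use vaporetto::{Model, Predictor, Sentence};

    fn segment(model: Model, text: &str) -> Result<Sentence> {
        let predictor = Predictor::new(model)?; // fails on an invalid model
        let s = Sentence::from_raw(text)?;      // fails on invalid input text
        Ok(predictor.predict(s))
    }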
+ pub(crate) msg: String, +} + +impl fmt::Display for InvalidArgumentError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "InvalidArgumentError: {}: {}", self.arg, self.msg) + } +} + +impl Error for InvalidArgumentError {} + +impl From for VaporettoError { + fn from(error: std::io::Error) -> Self { + Self::IOError(error) + } +} + +impl From for VaporettoError { + fn from(error: std::string::FromUtf8Error) -> Self { + Self::UTF8Error(error) + } +} diff --git a/vaporetto/src/feature.rs b/vaporetto/src/feature.rs index 31a12f9a..8d80ad36 100644 --- a/vaporetto/src/feature.rs +++ b/vaporetto/src/feature.rs @@ -1,6 +1,6 @@ +use crate::errors::{Result, VaporettoError}; use crate::sentence::{BoundaryType, Sentence}; -use anyhow::{anyhow, Result}; use daachorse::DoubleArrayAhoCorasick; #[derive(Debug, Hash, Clone, Copy, PartialEq, Eq)] @@ -55,7 +55,10 @@ impl FeatureExtractor { dict_word_max_size, ); if size == 0 { - return Err(anyhow!("`dictionary` contains an empty string")); + return Err(VaporettoError::invalid_argument( + "dictionary", + "contains an empty string", + )); } dict_word_size.push(size); } @@ -224,7 +227,7 @@ mod tests { assert!(fe.is_err()); assert_eq!( - "`dictionary` contains an empty string", + "InvalidArgumentError: dictionary: contains an empty string", &fe.err().unwrap().to_string() ); } diff --git a/vaporetto/src/kytea_model.rs b/vaporetto/src/kytea_model.rs index 12696ef2..a6de598b 100644 --- a/vaporetto/src/kytea_model.rs +++ b/vaporetto/src/kytea_model.rs @@ -1,10 +1,10 @@ use std::convert::TryFrom; use std::io::BufRead; -use anyhow::{anyhow, Result}; use byteorder::{LittleEndian, ReadBytesExt}; use crate::dict_model::{DictModel, DictModelWordwise, DictWeight, WordwiseDictData}; +use crate::errors::{Result, VaporettoError}; use crate::model::Model; use crate::ngram_model::{NgramData, NgramModel}; @@ -392,24 +392,24 @@ impl KyteaModel { } impl TryFrom for Model { - type Error = anyhow::Error; + type Error = VaporettoError; fn try_from(model: KyteaModel) -> Result { let config = &model.config; let wordseg_model = model .wordseg_model - .ok_or_else(|| anyhow!("no word segmentation model."))?; + .ok_or_else(|| VaporettoError::invalid_model("no word segmentation model."))?; let quantize_multiplier = wordseg_model.multiplier; let feature_lookup = wordseg_model .feature_lookup - .ok_or_else(|| anyhow!("no lookup data."))?; + .ok_or_else(|| VaporettoError::invalid_model("no lookup data."))?; let bias = feature_lookup.biases[0] as i32; let char_dict = feature_lookup .char_dict - .ok_or_else(|| anyhow!("no character dictionary."))?; + .ok_or_else(|| VaporettoError::invalid_model("no character dictionary."))?; let type_dict = feature_lookup .type_dict - .ok_or_else(|| anyhow!("no type dictionary."))?; + .ok_or_else(|| VaporettoError::invalid_model("no type dictionary."))?; let mut char_ngrams = vec![]; for (char_ngram, v) in char_dict.dump_items() { diff --git a/vaporetto/src/lib.rs b/vaporetto/src/lib.rs index 15fecd47..1705d53c 100644 --- a/vaporetto/src/lib.rs +++ b/vaporetto/src/lib.rs @@ -15,7 +15,7 @@ //! //! let mut f = BufReader::new(File::open("model.bin").unwrap()); //! let model = Model::read(&mut f).unwrap(); -//! let predictor = Predictor::new(model); +//! let predictor = Predictor::new(model).unwrap(); //! //! let s = Sentence::from_raw("火星猫の生態").unwrap(); //! 
//! let s = predictor.predict(s);
@@ -37,6 +37,8 @@ mod predictor;
 mod sentence;
 mod type_scorer;
 
+pub mod errors;
+
 #[cfg(feature = "train")]
 mod feature;
 #[cfg(feature = "train")]
diff --git a/vaporetto/src/model.rs b/vaporetto/src/model.rs
index 085e8cbd..cbbfa18b 100644
--- a/vaporetto/src/model.rs
+++ b/vaporetto/src/model.rs
@@ -1,6 +1,5 @@
 use std::io::{Read, Write};
 
-use anyhow::Result;
 use serde::{Deserialize, Serialize};
 
 use crate::dict_model::DictModel;
@@ -50,12 +49,11 @@ impl Model {
     /// # Errors
     ///
     /// When `wtr` generates an error, it will be returned as is.
-    pub fn write<W>(&self, wtr: &mut W) -> Result<()>
+    pub fn write<W>(&self, wtr: &mut W) -> Result<(), bincode::Error>
     where
         W: Write,
     {
-        bincode::serialize_into(wtr, self)?;
-        Ok(())
+        bincode::serialize_into(wtr, self)
     }
 
     /// Creates a model from a reader.
@@ -71,11 +69,11 @@ impl Model {
     /// # Errors
     ///
     /// When `rdr` generates an error, it will be returned as is.
-    pub fn read<R>(rdr: &mut R) -> Result<Self>
+    pub fn read<R>(rdr: &mut R) -> Result<Self, bincode::Error>
     where
         R: Read,
     {
-        Ok(bincode::deserialize_from(rdr)?)
+        bincode::deserialize_from(rdr)
     }
 
     #[cfg(feature = "train")]
diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs
index 24e670cb..66f953a3 100644
--- a/vaporetto/src/predictor.rs
+++ b/vaporetto/src/predictor.rs
@@ -1,5 +1,6 @@
 use crate::char_scorer::CharScorer;
 use crate::dict_scorer::DictScorer;
+use crate::errors::Result;
 use crate::model::Model;
 use crate::sentence::{BoundaryType, Sentence};
 use crate::type_scorer::TypeScorer;
@@ -31,7 +32,7 @@ impl Predictor {
     /// # Returns
     ///
     /// A new predictor.
-    pub fn new(model: Model) -> Self {
+    pub fn new(model: Model) -> Result<Self> {
         let bias = model.bias;
 
         let mut char_ngram_model = model.char_ngram_model;
@@ -45,10 +46,10 @@ impl Predictor {
         let dict_scorer = if dict_model.is_empty() {
             None
         } else {
-            Some(DictScorer::new(dict_model))
+            Some(DictScorer::new(dict_model)?)
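            // [Editorial sketch, not part of the original patch: mapping an
            // empty dictionary to `None` above avoids building an automaton
            // over zero patterns, so the `?` on `DictScorer::new` only fires
            // for genuinely malformed models. With this change a caller
            // writes, e.g.,
            //
            //     let predictor = Predictor::new(model)?;
            //
            // instead of assuming that construction cannot fail.]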
}; - Self { + Ok(Self { bias, char_scorer, @@ -59,7 +60,7 @@ impl Predictor { #[cfg(feature = "simd")] padding: model.char_window_size.max(model.type_window_size), - } + }) } fn predict_impl(&self, sentence: &Sentence, padding: usize, ys: &mut [i32]) { @@ -479,7 +480,7 @@ mod tests { #[test] fn test_predict_1() { let model = generate_model_1(); - let p = Predictor::new(model); + let p = Predictor::new(model).unwrap(); let s = Sentence::from_raw("我らは全世界の国民").unwrap(); let s = p.predict(s); assert_eq!( @@ -500,7 +501,7 @@ mod tests { #[test] fn test_predict_2() { let model = generate_model_2(); - let p = Predictor::new(model); + let p = Predictor::new(model).unwrap(); let s = Sentence::from_raw("我らは全世界の国民").unwrap(); let s = p.predict(s); assert_eq!( @@ -521,7 +522,7 @@ mod tests { #[test] fn test_predict_3() { let model = generate_model_3(); - let p = Predictor::new(model); + let p = Predictor::new(model).unwrap(); let s = Sentence::from_raw("我らは全世界の国民").unwrap(); let s = p.predict(s); assert_eq!( @@ -542,7 +543,7 @@ mod tests { #[test] fn test_predict_with_score_1() { let model = generate_model_1(); - let p = Predictor::new(model); + let p = Predictor::new(model).unwrap(); let s = Sentence::from_raw("我らは全世界の国民").unwrap(); let s = p.predict_with_score(s); assert_eq!( @@ -567,7 +568,7 @@ mod tests { #[test] fn test_predict_with_score_2() { let model = generate_model_2(); - let p = Predictor::new(model); + let p = Predictor::new(model).unwrap(); let s = Sentence::from_raw("我らは全世界の国民").unwrap(); let s = p.predict_with_score(s); assert_eq!( @@ -592,7 +593,7 @@ mod tests { #[test] fn test_predict_with_score_3() { let model = generate_model_3(); - let p = Predictor::new(model); + let p = Predictor::new(model).unwrap(); let s = Sentence::from_raw("我らは全世界の国民").unwrap(); let s = p.predict_with_score(s); assert_eq!( diff --git a/vaporetto/src/sentence.rs b/vaporetto/src/sentence.rs index 9f7804b4..fb32ca4d 100644 --- a/vaporetto/src/sentence.rs +++ b/vaporetto/src/sentence.rs @@ -1,4 +1,4 @@ -use anyhow::{anyhow, Result}; +use crate::errors::{Result, VaporettoError}; /// Character type. 
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
@@ -140,7 +140,7 @@ impl Sentence {
         let text = text.into();
 
         if text.is_empty() {
-            return Err(anyhow!("`text` is empty"));
+            return Err(VaporettoError::invalid_argument("text", "is empty"));
         }
 
         let chars: Vec<char> = text.chars().collect();
@@ -212,7 +212,10 @@ impl Sentence {
         let tokenized_text = tokenized_text.as_ref();
 
         if tokenized_text.is_empty() {
-            return Err(anyhow!("`tokenized_text` is empty"));
+            return Err(VaporettoError::invalid_argument(
+                "tokenized_text",
+                "is empty",
+            ));
         }
 
         let tokenized_chars: Vec<char> = tokenized_text.chars().collect();
@@ -228,9 +231,15 @@ impl Sentence {
                 }
                 (false, ' ') => {
                     if chars.is_empty() {
-                        return Err(anyhow!("`tokenized_text` starts with a whitespace"));
+                        return Err(VaporettoError::invalid_argument(
+                            "tokenized_text",
+                            "starts with a whitespace",
+                        ));
                     } else if prev_boundary {
-                        return Err(anyhow!("`tokenized_text` contains consecutive whitespaces"));
+                        return Err(VaporettoError::invalid_argument(
+                            "tokenized_text",
+                            "contains consecutive whitespaces",
+                        ));
                     }
                     prev_boundary = true;
                 }
@@ -249,7 +258,10 @@
             };
         }
         if prev_boundary {
-            return Err(anyhow!("`tokenized_text` ends with a whitespace"));
+            return Err(VaporettoError::invalid_argument(
+                "tokenized_text",
+                "ends with a whitespace",
+            ));
         }
 
         let (char_to_str_pos, str_to_char_pos, char_type) = Self::common_info(&chars);
@@ -296,7 +308,9 @@
                 }
                 BoundaryType::NotWordBoundary => (),
                 BoundaryType::Unknown => {
-                    return Err(anyhow!("sentence contains an unknown boundary"));
+                    return Err(VaporettoError::invalid_sentence(
+                        "contains an unknown boundary",
+                    ));
                 }
             }
             match c {
@@ -344,7 +358,9 @@
                 }
                 BoundaryType::NotWordBoundary => (),
                 BoundaryType::Unknown => {
-                    return Err(anyhow!("sentence contains an unknown boundary"));
+                    return Err(VaporettoError::invalid_sentence(
+                        "contains an unknown boundary",
+                    ));
                 }
             }
         }
@@ -389,14 +405,14 @@
         let labeled_text = labeled_text.as_ref();
 
         if labeled_text.is_empty() {
-            return Err(anyhow!("`labeled_text` is empty"));
+            return Err(VaporettoError::invalid_argument("labeled_text", "is empty"));
         }
 
         let labeled_chars: Vec<char> = labeled_text.chars().collect();
         if labeled_chars.len() & 0x01 == 0 {
-            return Err(anyhow!(
-                "invalid length for `labeled_text`: {}",
-                labeled_chars.len()
+            return Err(VaporettoError::invalid_argument(
+                "labeled_text",
+                format!("invalid length: {}", labeled_chars.len()),
             ));
         }
         let mut chars = Vec::with_capacity(labeled_chars.len() / 2 + 1);
@@ -407,7 +423,12 @@
                 ' ' => BoundaryType::Unknown,
                 '|' => BoundaryType::WordBoundary,
                 '-' => BoundaryType::NotWordBoundary,
-                _ => return Err(anyhow!("invalid boundary character: '{}'", c)),
+                _ => {
+                    return Err(VaporettoError::invalid_argument(
+                        "labeled_text",
+                        format!("contains invalid boundary character: '{}'", c),
+                    ))
+                }
             });
         }
         for c in labeled_chars.into_iter().step_by(2) {
@@ -527,7 +548,7 @@
         } else {
             match self.str_to_char_pos.get(index) {
                 Some(index) if *index != 0 => Ok(*index),
-                _ => Err(anyhow!("invalid index")),
+                _ => Err(VaporettoError::invalid_argument("index", "invalid index")),
             }
         }
     }
@@ -556,7 +577,10 @@ mod tests {
 
         let s = Sentence::from_raw("");
 
         assert!(s.is_err());
-        assert_eq!("`text` is empty", &s.err().unwrap().to_string());
+        assert_eq!(
+            "InvalidArgumentError: text: is empty",
+            &s.err().unwrap().to_string()
+        );
     }
 
     #[test]
@@ -612,7 +636,10 @@
         let s = Sentence::from_tokenized("");
 
         assert!(s.is_err());
-        assert_eq!("`tokenized_text` is empty", &s.err().unwrap().to_string());
+        assert_eq!(
+            "InvalidArgumentError: tokenized_text: is empty",
+            &s.err().unwrap().to_string()
+        );
     }
 
     #[test]
@@ -621,7 +648,7 @@
         assert!(s.is_err());
 
         assert_eq!(
-            "`tokenized_text` starts with a whitespace",
+            "InvalidArgumentError: tokenized_text: starts with a whitespace",
             &s.err().unwrap().to_string()
         );
     }
@@ -632,7 +659,7 @@
         assert!(s.is_err());
 
         assert_eq!(
-            "`tokenized_text` ends with a whitespace",
+            "InvalidArgumentError: tokenized_text: ends with a whitespace",
             &s.err().unwrap().to_string()
         );
     }
@@ -643,7 +670,7 @@
         assert!(s.is_err());
 
         assert_eq!(
-            "`tokenized_text` contains consecutive whitespaces",
+            "InvalidArgumentError: tokenized_text: contains consecutive whitespaces",
            &s.err().unwrap().to_string()
        );
    }
@@ -778,7 +805,7 @@
         assert!(result.is_err());
 
         assert_eq!(
-            "sentence contains an unknown boundary",
+            "InvalidSentenceError: contains an unknown boundary",
             result.err().unwrap().to_string()
         );
     }
@@ -810,7 +837,7 @@
         assert!(result.is_err());
 
         assert_eq!(
-            "sentence contains an unknown boundary",
+            "InvalidSentenceError: contains an unknown boundary",
             result.err().unwrap().to_string()
         );
     }
@@ -830,7 +857,10 @@
         let s = Sentence::from_partial_annotation("");
 
         assert!(s.is_err());
-        assert_eq!("`labeled_text` is empty", &s.err().unwrap().to_string());
+        assert_eq!(
+            "InvalidArgumentError: labeled_text: is empty",
+            &s.err().unwrap().to_string()
+        );
     }
 
     #[test]
@@ -839,7 +869,7 @@
         assert!(s.is_err());
 
         assert_eq!(
-            "invalid length for `labeled_text`: 12",
+            "InvalidArgumentError: labeled_text: invalid length: 12",
             &s.err().unwrap().to_string()
         );
     }
@@ -850,7 +880,7 @@
         assert!(s.is_err());
 
         assert_eq!(
-            "invalid boundary character: '?'",
+            "InvalidArgumentError: labeled_text: contains invalid boundary character: '?'",
             &s.err().unwrap().to_string()
         );
     }
diff --git a/vaporetto/src/trainer.rs b/vaporetto/src/trainer.rs
index 50a5f6a9..c67d1cf2 100644
--- a/vaporetto/src/trainer.rs
+++ b/vaporetto/src/trainer.rs
@@ -1,8 +1,7 @@
 use std::collections::BTreeMap;
 use std::str::FromStr;
 
-use anyhow::{anyhow, Result};
-
+use crate::errors::{Result, VaporettoError};
 use crate::feature::{ExampleGenerator, FeatureExtractor};
 use crate::model::Model;
 use crate::sentence::Sentence;
@@ -237,14 +236,16 @@ impl Trainer {
         let mut builder = liblinear::Builder::new();
         let training_input =
             liblinear::util::TrainingInput::from_sparse_features(dataset.ys, dataset.xs)
-                .map_err(|e| anyhow!("liblinear error: {:?}", e))?;
+                .map_err(|e| VaporettoError::invalid_model(format!("liblinear error: {:?}", e)))?;
         builder.problem().input_data(training_input).bias(self.bias);
         builder
             .parameters()
             .solver_type(solver.into())
             .stopping_criterion(self.epsilon)
             .constraints_violation_cost(self.cost);
-        let model = builder.build_model().map_err(|e| anyhow!(e.to_string()))?;
+        let model = builder
+            .build_model()
+            .map_err(|e| VaporettoError::invalid_model(e.to_string()))?;
 
         Ok(Model::from_liblinear_model(
             model,
diff --git a/vaporetto_rules/src/lib.rs b/vaporetto_rules/src/lib.rs
index 8864c586..9ac6969d 100644
--- a/vaporetto_rules/src/lib.rs
+++ b/vaporetto_rules/src/lib.rs
@@ -17,7 +17,7 @@
 //!
 //! let mut f = BufReader::new(File::open("model.bin").unwrap());
 //! let model = Model::read(&mut f).unwrap();
-//! let mut predictor = Predictor::new(model);
+//! let mut predictor = Predictor::new(model).unwrap();
 //!
 //! let pre_filters: Vec<Box<dyn StringFilter>> = vec![
//!     Box::new(KyteaFullwidthFilter::new()),
diff --git a/vaporetto_wasm/src/lib.rs b/vaporetto_wasm/src/lib.rs
index 3d75ef72..31ec48aa 100644
--- a/vaporetto_wasm/src/lib.rs
+++ b/vaporetto_wasm/src/lib.rs
@@ -28,7 +28,7 @@ impl Vaporetto {
         let mut buff = vec![];
         decoder.read_to_end(&mut buff).unwrap();
         let model = Model::read(&mut buff.as_slice()).unwrap();
-        let predictor = Predictor::new(model);
+        let predictor = Predictor::new(model).unwrap();
         let post_filters: Vec<_> = filters
             .chars()
             .map(|c| {

From 0e05d795504188dbc5031f913b8a29fedad60e9a Mon Sep 17 00:00:00 2001
From: Koichi Akabe
Date: Thu, 2 Dec 2021 12:28:11 +0900
Subject: [PATCH 18/60] Return Error when patterns or weights are invalid (#9)

* Return Error when daachorse returns error in initialization
* Validate size of weight vector
* Refactoring
* Apply suggestions from code review

Co-authored-by: Shunsuke Kanda
Co-authored-by: Shunsuke Kanda

---
 vaporetto/src/char_scorer.rs | 64 ++++++++++++++++++++++--------------
 vaporetto/src/dict_scorer.rs | 15 +++++----
 vaporetto/src/predictor.rs   |  4 +--
 vaporetto/src/type_scorer.rs | 51 +++++++++++++++++++---------
 4 files changed, 86 insertions(+), 48 deletions(-)

diff --git a/vaporetto/src/char_scorer.rs b/vaporetto/src/char_scorer.rs
index a69e7f41..6fb0ae52 100644
--- a/vaporetto/src/char_scorer.rs
+++ b/vaporetto/src/char_scorer.rs
@@ -1,5 +1,6 @@
 use daachorse::DoubleArrayAhoCorasick;
 
+use crate::errors::{Result, VaporettoError};
 use crate::ngram_model::NgramModel;
 use crate::sentence::Sentence;
 
@@ -14,18 +15,18 @@ pub enum CharScorer {
 }
 
 impl CharScorer {
-    pub fn new(model: NgramModel<String>, window_size: usize) -> Self {
+    pub fn new(model: NgramModel<String>, window_size: usize) -> Result<Self> {
         #[cfg(not(feature = "simd"))]
         {
-            Self::Naive(CharScorerNaive::new(model, window_size))
+            Ok(Self::Naive(CharScorerNaive::new(model, window_size)?))
         }
 
         #[cfg(feature = "simd")]
-        if window_size <= 4 {
-            Self::Simd(CharScorerSimd::new(model, window_size))
+        Ok(if window_size <= 4 {
+            Self::Simd(CharScorerSimd::new(model, window_size)?)
         } else {
-            Self::Naive(CharScorerNaive::new(model, window_size))
+            Self::Naive(CharScorerNaive::new(model, window_size)?)
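            // [Editorial aside, not part of the original patch: the
            // `window_size <= 4` cutoff matches the SIMD path, which packs a
            // merged weight vector into a single `i32x8`; such a vector can
            // cover up to `2 * window_size` boundary positions, and
            // 2 * 4 = 8 is exactly the lane count.]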
+        })
     }
 
     pub fn add_scores(&self, sentence: &Sentence, padding: usize, ys: &mut [i32]) {
@@ -45,13 +46,24 @@ pub struct CharScorerNaive {
 }
 
 impl CharScorerNaive {
-    pub fn new(mut model: NgramModel<String>, window_size: usize) -> Self {
+    pub fn new(mut model: NgramModel<String>, window_size: usize) -> Result<Self> {
         model.merge_weights();
-        Self {
-            pma: DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram)).unwrap(),
-            weights: model.data.into_iter().map(|d| d.weights).collect(),
-            window_size,
+        let pma = DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram))
+            .map_err(|_| VaporettoError::invalid_model("invalid character n-grams"))?;
+        let mut weights = vec![];
+        for d in model.data {
+            if d.weights.len() <= 2 * window_size - d.ngram.chars().count() {
+                return Err(VaporettoError::invalid_model(
+                    "invalid size of weight vector",
+                ));
+            }
+            weights.push(d.weights);
         }
+        Ok(Self {
+            pma,
+            weights,
+            window_size,
+        })
     }
 
     pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) {
@@ -81,23 +93,27 @@ pub struct CharScorerSimd {
 
 #[cfg(feature = "simd")]
 impl CharScorerSimd {
-    pub fn new(mut model: NgramModel<String>, window_size: usize) -> Self {
+    pub fn new(mut model: NgramModel<String>, window_size: usize) -> Result<Self> {
         model.merge_weights();
-        let pma = DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram)).unwrap();
-        let weights = model
-            .data
-            .into_iter()
-            .map(|d| {
-                let mut s = [0i32; 8];
-                s[..d.weights.len()].copy_from_slice(&d.weights);
-                i32x8::from_array(s)
-            })
-            .collect();
-        Self {
+        let pma = DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram))
+            .map_err(|_| VaporettoError::invalid_model("invalid character n-grams"))?;
+        let mut weights = vec![];
+        for d in model.data {
+            let mut s = [0i32; 8];
+            if let Some(s) = s.get_mut(..d.weights.len()) {
+                s.copy_from_slice(&d.weights);
+            } else {
+                return Err(VaporettoError::invalid_model(
+                    "invalid size of weight vector",
+                ));
+            }
+            weights.push(i32x8::from_array(s));
+        }
+        Ok(Self {
             pma,
             weights,
             window_size,
-        }
+        })
     }
 
     pub fn add_scores(&self, sentence: &Sentence, padding: usize, ys: &mut [i32]) {
diff --git a/vaporetto/src/dict_scorer.rs b/vaporetto/src/dict_scorer.rs
index 8d3ab03d..f6f6106c 100644
--- a/vaporetto/src/dict_scorer.rs
+++ b/vaporetto/src/dict_scorer.rs
@@ -12,7 +12,7 @@ impl DictScorer {
     pub fn new(model: DictModel) -> Result<Self> {
         Ok(match model {
-            DictModel::Wordwise(model) => Self::Wordwise(DictScorerWordwise::new(model)),
+            DictModel::Wordwise(model) => Self::Wordwise(DictScorerWordwise::new(model)?),
             DictModel::Lengthwise(model) => Self::Lengthwise(DictScorerLengthwise::new(model)?),
         })
     }
@@ -31,17 +31,16 @@ pub struct DictScorerWordwise {
 }
 
 impl DictScorerWordwise {
-    pub fn new(model: DictModelWordwise) -> Self {
+    pub fn new(model: DictModelWordwise) -> Result<Self> {
         let mut words = vec![];
         let mut weights = vec![];
         for pair in model.data {
             words.push(pair.word);
             weights.push(pair.weights);
         }
-        Self {
-            pma: DoubleArrayAhoCorasick::new(words).unwrap(),
-            weights,
-        }
+        let pma = DoubleArrayAhoCorasick::new(words)
+            .map_err(|_| VaporettoError::invalid_model("invalid dictionary"))?;
+        Ok(Self { pma, weights })
     }
 
     pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) {
@@ -75,8 +74,10 @@ impl DictScorerLengthwise {
                 "dict_word_max_size must be >= 1",
             ));
         }
+        let pma = DoubleArrayAhoCorasick::new(model.words)
+            .map_err(|_| VaporettoError::invalid_model("invalid dictionary"))?;
         Ok(Self {
-            pma: DoubleArrayAhoCorasick::new(model.words).unwrap(),
+            pma,
             weights: model.weights,
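            // [Editorial aside, not part of the original patch: in the
            // lengthwise model, `weights[k]` applies to dictionary matches of
            // k + 1 characters and the last entry covers all longer matches;
            // `add_scores` selects it with
            // `(m_end - m_start).min(self.weights.len()) - 1`, which is why an
            // empty weight vector had to be rejected above.]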
        })
     }
diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs
index 66f953a3..298ba9e7 100644
--- a/vaporetto/src/predictor.rs
+++ b/vaporetto/src/predictor.rs
@@ -41,8 +41,8 @@ impl Predictor {
 
         dict_model.merge_dict_weights(&mut char_ngram_model, model.char_window_size);
 
-        let char_scorer = CharScorer::new(char_ngram_model, model.char_window_size);
-        let type_scorer = TypeScorer::new(type_ngram_model, model.type_window_size);
+        let char_scorer = CharScorer::new(char_ngram_model, model.char_window_size)?;
+        let type_scorer = TypeScorer::new(type_ngram_model, model.type_window_size)?;
         let dict_scorer = if dict_model.is_empty() {
             None
         } else {
diff --git a/vaporetto/src/type_scorer.rs b/vaporetto/src/type_scorer.rs
index 5bc9299d..78e0febe 100644
--- a/vaporetto/src/type_scorer.rs
+++ b/vaporetto/src/type_scorer.rs
@@ -1,5 +1,6 @@
 use daachorse::DoubleArrayAhoCorasick;
 
+use crate::errors::{Result, VaporettoError};
 use crate::ngram_model::NgramModel;
 use crate::sentence::Sentence;
 
@@ -9,12 +10,12 @@ pub enum TypeScorer {
 }
 
 impl TypeScorer {
-    pub fn new(model: NgramModel<Vec<u8>>, window_size: usize) -> Self {
-        if window_size <= 3 {
-            Self::Cache(TypeScorerCache::new(model, window_size))
+    pub fn new(model: NgramModel<Vec<u8>>, window_size: usize) -> Result<Self> {
+        Ok(if window_size <= 3 {
+            Self::Cache(TypeScorerCache::new(model, window_size)?)
         } else {
-            Self::Pma(TypeScorerPma::new(model, window_size))
+            Self::Pma(TypeScorerPma::new(model, window_size)?)
+        })
     }
 
     pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) {
@@ -32,13 +33,24 @@ pub struct TypeScorerPma {
 }
 
 impl TypeScorerPma {
-    pub fn new(mut model: NgramModel<Vec<u8>>, window_size: usize) -> Self {
+    pub fn new(mut model: NgramModel<Vec<u8>>, window_size: usize) -> Result<Self> {
         model.merge_weights();
-        Self {
-            pma: DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram)).unwrap(),
-            weights: model.data.into_iter().map(|d| d.weights).collect(),
-            window_size,
+        let pma = DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram))
+            .map_err(|_| VaporettoError::invalid_model("invalid character type n-grams"))?;
+        let mut weights = vec![];
+        for d in model.data {
+            if d.weights.len() <= 2 * window_size - d.ngram.len() {
+                return Err(VaporettoError::invalid_model(
+                    "invalid size of weight vector",
+                ));
+            }
+            weights.push(d.weights);
         }
+        Ok(Self {
+            pma,
+            weights,
+            window_size,
+        })
     }
 
     pub fn add_scores(&self, sentence: &Sentence, ys: &mut [i32]) {
@@ -68,10 +80,19 @@ pub struct TypeScorerCache {
 }
 
 impl TypeScorerCache {
-    pub fn new(mut model: NgramModel<Vec<u8>>, window_size: usize) -> Self {
+    pub fn new(mut model: NgramModel<Vec<u8>>, window_size: usize) -> Result<Self> {
         model.merge_weights();
-        let pma = DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram)).unwrap();
-        let weights: Vec<Vec<i32>> = model.data.into_iter().map(|d| d.weights).collect();
+        let pma = DoubleArrayAhoCorasick::new(model.data.iter().map(|d| &d.ngram))
+            .map_err(|_| VaporettoError::invalid_model("invalid character type n-grams"))?;
+        let mut weights = vec![];
+        for d in model.data {
+            if d.weights.len() <= 2 * window_size - d.ngram.len() {
+                return Err(VaporettoError::invalid_model(
+                    "invalid size of weight vector",
+                ));
+            }
+            weights.push(d.weights);
+        }
 
         let sequence_size = window_size * 2;
         let all_sequences = ALPHABET_SIZE.pow(sequence_size as u32);
@@ -90,11 +111,11 @@
             *score = y;
         }
 
-        Self {
+        Ok(Self {
             scores,
             window_size,
             sequence_mask: (1 << (ALPHABET_SHIFT * sequence_size)) - 1,
-        }
+        })
     }
 
     pub fn add_scores(&self, sentence: &Sentence, ys:
&mut [i32]) { From 6b342e79c58483b326cbd21445f3f72210efb750 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Thu, 2 Dec 2021 19:09:28 +0900 Subject: [PATCH 19/60] Get weights without boundary checking (#10) * Get weights without boundary checking * Fix --- vaporetto/src/char_scorer.rs | 8 ++++++-- vaporetto/src/dict_scorer.rs | 9 ++++++--- vaporetto/src/type_scorer.rs | 4 +++- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/vaporetto/src/char_scorer.rs b/vaporetto/src/char_scorer.rs index 6fb0ae52..4cf82351 100644 --- a/vaporetto/src/char_scorer.rs +++ b/vaporetto/src/char_scorer.rs @@ -70,7 +70,9 @@ impl CharScorerNaive { for m in self.pma.find_overlapping_no_suffix_iter(&sentence.text) { let m_end = sentence.str_to_char_pos[m.end()]; let offset = m_end as isize - self.window_size as isize - 1; - let weights = &self.weights[m.pattern()]; + // Both the weights and the PMA always have the same number of items. + // Therefore, the following code is safe. + let weights = unsafe { self.weights.get_unchecked(m.pattern()) }; if offset >= 0 { for (w, y) in weights.iter().zip(&mut ys[offset as usize..]) { *y += w; @@ -120,7 +122,9 @@ impl CharScorerSimd { for m in self.pma.find_overlapping_no_suffix_iter(&sentence.text) { let m_end = sentence.str_to_char_pos[m.end()]; let offset = padding as isize + m_end as isize - self.window_size as isize - 1; - let weights = &self.weights[m.pattern()]; + // Both the weights and the PMA always have the same number of items. + // Therefore, the following code is safe. + let weights = unsafe { self.weights.get_unchecked(m.pattern()) }; let ys_slice = &mut ys[offset as usize..offset as usize + 8]; let mut target = i32x8::from_slice(ys_slice); target += weights; diff --git a/vaporetto/src/dict_scorer.rs b/vaporetto/src/dict_scorer.rs index f6f6106c..dcc64502 100644 --- a/vaporetto/src/dict_scorer.rs +++ b/vaporetto/src/dict_scorer.rs @@ -47,8 +47,9 @@ impl DictScorerWordwise { for m in self.pma.find_overlapping_iter(&sentence.text) { let m_start = sentence.str_to_char_pos[m.start()]; let m_end = sentence.str_to_char_pos[m.end()]; - let idx = m.pattern(); - let dict_weight = self.weights[idx]; + // Both the weights and the PMA always have the same number of items. + // Therefore, the following code is safe. + let dict_weight = unsafe { self.weights.get_unchecked(m.pattern()) }; if m_start != 0 { ys[m_start - 1] += dict_weight.right; } @@ -87,7 +88,9 @@ impl DictScorerLengthwise { let m_start = sentence.str_to_char_pos[m.start()]; let m_end = sentence.str_to_char_pos[m.end()]; let idx = (m_end - m_start).min(self.weights.len()) - 1; - let dict_weight = self.weights[idx]; + // The upper bound of idx is weights.len() - 1. + // Therefore, the following code is safe. + let dict_weight = unsafe { self.weights.get_unchecked(idx) }; if m_start != 0 { ys[m_start - 1] += dict_weight.right; } diff --git a/vaporetto/src/type_scorer.rs b/vaporetto/src/type_scorer.rs index 78e0febe..21a1c365 100644 --- a/vaporetto/src/type_scorer.rs +++ b/vaporetto/src/type_scorer.rs @@ -59,7 +59,9 @@ impl TypeScorerPma { .find_overlapping_no_suffix_iter(&sentence.char_type) { let offset = m.end() as isize - self.window_size as isize - 1; - let weights = &self.weights[m.pattern()]; + // Both the weights and the PMA always have the same number of items. + // Therefore, the following code is safe. 
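            // [Editorial aside, not part of the original patch: the invariant
            // holds because `weights` is filled from the same `model.data`
            // that the PMA patterns are built from, so every pattern id the
            // automaton returns is a valid index. A bounds-checked sketch of
            // the same lookup would be
            //
            //     let weights = self.weights.get(m.pattern()).expect("pma and weights are in sync");
            //
            // which the patch avoids to save a branch in this hot loop.]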
+ let weights = unsafe { self.weights.get_unchecked(m.pattern()) }; if offset >= 0 { for (w, y) in weights.iter().zip(&mut ys[offset as usize..]) { *y += w; From 389d1bfb1512bf2715f3557da50b6cb60a2463e8 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Thu, 2 Dec 2021 20:04:53 +0900 Subject: [PATCH 20/60] Simplify speed comparison on README (#11) * Simplify speed comparison on README * Update README.md Co-authored-by: Shunsuke Kanda Co-authored-by: Shunsuke Kanda --- README.md | 54 +++++++++++++----------------------------------------- 1 file changed, 13 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 97a925d1..8c27a863 100644 --- a/README.md +++ b/README.md @@ -86,47 +86,19 @@ You can specify all arguments above multiple times. ## Speed Comparison of Various Tokenizers -You can find the comparison script at [here](https://github.com/legalforce-research/tokenizer-speed-bench). - -### Experimental Setup - -* Document: Japanese training data of Kyoto Free Translation Task -* Models: - * KyTea and Vaporetto: Compact LR model (jp-0.4.7-6) - * MeCab, Kuromoji, and Lindera: IPAdic - * Sudachi and Sudachi.rs: system_core.dic (v20210802) - -### Results - -* VM instance on Google Cloud Platform (c2-standard-16, Debian) - - | Tool Name (version) | Speed (×10^6 chars/s) | σ | - | -------------------------- | ---------------------:|-------| - | KyTea (2020-04-03) | 0.777 | 0.020 | - | Vaporetto (0.1.6) | **4.426** | 0.182 | - | | | | - | MeCab (2020-09-14) | 2.736 | 0.041 | - | | | | - | Kuromoji (Atilika's 0.9.0) | 0.423 | 0.013 | - | Lindera (0.8.0) | 1.002 | 0.014 | - | | | | - | Sudachi (0.5.2) | 0.251 | 0.012 | - | Sudachi.rs (0.6.0-rc1) | 0.644 | 0.012 | - -* MacBook Pro (2017, Processor: 2.3 GHz Intel Core i5, Memory: 8 GB 2133 MHz LPDDR3) - - | Tool Name (version) | Speed (×10^6 chars/s) | σ | - | -------------------------- | ---------------------:|-------| - | KyTea (2020-04-03) | 0.490 | 0.003 | - | Vaporetto (0.1.6) | **3.016** | 0.113 | - | | | | - | MeCab (2020-09-14) | 1.418 | 0.007 | - | | | | - | Kuromoji (Atilika's 0.9.0) | 1.197 | 0.034 | - | Lindera (0.8.0) | 0.542 | 0.010 | - | | | | - | Sudachi (0.5.2) | 0.439 | 0.026 | - | Sudachi.rs (0.6.0-rc1) | 0.427 | 0.009 | +Details can be found [here](https://github.com/legalforce-research/vaporetto/wiki/Speed-Comparison). + +| Tool Name (version) | Speed [M chars/s] | STD | +| --------------------------------- | -----------------:| -------------:| +| KyTea (2020-04-03) | 1.463 | 0.012 | +| Vaporetto (0.3.0) | **9.716** | 0.115 | +| Vaporetto (0.3.0, `feature=simd`) | **11.035** | 0.144 | +| | | | +| MeCab (2020-09-14) | 4.621 | 0.047 | +| Kuromoji (0.9.0) | 1.470 | 0.074 | +| Lindera (0.8.1) | 1.444 | 0.022 | +| Sudachi (0.5.3) | 0.322 | 0.029 | +| sudachi.rs (0.6.0) | 0.961 | 0.008 | ## Disclaimer From 28f23f1c26497fe4eefaf9c7046b8d55f65de736 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Fri, 3 Dec 2021 14:56:39 +0900 Subject: [PATCH 21/60] Add a figure in README (#12) * Add a figure * Fix * Fix * Update README.md --- README.md | 12 +- figures/comparison.ngp | 1518 ++++++++++++++++++++++++++++++++++++++++ figures/comparison.svg | 179 +++++ figures/comparison.txt | 9 + 4 files changed, 1707 insertions(+), 11 deletions(-) create mode 100644 figures/comparison.ngp create mode 100644 figures/comparison.svg create mode 100644 figures/comparison.txt diff --git a/README.md b/README.md index 8c27a863..da2f46ff 100644 --- a/README.md +++ b/README.md @@ -88,17 +88,7 @@ You can specify all arguments above multiple times. 
Details can be found [here](https://github.com/legalforce-research/vaporetto/wiki/Speed-Comparison). -| Tool Name (version) | Speed [M chars/s] | STD | -| --------------------------------- | -----------------:| -------------:| -| KyTea (2020-04-03) | 1.463 | 0.012 | -| Vaporetto (0.3.0) | **9.716** | 0.115 | -| Vaporetto (0.3.0, `feature=simd`) | **11.035** | 0.144 | -| | | | -| MeCab (2020-09-14) | 4.621 | 0.047 | -| Kuromoji (0.9.0) | 1.470 | 0.074 | -| Lindera (0.8.1) | 1.444 | 0.022 | -| Sudachi (0.5.3) | 0.322 | 0.029 | -| sudachi.rs (0.6.0) | 0.961 | 0.008 | +![](./figures/comparison.svg) ## Disclaimer diff --git a/figures/comparison.ngp b/figures/comparison.ngp new file mode 100644 index 00000000..1378083c --- /dev/null +++ b/figures/comparison.ngp @@ -0,0 +1,1518 @@ +#!ngraph +#%creator: Ngraph +#%version: 6.09.03 +new axis name:fX1 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=12 + axis::inc=1 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=3200 + axis::direction=0 + axis::baseline=true + axis::length=11300 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference= + axis::gauge=left + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=none + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=1200 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=center + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fY1 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0.5 + axis::max=3.5 + axis::inc=1 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=3200 + axis::direction=9000 + axis::baseline=true + axis::length=3000 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference= + axis::gauge=none + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=none + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=right + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + 
axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fU1 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=0 + axis::inc=0 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=200 + axis::direction=0 + axis::baseline=true + axis::length=11300 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference='axis:0' + axis::gauge=right + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=left + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=center + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fR1 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=0 + axis::inc=0 + axis::div=0 + axis::type=linear + axis::x=17700 + axis::y=3200 + axis::direction=9000 + axis::baseline=true + axis::length=3000 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference='axis:1' + axis::gauge=none + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=none + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=left + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +axis::grouping 1 0 1 2 3 + +new axis name:fX2 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=12 + axis::inc=1 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=4400 + axis::direction=0 + axis::baseline=true + axis::length=11300 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + 
axis::wave_width=40 + axis::reference= + axis::gauge=left + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=none + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=1200 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=center + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fY2 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0.5 + axis::max=1.5 + axis::inc=1 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=4400 + axis::direction=9000 + axis::baseline=true + axis::length=1000 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference= + axis::gauge=none + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=none + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=right + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fU2 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=0 + axis::inc=0 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=3400 + axis::direction=0 + axis::baseline=true + axis::length=11300 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference='axis:4' + axis::gauge=right + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=left + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=center + 
axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fR2 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=0 + axis::inc=0 + axis::div=0 + axis::type=linear + axis::x=17700 + axis::y=4400 + axis::direction=9000 + axis::baseline=true + axis::length=1000 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference='axis:5' + axis::gauge=none + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=none + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=left + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +axis::grouping 1 4 5 6 7 + +new axis name:fX3 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=12 + axis::inc=1 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=6600 + axis::direction=0 + axis::baseline=true + axis::length=11300 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference= + axis::gauge=left + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=none + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=1200 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=center + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fY3 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0.5 + axis::max=2.5 + axis::inc=1 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=6600 + axis::direction=9000 + axis::baseline=true + axis::length=2000 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + 
axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference= + axis::gauge=none + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=none + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=right + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fU3 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=0 + axis::inc=0 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=4600 + axis::direction=0 + axis::baseline=true + axis::length=11300 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference='axis:8' + axis::gauge=right + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=left + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=center + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fR3 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=0 + axis::inc=0 + axis::div=0 + axis::type=linear + axis::x=17700 + axis::y=6600 + axis::direction=9000 + axis::baseline=true + axis::length=2000 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference='axis:9' + axis::gauge=none + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=none + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + 
axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=left + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +axis::grouping 1 8 9 10 11 + +new axis name:fX4 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=12 + axis::inc=1 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=8800 + axis::direction=0 + axis::baseline=true + axis::length=11300 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference= + axis::gauge=left + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=right + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=1200 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=center + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fY4 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0.5 + axis::max=2.5 + axis::inc=1 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=8800 + axis::direction=9000 + axis::baseline=true + axis::length=2000 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference= + axis::gauge=none + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=none + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=right + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fU4 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=0 + axis::inc=0 + axis::div=0 + axis::type=linear + axis::x=6400 + axis::y=6800 + axis::direction=0 + axis::baseline=true + axis::length=11300 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + 
axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference='axis:12' + axis::gauge=right + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=left + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=center + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +new axis name:fR4 + axis::hidden=false + axis::R=0 + axis::G=0 + axis::B=0 + axis::A=255 + axis::clip=true + axis::redraw_flag=true + axis::min=0 + axis::max=0 + axis::inc=0 + axis::div=0 + axis::type=linear + axis::x=17700 + axis::y=8800 + axis::direction=9000 + axis::baseline=true + axis::length=2000 + axis::width=40 + axis::style= + axis::auto_scale_margin=500 + axis::adjust_axis= + axis::adjust_position=0 + axis::arrow=none + axis::arrow_length=72426 + axis::arrow_width=60000 + axis::wave=none + axis::wave_length=300 + axis::wave_width=40 + axis::reference='axis:13' + axis::gauge=none + axis::gauge_min=0 + axis::gauge_max=0 + axis::gauge_style= + axis::gauge_length1=100 + axis::gauge_width1=40 + axis::gauge_length2=200 + axis::gauge_width2=40 + axis::gauge_length3=300 + axis::gauge_width3=40 + axis::gauge_R=0 + axis::gauge_G=0 + axis::gauge_B=0 + axis::gauge_A=255 + axis::num=none + axis::num_begin=0 + axis::num_step=0 + axis::num_num=-1 + axis::num_auto_norm=5 + axis::num_head= + axis::num_format='%g' + axis::num_tail= + axis::num_log_pow=true + axis::num_pt=2000 + axis::num_space=0 + axis::num_font='Sans-serif' + axis::num_font_style=0 + axis::num_script_size=7000 + axis::num_align=left + axis::num_no_zero=regular + axis::num_direction=horizontal + axis::num_shift_p=0 + axis::num_shift_n=100 + axis::num_R=0 + axis::num_G=0 + axis::num_B=0 + axis::num_A=255 + axis::num_date_format= + axis::num_math= + +axis::grouping 1 12 13 14 15 + +new data + data::hidden=false + data::R=0 + data::G=0 + data::B=0 + data::A=255 + data::clip=true + data::redraw_flag=true + data::source=file + data::save_path=relative + data::x=2 + data::y=0 + data::type=bar_fill_x + data::interpolation=spline + data::fit= + data::math_x= + data::math_y='4-Y' + data::func_f= + data::func_g= + data::func_h= + data::smooth_x=0 + data::smooth_y=0 + data::averaging_type=simple + data::mark_type=0 + data::mark_size=200 + data::line_width=40 + data::line_style= + data::line_join=bevel + data::line_miter_limit=1000 + data::R2=0 + data::G2=0 + data::B2=0 + data::A2=255 + data::remark='#%'\''' + data::ifs=',' + data::csv=false + data::head_skip=1 + data::read_step=1 + data::final_line=4 + data::mask= + data::move_data= + data::move_data_x= + data::move_data_y= + data::axis_x='axis:0' + data::axis_y='axis:1' + data::data_clip=true + data::range_min=1 + data::range_max=10 + data::range_div=512 + data::array= + data::file='./comparison.txt' + +new data + data::hidden=false + data::R=0 
+ data::G=0 + data::B=0 + data::A=255 + data::clip=true + data::redraw_flag=true + data::source=file + data::save_path=relative + data::x=2 + data::y=0 + data::type=bar_fill_x + data::interpolation=spline + data::fit= + data::math_x= + data::math_y= + data::func_f= + data::func_g= + data::func_h= + data::smooth_x=0 + data::smooth_y=0 + data::averaging_type=simple + data::mark_type=0 + data::mark_size=200 + data::line_width=40 + data::line_style= + data::line_join=bevel + data::line_miter_limit=1000 + data::R2=0 + data::G2=0 + data::B2=0 + data::A2=255 + data::remark='#%'\''' + data::ifs=',' + data::csv=false + data::head_skip=4 + data::read_step=1 + data::final_line=5 + data::mask= + data::move_data= + data::move_data_x= + data::move_data_y= + data::axis_x='axis:4' + data::axis_y='axis:5' + data::data_clip=true + data::range_min=1 + data::range_max=10 + data::range_div=512 + data::array= + data::file='./comparison.txt' + +new data + data::hidden=false + data::R=0 + data::G=0 + data::B=0 + data::A=255 + data::clip=true + data::redraw_flag=true + data::source=file + data::save_path=relative + data::x=2 + data::y=0 + data::type=bar_fill_x + data::interpolation=spline + data::fit= + data::math_x= + data::math_y='3-Y' + data::func_f= + data::func_g= + data::func_h= + data::smooth_x=0 + data::smooth_y=0 + data::averaging_type=simple + data::mark_type=0 + data::mark_size=200 + data::line_width=40 + data::line_style= + data::line_join=bevel + data::line_miter_limit=1000 + data::R2=0 + data::G2=0 + data::B2=0 + data::A2=255 + data::remark='#%'\''' + data::ifs=',' + data::csv=false + data::head_skip=5 + data::read_step=1 + data::final_line=7 + data::mask= + data::move_data= + data::move_data_x= + data::move_data_y= + data::axis_x='axis:8' + data::axis_y='axis:9' + data::data_clip=true + data::range_min=1 + data::range_max=10 + data::range_div=512 + data::array= + data::file='./comparison.txt' + +new data + data::hidden=false + data::R=0 + data::G=0 + data::B=0 + data::A=255 + data::clip=true + data::redraw_flag=true + data::source=file + data::save_path=relative + data::x=2 + data::y=0 + data::type=bar_fill_x + data::interpolation=spline + data::fit= + data::math_x= + data::math_y='3-Y' + data::func_f= + data::func_g= + data::func_h= + data::smooth_x=0 + data::smooth_y=0 + data::averaging_type=simple + data::mark_type=0 + data::mark_size=200 + data::line_width=40 + data::line_style= + data::line_join=bevel + data::line_miter_limit=1000 + data::R2=0 + data::G2=0 + data::B2=0 + data::A2=255 + data::remark='#%'\''' + data::ifs=',' + data::csv=false + data::head_skip=7 + data::read_step=1 + data::final_line=9 + data::mask= + data::move_data= + data::move_data_x= + data::move_data_y= + data::axis_x='axis:12' + data::axis_y='axis:13' + data::data_clip=true + data::range_min=1 + data::range_max=10 + data::range_div=512 + data::array= + data::file='./comparison.txt' + +new text + text::hidden=false + text::R=0 + text::G=0 + text::B=0 + text::A=255 + text::clip=true + text::redraw_flag=true + text::text='KyTea (2020-04-03)' + text::x=200 + text::y=800 + text::pt=1200 + text::font='Sans-serif' + text::style=0 + text::space=0 + text::direction=0 + text::script_size=7000 + text::raw=false + +new text + text::hidden=false + text::R=0 + text::G=0 + text::B=0 + text::A=255 + text::clip=true + text::redraw_flag=true + text::text='Vaporetto (0.3.0)' + text::x=200 + text::y=1800 + text::pt=1200 + text::font='Sans-serif' + text::style=0 + text::space=0 + text::direction=0 + text::script_size=7000 + text::raw=false + 
+new text
+ text::hidden=false
+ text::R=0
+ text::G=0
+ text::B=0
+ text::A=255
+ text::clip=true
+ text::redraw_flag=true
+ text::text='Analysis Speed [×10^6@ chars/s]'
+ text::x=6400
+ text::y=10000
+ text::pt=1200
+ text::font='Sans-serif'
+ text::style=0
+ text::space=0
+ text::direction=0
+ text::script_size=7000
+ text::raw=false
+
+new text
+ text::hidden=false
+ text::R=0
+ text::G=0
+ text::B=0
+ text::A=255
+ text::clip=true
+ text::redraw_flag=true
+ text::text='MeCab (2020-09-14)'
+ text::x=200
+ text::y=4000
+ text::pt=1200
+ text::font='Sans-serif'
+ text::style=0
+ text::space=0
+ text::direction=0
+ text::script_size=7000
+ text::raw=false
+
+new text
+ text::hidden=false
+ text::R=0
+ text::G=0
+ text::B=0
+ text::A=255
+ text::clip=true
+ text::redraw_flag=true
+ text::text='Kuromoji (0.9.0)'
+ text::x=200
+ text::y=5200
+ text::pt=1200
+ text::font='Sans-serif'
+ text::style=0
+ text::space=0
+ text::direction=0
+ text::script_size=7000
+ text::raw=false
+
+new text
+ text::hidden=false
+ text::R=0
+ text::G=0
+ text::B=0
+ text::A=255
+ text::clip=true
+ text::redraw_flag=true
+ text::text='Lindera (0.8.1)'
+ text::x=200
+ text::y=6200
+ text::pt=1200
+ text::font='Sans-serif'
+ text::style=0
+ text::space=0
+ text::direction=0
+ text::script_size=7000
+ text::raw=false
+
+new text
+ text::hidden=false
+ text::R=0
+ text::G=0
+ text::B=0
+ text::A=255
+ text::clip=true
+ text::redraw_flag=true
+ text::text='Sudachi (0.5.3)'
+ text::x=200
+ text::y=7400
+ text::pt=1200
+ text::font='Sans-serif'
+ text::style=0
+ text::space=0
+ text::direction=0
+ text::script_size=7000
+ text::raw=false
+
+new text
+ text::hidden=false
+ text::R=0
+ text::G=0
+ text::B=0
+ text::A=255
+ text::clip=true
+ text::redraw_flag=true
+ text::text='sudachi.rs (0.6.0)'
+ text::x=200
+ text::y=8400
+ text::pt=1200
+ text::font='Sans-serif'
+ text::style=0
+ text::space=0
+ text::direction=0
+ text::script_size=7000
+ text::raw=false
+
+new text
+ text::hidden=false
+ text::R=0
+ text::G=0
+ text::B=0
+ text::A=255
+ text::clip=true
+ text::redraw_flag=true
+ text::text='Vaporetto (0.3.0, feature=simd)'
+ text::x=200
+ text::y=2800
+ text::pt=1200
+ text::font='Sans-serif'
+ text::style=0
+ text::space=0
+ text::direction=0
+ text::script_size=7000
+ text::raw=false
+
+new gra name:viewer
+ gra::left_margin=0
+ gra::top_margin=0
+ gra::zoom=10000
+ gra::paper_width=18000
+ gra::paper_height=10200
+ gra::decimalsign=period
+ gra::draw_obj='axisgrid axis data merge legend rectangle arc path mark text'
diff --git a/figures/comparison.svg b/figures/comparison.svg
new file mode 100644
index 00000000..5a75e90e
--- /dev/null
+++ b/figures/comparison.svg
@@ -0,0 +1,179 @@
[figure: bar chart of analysis speed (×10^6 chars/s) per tokenizer; the 179 lines of SVG markup were stripped during extraction. See figures/comparison.txt below for the underlying data.]
diff --git a/figures/comparison.txt b/figures/comparison.txt
new file mode 100644
index 00000000..6a25dd8d
--- /dev/null
+++ b/figures/comparison.txt
@@ -0,0 +1,9 @@
+Tool Name (version),Speed [M chars/s],STD
+KyTea (2020-04-03),1.463,0.012
+Vaporetto (0.3.0),9.716,0.115
+Vaporetto (0.3.0+feature=simd),11.035,0.144
+MeCab (2020-09-14),4.621,0.047
+Kuromoji (0.9.0),1.470,0.074
+Lindera (0.8.1),1.444,0.022
+Sudachi (0.5.3),0.322,0.029
+sudachi.rs (0.6.0),0.961,0.008
From
79e7b6e28290f833631a3b057789dfa9cd9a1432 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Tue, 7 Dec 2021 11:22:48 +0900 Subject: [PATCH 22/60] Dict manipulation (#13) * wip * Add manipulate_model command * wip * Update doc * Update README.md * Fix * fix * Fix tests * Update README.md * Update README.md * Update Cargo.toml * Update README.md * Update README.md * Update README.md * Update README.md --- Cargo.toml | 1 + README.md | 62 ++++++++++++++++++++++--- manipulate_model/Cargo.toml | 11 +++++ manipulate_model/src/main.rs | 85 ++++++++++++++++++++++++++++++++++ vaporetto/src/dict_model.rs | 89 ++++++++++++++++++++++++++++++++---- vaporetto/src/dict_scorer.rs | 6 +-- vaporetto/src/kytea_model.rs | 10 ++-- vaporetto/src/lib.rs | 1 + vaporetto/src/model.rs | 12 ++++- vaporetto/src/predictor.rs | 26 +++++------ 10 files changed, 264 insertions(+), 39 deletions(-) create mode 100644 manipulate_model/Cargo.toml create mode 100644 manipulate_model/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index 09c8e826..3c5b1193 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ members = [ "vaporetto", "vaporetto_rules", + "manipulate_model", "predict", "train", "evaluate", diff --git a/README.md b/README.md index da2f46ff..655a2b4a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # 🛥 VAporetto: POintwise pREdicTion based TOkenizer Vaporetto is a fast and lightweight pointwise prediction based tokenizer. +This repository includes both a Rust crate that provides APIs for Vaporetto and CLI frontends. [![Crates.io](https://img.shields.io/crates/v/vaporetto)](https://crates.io/crates/vaporetto) [![Documentation](https://docs.rs/vaporetto/badge.svg)](https://docs.rs/vaporetto) @@ -8,9 +9,7 @@ Vaporetto is a fast and lightweight pointwise prediction based tokenizer. [Technical details](https://tech.legalforce.co.jp/entry/2021/09/28/180844) (Japanese) -## Overview - -This repository includes both a Rust crate that provides APIs for Vaporetto and CLI frontends. +## Example Usage ### Try Word Segmentation @@ -36,12 +35,12 @@ Each model is compressed, so you need to decompress the downloaded model file li To convert a KyTea model into a Vaporetto model, run the following command in the Vaporetto root directory. If necessary, the Rust code will be compiled before the conversion process. ``` -% cargo run --release -p convert_kytea_model -- --model-in path/to/jp-0.4.7-5.mod --model-out path/to/jp-0.4.7-5-tokenize.model.zstd +% cargo run --release -p convert_kytea_model -- --model-in path/to/jp-0.4.7-5.mod --model-out path/to/jp-0.4.7-5-tokenize.model.zst ``` Now you can perform tokenization. Run the following command: ``` -% echo '火星猫の生態の調査結果' | cargo run --release -p predict -- --model path/to/jp-0.4.7-5-tokenize.model.zstd +% echo '火星猫の生態の調査結果' | cargo run --release -p predict -- --model path/to/jp-0.4.7-5-tokenize.model.zst ``` The following will be output: @@ -75,7 +74,7 @@ Here is an example: To train a model, use the following command: ``` -% cargo run --release -p train -- --model ./your.model.zstd --tok path/to/full.txt --part path/to/part.txt --dict path/to/dict.txt +% cargo run --release -p train -- --model ./your.model.zst --tok path/to/full.txt --part path/to/part.txt --dict path/to/dict.txt ``` `--tok` argument specifies a fully annotated corpus, and `--part` argument specifies a partially annotated corpus. @@ -84,6 +83,57 @@ A word dictionary is a file with words per line. You can specify all arguments above multiple times. 
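[Editor's aside, not part of the patch: the commands above wrap the `vaporetto` crate, which can also be called directly. The following is a minimal, unofficial sketch assembled from the APIs that appear in this patch series; it assumes the `zstd` crate (as used by the CLI tools), the model path is a placeholder, and error handling is simplified.]

```rust
use std::fs::File;

use vaporetto::{Model, Predictor, Sentence};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Models are distributed zstd-compressed, so decompress while reading.
    let mut f = zstd::Decoder::new(File::open("path/to/your.model.zst")?)?;
    let model = Model::read(&mut f)?;
    let predictor = Predictor::new(model)?;

    // Predict word boundaries and print the tokenized result.
    let s = Sentence::from_raw("火星猫の生態の調査結果")?;
    let s = predictor.predict(s);
    println!("{}", s.to_tokenized_string()?);
    Ok(())
}
```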
+### Model Manipulation
+
+For example, `メロンパン` is split into two tokens in the following command:
+```
+% echo '朝食はメロンパン1個だった' | cargo run --release -p predict -- --model path/to/jp-0.4.7-5-tokenize.model.zst
+朝食 は メロン パン 1 個 だっ た
+```
+
+Sometimes, the model outputs different results than what you expect.
+You can make the `メロンパン` into a single token by manipulating the model following the steps below:
+
+1. Dump a dictionary by the following command:
+   ```
+   % cargo run --release -p manipulate_model -- --model-in path/to/jp-0.4.7-5-tokenize.model.zst --dump-dict path/to/dictionary.csv
+   ```
+
+2. Edit the dictionary.
+
+   The dictionary is a CSV file. Each row contains a word and its corresponding weights in the following order:
+
+   * `right_weight` - A weight that is added when the word is found to the right of the boundary.
+   * `inside_weight` - A weight that is added when the word overlaps the boundary.
+   * `left_weight` - A weight that is added when the word is found to the left of the boundary.
+
+   Vaporetto splits a text when the total weight of the boundary is a positive number, so we add a new entry as follows:
+   ```diff
+    メロレオストーシス,6944,-2553,5319
+    メロン,8924,-10861,7081
+   +メロンパン,0,-100000,0
+    メロン果実,4168,-1165,3558
+    メロヴィング,6999,-15413,7583
+   ```
+
+   In this case, `-100000` will be added when the boundary is inside of the word `メロンパン`.
+
+   Note that Vaporetto uses 32-bit integers for the total weight, so you have to be careful about overflow.
+
+   In addition, the dictionary cannot contain duplicate words.
+   When a word is already contained in the dictionary, you have to edit its existing weights.
+
+3. Replace the weight data of the model file:
+   ```
+   % cargo run --release -p manipulate_model -- --model-in path/to/jp-0.4.7-5-tokenize.model.zst --replace-dict path/to/dictionary.csv --model-out path/to/jp-0.4.7-5-tokenize-new.model.zst
+   ```
+
+Now `メロンパン` is split into a single token.
+```
+% echo '朝食はメロンパン1個だった' | cargo run --release -p predict -- --model path/to/jp-0.4.7-5-tokenize-new.model.zst
+朝食 は メロンパン 1 個 だっ た
+```
+
 ## Speed Comparison of Various Tokenizers
 
 Details can be found [here](https://github.com/legalforce-research/vaporetto/wiki/Speed-Comparison).
diff --git a/manipulate_model/Cargo.toml b/manipulate_model/Cargo.toml
new file mode 100644
index 00000000..5139cfd9
--- /dev/null
+++ b/manipulate_model/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "manipulate_model"
+version = "0.1.0"
+edition = "2018"
+
+[dependencies]
+csv = "1.1" # Unlicense OR MIT
+serde = { version = "1.0", features = ["derive"] } # MIT or Apache-2.0
+structopt = "0.3" # MIT or Apache-2.0
+vaporetto = { path = "../vaporetto" } # MIT or Apache-2.0
+zstd = "0.9" # MIT
diff --git a/manipulate_model/src/main.rs b/manipulate_model/src/main.rs
new file mode 100644
index 00000000..db4e6e87
--- /dev/null
+++ b/manipulate_model/src/main.rs
@@ -0,0 +1,85 @@
+use std::fs;
+use std::path::PathBuf;
+
+use serde::{Deserialize, Serialize};
+use structopt::StructOpt;
+use vaporetto::{Model, WordWeightRecord};
+
+#[derive(StructOpt, Debug)]
+#[structopt(
+    name = "manipulate_model",
+    about = "A program to manipulate trained models."
+)]
+struct Opt {
+    /// Input path of the model file
+    #[structopt(long)]
+    model_in: PathBuf,
+
+    /// Output path of the model file
+    #[structopt(long)]
+    model_out: Option<PathBuf>,
+
+    /// Output a dictionary contained in the model.
+    #[structopt(long)]
+    dump_dict: Option<PathBuf>,
+
+    /// Replace a dictionary if the argument is specified.
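+    /// (Editor's note, not in the original patch: the CSV is read with headers,
+    /// so it should start with the `word,right,inside,left` header row that
+    /// `--dump-dict` writes via `WordWeightRecordFlatten` below.)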
+ #[structopt(long)] + replace_dict: Option, +} + +#[derive(Deserialize, Serialize)] +struct WordWeightRecordFlatten { + word: String, + right: i32, + inside: i32, + left: i32, +} + +fn main() -> Result<(), Box> { + let opt = Opt::from_args(); + + eprintln!("Loading model file..."); + let mut f = zstd::Decoder::new(fs::File::open(opt.model_in)?)?; + let mut model = Model::read(&mut f)?; + + if let Some(path) = opt.dump_dict { + eprintln!("Saving dictionary file..."); + let file = fs::File::create(path)?; + let mut wtr = csv::Writer::from_writer(file); + for data in model.dump_dictionary() { + wtr.serialize(WordWeightRecordFlatten { + word: data.get_word().to_string(), + right: data.get_right_weight(), + inside: data.get_inside_weight(), + left: data.get_left_weight(), + })?; + } + } + + if let Some(path) = opt.replace_dict { + eprintln!("Loading dictionary file..."); + let file = fs::File::open(path)?; + let mut rdr = csv::Reader::from_reader(file); + let mut dict = vec![]; + for result in rdr.deserialize() { + let record: WordWeightRecordFlatten = result?; + dict.push(WordWeightRecord::new( + record.word, + record.right, + record.inside, + record.left, + )); + } + model.replace_dictionary(dict); + } + + if let Some(path) = opt.model_out { + eprintln!("Saving model file..."); + let mut f = zstd::Encoder::new(fs::File::create(path)?, 19)?; + model.write(&mut f)?; + f.finish()?; + } + + Ok(()) +} diff --git a/vaporetto/src/dict_model.rs b/vaporetto/src/dict_model.rs index b84cd4ac..279a3cd2 100644 --- a/vaporetto/src/dict_model.rs +++ b/vaporetto/src/dict_model.rs @@ -7,7 +7,7 @@ use crate::ngram_model::NgramModel; #[derive(Clone, Copy, Default, Serialize, Deserialize)] pub struct DictWeight { pub right: i32, - pub inner: i32, + pub inside: i32, pub left: i32, } @@ -35,17 +35,70 @@ impl DictModel { Self::Lengthwise(model) => model.is_empty(), } } + + pub fn dump_dictionary(&self) -> Vec { + match self { + Self::Wordwise(model) => model.dump_dictionary(), + Self::Lengthwise(model) => model.dump_dictionary(), + } + } } +/// Record of weights for each word. #[derive(Clone, Serialize, Deserialize)] -pub struct WordwiseDictData { +pub struct WordWeightRecord { pub(crate) word: String, pub(crate) weights: DictWeight, } +impl WordWeightRecord { + /// Creates a new word weight record. + /// + /// # Arguments + /// + /// * `word` - A word. + /// * `right` - A weight of the boundary when the word is found at right. + /// * `inside` - A weight of the boundary when the word is overlapped on the boundary. + /// * `left` - A weight of the boundary when the word is found at left. + /// + /// # Returns + /// + /// A new record. + pub const fn new(word: String, right: i32, inside: i32, left: i32) -> Self { + Self { + word, + weights: DictWeight { + right, + inside, + left, + }, + } + } + + /// Gets a reference to the word. + pub fn get_word(&self) -> &str { + &self.word + } + + /// Gets a `right` weight. + pub const fn get_right_weight(&self) -> i32 { + self.weights.right + } + + /// Gets a `inside` weight. + pub const fn get_inside_weight(&self) -> i32 { + self.weights.inside + } + + /// Gets a `left` weight. + pub const fn get_left_weight(&self) -> i32 { + self.weights.left + } +} + #[derive(Serialize, Deserialize)] pub struct DictModelWordwise { - pub(crate) data: Vec, + pub(crate) dict: Vec, } impl DictModelWordwise { @@ -63,8 +116,8 @@ impl DictModelWordwise { { word_map.insert(word, i); } - let mut new_data = vec![]; - for data in self.data.drain(..) 
{ + let mut new_dict = vec![]; + for data in self.dict.drain(..) { let word_size = data.word.chars().count(); match word_map.get(&data.word) { Some(&idx) if char_window_size >= word_size => { @@ -72,20 +125,24 @@ impl DictModelWordwise { let end = start + word_size; char_ngram_model.data[idx].weights[start] += data.weights.right; for i in start + 1..end { - char_ngram_model.data[idx].weights[i] += data.weights.inner; + char_ngram_model.data[idx].weights[i] += data.weights.inside; } char_ngram_model.data[idx].weights[end] += data.weights.left; } _ => { - new_data.push(data); + new_dict.push(data); } } } - self.data = new_data; + self.dict = new_dict; } pub fn is_empty(&self) -> bool { - self.data.is_empty() + self.dict.is_empty() + } + + pub fn dump_dictionary(&self) -> Vec { + self.dict.clone() } } @@ -121,7 +178,7 @@ impl DictModelLengthwise { let weight = &self.weights[word_size_idx]; char_ngram_model.data[idx].weights[start] += weight.right; for i in start + 1..end { - char_ngram_model.data[idx].weights[i] += weight.inner; + char_ngram_model.data[idx].weights[i] += weight.inside; } char_ngram_model.data[idx].weights[end] += weight.left; } @@ -134,4 +191,16 @@ impl DictModelLengthwise { pub fn is_empty(&self) -> bool { self.words.is_empty() } + + pub fn dump_dictionary(&self) -> Vec { + let mut result = vec![]; + for word in &self.words { + let word = word.clone(); + let word_size = word.chars().count(); + let word_size_idx = word_size.min(self.weights.len()) - 1; + let weights = self.weights[word_size_idx]; + result.push(WordWeightRecord { word, weights }); + } + result + } } diff --git a/vaporetto/src/dict_scorer.rs b/vaporetto/src/dict_scorer.rs index dcc64502..59268538 100644 --- a/vaporetto/src/dict_scorer.rs +++ b/vaporetto/src/dict_scorer.rs @@ -34,7 +34,7 @@ impl DictScorerWordwise { pub fn new(model: DictModelWordwise) -> Result { let mut words = vec![]; let mut weights = vec![]; - for pair in model.data { + for pair in model.dict { words.push(pair.word); weights.push(pair.weights); } @@ -54,7 +54,7 @@ impl DictScorerWordwise { ys[m_start - 1] += dict_weight.right; } for y in &mut ys[m_start..m_end - 1] { - *y += dict_weight.inner; + *y += dict_weight.inside; } if m_end <= ys.len() { ys[m_end - 1] += dict_weight.left; @@ -95,7 +95,7 @@ impl DictScorerLengthwise { ys[m_start - 1] += dict_weight.right; } for y in &mut ys[m_start..m_end - 1] { - *y += dict_weight.inner; + *y += dict_weight.inside; } if m_end <= ys.len() { ys[m_end - 1] += dict_weight.left; diff --git a/vaporetto/src/kytea_model.rs b/vaporetto/src/kytea_model.rs index a6de598b..95ee1b03 100644 --- a/vaporetto/src/kytea_model.rs +++ b/vaporetto/src/kytea_model.rs @@ -3,7 +3,7 @@ use std::io::BufRead; use byteorder::{LittleEndian, ReadBytesExt}; -use crate::dict_model::{DictModel, DictModelWordwise, DictWeight, WordwiseDictData}; +use crate::dict_model::{DictModel, DictModelWordwise, DictWeight, WordWeightRecord}; use crate::errors::{Result, VaporettoError}; use crate::model::Model; use crate::ngram_model::{NgramData, NgramModel}; @@ -433,7 +433,7 @@ impl TryFrom for Model { }); } - let mut dict_data = vec![]; + let mut dict = vec![]; if let Some(kytea_dict) = model.dict { for (w, data) in kytea_dict.dump_items() { let word_len = std::cmp::min(w.len(), config.dict_n as usize) - 1; @@ -442,11 +442,11 @@ impl TryFrom for Model { if data.in_dict >> j & 1 == 1 { let offset = 3 * config.dict_n as usize * j + 3 * word_len; weights.right += feature_lookup.dict_vec[offset] as i32; - weights.inner += 
feature_lookup.dict_vec[offset + 1] as i32; + weights.inside += feature_lookup.dict_vec[offset + 1] as i32; weights.left += feature_lookup.dict_vec[offset + 2] as i32; } } - dict_data.push(WordwiseDictData { + dict.push(WordWeightRecord { word: w.into_iter().collect(), weights, }); @@ -456,7 +456,7 @@ impl TryFrom for Model { Ok(Self { char_ngram_model: NgramModel::new(char_ngrams), type_ngram_model: NgramModel::new(type_ngrams), - dict_model: DictModel::Wordwise(DictModelWordwise { data: dict_data }), + dict_model: DictModel::Wordwise(DictModelWordwise { dict }), quantize_multiplier, diff --git a/vaporetto/src/lib.rs b/vaporetto/src/lib.rs index 1705d53c..c1214fe7 100644 --- a/vaporetto/src/lib.rs +++ b/vaporetto/src/lib.rs @@ -47,6 +47,7 @@ mod trainer; #[cfg(feature = "kytea")] mod kytea_model; +pub use dict_model::WordWeightRecord; pub use model::Model; pub use predictor::Predictor; pub use sentence::{BoundaryType, CharacterType, Sentence}; diff --git a/vaporetto/src/model.rs b/vaporetto/src/model.rs index cbbfa18b..58bdc492 100644 --- a/vaporetto/src/model.rs +++ b/vaporetto/src/model.rs @@ -2,7 +2,7 @@ use std::io::{Read, Write}; use serde::{Deserialize, Serialize}; -use crate::dict_model::DictModel; +use crate::dict_model::{DictModel, DictModelWordwise, WordWeightRecord}; use crate::ngram_model::NgramModel; #[cfg(feature = "train")] @@ -140,7 +140,7 @@ impl Model { } FeatureContent::DictionaryWord(size) => match feature.rel_position { 0 => dict_weights[size - 1].right = weight as i32, - 1 => dict_weights[size - 1].inner = weight as i32, + 1 => dict_weights[size - 1].inside = weight as i32, 2 => dict_weights[size - 1].left = weight as i32, _ => panic!("Invalid rel_position"), }, @@ -161,4 +161,12 @@ impl Model { type_window_size, } } + + pub fn dump_dictionary(&self) -> Vec { + self.dict_model.dump_dictionary() + } + + pub fn replace_dictionary(&mut self, dict: Vec) { + self.dict_model = DictModel::Wordwise(DictModelWordwise { dict }); + } } diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs index 298ba9e7..e993b03d 100644 --- a/vaporetto/src/predictor.rs +++ b/vaporetto/src/predictor.rs @@ -185,7 +185,7 @@ mod tests { use super::*; use crate::dict_model::{ - DictModel, DictModelLengthwise, DictModelWordwise, DictWeight, WordwiseDictData, + DictModel, DictModelLengthwise, DictModelWordwise, DictWeight, WordWeightRecord, }; use crate::ngram_model::{NgramData, NgramModel}; @@ -262,12 +262,12 @@ mod tests { weights: vec![ DictWeight { right: 40, - inner: 41, + inside: 41, left: 42, }, DictWeight { right: 43, - inner: 44, + inside: 44, left: 45, }, ], @@ -352,17 +352,17 @@ mod tests { weights: vec![ DictWeight { right: 38, - inner: 39, + inside: 39, left: 40, }, DictWeight { right: 41, - inner: 42, + inside: 42, left: 43, }, DictWeight { right: 44, - inner: 45, + inside: 45, left: 46, }, ], @@ -443,28 +443,28 @@ mod tests { }, ]), dict_model: DictModel::Wordwise(DictModelWordwise { - data: vec![ - WordwiseDictData { + dict: vec![ + WordWeightRecord { word: "国民".to_string(), weights: DictWeight { right: 38, - inner: 39, + inside: 39, left: 40, }, }, - WordwiseDictData { + WordWeightRecord { word: "世界".to_string(), weights: DictWeight { right: 41, - inner: 42, + inside: 42, left: 43, }, }, - WordwiseDictData { + WordWeightRecord { word: "世".to_string(), weights: DictWeight { right: 44, - inner: 45, + inside: 45, left: 46, }, }, From 6efa477ac5527626245743ca39147858e2d1adad Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Tue, 7 Dec 2021 12:40:15 +0900 Subject: [PATCH 
23/60] Add update functions to Sentence (#14) * Add chars field * Separate parsers * Add tests * Use update_raw() in predict command * Fix format and refactoring --- predict/src/main.rs | 40 +- vaporetto/src/sentence.rs | 871 +++++++++++++++++++++++++++++++------- 2 files changed, 751 insertions(+), 160 deletions(-) diff --git a/predict/src/main.rs b/predict/src/main.rs index d8201bce..d8fb3570 100644 --- a/predict/src/main.rs +++ b/predict/src/main.rs @@ -75,23 +75,31 @@ fn main() -> Result<(), Box> { eprintln!("Start tokenization"); let mut n_boundaries = 0; let start = Instant::now(); - for line in stdin().lock().lines() { - let line = line?; - let s = if opt.no_norm { - let s = Sentence::from_raw(line)?; - predictor.predict(s) - } else { + let mut s = Sentence::from_raw(" ")?; + if opt.no_norm { + for line in stdin().lock().lines() { + let line = line?; + s.update_raw(line)?; + s = predictor.predict(s); + s = post_filters.iter().fold(s, |s, filter| filter.filter(s)); + n_boundaries += s.boundaries().len(); + let toks = s.to_tokenized_string()?; + println!("{}", toks); + } + } else { + let mut s_norm = Sentence::from_raw(" ")?; + for line in stdin().lock().lines() { + let line = line?; let norm = fullwidth_filter.filter(&line); - let mut s_orig = Sentence::from_raw(line)?; - let s = Sentence::from_raw(norm)?; - let s = predictor.predict(s); - s_orig.boundaries_mut().clone_from_slice(s.boundaries()); - s_orig - }; - let s = post_filters.iter().fold(s, |s, filter| filter.filter(s)); - n_boundaries += s.boundaries().len(); - let toks = s.to_tokenized_string()?; - println!("{}", toks); + s.update_raw(line)?; + s_norm.update_raw(norm)?; + s_norm = predictor.predict(s_norm); + s.boundaries_mut().clone_from_slice(s_norm.boundaries()); + s = post_filters.iter().fold(s, |s, filter| filter.filter(s)); + n_boundaries += s.boundaries().len(); + let toks = s.to_tokenized_string()?; + println!("{}", toks); + } } let duration = start.elapsed(); eprintln!("Elapsed: {} [sec]", duration.as_secs_f64()); diff --git a/vaporetto/src/sentence.rs b/vaporetto/src/sentence.rs index fb32ca4d..c01e9528 100644 --- a/vaporetto/src/sentence.rs +++ b/vaporetto/src/sentence.rs @@ -80,6 +80,7 @@ pub enum BoundaryType { #[derive(Debug, PartialEq, Clone)] pub struct Sentence { pub(crate) text: String, + pub(crate) chars: Vec, pub(crate) str_to_char_pos: Vec, pub(crate) char_to_str_pos: Vec, pub(crate) char_type: Vec, @@ -88,31 +89,205 @@ pub struct Sentence { } impl Sentence { - fn common_info(chars: &[char]) -> (Vec, Vec, Vec) { - let mut char_to_str_pos = Vec::with_capacity(chars.len() + 1); - let mut char_type = Vec::with_capacity(chars.len()); + fn internal_new(text: String, chars: Vec, boundaries: Vec) -> Self { + let mut s = Self { + text, + chars, + str_to_char_pos: Vec::with_capacity(0), + char_to_str_pos: Vec::with_capacity(0), + char_type: Vec::with_capacity(0), + boundaries, + boundary_scores: None, + }; + s.update_common_info(); + s + } + + fn clear(&mut self) { + self.text.clear(); + self.text.push(' '); + self.chars.clear(); + self.chars.push(' '); + self.str_to_char_pos.clear(); + self.str_to_char_pos.push(0); + self.str_to_char_pos.push(1); + self.char_to_str_pos.clear(); + self.char_to_str_pos.push(0); + self.char_to_str_pos.push(1); + self.char_type.clear(); + self.char_type.push(CharacterType::Other as u8); + self.boundaries.clear(); + self.boundary_scores = None; + } + + fn parse_raw_text( + raw_text: &str, + chars: &mut Vec, + boundaries: &mut Vec, + ) -> Result<()> { + if raw_text.is_empty() { + 
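+            // (Editor's note, not in the original patch: when any parse_* helper
+            // fails, the update_* callers below reset the sentence to a single
+            // white space via clear() before returning the error.)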
return Err(VaporettoError::invalid_argument("raw_text", "is empty")); + } + + chars.clear(); + + for c in raw_text.chars() { + chars.push(c); + } + boundaries.clear(); + boundaries.resize(chars.len() - 1, BoundaryType::Unknown); + + Ok(()) + } + + fn parse_tokenized_text( + tokenized_text: &str, + text: &mut String, + chars: &mut Vec, + boundaries: &mut Vec, + ) -> Result<()> { + if tokenized_text.is_empty() { + return Err(VaporettoError::invalid_argument( + "tokenized_text", + "is empty", + )); + } + + text.clear(); + text.reserve(tokenized_text.len()); + chars.clear(); + boundaries.clear(); + + let mut prev_boundary = false; + let mut escape = false; + for c in tokenized_text.chars() { + match (escape, c) { + (false, '\\') => { + escape = true; + } + (false, ' ') => { + if chars.is_empty() { + return Err(VaporettoError::invalid_argument( + "tokenized_text", + "starts with a whitespace", + )); + } else if prev_boundary { + return Err(VaporettoError::invalid_argument( + "tokenized_text", + "contains consecutive whitespaces", + )); + } + prev_boundary = true; + } + (_, _) => { + if !chars.is_empty() { + boundaries.push(if prev_boundary { + BoundaryType::WordBoundary + } else { + BoundaryType::NotWordBoundary + }); + } + prev_boundary = false; + escape = false; + text.push(c); + chars.push(c); + } + }; + } + + if prev_boundary { + return Err(VaporettoError::invalid_argument( + "tokenized_text", + "ends with a whitespace", + )); + } + + Ok(()) + } + + fn parse_partial_annotation( + labeled_text: &str, + text: &mut String, + chars: &mut Vec, + boundaries: &mut Vec, + ) -> Result<()> { + if labeled_text.is_empty() { + return Err(VaporettoError::invalid_argument("labeled_text", "is empty")); + } + + let labeled_chars: Vec = labeled_text.chars().collect(); + if labeled_chars.len() % 2 == 0 { + return Err(VaporettoError::invalid_argument( + "labeled_text", + format!("invalid length: {}", labeled_chars.len()), + )); + } + + text.clear(); + text.reserve(labeled_text.len() - labeled_chars.len() / 2); + chars.clear(); + boundaries.clear(); + + for c in labeled_chars.iter().skip(1).step_by(2) { + boundaries.push(match c { + ' ' => BoundaryType::Unknown, + '|' => BoundaryType::WordBoundary, + '-' => BoundaryType::NotWordBoundary, + _ => { + return Err(VaporettoError::invalid_argument( + "labeled_text", + format!("contains invalid boundary character: '{}'", c), + )) + } + }); + } + for c in labeled_chars.into_iter().step_by(2) { + text.push(c); + chars.push(c); + } + + Ok(()) + } + + /// Updates char_to_str_pos, str_to_char_pos, and char_type. + /// + /// This function allocates: + /// + /// * char_to_str_pos: chars.len() + 1 + /// * str_to_char_pos: text.len() + 1 + /// * char_type: chars.len() + /// + /// If these variables already have sufficient spaces, this function reuses them. 
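+    ///
+    /// (Editor's example, not in the original patch: for the text "あa",
+    /// char_to_str_pos becomes [0, 3, 4] and str_to_char_pos becomes
+    /// [0, 0, 0, 1, 2], since 'あ' occupies three UTF-8 bytes.)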
+    fn update_common_info(&mut self) {
+        self.char_to_str_pos.clear();
+        self.str_to_char_pos.clear();
+        self.char_type.clear();
         let mut pos = 0;
-        char_to_str_pos.push(0);
-        for &c in chars {
+        self.char_to_str_pos.push(0);
+        for &c in &self.chars {
             pos += c.len_utf8();
-            char_to_str_pos.push(pos);
-            char_type.push(CharacterType::get_type(c) as u8)
+            self.char_to_str_pos.push(pos);
+            self.char_type.push(CharacterType::get_type(c) as u8)
         }
-        let mut str_to_char_pos = vec![0; char_to_str_pos.last().unwrap_or(&0) + 1];
-        for (i, &j) in char_to_str_pos.iter().enumerate() {
-            // j < str_to_char_pos.len()
+
+        debug_assert!(pos == self.text.len());
+
+        self.str_to_char_pos.fill(0);
+        self.str_to_char_pos.resize(self.text.len() + 1, 0);
+        for (i, &j) in self.char_to_str_pos.iter().enumerate() {
+            // j is always lower than pos + 1, so the following is safe.
             unsafe {
-                *str_to_char_pos.get_unchecked_mut(j) = i;
+                *self.str_to_char_pos.get_unchecked_mut(j) = i;
             }
         }
-        (char_to_str_pos, str_to_char_pos, char_type)
     }
 
     /// Creates a new [`Sentence`] from a given string.
     ///
     /// # Arguments
     ///
-    /// * `text` - A raw string without any annotation.
+    /// * `raw_text` - A raw string without any annotation.
     ///
     /// # Returns
     ///
@@ -120,7 +295,7 @@ impl Sentence {
     ///
     /// # Errors
     ///
-    /// If the given `text` is empty, an error variant will be returned.
+    /// If the given `raw_text` is empty, an error variant will be returned.
     ///
     /// # Examples
     ///
     /// ```
@@ -133,29 +308,56 @@ impl Sentence {
     /// let s = Sentence::from_raw("");
     /// assert!(s.is_err());
     /// ```
-    pub fn from_raw<S>(text: S) -> Result<Self>
+    pub fn from_raw<S>(raw_text: S) -> Result<Self>
     where
         S: Into<String>,
     {
-        let text = text.into();
+        let raw_text = raw_text.into();
 
-        if text.is_empty() {
-            return Err(VaporettoError::invalid_argument("text", "is empty"));
-        }
+        let mut chars = Vec::with_capacity(0);
+        let mut boundaries = Vec::with_capacity(0);
+        Self::parse_raw_text(&raw_text, &mut chars, &mut boundaries)?;
 
-        let chars: Vec<char> = text.chars().collect();
-        let boundaries = vec![BoundaryType::Unknown; chars.len() - 1];
+        Ok(Self::internal_new(raw_text, chars, boundaries))
+    }
 
-        let (char_to_str_pos, str_to_char_pos, char_type) = Self::common_info(&chars);
+    /// Updates the [`Sentence`] using a given string.
+    ///
+    /// # Arguments
+    ///
+    /// * `raw_text` - A raw string without any annotation.
+    ///
+    /// # Errors
+    ///
+    /// If the given `raw_text` is empty, an error variant will be returned.
+    /// When an error occurs, the sentence will be replaced with a white space.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use vaporetto::Sentence;
+    ///
+    /// let mut s = Sentence::from_raw("How are you?").unwrap();
+    /// s.update_raw("I am fine.").unwrap();
+    /// assert_eq!("I am fine.", s.to_raw_string());
+    /// ```
+    pub fn update_raw<S>(&mut self, raw_text: S) -> Result<()>
+    where
+        S: Into<String>,
+    {
+        let raw_text = raw_text.into();
 
-        Ok(Self {
-            text,
-            str_to_char_pos,
-            char_to_str_pos,
-            char_type,
-            boundaries,
-            boundary_scores: None,
-        })
+        match Self::parse_raw_text(&raw_text, &mut self.chars, &mut self.boundaries) {
+            Ok(_) => {
+                self.text = raw_text;
+                self.update_common_info();
+                Ok(())
+            }
+            Err(e) => {
+                self.clear();
+                Err(e)
+            }
+        }
     }
 
     /// Gets a string without any annotation.
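[Editor's aside, not part of the patch: the new `update_*` methods exist so callers can reuse one `Sentence` allocation across many inputs, as the reworked predict command above does. A minimal sketch of that pattern, assuming a `predictor` built as in the earlier patches:]

```rust
use std::io::{prelude::*, stdin};

use vaporetto::{Predictor, Sentence};

fn tokenize_stdin(predictor: &Predictor) -> Result<(), Box<dyn std::error::Error>> {
    // Allocate one Sentence up front; update_raw() then reuses its internal
    // buffers instead of reallocating for every line.
    let mut s = Sentence::from_raw(" ")?; // placeholder content
    for line in stdin().lock().lines() {
        s.update_raw(line?)?;
        s = predictor.predict(s);
        println!("{}", s.to_tokenized_string()?);
    }
    Ok(())
}
```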
@@ -211,68 +413,61 @@
     {
         let tokenized_text = tokenized_text.as_ref();
 
-        if tokenized_text.is_empty() {
-            return Err(VaporettoError::invalid_argument(
-                "tokenized_text",
-                "is empty",
-            ));
-        }
+        let mut text = String::with_capacity(0);
+        let mut chars = Vec::with_capacity(0);
+        let mut boundaries = Vec::with_capacity(0);
 
-        let tokenized_chars: Vec<char> = tokenized_text.chars().collect();
-        let mut chars = Vec::with_capacity(tokenized_chars.len());
-        let mut boundaries = Vec::with_capacity(tokenized_chars.len() - 1);
+        Self::parse_tokenized_text(tokenized_text, &mut text, &mut chars, &mut boundaries)?;
 
-        let mut prev_boundary = false;
-        let mut escape = false;
-        for c in tokenized_chars {
-            match (escape, c) {
-                (false, '\\') => {
-                    escape = true;
-                }
-                (false, ' ') => {
-                    if chars.is_empty() {
-                        return Err(VaporettoError::invalid_argument(
-                            "tokenized_text",
-                            "starts with a whitespace",
-                        ));
-                    } else if prev_boundary {
-                        return Err(VaporettoError::invalid_argument(
-                            "tokenized_text",
-                            "contains consecutive whitespaces",
-                        ));
-                    }
-                    prev_boundary = true;
-                }
-                (_, _) => {
-                    if !chars.is_empty() {
-                        boundaries.push(if prev_boundary {
-                            BoundaryType::WordBoundary
-                        } else {
-                            BoundaryType::NotWordBoundary
-                        });
-                    }
-                    prev_boundary = false;
-                    escape = false;
-                    chars.push(c);
-                }
-            };
-        }
-        if prev_boundary {
-            return Err(VaporettoError::invalid_argument(
-                "tokenized_text",
-                "ends with a whitespace",
-            ));
-        }
+        Ok(Self::internal_new(text, chars, boundaries))
+    }
 
-        let (char_to_str_pos, str_to_char_pos, char_type) = Self::common_info(&chars);
-        Ok(Self {
-            text: chars.iter().collect(),
-            char_to_str_pos,
-            str_to_char_pos,
-            char_type,
-            boundaries,
-            boundary_scores: None,
-        })
+    /// Updates the [`Sentence`] using a tokenized string.
+    ///
+    /// # Arguments
+    ///
+    /// * `tokenized_text` - A tokenized string containing whitespaces for word boundaries.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error variant when:
+    ///
+    /// * `tokenized_text` is empty.
+    /// * `tokenized_text` starts/ends with a whitespace.
+    /// * `tokenized_text` contains consecutive whitespaces.
+    ///
+    /// When an error occurs, the sentence will be replaced with a white space.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use vaporetto::Sentence;
+    ///
+    /// let mut s = Sentence::from_tokenized("How are you?").unwrap();
+    /// s.update_tokenized("I am fine").unwrap();
+    /// assert_eq!("Iamfine", s.to_raw_string());
+    /// ```
+    pub fn update_tokenized<S>(&mut self, tokenized_text: S) -> Result<()>
+    where
+        S: AsRef<str>,
+    {
+        let tokenized_text = tokenized_text.as_ref();
+
+        match Self::parse_tokenized_text(
+            tokenized_text,
+            &mut self.text,
+            &mut self.chars,
+            &mut self.boundaries,
+        ) {
+            Ok(_) => {
+                self.update_common_info();
+                Ok(())
+            }
+            Err(e) => {
+                self.clear();
+                Err(e)
+            }
+        }
     }
 
     /// Generates a string with whitespaces for word boundaries.
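[Editor's aside, not part of the patch: in the tokenized format, `\` escapes the next character, so a token can contain a literal space or backslash. A small illustration distilled from the tests below:]

```rust
use vaporetto::Sentence;

// "\\ " keeps a literal space inside a token; "\\\\" keeps a literal backslash.
let s = Sentence::from_tokenized("火星 猫 の 生態 ( M \\ et\\ al. )").unwrap();
assert_eq!("火星猫の生態(M et al.)", s.to_raw_string());
```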
@@ -404,46 +599,60 @@
     {
         let labeled_text = labeled_text.as_ref();
 
-        if labeled_text.is_empty() {
-            return Err(VaporettoError::invalid_argument("labeled_text", "is empty"));
-        }
+        let mut text = String::with_capacity(0);
+        let mut chars = Vec::with_capacity(0);
+        let mut boundaries = Vec::with_capacity(0);
+        Self::parse_partial_annotation(labeled_text, &mut text, &mut chars, &mut boundaries)?;
 
-        let labeled_chars: Vec<char> = labeled_text.chars().collect();
-        if labeled_chars.len() & 0x01 == 0 {
-            return Err(VaporettoError::invalid_argument(
-                "labeled_text",
-                format!("invalid length: {}", labeled_chars.len()),
-            ));
-        }
-        let mut chars = Vec::with_capacity(labeled_chars.len() / 2 + 1);
-        let mut boundaries = Vec::with_capacity(labeled_chars.len() / 2);
+        Ok(Self::internal_new(text, chars, boundaries))
+    }
 
-        for c in labeled_chars.iter().skip(1).step_by(2) {
-            boundaries.push(match c {
-                ' ' => BoundaryType::Unknown,
-                '|' => BoundaryType::WordBoundary,
-                '-' => BoundaryType::NotWordBoundary,
-                _ => {
-                    return Err(VaporettoError::invalid_argument(
-                        "labeled_text",
-                        format!("contains invalid boundary character: '{}'", c),
-                    ))
-                }
-            });
-        }
-        for c in labeled_chars.into_iter().step_by(2) {
-            chars.push(c);
-        }
+    /// Updates the [`Sentence`] using a string with partial annotations.
+    ///
+    /// # Arguments
+    ///
+    /// * `labeled_text` - A string with partial annotations.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error variant when:
+    ///
+    /// * `labeled_text` is empty.
+    /// * The length of `labeled_text` is an even number.
+    /// * `labeled_text` contains invalid boundary characters.
+    ///
+    /// When an error occurs, the sentence will be replaced with a white space.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use vaporetto::Sentence;
+    ///
+    /// let mut s = Sentence::from_partial_annotation("g-o-o-d|i-d e-a").unwrap();
+    /// s.update_partial_annotation("h-e-l-l-o").unwrap();
+    /// assert_eq!("hello", s.to_raw_string());
+    /// ```
+    pub fn update_partial_annotation<S>(&mut self, labeled_text: S) -> Result<()>
+    where
+        S: AsRef<str>,
+    {
+        let labeled_text = labeled_text.as_ref();
 
-        let (char_to_str_pos, str_to_char_pos, char_type) = Self::common_info(&chars);
-        Ok(Self {
-            text: chars.iter().collect(),
-            char_to_str_pos,
-            str_to_char_pos,
-            char_type,
-            boundaries,
-            boundary_scores: None,
-        })
+        match Self::parse_partial_annotation(
+            labeled_text,
+            &mut self.text,
+            &mut self.chars,
+            &mut self.boundaries,
+        ) {
+            Ok(_) => {
+                self.update_common_info();
+                Ok(())
+            }
+            Err(e) => {
+                self.clear();
+                Err(e)
+            }
+        }
     }
 
     /// Generates a string with partial annotations.
@@ -501,9 +710,27 @@
     ///
     /// # Returns
     ///
-    /// A mutable reference to the boundary information.
-    pub fn boundaries_mut(&mut self) -> &mut [BoundaryType] {
-        &mut self.boundaries
+    /// A mutable reference to the boundary information.
+    pub fn boundaries_mut(&mut self) -> &mut [BoundaryType] {
+        &mut self.boundaries
+    }
+
+    /// Gets a reference to the characters.
+    ///
+    /// # Returns
+    ///
+    /// A reference to the characters.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use vaporetto::Sentence;
+    ///
+    /// let s = Sentence::from_raw("A1あエ漢?").unwrap();
+    /// assert_eq!(&['A', '1', 'あ', 'エ', '漢', '?'], s.chars());
+    /// ```
+    pub fn chars(&self) -> &[char] {
+        &self.chars
     }
 
     /// Gets a reference to the character type information.
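[Editor's aside, not part of the patch: in a partially annotated string, every second character is a boundary label: `-` marks a known non-boundary, `|` a known boundary, and a space leaves the boundary unannotated. A sketch based on the tests below:]

```rust
use vaporetto::Sentence;

let s = Sentence::from_partial_annotation("火-星 猫|の|生-態").unwrap();
assert_eq!("火星猫の生態", s.to_raw_string());
// The space after 星 leaves that boundary as BoundaryType::Unknown.
```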
@@ -576,19 +803,41 @@ mod tests { fn test_sentence_from_raw_empty() { let s = Sentence::from_raw(""); - assert!(s.is_err()); assert_eq!( - "InvalidArgumentError: text: is empty", + "InvalidArgumentError: raw_text: is empty", &s.err().unwrap().to_string() ); } + #[test] + fn test_sentence_update_raw_empty() { + let mut s = Sentence::from_raw("12345").unwrap(); + let result = s.update_raw(""); + + assert_eq!( + "InvalidArgumentError: raw_text: is empty", + &result.err().unwrap().to_string() + ); + + let expected = Sentence { + text: " ".to_string(), + chars: vec![' '], + str_to_char_pos: vec![0, 1], + char_to_str_pos: vec![0, 1], + char_type: ct2u8vec![Other], + boundaries: vec![], + boundary_scores: None, + }; + assert_eq!(expected, s); + } + #[test] fn test_sentence_from_raw_one() { let s = Sentence::from_raw("あ"); let expected = Sentence { text: "あ".to_string(), + chars: vec!['あ'], str_to_char_pos: vec![0, 0, 0, 1], char_to_str_pos: vec![0, 3], char_type: ct2u8vec![Hiragana], @@ -598,12 +847,33 @@ mod tests { assert_eq!(expected, s.unwrap()); } + #[test] + fn test_sentence_update_raw_one() { + let mut s = Sentence::from_raw("12345").unwrap(); + s.update_raw("あ").unwrap(); + + let expected = Sentence { + text: "あ".to_string(), + chars: vec!['あ'], + str_to_char_pos: vec![0, 0, 0, 1], + char_to_str_pos: vec![0, 3], + char_type: ct2u8vec![Hiragana], + boundaries: vec![], + boundary_scores: None, + }; + assert_eq!(expected, s); + } + #[test] fn test_sentence_from_raw() { let s = Sentence::from_raw("Rustで良いプログラミング体験を!"); let expected = Sentence { text: "Rustで良いプログラミング体験を!".to_string(), + chars: vec![ + 'R', 'u', 's', 't', 'で', '良', 'い', 'プ', 'ロ', 'グ', 'ラ', 'ミ', 'ン', 'グ', + '体', '験', 'を', '!', + ], str_to_char_pos: vec![ 0, 1, 2, 3, 4, 0, 0, 5, 0, 0, 6, 0, 0, 7, 0, 0, 8, 0, 0, 9, 0, 0, 10, 0, 0, 11, 0, 0, 12, 0, 0, 13, 0, 0, 14, 0, 0, 15, 0, 0, 16, 0, 0, 17, 0, 0, 18, @@ -621,6 +891,34 @@ mod tests { assert_eq!(expected, s.unwrap()); } + #[test] + fn test_sentence_update_raw() { + let mut s = Sentence::from_raw("12345").unwrap(); + s.update_raw("Rustで良いプログラミング体験を!").unwrap(); + + let expected = Sentence { + text: "Rustで良いプログラミング体験を!".to_string(), + chars: vec![ + 'R', 'u', 's', 't', 'で', '良', 'い', 'プ', 'ロ', 'グ', 'ラ', 'ミ', 'ン', 'グ', + '体', '験', 'を', '!', + ], + str_to_char_pos: vec![ + 0, 1, 2, 3, 4, 0, 0, 5, 0, 0, 6, 0, 0, 7, 0, 0, 8, 0, 0, 9, 0, 0, 10, 0, 0, 11, 0, + 0, 12, 0, 0, 13, 0, 0, 14, 0, 0, 15, 0, 0, 16, 0, 0, 17, 0, 0, 18, + ], + char_to_str_pos: vec![ + 0, 1, 2, 3, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, + ], + char_type: ct2u8vec![ + Roman, Roman, Roman, Roman, Hiragana, Kanji, Hiragana, Katakana, Katakana, + Katakana, Katakana, Katakana, Katakana, Katakana, Kanji, Kanji, Hiragana, Other, + ], + boundaries: vec![Unknown; 17], + boundary_scores: None, + }; + assert_eq!(expected, s); + } + #[test] fn test_sentence_to_raw() { let s = Sentence::from_raw("Rustで良いプログラミング体験を!"); @@ -635,52 +933,137 @@ mod tests { fn test_sentence_from_tokenized_empty() { let s = Sentence::from_tokenized(""); - assert!(s.is_err()); assert_eq!( "InvalidArgumentError: tokenized_text: is empty", &s.err().unwrap().to_string() ); } + #[test] + fn test_sentence_update_tokenized_empty() { + let mut s = Sentence::from_raw("12345").unwrap(); + let result = s.update_tokenized(""); + + assert_eq!( + "InvalidArgumentError: tokenized_text: is empty", + &result.err().unwrap().to_string() + ); + + let expected = Sentence { + text: " ".to_string(), + chars: vec![' '], + str_to_char_pos: vec![0, 1], + 
char_to_str_pos: vec![0, 1], + char_type: ct2u8vec![Other], + boundaries: vec![], + boundary_scores: None, + }; + assert_eq!(expected, s); + } + #[test] fn test_sentence_from_tokenized_start_with_space() { let s = Sentence::from_tokenized(" Rust で 良い プログラミング 体験 を !"); - assert!(s.is_err()); assert_eq!( "InvalidArgumentError: tokenized_text: starts with a whitespace", &s.err().unwrap().to_string() ); } + #[test] + fn test_sentence_update_tokenized_start_with_space() { + let mut s = Sentence::from_raw("12345").unwrap(); + let result = s.update_tokenized(" Rust で 良い プログラミング 体験 を !"); + + assert_eq!( + "InvalidArgumentError: tokenized_text: starts with a whitespace", + &result.err().unwrap().to_string() + ); + + let expected = Sentence { + text: " ".to_string(), + chars: vec![' '], + str_to_char_pos: vec![0, 1], + char_to_str_pos: vec![0, 1], + char_type: ct2u8vec![Other], + boundaries: vec![], + boundary_scores: None, + }; + assert_eq!(expected, s); + } + #[test] fn test_sentence_from_tokenized_end_with_space() { let s = Sentence::from_tokenized("Rust で 良い プログラミング 体験 を ! "); - assert!(s.is_err()); assert_eq!( "InvalidArgumentError: tokenized_text: ends with a whitespace", &s.err().unwrap().to_string() ); } + #[test] + fn test_sentence_update_tokenized_end_with_space() { + let mut s = Sentence::from_raw("12345").unwrap(); + let result = s.update_tokenized("Rust で 良い プログラミング 体験 を ! "); + + assert_eq!( + "InvalidArgumentError: tokenized_text: ends with a whitespace", + &result.err().unwrap().to_string() + ); + + let expected = Sentence { + text: " ".to_string(), + chars: vec![' '], + str_to_char_pos: vec![0, 1], + char_to_str_pos: vec![0, 1], + char_type: ct2u8vec![Other], + boundaries: vec![], + boundary_scores: None, + }; + assert_eq!(expected, s); + } + #[test] fn test_sentence_from_tokenized_two_spaces() { let s = Sentence::from_tokenized("Rust で 良い プログラミング 体験 を !"); - assert!(s.is_err()); assert_eq!( "InvalidArgumentError: tokenized_text: contains consecutive whitespaces", &s.err().unwrap().to_string() ); } + #[test] + fn test_sentence_update_tokenized_two_spaces() { + let mut s = Sentence::from_raw("12345").unwrap(); + let result = s.update_tokenized("Rust で 良い プログラミング 体験 を !"); + + assert_eq!( + "InvalidArgumentError: tokenized_text: contains consecutive whitespaces", + &result.err().unwrap().to_string() + ); + + let expected = Sentence { + text: " ".to_string(), + chars: vec![' '], + str_to_char_pos: vec![0, 1], + char_to_str_pos: vec![0, 1], + char_type: ct2u8vec![Other], + boundaries: vec![], + boundary_scores: None, + }; + assert_eq!(expected, s); + } + #[test] fn test_sentence_from_tokenized_one() { let s = Sentence::from_tokenized("あ"); let expected = Sentence { text: "あ".to_string(), + chars: vec!['あ'], str_to_char_pos: vec![0, 0, 0, 1], char_to_str_pos: vec![0, 3], char_type: ct2u8vec![Hiragana], @@ -690,12 +1073,33 @@ mod tests { assert_eq!(expected, s.unwrap()); } + #[test] + fn test_sentence_update_tokenized_one() { + let mut s = Sentence::from_raw("12345").unwrap(); + s.update_tokenized("あ").unwrap(); + + let expected = Sentence { + text: "あ".to_string(), + chars: vec!['あ'], + str_to_char_pos: vec![0, 0, 0, 1], + char_to_str_pos: vec![0, 3], + char_type: ct2u8vec![Hiragana], + boundaries: vec![], + boundary_scores: None, + }; + assert_eq!(expected, s); + } + #[test] fn test_sentence_from_tokenized() { let s = Sentence::from_tokenized("Rust で 良い プログラミング 体験 を !"); let expected = Sentence { text: "Rustで良いプログラミング体験を!".to_string(), + chars: vec![ + 'R', 'u', 's', 't', 'で', '良', 
'い', 'プ', 'ロ', 'グ', 'ラ', 'ミ', 'ン', 'グ', + '体', '験', 'を', '!', + ], str_to_char_pos: vec![ 0, 1, 2, 3, 4, 0, 0, 5, 0, 0, 6, 0, 0, 7, 0, 0, 8, 0, 0, 9, 0, 0, 10, 0, 0, 11, 0, 0, 12, 0, 0, 13, 0, 0, 14, 0, 0, 15, 0, 0, 16, 0, 0, 17, 0, 0, 18, @@ -731,12 +1135,63 @@ mod tests { assert_eq!(expected, s.unwrap()); } + #[test] + fn test_sentence_update_tokenized() { + let mut s = Sentence::from_raw("12345").unwrap(); + s.update_tokenized("Rust で 良い プログラミング 体験 を !") + .unwrap(); + + let expected = Sentence { + text: "Rustで良いプログラミング体験を!".to_string(), + chars: vec![ + 'R', 'u', 's', 't', 'で', '良', 'い', 'プ', 'ロ', 'グ', 'ラ', 'ミ', 'ン', 'グ', + '体', '験', 'を', '!', + ], + str_to_char_pos: vec![ + 0, 1, 2, 3, 4, 0, 0, 5, 0, 0, 6, 0, 0, 7, 0, 0, 8, 0, 0, 9, 0, 0, 10, 0, 0, 11, 0, + 0, 12, 0, 0, 13, 0, 0, 14, 0, 0, 15, 0, 0, 16, 0, 0, 17, 0, 0, 18, + ], + char_to_str_pos: vec![ + 0, 1, 2, 3, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, + ], + char_type: ct2u8vec![ + Roman, Roman, Roman, Roman, Hiragana, Kanji, Hiragana, Katakana, Katakana, + Katakana, Katakana, Katakana, Katakana, Katakana, Kanji, Kanji, Hiragana, Other, + ], + boundaries: vec![ + NotWordBoundary, + NotWordBoundary, + NotWordBoundary, + WordBoundary, + WordBoundary, + NotWordBoundary, + WordBoundary, + NotWordBoundary, + NotWordBoundary, + NotWordBoundary, + NotWordBoundary, + NotWordBoundary, + NotWordBoundary, + WordBoundary, + NotWordBoundary, + WordBoundary, + WordBoundary, + ], + boundary_scores: None, + }; + assert_eq!(expected, s); + } + #[test] fn test_sentence_from_tokenized_with_escape_whitespace() { - let s = Sentence::from_tokenized("火星 猫 の 生態 ( M \\ et\\ al. )"); + let s = Sentence::from_tokenized("火星 猫 の 生態 ( M \\ et\\ al. )").unwrap(); let expected = Sentence { text: "火星猫の生態(M et al.)".to_string(), + chars: vec![ + '火', '星', '猫', 'の', '生', '態', '(', 'M', ' ', 'e', 't', ' ', 'a', 'l', '.', + ')', + ], str_to_char_pos: vec![ 0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0, 5, 0, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, @@ -767,7 +1222,52 @@ mod tests { ], boundary_scores: None, }; - assert_eq!(expected, s.unwrap()); + assert_eq!(expected, s); + } + + #[test] + fn test_sentence_update_tokenized_escape_whitespace() { + let mut s = Sentence::from_raw("12345").unwrap(); + s.update_tokenized("火星 猫 の 生態 ( M \\ et\\ al. 
)") + .unwrap(); + + let expected = Sentence { + text: "火星猫の生態(M et al.)".to_string(), + chars: vec![ + '火', '星', '猫', 'の', '生', '態', '(', 'M', ' ', 'e', 't', ' ', 'a', 'l', '.', + ')', + ], + str_to_char_pos: vec![ + 0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0, 5, 0, 0, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, + ], + char_to_str_pos: vec![ + 0, 3, 6, 9, 12, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + ], + char_type: ct2u8vec![ + Kanji, Kanji, Kanji, Hiragana, Kanji, Kanji, Other, Roman, Other, Roman, Roman, + Other, Roman, Roman, Other, Other, + ], + boundaries: vec![ + NotWordBoundary, + WordBoundary, + WordBoundary, + WordBoundary, + NotWordBoundary, + WordBoundary, + WordBoundary, + WordBoundary, + WordBoundary, + NotWordBoundary, + NotWordBoundary, + NotWordBoundary, + NotWordBoundary, + NotWordBoundary, + WordBoundary, + ], + boundary_scores: None, + }; + assert_eq!(expected, s); } #[test] @@ -776,6 +1276,7 @@ mod tests { let expected = Sentence { text: "改行に\\nを用いる".to_string(), + chars: vec!['改', '行', 'に', '\\', 'n', 'を', '用', 'い', 'る'], str_to_char_pos: vec![ 0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 4, 5, 0, 0, 6, 0, 0, 7, 0, 0, 8, 0, 0, 9, ], @@ -798,12 +1299,41 @@ mod tests { assert_eq!(expected, s.unwrap()); } + #[test] + fn test_sentence_update_tokenized_with_escape_backslash() { + let mut s = Sentence::from_raw("12345").unwrap(); + s.update_tokenized("改行 に \\\\n を 用い る").unwrap(); + + let expected = Sentence { + text: "改行に\\nを用いる".to_string(), + chars: vec!['改', '行', 'に', '\\', 'n', 'を', '用', 'い', 'る'], + str_to_char_pos: vec![ + 0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 4, 5, 0, 0, 6, 0, 0, 7, 0, 0, 8, 0, 0, 9, + ], + char_to_str_pos: vec![0, 3, 6, 9, 10, 11, 14, 17, 20, 23], + char_type: ct2u8vec![ + Kanji, Kanji, Hiragana, Other, Roman, Hiragana, Kanji, Hiragana, Hiragana, + ], + boundaries: vec![ + NotWordBoundary, + WordBoundary, + WordBoundary, + NotWordBoundary, + WordBoundary, + WordBoundary, + NotWordBoundary, + WordBoundary, + ], + boundary_scores: None, + }; + assert_eq!(expected, s); + } + #[test] fn test_sentence_to_tokenized_string_unknown() { let s = Sentence::from_partial_annotation("火-星 猫|の|生-態"); let result = s.unwrap().to_tokenized_string(); - assert!(result.is_err()); assert_eq!( "InvalidSentenceError: contains an unknown boundary", result.err().unwrap().to_string() @@ -835,7 +1365,6 @@ mod tests { let s = Sentence::from_partial_annotation("火-星 猫|の|生-態").unwrap(); let result = s.to_tokenized_vec(); - assert!(result.is_err()); assert_eq!( "InvalidSentenceError: contains an unknown boundary", result.err().unwrap().to_string() @@ -856,21 +1385,41 @@ mod tests { fn test_sentence_from_partial_annotation_empty() { let s = Sentence::from_partial_annotation(""); - assert!(s.is_err()); assert_eq!( "InvalidArgumentError: labeled_text: is empty", &s.err().unwrap().to_string() ); } + #[test] + fn test_sentence_update_partial_annotation_empty() { + let mut s = Sentence::from_raw("12345").unwrap(); + let result = s.update_partial_annotation(""); + + assert_eq!( + "InvalidArgumentError: labeled_text: is empty", + &result.err().unwrap().to_string() + ); + } + #[test] fn test_sentence_from_partial_annotation_invalid_length() { - let s = Sentence::from_partial_annotation("火-星 猫|の|生-態 "); + let result = Sentence::from_partial_annotation("火-星 猫|の|生-態 "); - assert!(s.is_err()); assert_eq!( "InvalidArgumentError: labeled_text: invalid length: 12", - &s.err().unwrap().to_string() + &result.err().unwrap().to_string() + ); + } + + #[test] + fn 
test_sentence_update_partial_annotation_invalid_length() { + let mut s = Sentence::from_raw("12345").unwrap(); + let result = s.update_partial_annotation("火-星 猫|の|生-態 "); + + assert_eq!( + "InvalidArgumentError: labeled_text: invalid length: 12", + &result.err().unwrap().to_string() ); } @@ -878,19 +1427,30 @@ mod tests { fn test_sentence_from_partial_annotation_invalid_boundary_character() { let s = Sentence::from_partial_annotation("火-星?猫|の|生-態"); - assert!(s.is_err()); assert_eq!( "InvalidArgumentError: labeled_text: contains invalid boundary character: '?'", &s.err().unwrap().to_string() ); } + #[test] + fn test_sentence_update_partial_annotation_invalid_boundary_character() { + let mut s = Sentence::from_raw("12345").unwrap(); + let result = s.update_partial_annotation("火-星?猫|の|生-態"); + + assert_eq!( + "InvalidArgumentError: labeled_text: contains invalid boundary character: '?'", + &result.err().unwrap().to_string() + ); + } + #[test] fn test_sentence_from_partial_annotation_one() { let s = Sentence::from_partial_annotation("火-星 猫|の|生-態"); let expected = Sentence { text: "火星猫の生態".to_string(), + chars: vec!['火', '星', '猫', 'の', '生', '態'], str_to_char_pos: vec![0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0, 5, 0, 0, 6], char_to_str_pos: vec![0, 3, 6, 9, 12, 15, 18], char_type: ct2u8vec![Kanji, Kanji, Kanji, Hiragana, Kanji, Kanji], @@ -906,6 +1466,29 @@ mod tests { assert_eq!(expected, s.unwrap()); } + #[test] + fn test_sentence_update_partial_annotation_one() { + let mut s = Sentence::from_raw("12345").unwrap(); + s.update_partial_annotation("火-星 猫|の|生-態").unwrap(); + + let expected = Sentence { + text: "火星猫の生態".to_string(), + chars: vec!['火', '星', '猫', 'の', '生', '態'], + str_to_char_pos: vec![0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0, 5, 0, 0, 6], + char_to_str_pos: vec![0, 3, 6, 9, 12, 15, 18], + char_type: ct2u8vec![Kanji, Kanji, Kanji, Hiragana, Kanji, Kanji], + boundaries: vec![ + NotWordBoundary, + Unknown, + WordBoundary, + WordBoundary, + NotWordBoundary, + ], + boundary_scores: None, + }; + assert_eq!(expected, s); + } + #[test] fn test_sentence_to_partial_annotation_string() { let s = Sentence::from_partial_annotation("火-星 猫|の|生-態"); From d64e05572160bfba2492335ac397de57bed1bedd Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Tue, 7 Dec 2021 15:28:11 +0900 Subject: [PATCH 24/60] Update readme (#15) * Update figure * Update README --- README.md | 6 ++-- figures/comparison.svg | 66 +++++++++++++++++++++--------------------- figures/comparison.txt | 18 ++++++------ 3 files changed, 46 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 655a2b4a..89d5383b 100644 --- a/README.md +++ b/README.md @@ -117,9 +117,9 @@ You can make the `メロンパン` into a single token by manipulating the model ``` In this case, `-100000` will be added when the boundary is inside of the word `メロンパン`. - + Note that Vaporetto uses 32-bit integers for the total weight, so you have to be careful about overflow. - + In addition, The dictionary cannot contain duplicated words. When the word is already contained in the dictionary, you have to edit existing weights. @@ -136,6 +136,8 @@ Now `メロンパン` is split into a single token. ## Speed Comparison of Various Tokenizers +Vaporetto is 6.9 times faster than KyTea. With `feature=simd`, it becomes 7.8 times faster. (`simd` option requires Nightly Rust.) + Details can be found [here](https://github.com/legalforce-research/vaporetto/wiki/Speed-Comparison). 
 ![](./figures/comparison.svg)
diff --git a/figures/comparison.svg b/figures/comparison.svg
index 5a75e90e..5f93598f 100644
--- a/figures/comparison.svg
+++ b/figures/comparison.svg
@@ -1,6 +1,6 @@
 [Editor's note: the SVG markup in this hunk was lost when this document was extracted (the tags were stripped). The hunk updated the chart header of figures/comparison.svg.]
@@ -124,45 +124,45 @@
 [Editor's note: likewise stripped; this hunk redrew the speed bars to match the re-measured values in figures/comparison.txt below.]
diff --git a/figures/comparison.txt b/figures/comparison.txt
index 6a25dd8d..595c5403 100644
--- a/figures/comparison.txt
+++ b/figures/comparison.txt
@@ -1,9 +1,9 @@
-Tool Name (version),Speed [M chars/s],STD
-KyTea (2020-04-03),1.463,0.012
-Vaporetto (0.3.0),9.716,0.115
-Vaporetto (0.3.0+feature=simd),11.035,0.144
-MeCab (2020-09-14),4.621,0.047
-Kuromoji (0.9.0),1.470,0.074
-Lindera (0.8.1),1.444,0.022
-Sudachi (0.5.3),0.322,0.029
-sudachi.rs (0.6.0),0.961,0.008
+Tool Name (version),Speed [M chars/s]
+KyTea (2020-04-03),1.4674450789921388
+Vaporetto (0.3.0),10.07734841348238
+Vaporetto (0.3.0+feature=simd),11.414333204815095
+MeCab (2020-09-14),4.619055018595073
+Kuromoji (0.9.0),1.4837693905013502
+Lindera (0.8.1),1.4499374143314385
+Sudachi (0.5.3),0.3185670881795747
+sudachi.rs (0.6.0),0.9658781319147613
From dc0bff81db7afe4af27cae8446dbe0f59feee3d1 Mon Sep 17 00:00:00 2001
From: Koichi Akabe
Date: Mon, 13 Dec 2021 12:53:30 +0900
Subject: [PATCH 25/60] Add --scores option to the predict command (#17)

* Use u32 instead of f64 for exporting scores
* Fix API of vaporetto_rules
* Add --scores option
* Update README
* Fix
* Fix doc
* Apply suggestions from code review

Co-authored-by: Shunsuke Kanda

* Fix

Co-authored-by: Shunsuke Kanda
---
 README.md | 39 ++++++--
 predict/src/main.rs | 96 +++++++++++++------
 train/src/main.rs | 2 +-
 vaporetto/src/kytea_model.rs | 8 +-
 vaporetto/src/model.rs | 6 --
 vaporetto/src/predictor.rs | 30 ++----
 vaporetto/src/sentence.rs | 4 +-
 vaporetto_rules/README.md | 9 +-
 vaporetto_rules/src/lib.rs | 14 +--
 .../src/string_filters/kytea_fullwidth.rs | 9 +-
 10 files changed, 127 insertions(+), 90 deletions(-)

diff --git a/README.md b/README.md
index 89d5383b..167ccfba 100644
--- a/README.md
+++ b/README.md
@@ -85,14 +85,27 @@ You can specify all arguments above multiple times.
 
 ### Model Manipulation
 
-For example, `メロンパン` is split into two tokens in the following command:
+Sometimes, your model will output different results than what you expect.
+For example, `メロンパン` is split into two tokens in the following command.
+We use the `--scores` option to show the score of each character boundary:
 ```
-% echo '朝食はメロンパン1個だった' | cargo run --release -p predict -- --model path/to/jp-0.4.7-5-tokenize.model.zst
+% echo '朝食はメロンパン1個だった' | cargo run --release -p predict -- --scores --model path/to/jp-0.4.7-5-tokenize.model.zst
 朝食 は メロン パン 1 個 だっ た
-```
-
-Sometimes, the model outputs different results than what you expect.
-You can make the `メロンパン` into a single token by manipulating the model following the steps below:
+0:朝食 -15398
+1:食は 24623
+2:はメ 30261
+3:メロ -26885
+4:ロン -38896
+5:ンパ 8162
+6:パン -23416
+7:ン1 23513
+8:1個 18435
+9:個だ 24964
+10:だっ -15065
+11:った 14178
+```
+
+To concatenate `メロンパン` into a single token, manipulate the model in the following steps so that the score of `ンパ` becomes negative:
 
 1. Dump a dictionary by the following command:
    ```
    % cargo run --release -p manipulate_model -- --model-in path/to/jp-0.4.7-5-tokenize.model.zst --dump-dict path/to/dictionary.csv
    ```
@@ -130,8 +143,20 @@ Now `メロンパン` is split into a single token.
``` -% echo '朝食はメロンパン1個だった' | cargo run --release -p predict -- --model path/to/jp-0.4.7-5-tokenize-new.model.zst +% echo '朝食はメロンパン1個だった' | cargo run --release -p predict -- --scores --model path/to/jp-0.4.7-5-tokenize-new.model.zst 朝食 は メロンパン 1 個 だっ た +0:朝食 -15398 +1:食は 24623 +2:はメ 30261 +3:メロ -126885 +4:ロン -138896 +5:ンパ -91838 +6:パン -123416 +7:ン1 23513 +8:1個 18435 +9:個だ 24964 +10:だっ -15065 +11:った 14178 ``` ## Speed Comparison of Various Tokenizers diff --git a/predict/src/main.rs b/predict/src/main.rs index d8fb3570..7d96a42e 100644 --- a/predict/src/main.rs +++ b/predict/src/main.rs @@ -1,11 +1,12 @@ use std::fs::File; use std::io::{prelude::*, stdin}; use std::path::PathBuf; +use std::rc::Rc; use std::str::FromStr; use std::time::Instant; use structopt::StructOpt; -use vaporetto::{CharacterType, Model, Predictor, Sentence}; +use vaporetto::{errors::VaporettoError, CharacterType, Model, Predictor, Sentence}; use vaporetto_rules::{ sentence_filters::{ConcatGraphemeClustersFilter, KyteaWsConstFilter}, string_filters::KyteaFullwidthFilter, @@ -46,15 +47,65 @@ struct Opt { #[structopt(long)] wsconst: Vec, + /// Prints scores. + #[structopt(long)] + scores: bool, + /// Do not normalize input strings before prediction. #[structopt(long)] no_norm: bool, } +fn print_scores(s: &Sentence) { + if let Some(scores) = s.boundary_scores().as_ref() { + for (i, score) in scores.iter().enumerate() { + println!("{}:{}{} {}", i, s.chars()[i], s.chars()[i + 1], score); + } + println!(); + } +} + +fn tokenize( + predictor: &Predictor, + text: impl Into, + mut buf1: Sentence, + mut buf2: Sentence, + pre_filters: &[Box], + post_filters: &[Box], +) -> Result<(String, Sentence, Sentence), VaporettoError> { + let text = text.into(); + if pre_filters.is_empty() { + buf1.update_raw(text)?; + } else { + let text_rc = Rc::new(text); + let filt_text = Rc::try_unwrap( + pre_filters + .iter() + .fold(Rc::clone(&text_rc), |s, filter| Rc::new(filter.filter(&s))), + ) + .unwrap(); + let text = Rc::try_unwrap(text_rc).unwrap(); + buf1.update_raw(filt_text)?; + buf2.update_raw(text)?; + } + buf1 = predictor.predict_with_score(buf1); + buf1 = post_filters.iter().fold(buf1, |s, filter| filter.filter(s)); + let result = if pre_filters.is_empty() { + buf1.to_tokenized_string()? + } else { + buf2.boundaries_mut().copy_from_slice(buf1.boundaries()); + buf2.to_tokenized_string()? 
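+        // (Editor's note, not in the original patch: buf1 holds the normalized
+        // text that was actually scored, buf2 the unnormalized input; copying
+        // the boundaries over lets us print tokens in their original form.)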
+ }; + Ok((result, buf1, buf2)) +} + fn main() -> Result<(), Box> { let opt = Opt::from_args(); - let fullwidth_filter = KyteaFullwidthFilter::new(); + let mut pre_filters: Vec> = vec![]; + if !opt.no_norm { + pre_filters.push(Box::new(KyteaFullwidthFilter::new())); + } let mut post_filters: Vec> = vec![]; for wsconst in &opt.wsconst { match wsconst { @@ -73,39 +124,26 @@ fn main() -> Result<(), Box> { let predictor = Predictor::new(model)?; eprintln!("Start tokenization"); - let mut n_boundaries = 0; + let mut n_chars = 0; let start = Instant::now(); - let mut s = Sentence::from_raw(" ")?; - if opt.no_norm { - for line in stdin().lock().lines() { - let line = line?; - s.update_raw(line)?; - s = predictor.predict(s); - s = post_filters.iter().fold(s, |s, filter| filter.filter(s)); - n_boundaries += s.boundaries().len(); - let toks = s.to_tokenized_string()?; - println!("{}", toks); - } - } else { - let mut s_norm = Sentence::from_raw(" ")?; - for line in stdin().lock().lines() { - let line = line?; - let norm = fullwidth_filter.filter(&line); - s.update_raw(line)?; - s_norm.update_raw(norm)?; - s_norm = predictor.predict(s_norm); - s.boundaries_mut().clone_from_slice(s_norm.boundaries()); - s = post_filters.iter().fold(s, |s, filter| filter.filter(s)); - n_boundaries += s.boundaries().len(); - let toks = s.to_tokenized_string()?; - println!("{}", toks); + let mut buf1 = Sentence::from_raw(" ")?; + let mut buf2 = Sentence::from_raw(" ")?; + for line in stdin().lock().lines() { + let ret = tokenize(&predictor, line?, buf1, buf2, &pre_filters, &post_filters)?; + let result = ret.0; + buf1 = ret.1; + buf2 = ret.2; + println!("{}", result); + if opt.scores { + print_scores(&buf1); } + n_chars += buf1.chars().len(); } let duration = start.elapsed(); eprintln!("Elapsed: {} [sec]", duration.as_secs_f64()); eprintln!( - "Speed: {} [boundaries/sec]", - n_boundaries as f64 / duration.as_secs_f64() + "Speed: {} [chars/sec]", + n_chars as f64 / duration.as_secs_f64() ); Ok(()) diff --git a/train/src/main.rs b/train/src/main.rs index 76c6c590..04b5db2f 100644 --- a/train/src/main.rs +++ b/train/src/main.rs @@ -138,7 +138,7 @@ fn main() -> Result<(), Box> { let line = if opt.no_norm { line } else { - fullwidth_filter.filter(line) + fullwidth_filter.filter(&line) }; dictionary.insert(line); } diff --git a/vaporetto/src/kytea_model.rs b/vaporetto/src/kytea_model.rs index 95ee1b03..984bd46c 100644 --- a/vaporetto/src/kytea_model.rs +++ b/vaporetto/src/kytea_model.rs @@ -240,7 +240,7 @@ struct LinearModel { _solver_type: u8, _labels: Vec, _bias: bool, - multiplier: f64, + _multiplier: f64, feature_lookup: Option>, } @@ -264,7 +264,7 @@ impl Readable for Option { _solver_type: solver_type, _labels: labels, _bias: bias, - multiplier, + _multiplier: multiplier, feature_lookup, })) } @@ -399,7 +399,6 @@ impl TryFrom for Model { let wordseg_model = model .wordseg_model .ok_or_else(|| VaporettoError::invalid_model("no word segmentation model."))?; - let quantize_multiplier = wordseg_model.multiplier; let feature_lookup = wordseg_model .feature_lookup .ok_or_else(|| VaporettoError::invalid_model("no lookup data."))?; @@ -457,9 +456,6 @@ impl TryFrom for Model { char_ngram_model: NgramModel::new(char_ngrams), type_ngram_model: NgramModel::new(type_ngrams), dict_model: DictModel::Wordwise(DictModelWordwise { dict }), - - quantize_multiplier, - bias, char_window_size: config.char_w as usize, type_window_size: config.type_w as usize, diff --git a/vaporetto/src/model.rs b/vaporetto/src/model.rs index 58bdc492..f5a9a815 
100644 --- a/vaporetto/src/model.rs +++ b/vaporetto/src/model.rs @@ -31,9 +31,6 @@ pub struct Model { pub(crate) char_ngram_model: NgramModel, pub(crate) type_ngram_model: NgramModel>, pub(crate) dict_model: DictModel, - - pub(crate) quantize_multiplier: f64, - pub(crate) bias: i32, pub(crate) char_window_size: usize, pub(crate) type_window_size: usize, @@ -153,9 +150,6 @@ impl Model { words: dict, weights: dict_weights, }), - - quantize_multiplier, - bias, char_window_size, type_window_size, diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs index e993b03d..fe84b21e 100644 --- a/vaporetto/src/predictor.rs +++ b/vaporetto/src/predictor.rs @@ -16,8 +16,6 @@ pub struct Predictor { type_scorer: TypeScorer, dict_scorer: Option, - quantize_multiplier: f64, - #[cfg(feature = "simd")] padding: usize, } @@ -56,8 +54,6 @@ impl Predictor { type_scorer, dict_scorer, - quantize_multiplier: model.quantize_multiplier, - #[cfg(feature = "simd")] padding: model.char_window_size.max(model.type_window_size), }) @@ -133,23 +129,14 @@ impl Predictor { if boundaries_size != 0 { let mut ys = vec![0; boundaries_size]; self.predict_impl(&sentence, 0, &mut ys); - let mut scores = sentence - .boundary_scores - .take() - .unwrap_or_else(|| vec![0.; boundaries_size]); - for (y, (b, s)) in ys - .into_iter() - .zip(sentence.boundaries.iter_mut().zip(scores.iter_mut())) - { + for (&y, b) in ys.iter().zip(sentence.boundaries.iter_mut()) { *b = if y >= 0 { BoundaryType::WordBoundary } else { BoundaryType::NotWordBoundary }; - - *s = y as f64 * self.quantize_multiplier; } - sentence.boundary_scores.replace(scores); + sentence.boundary_scores.replace(ys); } #[cfg(feature = "simd")] @@ -160,7 +147,7 @@ impl Predictor { let mut scores = sentence .boundary_scores .take() - .unwrap_or_else(|| vec![0.; boundaries_size]); + .unwrap_or_else(|| vec![0; boundaries_size]); for (&y, (b, s)) in ys[self.padding..] 
.into_iter() .zip(sentence.boundaries.iter_mut().zip(scores.iter_mut())) @@ -171,7 +158,7 @@ impl Predictor { BoundaryType::NotWordBoundary }; - *s = y as f64 * self.quantize_multiplier; + *s = y; } sentence.boundary_scores.replace(scores); } @@ -272,7 +259,6 @@ mod tests { }, ], }), - quantize_multiplier: 0.5, bias: -200, char_window_size: 3, type_window_size: 2, @@ -367,7 +353,6 @@ mod tests { }, ], }), - quantize_multiplier: 0.25, bias: -285, char_window_size: 2, type_window_size: 3, @@ -470,7 +455,6 @@ mod tests { }, ], }), - quantize_multiplier: 0.25, bias: -285, char_window_size: 2, type_window_size: 3, @@ -560,7 +544,7 @@ mod tests { s.boundaries(), ); assert_eq!( - &[-38.5, -2.5, 22.5, 66.0, 66.5, 72.0, 25.0, -16.0], + &[-77, -5, 45, 132, 133, 144, 50, -32], s.boundary_scores().unwrap(), ); } @@ -585,7 +569,7 @@ mod tests { s.boundaries(), ); assert_eq!( - &[-34.5, -27.25, -9.75, 14.25, 26.0, 8.5, -19.75, -28.5], + &[-138, -109, -39, 57, 104, 34, -79, -114], s.boundary_scores().unwrap(), ); } @@ -610,7 +594,7 @@ mod tests { s.boundaries(), ); assert_eq!( - &[-34.5, -27.25, -20.75, 4.5, 16.25, -3.0, -10.25, -18.75], + &[-138, -109, -83, 18, 65, -12, -41, -75], s.boundary_scores().unwrap(), ); } diff --git a/vaporetto/src/sentence.rs b/vaporetto/src/sentence.rs index c01e9528..8fd2c800 100644 --- a/vaporetto/src/sentence.rs +++ b/vaporetto/src/sentence.rs @@ -85,7 +85,7 @@ pub struct Sentence { pub(crate) char_to_str_pos: Vec, pub(crate) char_type: Vec, pub(crate) boundaries: Vec, - pub(crate) boundary_scores: Option>, + pub(crate) boundary_scores: Option>, } impl Sentence { @@ -756,7 +756,7 @@ impl Sentence { /// # Returns /// /// If the predictor inserted, the boundary score information is returned. Otherwise, None. - pub fn boundary_scores(&self) -> Option<&[f64]> { + pub fn boundary_scores(&self) -> Option<&[i32]> { self.boundary_scores.as_deref() } diff --git a/vaporetto_rules/README.md b/vaporetto_rules/README.md index 6527833c..f8edbeac 100644 --- a/vaporetto_rules/README.md +++ b/vaporetto_rules/README.md @@ -8,6 +8,7 @@ vaporetto_rules is rule-base filters for Vaporetto. ```rust use std::fs::File; use std::io::BufReader; +use std::rc::Rc; use vaporetto::{CharacterType, Model, Predictor, Sentence}; use vaporetto_rules::{ @@ -18,9 +19,9 @@ use vaporetto_rules::{ let mut f = BufReader::new(File::open("model.bin").unwrap()); let model = Model::read(&mut f).unwrap(); -let mut predictor = Predictor::new(model); +let mut predictor = Predictor::new(model).unwrap(); -let pre_filters: Vec>> = vec![ +let pre_filters: Vec> = vec![ Box::new(KyteaFullwidthFilter::new()), ]; let post_filters: Vec> = vec![ @@ -31,7 +32,9 @@ let post_filters: Vec> = vec![ let input = "Vaporettoは仲良し家族👨‍👨‍👧‍👦を離れ離れにさせません。" .to_string(); -let preproc_input = pre_filters.iter().fold(input, |s, filter| filter.filter(s)); +let input = Rc::new(input); +let preproc_input = pre_filters.iter().fold(input, |s, filter| Rc::new(filter.filter(&s))); +let preproc_input = Rc::try_unwrap(preproc_input).unwrap(); let sentence = Sentence::from_raw(preproc_input).unwrap(); let sentence = predictor.predict(sentence); diff --git a/vaporetto_rules/src/lib.rs b/vaporetto_rules/src/lib.rs index 9ac6969d..305e0f30 100644 --- a/vaporetto_rules/src/lib.rs +++ b/vaporetto_rules/src/lib.rs @@ -7,6 +7,7 @@ //! ```no_run //! use std::fs::File; //! use std::io::BufReader; +//! use std::rc::Rc; //! //! use vaporetto::{CharacterType, Model, Predictor, Sentence}; //! use vaporetto_rules::{ @@ -19,7 +20,7 @@ //! 
let model = Model::read(&mut f).unwrap(); //! let mut predictor = Predictor::new(model).unwrap(); //! -//! let pre_filters: Vec>> = vec![ +//! let pre_filters: Vec> = vec![ //! Box::new(KyteaFullwidthFilter::new()), //! ]; //! let post_filters: Vec> = vec![ @@ -30,7 +31,9 @@ //! let input = "Vaporettoは仲良し家族👨‍👨‍👧‍👦を離れ離れにさせません。" //! .to_string(); //! -//! let preproc_input = pre_filters.iter().fold(input, |s, filter| filter.filter(s)); +//! let input = Rc::new(input); +//! let preproc_input = pre_filters.iter().fold(input, |s, filter| Rc::new(filter.filter(&s))); +//! let preproc_input = Rc::try_unwrap(preproc_input).unwrap(); //! //! let sentence = Sentence::from_raw(preproc_input).unwrap(); //! let sentence = predictor.predict(sentence); @@ -62,10 +65,7 @@ pub trait SentenceFilter { fn filter(&self, sentence: Sentence) -> Sentence; } -pub trait StringFilter -where - S: AsRef, -{ +pub trait StringFilter { /// Filter a specified string using rules. /// /// # Arguments: @@ -75,5 +75,5 @@ where /// # Returns /// /// A processed string. - fn filter(&self, string: S) -> String; + fn filter(&self, string: &str) -> String; } diff --git a/vaporetto_rules/src/string_filters/kytea_fullwidth.rs b/vaporetto_rules/src/string_filters/kytea_fullwidth.rs index befead3b..3dc841fc 100644 --- a/vaporetto_rules/src/string_filters/kytea_fullwidth.rs +++ b/vaporetto_rules/src/string_filters/kytea_fullwidth.rs @@ -20,10 +20,7 @@ impl Default for KyteaFullwidthFilter { } } -impl StringFilter for KyteaFullwidthFilter -where - S: AsRef, -{ +impl StringFilter for KyteaFullwidthFilter { /// Replace alphanumerics and symbols to full-width characters. /// /// # Arguments: @@ -33,8 +30,8 @@ where /// # Returns /// /// A processed text. - fn filter(&self, string: S) -> String { - let mut chars: Vec<_> = string.as_ref().chars().collect(); + fn filter(&self, string: &str) -> String { + let mut chars: Vec<_> = string.chars().collect(); for c in &mut chars { *c = match *c { 'a' => 'a', From c749f33cd14e1445275a0e29914e77d46663f1f9 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Thu, 16 Dec 2021 11:57:01 +0900 Subject: [PATCH 26/60] Use self contained serializer instead of serde (#18) * Implement R/W * fix * Refactoring * fix * fix * fix --- vaporetto/Cargo.toml | 8 +- vaporetto/src/dict_model.rs | 163 +++++++++++++++++++++++++++++++++-- vaporetto/src/errors.rs | 4 +- vaporetto/src/model.rs | 25 ++++-- vaporetto/src/ngram_model.rs | 115 +++++++++++++++++++++++- 5 files changed, 293 insertions(+), 22 deletions(-) diff --git a/vaporetto/Cargo.toml b/vaporetto/Cargo.toml index b25d7528..d0be026b 100644 --- a/vaporetto/Cargo.toml +++ b/vaporetto/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "vaporetto" version = "0.2.0" -edition = "2018" +edition = "2021" authors = ["Koichi Akabe "] description = "Vaporetto: a pointwise prediction based tokenizer" license = "MIT OR Apache-2.0" @@ -13,16 +13,14 @@ categories = ["text-processing"] autotests = false [dependencies] -bincode = "1.3.3" # MIT daachorse = "0.2.0" # MIT or Apache-2.0 -serde = { version = "1.0", features = ["derive"] } # MIT or Apache-2.0 +byteorder = "1.4" # Unlicense or MIT -byteorder = { version = "1.4", optional = true } # Unlicense or MIT liblinear = { version = "1", optional = true } # MIT [features] default = [] -kytea = ["byteorder"] +kytea = [] train = ["liblinear"] simd = [] diff --git a/vaporetto/src/dict_model.rs b/vaporetto/src/dict_model.rs index 279a3cd2..39b08974 100644 --- a/vaporetto/src/dict_model.rs +++ b/vaporetto/src/dict_model.rs @@ -1,23 +1,51 @@ use 
std::collections::HashMap; +use std::io::{Read, Write}; +use std::mem; -use serde::{Deserialize, Serialize}; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use crate::errors::{Result, VaporettoError}; use crate::ngram_model::NgramModel; -#[derive(Clone, Copy, Default, Serialize, Deserialize)] +#[derive(Clone, Copy, Default)] pub struct DictWeight { pub right: i32, pub inside: i32, pub left: i32, } -#[derive(Serialize, Deserialize)] +impl DictWeight { + pub fn serialize(&self, mut buf: W) -> Result + where + W: Write, + { + buf.write_i32::(self.right)?; + buf.write_i32::(self.inside)?; + buf.write_i32::(self.left)?; + Ok(mem::size_of::() * 3) + } + + pub fn deserialize(mut buf: R) -> Result + where + R: Read, + { + Ok(Self { + right: buf.read_i32::()?, + inside: buf.read_i32::()?, + left: buf.read_i32::()?, + }) + } +} + pub enum DictModel { Wordwise(DictModelWordwise), Lengthwise(DictModelLengthwise), } impl DictModel { + const TYPE_ID_WORDWISE: u8 = 0; + const TYPE_ID_LENGTHWISE: u8 = 1; + pub fn merge_dict_weights( &mut self, char_ngram_model: &mut NgramModel, @@ -42,15 +70,74 @@ impl DictModel { Self::Lengthwise(model) => model.dump_dictionary(), } } + + pub fn serialize(&self, mut buf: W) -> Result + where + W: Write, + { + let size = match self { + Self::Wordwise(model) => { + buf.write_u8(Self::TYPE_ID_WORDWISE)?; + model.serialize(buf)? + } + Self::Lengthwise(model) => { + buf.write_u8(Self::TYPE_ID_LENGTHWISE)?; + model.serialize(buf)? + } + }; + Ok(mem::size_of::() + size) + } + + pub fn deserialize(mut buf: R) -> Result + where + R: Read, + { + let type_id = buf.read_u8()?; + match type_id { + Self::TYPE_ID_WORDWISE => Ok(Self::Wordwise(DictModelWordwise::deserialize(buf)?)), + Self::TYPE_ID_LENGTHWISE => { + Ok(Self::Lengthwise(DictModelLengthwise::deserialize(buf)?)) + } + _ => Err(VaporettoError::invalid_model( + "invalid type_id of dict_model", + )), + } + } } /// Record of weights for each word. -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone)] pub struct WordWeightRecord { pub(crate) word: String, pub(crate) weights: DictWeight, } +impl WordWeightRecord { + pub fn serialize(&self, mut buf: W) -> Result + where + W: Write, + { + let word_size = self.word.len(); + buf.write_u32::(word_size.try_into().unwrap())?; + buf.write_all(self.word.as_bytes())?; + let weights_size = self.weights.serialize(&mut buf)?; + Ok(mem::size_of::() + word_size + weights_size) + } + + pub fn deserialize(mut buf: R) -> Result + where + R: Read, + { + let word_size = buf.read_u32::()?; + let mut str_bytes = vec![0; word_size.try_into().unwrap()]; + buf.read_exact(&mut str_bytes)?; + Ok(Self { + word: String::from_utf8(str_bytes)?, + weights: DictWeight::deserialize(&mut buf)?, + }) + } +} + impl WordWeightRecord { /// Creates a new word weight record. 
/// @@ -96,7 +183,6 @@ impl WordWeightRecord { } } -#[derive(Serialize, Deserialize)] pub struct DictModelWordwise { pub(crate) dict: Vec, } @@ -144,9 +230,33 @@ impl DictModelWordwise { pub fn dump_dictionary(&self) -> Vec { self.dict.clone() } + + pub fn serialize(&self, mut buf: W) -> Result + where + W: Write, + { + let dict_size = self.dict.len(); + buf.write_u32::(dict_size.try_into().unwrap())?; + let mut total_size = mem::size_of::(); + for entry in &self.dict { + total_size += entry.serialize(&mut buf)?; + } + Ok(total_size) + } + + pub fn deserialize(mut buf: R) -> Result + where + R: Read, + { + let dict_size = buf.read_u32::()?; + let mut dict = Vec::with_capacity(dict_size.try_into().unwrap()); + for _ in 0..dict_size { + dict.push(WordWeightRecord::deserialize(&mut buf)?); + } + Ok(Self { dict }) + } } -#[derive(Serialize, Deserialize)] pub struct DictModelLengthwise { pub(crate) words: Vec, pub(crate) weights: Vec, @@ -203,4 +313,45 @@ impl DictModelLengthwise { } result } + + pub fn serialize(&self, mut buf: W) -> Result + where + W: Write, + { + let words_size = self.words.len(); + let weights_size = self.weights.len(); + buf.write_u32::(words_size.try_into().unwrap())?; + buf.write_u32::(weights_size.try_into().unwrap())?; + let mut total_size = mem::size_of::() * 2; + for word in &self.words { + let word_size = word.len(); + buf.write_u32::(word_size.try_into().unwrap())?; + buf.write_all(word.as_bytes())?; + total_size += mem::size_of::() + word_size; + } + for weight in &self.weights { + total_size += weight.serialize(&mut buf)?; + } + Ok(total_size) + } + + pub fn deserialize(mut buf: R) -> Result + where + R: Read, + { + let words_size = buf.read_u32::()?; + let weights_size = buf.read_u32::()?; + let mut words = Vec::with_capacity(words_size.try_into().unwrap()); + for _ in 0..words_size { + let word_size = buf.read_u32::()?; + let mut word_bytes = vec![0; word_size.try_into().unwrap()]; + buf.read_exact(&mut word_bytes)?; + words.push(String::from_utf8(word_bytes)?); + } + let mut weights = Vec::with_capacity(weights_size.try_into().unwrap()); + for _ in 0..weights_size { + weights.push(DictWeight::deserialize(&mut buf)?); + } + Ok(Self { words, weights }) + } } diff --git a/vaporetto/src/errors.rs b/vaporetto/src/errors.rs index 863da6cf..5597a8ed 100644 --- a/vaporetto/src/errors.rs +++ b/vaporetto/src/errors.rs @@ -3,6 +3,8 @@ use std::error::Error; use std::fmt; +pub type Result = std::result::Result; + #[derive(Debug)] pub enum VaporettoError { InvalidModel(InvalidModelError), @@ -52,8 +54,6 @@ impl fmt::Display for VaporettoError { impl Error for VaporettoError {} -pub type Result = std::result::Result; - /// Error used when the model is invalid. #[derive(Debug)] pub struct InvalidModelError { diff --git a/vaporetto/src/model.rs b/vaporetto/src/model.rs index f5a9a815..d9508855 100644 --- a/vaporetto/src/model.rs +++ b/vaporetto/src/model.rs @@ -1,8 +1,9 @@ use std::io::{Read, Write}; -use serde::{Deserialize, Serialize}; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use crate::dict_model::{DictModel, DictModelWordwise, WordWeightRecord}; +use crate::errors::Result; use crate::ngram_model::NgramModel; #[cfg(feature = "train")] @@ -26,7 +27,6 @@ const EPSILON: f64 = 1e-6; const QUANTIZE_BIT_DEPTH: u8 = 16; /// Model data. 
-#[derive(Serialize, Deserialize)] pub struct Model { pub(crate) char_ngram_model: NgramModel, pub(crate) type_ngram_model: NgramModel>, @@ -46,11 +46,17 @@ impl Model { /// # Errors /// /// When `wtr` generates an error, it will be returned as is. - pub fn write(&self, wtr: &mut W) -> Result<(), bincode::Error> + pub fn write(&self, mut wtr: W) -> Result<()> where W: Write, { - bincode::serialize_into(wtr, self) + self.char_ngram_model.serialize(&mut wtr)?; + self.type_ngram_model.serialize(&mut wtr)?; + self.dict_model.serialize(&mut wtr)?; + wtr.write_i32::(self.bias)?; + wtr.write_u32::(self.char_window_size.try_into().unwrap())?; + wtr.write_u32::(self.type_window_size.try_into().unwrap())?; + Ok(()) } /// Creates a model from a reader. @@ -66,11 +72,18 @@ impl Model { /// # Errors /// /// When `rdr` generates an error, it will be returned as is. - pub fn read(rdr: &mut R) -> Result + pub fn read(mut rdr: R) -> Result where R: Read, { - bincode::deserialize_from(rdr) + Ok(Self { + char_ngram_model: NgramModel::::deserialize(&mut rdr)?, + type_ngram_model: NgramModel::>::deserialize(&mut rdr)?, + dict_model: DictModel::deserialize(&mut rdr)?, + bias: rdr.read_i32::()?, + char_window_size: rdr.read_u32::()?.try_into().unwrap(), + type_window_size: rdr.read_u32::()?.try_into().unwrap(), + }) } #[cfg(feature = "train")] diff --git a/vaporetto/src/ngram_model.rs b/vaporetto/src/ngram_model.rs index 28ce97e6..eceaead6 100644 --- a/vaporetto/src/ngram_model.rs +++ b/vaporetto/src/ngram_model.rs @@ -1,8 +1,12 @@ use std::collections::HashMap; +use std::io::{Read, Write}; +use std::mem; -use serde::{Deserialize, Serialize}; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; -#[derive(Clone, Serialize, Deserialize)] +use crate::errors::Result; + +#[derive(Clone)] pub struct NgramData where T: Clone, @@ -11,7 +15,62 @@ where pub(crate) weights: Vec, } -#[derive(Serialize, Deserialize)] +impl NgramData +where + T: AsRef<[u8]> + Clone, +{ + pub fn serialize(&self, mut buf: W) -> Result + where + W: Write, + { + let ngram = self.ngram.as_ref(); + let ngram_size = ngram.len(); + let weights_size = self.weights.len(); + buf.write_u32::(ngram_size.try_into().unwrap())?; + buf.write_u32::(weights_size.try_into().unwrap())?; + buf.write_all(ngram)?; + for &w in &self.weights { + buf.write_i32::(w)?; + } + Ok(mem::size_of::() * 2 + ngram_size + mem::size_of::() * weights_size) + } +} + +impl NgramData { + pub fn deserialize(mut buf: R) -> Result + where + R: Read, + { + let ngram_size = buf.read_u32::()?; + let weights_size = buf.read_u32::()?; + let mut ngram_bytes = vec![0; ngram_size.try_into().unwrap()]; + buf.read_exact(&mut ngram_bytes)?; + let ngram = String::from_utf8(ngram_bytes)?; + let mut weights = vec![]; + for _ in 0..weights_size { + weights.push(buf.read_i32::()?); + } + Ok(Self { ngram, weights }) + } +} + +impl NgramData> { + pub fn deserialize(mut buf: R) -> Result + where + R: Read, + { + let ngram_size = buf.read_u32::()?; + let weights_size = buf.read_u32::()?; + let mut ngram = vec![0; ngram_size.try_into().unwrap()]; + buf.read_exact(&mut ngram)?; + let mut weights = Vec::with_capacity(weights_size.try_into().unwrap()); + for _ in 0..weights_size { + weights.push(buf.read_i32::()?); + } + Ok(Self { ngram, weights }) + } +} + pub struct NgramModel where T: Clone, @@ -60,4 +119,54 @@ where *weights = new_weights.unwrap(); } } + + pub fn serialize(&self, mut buf: W) -> Result + where + W: Write, + { + let data_size = self.data.len(); + 
buf.write_u32::(data_size.try_into().unwrap())?; + let mut total_size = mem::size_of::(); + for d in &self.data { + total_size += d.serialize(&mut buf)?; + } + buf.write_u8(self.merged.into())?; + Ok(total_size + mem::size_of::()) + } +} + +impl NgramModel { + pub fn deserialize(mut buf: R) -> Result + where + R: Read, + { + let data_size = buf.read_u32::()?; + let mut data = Vec::with_capacity(data_size.try_into().unwrap()); + for _ in 0..data_size { + data.push(NgramData::::deserialize(&mut buf)?); + } + let merged_u8 = buf.read_u8()?; + Ok(Self { + data, + merged: merged_u8 != 0, + }) + } +} + +impl NgramModel> { + pub fn deserialize(mut buf: R) -> Result + where + R: Read, + { + let data_size = buf.read_u32::()?; + let mut data = Vec::with_capacity(data_size.try_into().unwrap()); + for _ in 0..data_size { + data.push(NgramData::>::deserialize(&mut buf)?); + } + let merged_u8 = buf.read_u8()?; + Ok(Self { + data, + merged: merged_u8 != 0, + }) + } } From c4e2d1949957fc8c93c2fce0c7436e865e126155 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Thu, 16 Dec 2021 18:19:46 +0900 Subject: [PATCH 27/60] Support comment in the dictionary (#19) * Support inserting comments to words in the dictionary * Update README * fmt --- README.md | 13 +++++++------ manipulate_model/src/main.rs | 3 +++ vaporetto/src/dict_model.rs | 31 +++++++++++++++++++++++++------ vaporetto/src/kytea_model.rs | 1 + vaporetto/src/predictor.rs | 3 +++ 5 files changed, 39 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 167ccfba..a32a3673 100644 --- a/README.md +++ b/README.md @@ -114,19 +114,20 @@ To concatenate `メロンパン` into a single token, manipulate the model in th 2. Edit the dictionary. - The dictionary is a csv file. Each row contains a word and corresponding weights in the following order: + The dictionary is a csv file. Each row contains a word, corresponding weights, and a comment in the following order: * `right_weight` - A weight that is added when the word is found to the right of the boundary. * `inside_weight` - A weight that is added when the word is overlapped on the boundary. * `left_weight` - A weight that is added when the word is found to the left of the boundary. + * `comment` - A comment that does not affect the behaviour. Vaporetto splits a text when the total weight of the boundary is a positive number, so we add a new entry as follows: ```diff - メロレオストーシス,6944,-2553,5319 - メロン,8924,-10861,7081 - +メロンパン,0,-100000,0 - メロン果実,4168,-1165,3558 - メロヴィング,6999,-15413,7583 + メロレオストーシス,6944,-2553,5319, + メロン,8924,-10861,7081, + +メロンパン,0,-100000,0,melon🍈 in English. + メロン果実,4168,-1165,3558, + メロヴィング,6999,-15413,7583, ``` In this case, `-100000` will be added when the boundary is inside of the word `メロンパン`. 
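The same entry can also be added programmatically instead of editing the CSV by hand. Below is a minimal sketch built on the `WordWeightRecord` constructor and `get_comment()` accessor that this patch extends with a `comment` field (see the `manipulate_model` and `dict_model` diffs that follow); the `use vaporetto::WordWeightRecord;` import assumes the type is re-exported at the crate root, as `manipulate_model` uses it:

```rust
use vaporetto::WordWeightRecord;

fn main() {
    // Arguments follow the CSV column order:
    // word, right_weight, inside_weight, left_weight, comment.
    let record = WordWeightRecord::new(
        "メロンパン".to_string(),
        0,       // right_weight
        -100000, // inside_weight: suppresses boundaries inside the word
        0,       // left_weight
        "melon🍈 in English.".to_string(), // comment: no effect on the behaviour
    );
    assert_eq!("melon🍈 in English.", record.get_comment());
}
```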
diff --git a/manipulate_model/src/main.rs b/manipulate_model/src/main.rs index db4e6e87..f074ca85 100644 --- a/manipulate_model/src/main.rs +++ b/manipulate_model/src/main.rs @@ -34,6 +34,7 @@ struct WordWeightRecordFlatten { right: i32, inside: i32, left: i32, + comment: String, } fn main() -> Result<(), Box> { @@ -53,6 +54,7 @@ fn main() -> Result<(), Box> { right: data.get_right_weight(), inside: data.get_inside_weight(), left: data.get_left_weight(), + comment: data.get_comment().to_string(), })?; } } @@ -69,6 +71,7 @@ fn main() -> Result<(), Box> { record.right, record.inside, record.left, + record.comment, )); } model.replace_dictionary(dict); diff --git a/vaporetto/src/dict_model.rs b/vaporetto/src/dict_model.rs index 39b08974..1affe071 100644 --- a/vaporetto/src/dict_model.rs +++ b/vaporetto/src/dict_model.rs @@ -110,6 +110,7 @@ impl DictModel { pub struct WordWeightRecord { pub(crate) word: String, pub(crate) weights: DictWeight, + pub(crate) comment: String, } impl WordWeightRecord { @@ -118,10 +119,13 @@ impl WordWeightRecord { W: Write, { let word_size = self.word.len(); + let comment_size = self.comment.len(); buf.write_u32::(word_size.try_into().unwrap())?; + buf.write_u32::(comment_size.try_into().unwrap())?; buf.write_all(self.word.as_bytes())?; + buf.write_all(self.comment.as_bytes())?; let weights_size = self.weights.serialize(&mut buf)?; - Ok(mem::size_of::() + word_size + weights_size) + Ok(mem::size_of::() * 2 + word_size + weights_size + comment_size) } pub fn deserialize(mut buf: R) -> Result @@ -129,11 +133,15 @@ impl WordWeightRecord { R: Read, { let word_size = buf.read_u32::()?; - let mut str_bytes = vec![0; word_size.try_into().unwrap()]; - buf.read_exact(&mut str_bytes)?; + let comment_size = buf.read_u32::()?; + let mut word_bytes = vec![0; word_size.try_into().unwrap()]; + buf.read_exact(&mut word_bytes)?; + let mut comment_bytes = vec![0; comment_size.try_into().unwrap()]; + buf.read_exact(&mut comment_bytes)?; Ok(Self { - word: String::from_utf8(str_bytes)?, + word: String::from_utf8(word_bytes)?, weights: DictWeight::deserialize(&mut buf)?, + comment: String::from_utf8(comment_bytes)?, }) } } @@ -147,11 +155,12 @@ impl WordWeightRecord { /// * `right` - A weight of the boundary when the word is found at right. /// * `inside` - A weight of the boundary when the word is overlapped on the boundary. /// * `left` - A weight of the boundary when the word is found at left. + /// * `comment` - A comment that does not affect the behaviour. /// /// # Returns /// /// A new record. - pub const fn new(word: String, right: i32, inside: i32, left: i32) -> Self { + pub const fn new(word: String, right: i32, inside: i32, left: i32, comment: String) -> Self { Self { word, weights: DictWeight { @@ -159,6 +168,7 @@ impl WordWeightRecord { inside, left, }, + comment, } } @@ -181,6 +191,11 @@ impl WordWeightRecord { pub const fn get_left_weight(&self) -> i32 { self.weights.left } + + /// Gets a reference to the comment. 
+ pub fn get_comment(&self) -> &str { + &self.comment + } } pub struct DictModelWordwise { @@ -309,7 +324,11 @@ impl DictModelLengthwise { let word_size = word.chars().count(); let word_size_idx = word_size.min(self.weights.len()) - 1; let weights = self.weights[word_size_idx]; - result.push(WordWeightRecord { word, weights }); + result.push(WordWeightRecord { + word, + weights, + comment: "".to_string(), + }); } result } diff --git a/vaporetto/src/kytea_model.rs b/vaporetto/src/kytea_model.rs index 984bd46c..e18575ac 100644 --- a/vaporetto/src/kytea_model.rs +++ b/vaporetto/src/kytea_model.rs @@ -448,6 +448,7 @@ impl TryFrom for Model { dict.push(WordWeightRecord { word: w.into_iter().collect(), weights, + comment: "".to_string(), }); } } diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs index fe84b21e..89ea4971 100644 --- a/vaporetto/src/predictor.rs +++ b/vaporetto/src/predictor.rs @@ -436,6 +436,7 @@ mod tests { inside: 39, left: 40, }, + comment: "".to_string(), }, WordWeightRecord { word: "世界".to_string(), @@ -444,6 +445,7 @@ mod tests { inside: 42, left: 43, }, + comment: "".to_string(), }, WordWeightRecord { word: "世".to_string(), @@ -452,6 +454,7 @@ mod tests { inside: 45, left: 46, }, + comment: "".to_string(), }, ], }), From f899ae73bb644361f5bf3e2d27c2361ac08f4064 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Mon, 20 Dec 2021 11:12:30 +0900 Subject: [PATCH 28/60] Reimplement merge_weights() using a stack (#20) * Reimplement merging * fix --- vaporetto/src/ngram_model.rs | 43 +++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/vaporetto/src/ngram_model.rs b/vaporetto/src/ngram_model.rs index eceaead6..4fa30179 100644 --- a/vaporetto/src/ngram_model.rs +++ b/vaporetto/src/ngram_model.rs @@ -96,27 +96,40 @@ where return; } self.merged = true; - let ngrams = self + let mut check = vec![false; self.data.len()]; + let ngram_ids: HashMap<_, _> = self .data .iter() .cloned() - .map(|d| (d.ngram.as_ref().to_vec(), d.weights)) - .collect::>(); - for NgramData { ngram, weights } in &mut self.data { - let ngram = ngram.as_ref(); - let mut new_weights: Option> = None; - for st in (0..ngram.len()).rev() { - if let Some(weights) = ngrams.get(&ngram[st..]) { - if let Some(new_weights) = new_weights.as_mut() { - for (w_new, w) in new_weights.iter_mut().zip(weights) { - *w_new += *w; - } - } else { - new_weights.replace(weights.clone()); + .enumerate() + .map(|(i, d)| (d.ngram.as_ref().to_vec(), i)) + .collect(); + let mut stack = vec![]; + for i in 0..self.data.len() { + if check[i] { + continue; + } + stack.push(i); + let ngram = self.data[i].ngram.as_ref(); + for j in 1..ngram.len() { + if let Some(&k) = ngram_ids.get(&ngram[j..]) { + stack.push(k); + if check[k] { + break; } } } - *weights = new_weights.unwrap(); + let mut idx_from = stack.pop().unwrap(); + check[idx_from] = true; + while let Some(idx_to) = stack.pop() { + let mut new_weights = self.data[idx_from].weights.clone(); + for (w1, w2) in new_weights.iter_mut().zip(&self.data[idx_to].weights) { + *w1 += w2; + } + self.data[idx_to].weights = new_weights; + idx_from = idx_to; + check[idx_to] = true; + } } } From 9c43a4e77cc1adaa6b67f2f1d517c2e255ddca21 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Mon, 20 Dec 2021 12:45:53 +0900 Subject: [PATCH 29/60] Add --metric option to `evaluate` command (#21) * Add --metric option * format --- evaluate/src/main.rs | 113 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 95 insertions(+), 18 deletions(-) diff 
--git a/evaluate/src/main.rs b/evaluate/src/main.rs index 3e96ecf0..da279869 100644
--- a/evaluate/src/main.rs
+++ b/evaluate/src/main.rs
@@ -33,6 +33,23 @@ impl FromStr for WsConst {
 }
 }
+#[derive(Debug)]
+enum EvaluationMetric {
+ CharBoundaryAccuracy,
+ WordAccuracy,
+}
+
+impl FromStr for EvaluationMetric {
+ type Err = &'static str;
+ fn from_str(metric: &str) -> Result<Self, Self::Err> {
+ match metric {
+ "char" => Ok(Self::CharBoundaryAccuracy),
+ "word" => Ok(Self::WordAccuracy),
+ _ => Err("Could not parse a metric value"),
+ }
+ }
+}
+
 #[derive(StructOpt, Debug)]
 #[structopt(
 name = "evaluate",
@@ -51,6 +68,12 @@ struct Opt {
 /// Do not normalize input strings before prediction.
 #[structopt(long)]
 no_norm: bool,
+
+ /// Evaluation metric: {char, word}.
+ /// char: evaluates each character boundary.
+ /// word: evaluates each word using Nagata's method.
+ #[structopt(long, default_value = "char")]
+ metric: EvaluationMetric,
 }
 fn main() -> Result<(), Box> {
@@ -75,10 +98,8 @@ fn main() -> Result<(), Box> {
 let predictor = Predictor::new(model)?;
 eprintln!("Start tokenization");
- let mut n_true_positive = 0;
- let mut n_false_positive = 0;
- let mut n_false_negative = 0;
+ let mut results = vec![];
 for line in stdin().lock().lines() {
 let s = Sentence::from_tokenized(line?)?;
 let s = if opt.no_norm {
@@ -92,25 +113,81 @@ fn main() -> Result<(), Box> {
 let reference = s.boundaries().to_vec();
 let s = predictor.predict(s);
 let s = post_filters.iter().fold(s, |s, filter| filter.filter(s));
- for (&r, &h) in reference.iter().zip(s.boundaries()) {
- if r == h {
- if h == BoundaryType::WordBoundary {
- n_true_positive += 1;
+ results.push((reference, s.boundaries().to_vec()));
+ }
+
+ match opt.metric {
+ EvaluationMetric::CharBoundaryAccuracy => {
+ let mut n_tp = 0;
+ let mut n_tn = 0;
+ let mut n_fp = 0;
+ let mut n_fn = 0;
+ for (rs, hs) in results {
+ for (r, h) in rs.into_iter().zip(hs) {
+ if r == h {
+ if h == BoundaryType::WordBoundary {
+ n_tp += 1;
+ } else {
+ n_tn += 1;
+ }
+ } else if h == BoundaryType::WordBoundary {
+ n_fp += 1;
+ } else {
+ n_fn += 1;
+ }
+ }
+ }
+ let precision = n_tp as f64 / (n_tp + n_fp) as f64;
+ let recall = n_tp as f64 / (n_tp + n_fn) as f64;
+ let f1 = 2. * precision * recall / (precision + recall);
+ println!("Precision: {}", precision);
+ println!("Recall: {}", recall);
+ println!("F1: {}", f1);
+ println!("TP: {}, TN: {}, FP: {}, FN: {}", n_tp, n_tn, n_fp, n_fn);
+ }
+ EvaluationMetric::WordAccuracy => {
+ // Reference:
+ // Masaaki Nagata. 1994. A stochastic Japanese morphological analyzer using a forward-DP
+ // backward-A* n-best search algorithm. In COLING 1994 Volume 1: The 15th International
+ // Conference on Computational Linguistics.
+ let mut n_sys = 0;
+ let mut n_ref = 0;
+ let mut n_cor = 0;
+ let mut matched = true;
+ for (rs, hs) in results {
+ for (r, h) in rs.into_iter().zip(hs) {
+ if r == h {
+ if h == BoundaryType::WordBoundary {
+ if matched {
+ n_cor += 1;
+ }
+ matched = true;
+ n_ref += 1;
+ n_sys += 1;
+ }
+ } else {
+ if h == BoundaryType::WordBoundary {
+ n_sys += 1;
+ } else {
+ n_ref += 1;
+ }
+ matched = false;
+ }
 }
- } else if h == BoundaryType::WordBoundary {
- n_false_positive += 1;
- } else {
- n_false_negative += 1;
 }
+ if matched {
+ n_cor += 1;
+ }
+ n_sys += 1;
+ n_ref += 1;
+ let precision = n_cor as f64 / n_sys as f64;
+ let recall = n_cor as f64 / n_ref as f64;
+ let f1 = 2.
* precision * recall / (precision + recall); + println!("Precision: {}", precision); + println!("Recall: {}", recall); + println!("F1: {}", f1); } } - let precision = n_true_positive as f64 / (n_true_positive + n_false_positive) as f64; - let recall = n_true_positive as f64 / (n_true_positive + n_false_negative) as f64; - let f1 = 2. * precision * recall / (precision + recall); - println!("Precision: {}", precision); - println!("Recall: {}", recall); - println!("F1: {}", f1); - Ok(()) } From e067b7d7a70053b39686a79a69ce248d0167676e Mon Sep 17 00:00:00 2001 From: Shunsuke Kanda Date: Tue, 21 Dec 2021 13:36:30 +0900 Subject: [PATCH 30/60] handle empty line (#22) --- evaluate/src/main.rs | 6 +++++- predict/src/main.rs | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/evaluate/src/main.rs b/evaluate/src/main.rs index da279869..81f7d690 100644 --- a/evaluate/src/main.rs +++ b/evaluate/src/main.rs @@ -101,7 +101,11 @@ fn main() -> Result<(), Box> { let mut results = vec![]; for line in stdin().lock().lines() { - let s = Sentence::from_tokenized(line?)?; + let line = line?; + if line.is_empty() { + continue; + } + let s = Sentence::from_tokenized(line)?; let s = if opt.no_norm { s } else { diff --git a/predict/src/main.rs b/predict/src/main.rs index 7d96a42e..6edc29a7 100644 --- a/predict/src/main.rs +++ b/predict/src/main.rs @@ -129,7 +129,12 @@ fn main() -> Result<(), Box> { let mut buf1 = Sentence::from_raw(" ")?; let mut buf2 = Sentence::from_raw(" ")?; for line in stdin().lock().lines() { - let ret = tokenize(&predictor, line?, buf1, buf2, &pre_filters, &post_filters)?; + let line = line?; + if line.is_empty() { + println!(); + continue; + } + let ret = tokenize(&predictor, line, buf1, buf2, &pre_filters, &post_filters)?; let result = ret.0; buf1 = ret.1; buf2 = ret.2; From 1718315062b77d878af6007616f393e134637d21 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Tue, 21 Dec 2021 18:23:40 +0900 Subject: [PATCH 31/60] Fix error message (#23) * Fix error messages * Update README --- README.md | 3 +++ vaporetto/src/sentence.rs | 54 ++++++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index a32a3673..d10e2426 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,9 @@ To train a model, use the following command: You can also specify a word dictionary with `--dict` argument. A word dictionary is a file with words per line. +The trainer does not accept empty lines. +Therefore, remove all empty lines from the corpus before training. + You can specify all arguments above multiple times. 
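Since the trainer rejects empty lines, a corpus may need a cleanup pass before training. Below is a minimal sketch of such a filter, mirroring the `line.is_empty()` checks that this series adds to `evaluate` and `predict`; the standalone program itself is illustrative and not part of the repository:

```rust
use std::io::{stdin, stdout, BufRead, Write};

fn main() -> std::io::Result<()> {
    let stdout = stdout();
    let mut out = stdout.lock();
    for line in stdin().lock().lines() {
        let line = line?;
        // The trainer does not accept empty lines, so drop them here.
        if line.is_empty() {
            continue;
        }
        writeln!(out, "{}", line)?;
    }
    Ok(())
}
```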
### Model Manipulation diff --git a/vaporetto/src/sentence.rs b/vaporetto/src/sentence.rs index 8fd2c800..837ea50f 100644 --- a/vaporetto/src/sentence.rs +++ b/vaporetto/src/sentence.rs @@ -126,7 +126,10 @@ impl Sentence { boundaries: &mut Vec, ) -> Result<()> { if raw_text.is_empty() { - return Err(VaporettoError::invalid_argument("raw_text", "is empty")); + return Err(VaporettoError::invalid_argument( + "raw_text", + "must contain at least one character", + )); } chars.clear(); @@ -149,7 +152,7 @@ impl Sentence { if tokenized_text.is_empty() { return Err(VaporettoError::invalid_argument( "tokenized_text", - "is empty", + "must contain at least one character", )); } @@ -169,12 +172,12 @@ impl Sentence { if chars.is_empty() { return Err(VaporettoError::invalid_argument( "tokenized_text", - "starts with a whitespace", + "must not start with a whitespace", )); } else if prev_boundary { return Err(VaporettoError::invalid_argument( "tokenized_text", - "contains consecutive whitespaces", + "must not contain consecutive whitespaces", )); } prev_boundary = true; @@ -198,7 +201,7 @@ impl Sentence { if prev_boundary { return Err(VaporettoError::invalid_argument( "tokenized_text", - "ends with a whitespace", + "must not end with a whitespace", )); } @@ -212,14 +215,17 @@ impl Sentence { boundaries: &mut Vec, ) -> Result<()> { if labeled_text.is_empty() { - return Err(VaporettoError::invalid_argument("labeled_text", "is empty")); + return Err(VaporettoError::invalid_argument( + "labeled_text", + "must contain at least one character", + )); } let labeled_chars: Vec = labeled_text.chars().collect(); if labeled_chars.len() % 2 == 0 { return Err(VaporettoError::invalid_argument( "labeled_text", - format!("invalid length: {}", labeled_chars.len()), + "must contain odd number of characters", )); } @@ -236,7 +242,7 @@ impl Sentence { _ => { return Err(VaporettoError::invalid_argument( "labeled_text", - format!("contains invalid boundary character: '{}'", c), + format!("contains an invalid boundary character: '{}'", c), )) } }); @@ -804,7 +810,7 @@ mod tests { let s = Sentence::from_raw(""); assert_eq!( - "InvalidArgumentError: raw_text: is empty", + "InvalidArgumentError: raw_text: must contain at least one character", &s.err().unwrap().to_string() ); } @@ -815,7 +821,7 @@ mod tests { let result = s.update_raw(""); assert_eq!( - "InvalidArgumentError: raw_text: is empty", + "InvalidArgumentError: raw_text: must contain at least one character", &result.err().unwrap().to_string() ); @@ -934,7 +940,7 @@ mod tests { let s = Sentence::from_tokenized(""); assert_eq!( - "InvalidArgumentError: tokenized_text: is empty", + "InvalidArgumentError: tokenized_text: must contain at least one character", &s.err().unwrap().to_string() ); } @@ -945,7 +951,7 @@ mod tests { let result = s.update_tokenized(""); assert_eq!( - "InvalidArgumentError: tokenized_text: is empty", + "InvalidArgumentError: tokenized_text: must contain at least one character", &result.err().unwrap().to_string() ); @@ -966,7 +972,7 @@ mod tests { let s = Sentence::from_tokenized(" Rust で 良い プログラミング 体験 を !"); assert_eq!( - "InvalidArgumentError: tokenized_text: starts with a whitespace", + "InvalidArgumentError: tokenized_text: must not start with a whitespace", &s.err().unwrap().to_string() ); } @@ -977,7 +983,7 @@ mod tests { let result = s.update_tokenized(" Rust で 良い プログラミング 体験 を !"); assert_eq!( - "InvalidArgumentError: tokenized_text: starts with a whitespace", + "InvalidArgumentError: tokenized_text: must not start with a whitespace", 
&result.err().unwrap().to_string() ); @@ -998,7 +1004,7 @@ mod tests { let s = Sentence::from_tokenized("Rust で 良い プログラミング 体験 を ! "); assert_eq!( - "InvalidArgumentError: tokenized_text: ends with a whitespace", + "InvalidArgumentError: tokenized_text: must not end with a whitespace", &s.err().unwrap().to_string() ); } @@ -1009,7 +1015,7 @@ mod tests { let result = s.update_tokenized("Rust で 良い プログラミング 体験 を ! "); assert_eq!( - "InvalidArgumentError: tokenized_text: ends with a whitespace", + "InvalidArgumentError: tokenized_text: must not end with a whitespace", &result.err().unwrap().to_string() ); @@ -1030,7 +1036,7 @@ mod tests { let s = Sentence::from_tokenized("Rust で 良い プログラミング 体験 を !"); assert_eq!( - "InvalidArgumentError: tokenized_text: contains consecutive whitespaces", + "InvalidArgumentError: tokenized_text: must not contain consecutive whitespaces", &s.err().unwrap().to_string() ); } @@ -1041,7 +1047,7 @@ mod tests { let result = s.update_tokenized("Rust で 良い プログラミング 体験 を !"); assert_eq!( - "InvalidArgumentError: tokenized_text: contains consecutive whitespaces", + "InvalidArgumentError: tokenized_text: must not contain consecutive whitespaces", &result.err().unwrap().to_string() ); @@ -1386,7 +1392,7 @@ mod tests { let s = Sentence::from_partial_annotation(""); assert_eq!( - "InvalidArgumentError: labeled_text: is empty", + "InvalidArgumentError: labeled_text: must contain at least one character", &s.err().unwrap().to_string() ); } @@ -1397,7 +1403,7 @@ mod tests { let result = s.update_partial_annotation(""); assert_eq!( - "InvalidArgumentError: labeled_text: is empty", + "InvalidArgumentError: labeled_text: must contain at least one character", &result.err().unwrap().to_string() ); } @@ -1407,7 +1413,7 @@ mod tests { let result = Sentence::from_partial_annotation("火-星 猫|の|生-態 "); assert_eq!( - "InvalidArgumentError: labeled_text: invalid length: 12", + "InvalidArgumentError: labeled_text: must contain odd number of characters", &result.err().unwrap().to_string() ); } @@ -1418,7 +1424,7 @@ mod tests { let result = s.update_partial_annotation("火-星 猫|の|生-態 "); assert_eq!( - "InvalidArgumentError: labeled_text: invalid length: 12", + "InvalidArgumentError: labeled_text: must contain odd number of characters", &result.err().unwrap().to_string() ); } @@ -1428,7 +1434,7 @@ mod tests { let s = Sentence::from_partial_annotation("火-星?猫|の|生-態"); assert_eq!( - "InvalidArgumentError: labeled_text: contains invalid boundary character: '?'", + "InvalidArgumentError: labeled_text: contains an invalid boundary character: '?'", &s.err().unwrap().to_string() ); } @@ -1439,7 +1445,7 @@ mod tests { let result = s.update_partial_annotation("火-星?猫|の|生-態"); assert_eq!( - "InvalidArgumentError: labeled_text: contains invalid boundary character: '?'", + "InvalidArgumentError: labeled_text: contains an invalid boundary character: '?'", &result.err().unwrap().to_string() ); } From 006a904cbbdb86643b665a8f423b90b7406b2771 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Wed, 22 Dec 2021 15:48:13 +0900 Subject: [PATCH 32/60] Portable simd feature (#24) * Add portable-simd feature * Update README * fmt * fix * Update README.md * Enable simd when portable-simd is specified * Fix README * fix --- evaluate/Cargo.toml | 2 +- predict/Cargo.toml | 2 +- vaporetto/Cargo.toml | 1 + vaporetto/README.md | 4 +++- vaporetto/src/char_scorer.rs | 24 ++++++++++++++++++++---- vaporetto/src/lib.rs | 5 ++++- vaporetto/src/predictor.rs | 5 +++-- vaporetto/src/trainer.rs | 6 +----- 8 files changed, 34 insertions(+), 15 
deletions(-)

diff --git a/evaluate/Cargo.toml b/evaluate/Cargo.toml index a4d7b1eb..a05e29f4 100644
--- a/evaluate/Cargo.toml
+++ b/evaluate/Cargo.toml
@@ -5,6 +5,6 @@ edition = "2018"
 [dependencies]
 structopt = "0.3" # MIT or Apache-2.0
-vaporetto = { path = "../vaporetto" } # MIT or Apache-2.0
+vaporetto = { path = "../vaporetto", features = ["simd"] } # MIT or Apache-2.0
 vaporetto_rules = { path = "../vaporetto_rules" } # MIT or Apache-2.0
 zstd = "0.9" # MIT
diff --git a/predict/Cargo.toml b/predict/Cargo.toml index 66040c0b..5817a39f 100644
--- a/predict/Cargo.toml
+++ b/predict/Cargo.toml
@@ -5,6 +5,6 @@
 [dependencies]
 structopt = "0.3" # MIT or Apache-2.0
-vaporetto = { path = "../vaporetto" } # MIT or Apache-2.0
+vaporetto = { path = "../vaporetto", features = ["simd"] } # MIT or Apache-2.0
 vaporetto_rules = { path = "../vaporetto_rules" } # MIT or Apache-2.0
 zstd = "0.9" # MIT
diff --git a/vaporetto/Cargo.toml b/vaporetto/Cargo.toml index d0be026b..69f57f22 100644
--- a/vaporetto/Cargo.toml
+++ b/vaporetto/Cargo.toml
@@ -23,6 +23,7 @@ default = []
 kytea = []
 train = ["liblinear"]
 simd = []
+portable-simd = ["simd"]
 [package.metadata.docs.rs]
 all-features = true
diff --git a/vaporetto/README.md b/vaporetto/README.md index 6b774112..4d0038e3 100644
--- a/vaporetto/README.md
+++ b/vaporetto/README.md
@@ -25,7 +25,9 @@ println!("{:?}", s.to_tokenized_vec().unwrap());
 * `kytea` - Enables the reader for models generated by KyTea.
 * `train` - Enables the trainer.
-* `simd` - Use the SIMD operations for prediction. (Nightly version of Rust is required.)
+* `simd` - Uses a SIMD-conscious data layout, expecting that your compiler enables SIMD optimization.
+* `portable-simd` - Uses the [portable SIMD API](https://github.com/rust-lang/portable-simd) instead
+ of our SIMD-conscious data layout. (Nightly Rust is required.)
 ## License
diff --git a/vaporetto/src/char_scorer.rs b/vaporetto/src/char_scorer.rs index 4cf82351..27e14b55 100644
--- a/vaporetto/src/char_scorer.rs
+++ b/vaporetto/src/char_scorer.rs
@@ -4,7 +4,7 @@ use crate::errors::{Result, VaporettoError};
 use crate::ngram_model::NgramModel;
 use crate::sentence::Sentence;
-#[cfg(feature = "simd")]
+#[cfg(all(feature = "simd", feature = "portable-simd"))]
 use std::simd::i32x8;
 pub enum CharScorer {
@@ -89,7 +89,12 @@ impl CharScorerNaive {
 #[cfg(feature = "simd")]
 pub struct CharScorerSimd {
 pma: DoubleArrayAhoCorasick,
+
+ #[cfg(feature = "portable-simd")]
 weights: Vec<i32x8>,
+ #[cfg(not(feature = "portable-simd"))]
+ weights: Vec<[i32; 8]>,
+
 window_size: usize,
 }
@@ -109,7 +114,10 @@ impl CharScorerSimd {
 "invalid size of weight vector",
 ));
 }
+ #[cfg(feature = "portable-simd")]
 weights.push(i32x8::from_array(s));
+ #[cfg(not(feature = "portable-simd"))]
+ weights.push(s);
 }
 Ok(Self {
 pma,
@@ -126,9 +134,17 @@ impl CharScorerSimd {
 // Therefore, the following code is safe.
let weights = unsafe { self.weights.get_unchecked(m.pattern()) }; let ys_slice = &mut ys[offset as usize..offset as usize + 8]; - let mut target = i32x8::from_slice(ys_slice); - target += weights; - ys_slice.copy_from_slice(target.as_array()); + + #[cfg(feature = "portable-simd")] + { + let mut target = i32x8::from_slice(ys_slice); + target += weights; + ys_slice.copy_from_slice(target.as_array()); + } + #[cfg(not(feature = "portable-simd"))] + for (y, w) in ys_slice.iter_mut().zip(weights) { + *y += w; + } } } diff --git a/vaporetto/src/lib.rs b/vaporetto/src/lib.rs index c1214fe7..db0e803a 100644 --- a/vaporetto/src/lib.rs +++ b/vaporetto/src/lib.rs @@ -1,5 +1,8 @@ #![cfg_attr(docsrs, feature(doc_cfg))] -#![cfg_attr(feature = "simd", feature(portable_simd))] +#![cfg_attr( + all(feature = "simd", feature = "portable-simd"), + feature(portable_simd) +)] //! # Vaporetto //! diff --git a/vaporetto/src/predictor.rs b/vaporetto/src/predictor.rs index 89ea4971..bf43db50 100644 --- a/vaporetto/src/predictor.rs +++ b/vaporetto/src/predictor.rs @@ -99,7 +99,7 @@ impl Predictor { let mut ys = vec![0; ys_size]; self.predict_impl(&sentence, self.padding, &mut ys); for (&y, b) in ys[self.padding..] - .into_iter() + .iter() .zip(sentence.boundaries.iter_mut()) { *b = if y >= 0 { @@ -148,8 +148,9 @@ impl Predictor { .boundary_scores .take() .unwrap_or_else(|| vec![0; boundaries_size]); + scores.resize(boundaries_size, 0); for (&y, (b, s)) in ys[self.padding..] - .into_iter() + .iter() .zip(sentence.boundaries.iter_mut().zip(scores.iter_mut())) { *b = if y >= 0 { diff --git a/vaporetto/src/trainer.rs b/vaporetto/src/trainer.rs index c67d1cf2..055df7b4 100644 --- a/vaporetto/src/trainer.rs +++ b/vaporetto/src/trainer.rs @@ -147,11 +147,7 @@ impl<'a> Dataset<'a> { let mut feature_ids = BTreeMap::new(); for f in example.features { let fid = self.fid_manager.get_id(f) + 1; - if let Some(v) = feature_ids.get_mut(&fid) { - *v += 1.0; - } else { - feature_ids.insert(fid, 1.0); - } + *feature_ids.entry(fid).or_insert(0.0) += 1.0; } self.xs.push(feature_ids.into_iter().collect()); self.ys.push(example.label as u8 as f64); From 3430df841a2c1c26b5a019077636f18f186858e8 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Thu, 23 Dec 2021 12:15:17 +0900 Subject: [PATCH 33/60] Reimplement the portable JS builder in Python (#25) * Use Python instead of Bash * Add py * fix * fix * fix * fix * Update vaporetto_wasm/README.md Co-authored-by: Shunsuke Kanda Co-authored-by: Shunsuke Kanda --- vaporetto_wasm/README.md | 9 ++--- vaporetto_wasm/build_portable_js.py | 55 +++++++++++++++++++++++++++++ vaporetto_wasm/build_portable_js.sh | 16 --------- vaporetto_wasm/src/lib.rs | 4 +-- vaporetto_wasm/www/index.js | 2 +- 5 files changed, 63 insertions(+), 23 deletions(-) create mode 100755 vaporetto_wasm/build_portable_js.py delete mode 100755 vaporetto_wasm/build_portable_js.sh diff --git a/vaporetto_wasm/README.md b/vaporetto_wasm/README.md index 7507e68d..9d7f4fde 100644 --- a/vaporetto_wasm/README.md +++ b/vaporetto_wasm/README.md @@ -2,14 +2,14 @@ ## How to build? -1. Build a model file refering the [documentation](../README.md). +1. Build a model file following the [documentation](../README.md). -2. Build a JS file containing a web assembly using `build_portable_js.sh`. +2. Build a JS file containing a web assembly using `build_portable_js.py`. This script requires a model file, an identifier, and an output path. - + The identifier must consist of alphanumeric characters and underscores. 
```
- ./build_portable_js.sh <model> <identifier> <output>
+ ./build_portable_js.py --model <model> --identifier <identifier> --output <output>
```
3. You can use the generated JS file like the following code:
+