Skip to content

Commit

Permalink
Ignore character type features that contain invalid type identifiers (#111)
Browse files Browse the repository at this point in the history

* Ignore character type features that contain invalid type identifiers

* Update kytea_model.rs

* Update kytea_model.rs

* Update kytea_model.rs
  • Loading branch information
vbkaisetsu authored Sep 20, 2023
1 parent dbbe794 commit 14dcd57
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion vaporetto/src/kytea_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ impl TryFrom<KyteaModel> for Model {
}

let mut type_ngrams = vec![];
for (type_ngram, v) in type_dict.dump_items() {
'a: for (type_ngram, v) in type_dict.dump_items() {
let weight_size = config.type_w as usize * 2 - type_ngram.len() + 1;
let mut ngram = type_ngram
.into_iter()
Expand All @@ -494,6 +494,12 @@ impl TryFrom<KyteaModel> for Model {
b'T' => CharacterType::Katakana as u8,
b'K' => CharacterType::Kanji as u8,
b'O' => CharacterType::Other as u8,
// https://github.com/daac-tools/vaporetto/issues/110
// Some models distributed on KyTea's web site contain the invalid character
// type `0x04`. The following supports them.
4 => {
continue 'a;
}
t => {
return Err(VaporettoError::invalid_model(format!(
"unsupported character type: {t}"
Expand Down

0 comments on commit 14dcd57

Please sign in to comment.