Skip to content

Commit

Permalink
Merge branch 'master' of github.com:oscar-corpus/ungoliant into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
Uinelj committed Dec 13, 2022
2 parents fe7205f + b4f148c commit 6918d3d
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions src/filtering/sentence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,14 @@ impl FilterMut<&str> for MeanLength {
self.update_mean(length);

// ensure that mu-sig<length<mu+sig (eq. to |length-mu|<sig)
(f64::from(length) - self.mean) < self.std
(f64::from(length) - self.mean).abs() < self.std
}
}

// Non-mutating length check: `detect` takes `&self`, so the stored mean and
// standard deviation are only read here, never updated.
impl Filter<&str> for MeanLength {
// Returns `true` when the sentence length is less than `self.std` away from `self.mean`.
fn detect(&self, sentence: &str) -> bool {
// Length is counted in `char`s (not bytes); if the count does not fit in a
// `u32`, `unwrap_or_default` falls back to 0.
let length: u32 = sentence.chars().count().try_into().unwrap_or_default();
// NOTE(review): this page is a rendered diff (7 additions / 5 deletions), so
// the next line appears to be the removed pre-commit expression. It is
// one-sided: any length below mean + std would pass, however short.
(f64::from(length) - self.mean) < self.std
// Replacement expression: two-sided check |length - mean| < std, so
// sentences that are too short are rejected as well as too long ones.
(f64::from(length) - self.mean).abs() < self.std
}
}

Expand Down Expand Up @@ -158,16 +158,18 @@ mod tests {
f.detect_mut(&sentence);
}

// create two obvious examples that are resp. valid and invalid
// create three obvious examples: the first is valid, the other two are invalid
let valid: String = ['a'].iter().cycle().take(105).collect();
let invalid: String = ['a'].iter().cycle().take(130).collect();
let long_invalid: String = ['a'].iter().cycle().take(130).collect();
let short_invalid: String = ['a'].iter().cycle().take(80).collect();

// in case of failure, this will be printed
println!("init rng : mu:{:.3} sig:{:.3}", 100.0, 10.0);
println!("from filter: mu:{:.3} sig:{:.3}", f.mean(), f.std());

// ensure distribution is correctly learnt
assert_eq!(f.detect(&valid), true);
assert_eq!(f.detect(&invalid), false);
assert_eq!(f.detect(&long_invalid), false);
assert_eq!(f.detect(&short_invalid), false);
}
}

0 comments on commit 6918d3d

Please sign in to comment.