Skip to content

Commit

Permalink
Fix incorrect end index in DetectionResult
Browse files: browse the repository at this point in the history
  • Loading branch information
pemistahl committed Dec 6, 2023
1 parent d39452a commit 72f2d89
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ members = ["language-models/*"]

[package]
name = "lingua"
version = "1.6.1"
version = "1.6.2"
authors = ["Peter M. Stahl <[email protected]>"]
description = """
An accurate natural language detection library, suitable for short text and mixed-language text
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

[project]
name = "lingua-language-detector"
version = "2.0.1"
version = "2.0.2"
authors = [{name = "Peter M. Stahl", email = "[email protected]"}]
description = "An accurate natural language detection library, suitable for short text and mixed-language text"
readme = "README_PYPI.md"
Expand Down
5 changes: 3 additions & 2 deletions src/detector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ impl LanguageDetector {
if languages.len() == 1 {
let result = DetectionResult {
start_index: 0,
end_index: text_str.chars().count(),
end_index: text_str.len(),
word_count: tokens_without_whitespace.len(),
language: *languages.iter().next().unwrap(),
};
Expand Down Expand Up @@ -1872,11 +1872,12 @@ mod tests {
sentence,
expected_word_count,
expected_language,
case::english(
case::english_1(
"I'm really not sure whether multi-language detection is a good idea.",
11,
English
),
case::english_2("I'm frightened! 🙈", 3, English),
case::kazakh("V төзімділік спорт", 3, Kazakh)
)]
fn test_detect_multiple_languages_with_one_language(
Expand Down
3 changes: 2 additions & 1 deletion tests/python/test_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,9 @@ def test_detect_multiple_languages_for_empty_string():
"I'm really not sure whether multi-language detection is a good idea.",
11,
Language.ENGLISH,
id="ENGLISH"
id="ENGLISH 1"
),
pytest.param("I'm frightened! 🙈", 3, Language.ENGLISH, id="ENGLISH 2"),
pytest.param("V төзімділік спорт", 3, Language.KAZAKH, id="KAZAKH"),
],
)
Expand Down

0 comments on commit 72f2d89

Please sign in to comment.