Skip to content

Commit

Permalink
softmax
Browse files Browse the repository at this point in the history
  • Loading branch information
leks committed Dec 28, 2022
1 parent a31080e commit 7371471
Showing 1 changed file with 9 additions and 10 deletions.
19 changes: 9 additions & 10 deletions lingua/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
FrozenSet,
NamedTuple,
Optional,
List,
List
)

from ._constant import (
Expand All @@ -50,6 +50,10 @@ def _split_text_into_words(text: str) -> List[str]:
return LETTERS.findall(text.lower())


def _softmax(x: np.ndarray) -> np.ndarray:
    """Return the softmax of *x*, normalized over all of its elements.

    The result has the same shape as *x*; its entries are non-negative
    and sum to 1. An empty input yields an empty array.

    The maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged (the common factor cancels in the
    ratio) but keeps every exponent <= 0, so inputs of large magnitude
    no longer overflow to ``inf`` or underflow to ``0`` and produce NaN.
    """
    x = np.asarray(x)
    if x.size == 0:
        # np.max raises on empty input; keep returning an empty array.
        return np.exp(x)
    exponentials = np.exp(x - np.max(x))
    return exponentials / np.sum(exponentials)


def _load_language_models(
language: Language,
ngram_length: int,
Expand Down Expand Up @@ -497,19 +501,14 @@ def compute_language_confidence_values(self, text: str) -> List[ConfidenceValue]
_sort_confidence_values(values)
return values

sorted_probabilities = sorted(summed_up_probabilities.values())
lowest_probability = sorted_probabilities[0]
highest_probability = sorted_probabilities[-1]
denominator = highest_probability - lowest_probability
lang, prob = zip(*summed_up_probabilities.items())
prob = np.round(_softmax(np.array(prob)), 2)
summed_up_probabilities = dict(zip(lang, prob))

for language, probability in summed_up_probabilities.items():
# Apply min-max normalization
normalized_probability = (
0.98 * (probability - lowest_probability) / denominator + 0.01
)
for i in range(len(values)):
if values[i].language == language:
values[i] = ConfidenceValue(language, normalized_probability)
values[i] = ConfidenceValue(language, probability)
break

_sort_confidence_values(values)
Expand Down

0 comments on commit 7371471

Please sign in to comment.