diff --git a/asent/about.py b/asent/about.py
index 542c882..71f2a95 100644
--- a/asent/about.py
+++ b/asent/about.py
@@ -1,5 +1,5 @@
 __title__ = "asent"
-__version__ = "0.4.3"  # the ONLY source of version ID
+__version__ = "0.5.4"  # the ONLY source of version ID
 __download_url__ = "https://github.com/kennethenevoldsen/asent"
 __documentation__ = "https://kennethenevoldsen.github.io/asent"
diff --git a/asent/getters.py b/asent/getters.py
index be845ff..bee0f9e 100644
--- a/asent/getters.py
+++ b/asent/getters.py
@@ -223,14 +223,16 @@ def make_is_negated_getter(
     def is_negated_getter(token: Token) -> bool:
         """Determine if token is negated."""
-        for t in token.doc[token.i - lookback : token.i]:
+        # don't look back before the sentence start
+        min_token_idx = max(token.sent.start, token.i - lookback)
+        for t in token.doc[min_token_idx : token.i]:
             if t._.is_negation:
                 return t
 
     return is_negated_getter
 
 
-def make_token_polarity_getter(
+def make_token_polarity_getter(  # noqa: C901
     valence_getter: Optional[Callable[[Token], float]] = None,
     is_negated_getter: Optional[Callable[[Token], Union[bool, Optional[Token]]]] = None,
     intensifier_getter: Optional[Callable[[Token], float]] = None,
@@ -306,8 +308,14 @@ def token_polarity_getter(
                 # dampen the scalar modifier of preceding words and emoticons
                 # (excluding the ones that immediately preceed the item) based
                 # on their distance from the current item.
+
                 if token.i > start_i:
-                    prev_token = token.doc[token.i - start_i]
+                    tok_id = token.i - start_i
+                    # stop if before sentence start
+                    if tok_id < token.sent.start:
+                        break
+
+                    prev_token = token.doc[tok_id]
                     b = prev_token._.intensifier
                     if b != 0:
                         intensifiers.append(prev_token)
@@ -409,7 +417,7 @@ def sift_sentiment_scores(sentiments: Iterable[float]) -> Tuple[float, float, in
     return pos_sum, neg_sum, neu_count
 
 
-def make_span_polarity_getter(
+def make_span_polarity_getter(  # noqa: C901
     polarity_getter: Optional[Callable[[Token], float]],
     contrastive_conj_getter: Optional[Callable[[Token], bool]],
 ) -> SpanPolarityOutput:
diff --git a/docs/news.rst b/docs/news.rst
index 3cad70c..f0ea43f 100644
--- a/docs/news.rst
+++ b/docs/news.rst
@@ -1,6 +1,11 @@
 News and Changelog
 ==============================
 
+* 0.5.3 (26/05/22)
+
+  - Fixed a bug where negations and intensifiers were considered outside of the sentence boundaries. Addressing `58 <https://github.com/KennethEnevoldsen/asent/issues/58>`__.
+  - Improvements to the documentation. Thanks to @tomaarsen for the pull request.
+
 * 0.4.2 (28/05/22)
 
   - Added new Danish Dictionary from `AFINN `__
diff --git a/tests/test_bugs.py b/tests/test_bugs.py
new file mode 100644
index 0000000..732fae9
--- /dev/null
+++ b/tests/test_bugs.py
@@ -0,0 +1,26 @@
+"""Tests specifically targeting bugs."""
+
+import spacy
+
+import asent  # noqa
+
+
+def test_no_negations_and_intensifiers_out_of_sentence():
+    """Test that negations are not found outside the sentence span.
+
+    https://github.com/KennethEnevoldsen/asent/issues/58
+    """
+
+    # create spacy pipeline
+    nlp = spacy.blank("en")
+    nlp.add_pipe("sentencizer")
+
+    nlp.add_pipe("asent_en_v1")
+
+    text = "Would you do that? I would not. Very stupid is what that is."
+    doc = nlp(text)
+    assert doc[10]._.is_negated is None
+
+    text = "Would you do that? I would not very. Stupid is what that is."
+    doc = nlp(text)
+    assert doc[10]._.valence == doc[10]._.polarity.polarity
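
For reference (not part of the patch): a minimal sketch of how the fix can be exercised interactively. It mirrors tests/test_bugs.py and assumes asent and spaCy are installed; it uses only the pipeline component and token extensions shown in the test above (asent_en_v1, ._.is_negated, ._.valence, ._.polarity).

import spacy

import asent  # noqa  # importing asent registers its spaCy components

nlp = spacy.blank("en")
nlp.add_pipe("sentencizer")   # sentence boundaries are what the fix relies on
nlp.add_pipe("asent_en_v1")

# "not" (token 7) and "stupid" (token 10) sit in different sentences,
# so with the fix "stupid" should not be treated as negated.
doc = nlp("Would you do that? I would not. Very stupid is what that is.")
token = doc[10]
print(token.text, token._.is_negated, token._.valence, token._.polarity.polarity)

Before the patch this lookback could cross the sentence start, so is_negated would be expected to return the negation token rather than None for "stupid".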