Skip to content

Commit

Permalink
fix: Sentence boundaries are now considered when checking negations and …
Browse files Browse the repository at this point in the history
…intensifiers
  • Loading branch information
KennethEnevoldsen committed Aug 26, 2022
1 parent ac2c2c6 commit 4caa5a1
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 5 deletions.
2 changes: 1 addition & 1 deletion asent/about.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__title__ = "asent"

__version__ = "0.4.3" # the ONLY source of version ID
__version__ = "0.5.4" # the ONLY source of version ID
__download_url__ = "https://github.com/kennethenevoldsen/asent"
__documentation__ = "https://kennethenevoldsen.github.io/asent"
16 changes: 12 additions & 4 deletions asent/getters.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,14 +223,16 @@ def make_is_negated_getter(

def is_negated_getter(token: Token) -> bool:
"""Find the negation, if any, that precedes ``token`` within its sentence.

Scans the tokens in front of ``token`` and returns the first one whose
``is_negation`` extension is truthy; falls through (returning None) when
no such token is found.
NOTE(review): the value returned is a token or None rather than a strict
bool as annotated -- confirm whether the annotation should be updated.
"""
for t in token.doc[token.i - lookback : token.i]:
# don't look back before the sentence start: the window opens at whichever is later, the sentence start or ``lookback`` tokens before ``token``
min_token_idx = max(token.sent.start, token.i - lookback)
for t in token.doc[min_token_idx : token.i]:
if t._.is_negation:
return t

return is_negated_getter


def make_token_polarity_getter(
def make_token_polarity_getter( # noqa: C901
valence_getter: Optional[Callable[[Token], float]] = None,
is_negated_getter: Optional[Callable[[Token], Union[bool, Optional[Token]]]] = None,
intensifier_getter: Optional[Callable[[Token], float]] = None,
Expand Down Expand Up @@ -306,8 +308,14 @@ def token_polarity_getter(
# dampen the scalar modifier of preceding words and emoticons
# (excluding the ones that immediately preceed the item) based
# on their distance from the current item.

if token.i > start_i:
prev_token = token.doc[token.i - start_i]
tok_id = token.i - start_i
# stop if before sentence start
if tok_id < token.sent.start:
break

prev_token = token.doc[tok_id]
b = prev_token._.intensifier
if b != 0:
intensifiers.append(prev_token)
Expand Down Expand Up @@ -409,7 +417,7 @@ def sift_sentiment_scores(sentiments: Iterable[float]) -> Tuple[float, float, in
return pos_sum, neg_sum, neu_count


def make_span_polarity_getter(
def make_span_polarity_getter( # noqa: C901
polarity_getter: Optional[Callable[[Token], float]],
contrastive_conj_getter: Optional[Callable[[Token], bool]],
) -> SpanPolarityOutput:
Expand Down
5 changes: 5 additions & 0 deletions docs/news.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
News and Changelog
==============================

* 0.5.3 (26/08/22)

- Fixed a bug where negations and intensifiers were considered outside of the sentence boundaries. Addresses `58 <https://github.com/KennethEnevoldsen/asent/issues/58>`__.
- Improvements to the documentation. Thanks to @tomaarsen for the pull request.

* 0.4.2 (28/05/22)

- Added new Danish Dictionary from `AFINN <https://github.com/fnielsen/afinn>`__
Expand Down
26 changes: 26 additions & 0 deletions tests/test_bugs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Test specifically targeted an bugs."""

import spacy

import asent # noqa


def test_no_negations_and_intensifiers_out_of_sentence():
    """Check that negations and intensifiers do not leak across sentences.

    Regression test for
    https://github.com/KennethEnevoldsen/asent/issues/58
    """
    # blank English pipeline: sentence splitting first, then the asent component
    pipeline = spacy.blank("en")
    for component_name in ("sentencizer", "asent_en_v1"):
        pipeline.add_pipe(component_name)

    # the negation closes the middle sentence; the token at index 10
    # (expected to lie in the final sentence) must not pick it up
    negation_text = "Would you do that? I would not. Very stupid is what that is."
    negation_doc = pipeline(negation_text)
    assert negation_doc[10]._.is_negated is None

    # here the negation and the intensifier both sit before the sentence
    # break, so the token's polarity should equal its unmodified valence
    intensifier_text = "Would you do that? I would not very. Stupid is what that is."
    target = pipeline(intensifier_text)[10]
    assert target._.valence == target._.polarity.polarity

0 comments on commit 4caa5a1

Please sign in to comment.