diff --git a/CHANGELOG.md b/CHANGELOG.md index 97f569e..b922c0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ +## v0.7.4 (2023-09-04) + +### Fix + +* Added option ([`75932a8`](https://github.com/KennethEnevoldsen/asent/commit/75932a848902f656b79ff30f6a190feb09f4f1a2)) + ## v0.7.3 (2023-09-04) ### Fix diff --git a/pyproject.toml b/pyproject.toml index 2c80d37..22f5423 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "asent" -version = "0.7.3" +version = "0.7.4" description = "A python package for flexible and transparent sentiment analysis." authors = [ { name = "Kenneth Enevoldsen", email = "kennethcenevoldsen@gmail.com" }, diff --git a/src/asent/component.py b/src/asent/component.py index ee4e3bf..04be619 100644 --- a/src/asent/component.py +++ b/src/asent/component.py @@ -133,10 +133,10 @@ def __init__( ) if (not Doc.has_extension("polarity")) or (force is True): - doc_pol_getter = make_doc_polarity_getter(span_polarity_getter=None) + self.doc_pol_getter = make_doc_polarity_getter(span_polarity_getter=None) Doc.set_extension( "polarity", - getter=doc_pol_getter, + default=None, force=force, ) @@ -149,6 +149,8 @@ def __call__(self, doc: Doc) -> Doc: Returns: Doc: A processed spacy Document. """ + pol = self.doc_pol_getter(doc) + doc._.polarity = pol return doc diff --git a/src/asent/data_classes.py b/src/asent/data_classes.py index 78dd8ef..843ba6d 100644 --- a/src/asent/data_classes.py +++ b/src/asent/data_classes.py @@ -126,6 +126,20 @@ def __repr_str__(self, join_str: str) -> str: ] ) + def as_span_polarity(self) -> SpanPolarityOutput: + span = self.doc.doc[:] + pol = SpanPolarityOutput( + negative=self.negative, + positive=self.positive, + neutral=self.neutral, + compound=self.compound, + span=span, + polarities=[ + t_pol for span_pol in self.polarities for t_pol in span_pol.polarities + ], + ) + return pol + def __lt__(self, other: object): if not isinstance(other, (DocPolarityOutput, float)): return NotImplemented diff --git a/src/asent/visualize.py b/src/asent/visualize.py index d6740dc..0b94b78 100644 --- a/src/asent/visualize.py +++ b/src/asent/visualize.py @@ -1,11 +1,13 @@ from distutils.log import warn -from typing import Union +from typing import Tuple, Union import spacy from packaging import version from spacy import displacy from spacy.tokens import Doc, Span +from asent.data_classes import DocPolarityOutput, SpanPolarityOutput + def make_colors(n=10, cmap="RdYlGn"): """A utility function for creating a stepped color gradient.""" @@ -27,26 +29,46 @@ def make_colors(n=10, cmap="RdYlGn"): # display(HTML(f'

{color}

')) -def visualize_prediction_no_overlap(doc: Union[Span, Doc], cmap="RdYlGn") -> str: +def _normalize_doc_input( + doc: Union[Span, Doc, DocPolarityOutput, SpanPolarityOutput], +) -> Tuple[Span, SpanPolarityOutput]: + if isinstance(doc, Doc): + span = doc[:] + pol = span._.polarity + elif isinstance(doc, DocPolarityOutput): + pol = doc.as_span_polarity() + span = pol.span + elif isinstance(doc, SpanPolarityOutput): + pol = doc + span = doc.span + else: + span = doc + # turn span into doc + pol = span._.polarity + + return span, pol + + +def visualize_prediction_no_overlap( + doc: Union[Span, Doc, DocPolarityOutput, SpanPolarityOutput], + cmap="RdYlGn", +) -> str: """Render displaCy visualisation of model prediction of sentiment. This visualization is similar to visualize_prediction, but it does not allow for overlapping spans. Args: - doc (Union[Span, Doc]): The span or document you wish to apply the visualizer + doc: The span or document you wish to apply the visualizer to. - cmap (str, optional): The color map derived from matplotlib. Defaults to + cmap: The color map derived from matplotlib. Defaults to "RdYlGn". Returns: - str: Rendered HTML markup. + Rendered HTML markup. """ - if isinstance(doc, Doc): - span = doc[:] - else: - span = doc + span, pol = _normalize_doc_input(doc) thresholds = [t / 10 for t in range(-50, 51)] sentiment_colors = make_colors(n=len(thresholds), cmap=cmap) @@ -55,7 +77,6 @@ def visualize_prediction_no_overlap(doc: Union[Span, Doc], cmap="RdYlGn") -> str def __normalize(val: float) -> str: return str(max(min(round(val, 1), 5), -5)) - pol = span._.polarity t_pols = list(filter(lambda p: p, pol.polarities)) c_spans = [ @@ -85,23 +106,22 @@ def __normalize(val: float) -> str: return html -def visualize_prediction(doc: Union[Span, Doc], cmap="RdYlGn") -> str: +def visualize_prediction( + doc: Union[Span, Doc, SpanPolarityOutput, DocPolarityOutput], + cmap="RdYlGn", +) -> str: """Render displaCy visualisation of model prediction of sentiment. Args: - doc (Union[Span, Doc]): The span or document you wish to apply the visualizer + doc: The span or document you wish to apply the visualizer to. - cmap (str, optional): The color map derived from matplotlib. Defaults to + cmap: The color map derived from matplotlib. Defaults to "RdYlGn". Returns: - str: Rendered HTML markup. + Rendered HTML markup. """ - - if isinstance(doc, Doc): - span = doc[:] - else: - span = doc + span, pol = _normalize_doc_input(doc) thresholds = [t / 10 for t in range(-50, 51)] sentiment_colors = make_colors(n=len(thresholds), cmap=cmap) @@ -110,7 +130,6 @@ def visualize_prediction(doc: Union[Span, Doc], cmap="RdYlGn") -> str: def __normalize(val: float) -> str: return str(max(min(round(val, 1), 5), -5)) - pol = span._.polarity t_pols = list(filter(lambda p: p, pol.polarities)) c_spans = [ @@ -139,23 +158,22 @@ def __normalize(val: float) -> str: return html -def visualize_analysis(doc: Union[Span, Doc]) -> str: +def visualize_analysis( + doc: Union[Span, Doc, DocPolarityOutput, SpanPolarityOutput], +) -> str: """Render displaCy visualisation of model analysis. Args: - doc (Union[Span, Doc]): The span or document you wish to apply the visualizer + doc: The span or document you wish to apply the visualizer to. + cmap: The color map derived from matplotlib. Defaults to + "RdYlGn". Returns: - str: Rendered HTML markup. + Rendered HTML markup. """ - if isinstance(doc, Doc): - span = doc[:] - else: - span = doc - - pol = span._.polarity + span, pol = _normalize_doc_input(doc) arcs = [] words = [] @@ -199,24 +217,28 @@ def visualize_analysis(doc: Union[Span, Doc]) -> str: return html -def visualize(doc: Union[Span, Doc], style: str = "prediction", cmap="RdYlGn") -> str: +def visualize( + doc: Union[Span, Doc, DocPolarityOutput, SpanPolarityOutput], + style: str = "prediction", + cmap: str = "RdYlGn", +) -> str: """Render displaCy visualisation of model prediction of sentiment or analysis of sentiment. Args: - doc (Union[Span, Doc]): The span or document you wish to apply the visualizer + doc: The span or document you wish to apply the visualizer to. - style (str): A string indicating whether it should visualize + style: A string indicating whether it should visualize "prediction" or "analysis". "prediction", color codes positive or negative spans according to the cmap. "analysis" visualize for each sentimental word if it has by negated or intensified a word, and which word. If you are looking for the previous visualizer for "prediction", use "prediction-no-overlap". Note that this does not allow for overlapping span. Thus it can lead to odd results. Defaults to "prediction". - cmap (str): The color map derived from matplotlib. Defaults to "RdYlGn". + cmap: The color map derived from matplotlib. Defaults to "RdYlGn". Returns: - str: Rendered HTML markup. + Rendered HTML markup. Examples: >>> nlp = spacy.load("en_core_web_lg") diff --git a/tests/test_visualize.py b/tests/test_visualize.py index b6d286d..eb71887 100644 --- a/tests/test_visualize.py +++ b/tests/test_visualize.py @@ -33,6 +33,18 @@ def test_visualize(example: str, lang: str, nlp_dict): # noqa asent.visualize(doc[:2], style="prediction-no-overlap") asent.visualize(doc[:2], style="analysis") + # test on polarities + pol = doc._.polarity + asent.visualize(pol, style="prediction") + asent.visualize(pol, style="prediction-no-overlap") + asent.visualize(pol, style="analysis") + + # test on span polarities + span_pol = doc[:2]._.polarity + asent.visualize(span_pol, style="prediction") + asent.visualize(span_pol, style="prediction-no-overlap") + asent.visualize(span_pol, style="analysis") + # error with pytest.raises(ValueError): asent.visualize(doc[:2], style="invalid")