Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix usage of empty spaCy model #5762

Merged
merged 7 commits into from
May 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/5638.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed DIET breaking when an empty spaCy model (one without word vectors) is used.
32 changes: 20 additions & 12 deletions rasa/nlu/featurizers/dense_featurizer/spacy_featurizer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import typing
import logging
from typing import Any, Optional, Text, Dict, List, Type

from rasa.nlu.config import RasaNLUModelConfig
Expand All @@ -20,6 +21,9 @@
from spacy.tokens import Doc


logger = logging.getLogger(__name__)
koaning marked this conversation as resolved.
Show resolved Hide resolved


class SpacyFeaturizer(DenseFeaturizer):
@classmethod
def required_components(cls) -> List[Type[Component]]:
Expand Down Expand Up @@ -52,25 +56,29 @@ def train(
self._set_spacy_features(example, attribute)

def get_doc(self, message: Message, attribute: Text) -> Any:
    """Return the value stored on ``message`` under ``SPACY_DOCS[attribute]``.

    The result is whatever ``message.get`` yields for that key; callers in
    this class treat ``None`` as "no spaCy doc available".
    """
    doc_key = SPACY_DOCS[attribute]
    return message.get(doc_key)

def process(self, message: Message, **kwargs: Any) -> None:
    """Set the spaCy features on ``message`` for the default attribute.

    Extra keyword arguments are accepted but not used here.
    """
    # Delegate to the shared helper; the attribute argument is left at its default.
    self._set_spacy_features(message)

def _set_spacy_features(self, message: Message, attribute: Text = TEXT) -> None:
    """Adds the spacy word vectors to the messages features.

    Nothing is set on the message when either

    * no spaCy doc is stored on the message for ``attribute``, or
    * the doc's vocabulary reports a vector length of 0.

    Otherwise the per-doc features are computed, a CLS vector derived with
    ``self.pooling_operation`` is appended, the result is combined with any
    dense features already on the message, and it is stored under
    ``DENSE_FEATURE_NAMES[attribute]``.
    """
    doc = self.get_doc(message, attribute)

    if doc is None:
        return

    # in case an empty spaCy model was used, no vectors are present
    if doc.vocab.vectors_length == 0:
        logger.debug("No features present. You are using an empty spaCy model.")
        return

    features = self._features_for_doc(doc)

    # Append the pooled CLS vector after the per-token features.
    cls_token_vec = self._calculate_cls_vector(features, self.pooling_operation)
    features = np.concatenate([features, cls_token_vec])

    feature_name = DENSE_FEATURE_NAMES[attribute]
    features = self._combine_with_existing_dense_features(
        message, features, feature_name
    )
    message.set(feature_name, features)
25 changes: 23 additions & 2 deletions tests/nlu/featurizers/test_spacy_featurizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ def test_spacy_featurizer_sequence(sentence, expected, spacy_nlp):
greet = {"intent": "greet", "text_features": [0.5]}

message = Message(sentence, greet)
message.set("text_spacy_doc", doc)
message.set(SPACY_DOCS[TEXT], doc)

ftr._set_spacy_features(message)

vecs = message.get("text_dense_features")[0][:5]
vecs = message.get(DENSE_FEATURE_NAMES[TEXT])[0][:5]

assert np.allclose(token_vectors[0][:5], vecs, atol=1e-4)
assert np.allclose(vecs, expected, atol=1e-4)
Expand Down Expand Up @@ -165,3 +165,24 @@ def test_spacy_featurizer_train(spacy_nlp):
vecs = message.get(DENSE_FEATURE_NAMES[INTENT])

assert vecs is None


def test_spacy_featurizer_using_empty_model():
    """With an empty (blank) spaCy model, no dense text features are set."""
    from rasa.nlu.featurizers.dense_featurizer.spacy_featurizer import SpacyFeaturizer
    import spacy

    text = "This test is using an empty spaCy model"

    # A blank English pipeline stands in for the "empty" model.
    blank_nlp = spacy.blank("en")
    spacy_doc = blank_nlp(text)

    featurizer = SpacyFeaturizer.create({}, RasaNLUModelConfig())

    msg = Message(text)
    msg.set(SPACY_DOCS[TEXT], spacy_doc)

    featurizer._set_spacy_features(msg)

    # The featurizer must leave the dense feature slot untouched.
    assert msg.get(DENSE_FEATURE_NAMES[TEXT]) is None