Skip to content

Commit

Permalink
adds support for configurable entity splitting by comma to CRFEntityExtractor (#6852)
Browse files Browse the repository at this point in the history
  • Loading branch information
tttthomasssss committed Oct 26, 2020
1 parent ff0fb9d commit cb9d3a7
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion rasa/nlu/extractors/crf_entity_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
ENTITY_ATTRIBUTE_GROUP,
ENTITY_ATTRIBUTE_ROLE,
NO_ENTITY_TAG,
SPLIT_ENTITIES_BY_COMMA,
)
from rasa.shared.constants import DOCS_URL_COMPONENTS
from rasa.utils.tensorflow.constants import BILOU_FLAG
Expand Down Expand Up @@ -138,6 +139,13 @@ def __init__(

self._validate_configuration()

split_entities_config = self.component_config[SPLIT_ENTITIES_BY_COMMA]
if isinstance(split_entities_config, bool):
split_entities_config = {SPLIT_ENTITIES_BY_COMMA: split_entities_config}
else:
split_entities_config[SPLIT_ENTITIES_BY_COMMA] = self.defaults[SPLIT_ENTITIES_BY_COMMA]
self.split_entities_config = split_entities_config

def _validate_configuration(self) -> None:
if len(self.component_config.get("features", [])) % 2 != 1:
raise ValueError(
Expand Down Expand Up @@ -220,7 +228,7 @@ def extract_entities(self, message: Message) -> List[Dict[Text, Any]]:
tags, confidences = self._tag_confidences(tokens, predictions)

return self.convert_predictions_into_entities(
message.get(TEXT), tokens, tags, confidences
message.get(TEXT), tokens, tags, confidences, self.split_entities_config
)

def _add_tag_to_crf_token(
Expand Down

0 comments on commit cb9d3a7

Please sign in to comment.