Commit

Add use_cls_token to default dict.
tabergma committed Oct 18, 2019
1 parent 95fe8da commit 3b51563
Showing 5 changed files with 15 additions and 1 deletion.
2 changes: 2 additions & 0 deletions rasa/nlu/tokenizers/jieba_tokenizer.py
@@ -36,6 +36,8 @@ class JiebaTokenizer(Tokenizer):
         "intent_tokenization_flag": False,
         # Symbol on which intent should be split
         "intent_split_symbol": "_",
+        # add __CLS__ token to the end of the list of tokens
+        "use_cls_token": True,
     }  # default don't load custom dictionary
 
     def __init__(self, component_config: Dict[Text, Any] = None) -> None:
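The diff above only shows the new key; to make the mechanics concrete, here is a minimal sketch, not Rasa's actual merging code, of how a class-level defaults dict like this typically combines with a user-supplied component config, so "use_cls_token" ends up True unless a pipeline entry overrides it. The resolve_config helper is illustrative only.

from typing import Any, Dict, Optional, Text

DEFAULTS: Dict[Text, Any] = {
    "intent_tokenization_flag": False,
    # Symbol on which intent should be split
    "intent_split_symbol": "_",
    # add __CLS__ token to the end of the list of tokens
    "use_cls_token": True,
}


def resolve_config(component_config: Optional[Dict[Text, Any]] = None) -> Dict[Text, Any]:
    """Start from the class defaults and let user-provided keys win."""
    config = dict(DEFAULTS)
    config.update(component_config or {})
    return config


print(resolve_config()["use_cls_token"])                          # True
print(resolve_config({"use_cls_token": False})["use_cls_token"])  # False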
5 changes: 5 additions & 0 deletions rasa/nlu/tokenizers/mitie_tokenizer.py
@@ -16,6 +16,11 @@ class MitieTokenizer(Tokenizer):
 
     provides = [MESSAGE_TOKENS_NAMES[attribute] for attribute in MESSAGE_ATTRIBUTES]
 
+    defaults = {
+        # add __CLS__ token to the end of the list of tokens
+        "use_cls_token": True
+    }
+
     @classmethod
     def required_packages(cls) -> List[Text]:
         return ["mitie"]
5 changes: 5 additions & 0 deletions rasa/nlu/tokenizers/spacy_tokenizer.py
@@ -28,6 +28,11 @@ class SpacyTokenizer(Tokenizer):
         for attribute in SPACY_FEATURIZABLE_ATTRIBUTES
     ]
 
+    defaults = {
+        # add __CLS__ token to the end of the list of tokens
+        "use_cls_token": True
+    }
+
     def train(
         self, training_data: TrainingData, config: RasaNLUModelConfig, **kwargs: Any
     ) -> None:
2 changes: 1 addition & 1 deletion rasa/nlu/tokenizers/tokenizer.py
@@ -29,7 +29,7 @@ def __init__(self, component_config: Optional[Dict[Text, Any]] = None) -> None:
         if "use_cls_token" in self.component_config:
             self.use_cls_token = self.component_config["use_cls_token"]
         else:
-            self.use_cls_token = False
+            self.use_cls_token = True
 
     def add_cls_token(
         self, tokens: List[Token], attribute: Text = MESSAGE_TEXT_ATTRIBUTE
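This hunk is the behavioural core of the commit: the base Tokenizer now falls back to use_cls_token = True when the key is missing from the component config. The sketch below mirrors the constructor logic from the hunk; add_cls_token is an illustrative approximation (the real method also takes an attribute argument), and the Token fields, the offset arithmetic, and the __CLS__ constant spelling are assumptions.

from typing import Any, Dict, List, Optional, Text

CLS_TOKEN = "__CLS__"  # assumed spelling of the marker token


class Token:
    def __init__(self, text: Text, offset: int) -> None:
        self.text = text
        self.offset = offset


class Tokenizer:
    def __init__(self, component_config: Optional[Dict[Text, Any]] = None) -> None:
        self.component_config = component_config or {}
        if "use_cls_token" in self.component_config:
            self.use_cls_token = self.component_config["use_cls_token"]
        else:
            # this commit flips the fallback from False to True
            self.use_cls_token = True

    def add_cls_token(self, tokens: List[Token]) -> List[Token]:
        # Append a __CLS__ token one position past the last token when the flag is set.
        if self.use_cls_token and tokens:
            last = tokens[-1]
            tokens.append(Token(CLS_TOKEN, last.offset + len(last.text) + 1))
        return tokens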
2 changes: 2 additions & 0 deletions rasa/nlu/tokenizers/whitespace_tokenizer.py
@@ -24,6 +24,8 @@ class WhitespaceTokenizer(Tokenizer):
         "intent_split_symbol": "_",
         # Text will be tokenized with case sensitive as default
         "case_sensitive": True,
+        # add __CLS__ token to the end of the list of tokens
+        "use_cls_token": True,
     }
 
     def __init__(self, component_config: Dict[Text, Any] = None) -> None:
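Taken together, every tokenizer now appends a trailing __CLS__ token by default. A conceptual, whitespace-only illustration of the effect (token text only, offsets omitted; this is not the WhitespaceTokenizer implementation):

from typing import List


def whitespace_tokens(text: str, use_cls_token: bool = True) -> List[str]:
    # Split on whitespace and, if requested, append the __CLS__ marker
    # to the end of the list of tokens.
    tokens = text.split()
    if use_cls_token and tokens:
        tokens.append("__CLS__")
    return tokens


print(whitespace_tokens("book a flight"))
# ['book', 'a', 'flight', '__CLS__']
print(whitespace_tokens("book a flight", use_cls_token=False))
# ['book', 'a', 'flight']

Setting "use_cls_token": False in a tokenizer's component config restores the previous behaviour.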
