diff --git a/changelog/6591.improvement.md b/changelog/6591.improvement.md new file mode 100644 index 000000000000..76970a18eb4a --- /dev/null +++ b/changelog/6591.improvement.md @@ -0,0 +1,5 @@ +Retrieval actions with the `respond_` prefix have been replaced by regular utterance actions with the `utter_` prefix. + +If you were using retrieval actions before, rename all of them to start with the `utter_` prefix. For example, `respond_chitchat` becomes `utter_chitchat`. +Also, in order to keep the response templates more consistent, you should now add the `utter_` prefix to all response templates defined for retrieval intents. For example, a response template `chitchat/ask_name` becomes `utter_chitchat/ask_name`. Note that the corresponding NLU examples will still be listed under the `chitchat/ask_name` intent. +The example `responseselectorbot` should help clarify these changes further. \ No newline at end of file diff --git a/data/examples/rasa/demo-rasa-responses.md b/data/examples/rasa/demo-rasa-responses.md index db03efbff623..8438e2db37c1 100644 --- a/data/examples/rasa/demo-rasa-responses.md +++ b/data/examples/rasa/demo-rasa-responses.md @@ -1,7 +1,7 @@ ## -* chitchat/ask_weather +* utter_chitchat/ask_weather - It's sunny where I live ## -* chitchat/ask_name +* utter_chitchat/ask_name - I am Mr. Bot \ No newline at end of file diff --git a/data/test/duplicate_intents_markdown/demo-rasa-intents-2.md b/data/test/duplicate_intents_markdown/demo-rasa-intents-2.md index 7c960a8cd93b..0616acd6e826 100644 --- a/data/test/duplicate_intents_markdown/demo-rasa-intents-2.md +++ b/data/test/duplicate_intents_markdown/demo-rasa-intents-2.md @@ -34,14 +34,6 @@ - I am looking for [mexican indian fusion](cuisine) - [central](location) [indian](cuisine) restaurant -## intent:chitchat/ask_name -- What's your name? -- What can I call you? - -## intent:chitchat/ask_weather -- How's the weather? -- Is it too hot outside? 
- ## synonym:chinese + Chines * Chinese diff --git a/data/test/duplicate_intents_markdown/demo-rasa-intents-3.md b/data/test/duplicate_intents_markdown/demo-rasa-intents-3.md new file mode 100644 index 000000000000..a9293e8f3355 --- /dev/null +++ b/data/test/duplicate_intents_markdown/demo-rasa-intents-3.md @@ -0,0 +1,21 @@ + +## intent:affirm +- yes +- yep +- yeah +- indeed +- that's right +- ok +- great +- right, thank you +- correct +- great choice +- sounds really good + +## intent:chitchat/ask_name +- What's your name? +- What can I call you? + +## intent:chitchat/ask_weather +- How's the weather? +- Is it too hot outside? \ No newline at end of file diff --git a/data/test_domains/default_retrieval_intents.yml b/data/test_domains/default_retrieval_intents.yml index 4d41d889716c..4a23002adecc 100644 --- a/data/test_domains/default_retrieval_intents.yml +++ b/data/test_domains/default_retrieval_intents.yml @@ -7,8 +7,6 @@ intents: - mood_unhappy - bot_challenge - chitchat - - chitchat/ask_name - - chitchat/ask_weather responses: utter_greet: @@ -26,7 +24,7 @@ responses: - text: I am a bot, powered by Rasa. actions: - - respond_chitchat + - utter_chitchat - utter_greet - utter_cheer_up - utter_did_that_help diff --git a/data/test_responses/default.md b/data/test_responses/default.md index aeae1f9c9ac1..bbfcb8579aa2 100644 --- a/data/test_responses/default.md +++ b/data/test_responses/default.md @@ -1,7 +1,7 @@ ## ask name -* chitchat/ask_name +* utter_chitchat/ask_name - my name is Sara, Rasa's documentation bot! ## ask weather -* chitchat/ask_weather +* utter_chitchat/ask_weather - it's always sunny where I live diff --git a/docs/docs/components/selectors.mdx b/docs/docs/components/selectors.mdx index 8c3fd7b9f830..d46e3f3abb8c 100644 --- a/docs/docs/components/selectors.mdx +++ b/docs/docs/components/selectors.mdx @@ -47,7 +47,8 @@ Selectors predict a bot response from a set of candidate responses. { "text": "I think it's about to rain." 
} - ] + ], + "template_name": "utter_chitchat/ask_weather" }, "ranking": [ { @@ -71,7 +72,7 @@ Selectors predict a bot response from a set of candidate responses. * **Description** Response Selector component can be used to build a response retrieval model to directly predict a bot response from - a set of candidate responses. The prediction of this model is used by [Retrieval Actions](../retrieval-actions). + a set of candidate responses. The prediction of this model is used by the dialogue manager to utter the predicted responses. It embeds user inputs and response labels into the same space and follows the exact same neural network architecture and optimization as the [DIETClassifier](../components/intent-classifiers.mdx#dietclassifier). @@ -142,6 +143,10 @@ Selectors predict a bot response from a set of candidate responses. template which has a text attribute for training. If none are found, it falls back to using the retrieval intent combined with the response key as the label. + See this [example assistant](https://github.com/RasaHQ/rasa/tree/master/examples/responseselectorbot) to understand + how you can use the `ResponseSelector` component in your assistant. Additionally, you will find this tutorial on + [handling FAQs](./chitchat-faqs.mdx#handling-faqs-using-a-response-selector) using a `ResponseSelector` useful as well. +
The above configuration parameters are the ones you should configure to fit your model to your data. @@ -289,3 +294,94 @@ Selectors predict a bot response from a set of candidate responses. :::
+ +* **Under the hood: Parsing Response Selector Output** + + The parsed output from NLU will have a property named `response_selector` + containing the output for each response selector component. Each response selector is + identified by `retrieval_intent` parameter of that response selector + and stores two properties: + + * `response`: The predicted response key under the corresponding retrieval intent, + prediction's confidence and the associated response templates. + + * `ranking`: Ranking with confidences of top 10 candidate response keys. + + Example result: + + ```json + { + "text": "How's the weather today?", + "response_selector": { + "faq": { + "response": { + "id": 1388783286124361986, + "confidence": 0.7, + "intent_response_key": "chitchat/ask_weather", + "response_templates": [ + { + "text": "It's sunny in Berlin today", + "image": "https://i.imgur.com/nGF1K8f.jpg" + }, + { + "text": "I think it's about to rain." + } + ], + "template_name": "utter_chitchat/ask_weather" + }, + "ranking": [ + { + "id": 1388783286124361986, + "confidence": 0.7, + "intent_response_key": "chitchat/ask_weather" + }, + { + "id": 1388783286124361986, + "confidence": 0.3, + "intent_response_key": "chitchat/ask_name" + } + ] + } + } + } + ``` + + If the `retrieval_intent` parameter of a particular response selector was left to its default value, + the corresponding response selector will be identified as `default` in the returned output. + + ```json {4} + { + "text": "How's the weather today?", + "response_selector": { + "default": { + "response": { + "id": 1388783286124361986, + "confidence": 0.7, + "intent_response_key": "chitchat/ask_weather", + "response_templates": [ + { + "text": "It's sunny in Berlin today", + "image": "https://i.imgur.com/nGF1K8f.jpg" + }, + { + "text": "I think it's about to rain." 
+ } + ], + "template_name": "utter_chitchat/ask_weather" + }, + "ranking": [ + { + "id": 1388783286124361986, + "confidence": 0.7, + "intent_response_key": "chitchat/ask_weather" + }, + { + "id": 1388783286124361986, + "confidence": 0.3, + "intent_response_key": "chitchat/ask_name" + } + ] + } + } + } + ``` diff --git a/docs/docs/glossary.mdx b/docs/docs/glossary.mdx index 01815716a035..c1dee0c45e87 100644 --- a/docs/docs/glossary.mdx +++ b/docs/docs/glossary.mdx @@ -55,6 +55,10 @@ description: Glossary for all Rasa-related terms Something that a user is trying to convey or accomplish (e,g., greeting, specifying a location). +## Retrieval Intent + + A special instance of an intent which can be divided into smaller sub-intents. Each sub-intent has a fixed response and hence the context of the conversation does not matter when the user expresses one of these sub-intents. + ## [Interactive Learning](./writing-stories.mdx#using-interactive-learning) A mode of training the bot where the user provides feedback to the bot while talking to it. diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx index 748786cd287a..a133b19d068a 100644 --- a/docs/docs/migration-guide.mdx +++ b/docs/docs/migration-guide.mdx @@ -378,13 +378,21 @@ This means that the output in these files should look like - } ``` +* Retrieval actions with the `respond_` prefix have been replaced by regular utterance actions with the `utter_` prefix. +If you were using retrieval actions before, rename all of them to start with the `utter_` prefix. For example, +`respond_chitchat` becomes `utter_chitchat`. Also, in order to keep the response templates more consistent, +you should now add the `utter_` prefix to all response templates defined for retrieval intents. For example, +a response template `chitchat/ask_name` becomes `utter_chitchat/ask_name`. Note that the corresponding NLU examples +will still be listed under the `chitchat/ask_name` intent. 
The example `responseselectorbot` should help clarify these changes further. + * The output schema of `ResponseSelector` has changed - `full_retrieval_intent` and `name` have been deprecated in favour of `intent_response_key` and `response_templates` respectively. -Additionally a key `all_retrieval_intents` is added to the response selector output which will -hold a list of all retrieval intents(faq, chitchat, etc.) that are present in the training data. +Additionally, two keys are added to the output - +1. `all_retrieval_intents` - Holds a list of all retrieval intents (faq, chitchat, etc.) that are present in the training data. +2. `template_name` - Holds the name of the response template which is predicted by the response selector (e.g. `utter_faq/is_legit`). An example output looks like this - -```json {3-4,10,11} +```json {3-4,10,11,20} { "response_selector": { "all_retrieval_intents": [ @@ -404,6 +412,7 @@ An example output looks like this - "text": "I think so." } - ] + ], + "template_name": "utter_faq/is_legit" }, "ranking": [ { diff --git a/docs/docs/responses.mdx b/docs/docs/responses.mdx index 3915f0c62e06..580ac71adc15 100644 --- a/docs/docs/responses.mdx +++ b/docs/docs/responses.mdx @@ -258,3 +258,30 @@ responses: - text: "Hey, {name}. How are you?" - text: "Hey, {name}. How is your day going?" ``` + +## Responses for Retrieval Intents + +If you are using retrieval intents in your assistant, you also need to add response templates +for your assistant's replies to these intents: + +```yaml-rasa +responses: + utter_chitchat/ask_name: + - image: "https://i.imgur.com/zTvA58i.jpeg" + text: hello, my name is retrieval bot. + - text: Oh yeah, I am called the retrieval bot. + + utter_chitchat/ask_weather: + - text: Oh, it does look sunny right now in Berlin. + image: "https://i.imgur.com/vwv7aHN.png" + - text: I am not sure of the whole week but I can see the sun is out today. +``` +All such response templates (e.g. 
`utter_chitchat/ask_name`) start with the `utter_` prefix followed by the retrieval intent name (`chitchat`) +and the associated response key (`ask_name`). + +:::info Responses format +The responses for retrieval intents use the same format as the [responses in the domain](responses.mdx). +This means you can also use buttons, images and any other multimedia elements in +your responses. +::: + diff --git a/docs/docs/training-data-format.mdx b/docs/docs/training-data-format.mdx index 0c66a2e0133b..3fb667c6de32 100644 --- a/docs/docs/training-data-format.mdx +++ b/docs/docs/training-data-format.mdx @@ -185,6 +185,35 @@ nlu: The `metadata` key can contain arbitrary key-value data that stays with an example and is accessible by the components in the NLU pipeline. In the example above, the sentiment of the example could be used by a custom component in the pipeline for sentiment analysis. +If you want to specify [retrieval intents](glossary.mdx#retrieval-intent), then your NLU examples will look as follows: +```yaml-rasa +nlu: +- intent: chitchat/ask_name + examples: | + - What is your name? + - May I know your name? + - What do people call you? + - Do you have a name for yourself? + +- intent: chitchat/ask_weather + examples: | + - What's the weather like today? + - Does it look sunny outside today? + - Oh, do you mind checking the weather for me please? + - I like sunny days in Berlin. +``` +All retrieval intents have a suffix +added to them which identifies a particular response key for your assistant. In the +above example, `ask_name` and `ask_weather` are the suffixes. The suffix is separated from +the retrieval intent name by a `/` delimiter. + +:::note Special meaning of `/` +As shown in the above examples, the `/` symbol is reserved as a delimiter to separate +retrieval intents from their associated response keys. Make sure not to use it in the +names of your intents. 
+::: + + ### Entities [Entities](glossary.mdx#entity) are structured pieces of information that can be extracted from a user's message. For entity extraction to work, you need to either specify training data to train an ML model or you need to define [regular expressions](#regular-expressions-for-entity-extraction) to extract entities using the [`RegexEntityExtractor`](components/entity-extractors.mdx#regexentityextractor) based on a character pattern. diff --git a/examples/responseselectorbot/data/nlu.yml b/examples/responseselectorbot/data/nlu.yml index bbd4acc898b1..71ff79a9a081 100644 --- a/examples/responseselectorbot/data/nlu.yml +++ b/examples/responseselectorbot/data/nlu.yml @@ -103,3 +103,14 @@ nlu: - Does it look sunny outside today? - Oh, do you mind checking the weather for me please? - I like sunny days in Berlin. + +responses: + utter_chitchat/ask_name: + - image: "https://i.imgur.com/zTvA58i.jpeg" + text: hello, my name is retrieval bot. + - text: Oh yeah, I am called the retrieval bot. + + utter_chitchat/ask_weather: + - text: Oh, it does look sunny right now in Berlin. + image: "https://i.imgur.com/vwv7aHN.png" + - text: I am not sure of the whole week but I can see the sun is out today. diff --git a/examples/responseselectorbot/data/responses.yml b/examples/responseselectorbot/data/responses.yml deleted file mode 100644 index cef24554ff62..000000000000 --- a/examples/responseselectorbot/data/responses.yml +++ /dev/null @@ -1,12 +0,0 @@ -version: "2.0" - -responses: - chitchat/ask_name: - - image: "https://i.imgur.com/zTvA58i.jpeg" - text: hello, my name is retrieval bot. - - text: Oh yeah, I am called the retrieval bot. - - chitchat/ask_weather: - - text: Oh, it does look sunny right now in Berlin. - image: "https://i.imgur.com/vwv7aHN.png" - - text: I am not sure of the whole week but I can see the sun is out today. 
diff --git a/examples/responseselectorbot/data/rules.yml b/examples/responseselectorbot/data/rules.yml index d95269f3ee74..4bbb69938c35 100644 --- a/examples/responseselectorbot/data/rules.yml +++ b/examples/responseselectorbot/data/rules.yml @@ -15,4 +15,4 @@ rules: - rule: Response with a chitchat utterance whenever user indulges in some chitchat steps: - intent: chitchat - - action: respond_chitchat + - action: utter_chitchat diff --git a/examples/responseselectorbot/domain.yml b/examples/responseselectorbot/domain.yml index 5b845f9ea0c1..35cfeb27093d 100644 --- a/examples/responseselectorbot/domain.yml +++ b/examples/responseselectorbot/domain.yml @@ -10,9 +10,6 @@ intents: - bot_challenge - chitchat -actions: - - respond_chitchat - responses: utter_greet: - text: "Hey! How are you?" diff --git a/rasa/core/actions/action.py b/rasa/core/actions/action.py index 6c47c942b06d..5ce487c2baaa 100644 --- a/rasa/core/actions/action.py +++ b/rasa/core/actions/action.py @@ -2,22 +2,28 @@ import json import logging import typing -from typing import List, Text, Optional, Dict, Any -import random +from typing import List, Text, Optional, Dict, Any, Set import aiohttp import rasa.core + from rasa.shared.core import events -from rasa.core.constants import DEFAULT_REQUEST_TIMEOUT, RESPOND_PREFIX +from rasa.core.constants import DEFAULT_REQUEST_TIMEOUT + from rasa.nlu.constants import ( RESPONSE_SELECTOR_DEFAULT_INTENT, RESPONSE_SELECTOR_PROPERTY_NAME, - RESPONSE_SELECTOR_RESPONSES_KEY, RESPONSE_SELECTOR_PREDICTION_KEY, + RESPONSE_SELECTOR_TEMPLATE_NAME_KEY, INTENT_RANKING_KEY, ) -from rasa.shared.constants import DOCS_BASE_URL, DEFAULT_NLU_FALLBACK_INTENT_NAME + +from rasa.shared.constants import ( + DOCS_BASE_URL, + DEFAULT_NLU_FALLBACK_INTENT_NAME, + UTTER_PREFIX, +) from rasa.shared.core.constants import ( USER_INTENT_OUT_OF_SCOPE, ACTION_LISTEN_NAME, @@ -30,9 +36,8 @@ ACTION_DEFAULT_ASK_REPHRASE_NAME, ACTION_BACK_NAME, REQUESTED_SLOT, - UTTER_PREFIX, ) -from 
rasa.shared.nlu.constants import INTENT_RESPONSE_KEY, INTENT_NAME_KEY +from rasa.shared.nlu.constants import INTENT_NAME_KEY from rasa.shared.core.events import ( UserUtteranceReverted, UserUttered, @@ -99,6 +104,21 @@ def action_for_index( return action_for_name(domain.action_names[index], domain, action_endpoint) +def construct_retrieval_action_names(retrieval_intents: Set[Text]) -> List[Text]: + """List names of all retrieval actions corresponding to passed retrieval intents. + + Args: + retrieval_intents: List of retrieval intents defined in the NLU training data. + + Returns: Names of corresponding retrieval actions + """ + + return [ + ActionRetrieveResponse.action_name_from_intent(intent) + for intent in retrieval_intents + ] + + def action_for_name( action_name: Text, domain: Domain, action_endpoint: Optional[EndpointConfig] ) -> "Action": @@ -128,6 +148,27 @@ def action_for_name( action_endpoint, domain.user_actions_and_forms, should_use_form_action, + domain.retrieval_intents, + ) + + +def is_retrieval_action(action_name: Text, retrieval_intents: List[Text]) -> bool: + """Check if an action name is a retrieval action. + + The name for a retrieval action has an extra `utter_` prefix added to + the corresponding retrieval intent name. + + Args: + action_name: Name of the action. + retrieval_intents: List of retrieval intents defined in the NLU training data. + + Returns: + `True` if the resolved intent name is present in the list of retrieval + intents, `False` otherwise. 
+ """ + + return ( + ActionRetrieveResponse.intent_name_from_action(action_name) in retrieval_intents ) @@ -136,6 +177,7 @@ def action_from_name( action_endpoint: Optional[EndpointConfig], user_actions: List[Text], should_use_form_action: bool = False, + retrieval_intents: Optional[List[Text]] = None, ) -> "Action": """Return an action instance for the name.""" @@ -143,10 +185,12 @@ def action_from_name( if name in defaults and name not in user_actions: return defaults[name] + elif name.startswith(UTTER_PREFIX) and is_retrieval_action( + name, retrieval_intents or [] + ): + return ActionRetrieveResponse(name) elif name.startswith(UTTER_PREFIX): return ActionUtterTemplate(name) - elif name.startswith(RESPOND_PREFIX): - return ActionRetrieveResponse(name) elif should_use_form_action: from rasa.core.actions.forms import FormAction @@ -216,76 +260,62 @@ def __str__(self) -> Text: return "Action('{}')".format(self.name()) -class ActionRetrieveResponse(Action): - """An action which queries the Response Selector for the appropriate response.""" +class ActionUtterTemplate(Action): + """An action which only effect is to utter a template when it is run. 
+ + Both, name and utter template, need to be specified using + the `name` method.""" def __init__(self, name: Text, silent_fail: Optional[bool] = False): - self.action_name = name + self.template_name = name self.silent_fail = silent_fail - def intent_name_from_action(self) -> Text: - return self.action_name.split(RESPOND_PREFIX)[1] - async def run( self, output_channel: "OutputChannel", nlg: "NaturalLanguageGenerator", tracker: "DialogueStateTracker", domain: "Domain", - ): - """Query the appropriate response and create a bot utterance with that.""" - - response_selector_properties = tracker.latest_message.parse_data[ - RESPONSE_SELECTOR_PROPERTY_NAME - ] + ) -> List[Event]: + """Simple run implementation uttering a (hopefully defined) template.""" - if self.intent_name_from_action() in response_selector_properties: - query_key = self.intent_name_from_action() - elif RESPONSE_SELECTOR_DEFAULT_INTENT in response_selector_properties: - query_key = RESPONSE_SELECTOR_DEFAULT_INTENT - else: + message = await nlg.generate(self.template_name, tracker, output_channel.name()) + if message is None: if not self.silent_fail: logger.error( - "Couldn't create message for response action '{}'." - "".format(self.action_name) + "Couldn't create message for response '{}'." + "".format(self.template_name) ) return [] + message["template_name"] = self.template_name - logger.debug(f"Picking response from selector of type {query_key}") - selected = response_selector_properties[query_key] - possible_messages = selected[RESPONSE_SELECTOR_PREDICTION_KEY][ - RESPONSE_SELECTOR_RESPONSES_KEY - ] - - # Pick a random message from list of candidate messages. - # This should ideally be done by the NLG class but that's not - # possible until the domain has all the response templates of the response selector. 
- picked_message_idx = random.randint(0, len(possible_messages) - 1) - picked_message = copy.deepcopy(possible_messages[picked_message_idx]) - - picked_message["template_name"] = selected[RESPONSE_SELECTOR_PREDICTION_KEY][ - INTENT_RESPONSE_KEY - ] - - return [create_bot_utterance(picked_message)] + return [create_bot_utterance(message)] def name(self) -> Text: - return self.action_name + return self.template_name def __str__(self) -> Text: - return "ActionRetrieveResponse('{}')".format(self.name()) + return "ActionUtterTemplate('{}')".format(self.name()) -class ActionUtterTemplate(Action): - """An action which only effect is to utter a template when it is run. - - Both, name and utter template, need to be specified using - the `name` method.""" +class ActionRetrieveResponse(ActionUtterTemplate): + """An action which queries the Response Selector for the appropriate response.""" def __init__(self, name: Text, silent_fail: Optional[bool] = False): - self.template_name = name + super().__init__(name, silent_fail) + self.action_name = name self.silent_fail = silent_fail + @staticmethod + def intent_name_from_action(action_name: Text) -> Text: + """Resolve the name of the intent from the action name.""" + return action_name.split(UTTER_PREFIX)[1] + + @staticmethod + def action_name_from_intent(intent_name: Text) -> Text: + """Resolve the action name from the name of the intent.""" + return f"{UTTER_PREFIX}{intent_name}" + async def run( self, output_channel: "OutputChannel", @@ -293,25 +323,44 @@ async def run( tracker: "DialogueStateTracker", domain: "Domain", ) -> List[Event]: - """Simple run implementation uttering a (hopefully defined) template.""" + """Query the appropriate response and create a bot utterance with that.""" - message = await nlg.generate(self.template_name, tracker, output_channel.name()) - if message is None: + response_selector_properties = tracker.latest_message.parse_data[ + RESPONSE_SELECTOR_PROPERTY_NAME + ] + + if ( + 
self.intent_name_from_action(self.action_name) + in response_selector_properties + ): + query_key = self.intent_name_from_action(self.action_name) + elif RESPONSE_SELECTOR_DEFAULT_INTENT in response_selector_properties: + query_key = RESPONSE_SELECTOR_DEFAULT_INTENT + else: if not self.silent_fail: logger.error( - "Couldn't create message for response '{}'." - "".format(self.template_name) + "Couldn't create message for response action '{}'." + "".format(self.action_name) ) return [] - message["template_name"] = self.template_name - return [create_bot_utterance(message)] + logger.debug(f"Picking response from selector of type {query_key}") + selected = response_selector_properties[query_key] + + # Override template name of ActionUtterTemplate + # with the complete template name retrieved from + # the output of response selector. + self.template_name = selected[RESPONSE_SELECTOR_PREDICTION_KEY][ + RESPONSE_SELECTOR_TEMPLATE_NAME_KEY + ] + + return await super().run(output_channel, nlg, tracker, domain) def name(self) -> Text: - return self.template_name + return self.action_name def __str__(self) -> Text: - return "ActionUtterTemplate('{}')".format(self.name()) + return "ActionRetrieveResponse('{}')".format(self.name()) class ActionBack(ActionUtterTemplate): diff --git a/rasa/core/actions/forms.py b/rasa/core/actions/forms.py index 9b4a9cbda1b4..2cf23a2690e2 100644 --- a/rasa/core/actions/forms.py +++ b/rasa/core/actions/forms.py @@ -8,12 +8,8 @@ from rasa.shared.core.domain import Domain from rasa.core.actions.action import ActionExecutionRejection, RemoteAction -from rasa.shared.core.constants import ( - ACTION_LISTEN_NAME, - LOOP_VALIDATE, - REQUESTED_SLOT, - UTTER_PREFIX, -) +from rasa.shared.core.constants import ACTION_LISTEN_NAME, LOOP_VALIDATE, REQUESTED_SLOT +from rasa.shared.constants import UTTER_PREFIX from rasa.shared.core.events import Event, SlotSet, ActionExecuted from rasa.core.nlg import NaturalLanguageGenerator from rasa.shared.core.trackers 
import DialogueStateTracker diff --git a/rasa/core/constants.py b/rasa/core/constants.py index ec5bbc2275ab..63f8c6bd6093 100644 --- a/rasa/core/constants.py +++ b/rasa/core/constants.py @@ -29,7 +29,6 @@ # the priority intended to be used by form policies # it is the highest to prioritize form to the rest of the policies FORM_POLICY_PRIORITY = 5 -RESPOND_PREFIX = "respond_" DIALOGUE = "dialogue" diff --git a/rasa/core/processor.py b/rasa/core/processor.py index e32ed9f4b00d..8060511d5f0a 100644 --- a/rasa/core/processor.py +++ b/rasa/core/processor.py @@ -20,7 +20,6 @@ ACTION_LISTEN_NAME, ACTION_SESSION_START_NAME, REQUESTED_SLOT, - UTTER_PREFIX, ) from rasa.shared.core.domain import Domain from rasa.shared.core.events import ( @@ -38,6 +37,7 @@ INTENT_MESSAGE_PREFIX, DOCS_URL_DOMAINS, DEFAULT_SENDER_ID, + UTTER_PREFIX, ) from rasa.core.nlg import NaturalLanguageGenerator from rasa.core.policies.ensemble import PolicyEnsemble diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py index 02944b9e6c6e..e12679fc7c17 100644 --- a/rasa/core/training/interactive.py +++ b/rasa/core/training/interactive.py @@ -31,7 +31,6 @@ LOOP_VALIDATE, LOOP_REJECTED, REQUESTED_SLOT, - UTTER_PREFIX, ) from rasa.core import run, train, utils from rasa.core.constants import DEFAULT_SERVER_FORMAT, DEFAULT_SERVER_PORT @@ -47,7 +46,7 @@ UserUtteranceReverted, ) import rasa.core.interpreter -from rasa.shared.constants import INTENT_MESSAGE_PREFIX, DEFAULT_SENDER_ID +from rasa.shared.constants import INTENT_MESSAGE_PREFIX, DEFAULT_SENDER_ID, UTTER_PREFIX from rasa.shared.core.trackers import EventVerbosity, DialogueStateTracker from rasa.shared.core.training_data import visualization from rasa.shared.core.training_data.visualization import ( diff --git a/rasa/importers/importer.py b/rasa/importers/importer.py index 1b2eb3e2f033..d1f2b64c3deb 100644 --- a/rasa/importers/importer.py +++ b/rasa/importers/importer.py @@ -1,6 +1,6 @@ import asyncio from functools import 
reduce -from typing import Text, Optional, List, Dict +from typing import Text, Optional, List, Dict, Set, Any import logging import rasa.shared.utils.common @@ -15,6 +15,7 @@ from rasa.importers.autoconfig import TrainingType import rasa.utils.io as io_utils import rasa.utils.common as common_utils +from rasa.shared.core.domain import IS_RETRIEVAL_INTENT_KEY logger = logging.getLogger(__name__) @@ -153,7 +154,7 @@ def load_from_dict( ) ] - return E2EImporter(CombinedDataImporter(importers)) + return E2EImporter(RetrievalModelsDataImporter(CombinedDataImporter(importers))) @staticmethod def _importer_from_dict( @@ -243,7 +244,6 @@ async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData: class CombinedDataImporter(TrainingDataImporter): """A `TrainingDataImporter` that combines multiple importers. - Uses multiple `TrainingDataImporter` instances to load the data as if they were a single instance. """ @@ -290,6 +290,119 @@ async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData: ) +class RetrievalModelsDataImporter(TrainingDataImporter): + """A `TrainingDataImporter` that sets up the data for training retrieval models. + + Synchronizes response templates between Domain and NLU + and adds retrieval intent properties from the NLU training data + back to the Domain. 
+ """ + + def __init__(self, importer: TrainingDataImporter): + self._importer = importer + + async def get_config(self) -> Dict: + return await self._importer.get_config() + + async def get_domain(self) -> Domain: + """Merge existing domain with properties of retrieval intents in NLU data.""" + + existing_domain = await self._importer.get_domain() + existing_nlu_data = await self._importer.get_nlu_data() + + # Check if NLU data has any retrieval intents, if yes + # add corresponding retrieval actions with `utter_` prefix automatically + # to an empty domain, update the properties of existing retrieval intents + # and merge response templates + if existing_nlu_data.retrieval_intents: + + domain_with_retrieval_intents = self._get_domain_with_retrieval_intents( + existing_nlu_data.retrieval_intents, + existing_nlu_data.responses, + existing_domain, + ) + + existing_domain = existing_domain.merge(domain_with_retrieval_intents) + + return existing_domain + + @staticmethod + def _get_domain_with_retrieval_intents( + retrieval_intents: Set[Text], + response_templates: Dict[Text, List[Dict[Text, Any]]], + existing_domain: Domain, + ) -> Domain: + """Construct a domain consisting of retrieval intents listed in the NLU training data. + + Args: + retrieval_intents: Set of retrieval intents defined in NLU training data. + existing_domain: Domain which is already loaded from the domain file. + + Returns: Domain with retrieval actions added to action names and properties + for retrieval intents updated. 
+ """ + from rasa.core.actions import action + + # Get all the properties already defined + # for each retrieval intent in other domains + # and add the retrieval intent property to them + retrieval_intent_properties = [] + for intent in retrieval_intents: + intent_properties = ( + existing_domain.intent_properties[intent] + if intent in existing_domain.intent_properties + else {} + ) + intent_properties[IS_RETRIEVAL_INTENT_KEY] = True + retrieval_intent_properties.append({intent: intent_properties}) + + return Domain( + retrieval_intent_properties, + [], + [], + response_templates, + action.construct_retrieval_action_names(retrieval_intents), + [], + ) + + async def get_stories( + self, + template_variables: Optional[Dict] = None, + use_e2e: bool = False, + exclusion_percentage: Optional[int] = None, + ) -> StoryGraph: + + return await self._importer.get_stories( + template_variables, use_e2e, exclusion_percentage + ) + + async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData: + """Update NLU data with response templates defined in the domain""" + + existing_nlu_data = await self._importer.get_nlu_data(language) + existing_domain = await self._importer.get_domain() + + return existing_nlu_data.merge( + self._get_nlu_data_with_responses(existing_domain.templates) + ) + + @staticmethod + def _get_nlu_data_with_responses( + response_templates: Dict[Text, List[Dict[Text, Any]]] + ) -> TrainingData: + """Construct training data object with only the response templates supplied. + + Args: + response_templates: Response templates the NLU data should + be initialized with. + + Returns: TrainingData object with response templates. + + """ + + return TrainingData(responses=response_templates) + + class E2EImporter(TrainingDataImporter): """Importer which - enhances the NLU training data with actions / user messages from the stories. 
diff --git a/rasa/importers/rasa.py b/rasa/importers/rasa.py index 7b009295a663..dd6e0a28bdf9 100644 --- a/rasa/importers/rasa.py +++ b/rasa/importers/rasa.py @@ -58,6 +58,10 @@ async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData: async def get_domain(self) -> Domain: domain = Domain.empty() + + # If domain path is None, return an empty domain + if not self._domain_path: + return domain try: domain = Domain.load(self._domain_path) domain.check_missing_templates() diff --git a/rasa/nlu/constants.py b/rasa/nlu/constants.py index 77b9c429fb4c..df787b756983 100644 --- a/rasa/nlu/constants.py +++ b/rasa/nlu/constants.py @@ -65,6 +65,8 @@ RESPONSE_SELECTOR_PREDICTION_KEY = "response" RESPONSE_SELECTOR_RANKING_KEY = "ranking" RESPONSE_SELECTOR_RESPONSES_KEY = "response_templates" +RESPONSE_SELECTOR_TEMPLATE_NAME_KEY = "template_name" +RESPONSE_IDENTIFIER_DELIMITER = "/" INTENT_RANKING_KEY = "intent_ranking" PREDICTED_CONFIDENCE_KEY = "confidence" diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index 08e39ba76c1d..f73ccf43dc25 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -6,6 +6,7 @@ from typing import Any, Dict, Optional, Text, Tuple, Union, List, Type +from rasa.shared.nlu.training_data import util import rasa.shared.utils.io from rasa.nlu.config import InvalidConfigError from rasa.shared.nlu.training_data.training_data import TrainingData @@ -78,6 +79,7 @@ RESPONSE_SELECTOR_RESPONSES_KEY, RESPONSE_SELECTOR_PREDICTION_KEY, RESPONSE_SELECTOR_RANKING_KEY, + RESPONSE_SELECTOR_TEMPLATE_NAME_KEY, PREDICTED_CONFIDENCE_KEY, RESPONSE_SELECTOR_DEFAULT_INTENT, ) @@ -355,13 +357,14 @@ def _resolve_intent_response_key( for key, responses in self.responses.items(): # First check if the predicted label was the key itself - if hash(key) == label.get("id"): - return key + search_key = util.template_key_to_intent_response_key(key) + if hash(search_key) == 
label.get("id"): + return search_key # Otherwise loop over the responses to check if the text has a direct match for response in responses: if hash(response.get(TEXT, "")) == label.get("id"): - return key + return search_key return None def process(self, message: Message, **kwargs: Any) -> None: @@ -375,7 +378,9 @@ def process(self, message: Message, **kwargs: Any) -> None: label_intent_response_key = ( self._resolve_intent_response_key(top_label) or top_label[INTENT_NAME_KEY] ) - label_response_templates = self.responses.get(label_intent_response_key) + label_response_templates = self.responses.get( + util.intent_response_key_to_template_key(label_intent_response_key) + ) if label_intent_response_key and not label_response_templates: # response templates seem to be unavailable, @@ -413,6 +418,9 @@ def process(self, message: Message, **kwargs: Any) -> None: RESPONSE_SELECTOR_RESPONSES_KEY: label_response_templates, PREDICTED_CONFIDENCE_KEY: top_label[PREDICTED_CONFIDENCE_KEY], INTENT_RESPONSE_KEY: label_intent_response_key, + RESPONSE_SELECTOR_TEMPLATE_NAME_KEY: util.intent_response_key_to_template_key( + label_intent_response_key + ), }, RESPONSE_SELECTOR_RANKING_KEY: label_ranking, } diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py index d506bb6052be..f1fcfb4e550b 100644 --- a/rasa/nlu/test.py +++ b/rasa/nlu/test.py @@ -35,7 +35,6 @@ from rasa.shared.nlu.constants import ( TEXT, INTENT, - RESPONSE, INTENT_RESPONSE_KEY, ENTITIES, EXTRACTOR, @@ -1288,9 +1287,9 @@ def get_eval_data( intent_results, entity_results, response_selection_results = [], [], [] response_labels = [ - e.get(RESPONSE) + e.get(INTENT_RESPONSE_KEY) for e in test_data.intent_examples - if e.get(RESPONSE) is not None + if e.get(INTENT_RESPONSE_KEY) is not None ] intent_labels = [e.get(INTENT) for e in test_data.intent_examples] should_eval_intents = ( diff --git a/rasa/shared/constants.py b/rasa/shared/constants.py index 91b105cae1c0..eb4054855cc2 100644 --- a/rasa/shared/constants.py +++ 
@rasa.shared.utils.common.lazy_property
def retrieval_intents(self) -> List[Text]:
    """Return the names of all intents flagged as retrieval intents.

    An intent is included when its entry in `self.intent_properties` has a
    truthy value stored under `IS_RETRIEVAL_INTENT_KEY`.
    """
    return [
        intent_name
        for intent_name, properties in self.intent_properties.items()
        if properties.get(IS_RETRIEVAL_INTENT_KEY)
    ]
def number_of_examples_per_response(self) -> Dict[Text, int]:
    """Count how many training examples exist for each intent response key.

    Examples whose `INTENT_RESPONSE_KEY` value is missing or empty are ignored.

    Returns: Mapping from intent response key to its number of examples.
    """
    tally = Counter()
    for example in self.training_examples:
        response_key = example.get(INTENT_RESPONSE_KEY)
        if response_key:
            tally[response_key] += 1
    return dict(tally)
as training label if needed downstream @@ -249,7 +254,7 @@ def _fill_response_phrases(self) -> None: # If no text attribute was found use the key for training if not example.get(RESPONSE): - example.set(RESPONSE, story_lookup_intent) + example.set(RESPONSE, story_lookup_key) def nlu_as_json(self, **kwargs: Any) -> Text: """Represent this set of training examples as json.""" @@ -376,7 +381,8 @@ def sorted_intent_examples(self) -> List[Message]: """Sorts the intent examples by the name of the intent and then response""" return sorted( - self.intent_examples, key=lambda e: (e.get(INTENT), e.get(RESPONSE)) + self.intent_examples, + key=lambda e: (e.get(INTENT), e.get(INTENT_RESPONSE_KEY)), ) def validate(self) -> None: @@ -426,7 +432,7 @@ def validate(self) -> None: f"You either need to add a response phrase or correct the " f"intent for this example in your training data. " f"If you intend to use Response Selector in the pipeline, the " - f"training ." + f"training may fail." ) def train_test_split( @@ -475,7 +481,7 @@ def _needed_responses_for_examples( responses = {} for ex in examples: if ex.get(INTENT_RESPONSE_KEY) and ex.get(RESPONSE): - key = ex.get_full_intent() + key = util.intent_response_key_to_template_key(ex.get_full_intent()) responses[key] = self.responses[key] return responses @@ -512,7 +518,7 @@ def _split(_examples: List[Message], _count: int) -> None: examples = [ e for e in training_examples - if RESPONSE in e.data and e.data[RESPONSE] == response + if e.get(INTENT_RESPONSE_KEY) and e.get(INTENT_RESPONSE_KEY) == response ] _split(examples, count) training_examples = training_examples - set(examples) diff --git a/rasa/shared/nlu/training_data/util.py b/rasa/shared/nlu/training_data/util.py index b9e967998516..2c98d91b6b84 100644 --- a/rasa/shared/nlu/training_data/util.py +++ b/rasa/shared/nlu/training_data/util.py @@ -15,6 +15,7 @@ ENTITY_ATTRIBUTE_ROLE, ENTITY_ATTRIBUTE_GROUP, ) +from rasa.shared.constants import UTTER_PREFIX import 
def template_key_to_intent_response_key(template_key: Text) -> Text:
    """Resolve the intent response key for the given response template.

    Only a leading `UTTER_PREFIX` is removed. Splitting on the prefix instead
    would cut off everything after a second occurrence of it inside the key
    and would raise `IndexError` for keys that do not contain it at all.

    Args:
        template_key: Name of the response template.

    Returns: The corresponding intent response key. A template key that does
        not start with `UTTER_PREFIX` is returned unchanged.

    """
    if template_key.startswith(UTTER_PREFIX):
        return template_key[len(UTTER_PREFIX) :]
    return template_key
test_domain_action_instantiation(): domain = Domain( - intents={}, + intents=[{"chitchat": {"is_retrieval_intent": True}}], entities=[], slots=[], templates={}, - action_names=["my_module.ActionTest", "utter_test", "respond_test"], + action_names=["my_module.ActionTest", "utter_test", "utter_chitchat"], forms=[], ) @@ -119,7 +119,7 @@ def test_domain_action_instantiation(): assert instantiated_actions[10].name() == RULE_SNIPPET_ACTION_NAME assert instantiated_actions[11].name() == "my_module.ActionTest" assert instantiated_actions[12].name() == "utter_test" - assert instantiated_actions[13].name() == "respond_test" + assert instantiated_actions[13].name() == "utter_chitchat" async def test_remote_action_runs( @@ -350,7 +350,7 @@ async def test_action_utter_retrieved_response( ): from rasa.core.channels.channel import UserMessage - action_name = "respond_chitchat" + action_name = "utter_chitchat" default_tracker.latest_message = UserMessage( "Who are you?", parse_data={ @@ -359,11 +359,17 @@ async def test_action_utter_retrieved_response( "response": { "intent_response_key": "chitchat/ask_name", "response_templates": [{"text": "I am a bot."}], + "template_name": "utter_chitchat/ask_name", } } } }, ) + + default_domain.templates.update( + {"utter_chitchat/ask_name": [{"text": "I am a bot."}]} + ) + events = await ActionRetrieveResponse(action_name).run( default_channel, default_nlg, default_tracker, default_domain ) @@ -372,7 +378,8 @@ async def test_action_utter_retrieved_response( "text" ) assert ( - events[0].as_dict().get("metadata").get("template_name") == "chitchat/ask_name" + events[0].as_dict().get("metadata").get("template_name") + == "utter_chitchat/ask_name" ) @@ -381,7 +388,7 @@ async def test_action_utter_default_retrieved_response( ): from rasa.core.channels.channel import UserMessage - action_name = "respond_chitchat" + action_name = "utter_chitchat" default_tracker.latest_message = UserMessage( "Who are you?", parse_data={ @@ -390,11 +397,17 @@ async 
def test_action_utter_default_retrieved_response( "response": { "intent_response_key": "chitchat/ask_name", "response_templates": [{"text": "I am a bot."}], + "template_name": "utter_chitchat/ask_name", } } } }, ) + + default_domain.templates.update( + {"utter_chitchat/ask_name": [{"text": "I am a bot."}]} + ) + events = await ActionRetrieveResponse(action_name).run( default_channel, default_nlg, default_tracker, default_domain ) @@ -404,7 +417,8 @@ async def test_action_utter_default_retrieved_response( ) assert ( - events[0].as_dict().get("metadata").get("template_name") == "chitchat/ask_name" + events[0].as_dict().get("metadata").get("template_name") + == "utter_chitchat/ask_name" ) @@ -413,7 +427,7 @@ async def test_action_utter_retrieved_empty_response( ): from rasa.core.channels.channel import UserMessage - action_name = "respond_chitchat" + action_name = "utter_chitchat" default_tracker.latest_message = UserMessage( "Who are you?", parse_data={ @@ -422,11 +436,17 @@ async def test_action_utter_retrieved_empty_response( "response": { "intent_response_key": "chitchat/ask_name", "response_templates": [{"text": "I am a bot."}], + "template_name": "utter_chitchat/ask_name", } } } }, ) + + default_domain.templates.update( + {"utter_chitchat/ask_name": [{"text": "I am a bot."}]} + ) + events = await ActionRetrieveResponse(action_name).run( default_channel, default_nlg, default_tracker, default_domain ) diff --git a/tests/importers/test_importer.py b/tests/importers/test_importer.py index 609259b9940e..1afaeaa9cd1c 100644 --- a/tests/importers/test_importer.py +++ b/tests/importers/test_importer.py @@ -16,6 +16,7 @@ NluDataImporter, CoreDataImporter, E2EImporter, + RetrievalModelsDataImporter, ) from rasa.importers.rasa import RasaFileImporter @@ -100,9 +101,9 @@ def test_load_from_dict( ) assert isinstance(actual, E2EImporter) - assert isinstance(actual.importer, CombinedDataImporter) + assert isinstance(actual.importer, RetrievalModelsDataImporter) - 
async def test_nlu_data_domain_sync_with_retrieval_intents(project: Text):
    """Domain and NLU data served by the retrieval importer must stay in sync."""
    base_data_importer = TrainingDataImporter.load_from_dict(
        {},
        os.path.join(project, DEFAULT_CONFIG_PATH),
        "data/test_domains/default_retrieval_intents.yml",
        [
            "data/test_nlu/default_retrieval_intents.md",
            "data/test_responses/default.md",
        ],
    )

    # Wrap the same base importer once for NLU and once for Core, then combine.
    wrapped_importers = [
        NluDataImporter(base_data_importer),
        CoreDataImporter(base_data_importer),
    ]
    importer = RetrievalModelsDataImporter(CombinedDataImporter(wrapped_importers))

    domain = await importer.get_domain()
    nlu_data = await importer.get_nlu_data()

    # The retrieval intent is flagged in the domain ...
    assert domain.retrieval_intents == ["chitchat"]
    assert domain.intent_properties["chitchat"].get("is_retrieval_intent")
    # ... the response templates agree on both sides ...
    assert domain.templates == nlu_data.responses
    # ... and the matching retrieval action is registered.
    assert "utter_chitchat" in domain.action_names
async def test_eval_data(component_builder, tmpdir, project):
    """Train a small NLU model and check the size of each evaluation result list."""
    pipeline = [
        {"name": "WhitespaceTokenizer"},
        {"name": "CountVectorsFeaturizer"},
        {"name": "DIETClassifier", "epochs": 2},
        {"name": "ResponseSelector", "epochs": 2},
    ]
    nlu_config = RasaNLUModelConfig({"pipeline": pipeline, "language": "en"})

    training_files = [
        "data/examples/rasa/demo-rasa.md",
        "data/examples/rasa/demo-rasa-responses.md",
    ]
    data_importer = TrainingDataImporter.load_nlu_importer_from_config(
        os.path.join(project, "config.yml"), training_data_paths=training_files
    )

    # Only the path of the persisted model is needed from the training result.
    _, _, model_path = await train(
        nlu_config,
        path=tmpdir.strpath,
        data=data_importer,
        component_builder=component_builder,
        persist_nlu_training_data=True,
    )

    interpreter = Interpreter.load(model_path, component_builder)
    nlu_data = await data_importer.get_nlu_data()

    evaluation = get_eval_data(interpreter, nlu_data)
    intent_results, response_selection_results, entity_results = evaluation

    assert len(intent_results) == 46
    assert len(response_selection_results) == 46
    assert len(entity_results) == 46
rasa.shared.nlu.training_data.loading.load_data( diff --git a/tests/shared/core/test_domain.py b/tests/shared/core/test_domain.py index 8178d41955ad..7a084552950d 100644 --- a/tests/shared/core/test_domain.py +++ b/tests/shared/core/test_domain.py @@ -1,7 +1,7 @@ import copy import json from pathlib import Path -from typing import Dict +from typing import Dict, List, Text, Any, Union, Set import pytest @@ -491,9 +491,26 @@ def test_merge_domain_with_forms(): "goodbye": {USED_ENTITIES_KEY: []}, }, ), + ( + [ + "greet", + "goodbye", + {"chitchat": {"is_retrieval_intent": True, "use_entities": None}}, + ], + ["entity", "other", "third"], + { + "greet": {USED_ENTITIES_KEY: ["entity", "other", "third"]}, + "goodbye": {USED_ENTITIES_KEY: ["entity", "other", "third"]}, + "chitchat": {USED_ENTITIES_KEY: [], "is_retrieval_intent": True}, + }, + ), ], ) -def test_collect_intent_properties(intents, entities, intent_properties): +def test_collect_intent_properties( + intents: Union[Set[Text], List[Union[Text, Dict[Text, Any]]]], + entities: List[Text], + intent_properties: Dict[Text, Dict[Text, Union[bool, List]]], +): Domain._add_default_intents(intent_properties, entities) assert Domain.collect_intent_properties(intents, entities) == intent_properties diff --git a/tests/shared/nlu/training_data/formats/test_rasa_yaml.py b/tests/shared/nlu/training_data/formats/test_rasa_yaml.py index 2eba93239fee..0c2fdbb7c72a 100644 --- a/tests/shared/nlu/training_data/formats/test_rasa_yaml.py +++ b/tests/shared/nlu/training_data/formats/test_rasa_yaml.py @@ -271,7 +271,7 @@ def test_nlg_reads_text(): responses_yml = textwrap.dedent( """ responses: - chitchat/ask_weather: + utter_chitchat/ask_weather: - text: Where do you want to check the weather? 
""" ) @@ -280,7 +280,9 @@ def test_nlg_reads_text(): result = reader.reads(responses_yml) assert result.responses == { - "chitchat/ask_weather": [{"text": "Where do you want to check the weather?"}] + "utter_chitchat/ask_weather": [ + {"text": "Where do you want to check the weather?"} + ] } @@ -288,7 +290,7 @@ def test_nlg_reads_any_multimedia(): responses_yml = textwrap.dedent( """ responses: - chitchat/ask_weather: + utter_chitchat/ask_weather: - text: Where do you want to check the weather? image: https://example.com/weather.jpg """ @@ -298,7 +300,7 @@ def test_nlg_reads_any_multimedia(): result = reader.reads(responses_yml) assert result.responses == { - "chitchat/ask_weather": [ + "utter_chitchat/ask_weather": [ { "text": "Where do you want to check the weather?", "image": "https://example.com/weather.jpg", @@ -324,7 +326,7 @@ def test_nlg_fails_on_empty_response(): responses_yml = textwrap.dedent( """ responses: - chitchat/ask_weather: + utter_chitchat/ask_weather: """ ) @@ -338,11 +340,11 @@ def test_nlg_multimedia_load_dump_roundtrip(): responses_yml = textwrap.dedent( """ responses: - chitchat/ask_weather: + utter_chitchat/ask_weather: - text: Where do you want to check the weather? image: https://example.com/weather.jpg - chitchat/ask_name: + utter_chitchat/ask_name: - text: My name is Sara. 
""" ) diff --git a/tests/shared/nlu/training_data/test_training_data.py b/tests/shared/nlu/training_data/test_training_data.py index 00bdd0f8e5aa..66284ad71473 100644 --- a/tests/shared/nlu/training_data/test_training_data.py +++ b/tests/shared/nlu/training_data/test_training_data.py @@ -1,6 +1,6 @@ import asyncio from pathlib import Path -from typing import Text +from typing import Text, List import pytest @@ -22,7 +22,11 @@ MARKDOWN, load_data, ) -from rasa.shared.nlu.training_data.util import get_file_format +from rasa.shared.nlu.training_data.util import ( + get_file_format, + template_key_to_intent_response_key, + intent_response_key_to_template_key, +) def test_luis_data(): @@ -115,6 +119,18 @@ def test_composite_entities_data(): assert td.number_of_examples_per_entity["role 'from'"] == 3 +def test_intent_response_key_to_template_key(): + intent_response_key = "chitchat/ask_name" + template_key = "utter_chitchat/ask_name" + assert intent_response_key_to_template_key(intent_response_key) == template_key + + +def test_template_key_to_intent_response_key(): + intent_response_key = "chitchat/ask_name" + template_key = "utter_chitchat/ask_name" + assert template_key_to_intent_response_key(template_key) == intent_response_key + + @pytest.mark.parametrize( "files", [ @@ -128,20 +144,29 @@ def test_composite_entities_data(): ], ], ) -def test_demo_data(files): +def test_demo_data(files: List[Text]): from rasa.importers.utils import training_data_from_paths - td = training_data_from_paths(files, language="en") - assert td.intents == {"affirm", "greet", "restaurant_search", "goodbye", "chitchat"} - assert td.entities == {"location", "cuisine"} - assert set(td.responses.keys()) == {"chitchat/ask_name", "chitchat/ask_weather"} - assert len(td.training_examples) == 46 - assert len(td.intent_examples) == 46 - assert len(td.response_examples) == 4 - assert len(td.entity_examples) == 11 - assert len(td.responses) == 2 + trainingdata = training_data_from_paths(files, 
language="en") + assert trainingdata.intents == { + "affirm", + "greet", + "restaurant_search", + "goodbye", + "chitchat", + } + assert trainingdata.entities == {"location", "cuisine"} + assert set(trainingdata.responses.keys()) == { + "utter_chitchat/ask_name", + "utter_chitchat/ask_weather", + } + assert len(trainingdata.training_examples) == 46 + assert len(trainingdata.intent_examples) == 46 + assert len(trainingdata.response_examples) == 4 + assert len(trainingdata.entity_examples) == 11 + assert len(trainingdata.responses) == 2 - assert td.entity_synonyms == { + assert trainingdata.entity_synonyms == { "Chines": "chinese", "Chinese": "chinese", "chines": "chinese", @@ -149,7 +174,7 @@ def test_demo_data(files): "veggie": "vegetarian", } - assert td.regex_features == [ + assert trainingdata.regex_features == [ {"name": "greet", "pattern": r"hey[^\s]*"}, {"name": "zipcode", "pattern": r"[0-9]{5}"}, ] @@ -192,36 +217,51 @@ def test_demo_data_filter_out_retrieval_intents(files): "filepaths", [["data/examples/rasa/demo-rasa.md", "data/examples/rasa/demo-rasa-responses.md"]], ) -def test_train_test_split(filepaths): +def test_train_test_split(filepaths: List[Text]): from rasa.importers.utils import training_data_from_paths - td = training_data_from_paths(filepaths, language="en") + trainingdata = training_data_from_paths(filepaths, language="en") - assert td.intents == {"affirm", "greet", "restaurant_search", "goodbye", "chitchat"} - assert td.entities == {"location", "cuisine"} - assert set(td.responses.keys()) == {"chitchat/ask_name", "chitchat/ask_weather"} + assert trainingdata.intents == { + "affirm", + "greet", + "restaurant_search", + "goodbye", + "chitchat", + } + assert trainingdata.entities == {"location", "cuisine"} + assert set(trainingdata.responses.keys()) == { + "utter_chitchat/ask_name", + "utter_chitchat/ask_weather", + } - assert len(td.training_examples) == 46 - assert len(td.intent_examples) == 46 - assert len(td.response_examples) == 4 + assert 
len(trainingdata.training_examples) == 46 + assert len(trainingdata.intent_examples) == 46 + assert len(trainingdata.response_examples) == 4 - td_train, td_test = td.train_test_split(train_frac=0.8) + trainingdata_train, trainingdata_test = trainingdata.train_test_split( + train_frac=0.8 + ) - assert len(td_test.training_examples) + len(td_train.training_examples) == 46 - assert len(td_train.training_examples) == 34 - assert len(td_test.training_examples) == 12 + assert ( + len(trainingdata_test.training_examples) + + len(trainingdata_train.training_examples) + == 46 + ) + assert len(trainingdata_train.training_examples) == 34 + assert len(trainingdata_test.training_examples) == 12 - assert len(td.number_of_examples_per_intent.keys()) == len( - td_test.number_of_examples_per_intent.keys() + assert len(trainingdata.number_of_examples_per_intent.keys()) == len( + trainingdata_test.number_of_examples_per_intent.keys() ) - assert len(td.number_of_examples_per_intent.keys()) == len( - td_train.number_of_examples_per_intent.keys() + assert len(trainingdata.number_of_examples_per_intent.keys()) == len( + trainingdata_train.number_of_examples_per_intent.keys() ) - assert len(td.number_of_examples_per_response.keys()) == len( - td_test.number_of_examples_per_response.keys() + assert len(trainingdata.number_of_examples_per_response.keys()) == len( + trainingdata_test.number_of_examples_per_response.keys() ) - assert len(td.number_of_examples_per_response.keys()) == len( - td_train.number_of_examples_per_response.keys() + assert len(trainingdata.number_of_examples_per_response.keys()) == len( + trainingdata_train.number_of_examples_per_response.keys() )