diff --git a/data/test_config/config_defaults.yml b/data/test_config/config_defaults.yml index f387a6b7be11..b3014c3069a1 100644 --- a/data/test_config/config_defaults.yml +++ b/data/test_config/config_defaults.yml @@ -1,4 +1,23 @@ language: en pipeline: [] +# # No configuration for the NLU pipeline was provided. The following default pipeline was used to train your model. +# # If you'd like to customize it, uncomment and adjust the pipeline. +# # See https://rasa.com/docs/rasa/tuning-your-model for more information. +# - name: WhitespaceTokenizer +# - name: RegexFeaturizer +# - name: LexicalSyntacticFeaturizer +# - name: CountVectorsFeaturizer +# - name: CountVectorsFeaturizer +# analyzer: char_wb +# min_ngram: 1 +# max_ngram: 4 +# - name: DIETClassifier +# epochs: 100 +# - name: EntitySynonymMapper +# - name: ResponseSelector +# epochs: 100 +# - name: FallbackClassifier +# threshold: 0.3 +# ambiguity_threshold: 0.1 data: diff --git a/data/test_config/config_response_selector_minimal.yml b/data/test_config/config_response_selector_minimal.yml new file mode 100644 index 000000000000..a99e2904b370 --- /dev/null +++ b/data/test_config/config_response_selector_minimal.yml @@ -0,0 +1,6 @@ +language: en +pipeline: + - name: WhitespaceTokenizer + - name: CountVectorsFeaturizer + - name: ResponseSelector + epochs: 1 diff --git a/data/test_nlu_no_responses/domain_with_only_responses.yml b/data/test_nlu_no_responses/domain_with_only_responses.yml new file mode 100644 index 000000000000..d366ae41134b --- /dev/null +++ b/data/test_nlu_no_responses/domain_with_only_responses.yml @@ -0,0 +1,10 @@ +responses: + utter_chitchat/ask_name: + - image: "https://i.imgur.com/zTvA58i.jpeg" + text: hello, my name is retrieval bot. + - text: Oh yeah, I am called the retrieval bot. + + utter_chitchat/ask_weather: + - text: Oh, it does look sunny right now in Berlin. + image: "https://i.imgur.com/vwv7aHN.png" + - text: I am not sure of the whole week but I can see the sun is out today. diff --git a/data/test_nlu_no_responses/nlu_no_responses.yml b/data/test_nlu_no_responses/nlu_no_responses.yml new file mode 100644 index 000000000000..96b7e0466a87 --- /dev/null +++ b/data/test_nlu_no_responses/nlu_no_responses.yml @@ -0,0 +1,16 @@ +version: "2.0" + +nlu: +- intent: chitchat/ask_name + examples: | + - What is your name? + - May I know your name? + - What do people call you? + - Do you have a name for yourself? + +- intent: chitchat/ask_weather + examples: | + - What's the weather like today? + - Does it look sunny outside today? + - Oh, do you mind checking the weather for me please? + - I like sunny days in Berlin. diff --git a/rasa/cli/arguments/default_arguments.py b/rasa/cli/arguments/default_arguments.py index d996f3562c2b..2157fcb10973 100644 --- a/rasa/cli/arguments/default_arguments.py +++ b/rasa/cli/arguments/default_arguments.py @@ -49,13 +49,14 @@ def add_nlu_data_param( def add_domain_param( - parser: Union[argparse.ArgumentParser, argparse._ActionsContainer] + parser: Union[argparse.ArgumentParser, argparse._ActionsContainer], + default: Optional[Text] = DEFAULT_DOMAIN_PATH, ) -> None: parser.add_argument( "-d", "--domain", type=str, - default=DEFAULT_DOMAIN_PATH, + default=default, help="Domain specification. This can be a single YAML file, or a directory " "that contains several files with domain specifications in it. The content " "of these files will be read and merged together.", diff --git a/rasa/cli/arguments/train.py b/rasa/cli/arguments/train.py index 34b2c7e86f45..cd90e9c96ffa 100644 --- a/rasa/cli/arguments/train.py +++ b/rasa/cli/arguments/train.py @@ -46,6 +46,7 @@ def set_train_core_arguments(parser: argparse.ArgumentParser): def set_train_nlu_arguments(parser: argparse.ArgumentParser): add_config_param(parser) + add_domain_param(parser, default=None) add_out_param(parser, help_text="Directory where your models should be stored.") add_nlu_data_param(parser, help_text="File or folder containing your NLU data.") diff --git a/rasa/cli/train.py b/rasa/cli/train.py index 48f8b7dd0896..a713fee5d37f 100644 --- a/rasa/cli/train.py +++ b/rasa/cli/train.py @@ -143,6 +143,11 @@ def train_nlu( args.nlu, "nlu", DEFAULT_DATA_PATH, none_is_valid=True ) + if args.domain: + args.domain = rasa.cli.utils.get_validated_path( + args.domain, "domain", DEFAULT_DOMAIN_PATH, none_is_valid=True + ) + return train_nlu( config=config, nlu_data=nlu_data, @@ -151,6 +156,7 @@ def train_nlu( fixed_model_name=args.fixed_model_name, persist_nlu_training_data=args.persist_nlu_data, additional_arguments=extract_nlu_additional_arguments(args), + domain=args.domain, ) diff --git a/rasa/train.py b/rasa/train.py index ea9c945883a9..68406287968d 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -434,6 +434,7 @@ def train_nlu( fixed_model_name: Optional[Text] = None, persist_nlu_training_data: bool = False, additional_arguments: Optional[Dict] = None, + domain: Optional[Union[Domain, Text]] = None, ) -> Optional[Text]: """Trains an NLU model. @@ -448,6 +449,7 @@ def train_nlu( with the model. additional_arguments: Additional training parameters which will be passed to the `train` method of each component. + domain: Path to the optional domain file/Domain object. Returns: @@ -465,6 +467,7 @@ def train_nlu( fixed_model_name, persist_nlu_training_data, additional_arguments, + domain=domain, ) ) @@ -477,7 +480,8 @@ async def _train_nlu_async( fixed_model_name: Optional[Text] = None, persist_nlu_training_data: bool = False, additional_arguments: Optional[Dict] = None, -): + domain: Optional[Union[Domain, Text]] = None, +) -> Optional[Text]: if not nlu_data: print_error( "No NLU data given. Please provide NLU data in order to train " @@ -487,7 +491,7 @@ async def _train_nlu_async( # training NLU only hence the training files still have to be selected file_importer = TrainingDataImporter.load_nlu_importer_from_config( - config, training_data_paths=[nlu_data] + config, domain, training_data_paths=[nlu_data] ) training_data = await file_importer.get_nlu_data() diff --git a/tests/cli/test_rasa_train.py b/tests/cli/test_rasa_train.py index f734da1f0b7a..6a078221dfe9 100644 --- a/tests/cli/test_rasa_train.py +++ b/tests/cli/test_rasa_train.py @@ -339,8 +339,8 @@ def test_train_help(run): def test_train_nlu_help(run: Callable[..., RunResult]): output = run("train", "nlu", "--help") - help_text = """usage: rasa train nlu [-h] [-v] [-vv] [--quiet] [-c CONFIG] [--out OUT] - [-u NLU] [--num-threads NUM_THREADS] + help_text = """usage: rasa train nlu [-h] [-v] [-vv] [--quiet] [-c CONFIG] [-d DOMAIN] + [--out OUT] [-u NLU] [--num-threads NUM_THREADS] [--fixed-model-name FIXED_MODEL_NAME] [--persist-nlu-data]""" diff --git a/tests/test_train.py b/tests/test_train.py index bce567d992d2..14d855691b14 100644 --- a/tests/test_train.py +++ b/tests/test_train.py @@ -14,6 +14,8 @@ import rasa.core import rasa.shared.importers.autoconfig as autoconfig from rasa.core.interpreter import RasaNLUInterpreter +from rasa.shared.core.domain import Domain +from rasa.shared.importers.importer import TrainingDataImporter from rasa.train import train_core, train_nlu, train from tests.conftest import DEFAULT_CONFIG_PATH, DEFAULT_NLU_DATA @@ -164,6 +166,42 @@ def test_train_nlu_wrong_format_error_message( assert "Please verify the data format" in captured.out +def test_train_nlu_with_responses_no_domain_warns(tmp_path: Path): + data_path = "data/test_nlu_no_responses/nlu_no_responses.yml" + + with pytest.warns(UserWarning) as records: + train_nlu( + "data/test_config/config_response_selector_minimal.yml", + data_path, + output=str(tmp_path / "models"), + ) + + assert any( + "You either need to add a response phrase or correct the intent" + in record.message.args[0] + for record in records + ) + + +def test_train_nlu_with_responses_and_domain_no_warns(tmp_path: Path): + data_path = "data/test_nlu_no_responses/nlu_no_responses.yml" + domain_path = "data/test_nlu_no_responses/domain_with_only_responses.yml" + + with pytest.warns(None) as records: + train_nlu( + "data/test_config/config_response_selector_minimal.yml", + data_path, + output=str(tmp_path / "models"), + domain=domain_path, + ) + + assert not any( + "You either need to add a response phrase or correct the intent" + in record.message.args[0] + for record in records + ) + + def test_train_nlu_no_nlu_file_error_message( capsys: CaptureFixture, tmp_path: Text,