From 6339856514897056716bb531acb8489c9cf05d26 Mon Sep 17 00:00:00 2001 From: Tayfun Sen Date: Wed, 2 Feb 2022 15:19:26 +0000 Subject: [PATCH] Add support for different recipes (#10641) * Add support for different recipes Fixes https://github.com/RasaHQ/rasa/issues/10473 * Update docs/docs/graph-recipe.mdx Co-authored-by: Joe Juzl --- changelog/10473.feature.md | 7 + .../graph_config_short_predict_schema.yml | 73 +++ .../graph_config_short_train_schema.yml | 27 + data/test_config/graph_config.yml | 534 ++++++++++++++++++ data/test_config/graph_config_short.yml | 115 ++++ docs/docs/custom-graph-components.mdx | 12 +- docs/docs/graph-recipe.mdx | 134 +++++ docs/docs/model-configuration.mdx | 3 +- docs/docs/telemetry/events.json | 29 +- docs/sidebars.js | 1 + rasa/cli/interactive.py | 2 +- rasa/cli/shell.py | 2 +- rasa/core/processor.py | 2 +- rasa/engine/graph.py | 2 +- .../recipes/config_files}/default_config.yml | 0 rasa/engine/recipes/default_recipe.py | 277 ++++++++- rasa/engine/recipes/graph_recipe.py | 77 +++ rasa/engine/recipes/recipe.py | 23 +- rasa/engine/storage/storage.py | 2 +- rasa/model_testing.py | 2 +- rasa/model_training.py | 13 +- rasa/shared/constants.py | 1 + rasa/shared/data.py | 20 +- rasa/shared/importers/autoconfig.py | 286 ---------- rasa/shared/importers/importer.py | 56 +- rasa/shared/importers/multi_project.py | 4 + rasa/shared/importers/rasa.py | 14 +- rasa/telemetry.py | 17 +- tests/conftest.py | 2 +- tests/engine/recipes/test_default_recipe.py | 219 ++++++- tests/engine/recipes/test_graph_recipe.py | 162 ++++++ .../storage/test_local_model_storage.py | 2 +- tests/engine/storage/test_storage.py | 2 +- tests/engine/test_loader.py | 2 +- tests/engine/test_validation.py | 2 +- tests/engine/training/test_graph_trainer.py | 2 +- .../test_default_recipe_validator.py | 26 +- tests/shared/importers/test_autoconfig.py | 223 -------- tests/shared/importers/test_importer.py | 10 - tests/test_model_training.py | 28 +- tests/test_telemetry.py | 11 +- 
tests/test_validator.py | 6 - 42 files changed, 1818 insertions(+), 614 deletions(-) create mode 100644 changelog/10473.feature.md create mode 100644 data/graph_schemas/graph_config_short_predict_schema.yml create mode 100644 data/graph_schemas/graph_config_short_train_schema.yml create mode 100644 data/test_config/graph_config.yml create mode 100644 data/test_config/graph_config_short.yml create mode 100644 docs/docs/graph-recipe.mdx rename rasa/{shared/importers => engine/recipes/config_files}/default_config.yml (100%) create mode 100644 rasa/engine/recipes/graph_recipe.py delete mode 100644 rasa/shared/importers/autoconfig.py create mode 100644 tests/engine/recipes/test_graph_recipe.py delete mode 100644 tests/shared/importers/test_autoconfig.py diff --git a/changelog/10473.feature.md b/changelog/10473.feature.md new file mode 100644 index 000000000000..b9ab4a78d1ba --- /dev/null +++ b/changelog/10473.feature.md @@ -0,0 +1,7 @@ +Support other recipe types. + +This pull request also adds support for graph recipes, see details at +https://rasa.com/docs/rasa/model-configuration and check Graph Recipe page. + +Graph recipe is a raw format for specifying executed graph directly. This is +useful if you need a more powerful way to specify your model creation. 
diff --git a/data/graph_schemas/graph_config_short_predict_schema.yml b/data/graph_schemas/graph_config_short_predict_schema.yml new file mode 100644 index 000000000000..eeeffb0940d7 --- /dev/null +++ b/data/graph_schemas/graph_config_short_predict_schema.yml @@ -0,0 +1,73 @@ +nodes: + nlu_message_converter: + needs: + messages: __message__ + uses: rasa.graph_components.converters.nlu_message_converter.NLUMessageConverter + constructor_name: load + fn: convert_user_message + config: {} + eager: true + is_target: false + is_input: false + resource: null + custom_nlu_target: + needs: + messages: nlu_message_converter + domain: domain_provider + uses: rasa.nlu.classifiers.regex_message_handler.RegexMessageHandler + constructor_name: load + fn: process + config: {} + eager: true + is_target: false + is_input: false + resource: null + domain_provider: + needs: {} + uses: rasa.graph_components.providers.domain_provider.DomainProvider + constructor_name: load + fn: provide_inference + config: {} + eager: true + is_target: false + is_input: false + resource: + name: domain_provider + run_MemoizationPolicy0: + needs: + domain: domain_provider + tracker: __tracker__ + rule_only_data: rule_only_data_provider + uses: rasa.core.policies.memoization.MemoizationPolicy + constructor_name: load + fn: predict_action_probabilities + config: {} + eager: true + is_target: false + is_input: false + resource: + name: train_MemoizationPolicy0 + rule_only_data_provider: + needs: {} + uses: rasa.graph_components.providers.rule_only_provider.RuleOnlyDataProvider + constructor_name: load + fn: provide + config: {} + eager: true + is_target: false + is_input: false + resource: + name: train_RulePolicy1 + custom_core_target: + needs: + policy0: run_MemoizationPolicy0 + domain: domain_provider + tracker: __tracker__ + uses: rasa.core.policies.ensemble.DefaultPolicyPredictionEnsemble + constructor_name: load + fn: combine_predictions_from_kwargs + config: {} + eager: true + is_target: false + 
is_input: false + resource: null diff --git a/data/graph_schemas/graph_config_short_train_schema.yml b/data/graph_schemas/graph_config_short_train_schema.yml new file mode 100644 index 000000000000..be042f18a3d0 --- /dev/null +++ b/data/graph_schemas/graph_config_short_train_schema.yml @@ -0,0 +1,27 @@ +nodes: + finetuning_validator: + needs: + importer: __importer__ + uses: rasa.graph_components.validators.finetuning_validator.FinetuningValidator + constructor_name: create + fn: validate + config: + validate_core: true + validate_nlu: true + eager: false + is_target: false + is_input: true + resource: null + nlu_training_data_provider: + needs: + importer: finetuning_validator + uses: rasa.graph_components.providers.nlu_training_data_provider.NLUTrainingDataProvider + constructor_name: create + fn: provide + config: + language: en + persist: false + eager: false + is_target: false + is_input: true + resource: null diff --git a/data/test_config/graph_config.yml b/data/test_config/graph_config.yml new file mode 100644 index 000000000000..2d8bd16e0aa1 --- /dev/null +++ b/data/test_config/graph_config.yml @@ -0,0 +1,534 @@ +# The config recipe. 
+# https://rasa.com/docs/rasa/model-configuration/ +recipe: graph.v1 + +language: en + +nlu_target: run_RegexMessageHandler + +core_target: select_prediction + +train_schema: + nodes: + schema_validator: + needs: + importer: __importer__ + uses: rasa.graph_components.validators.default_recipe_validator.DefaultV1RecipeValidator + constructor_name: create + fn: validate + config: {} + eager: false + is_target: false + is_input: true + resource: null + finetuning_validator: + needs: + importer: schema_validator + uses: rasa.graph_components.validators.finetuning_validator.FinetuningValidator + constructor_name: create + fn: validate + config: + validate_core: true + validate_nlu: true + eager: false + is_target: false + is_input: true + resource: null + nlu_training_data_provider: + needs: + importer: finetuning_validator + uses: rasa.graph_components.providers.nlu_training_data_provider.NLUTrainingDataProvider + constructor_name: create + fn: provide + config: + language: en + persist: false + eager: false + is_target: false + is_input: true + resource: null + run_WhitespaceTokenizer0: + needs: + training_data: nlu_training_data_provider + uses: rasa.nlu.tokenizers.whitespace_tokenizer.WhitespaceTokenizer + constructor_name: load + fn: process_training_data + config: {} + eager: false + is_target: false + is_input: false + resource: null + train_RegexFeaturizer1: + needs: + training_data: run_WhitespaceTokenizer0 + uses: rasa.nlu.featurizers.sparse_featurizer.regex_featurizer.RegexFeaturizer + constructor_name: create + fn: train + config: {} + eager: false + is_target: true + is_input: false + resource: null + run_RegexFeaturizer1: + needs: + training_data: run_WhitespaceTokenizer0 + resource: train_RegexFeaturizer1 + uses: rasa.nlu.featurizers.sparse_featurizer.regex_featurizer.RegexFeaturizer + constructor_name: load + fn: process_training_data + config: {} + eager: false + is_target: false + is_input: false + resource: null + train_LexicalSyntacticFeaturizer2: + 
needs: + training_data: run_RegexFeaturizer1 + uses: rasa.nlu.featurizers.sparse_featurizer.lexical_syntactic_featurizer.LexicalSyntacticFeaturizer + constructor_name: create + fn: train + config: {} + eager: false + is_target: true + is_input: false + resource: null + run_LexicalSyntacticFeaturizer2: + needs: + training_data: run_RegexFeaturizer1 + resource: train_LexicalSyntacticFeaturizer2 + uses: rasa.nlu.featurizers.sparse_featurizer.lexical_syntactic_featurizer.LexicalSyntacticFeaturizer + constructor_name: load + fn: process_training_data + config: {} + eager: false + is_target: false + is_input: false + resource: null + train_CountVectorsFeaturizer3: + needs: + training_data: run_LexicalSyntacticFeaturizer2 + uses: rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer.CountVectorsFeaturizer + constructor_name: create + fn: train + config: {} + eager: false + is_target: true + is_input: false + resource: null + run_CountVectorsFeaturizer3: + needs: + training_data: run_LexicalSyntacticFeaturizer2 + resource: train_CountVectorsFeaturizer3 + uses: rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer.CountVectorsFeaturizer + constructor_name: load + fn: process_training_data + config: {} + eager: false + is_target: false + is_input: false + resource: null + train_CountVectorsFeaturizer4: + needs: + training_data: run_CountVectorsFeaturizer3 + uses: rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer.CountVectorsFeaturizer + constructor_name: create + fn: train + config: + analyzer: char_wb + min_ngram: 1 + max_ngram: 4 + eager: false + is_target: true + is_input: false + resource: null + run_CountVectorsFeaturizer4: + needs: + training_data: run_CountVectorsFeaturizer3 + resource: train_CountVectorsFeaturizer4 + uses: rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer.CountVectorsFeaturizer + constructor_name: load + fn: process_training_data + config: + analyzer: char_wb + min_ngram: 1 + max_ngram: 4 + eager: false 
+ is_target: false + is_input: false + resource: null + train_DIETClassifier5: + needs: + training_data: run_CountVectorsFeaturizer4 + uses: rasa.nlu.classifiers.diet_classifier.DIETClassifier + constructor_name: create + fn: train + config: + epochs: 100 + constrain_similarities: true + eager: false + is_target: true + is_input: false + resource: null + train_EntitySynonymMapper6: + needs: + training_data: run_CountVectorsFeaturizer4 + uses: rasa.nlu.extractors.entity_synonyms.EntitySynonymMapper + constructor_name: create + fn: train + config: {} + eager: false + is_target: true + is_input: false + resource: null + train_ResponseSelector7: + needs: + training_data: run_CountVectorsFeaturizer4 + uses: rasa.nlu.selectors.response_selector.ResponseSelector + constructor_name: create + fn: train + config: + epochs: 100 + constrain_similarities: true + eager: false + is_target: true + is_input: false + resource: null + domain_provider: + needs: + importer: finetuning_validator + uses: rasa.graph_components.providers.domain_provider.DomainProvider + constructor_name: create + fn: provide_train + config: {} + eager: false + is_target: true + is_input: true + resource: null + domain_for_core_training_provider: + needs: + domain: domain_provider + uses: rasa.graph_components.providers.domain_for_core_training_provider.DomainForCoreTrainingProvider + constructor_name: create + fn: provide + config: {} + eager: false + is_target: false + is_input: true + resource: null + story_graph_provider: + needs: + importer: finetuning_validator + uses: rasa.graph_components.providers.story_graph_provider.StoryGraphProvider + constructor_name: create + fn: provide + config: + exclusion_percentage: null + eager: false + is_target: false + is_input: true + resource: null + training_tracker_provider: + needs: + story_graph: story_graph_provider + domain: domain_for_core_training_provider + uses: rasa.graph_components.providers.training_tracker_provider.TrainingTrackerProvider + 
constructor_name: create + fn: provide + config: {} + eager: false + is_target: false + is_input: false + resource: null + train_MemoizationPolicy0: + needs: + training_trackers: training_tracker_provider + domain: domain_for_core_training_provider + uses: rasa.core.policies.memoization.MemoizationPolicy + constructor_name: create + fn: train + config: {} + eager: false + is_target: true + is_input: false + resource: null + train_RulePolicy1: + needs: + training_trackers: training_tracker_provider + domain: domain_for_core_training_provider + uses: rasa.core.policies.rule_policy.RulePolicy + constructor_name: create + fn: train + config: {} + eager: false + is_target: true + is_input: false + resource: null + train_UnexpecTEDIntentPolicy2: + needs: + training_trackers: training_tracker_provider + domain: domain_for_core_training_provider + uses: rasa.core.policies.unexpected_intent_policy.UnexpecTEDIntentPolicy + constructor_name: create + fn: train + config: + max_history: 5 + epochs: 100 + eager: false + is_target: true + is_input: false + resource: null + train_TEDPolicy3: + needs: + training_trackers: training_tracker_provider + domain: domain_for_core_training_provider + uses: rasa.core.policies.ted_policy.TEDPolicy + constructor_name: create + fn: train + config: + max_history: 5 + epochs: 100 + constrain_similarities: true + eager: false + is_target: true + is_input: false + resource: null + +predict_schema: + nodes: + nlu_message_converter: + needs: + messages: __message__ + uses: rasa.graph_components.converters.nlu_message_converter.NLUMessageConverter + constructor_name: load + fn: convert_user_message + config: {} + eager: true + is_target: false + is_input: false + resource: null + run_WhitespaceTokenizer0: + needs: + messages: nlu_message_converter + uses: rasa.nlu.tokenizers.whitespace_tokenizer.WhitespaceTokenizer + constructor_name: load + fn: process + config: {} + eager: true + is_target: false + is_input: false + resource: null + 
run_RegexFeaturizer1: + needs: + messages: run_WhitespaceTokenizer0 + uses: rasa.nlu.featurizers.sparse_featurizer.regex_featurizer.RegexFeaturizer + constructor_name: load + fn: process + config: {} + eager: true + is_target: false + is_input: false + resource: + name: train_RegexFeaturizer1 + run_LexicalSyntacticFeaturizer2: + needs: + messages: run_RegexFeaturizer1 + uses: rasa.nlu.featurizers.sparse_featurizer.lexical_syntactic_featurizer.LexicalSyntacticFeaturizer + constructor_name: load + fn: process + config: {} + eager: true + is_target: false + is_input: false + resource: + name: train_LexicalSyntacticFeaturizer2 + run_CountVectorsFeaturizer3: + needs: + messages: run_LexicalSyntacticFeaturizer2 + uses: rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer.CountVectorsFeaturizer + constructor_name: load + fn: process + config: {} + eager: true + is_target: false + is_input: false + resource: + name: train_CountVectorsFeaturizer3 + run_CountVectorsFeaturizer4: + needs: + messages: run_CountVectorsFeaturizer3 + uses: rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer.CountVectorsFeaturizer + constructor_name: load + fn: process + config: + analyzer: char_wb + min_ngram: 1 + max_ngram: 4 + eager: true + is_target: false + is_input: false + resource: + name: train_CountVectorsFeaturizer4 + run_DIETClassifier5: + needs: + messages: run_CountVectorsFeaturizer4 + uses: rasa.nlu.classifiers.diet_classifier.DIETClassifier + constructor_name: load + fn: process + config: + epochs: 100 + constrain_similarities: true + eager: true + is_target: false + is_input: false + resource: + name: train_DIETClassifier5 + run_EntitySynonymMapper6: + needs: + messages: run_DIETClassifier5 + uses: rasa.nlu.extractors.entity_synonyms.EntitySynonymMapper + constructor_name: load + fn: process + config: {} + eager: true + is_target: false + is_input: false + resource: + name: train_EntitySynonymMapper6 + run_ResponseSelector7: + needs: + messages: 
run_EntitySynonymMapper6 + uses: rasa.nlu.selectors.response_selector.ResponseSelector + constructor_name: load + fn: process + config: + epochs: 100 + constrain_similarities: true + eager: true + is_target: false + is_input: false + resource: + name: train_ResponseSelector7 + run_FallbackClassifier8: + needs: + messages: run_ResponseSelector7 + uses: rasa.nlu.classifiers.fallback_classifier.FallbackClassifier + constructor_name: load + fn: process + config: + threshold: 0.3 + ambiguity_threshold: 0.1 + eager: true + is_target: false + is_input: false + resource: null + run_RegexMessageHandler: + needs: + messages: run_FallbackClassifier8 + domain: domain_provider + uses: rasa.nlu.classifiers.regex_message_handler.RegexMessageHandler + constructor_name: load + fn: process + config: {} + eager: true + is_target: false + is_input: false + resource: null + domain_provider: + needs: {} + uses: rasa.graph_components.providers.domain_provider.DomainProvider + constructor_name: load + fn: provide_inference + config: {} + eager: true + is_target: false + is_input: false + resource: + name: domain_provider + run_MemoizationPolicy0: + needs: + domain: domain_provider + tracker: __tracker__ + rule_only_data: rule_only_data_provider + uses: rasa.core.policies.memoization.MemoizationPolicy + constructor_name: load + fn: predict_action_probabilities + config: {} + eager: true + is_target: false + is_input: false + resource: + name: train_MemoizationPolicy0 + run_RulePolicy1: + needs: + domain: domain_provider + tracker: __tracker__ + rule_only_data: rule_only_data_provider + uses: rasa.core.policies.rule_policy.RulePolicy + constructor_name: load + fn: predict_action_probabilities + config: {} + eager: true + is_target: false + is_input: false + resource: + name: train_RulePolicy1 + run_UnexpecTEDIntentPolicy2: + needs: + domain: domain_provider + tracker: __tracker__ + rule_only_data: rule_only_data_provider + uses: 
rasa.core.policies.unexpected_intent_policy.UnexpecTEDIntentPolicy + constructor_name: load + fn: predict_action_probabilities + config: + max_history: 5 + epochs: 100 + eager: true + is_target: false + is_input: false + resource: + name: train_UnexpecTEDIntentPolicy2 + run_TEDPolicy3: + needs: + domain: domain_provider + tracker: __tracker__ + rule_only_data: rule_only_data_provider + uses: rasa.core.policies.ted_policy.TEDPolicy + constructor_name: load + fn: predict_action_probabilities + config: + max_history: 5 + epochs: 100 + constrain_similarities: true + eager: true + is_target: false + is_input: false + resource: + name: train_TEDPolicy3 + rule_only_data_provider: + needs: {} + uses: rasa.graph_components.providers.rule_only_provider.RuleOnlyDataProvider + constructor_name: load + fn: provide + config: {} + eager: true + is_target: false + is_input: false + resource: + name: train_RulePolicy1 + select_prediction: + needs: + policy0: run_MemoizationPolicy0 + policy1: run_RulePolicy1 + policy2: run_UnexpecTEDIntentPolicy2 + policy3: run_TEDPolicy3 + domain: domain_provider + tracker: __tracker__ + uses: rasa.core.policies.ensemble.DefaultPolicyPredictionEnsemble + constructor_name: load + fn: combine_predictions_from_kwargs + config: {} + eager: true + is_target: false + is_input: false + resource: null diff --git a/data/test_config/graph_config_short.yml b/data/test_config/graph_config_short.yml new file mode 100644 index 000000000000..7aa42f97606e --- /dev/null +++ b/data/test_config/graph_config_short.yml @@ -0,0 +1,115 @@ +# The config recipe. 
+# https://rasa.com/docs/rasa/model-configuration/ +recipe: graph.v1 + +language: en + +core_target: custom_core_target + +nlu_target: custom_nlu_target + +train_schema: + nodes: + # We skip schema_validator node (we only have this for DefaultV1Recipe + # since we don't do validation for the GraphV1Recipe) + finetuning_validator: + needs: + importer: __importer__ + uses: rasa.graph_components.validators.finetuning_validator.FinetuningValidator + constructor_name: create + fn: validate + config: + validate_core: true + validate_nlu: true + eager: false + is_target: false + is_input: true + resource: null + nlu_training_data_provider: + needs: + importer: finetuning_validator + uses: rasa.graph_components.providers.nlu_training_data_provider.NLUTrainingDataProvider + constructor_name: create + fn: provide + config: + language: en + persist: false + eager: false + is_target: false + is_input: true + resource: null + +predict_schema: + nodes: + nlu_message_converter: + needs: + messages: __message__ + uses: rasa.graph_components.converters.nlu_message_converter.NLUMessageConverter + constructor_name: load + fn: convert_user_message + config: {} + eager: true + is_target: false + is_input: false + resource: null + custom_nlu_target: + needs: + messages: nlu_message_converter + domain: domain_provider + uses: rasa.nlu.classifiers.regex_message_handler.RegexMessageHandler + constructor_name: load + fn: process + config: {} + eager: true + is_target: false + is_input: false + resource: null + domain_provider: + needs: {} + uses: rasa.graph_components.providers.domain_provider.DomainProvider + constructor_name: load + fn: provide_inference + config: {} + eager: true + is_target: false + is_input: false + resource: + name: domain_provider + run_MemoizationPolicy0: + needs: + domain: domain_provider + tracker: __tracker__ + rule_only_data: rule_only_data_provider + uses: rasa.core.policies.memoization.MemoizationPolicy + constructor_name: load + fn: 
predict_action_probabilities + config: {} + eager: true + is_target: false + is_input: false + resource: + name: train_MemoizationPolicy0 + rule_only_data_provider: + needs: {} + uses: rasa.graph_components.providers.rule_only_provider.RuleOnlyDataProvider + constructor_name: load + fn: provide + config: {} + eager: true + is_target: false + is_input: false + resource: + name: train_RulePolicy1 + custom_core_target: + needs: + policy0: run_MemoizationPolicy0 + domain: domain_provider + tracker: __tracker__ + uses: rasa.core.policies.ensemble.DefaultPolicyPredictionEnsemble + constructor_name: load + fn: combine_predictions_from_kwargs + config: {} + eager: true + is_target: false + is_input: false + resource: null diff --git a/docs/docs/custom-graph-components.mdx b/docs/docs/custom-graph-components.mdx index b46d77114984..7785d3b2cadb 100644 --- a/docs/docs/custom-graph-components.mdx +++ b/docs/docs/custom-graph-components.mdx @@ -228,7 +228,7 @@ Your graph component's train method must return the value of `resource` so that the training results between trainings. The `self._model_storage.write_to(self._resource)` context manager provides a path to a directory where you can persist any data required by your -graph component. +graph component. ```python from __future__ import annotations @@ -328,16 +328,16 @@ class MyComponent(GraphComponent): ## Registering Graph Components with the Model Configuration -To make your graph component available to Rasa Open Source you have to register your +To make your graph component available to Rasa Open Source you may have to register your graph component with a recipe. Rasa Open Source uses recipes to translate the content of your model configuration to executable [graphs](custom-graph-components.mdx#graph-components). -Currently, Rasa Open Source only supports the `default.v1` recipe. 
-Register your graph component with this recipe by using the `DefaultV1Recipe.register` +Currently, Rasa Open Source supports the `default.v1` and the experimental `graph.v1` recipes. +For `default.v1` recipe, you need to register your graph component by using the `DefaultV1Recipe.register` decorator: -:::code language="python" source="docs/sources/data/test_classes/registered_component.py" - highlight="5-9"::: +```python (docs/sources/data/test_classes/registered_component.py) +``` Rasa Open Source uses the information provided in the `register` decorator and the position of your graph component within the configuration file to schedule the execution diff --git a/docs/docs/graph-recipe.mdx b/docs/docs/graph-recipe.mdx new file mode 100644 index 000000000000..c322b48dfcc1 --- /dev/null +++ b/docs/docs/graph-recipe.mdx @@ -0,0 +1,134 @@ +--- +id: graph-recipe +sidebar_label: Graph Recipe +title: Graph Recipe +description: Learn about Graph Recipe for Rasa Open Source. +abstract: Graph recipes provide a more fine tuned configuration for your executable graphs. +--- + +:::tip Default Recipe or Graph Recipe? + +You will probably only need graph recipes if you're running ML experiments or ablation studies on an existing model. We recommend starting with the default recipe and for many applications that will be all that's needed. + +::: + +We now support graph recipes in addition to the default recipe. Graph recipes provide more granular control over how execution graph schemas are built. + +:::caution New in 3.1 +This feature is experimental. +We introduce experimental features to get feedback from our community, so we encourage you to try it out! +However, the functionality might be changed or removed in the future. +If you have feedback (positive or negative) please share it with us on the [Rasa Forum](https://forum.rasa.com). + +::: + + +## Differences with Default Recipe + +There are some differences between the default recipe and the new graph recipe. 
Main differences are: + +- Default recipe is named `default.v1` in the config file whereas graph recipes are named `graph.v1`. +- Default recipes provide an easy to use recipe structure whereas graph recipes are more advanced and powerful. +- Default recipes are very opinionated and provide various defaults whereas graph recipes are more explicit. +- Default recipes can auto-configure themselves and dump the defaults used to the file if some sections in `config.yml` are missing, whereas graph recipes do none of this and assume what you see is what you get. There are no surprises with graph recipes. +- Default recipe divides graph configuration into mainly two parts: `pipeline` and `policies`. These can also be described as NLU and core (dialogue management) parts. For graph recipe on the other hand, the separation is between training (i.e. `train_schema`) and prediction (i.e. `predict_schema`). + +:::tip Starting from scratch? + +If you don't know which recipe to choose, use the default recipe to bootstrap your project fast. If later you find that you need more fine-grained control, you can always change your recipe to be a graph recipe. + +::: + +## Graph Configuration File Structure + +Graph recipes share the `recipe` and `language` keys with the default recipe, with the same meaning. Similarities end there as graph recipes do not have `pipeline` or `policies` keys but they do have `train_schema` and `predict_schema` keys for determining the graph nodes during train and predict runs respectively. In addition to this, target nodes for NLU and core can be specified explicitly with graph recipes; these are declared with `nlu_target` and `core_target`. If targets are omitted, node names used by default recipe will take over, and these are `run_RegexMessageHandler` and `select_prediction` for NLU and core respectively. 
+ +Here's an example graph recipe: + +```yaml-rasa (docs/sources/data/test_config/graph_config_short.yml) +``` + +:::note graph targets +For NLU, default target name of `run_RegexMessageHandler` will be used, while for core (dialogue management) the target will be called `select_prediction` if omitted. Make sure you have graph nodes with relevant names in your schema definitions. + +In a similar fashion, note that the default resource needed by the first graph node is fixed to be `__importer__` (representing configuration, training data etc.) for training task and it is `__message__` (representing the message received) for prediction task. Make sure your first nodes make use of these dependencies. + +::: + +## Graph Node Configuration + +As you can see in the example above, graph recipes are very much explicit and you can configure each graph node as you would like. Here is an explanation of what some of the keys mean: + +- `needs`: You can define here what data your graph node requires and from which parent node. Key is the data name, whereas the value would refer to the node name. +```yaml-rasa +needs: + messages: nlu_message_converter +``` +Current graph node needs `messages` which is provided by `nlu_message_converter` node. + +- `uses`: You can provide the class used to instantiate this node with this key. Please provide the full path in Python path syntax, eg. + +```yaml-rasa +uses: rasa.graph_components.converters.nlu_message_converter.NLUMessageConverter +``` +You are not required to use Rasa internal graph component classes and you +can use your own components here. Refer to [custom graph +components](custom-graph-components.mdx) pages to find out how to write your +own graph components. + +- `constructor_name`: This is the constructor used to instantiate your component. Example: + +```yaml-rasa +constructor_name: load +``` + +- `fn`: This is the function used in executing the graph component. 
Example: + +```yaml-rasa +fn: combine_predictions_from_kwargs +``` + +- `config`: You can provide any configuration parameters for your components using this key. + +```yaml-rasa +config: + language: en + persist: false +``` + +- `eager`: This determines if your component should be eagerly loaded +when the graph is constructed or if it should wait until the +runtime (this is called lazy instantiation). Usually we always +instantiate lazily during training and eagerly during inference (to +avoid slow first prediction). + + +```yaml-rasa +eager: true +``` + +- `resource`: If given, graph node is loaded from this resource instead of being instantiated from scratch. This is e.g. used to load a trained component for predictions. + +```yaml-rasa +resource: + name: train_RulePolicy1 +``` + +- `is_target`: Boolean value, if `True` then this node can't be pruned +during fingerprinting (it might be replaced with a cached value +though). This +is e.g. used for all components which train as their result always needs +to be added to the model archive so that the data is available during +inference. + +```yaml-rasa +is_target: false +``` + +- `is_input`: Boolean value; nodes with `is_input` are _always_ run (also during the +fingerprint run). This makes sure that we e.g. detect changes in file +contents. + +```yaml-rasa + is_input: false +``` diff --git a/docs/docs/model-configuration.mdx b/docs/docs/model-configuration.mdx index 1da6ab87a31e..7e2d7962338f 100644 --- a/docs/docs/model-configuration.mdx +++ b/docs/docs/model-configuration.mdx @@ -6,7 +6,8 @@ description: Learn about model configuration for Rasa Open Source. abstract: The configuration file defines the components and policies that your model will use to make predictions based on user input. --- -The recipe key allows for different types of config and model architecture. Currently, only "default.v1" is supported. +The recipe key allows for different types of config and model architecture. 
+Currently, "default.v1" and the experimental "graph.v1" recipes are supported. The language and pipeline keys specify the [components](./components.mdx) used by the model to make NLU predictions. The policies key defines the [policies](./policies.mdx) used by the model to predict the next action. diff --git a/docs/docs/telemetry/events.json b/docs/docs/telemetry/events.json index 2ca399ba116a..5071c8f6b0ed 100644 --- a/docs/docs/telemetry/events.json +++ b/docs/docs/telemetry/events.json @@ -30,18 +30,33 @@ }, "pipeline": { "oneOf": [ + {"type": "null"}, {"type": "string"}, {"type": "array", "items": {"type": "object"}} ], "description": "List of the pipeline configurations used for training." }, "policies": { - "type": "array", - "items": { - "type": "object" - }, + "oneOf": [ + {"type": "null"}, + {"type": "array", "items": {"type": "object"}} + ], "description": "List of the policy configurations used for training." }, + "train_schema": { + "oneOf": [ + {"type": "null"}, + {"type": "object"} + ], + "description": "Training graph schema for graph recipe" + }, + "predict_schema": { + "oneOf": [ + {"type": "null"}, + {"type": "object"} + ], + "description": "Predict graph schema for graph recipe" + }, "num_intent_examples": { "type": "integer", "description": "Number of NLU examples." @@ -109,6 +124,10 @@ "is_finetuning": { "type": "boolean", "description": "True if a model is trained by finetuning an existing model." + }, + "recipe": { + "type": "string", + "description": "Recipe used in training the model, either 'default.v1' or 'graph.v1'." 
} }, "additionalProperties": false, @@ -118,6 +137,8 @@ "type", "pipeline", "policies", + "train_schema", + "predict_schema", "num_intent_examples", "num_entity_examples", "num_actions", diff --git a/docs/sidebars.js b/docs/sidebars.js index b4d17d1277f8..9dc27a162951 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -70,6 +70,7 @@ module.exports = { 'custom-graph-components', 'training-data-importers', 'language-support', + 'graph-recipe', ], }, { diff --git a/rasa/cli/interactive.py b/rasa/cli/interactive.py index f82922e69bda..3d4ae3459911 100644 --- a/rasa/cli/interactive.py +++ b/rasa/cli/interactive.py @@ -10,7 +10,7 @@ import rasa.cli.utils from rasa.engine.storage.local_model_storage import LocalModelStorage from rasa.shared.constants import DEFAULT_ENDPOINTS_PATH, DEFAULT_MODELS_PATH -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType from rasa.shared.importers.importer import TrainingDataImporter import rasa.shared.utils.cli import rasa.utils.common diff --git a/rasa/cli/shell.py b/rasa/cli/shell.py index bd16aea5ca99..ea891ad878ef 100644 --- a/rasa/cli/shell.py +++ b/rasa/cli/shell.py @@ -9,7 +9,7 @@ from rasa.cli.arguments import shell as arguments from rasa.engine.storage.local_model_storage import LocalModelStorage from rasa.model import get_latest_model -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType from rasa.shared.utils.cli import print_error from rasa.exceptions import ModelNotFound diff --git a/rasa/core/processor.py b/rasa/core/processor.py index 7056e5024a5d..38518141f75c 100644 --- a/rasa/core/processor.py +++ b/rasa/core/processor.py @@ -14,7 +14,7 @@ from rasa.engine.storage.local_model_storage import LocalModelStorage from rasa.engine.storage.storage import ModelMetadata from rasa.model import get_latest_model -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType import 
rasa.shared.utils.io import rasa.core.actions.action from rasa.core import jobs diff --git a/rasa/engine/graph.py b/rasa/engine/graph.py index 540e24e50979..0ad6bf751d7d 100644 --- a/rasa/engine/graph.py +++ b/rasa/engine/graph.py @@ -13,7 +13,7 @@ from rasa.engine.storage.storage import ModelStorage from rasa.shared.exceptions import InvalidConfigException, RasaException -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType logger = logging.getLogger(__name__) diff --git a/rasa/shared/importers/default_config.yml b/rasa/engine/recipes/config_files/default_config.yml similarity index 100% rename from rasa/shared/importers/default_config.yml rename to rasa/engine/recipes/config_files/default_config.yml diff --git a/rasa/engine/recipes/default_recipe.py b/rasa/engine/recipes/default_recipe.py index 6547f9b0fec6..6f5cbaffacda 100644 --- a/rasa/engine/recipes/default_recipe.py +++ b/rasa/engine/recipes/default_recipe.py @@ -13,6 +13,7 @@ CoreFeaturizationInputConverter, CoreFeaturizationCollector, ) +from rasa.shared.exceptions import FileNotFoundException from rasa.core.policies.ensemble import DefaultPolicyPredictionEnsemble from rasa.engine.graph import ( @@ -41,17 +42,36 @@ from rasa.graph_components.providers.training_tracker_provider import ( TrainingTrackerProvider, ) +import rasa.shared.constants from rasa.shared.exceptions import RasaException, InvalidConfigException -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType from rasa.utils.tensorflow.constants import EPOCHS -import rasa.shared.utils.common +from rasa.shared.utils.common import ( + class_from_module_path, + transform_collection_to_sentence, +) logger = logging.getLogger(__name__) DEFAULT_PREDICT_KWARGS = dict(constructor_name="load", eager=True, is_target=False) +COMMENTS_FOR_KEYS = { + "pipeline": ( + f"# # No configuration for the NLU pipeline was provided. 
The following " + f"default pipeline was used to train your model.\n" + f"# # If you'd like to customize it, uncomment and adjust the pipeline.\n" + f"# # See {rasa.shared.constants.DOCS_URL_PIPELINE} for more information.\n" + ), + "policies": ( + f"# # No configuration for policies was provided. The following default " + f"policies were used to train your model.\n" + f"# # If you'd like to customize them, uncomment and adjust the policies.\n" + f"# # See {rasa.shared.constants.DOCS_URL_POLICIES} for more information.\n" + ), +} + class DefaultV1RecipeRegisterException(RasaException): """If you register a class which is not of type `GraphComponent`.""" @@ -144,7 +164,7 @@ def _from_registry(cls, name: Text) -> RegisteredComponent: return cls._registered_components[name] if "." in name: - clazz = rasa.shared.utils.common.class_from_module_path(name) + clazz = class_from_module_path(name) if clazz.__name__ in cls._registered_components: return cls._registered_components[clazz.__name__] @@ -816,3 +836,254 @@ def _add_end_to_end_features_for_inference( config={}, ) return node_with_e2e_features + + @staticmethod + def auto_configure( + config_file_path: Optional[Text], + config: Dict, + training_type: Optional[TrainingType] = TrainingType.BOTH, + ) -> Tuple[Dict[Text, Any], Set[str], Set[str]]: + """Determine configuration from auto-filled configuration file. + + Keys that are provided and have a value in the file are kept. Keys that are not + provided are configured automatically. + + Note that this needs to be called explicitly; ie. we cannot + auto-configure automatically from importers because importers are not + allowed to access code outside of `rasa.shared`. + + Args: + config_file_path: The path to the configuration file. + config: Configuration in dictionary format. + training_type: Optional training type to auto-configure. By default + both core and NLU will be auto-configured. 
+ """ + missing_keys = DefaultV1Recipe._get_missing_config_keys(config, training_type) + keys_to_configure = DefaultV1Recipe._get_unspecified_autoconfigurable_keys( + config, training_type + ) + + if keys_to_configure: + config = DefaultV1Recipe.complete_config(config, keys_to_configure) + DefaultV1Recipe._dump_config( + config, config_file_path, missing_keys, keys_to_configure, training_type + ) + + return config, missing_keys, keys_to_configure + + @staticmethod + def _get_unspecified_autoconfigurable_keys( + config: Dict[Text, Any], + training_type: Optional[TrainingType] = TrainingType.BOTH, + ) -> Set[Text]: + if training_type == TrainingType.NLU: + all_keys = rasa.shared.constants.CONFIG_AUTOCONFIGURABLE_KEYS_NLU + elif training_type == TrainingType.CORE: + all_keys = rasa.shared.constants.CONFIG_AUTOCONFIGURABLE_KEYS_CORE + else: + all_keys = rasa.shared.constants.CONFIG_AUTOCONFIGURABLE_KEYS + + return {k for k in all_keys if config.get(k) is None} + + @staticmethod + def _get_missing_config_keys( + config: Dict[Text, Any], + training_type: Optional[TrainingType] = TrainingType.BOTH, + ) -> Set[Text]: + if training_type == TrainingType.NLU: + all_keys = rasa.shared.constants.CONFIG_KEYS_NLU + elif training_type == TrainingType.CORE: + all_keys = rasa.shared.constants.CONFIG_KEYS_CORE + else: + all_keys = rasa.shared.constants.CONFIG_KEYS + + return {k for k in all_keys if k not in config.keys()} + + @staticmethod + def complete_config( + config: Dict[Text, Any], keys_to_configure: Set[Text] + ) -> Dict[Text, Any]: + """Complete a config by adding automatic configuration for the specified keys. + + Args: + config: The provided configuration. + keys_to_configure: Keys to be configured automatically (e.g. `policies`). + + Returns: + The resulting configuration including both the provided and + the automatically configured keys. 
+ """ + import pkg_resources + + if keys_to_configure: + logger.debug( + f"The provided configuration does not contain the key(s) " + f"{transform_collection_to_sentence(keys_to_configure)}. " # noqa: E501, W505 + f"Values will be provided from the default configuration." + ) + + filename = "config_files/default_config.yml" + + default_config_file = pkg_resources.resource_filename(__name__, filename) + default_config = rasa.shared.utils.io.read_config_file(default_config_file) + + config = copy.deepcopy(config) + for key in keys_to_configure: + config[key] = default_config[key] + + return config + + @staticmethod + def _dump_config( + config: Dict[Text, Any], + config_file_path: Text, + missing_keys: Set[Text], + auto_configured_keys: Set[Text], + training_type: Optional[TrainingType] = TrainingType.BOTH, + ) -> None: + """Dump the automatically configured keys into the config file. + + The configuration provided in the file is kept as it is (preserving the order of + keys and comments). + For keys that were automatically configured, an explanatory + comment is added and the automatically chosen configuration is + added commented-out. + If there are already blocks with comments from a previous auto + configuration run, they are replaced with the new auto + configuration. + + Args: + config: The configuration including the automatically configured keys. + config_file_path: The file into which the configuration should be dumped. + missing_keys: Keys that need to be added to the config file. + auto_configured_keys: Keys for which a commented out auto + configuration section needs to be added to the config file. + training_type: NLU, CORE or BOTH depending on which is trained. 
+ """ + config_as_expected = DefaultV1Recipe._is_config_file_as_expected( + config_file_path, missing_keys, auto_configured_keys, training_type + ) + if not config_as_expected: + rasa.shared.utils.cli.print_error( + f"The configuration file at '{config_file_path}' has been removed or " + f"modified while the automatic configuration was running. The current " + f"configuration will therefore not be dumped to the file. If you want " + f"your model to use the configuration provided in " + f"'{config_file_path}' you need to re-run training." + ) + return + + DefaultV1Recipe._add_missing_config_keys_to_file(config_file_path, missing_keys) + + autoconfig_lines = DefaultV1Recipe._get_commented_out_autoconfig_lines( + config, auto_configured_keys + ) + + current_config_content = rasa.shared.utils.io.read_file(config_file_path) + current_config_lines = current_config_content.splitlines(keepends=True) + + updated_lines = DefaultV1Recipe._get_lines_including_autoconfig( + current_config_lines, autoconfig_lines + ) + + rasa.shared.utils.io.write_text_file("".join(updated_lines), config_file_path) + + auto_configured_keys = transform_collection_to_sentence(auto_configured_keys) + rasa.shared.utils.cli.print_info( + f"The configuration for {auto_configured_keys} was chosen automatically. " + f"It was written into the config file at '{config_file_path}'." 
+ ) + + @staticmethod + def _is_config_file_as_expected( + config_file_path: Text, + missing_keys: Set[Text], + auto_configured_keys: Set[Text], + training_type: Optional[TrainingType] = TrainingType.BOTH, + ) -> bool: + try: + content = rasa.shared.utils.io.read_config_file(config_file_path) + except FileNotFoundException: + content = "" + + return ( + bool(content) + and missing_keys + == DefaultV1Recipe._get_missing_config_keys(content, training_type) + and auto_configured_keys + == DefaultV1Recipe._get_unspecified_autoconfigurable_keys( + content, training_type + ) + ) + + @staticmethod + def _add_missing_config_keys_to_file( + config_file_path: Text, missing_keys: Set[Text] + ) -> None: + if not missing_keys: + return + with open( + config_file_path, "a", encoding=rasa.shared.utils.io.DEFAULT_ENCODING + ) as f: + for key in missing_keys: + f.write(f"{key}:\n") + + @staticmethod + def _get_lines_including_autoconfig( + lines: List[Text], autoconfig_lines: Dict[Text, List[Text]] + ) -> List[Text]: + auto_configured_keys = autoconfig_lines.keys() + + lines_with_autoconfig = [] + remove_comments_until_next_uncommented_line = False + for line in lines: + insert_section = None + + # remove old auto configuration + if remove_comments_until_next_uncommented_line: + if line.startswith("#"): + continue + remove_comments_until_next_uncommented_line = False + + # add an explanatory comment to auto configured sections + for key in auto_configured_keys: + if line.startswith(f"{key}:"): # start of next auto-section + line = line + COMMENTS_FOR_KEYS[key] + insert_section = key + remove_comments_until_next_uncommented_line = True + + lines_with_autoconfig.append(line) + + if not insert_section: + continue + + # add the auto configuration (commented out) + lines_with_autoconfig += autoconfig_lines[insert_section] + + return lines_with_autoconfig + + @staticmethod + def _get_commented_out_autoconfig_lines( + config: Dict[Text, Any], auto_configured_keys: Set[Text] + ) -> 
Dict[Text, List[Text]]: + import ruamel.yaml + import ruamel.yaml.compat + + yaml_parser = ruamel.yaml.YAML() + yaml_parser.indent(mapping=2, sequence=4, offset=2) + + autoconfig_lines = {} + + for key in auto_configured_keys: + stream = ruamel.yaml.compat.StringIO() + yaml_parser.dump(config.get(key), stream) + dump = stream.getvalue() + + lines = dump.split("\n") + if not lines[-1]: + lines = lines[:-1] # yaml dump adds an empty line at the end + lines = [f"# {line}\n" for line in lines] + + autoconfig_lines[key] = lines + + return autoconfig_lines diff --git a/rasa/engine/recipes/graph_recipe.py b/rasa/engine/recipes/graph_recipe.py new file mode 100644 index 000000000000..5a5d208986f4 --- /dev/null +++ b/rasa/engine/recipes/graph_recipe.py @@ -0,0 +1,77 @@ +import logging + +from rasa.engine.recipes.recipe import Recipe +from rasa.engine.graph import GraphModelConfiguration +from rasa.shared.constants import DOCS_URL_GRAPH_RECIPE +from rasa.shared.data import TrainingType +from rasa.shared.exceptions import InvalidConfigException +from rasa.shared.utils.common import mark_as_experimental_feature +from rasa.shared.utils.io import raise_warning +from rasa.engine.graph import GraphSchema + +from typing import Dict, Text, Any, Tuple + + +logger = logging.getLogger(__name__) + + +class GraphV1Recipe(Recipe): + """Recipe which converts the graph model config to train and predict graph.""" + + name = "graph.v1" + + def get_targets( + self, config: Dict, training_type: TrainingType + ) -> Tuple[Text, Any]: + """Return NLU and core targets from config dictionary. + + Note that default recipe has `nlu_target` and `core_target` as + fixed values of `run_RegexMessageHandler` and `select_prediction` + respectively. For graph recipe, target values are customizable. These + can be used in validation (default recipe does this validation check) + and during execution (all recipes use targets during execution). 
+ """ + if training_type == TrainingType.NLU: + core_required = False + core_target = None + else: + core_required = True + core_target = config.get("core_target") + # NLU target is required because core (prediction) depends on NLU. + nlu_target = config.get("nlu_target") + if nlu_target is None or (core_required and core_target is None): + raise InvalidConfigException( + "Can't find target names for NLU and/or core. Please make " + "sure to provide 'nlu_target' (required for all training types) " + "and 'core_target' (required if training is not just NLU) values in " + "your config.yml file." + ) + return nlu_target, core_target + + def graph_config_for_recipe( + self, + config: Dict, + cli_parameters: Dict[Text, Any], + training_type: TrainingType = TrainingType.BOTH, + is_finetuning: bool = False, + ) -> GraphModelConfiguration: + """Converts the default config to graphs (see interface for full docstring).""" + mark_as_experimental_feature("graph recipe") + if cli_parameters or is_finetuning: + raise_warning( + "Unlike the Default Recipe, Graph Recipe does not utilize CLI " + "parameters or finetuning and these configurations will be ignored. 
" + "Add configuration to the recipe itself if you want them to be used.", + docs=DOCS_URL_GRAPH_RECIPE, + ) + + nlu_target, core_target = self.get_targets(config, training_type) + + return GraphModelConfiguration( + train_schema=GraphSchema.from_dict(config.get("train_schema")), + predict_schema=GraphSchema.from_dict(config.get("predict_schema")), + training_type=training_type, + language=config.get("language"), + core_target=core_target, + nlu_target=nlu_target, + ) diff --git a/rasa/engine/recipes/recipe.py b/rasa/engine/recipes/recipe.py index 86904099b5ca..0d67f219205d 100644 --- a/rasa/engine/recipes/recipe.py +++ b/rasa/engine/recipes/recipe.py @@ -1,12 +1,12 @@ from __future__ import annotations import abc -from typing import Text, Dict, Any, Optional +from typing import Text, Dict, Any, Optional, Tuple, Set import rasa.shared.utils.io from rasa.engine.graph import GraphModelConfiguration from rasa.shared.exceptions import RasaException -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType class InvalidRecipeException(RasaException): @@ -29,6 +29,7 @@ def recipe_for_name(name: Optional[Text]) -> Recipe: graph schemas. """ from rasa.engine.recipes.default_recipe import DefaultV1Recipe + from rasa.engine.recipes.graph_recipe import GraphV1Recipe if name is None: rasa.shared.utils.io.raise_deprecation_warning( @@ -37,7 +38,10 @@ def recipe_for_name(name: Optional[Text]) -> Recipe: f"'{DefaultV1Recipe.name}'." ) return DefaultV1Recipe() - recipes = {DefaultV1Recipe.name: DefaultV1Recipe} + recipes = { + DefaultV1Recipe.name: DefaultV1Recipe, + GraphV1Recipe.name: GraphV1Recipe, + } recipe_constructor = recipes.get(name) if recipe_constructor: @@ -49,6 +53,19 @@ def recipe_for_name(name: Optional[Text]) -> Recipe: f"'{DefaultV1Recipe.name}'." 
) + @staticmethod + def auto_configure( + config_file_path: Optional[Text], + config: Dict, + training_type: Optional[TrainingType] = TrainingType.BOTH, + ) -> Tuple[Dict[Text, Any], Set[str], Set[str]]: + """Adds missing options with defaults and dumps the configuration. + + Override in child classes if this functionality is needed, each recipe + will have different auto configuration values. + """ + return config, set(), set() + @abc.abstractmethod def graph_config_for_recipe( self, diff --git a/rasa/engine/storage/storage.py b/rasa/engine/storage/storage.py index bf1937d0882e..a4740ce8898c 100644 --- a/rasa/engine/storage/storage.py +++ b/rasa/engine/storage/storage.py @@ -13,7 +13,7 @@ from rasa.exceptions import UnsupportedModelVersionError from rasa.engine.storage.resource import Resource from rasa.shared.core.domain import Domain -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType if typing.TYPE_CHECKING: from rasa.engine.graph import GraphSchema, GraphModelConfiguration diff --git a/rasa/model_testing.py b/rasa/model_testing.py index f45968d5e7a7..5764a7bf454d 100644 --- a/rasa/model_testing.py +++ b/rasa/model_testing.py @@ -13,7 +13,7 @@ from rasa.exceptions import ModelNotFound from rasa.shared.constants import DEFAULT_RESULTS_PATH import rasa.shared.nlu.training_data.loading -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType from rasa.shared.nlu.training_data.training_data import TrainingData import rasa.model diff --git a/rasa/model_training.py b/rasa/model_training.py index 922cf6f2135d..a01eac75fa12 100644 --- a/rasa/model_training.py +++ b/rasa/model_training.py @@ -13,7 +13,7 @@ from rasa.engine.storage.storage import ModelStorage from rasa.engine.training.components import FingerprintStatus from rasa.engine.training.graph_trainer import GraphTrainer -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import 
TrainingType from rasa.shared.importers.importer import TrainingDataImporter from rasa import telemetry from rasa.shared.core.domain import Domain @@ -199,10 +199,17 @@ def _train_graph( config = file_importer.get_config() recipe = Recipe.recipe_for_name(config.get("recipe")) + config, _missing_keys, _configured_keys = recipe.auto_configure( + file_importer.get_config_file_for_auto_config(), + config, + training_type, + ) model_configuration = recipe.graph_config_for_recipe( - config, kwargs, training_type=training_type, is_finetuning=is_finetuning + config, + kwargs, + training_type=training_type, + is_finetuning=is_finetuning, ) - rasa.engine.validation.validate(model_configuration) with tempfile.TemporaryDirectory() as temp_model_dir: diff --git a/rasa/shared/constants.py b/rasa/shared/constants.py index 5c160c5d0cd8..0b00c1f1ef88 100644 --- a/rasa/shared/constants.py +++ b/rasa/shared/constants.py @@ -23,6 +23,7 @@ DOCS_URL_TRACKER_STORES = DOCS_BASE_URL + "/tracker-stores" DOCS_URL_COMPONENTS = DOCS_BASE_URL + "/components" DOCS_URL_GRAPH_COMPONENTS = DOCS_BASE_URL + "/custom-graph-components" +DOCS_URL_GRAPH_RECIPE = DOCS_BASE_URL + "/graph-recipe" DOCS_URL_MIGRATION_GUIDE = DOCS_BASE_URL + "/migration-guide" DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION = ( f"{DOCS_URL_MIGRATION_GUIDE}#rasa-21-to-rasa-22" diff --git a/rasa/shared/data.py b/rasa/shared/data.py index d665e9daef8e..e21675e90872 100644 --- a/rasa/shared/data.py +++ b/rasa/shared/data.py @@ -2,6 +2,7 @@ import shutil import tempfile import uuid +from enum import Enum from pathlib import Path from typing import Text, Optional, Union, List, Callable, Set, Iterable @@ -81,7 +82,6 @@ def get_data_files( Returns: Paths of training data files. 
""" - data_files = set() if paths is None: @@ -172,3 +172,21 @@ def _copy_files_to_new_dir(files: Iterable[Text]) -> Text: shutil.copy2(f, os.path.join(directory, unique_file_name)) return directory + + +class TrainingType(Enum): + """Enum class for defining explicitly what training types exist.""" + + NLU = 1 + CORE = 2 + BOTH = 3 + END_TO_END = 4 + + @property + def model_type(self) -> Text: + """Returns the type of model which this training yields.""" + if self == TrainingType.NLU: + return "nlu" + if self == TrainingType.CORE: + return "core" + return "rasa" diff --git a/rasa/shared/importers/autoconfig.py b/rasa/shared/importers/autoconfig.py deleted file mode 100644 index fe5cde648c39..000000000000 --- a/rasa/shared/importers/autoconfig.py +++ /dev/null @@ -1,286 +0,0 @@ -import copy -import logging -import os -from enum import Enum -from typing import Text, Dict, Any, List, Set, Optional - -import rasa.shared.constants -from rasa.shared.exceptions import FileNotFoundException -import rasa.shared.utils.cli -import rasa.shared.utils.common -import rasa.shared.utils.io - -logger = logging.getLogger(__name__) - -COMMENTS_FOR_KEYS = { - "pipeline": ( - f"# # No configuration for the NLU pipeline was provided. The following " - f"default pipeline was used to train your model.\n" - f"# # If you'd like to customize it, uncomment and adjust the pipeline.\n" - f"# # See {rasa.shared.constants.DOCS_URL_PIPELINE} for more information.\n" - ), - "policies": ( - f"# # No configuration for policies was provided. 
The following default " - f"policies were used to train your model.\n" - f"# # If you'd like to customize them, uncomment and adjust the policies.\n" - f"# # See {rasa.shared.constants.DOCS_URL_POLICIES} for more information.\n" - ), -} - - -class TrainingType(Enum): - NLU = 1 - CORE = 2 - BOTH = 3 - END_TO_END = 4 - - @property - def model_type(self) -> Text: - """Returns the type of model which this training yields.""" - if self == TrainingType.NLU: - return "nlu" - if self == TrainingType.CORE: - return "core" - return "rasa" - - -def get_configuration( - config_file_path: Optional[Text], - training_type: Optional[TrainingType] = TrainingType.BOTH, -) -> Dict[Text, Any]: - """Determine configuration from a configuration file. - - Keys that are provided and have a value in the file are kept. Keys that are not - provided are configured automatically. - - Args: - config_file_path: The path to the configuration file. - training_type: NLU, CORE or BOTH depending on what is trained. - """ - if not config_file_path or not os.path.exists(config_file_path): - logger.debug("No configuration file was provided to the TrainingDataImporter.") - return {} - - config = rasa.shared.utils.io.read_model_configuration(config_file_path) - - missing_keys = _get_missing_config_keys(config, training_type) - keys_to_configure = _get_unspecified_autoconfigurable_keys(config, training_type) - - if keys_to_configure: - config = _auto_configure(config, keys_to_configure) - _dump_config( - config, config_file_path, missing_keys, keys_to_configure, training_type - ) - - return config - - -def _get_unspecified_autoconfigurable_keys( - config: Dict[Text, Any], training_type: Optional[TrainingType] = TrainingType.BOTH -) -> Set[Text]: - if training_type == TrainingType.NLU: - all_keys = rasa.shared.constants.CONFIG_AUTOCONFIGURABLE_KEYS_NLU - elif training_type == TrainingType.CORE: - all_keys = rasa.shared.constants.CONFIG_AUTOCONFIGURABLE_KEYS_CORE - else: - all_keys = 
rasa.shared.constants.CONFIG_AUTOCONFIGURABLE_KEYS - - return {k for k in all_keys if config.get(k) is None} - - -def _get_missing_config_keys( - config: Dict[Text, Any], training_type: Optional[TrainingType] = TrainingType.BOTH -) -> Set[Text]: - if training_type == TrainingType.NLU: - all_keys = rasa.shared.constants.CONFIG_KEYS_NLU - elif training_type == TrainingType.CORE: - all_keys = rasa.shared.constants.CONFIG_KEYS_CORE - else: - all_keys = rasa.shared.constants.CONFIG_KEYS - - return {k for k in all_keys if k not in config.keys()} - - -def _auto_configure( - config: Dict[Text, Any], keys_to_configure: Set[Text] -) -> Dict[Text, Any]: - """Complete a config by adding automatic configuration for the specified keys. - - Args: - config: The provided configuration. - keys_to_configure: Keys to be configured automatically (e.g. `policies`). - - Returns: - The resulting configuration including both the provided and the automatically - configured keys. - """ - import pkg_resources - - if keys_to_configure: - logger.debug( - f"The provided configuration does not contain the key(s) " - f"{rasa.shared.utils.common.transform_collection_to_sentence(keys_to_configure)}. " # noqa: E501, W505 - f"Values will be provided from the default configuration." - ) - - filename = "default_config.yml" - - default_config_file = pkg_resources.resource_filename(__name__, filename) - default_config = rasa.shared.utils.io.read_config_file(default_config_file) - - config = copy.deepcopy(config) - for key in keys_to_configure: - config[key] = default_config[key] - - return config - - -def _dump_config( - config: Dict[Text, Any], - config_file_path: Text, - missing_keys: Set[Text], - auto_configured_keys: Set[Text], - training_type: Optional[TrainingType] = TrainingType.BOTH, -) -> None: - """Dump the automatically configured keys into the config file. - - The configuration provided in the file is kept as it is (preserving the order of - keys and comments). 
- For keys that were automatically configured, an explanatory comment is added and the - automatically chosen configuration is added commented-out. - If there are already blocks with comments from a previous auto configuration run, - they are replaced with the new auto configuration. - - Args: - config: The configuration including the automatically configured keys. - config_file_path: The file into which the configuration should be dumped. - missing_keys: Keys that need to be added to the config file. - auto_configured_keys: Keys for which a commented out auto configuration section - needs to be added to the config file. - training_type: NLU, CORE or BOTH depending on which is trained. - """ - - config_as_expected = _is_config_file_as_expected( - config_file_path, missing_keys, auto_configured_keys, training_type - ) - if not config_as_expected: - rasa.shared.utils.cli.print_error( - f"The configuration file at '{config_file_path}' has been removed or " - f"modified while the automatic configuration was running. The current " - f"configuration will therefore not be dumped to the file. If you want to " - f"your model to use the configuration provided in '{config_file_path}', " - f"you need to re-run training." - ) - return - - _add_missing_config_keys_to_file(config_file_path, missing_keys) - - autoconfig_lines = _get_commented_out_autoconfig_lines(config, auto_configured_keys) - - current_config_content = rasa.shared.utils.io.read_file(config_file_path) - current_config_lines = current_config_content.splitlines(keepends=True) - - updated_lines = _get_lines_including_autoconfig( - current_config_lines, autoconfig_lines - ) - - rasa.shared.utils.io.write_text_file("".join(updated_lines), config_file_path) - - auto_configured_keys = rasa.shared.utils.common.transform_collection_to_sentence( - auto_configured_keys - ) - rasa.shared.utils.cli.print_info( - f"The configuration for {auto_configured_keys} was chosen automatically. 
It " - f"was written into the config file at '{config_file_path}'." - ) - - -def _is_config_file_as_expected( - config_file_path: Text, - missing_keys: Set[Text], - auto_configured_keys: Set[Text], - training_type: Optional[TrainingType] = TrainingType.BOTH, -) -> bool: - try: - content = rasa.shared.utils.io.read_config_file(config_file_path) - except FileNotFoundException: - content = "" - - return ( - bool(content) - and missing_keys == _get_missing_config_keys(content, training_type) - and auto_configured_keys - == _get_unspecified_autoconfigurable_keys(content, training_type) - ) - - -def _add_missing_config_keys_to_file( - config_file_path: Text, missing_keys: Set[Text] -) -> None: - if not missing_keys: - return - with open( - config_file_path, "a", encoding=rasa.shared.utils.io.DEFAULT_ENCODING - ) as f: - for key in missing_keys: - f.write(f"{key}:\n") - - -def _get_lines_including_autoconfig( - lines: List[Text], autoconfig_lines: Dict[Text, List[Text]] -) -> List[Text]: - auto_configured_keys = autoconfig_lines.keys() - - lines_with_autoconfig = [] - remove_comments_until_next_uncommented_line = False - for line in lines: - insert_section = None - - # remove old auto configuration - if remove_comments_until_next_uncommented_line: - if line.startswith("#"): - continue - remove_comments_until_next_uncommented_line = False - - # add an explanatory comment to auto configured sections - for key in auto_configured_keys: - if line.startswith(f"{key}:"): # start of next auto-section - line = line + COMMENTS_FOR_KEYS[key] - insert_section = key - remove_comments_until_next_uncommented_line = True - - lines_with_autoconfig.append(line) - - if not insert_section: - continue - - # add the auto configuration (commented out) - lines_with_autoconfig += autoconfig_lines[insert_section] - - return lines_with_autoconfig - - -def _get_commented_out_autoconfig_lines( - config: Dict[Text, Any], auto_configured_keys: Set[Text] -) -> Dict[Text, List[Text]]: - import 
ruamel.yaml - import ruamel.yaml.compat - - yaml_parser = ruamel.yaml.YAML() - yaml_parser.indent(mapping=2, sequence=4, offset=2) - - autoconfig_lines = {} - - for key in auto_configured_keys: - stream = ruamel.yaml.compat.StringIO() - yaml_parser.dump(config.get(key), stream) - dump = stream.getvalue() - - lines = dump.split("\n") - if not lines[-1]: - lines = lines[:-1] # yaml dump adds an empty line at the end - lines = [f"# {line}\n" for line in lines] - - autoconfig_lines[key] = lines - - return autoconfig_lines diff --git a/rasa/shared/importers/importer.py b/rasa/shared/importers/importer.py index 12e0a50e780f..dabb9624eb6f 100644 --- a/rasa/shared/importers/importer.py +++ b/rasa/shared/importers/importer.py @@ -12,7 +12,6 @@ from rasa.shared.nlu.training_data.message import Message from rasa.shared.nlu.training_data.training_data import TrainingData from rasa.shared.nlu.constants import ENTITIES, ACTION_NAME -from rasa.shared.importers.autoconfig import TrainingType from rasa.shared.core.domain import IS_RETRIEVAL_INTENT_KEY logger = logging.getLogger(__name__) @@ -56,6 +55,11 @@ def get_config(self) -> Dict: """ raise NotImplementedError() + @rasa.shared.utils.common.cached_method + def get_config_file_for_auto_config(self) -> Optional[Text]: + """Returns config file path for auto-config only if there is a single one.""" + return self.config_file + def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData: """Retrieves the NLU training data that should be used for training. 
@@ -72,13 +76,11 @@ def load_from_config( config_path: Text, domain_path: Optional[Text] = None, training_data_paths: Optional[List[Text]] = None, - training_type: Optional[TrainingType] = TrainingType.BOTH, ) -> "TrainingDataImporter": """Loads a `TrainingDataImporter` instance from a configuration file.""" - config = rasa.shared.utils.io.read_config_file(config_path) return TrainingDataImporter.load_from_dict( - config, config_path, domain_path, training_data_paths, training_type + config, config_path, domain_path, training_data_paths ) @staticmethod @@ -91,9 +93,8 @@ def load_core_importer_from_config( Instance loaded from configuration file will only read Core training data. """ - importer = TrainingDataImporter.load_from_config( - config_path, domain_path, training_data_paths, TrainingType.CORE + config_path, domain_path, training_data_paths ) return importer @@ -107,9 +108,8 @@ def load_nlu_importer_from_config( Instance loaded from configuration file will only read NLU training data. 
""" - importer = TrainingDataImporter.load_from_config( - config_path, domain_path, training_data_paths, TrainingType.NLU + config_path, domain_path, training_data_paths ) if isinstance(importer, E2EImporter): @@ -125,26 +125,22 @@ def load_from_dict( config_path: Optional[Text] = None, domain_path: Optional[Text] = None, training_data_paths: Optional[List[Text]] = None, - training_type: Optional[TrainingType] = TrainingType.BOTH, ) -> "TrainingDataImporter": """Loads a `TrainingDataImporter` instance from a dictionary.""" - from rasa.shared.importers.rasa import RasaFileImporter config = config or {} importers = config.get("importers", []) importers = [ TrainingDataImporter._importer_from_dict( - importer, config_path, domain_path, training_data_paths, training_type + importer, config_path, domain_path, training_data_paths ) for importer in importers ] importers = [importer for importer in importers if importer] if not importers: importers = [ - RasaFileImporter( - config_path, domain_path, training_data_paths, training_type - ) + RasaFileImporter(config_path, domain_path, training_data_paths) ] return E2EImporter(ResponsesSyncImporter(CombinedDataImporter(importers))) @@ -155,7 +151,6 @@ def _importer_from_dict( config_path: Text, domain_path: Optional[Text] = None, training_data_paths: Optional[List[Text]] = None, - training_type: Optional[TrainingType] = TrainingType.BOTH, ) -> Optional["TrainingDataImporter"]: from rasa.shared.importers.multi_project import MultiProjectImporter from rasa.shared.importers.rasa import RasaFileImporter @@ -174,13 +169,12 @@ def _importer_from_dict( logging.warning(f"Importer '{module_path}' not found.") return None - importer_config = dict(training_type=training_type, **importer_config) - constructor_arguments = rasa.shared.utils.common.minimal_kwargs( importer_config, importer_class ) - return importer_class( + # mypy ignore needed because RasaFileImporter and MultiFI have different args + return importer_class( # type: 
ignore[call-arg] config_path, domain_path, training_data_paths, **constructor_arguments ) @@ -220,6 +214,11 @@ def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData: """Retrieves NLU training data (see parent class for full docstring).""" return self._importer.get_nlu_data(language) + @rasa.shared.utils.common.cached_method + def get_config_file_for_auto_config(self) -> Optional[Text]: + """Returns config file path for auto-config only if there is a single one.""" + return self._importer.get_config_file_for_auto_config() + class CombinedDataImporter(TrainingDataImporter): """A `TrainingDataImporter` that combines multiple importers. @@ -276,6 +275,17 @@ def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData: lambda merged, other: merged.merge(other), nlu_data, TrainingData() ) + @rasa.shared.utils.common.cached_method + def get_config_file_for_auto_config(self) -> Optional[Text]: + """Returns config file path for auto-config only if there is a single one.""" + if len(self._importers) != 1: + rasa.shared.utils.io.raise_warning( + "Auto-config for multiple importers is not supported; " + "using config as is." + ) + return None + return self._importers[0].get_config_file_for_auto_config() + class ResponsesSyncImporter(TrainingDataImporter): """Importer that syncs `responses` between Domain and NLU training data. 
@@ -293,6 +303,11 @@ def get_config(self) -> Dict: """Retrieves model config (see parent class for full docstring).""" return self._importer.get_config() + @rasa.shared.utils.common.cached_method + def get_config_file_for_auto_config(self) -> Optional[Text]: + """Returns config file path for auto-config only if there is a single one.""" + return self._importer.get_config_file_for_auto_config() + @rasa.shared.utils.common.cached_method def get_domain(self) -> Domain: """Merge existing domain with properties of retrieval intents in NLU data.""" @@ -465,6 +480,11 @@ def get_config(self) -> Dict: """Retrieves model config (see parent class for full docstring).""" return self.importer.get_config() + @rasa.shared.utils.common.cached_method + def get_config_file_for_auto_config(self) -> Optional[Text]: + """Returns config file path for auto-config only if there is a single one.""" + return self.importer.get_config_file_for_auto_config() + @rasa.shared.utils.common.cached_method def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData: """Retrieves NLU training data (see parent class for full docstring).""" diff --git a/rasa/shared/importers/multi_project.py b/rasa/shared/importers/multi_project.py index 7c87c7e8df83..79078066b6af 100644 --- a/rasa/shared/importers/multi_project.py +++ b/rasa/shared/importers/multi_project.py @@ -55,6 +55,10 @@ def __init__( mark_as_experimental_feature(feature_name="MultiProjectImporter") + def get_config_file_for_auto_config(self) -> Optional[Text]: + """Returns config file path for auto-config only if there is a single one.""" + return None + def _init_from_path(self, path: Text) -> None: if os.path.isfile(path): self._init_from_file(path) diff --git a/rasa/shared/importers/rasa.py b/rasa/shared/importers/rasa.py index 4e26b7d71b29..2426cfb9cd89 100644 --- a/rasa/shared/importers/rasa.py +++ b/rasa/shared/importers/rasa.py @@ -1,12 +1,12 @@ import logging +import os from typing import Dict, List, Optional, Text, Union 
import rasa.shared.data + from rasa.shared.core.training_data.structures import StoryGraph from rasa.shared.importers import utils -from rasa.shared.importers import autoconfig from rasa.shared.importers.importer import TrainingDataImporter -from rasa.shared.importers.autoconfig import TrainingType from rasa.shared.nlu.training_data.training_data import TrainingData from rasa.shared.core.domain import InvalidDomain, Domain from rasa.shared.core.training_data.story_reader.yaml_story_reader import ( @@ -25,7 +25,6 @@ def __init__( config_file: Optional[Text] = None, domain_path: Optional[Text] = None, training_data_paths: Optional[Union[List[Text], Text]] = None, - training_type: Optional[TrainingType] = TrainingType.BOTH, ): self._domain_path = domain_path @@ -40,11 +39,16 @@ def __init__( training_data_paths, YAMLStoryReader.is_test_stories_file ) - self.config = autoconfig.get_configuration(config_file, training_type) + self.config_file = config_file def get_config(self) -> Dict: """Retrieves model config (see parent class for full docstring).""" - return self.config + if not self.config_file or not os.path.exists(self.config_file): + logger.debug("No configuration file was provided to the RasaFileImporter.") + return {} + + config = rasa.shared.utils.io.read_model_configuration(self.config_file) + return config def get_stories(self, exclusion_percentage: Optional[int] = None) -> StoryGraph: """Retrieves training stories / rules (see parent class for full docstring).""" diff --git a/rasa/telemetry.py b/rasa/telemetry.py index 57b9dd99b976..107d0de0f03c 100644 --- a/rasa/telemetry.py +++ b/rasa/telemetry.py @@ -271,6 +271,7 @@ def _fetch_write_key(tool: Text, environment_variable: Text) -> Optional[Text]: def telemetry_write_key() -> Optional[Text]: """Read the Segment write key from the segment key text file. + The segment key text file should by present only in wheel/sdist packaged versions of Rasa Open Source. 
This avoids running telemetry locally when developing on Rasa or when running CI builds. @@ -280,7 +281,6 @@ def telemetry_write_key() -> Optional[Text]: Returns: Segment write key, if the key file was present. """ - return _fetch_write_key("segment", TELEMETRY_WRITE_KEY_ENVIRONMENT_VARIABLE) @@ -290,7 +290,6 @@ def sentry_write_key() -> Optional[Text]: Returns: Sentry write key, if the key file was present. """ - return _fetch_write_key("sentry", EXCEPTION_WRITE_KEY_ENVIRONMENT_VARIABLE) @@ -378,6 +377,7 @@ def _send_event( context: Dict[Text, Any], ) -> None: """Report the contents segmentof an event to the /track Segment endpoint. + Documentation: https://.com/docs/sources/server/http/ Do not call this function from outside telemetry.py! This function does not @@ -389,7 +389,6 @@ def _send_event( properties: Values to report along the event. context: Context information about the event. """ - payload = segment_request_payload(distinct_id, event_name, properties, context) if _is_telemetry_debug_enabled(): @@ -536,12 +535,12 @@ def _track( def get_telemetry_id() -> Optional[Text]: """Return the unique telemetry identifier for this Rasa Open Source install. + The identifier can be any string, but it should be a UUID. Returns: The identifier, if it is configured correctly. """ - try: telemetry_config = ( rasa_utils.read_global_config_value(CONFIG_FILE_TELEMETRY_KEY) or {} @@ -560,7 +559,6 @@ def toggle_telemetry_reporting(is_enabled: bool) -> None: is_enabled: `True` if the telemetry reporting should be enabled, `False` otherwise. """ - configuration = rasa_utils.read_global_config_value(CONFIG_FILE_TELEMETRY_KEY) if configuration: @@ -655,7 +653,8 @@ def initialize_error_reporting() -> None: Exceptions are reported to sentry. We avoid sending any metadata (local variables, paths, ...) to make sure we don't compromise any data. Only the exception and its stacktrace is logged and only if the exception origins - from the `rasa` package.""" + from the `rasa` package. 
+ """ import sentry_sdk from sentry_sdk import configure_scope from sentry_sdk.integrations.atexit import AtexitIntegration @@ -763,6 +762,8 @@ def track_model_training( "type": model_type, "pipeline": config.get("pipeline"), "policies": config.get("policies"), + "train_schema": config.get("train_schema"), + "predict_schema": config.get("predict_schema"), "num_intent_examples": len(nlu_data.intent_examples), "num_entity_examples": len(nlu_data.entity_examples), "num_actions": len(domain.action_names_or_texts), @@ -782,6 +783,7 @@ def track_model_training( "num_synonyms": len(nlu_data.entity_synonyms), "num_regexes": len(nlu_data.regex_features), "is_finetuning": is_finetuning, + "recipe": config.get("recipe"), }, ) start = datetime.now() @@ -958,7 +960,8 @@ def track_shell_started(model_type: Text) -> None: """Track when a user starts a bot using rasa shell. Args: - model_type: Type of the model, core / nlu or rasa.""" + model_type: Type of the model, core / nlu or rasa. + """ _track(TELEMETRY_SHELL_STARTED_EVENT, {"type": model_type}) diff --git a/tests/conftest.py b/tests/conftest.py index 8edb4abb48d7..e1cf0122677e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,7 @@ def nlu_data_path() -> Text: @pytest.fixture(scope="session") def config_path() -> Text: - return "rasa/shared/importers/default_config.yml" + return "rasa/engine/recipes/config_files/default_config.yml" @pytest.fixture(scope="session") diff --git a/tests/engine/recipes/test_default_recipe.py b/tests/engine/recipes/test_default_recipe.py index 51735fc4c947..463a03c69ab1 100644 --- a/tests/engine/recipes/test_default_recipe.py +++ b/tests/engine/recipes/test_default_recipe.py @@ -1,8 +1,12 @@ -from typing import Text, Dict, Any +from typing import Text, Dict, Any, Set +import shutil import pytest +from _pytest.capture import CaptureFixture +from pathlib import Path import rasa.shared.utils.io +from rasa.shared.constants import CONFIG_AUTOCONFIGURABLE_KEYS from 
rasa.core.policies.ted_policy import TEDPolicy from rasa.engine.graph import GraphSchema, GraphComponent, ExecutionContext from rasa.engine.recipes.default_recipe import ( @@ -19,11 +23,17 @@ from rasa.nlu.classifiers.sklearn_intent_classifier import SklearnIntentClassifier from rasa.nlu.extractors.mitie_entity_extractor import MitieEntityExtractor from rasa.shared.exceptions import InvalidConfigException -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType import rasa.engine.validation from rasa.shared.importers.rasa import RasaFileImporter +CONFIG_FOLDER = Path("data/test_config") + +SOME_CONFIG = CONFIG_FOLDER / "stack_config.yml" +DEFAULT_CONFIG = Path("rasa/engine/recipes/config_files/default_config.yml") + + def test_recipe_for_name(): recipe = Recipe.recipe_for_name("default.v1") assert isinstance(recipe, DefaultV1Recipe) @@ -35,7 +45,7 @@ def test_recipe_for_name(): [ # The default config is the config which most users run ( - "rasa/shared/importers/default_config.yml", + "rasa/engine/recipes/config_files/default_config.yml", "data/graph_schemas/default_config_e2e_train_schema.yml", "data/graph_schemas/default_config_e2e_predict_schema.yml", TrainingType.END_TO_END, @@ -43,21 +53,21 @@ def test_recipe_for_name(): ), # The default config without end to end ( - "rasa/shared/importers/default_config.yml", + "rasa/engine/recipes/config_files/default_config.yml", "data/graph_schemas/default_config_train_schema.yml", "data/graph_schemas/default_config_predict_schema.yml", TrainingType.BOTH, False, ), ( - "rasa/shared/importers/default_config.yml", + "rasa/engine/recipes/config_files/default_config.yml", "data/graph_schemas/default_config_core_train_schema.yml", "data/graph_schemas/default_config_core_predict_schema.yml", TrainingType.CORE, False, ), ( - "rasa/shared/importers/default_config.yml", + "rasa/engine/recipes/config_files/default_config.yml", "data/graph_schemas/default_config_nlu_train_schema.yml", 
"data/graph_schemas/default_config_nlu_predict_schema.yml", TrainingType.NLU, @@ -109,7 +119,7 @@ def test_recipe_for_name(): ), # A full model which wants to be finetuned ( - "rasa/shared/importers/default_config.yml", + "rasa/engine/recipes/config_files/default_config.yml", "data/graph_schemas/default_config_finetune_schema.yml", "data/graph_schemas/default_config_predict_schema.yml", TrainingType.BOTH, @@ -320,7 +330,7 @@ def test_epoch_fraction_cli_param(): expected_train_schema = GraphSchema.from_dict(expected_schema_as_dict) config = rasa.shared.utils.io.read_yaml_file( - "rasa/shared/importers/default_config.yml" + "rasa/engine/recipes/config_files/default_config.yml" ) recipe = Recipe.recipe_for_name(DefaultV1Recipe.name) @@ -453,3 +463,196 @@ def test_train_core_without_nlu_pipeline(): DefaultV1Recipe().graph_config_for_recipe( {"policies": []}, {}, TrainingType.CORE ) + + +@pytest.mark.parametrize( + "config_path, expected_keys_to_configure", + [ + (Path("rasa/cli/initial_project/config.yml"), {"pipeline", "policies"}), + (CONFIG_FOLDER / "config_policies_empty.yml", {"policies"}), + (CONFIG_FOLDER / "config_pipeline_empty.yml", {"pipeline"}), + (CONFIG_FOLDER / "config_policies_missing.yml", {"policies"}), + (CONFIG_FOLDER / "config_pipeline_missing.yml", {"pipeline"}), + (SOME_CONFIG, set()), + ], +) +def test_get_configuration( + config_path: Path, expected_keys_to_configure: Set[Text], tmp_path: Path +): + new_config_file = tmp_path / "new_config.yml" + shutil.copyfile(config_path, new_config_file) + + config = rasa.shared.utils.io.read_model_configuration(new_config_file) + _config, _missing_keys, configured_keys = DefaultV1Recipe.auto_configure( + new_config_file, config + ) + + assert sorted(configured_keys) == sorted(expected_keys_to_configure) + + +@pytest.mark.parametrize( + "language, keys_to_configure", + [ + ("en", {"policies"}), + ("en", {"pipeline"}), + ("fr", {"pipeline"}), + ("en", {"policies", "pipeline"}), + ], +) +def 
test_auto_configure(language: Text, keys_to_configure: Set[Text]): + expected_config = rasa.shared.utils.io.read_config_file(DEFAULT_CONFIG) + + config = DefaultV1Recipe.complete_config({"language": language}, keys_to_configure) + + for k in keys_to_configure: + assert config[k] == expected_config[k] # given keys are configured correctly + + assert config.get("language") == language + config.pop("language") + assert len(config) == len(keys_to_configure) # no other keys are configured + + +@pytest.mark.parametrize( + "config_path, missing_keys", + [ + (CONFIG_FOLDER / "config_language_only.yml", {"pipeline", "policies"}), + (CONFIG_FOLDER / "config_policies_missing.yml", {"policies"}), + (CONFIG_FOLDER / "config_pipeline_missing.yml", {"pipeline"}), + (SOME_CONFIG, []), + ], +) +def test_add_missing_config_keys_to_file( + tmp_path: Path, config_path: Path, missing_keys: Set[Text] +): + config_file = str(tmp_path / "config.yml") + shutil.copyfile(str(config_path), config_file) + + DefaultV1Recipe._add_missing_config_keys_to_file(config_file, missing_keys) + + config_after_addition = rasa.shared.utils.io.read_config_file(config_file) + + assert all(key in config_after_addition for key in missing_keys) + + +def test_dump_config_missing_file(tmp_path: Path, capsys: CaptureFixture): + + config_path = tmp_path / "non_existent_config.yml" + + config = rasa.shared.utils.io.read_config_file(str(SOME_CONFIG)) + + DefaultV1Recipe._dump_config(config, str(config_path), set(), {"policies"}) + + assert not config_path.exists() + + captured = capsys.readouterr() + assert "has been removed or modified" in captured.out + + +# Test a few cases that are known to be potentially tricky (have failed in the past) +@pytest.mark.parametrize( + "input_file, expected_file, autoconfig_keys", + [ + ( + "config_with_comments.yml", + "config_with_comments_after_dumping.yml", + {"policies"}, + ), # comments in various positions + ( + "config_empty_en.yml", + "config_empty_en_after_dumping.yml", + 
{"policies", "pipeline"}, + ), # no empty lines + ( + "config_empty_fr.yml", + "config_empty_fr_after_dumping.yml", + {"policies", "pipeline"}, + ), # no empty lines, with different language + ( + "config_with_comments_after_dumping.yml", + "config_with_comments_after_dumping.yml", + {"policies"}, + ), # with previous auto config that needs to be overwritten + ], +) +def test_dump_config( + tmp_path: Path, + input_file: Text, + expected_file: Text, + capsys: CaptureFixture, + autoconfig_keys: Set[Text], +): + config_file = str(tmp_path / "config.yml") + shutil.copyfile(str(CONFIG_FOLDER / input_file), config_file) + old_config = rasa.shared.utils.io.read_model_configuration(config_file) + DefaultV1Recipe.auto_configure(config_file, old_config) + new_config = rasa.shared.utils.io.read_model_configuration(config_file) + + expected = rasa.shared.utils.io.read_model_configuration( + CONFIG_FOLDER / expected_file + ) + + assert new_config == expected + + captured = capsys.readouterr() + assert "does not exist or is empty" not in captured.out + + for k in CONFIG_AUTOCONFIGURABLE_KEYS: + if k in autoconfig_keys: + assert k in captured.out + else: + assert k not in captured.out + + +@pytest.mark.parametrize( + "input_file, expected_file, training_type", + [ + ( + "config_empty_en.yml", + "config_empty_en_after_dumping.yml", + TrainingType.BOTH, + ), + ( + "config_empty_en.yml", + "config_empty_en_after_dumping_core.yml", + TrainingType.CORE, + ), + ( + "config_empty_en.yml", + "config_empty_en_after_dumping_nlu.yml", + TrainingType.NLU, + ), + ], +) +def test_get_configuration_for_different_training_types( + tmp_path: Path, + input_file: Text, + expected_file: Text, + training_type: TrainingType, +): + config_file = str(tmp_path / "config.yml") + shutil.copyfile(str(CONFIG_FOLDER / input_file), config_file) + config = rasa.shared.utils.io.read_model_configuration(config_file) + + DefaultV1Recipe.auto_configure(config_file, config, training_type) + + actual = 
rasa.shared.utils.io.read_file(config_file) + + expected = rasa.shared.utils.io.read_file(str(CONFIG_FOLDER / expected_file)) + + assert actual == expected + + +def test_comment_causing_invalid_autoconfig(tmp_path: Path): + """Regression test for https://github.com/RasaHQ/rasa/issues/6948.""" + config_file = tmp_path / "config.yml" + shutil.copyfile( + str(CONFIG_FOLDER / "config_with_comment_between_suggestions.yml"), config_file + ) + config = rasa.shared.utils.io.read_model_configuration(config_file) + + _ = DefaultV1Recipe.auto_configure(str(config_file), config) + + # This should not throw + dumped = rasa.shared.utils.io.read_yaml_file(config_file) + + assert dumped diff --git a/tests/engine/recipes/test_graph_recipe.py b/tests/engine/recipes/test_graph_recipe.py new file mode 100644 index 000000000000..8830c8cdf0ab --- /dev/null +++ b/tests/engine/recipes/test_graph_recipe.py @@ -0,0 +1,162 @@ +from typing import Text + +import pytest +from pathlib import Path + +import rasa.shared.utils.io +from rasa.engine.exceptions import GraphSchemaException +from rasa.engine.graph import GraphSchema +from rasa.engine.recipes.graph_recipe import GraphV1Recipe +from rasa.engine.recipes.recipe import Recipe +from rasa.shared.data import TrainingType +import rasa.engine.validation + + +CONFIG_FOLDER = Path("data/test_config") +# The graph config is equivalent to the default config in graph schema format. +GRAPH_CONFIG = CONFIG_FOLDER / "graph_config.yml" +# Short config has a single node for each of train and predict; should be fast to test. 
+SHORT_CONFIG = CONFIG_FOLDER / "graph_config_short.yml" + + +def test_recipe_for_name(): + recipe = Recipe.recipe_for_name("graph.v1") + assert isinstance(recipe, GraphV1Recipe) + + +@pytest.mark.parametrize( + "config_path, expected_train_schema_path, expected_predict_schema_path, " + "training_type", + [ + ( + GRAPH_CONFIG, + "data/graph_schemas/default_config_train_schema.yml", + "data/graph_schemas/default_config_predict_schema.yml", + TrainingType.END_TO_END, + ), + ( + SHORT_CONFIG, + "data/graph_schemas/graph_config_short_train_schema.yml", + "data/graph_schemas/graph_config_short_predict_schema.yml", + TrainingType.BOTH, + ), + ( + SHORT_CONFIG, + "data/graph_schemas/graph_config_short_train_schema.yml", + "data/graph_schemas/graph_config_short_predict_schema.yml", + TrainingType.NLU, + ), + ( + SHORT_CONFIG, + "data/graph_schemas/graph_config_short_train_schema.yml", + "data/graph_schemas/graph_config_short_predict_schema.yml", + TrainingType.CORE, + ), + ], +) +def test_generate_graphs( + config_path: Text, + expected_train_schema_path: Text, + expected_predict_schema_path: Text, + training_type: TrainingType, +): + expected_schema_as_dict = rasa.shared.utils.io.read_yaml_file( + expected_train_schema_path + ) + expected_train_schema = GraphSchema.from_dict(expected_schema_as_dict) + + expected_schema_as_dict = rasa.shared.utils.io.read_yaml_file( + expected_predict_schema_path + ) + expected_predict_schema = GraphSchema.from_dict(expected_schema_as_dict) + + config = rasa.shared.utils.io.read_yaml_file(config_path) + + recipe = Recipe.recipe_for_name(GraphV1Recipe.name) + model_config = recipe.graph_config_for_recipe( + config, {}, training_type=training_type + ) + + assert model_config.train_schema == expected_train_schema + assert model_config.predict_schema == expected_predict_schema + + if training_type == TrainingType.NLU: + core_target = None + else: + core_target = config.get("core_target", "select_prediction") + + assert model_config.core_target 
== core_target + assert model_config.nlu_target == config.get( + "nlu_target", "run_RegexMessageHandler" + ) + + rasa.engine.validation.validate(model_config) + + +def test_language_returning(): + config = rasa.shared.utils.io.read_yaml( + """ + language: "xy" + recipe: graph.v1 + core_target: doesnt_validate_or_run + nlu_target: doesnt_validate_or_run + + train_schema: + nodes: {} + predict_schema: + nodes: {} + """ + ) + + recipe = Recipe.recipe_for_name(GraphV1Recipe.name) + model_config = recipe.graph_config_for_recipe(config, {}) + + assert model_config.language == "xy" + + +def test_retrieve_via_invalid_module_path(): + with pytest.raises(GraphSchemaException): + path = "rasa.core.policies.ted_policy.TEDPolicy1000" + GraphV1Recipe().graph_config_for_recipe( + { + "core_target": "doesnt_validate_or_run", + "nlu_target": "doesnt_validate_or_run", + "train_schema": {"nodes": {"some_graph_node": {"uses": path}}}, + "predict_schema": {}, + }, + cli_parameters={}, + training_type=TrainingType.CORE, + ) + + +def test_cli_parameter_warns(): + with pytest.warns( + UserWarning, match="Graph Recipe does not utilize CLI parameters" + ): + GraphV1Recipe().graph_config_for_recipe( + { + "core_target": "doesnt_validate_or_run", + "nlu_target": "doesnt_validate_or_run", + "train_schema": {"nodes": {}}, + "predict_schema": {"nodes": {}}, + }, + cli_parameters={"num_threads": 1, "epochs": 5}, + training_type=TrainingType.BOTH, + ) + + +def test_is_finetuning_warns(): + with pytest.warns( + UserWarning, match="Graph Recipe does not utilize CLI parameters" + ): + GraphV1Recipe().graph_config_for_recipe( + { + "core_target": "doesnt_validate_or_run", + "nlu_target": "doesnt_validate_or_run", + "train_schema": {"nodes": {}}, + "predict_schema": {"nodes": {}}, + }, + cli_parameters={}, + training_type=TrainingType.BOTH, + is_finetuning=True, + ) diff --git a/tests/engine/storage/test_local_model_storage.py b/tests/engine/storage/test_local_model_storage.py index 
03a7a739d2ef..136ce7640a5c 100644 --- a/tests/engine/storage/test_local_model_storage.py +++ b/tests/engine/storage/test_local_model_storage.py @@ -15,7 +15,7 @@ from rasa.engine.storage.resource import Resource from rasa.exceptions import UnsupportedModelVersionError from rasa.shared.core.domain import Domain -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType from tests.engine.graph_components_test_classes import PersistableTestComponent diff --git a/tests/engine/storage/test_storage.py b/tests/engine/storage/test_storage.py index 3c169772fb54..edd31deabf13 100644 --- a/tests/engine/storage/test_storage.py +++ b/tests/engine/storage/test_storage.py @@ -3,7 +3,7 @@ import pytest from rasa.exceptions import UnsupportedModelVersionError -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType import rasa.shared.utils.io from rasa.engine.graph import GraphSchema, SchemaNode from rasa.engine.storage.storage import ModelMetadata diff --git a/tests/engine/test_loader.py b/tests/engine/test_loader.py index baf634f6ebd0..661c4325eccb 100644 --- a/tests/engine/test_loader.py +++ b/tests/engine/test_loader.py @@ -14,7 +14,7 @@ from rasa.engine.storage.storage import ModelMetadata, ModelStorage from rasa.engine.training.graph_trainer import GraphTrainer from rasa.shared.core.domain import Domain -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType from rasa.shared.importers.importer import TrainingDataImporter from tests.engine.graph_components_test_classes import PersistableTestComponent diff --git a/tests/engine/test_validation.py b/tests/engine/test_validation.py index 5ddeb062916b..699ffb0cd4b3 100644 --- a/tests/engine/test_validation.py +++ b/tests/engine/test_validation.py @@ -19,7 +19,7 @@ from rasa.engine.storage.resource import Resource from rasa.engine.storage.storage import ModelStorage from rasa.shared.core.domain import 
Domain -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType from rasa.shared.importers.importer import TrainingDataImporter from rasa.shared.nlu.training_data.message import Message from rasa.shared.nlu.training_data.training_data import TrainingData diff --git a/tests/engine/training/test_graph_trainer.py b/tests/engine/training/test_graph_trainer.py index b2a20d07a08b..b1e53227a499 100644 --- a/tests/engine/training/test_graph_trainer.py +++ b/tests/engine/training/test_graph_trainer.py @@ -25,7 +25,7 @@ from rasa.engine.storage.storage import ModelStorage from rasa.engine.training.graph_trainer import GraphTrainer from rasa.shared.core.domain import Domain -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType from rasa.shared.importers.importer import TrainingDataImporter from tests.engine.graph_components_test_classes import ( AddInputs, diff --git a/tests/graph_components/validators/test_default_recipe_validator.py b/tests/graph_components/validators/test_default_recipe_validator.py index be3c09bcdf45..669319c19d2a 100644 --- a/tests/graph_components/validators/test_default_recipe_validator.py +++ b/tests/graph_components/validators/test_default_recipe_validator.py @@ -41,7 +41,7 @@ from rasa.shared.core.training_data.structures import StoryGraph from rasa.shared.core.domain import KEY_FORMS, Domain, InvalidDomain from rasa.shared.exceptions import InvalidConfigException -from rasa.shared.importers.autoconfig import TrainingType +from rasa.shared.data import TrainingType from rasa.shared.nlu.constants import ( ENTITIES, ENTITY_ATTRIBUTE_GROUP, @@ -55,6 +55,7 @@ from rasa.shared.nlu.training_data.training_data import TrainingData from rasa.shared.nlu.training_data.message import Message from rasa.shared.importers.importer import TrainingDataImporter +from rasa.shared.utils.validation import YamlValidationException import rasa.utils.common @@ -459,7 +460,7 @@ def 
test_nlu_raise_if_more_than_one_tokenizer(nodes: Dict[Text, SchemaNode]): def test_nlu_do_not_raise_if_two_tokenizers_with_end_to_end(): config = rasa.shared.utils.io.read_yaml_file( - "rasa/shared/importers/default_config.yml" + "rasa/engine/recipes/config_files/default_config.yml" ) graph_config = DefaultV1Recipe().graph_config_for_recipe( config, cli_parameters={}, training_type=TrainingType.END_TO_END @@ -1015,8 +1016,13 @@ def test_no_warnings_with_default_project(tmp_path: Path): training_data_paths=[str(tmp_path / "data")], ) + config, _missing_keys, _configured_keys = DefaultV1Recipe.auto_configure( + importer.get_config_file_for_auto_config(), + importer.get_config(), + TrainingType.END_TO_END, + ) graph_config = DefaultV1Recipe().graph_config_for_recipe( - importer.get_config(), cli_parameters={}, training_type=TrainingType.END_TO_END + config, cli_parameters={}, training_type=TrainingType.END_TO_END ) validator = DefaultV1RecipeValidator(graph_config.train_schema) @@ -1030,3 +1036,17 @@ def test_no_warnings_with_default_project(tmp_path: Path): for warn in records.list ] ) + + +def test_importer_with_invalid_model_config(tmp_path: Path): + invalid = {"version": "2.0", "policies": ["name"]} + config_file = tmp_path / "config.yml" + rasa.shared.utils.io.write_yaml(invalid, config_file) + + with pytest.raises(YamlValidationException): + importer = TrainingDataImporter.load_from_config(str(config_file)) + DefaultV1Recipe.auto_configure( + importer.get_config_file_for_auto_config(), + importer.get_config(), + TrainingType.END_TO_END, + ) diff --git a/tests/shared/importers/test_autoconfig.py b/tests/shared/importers/test_autoconfig.py deleted file mode 100644 index 0c371bea6f23..000000000000 --- a/tests/shared/importers/test_autoconfig.py +++ /dev/null @@ -1,223 +0,0 @@ -import shutil -from pathlib import Path -from typing import Text, Set -from unittest.mock import Mock - -import pytest -from _pytest.capture import CaptureFixture -from _pytest.monkeypatch 
import MonkeyPatch - -from rasa.shared.constants import CONFIG_AUTOCONFIGURABLE_KEYS -import rasa.shared.utils.io -from rasa.shared.importers import autoconfig - -CONFIG_FOLDER = Path("data/test_config") - -SOME_CONFIG = CONFIG_FOLDER / "stack_config.yml" -DEFAULT_CONFIG = Path("rasa/shared/importers/default_config.yml") - - -@pytest.mark.parametrize( - "config_path, autoconfig_keys", - [ - (Path("rasa/cli/initial_project/config.yml"), {"pipeline", "policies"}), - (CONFIG_FOLDER / "config_policies_empty.yml", {"policies"}), - (CONFIG_FOLDER / "config_pipeline_empty.yml", {"pipeline"}), - (CONFIG_FOLDER / "config_policies_missing.yml", {"policies"}), - (CONFIG_FOLDER / "config_pipeline_missing.yml", {"pipeline"}), - (SOME_CONFIG, set()), - ], -) -def test_get_configuration( - config_path: Path, autoconfig_keys: Set[Text], monkeypatch: MonkeyPatch -): - def _auto_configure(_, keys_to_configure: Set[Text]) -> Set[Text]: - return keys_to_configure - - monkeypatch.setattr(autoconfig, "_dump_config", Mock()) - monkeypatch.setattr(autoconfig, "_auto_configure", _auto_configure) - - config = autoconfig.get_configuration(str(config_path)) - - if autoconfig_keys: - expected_config = _auto_configure(config, autoconfig_keys) - else: - expected_config = config - - assert sorted(config) == sorted(expected_config) - - -@pytest.mark.parametrize("config_file", ("non_existent_config.yml", None)) -def test_get_configuration_missing_file(tmp_path: Path, config_file: Text): - if config_file: - config_file = tmp_path / "non_existent_config.yml" - - config = autoconfig.get_configuration(str(config_file)) - - assert config == {} - - -@pytest.mark.parametrize( - "language, keys_to_configure", - [ - ("en", {"policies"}), - ("en", {"pipeline"}), - ("fr", {"pipeline"}), - ("en", {"policies", "pipeline"}), - ], -) -def test_auto_configure(language: Text, keys_to_configure: Set[Text]): - expected_config = rasa.shared.utils.io.read_config_file(DEFAULT_CONFIG) - - config = 
autoconfig._auto_configure({"language": language}, keys_to_configure) - - for k in keys_to_configure: - assert config[k] == expected_config[k] # given keys are configured correctly - - assert config.get("language") == language - config.pop("language") - assert len(config) == len(keys_to_configure) # no other keys are configured - - -@pytest.mark.parametrize( - "config_path, missing_keys", - [ - (CONFIG_FOLDER / "config_language_only.yml", {"pipeline", "policies"}), - (CONFIG_FOLDER / "config_policies_missing.yml", {"policies"}), - (CONFIG_FOLDER / "config_pipeline_missing.yml", {"pipeline"}), - (SOME_CONFIG, []), - ], -) -def test_add_missing_config_keys_to_file( - tmp_path: Path, config_path: Path, missing_keys: Set[Text] -): - config_file = str(tmp_path / "config.yml") - shutil.copyfile(str(config_path), config_file) - - autoconfig._add_missing_config_keys_to_file(config_file, missing_keys) - - config_after_addition = rasa.shared.utils.io.read_config_file(config_file) - - assert all(key in config_after_addition for key in missing_keys) - - -def test_dump_config_missing_file(tmp_path: Path, capsys: CaptureFixture): - - config_path = tmp_path / "non_existent_config.yml" - - config = rasa.shared.utils.io.read_config_file(str(SOME_CONFIG)) - - autoconfig._dump_config(config, str(config_path), set(), {"policies"}) - - assert not config_path.exists() - - captured = capsys.readouterr() - assert "has been removed or modified" in captured.out - - -# Test a few cases that are known to be potentially tricky (have failed in the past) -@pytest.mark.parametrize( - "input_file, expected_file, autoconfig_keys", - [ - ( - "config_with_comments.yml", - "config_with_comments_after_dumping.yml", - {"policies"}, - ), # comments in various positions - ( - "config_empty_en.yml", - "config_empty_en_after_dumping.yml", - {"policies", "pipeline"}, - ), # no empty lines - ( - "config_empty_fr.yml", - "config_empty_fr_after_dumping.yml", - {"policies", "pipeline"}, - ), # no empty lines, 
with different language - ( - "config_with_comments_after_dumping.yml", - "config_with_comments_after_dumping.yml", - {"policies"}, - ), # with previous auto config that needs to be overwritten - ], -) -def test_dump_config( - tmp_path: Path, - input_file: Text, - expected_file: Text, - capsys: CaptureFixture, - autoconfig_keys: Set[Text], -): - config_file = str(tmp_path / "config.yml") - shutil.copyfile(str(CONFIG_FOLDER / input_file), config_file) - - autoconfig.get_configuration(config_file) - - actual = rasa.shared.utils.io.read_file(config_file) - - expected = rasa.shared.utils.io.read_file(str(CONFIG_FOLDER / expected_file)) - - assert actual == expected - - captured = capsys.readouterr() - assert "does not exist or is empty" not in captured.out - - for k in CONFIG_AUTOCONFIGURABLE_KEYS: - if k in autoconfig_keys: - assert k in captured.out - else: - assert k not in captured.out - - -@pytest.mark.parametrize( - "input_file, expected_file, training_type", - [ - ( - "config_empty_en.yml", - "config_empty_en_after_dumping.yml", - autoconfig.TrainingType.BOTH, - ), - ( - "config_empty_en.yml", - "config_empty_en_after_dumping_core.yml", - autoconfig.TrainingType.CORE, - ), - ( - "config_empty_en.yml", - "config_empty_en_after_dumping_nlu.yml", - autoconfig.TrainingType.NLU, - ), - ], -) -def test_get_configuration_for_different_training_types( - tmp_path: Path, - input_file: Text, - expected_file: Text, - training_type: autoconfig.TrainingType, -): - config_file = str(tmp_path / "config.yml") - shutil.copyfile(str(CONFIG_FOLDER / input_file), config_file) - - autoconfig.get_configuration(config_file, training_type) - - actual = rasa.shared.utils.io.read_file(config_file) - - expected = rasa.shared.utils.io.read_file(str(CONFIG_FOLDER / expected_file)) - - assert actual == expected - - -def test_comment_causing_invalid_autoconfig(tmp_path: Path): - """Regression test for https://github.com/RasaHQ/rasa/issues/6948.""" - - config_file = tmp_path / "config.yml" - 
shutil.copyfile( - str(CONFIG_FOLDER / "config_with_comment_between_suggestions.yml"), config_file - ) - - _ = autoconfig.get_configuration(str(config_file)) - - # This should not throw - dumped = rasa.shared.utils.io.read_yaml_file(config_file) - - assert dumped diff --git a/tests/shared/importers/test_importer.py b/tests/shared/importers/test_importer.py index 65592a2d5c45..7af27220fa5b 100644 --- a/tests/shared/importers/test_importer.py +++ b/tests/shared/importers/test_importer.py @@ -24,7 +24,6 @@ from rasa.shared.importers.rasa import RasaFileImporter from rasa.shared.nlu.constants import ACTION_TEXT, ACTION_NAME, INTENT, TEXT from rasa.shared.nlu.training_data.message import Message -from rasa.shared.utils.validation import YamlValidationException @pytest.fixture() @@ -368,15 +367,6 @@ def test_nlu_data_domain_sync_responses(project: Text): assert "utter_rasa" in domain.responses.keys() -def test_importer_with_invalid_model_config(tmp_path: Path): - invalid = {"version": "2.0", "policies": ["name"]} - config_file = tmp_path / "config.yml" - rasa.shared.utils.io.write_yaml(invalid, config_file) - - with pytest.raises(YamlValidationException): - TrainingDataImporter.load_from_config(str(config_file)) - - def test_importer_with_unicode_files(): importer = TrainingDataImporter.load_from_dict( training_data_paths=["./data/test_nlu_no_responses/nlu_with_unicode.yml"] diff --git a/tests/test_model_training.py b/tests/test_model_training.py index 915011604cb2..ed17e79c820e 100644 --- a/tests/test_model_training.py +++ b/tests/test_model_training.py @@ -26,11 +26,13 @@ import rasa.nlu from rasa.engine.exceptions import GraphSchemaValidationException from rasa.engine.storage.local_model_storage import LocalModelStorage +from rasa.engine.recipes.default_recipe import DefaultV1Recipe from rasa.engine.graph import GraphModelConfiguration from rasa.engine.training.graph_trainer import GraphTrainer +from rasa.shared.data import TrainingType + from 
rasa.nlu.classifiers.diet_classifier import DIETClassifier -import rasa.shared.importers.autoconfig as autoconfig import rasa.shared.utils.io from rasa.shared.core.domain import Domain from rasa.shared.exceptions import InvalidConfigException @@ -210,8 +212,8 @@ def test_train_core_autoconfig( monkeypatch.setattr(tempfile, "tempdir", tmp_path) # mock function that returns configuration - mocked_get_configuration = Mock(wraps=autoconfig.get_configuration) - monkeypatch.setattr(autoconfig, "get_configuration", mocked_get_configuration) + mocked_auto_configure = Mock(wraps=DefaultV1Recipe.auto_configure) + monkeypatch.setattr(DefaultV1Recipe, "auto_configure", mocked_auto_configure) # skip actual core training monkeypatch.setattr(GraphTrainer, GraphTrainer.train.__name__, Mock()) @@ -224,9 +226,9 @@ def test_train_core_autoconfig( output="test_train_core_temp_files_models", ) - mocked_get_configuration.assert_called_once() - _, args, _ = mocked_get_configuration.mock_calls[0] - assert args[1] == autoconfig.TrainingType.CORE + mocked_auto_configure.assert_called_once() + _, args, _ = mocked_auto_configure.mock_calls[0] + assert args[2] == TrainingType.CORE def test_train_nlu_autoconfig( @@ -238,8 +240,8 @@ def test_train_nlu_autoconfig( monkeypatch.setattr(tempfile, "tempdir", tmp_path) # mock function that returns configuration - mocked_get_configuration = Mock(wraps=autoconfig.get_configuration) - monkeypatch.setattr(autoconfig, "get_configuration", mocked_get_configuration) + mocked_auto_configuration = Mock(wraps=DefaultV1Recipe.auto_configure) + monkeypatch.setattr(DefaultV1Recipe, "auto_configure", mocked_auto_configuration) monkeypatch.setattr(GraphTrainer, GraphTrainer.train.__name__, Mock()) # do training @@ -247,9 +249,9 @@ def test_train_nlu_autoconfig( stack_config_path, nlu_data_path, output="test_train_nlu_temp_files_models" ) - mocked_get_configuration.assert_called_once() - _, args, _ = mocked_get_configuration.mock_calls[0] - assert args[1] == 
autoconfig.TrainingType.NLU + mocked_auto_configuration.assert_called_once() + _, args, _ = mocked_auto_configuration.mock_calls[0] + assert args[2] == TrainingType.NLU def new_model_path_in_same_dir(old_model_path: Text) -> Text: @@ -566,7 +568,7 @@ def test_model_finetuning_core( _, metadata = LocalModelStorage.from_model_archive(storage_dir, Path(result)) assert metadata.train_schema.nodes["train_TEDPolicy0"].config[EPOCHS] == 2 - assert metadata.training_type == autoconfig.TrainingType.CORE + assert metadata.training_type == TrainingType.CORE def test_model_finetuning_core_with_default_epochs( @@ -686,7 +688,7 @@ def test_model_finetuning_nlu( _, metadata = LocalModelStorage.from_model_archive(storage_dir, Path(model_name)) assert metadata.train_schema.nodes["train_DIETClassifier5"].config[EPOCHS] == 2 - assert metadata.training_type == autoconfig.TrainingType.NLU + assert metadata.training_type == TrainingType.NLU def test_model_finetuning_nlu_new_label(tmp_path: Path, trained_nlu_moodbot_path: Text): diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py index 742e8691229b..b1f1d77c0a93 100644 --- a/tests/test_telemetry.py +++ b/tests/test_telemetry.py @@ -53,7 +53,7 @@ async def test_events_schema( training_data = TrainingDataImporter.load_from_config(config_path) with telemetry.track_model_training(training_data, "rasa"): - await asyncio.sleep(1) + pass telemetry.track_telemetry_disabled() @@ -89,10 +89,17 @@ async def test_events_schema( telemetry.track_markers_parsed_count(1, 1, 1) + # Also track train started for a graph config + training_data = TrainingDataImporter.load_from_config( + "data/test_config/graph_config.yml" + ) + with telemetry.track_model_training(training_data, "rasa"): + pass + pending = asyncio.all_tasks() - initial await asyncio.gather(*pending) - assert mock.call_count == 19 + assert mock.call_count == 21 for args, _ in mock.call_args_list: event = args[0] diff --git a/tests/test_validator.py b/tests/test_validator.py index 
767a83c13e49..adc41604989c 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -6,7 +6,6 @@ from rasa.validator import Validator from rasa.shared.importers.rasa import RasaFileImporter -from rasa.shared.importers.autoconfig import TrainingType from rasa.shared.core.domain import Domain from pathlib import Path @@ -45,7 +44,6 @@ def test_verify_nlu_with_e2e_story(tmp_path: Path, nlu_data_path: Path): config_file="data/test_moodbot/config.yml", domain_path="data/test_moodbot/domain.yml", training_data_paths=[story_file_name, nlu_data_path], - training_type=TrainingType.NLU, ) validator = Validator.from_importer(importer) @@ -133,7 +131,6 @@ def test_verify_bad_e2e_story_structure_when_text_identical(tmp_path: Path): config_file="data/test_config/config_defaults.yml", domain_path="data/test_domains/default.yml", training_data_paths=[story_file_name], - training_type=TrainingType.NLU, ) validator = Validator.from_importer(importer) assert not validator.verify_story_structure(ignore_warnings=False) @@ -166,7 +163,6 @@ def test_verify_correct_e2e_story_structure(tmp_path: Path): config_file="data/test_config/config_defaults.yml", domain_path="data/test_domains/default.yml", training_data_paths=[story_file_name], - training_type=TrainingType.NLU, ) validator = Validator.from_importer(importer) assert validator.verify_story_structure(ignore_warnings=False) @@ -192,7 +188,6 @@ def test_verify_correct_e2e_story_structure_with_intents(tmp_path: Path): config_file="data/test_config/config_defaults.yml", domain_path="data/test_domains/default.yml", training_data_paths=[story_file_name], - training_type=TrainingType.NLU, ) validator = Validator.from_importer(importer) assert validator.verify_story_structure(ignore_warnings=False) @@ -478,7 +473,6 @@ def test_response_selector_responses_in_domain_no_errors(): training_data_paths=[ "data/test_yaml_stories/test_base_retrieval_intent_story.yml" ], - training_type=TrainingType.CORE, ) validator = 
Validator.from_importer(importer) assert validator.verify_utterances_in_stories(ignore_warnings=True)