Merge pull request #7456 from RasaHQ/7329/load_models_in_finetune_mode_nlu

#7329 load models in finetune mode nlu
joejuzl authored Dec 7, 2020
2 parents d5144a6 + d9f563a commit 36fc2b6
Showing 4 changed files with 155 additions and 33 deletions.
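This change lets NLU training start from a previously trained model instead of a freshly built pipeline: `rasa.train.train_nlu` accepts `model_to_finetune` and `finetuning_epoch_fraction`, the old model is loaded as an `Interpreter` against the new config, and the `Trainer` then reuses its pipeline. A hedged usage sketch, mirroring the updated tests below (the paths and the 0.5 fraction are illustrative assumptions, not part of this diff):

    # Sketch only: paths and the 0.5 fraction are illustrative; the call shape
    # follows tests/test_train.py in this commit.
    from rasa.train import train_nlu

    train_nlu(
        "examples/moodbot/config.yml",              # new config; its epoch counts drive fine-tuning
        "examples/moodbot/data/nlu.yml",
        output="models",
        model_to_finetune="models/moodbot.tar.gz",  # hypothetical previously trained model archive
        finetuning_epoch_fraction=0.5,              # scale every component's epochs by 0.5
    )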
72 changes: 65 additions & 7 deletions rasa/nlu/model.py
@@ -1,6 +1,7 @@
import copy
import datetime
import logging
from math import ceil
import os
from typing import Any, Dict, List, Optional, Text

@@ -26,6 +27,7 @@
from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.nlu.training_data.message import Message
from rasa.nlu.utils import write_json_to_file
from rasa.utils.tensorflow.constants import EPOCHS

logger = logging.getLogger(__name__)

@@ -138,7 +140,8 @@ def __init__(
cfg: RasaNLUModelConfig,
component_builder: Optional[ComponentBuilder] = None,
skip_validation: bool = False,
):
model_to_finetune: Optional["Interpreter"] = None,
) -> None:

self.config = cfg
self.skip_validation = skip_validation
@@ -154,8 +157,10 @@ def __init__(
if not self.skip_validation:
components.validate_requirements(cfg.component_names)

# build pipeline
self.pipeline = self._build_pipeline(cfg, component_builder)
if model_to_finetune:
self.pipeline = model_to_finetune.pipeline
else:
self.pipeline = self._build_pipeline(cfg, component_builder)

def _build_pipeline(
self, cfg: RasaNLUModelConfig, component_builder: ComponentBuilder
@@ -297,6 +302,8 @@ def load(
model_dir: Text,
component_builder: Optional[ComponentBuilder] = None,
skip_validation: bool = False,
new_config: Optional[Dict] = None,
finetuning_epoch_fraction: float = 1.0,
) -> "Interpreter":
"""Create an interpreter based on a persisted model.
@@ -307,25 +314,76 @@ def load(
model_dir: The path of the model to load
component_builder: The
:class:`rasa.nlu.components.ComponentBuilder` to use.
new_config: Optional new config to use for the new epochs.
finetuning_epoch_fraction: Value to multiply all epochs by.
Returns:
An interpreter that uses the loaded model.
"""

model_metadata = Metadata.load(model_dir)

if new_config:
Interpreter._update_metadata_epochs(
model_metadata, new_config, finetuning_epoch_fraction
)

Interpreter.ensure_model_compatibility(model_metadata)
return Interpreter.create(model_metadata, component_builder, skip_validation)
return Interpreter.create(
model_metadata,
component_builder,
skip_validation,
should_finetune=new_config is not None,
)

@staticmethod
def _get_default_value_for_component(name: Text, key: Text) -> Any:
from rasa.nlu.registry import get_component_class

return get_component_class(name).defaults[key]

@staticmethod
def _update_metadata_epochs(
model_metadata: Metadata,
new_config: Optional[Dict] = None,
finetuning_epoch_fraction: float = 1.0,
) -> Metadata:
for old_component_config, new_component_config in zip(
model_metadata.metadata["pipeline"], new_config["pipeline"]
):
if EPOCHS in old_component_config:
new_epochs = new_component_config.get(
EPOCHS,
Interpreter._get_default_value_for_component(
old_component_config["class"], EPOCHS
),
)
old_component_config[EPOCHS] = ceil(
new_epochs * finetuning_epoch_fraction
)
return model_metadata

@staticmethod
def create(
model_metadata: Metadata,
component_builder: Optional[ComponentBuilder] = None,
skip_validation: bool = False,
should_finetune: bool = False,
) -> "Interpreter":
"""Load stored model and components defined by the provided metadata."""
"""Create model and components defined by the provided metadata.
context = {}
Args:
model_metadata: The metadata describing each component.
component_builder: The
:class:`rasa.nlu.components.ComponentBuilder` to use.
skip_validation: If set to `True`, does not check that all
required packages for the components are installed
before loading them.
should_finetune: Indicates if the model components will be fine-tuned.
Returns:
An interpreter that uses the created model.
"""
context = {"should_finetune": should_finetune}

if component_builder is None:
# If no builder is passed, every interpreter creation will result
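In short, `Interpreter.load` now rewrites the epoch counts in the persisted metadata before the components are created: for each pipeline component that has an epochs entry, the value from the new config (or the component class default if the new config omits it) is multiplied by `finetuning_epoch_fraction` and rounded up. A minimal, self-contained sketch of that arithmetic, with made-up component entries and an assumed default of 300:

    from math import ceil

    # Assumed shapes only: a stored pipeline entry and the matching new-config entry.
    old_component = {"class": "DIETClassifier", "epochs": 100}   # from persisted metadata
    new_component = {"epochs": 10}                                # from the new config
    finetuning_epoch_fraction = 0.5

    if "epochs" in old_component:
        # Fall back to an assumed class default (300 here), standing in for what
        # _get_default_value_for_component would look up in the registry.
        new_epochs = new_component.get("epochs", 300)
        old_component["epochs"] = ceil(new_epochs * finetuning_epoch_fraction)

    assert old_component["epochs"] == 5   # fine-tuning will run 5 epochs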
6 changes: 3 additions & 3 deletions rasa/nlu/train.py
@@ -3,7 +3,6 @@
from typing import Any, Optional, Text, Tuple, Union, Dict

import rasa.shared.utils.common
import rasa.utils.common as common_utils
from rasa.nlu import config, utils
from rasa.nlu.components import ComponentBuilder
from rasa.nlu.config import RasaNLUModelConfig
@@ -84,7 +83,6 @@ async def train(
training_data_endpoint: Optional[EndpointConfig] = None,
persist_nlu_training_data: bool = False,
model_to_finetune: Optional[Interpreter] = None,
finetuning_epoch_fraction: float = 1.0,
**kwargs: Any,
) -> Tuple[Trainer, Interpreter, Optional[Text]]:
"""Loads the trainer and the data and runs the training of the model."""
@@ -96,7 +94,9 @@ async def train(
# Ensure we are training a model that we can save in the end
# WARN: there is still a race condition if a model with the same name is
# trained in another subprocess
trainer = Trainer(nlu_config, component_builder)
trainer = Trainer(
nlu_config, component_builder, model_to_finetune=model_to_finetune
)
persistor = create_persistor(storage)
if training_data_endpoint is not None:
training_data = await load_data_from_endpoint(
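The functional change here is that the loaded interpreter is threaded through to `Trainer`, which (per the `__init__` change in rasa/nlu/model.py above) reuses the interpreter's already-instantiated pipeline rather than building a new one from the config. A hedged sketch of that call, assuming `nlu_config`, `builder` and `loaded_interpreter` already exist:

    # Sketch only: nlu_config, builder and loaded_interpreter are assumed to be a
    # RasaNLUModelConfig, a ComponentBuilder and a previously loaded Interpreter.
    from rasa.nlu.model import Trainer

    trainer = Trainer(nlu_config, builder, model_to_finetune=loaded_interpreter)
    # trainer.pipeline is loaded_interpreter.pipeline, so training continues from
    # the persisted components instead of freshly constructed ones.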
30 changes: 16 additions & 14 deletions rasa/train.py
@@ -2,7 +2,7 @@
import os
import tempfile
from contextlib import ExitStack
from typing import Text, Optional, List, Union, Dict, TYPE_CHECKING
from typing import Any, Text, Optional, List, Union, Dict, TYPE_CHECKING

import rasa.core.interpreter
from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter
@@ -631,7 +633,9 @@ async def _train_nlu_with_validated_data(
)

if model_to_finetune:
model_to_finetune = _nlu_model_for_finetuning(model_to_finetune)
model_to_finetune = _nlu_model_for_finetuning(
model_to_finetune, config, finetuning_epoch_fraction
)

if not model_to_finetune:
rasa.shared.utils.cli.print_warning(
@@ -652,7 +654,6 @@
fixed_model_name="nlu",
persist_nlu_training_data=persist_nlu_training_data,
model_to_finetune=model_to_finetune,
finetuning_epoch_fraction=finetuning_epoch_fraction,
**additional_arguments,
)
rasa.shared.utils.cli.print_color(
@@ -674,19 +675,20 @@
return _train_path


def _nlu_model_for_finetuning(model_to_finetune: Text) -> Optional[Interpreter]:
from rasa.core.interpreter import RasaNLUInterpreter

def _nlu_model_for_finetuning(
model_to_finetune: Text,
new_config: Dict[Text, Any],
finetuning_epoch_fraction: float = 1.0,
) -> Optional[Interpreter]:
path_to_archive = model.get_model_for_finetuning(model_to_finetune)
if not path_to_archive:
return None

try:
interpreter = _interpreter_from_previous_model(path_to_archive)
if interpreter and isinstance(interpreter, RasaNLUInterpreter):
return interpreter.interpreter
except Exception:
# Anything might go wrong. In that case we skip model finetuning.
pass
with model.unpack_model(path_to_archive) as unpacked:
_, old_nlu = model.get_model_subdirectories(unpacked)

return None
return Interpreter.load(
old_nlu,
new_config=new_config,
finetuning_epoch_fraction=finetuning_epoch_fraction,
)
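`_nlu_model_for_finetuning` now does the loading itself: it resolves the model archive, unpacks it, finds the NLU subdirectory, and calls `Interpreter.load` with the new config and the epoch fraction so the metadata is rewritten before training. A hedged sketch of the same flow, with a placeholder archive path and an assumed fraction of 0.5:

    # Sketch only: the archive path and the 0.5 fraction are placeholders.
    import rasa.model as model
    import rasa.shared.utils.io
    from rasa.nlu.model import Interpreter

    new_config = rasa.shared.utils.io.read_yaml_file("examples/moodbot/config.yml")
    path_to_archive = model.get_model_for_finetuning("models/previous.tar.gz")

    with model.unpack_model(path_to_archive) as unpacked:
        _, old_nlu = model.get_model_subdirectories(unpacked)
        interpreter = Interpreter.load(
            old_nlu,
            new_config=new_config,
            finetuning_epoch_fraction=0.5,
        )
    # The returned interpreter is then passed to rasa.nlu.train as model_to_finetune.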
80 changes: 71 additions & 9 deletions tests/test_train.py
@@ -12,12 +12,15 @@
import rasa.model
import rasa.core
import rasa.nlu
from rasa.nlu.classifiers.diet_classifier import DIETClassifier
import rasa.shared.importers.autoconfig as autoconfig
import rasa.shared.utils.io
from rasa.core.agent import Agent
from rasa.core.interpreter import RasaNLUInterpreter
from rasa.nlu.model import Interpreter

from rasa.train import train_core, train_nlu, train
from rasa.utils.tensorflow.constants import EPOCHS
from tests.conftest import DEFAULT_CONFIG_PATH, DEFAULT_NLU_DATA, AsyncMock
from tests.core.conftest import DEFAULT_DOMAIN_PATH_WITH_SLOTS, DEFAULT_STORIES_FILE
from tests.test_model import _fingerprint
@@ -430,30 +433,89 @@ def test_model_finetuning_nlu(
monkeypatch: MonkeyPatch,
default_domain_path: Text,
default_nlu_data: Text,
default_stack_config: Text,
trained_rasa_model: Text,
trained_moodbot_path: Text,
use_latest_model: bool,
):
mocked_nlu_training = AsyncMock(return_value="")
monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, mocked_nlu_training)

mock_interpreter_create = Mock(wraps=Interpreter.create)
monkeypatch.setattr(Interpreter, "create", mock_interpreter_create)

mock_DIET_load = Mock(wraps=DIETClassifier.load)
monkeypatch.setattr(DIETClassifier, "load", mock_DIET_load)

(tmp_path / "models").mkdir()
output = str(tmp_path / "models")

if use_latest_model:
trained_rasa_model = str(Path(trained_rasa_model).parent)
trained_moodbot_path = str(Path(trained_moodbot_path).parent)

# Typically models will be fine-tuned with a smaller number of epochs than training
# from scratch.
# Fine-tuning will use the number of epochs in the new config.
old_config = rasa.shared.utils.io.read_yaml_file("examples/moodbot/config.yml")
old_config["pipeline"][-1][EPOCHS] = 10
new_config_path = tmp_path / "new_config.yml"
rasa.shared.utils.io.write_yaml(old_config, new_config_path)

train_nlu(
default_stack_config,
default_nlu_data,
str(new_config_path),
"examples/moodbot/data/nlu.yml",
output=output,
model_to_finetune=trained_rasa_model,
finetuning_epoch_fraction=1,
model_to_finetune=trained_moodbot_path,
finetuning_epoch_fraction=0.5,
)

assert mock_interpreter_create.call_args[1]["should_finetune"]

mocked_nlu_training.assert_called_once()
_, kwargs = mocked_nlu_training.call_args
assert isinstance(kwargs["model_to_finetune"], Interpreter)
_, nlu_train_kwargs = mocked_nlu_training.call_args
model_to_finetune = nlu_train_kwargs["model_to_finetune"]
assert isinstance(model_to_finetune, Interpreter)

_, diet_kwargs = mock_DIET_load.call_args
assert diet_kwargs["should_finetune"] is True

new_diet_metadata = model_to_finetune.model_metadata.metadata["pipeline"][-1]
assert new_diet_metadata["name"] == "DIETClassifier"
assert new_diet_metadata[EPOCHS] == 5


def test_model_finetuning_nlu_with_default_epochs(
tmp_path: Path,
monkeypatch: MonkeyPatch,
default_domain_path: Text,
default_nlu_data: Text,
trained_moodbot_path: Text,
):
mocked_nlu_training = AsyncMock(return_value="")
monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, mocked_nlu_training)

(tmp_path / "models").mkdir()
output = str(tmp_path / "models")

# Providing a new config with no epochs means the default number is used
# and then scaled by `finetuning_epoch_fraction`.
old_config = rasa.shared.utils.io.read_yaml_file("examples/moodbot/config.yml")
del old_config["pipeline"][-1][EPOCHS]
new_config_path = tmp_path / "new_config.yml"
rasa.shared.utils.io.write_yaml(old_config, new_config_path)

train_nlu(
str(new_config_path),
"examples/moodbot/data/nlu.yml",
output=output,
model_to_finetune=trained_moodbot_path,
finetuning_epoch_fraction=0.5,
)

mocked_nlu_training.assert_called_once()
_, nlu_train_kwargs = mocked_nlu_training.call_args
model_to_finetune = nlu_train_kwargs["model_to_finetune"]
new_diet_metadata = model_to_finetune.model_metadata.metadata["pipeline"][-1]
assert new_diet_metadata["name"] == "DIETClassifier"
assert new_diet_metadata[EPOCHS] == DIETClassifier.defaults[EPOCHS] * 0.5


@pytest.mark.parametrize("model_to_fine_tune", ["invalid-path-to-model", "."])
