diff --git a/.github/stale.yml b/.github/stale.yml index abfaead479a4..3380984f8673 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -5,7 +5,9 @@ daysUntilClose: 7 # Issues with these labels will never be considered stale exemptLabels: - "type:enhancement ✨" + - "type:enhancement :sparkles:" - "type:discussion 👨‍👧‍👦" + - "type:discussion :family_man_girl_boy:" # Label to use when marking an issue as stale staleLabel: stale # Comment to post when marking an issue as stale. Set to `false` to disable diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2be9c575bfb7..75e0b9ada835 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -17,6 +17,25 @@ This project adheres to `Semantic Versioning`_ starting with version 1.0. .. towncrier release notes start +[1.8.2] - 2020-03-19 +^^^^^^^^^^^^^^^^^^^^ + +Bugfixes +-------- +- `#5438 `_: Fixed bug when installing rasa with ``poetry``. +- `#5413 `_: Fixed bug with ``EmbeddingIntentClassifier``, where results + weren't the same as in 1.7.x. Fixed by setting weight sparsity to 0. + +Improved Documentation +---------------------- +- `#5404 `_: Explain how to run commands as the ``root`` user in Rasa SDK Docker images since version + ``1.8.0``. Since version ``1.8.0`` the Rasa SDK Docker images no longer run as the + ``root`` user by default. For commands which require ``root`` user privileges, you have to + switch back to the ``root`` user in your Docker image as described in + :ref:`deploying-your-rasa-assistant_custom-dependencies`. +- `#5402 `_: Made improvements to the Building Assistants tutorial. + + [1.8.1] - 2020-03-06 ^^^^^^^^^^^^^^^^^^^^ @@ -42,7 +61,7 @@ Deprecations and Removals Features -------- -- `#2674 `_: Add default value ``__other__`` to ``values`` of a ``CategoricalSlot``. +- `#2674 `_: Add default value ``__other__`` to ``values`` of a ``CategoricalSlot``. All values not mentioned in the list of values of a ``CategoricalSlot`` will be mapped to ``__other__`` for featurization. @@ -106,7 +125,7 @@ Improvements - `#4653 `_: Added a new command-line argument ``--init-dir`` to command ``rasa init`` to specify the directory in which the project is initialised. - `#4682 `_: Added support to send images with the Twilio output channel. -- `#4817 `_: Part of Slack sanitization: +- `#4817 `_: Part of Slack sanitization: Multiple garbled URLs in a string coming from Slack will be converted into actual strings. ``Example: health check of and to health check of eemdb.net and eemdb1.net`` @@ -138,7 +157,7 @@ Bugfixes `TwoStageFallbackPolicy `_ are used together. - `#5201 `_: Fixed an incorrectly raised error encountered in pipelines with a ``ResponseSelector`` and NLG. - When NLU training data is split before NLU pipeline comparison, + When NLU training data is split before NLU pipeline comparison, NLG responses were not persisted as well, and therefore training for a pipeline including the ``ResponseSelector`` would fail. NLG responses are now persisted along with NLU data to a ``/train`` directory in the ``run_x/xx%_exclusion`` folder.
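For context on the ``EmbeddingIntentClassifier`` fix above: a weight sparsity of ``0.8`` leaves only about 20% of the weights in each dense layer active, which is why 1.8.0/1.8.1 results differed from 1.7.x until the default was restored to ``0``. The sketch below illustrates the general idea of weight sparsity only; the function name is made up and this is not Rasa's internal implementation.

.. code-block:: python

    import numpy as np

    def sparsified_dense_weights(shape, weight_sparsity, seed=42):
        """Illustration only: zero out a fixed fraction of a dense layer's weights."""
        rng = np.random.default_rng(seed)
        weights = rng.standard_normal(shape)
        # weight_sparsity=0.8 masks ~80% of the weights to zero;
        # weight_sparsity=0.0 keeps the layer fully dense (the restored 1.7.x behaviour).
        mask = rng.random(shape) >= weight_sparsity
        return weights * mask

    dense = sparsified_dense_weights((4, 4), weight_sparsity=0.0)
    sparse = sparsified_dense_weights((4, 4), weight_sparsity=0.8)
    print(np.count_nonzero(dense), np.count_nonzero(sparse))  # 16 vs. roughly 3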
diff --git a/docs/_static/images/contextual_interjection.png b/docs/_static/images/contextual_interjection.png new file mode 100644 index 000000000000..901782a1b63b Binary files /dev/null and b/docs/_static/images/contextual_interjection.png differ diff --git a/docs/_static/images/generic_interjection.png b/docs/_static/images/generic_interjection.png new file mode 100644 index 000000000000..7ef99728a685 Binary files /dev/null and b/docs/_static/images/generic_interjection.png differ diff --git a/docs/_static/images/generic_interjection_handled.png b/docs/_static/images/generic_interjection_handled.png new file mode 100644 index 000000000000..a764f72616cd Binary files /dev/null and b/docs/_static/images/generic_interjection_handled.png differ diff --git a/docs/_static/images/greet_interjection.png b/docs/_static/images/greet_interjection.png new file mode 100644 index 000000000000..3e0654c3fa45 Binary files /dev/null and b/docs/_static/images/greet_interjection.png differ diff --git a/docs/_static/images/memoization_policy_convo.png b/docs/_static/images/memoization_policy_convo.png new file mode 100644 index 000000000000..fe46270e2e77 Binary files /dev/null and b/docs/_static/images/memoization_policy_convo.png differ diff --git a/docs/api/event-brokers.rst b/docs/api/event-brokers.rst index 847a0fcea6b5..8c8bbe75ad3f 100644 --- a/docs/api/event-brokers.rst +++ b/docs/api/event-brokers.rst @@ -8,12 +8,22 @@ Event Brokers .. edit-link:: -Rasa Core allows you to stream events to a message broker. The event broker -emits events into the event queue. It becomes part of the ``TrackerStore`` -which you use when starting an ``Agent`` or launch ``rasa.core.run``. +An event broker allows you to connect your running assistant to other services that process the data coming +in from conversations. For example, you could `connect your live assistant to +Rasa X `_ +to review and annotate conversations or forward messages to an external analytics +service. The event broker publishes messages to a message streaming service, +also known as a message broker, to forward Rasa :ref:`events` from the Rasa server to other services. + +.. contents:: + :local: + :depth: 1 + +Format +------ All events are streamed to the broker as serialised dictionaries every time -the tracker updates it state. An example event emitted from the ``default`` +the tracker updates its state. An example event emitted from the ``default`` tracker looks like this: .. code-block:: json { "sender_id": "default", "timestamp": 1528402837.617099, "event": "bot", "text": "what your bot said", - "data": "some data" + "data": "some data about e.g. attachments", + "metadata": { + "a key": "a value" + } } The ``event`` field takes the event's ``type_name`` (for more on event types, check out the :ref:`events` docs). -Rasa enables three possible broker types: - -- `Pika Event Broker`_ -- `Kafka Event Broker`_ -- `SQL Event Broker`_ .. _event-brokers-pika: @@ -44,21 +52,19 @@ The example implementation we're going to show you here uses `Pika `_, the Python client library for `RabbitMQ `_. +.. contents:: + :local: Adding a Pika Event Broker Using the Endpoint Configuration ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You can use an endpoint configuration file to instruct Rasa Core to stream -all events to your event broker. To do so, add the following section to your -endpoint configuration, e.g. 
``endpoints.yml``: +You can instruct Rasa to stream all events to your Pika event broker by adding an ``event_broker`` section to your +``endpoints.yml``: .. literalinclude:: ../../data/test_endpoints/event_brokers/pika_endpoint.yml -Then instruct Rasa Core to use the endpoint configuration and Pika producer by adding -``--endpoints `_ as main broker for your events. In this example we are going to use the `python-kafka `_ library, a Kafka client written in Python. -.. note:: - - In order to use the Kafka event broker, ``rasa`` has to be installed with the - ``kafka`` option: - - .. code-block:: bash - - $ pip install rasa[kafka] - +.. contents:: + :local: Adding a Kafka Event Broker Using the Endpoint Configuration ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -As for the other brokers, you can use an endpoint configuration file to instruct Rasa Core to stream -all events to this event broker. To do it, add the following section to your -endpoint configuration. - -Pass the ``endpoints.yml`` file as argument with ``--endpoints `` -when running Rasa, as following example: - -.. code-block:: shell - - rasa run -m models --endpoints endpoints.yml +You can instruct Rasa to stream all events to your Kafka event broker by adding an ``event_broker`` section to your +``endpoints.yml``. When using the ``SASL_PLAINTEXT`` protocol, the endpoints file must have the following entries: .. literalinclude:: ../../data/test_endpoints/event_brokers/kafka_plaintext_endpoint.yml -In the case of using SSL protocol the endpoints file must looks like: +If using SSL protocol, the endpoints file should look like: .. literalinclude:: ../../data/test_endpoints/event_brokers/kafka_ssl_endpoint.yml @@ -166,7 +157,7 @@ The code below shows an example on how to instantiate a Kafka producer in you sc kafka_broker = KafkaEventBroker(host='localhost:9092', topic='rasa_events') - tracker_store = InMemoryTrackerStore(event_broker=kafka_broker) + tracker_store = InMemoryTrackerStore(domain=domain, event_broker=kafka_broker) The host variable can be either a list of broker addresses or a single one. @@ -185,10 +176,10 @@ list of strings. e.g.: 'kafka_broker_3:9092'], topic='rasa_events') -Authentication and authorization +Authentication and Authorization ^^^^^^^^^^^^^^^^^^^^ -Rasa Core's Kafka producer accepts two types of security protocols - ``SASL_PLAINTEXT`` and ``SSL``. +Rasa's Kafka producer accepts two types of security protocols: ``SASL_PLAINTEXT`` and ``SSL``. For a development environment, or if the broker servers and clients are located on the same machine, you can use simple authentication with ``SASL_PLAINTEXT``. @@ -231,7 +222,7 @@ and inter-broker connections to prevent man-in-the-middle attacks. Implementing a Kafka Event Consumer ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The parameters used to create a Kafka consumer is the same used on the producer creation, +The parameters used to create a Kafka consumer are the same as those used for the producer creation, according to the security protocol being used. The following implementation shows an example: .. code-block:: python @@ -265,9 +256,8 @@ and PostgreSQL databases, to see other options, please see the Adding a SQL Event Broker Using the Endpoint Configuration ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You can use the endpoint configuration file to instruct Rasa to save -all events to your SQL event broker. To do so, add a ``event_broker`` section to your -endpoint configuration, e.g. ``endpoints.yml``. 
For example, a valid SQLite configuration +To instruct Rasa to save all events to your SQL event broker, add an ``event_broker`` section to your +``endpoints.yml``. For example, a valid SQLite configuration could look like the following: .. code-block:: yaml diff --git a/docs/api/tensorflow_usage.rst b/docs/api/tensorflow_usage.rst index aa9d84023844..3e4f9caf6dc6 100644 --- a/docs/api/tensorflow_usage.rst +++ b/docs/api/tensorflow_usage.rst @@ -14,19 +14,33 @@ All configuration options are specified using environment variables as shown in Optimizing CPU Performance -------------------------- +.. note:: + We recommend that you configure these options only if you are an advanced TensorFlow user and understand the + implementation of the machine learning components in your pipeline. These options affect how operations are carried + out under the hood in TensorFlow. Leaving them at their default values is fine. + +Depending on the TensorFlow operations an NLU component or Core policy uses, you can leverage multi-core CPU +parallelism by tuning these options. + Parallelizing One Operation ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Set ``TF_INTRA_OP_PARALLELISM_THREADS`` as an environment variable to specify the maximum number of threads that can be used -to parallelize the execution of one operation. If left unspecified, this value defaults to ``0`` which means TensorFlow should -pick an appropriate value depending on the system configuration. +to parallelize the execution of one operation. For example, operations like ``tf.matmul()`` and ``tf.reduce_sum()`` can be executed +on multiple threads running in parallel. The default value for this variable is ``0`` which means TensorFlow would +allocate one thread per CPU core. Parallelizing Multiple Operations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Set ``TF_INTER_OP_PARALLELISM_THREADS`` as an environment variable to specify the maximum number of threads that can be used -to parallelize the execution of multiple **non-blocking** operations. If left unspecified, this value defaults to ``0`` -which means TensorFlow should pick an appropriate value depending on the system configuration. +to parallelize the execution of multiple **non-blocking** operations. These would include operations that do not have a +directed path between them in the TensorFlow graph. In other words, the computation of one operation does not affect the +computation of the other operation. The default value for this variable is ``0`` which means TensorFlow would allocate one thread per CPU core. + +To understand more about how these two options differ from each other, refer to this +`stackoverflow thread `_. + Optimizing GPU Performance -------------------------- @@ -35,18 +49,13 @@ Limiting GPU Memory Growth ^^^^^^^^^^^^^^^^^^^^^^^^^^ TensorFlow by default blocks all the available GPU memory for the running process. This can be limiting if you are running -multiple TensorFlow processes and want to distribute memory across them. To prevent this, -set the environment variable ``TF_FORCE_GPU_ALLOW_GROWTH`` to ``True``. +multiple TensorFlow processes and want to distribute memory across them. To prevent Rasa Open Source from blocking all +of the available GPU memory, set the environment variable ``TF_FORCE_GPU_ALLOW_GROWTH`` to ``True``. Restricting Absolute GPU Memory Available ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Often, a developer wants to limit the absolute amount of GPU memory that can be used by a process. 
- -For example, you may have two visible GPUs(``GPU:0`` and ``GPU:1``) and you want to allocate 1024 MB from the first GPU -and 2048 MB from the second GPU. -You can do so by setting an environment variable as ``TF_GPU_MEMORY_ALLOC="0:1024, 1:2048"``. +You may want to limit the absolute amount of GPU memory that can be used by a Rasa Open Source process. -Another scenario can be where you have access to 2 GPUs(``GPU:0`` and ``GPU:1``) but you would like to use only the second -GPU. -``TF_GPU_MEMORY_ALLOC="1:2048"`` would make 2048 MB of memory available from GPU 1. +For example, say you have two visible GPUs(``GPU:0`` and ``GPU:1``) and you want to allocate 1024 MB from the first GPU +and 2048 MB from the second GPU. You can do this by setting the environment variable ``TF_GPU_MEMORY_ALLOC`` to ``"0:1024, 1:2048"``. diff --git a/docs/migration-guide.rst b/docs/migration-guide.rst index 036a1eedbcc4..317946ebc501 100644 --- a/docs/migration-guide.rst +++ b/docs/migration-guide.rst @@ -92,11 +92,12 @@ General - name: DIETClassifier hidden_layers_sizes: text: [256, 128] + number_of_transformer_layers: 0 + weight_sparsity: 0 intent_classification: True entity_recognition: False use_masked_language_model: False BILOU_flag: False - number_of_transformer_layers: 0 # ... any other parameters See :ref:`DIETClassifier ` for more information about the new component. diff --git a/docs/user-guide/building-assistants.rst b/docs/user-guide/building-assistants.rst index 2deffe2ddc2e..7e3e9ee91b70 100644 --- a/docs/user-guide/building-assistants.rst +++ b/docs/user-guide/building-assistants.rst @@ -25,8 +25,9 @@ build a basic FAQ assistant using features of Rasa designed specifically for thi In this section we’re going to cover the following topics: - - Responding to simple intents with the MemoizationPolicy - - Handling FAQs using the ResponseSelector + - `Responding to simple intents `_ with the MemoizationPolicy + - `Handling FAQs `_ using the ResponseSelector + We’re going to use content from `Sara `_, the Rasa assistant that, amongst other things, helps the user get started with the Rasa products. @@ -44,7 +45,9 @@ new Rasa project. Let's remove the default content from this bot, so that the ``nlu.md``, ``stories.md`` -and ``domain.yml`` are empty. +and ``domain.yml`` files are empty. + +.. _respond-with-memoization-policy: Memoization Policy ^^^^^^^^^^^^^^^^^^ @@ -54,7 +57,7 @@ of turns. The number of "turns" includes messages the user sent, and actions the assistant performed. For the purpose of a simple, context-less FAQ bot, we only need to pay attention to the last message the user sent, and therefore we’ll set that to ``1``. -You can do this by editing your ``config.yml`` file as follows: +You can do this by editing your ``config.yml`` file as follows (you can remove ``TEDPolicy`` for now): .. code-block:: yaml @@ -84,7 +87,7 @@ intents to the ``stories.md`` file: * bye - utter_bye -We’ll also need to add the intents, actions and templates to our ``domain.yml`` file in the following sections: +We’ll also need to add the intents, actions and responses to our ``domain.yml`` file in the following sections: .. code-block:: md @@ -93,7 +96,7 @@ We’ll also need to add the intents, actions and templates to our ``domain.yml` - bye - thank - templates: + responses: utter_noworries: - text: No worries! utter_greet: @@ -101,7 +104,7 @@ We’ll also need to add the intents, actions and templates to our ``domain.yml` utter_bye: - text: Bye! 
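To make the ``max_history: 1`` setting above concrete: with a history of one turn, memoization amounts to a lookup from the last user intent to the next action. The sketch below is an illustration of that idea only, with invented names; it is not Rasa's actual ``MemoizationPolicy``, which hashes featurized tracker states.

.. code-block:: python

    from typing import Dict, List, Optional, Tuple

    class ToyMemoizationPolicy:
        """Illustrates max_history = 1: remember (last intent -> next action) pairs."""

        def __init__(self) -> None:
            self.lookup: Dict[str, str] = {}

        def train(self, turns: List[Tuple[str, str]]) -> None:
            # turns: (user intent, bot action) pairs taken from training stories.
            for intent, action in turns:
                self.lookup[intent] = action

        def predict(self, last_intent: str) -> Optional[str]:
            # With max_history = 1, only the most recent user message matters.
            return self.lookup.get(last_intent)

    policy = ToyMemoizationPolicy()
    policy.train([("greet", "utter_greet"), ("thank", "utter_noworries"), ("bye", "utter_bye")])
    assert policy.predict("thank") == "utter_noworries"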
-Finally, we’ll copy over some NLU data from Sara into our ``nlu.md`` +Finally, we’ll copy over some NLU data from Sara into our ``nlu.md`` file (more can be found `here `__): .. code-block:: md @@ -144,6 +147,13 @@ You can now train a first model and test the bot, by running the following comma This bot should now be able to reply to the intents we defined consistently, and in any order. +For example: + +.. image:: /_static/images/memoization_policy_convo.png + :alt: Memoization Policy Conversation + :align: center + + While it's good to test the bot interactively, we should also add end to end test cases that can later be included as part of our CI/CD system. `End to end stories `_ include NLU data, so that both components of Rasa can be tested. Create a file called @@ -183,6 +193,8 @@ also specify whether it was an NLU or Core prediction that went wrong. As part CI/CD pipeline, the test option ``--fail-on-prediction-errors`` can be used to throw an exception that stops the pipeline. +.. _faqs-response-selector: + Response Selectors ^^^^^^^^^^^^^^^^^^ @@ -239,23 +251,25 @@ Next, we’ll need to define the responses associated with these FAQs in a new f * faq/ask_rasax - Rasa X is a tool to learn from real conversations and improve your assistant. Read more [here](https://rasa.com/docs/rasa-x/) -To use the Response Selector we need to add it to the end of the expanded `supervised_embeddings `_ -NLU pipeline in our ``config.yml``: +The ResponseSelector should already be at the end of the NLU pipeline in our ``config.yml``: .. code-block:: yaml - pipeline: - - name: "WhitespaceTokenizer" - - name: "RegexFeaturizer" - - name: "CRFEntityExtractor" - - name: "EntitySynonymMapper" - - name: "CountVectorsFeaturizer" - - name: "CountVectorsFeaturizer" - analyzer: "char_wb" - min_ngram: 1 - max_ngram: 4 - - name: "EmbeddingIntentClassifier" - - name: "ResponseSelector" + language: en + pipeline: + - name: WhitespaceTokenizer + - name: RegexFeaturizer + - name: LexicalSyntacticFeaturizer + - name: CountVectorsFeaturizer + - name: CountVectorsFeaturizer + analyzer: "char_wb" + min_ngram: 1 + max_ngram: 4 + - name: DIETClassifier + epochs: 100 + - name: EntitySynonymMapper + - name: ResponseSelector + epochs: 100 Now that we’ve defined the NLU side, we need to make Core aware of these changes. Open your ``domain.yml`` file and add the ``faq`` intent: @@ -313,22 +327,34 @@ You can read more in this `blog post `_ page. Using the features we described in this tutorial, you can easily build a context-less assistant. -When you’re ready to enhance your assistant with context, check out :ref:`build-contextual-assistant`. +When you’re ready to enhance your assistant with context, check out :ref:`tutorial-contextual-assistants`. -.. _build-contextual-assistant: -Building contextual assistants ------------------------------- +.. note:: + Here's a minimal checklist of files we modified to build a basic FAQ assistant: + + - ``data/nlu.md``: Add NLU training data for ``faq/`` intents + - ``data/responses.md``: Add responses associated with ``faq/`` intents + - ``config.yml``: Add ``ResponseSelector`` in your NLU pipeline + - ``domain.yml``: Add a retrieval action ``respond_faq`` and intent ``faq`` + - ``data/stories.md``: Add a simple story for FAQs + - ``test_stories.md``: Add E2E test stories for your FAQs + + +.. 
_tutorial-contextual-assistants: + +Building a contextual assistant +------------------------------- Whether you’ve just created an FAQ bot or are starting from scratch, the next step is to expand your bot to handle contextual conversations. In this tutorial we’re going to cover a variety of topics: - - Handling business logic - - Handling unexpected user input - - Failing gracefully - - More complex contextual conversations + - :ref:`handling-business-logic` + - :ref:`handling-unexpected-user-input` + - :ref:`failing-gracefully` + - :ref:`more-complex-contextual-conversations` Please make sure you’ve got all the data from the :ref:`build-faq-assistant` section before starting this part. You will need to make some adjustments to your configuration file, since we now need to pay attention to context: @@ -343,8 +369,10 @@ We removed the ``max_history: 1`` configuration. The default is ``5``, meaning Core will pay attention to the past 5 turns when making a prediction (see explanation of `max history `_). -Business logic -^^^^^^^^^^^^^^ +.. _handling-business-logic: + +Handling business logic +^^^^^^^^^^^^^^^^^^^^^^^ A lot of conversational assistants have user goals that involve collecting a bunch of information from the user before being able to do something for them. This is called slot filling. For @@ -400,7 +428,7 @@ Note: you can customise the required slots function not to be static. E.g. if th developer, you could add a ``required_slot`` about the user's experience level with Rasa Once you’ve done that, you’ll need to specify how the bot should ask for this information. This -is done by specifying ``utter_ask_{slotname}`` templates in your domain file. For the above +is done by specifying ``utter_ask_{slotname}`` responses in your ``domain.yml`` file. For the above we’ll need to specify the following: .. code-block:: yaml @@ -418,7 +446,7 @@ we’ll need to specify the following: utter_ask_use_case: - text: What's your use case? -We’ll also need to define all these slots in our domain: +We’ll also need to define all these slots in our ``domain.yml`` file: .. code-block:: yaml @@ -455,7 +483,7 @@ In this case, we only tell the user that we’ll be in touch with them, however usually you would send this information to an API or a database. See the `rasa-demo `_ for an example of how to store this information in a spreadsheet. -We’ll need to add the form we just created to a new section in the domain file: +We’ll need to add the form we just created to a new section in our ``domain.yml`` file: .. code-block:: yaml @@ -527,7 +555,7 @@ data to your NLU file: (e.g. :ref:`DucklingHTTPExtractor` or :ref:`SpacyEntityExtractor`), but for this tutorial we want to avoid any additional setup. -The intents and entities will need to be added to your domain as well: +The intents and entities will need to be added to your ``domain.yml`` file as well: .. code-block:: yaml @@ -548,15 +576,19 @@ The intents and entities will need to be added to your domain as well: - use_case A story for a form is very simple, as all the slot collection happens inside the form, and -therefore doesn’t need to be covered in your stories. +therefore doesn’t need to be covered in your stories. You just need to write a single story showing when the form should be activated. For the sales form, add this story +to your ``stories.md`` file: + .. 
code-block:: md ## sales form * contact_sales - - sales_form - - form{"name": "sales_form"} - - form{"name": null} + - sales_form + - form{"name": "sales_form"} + - form{"name": null} + + As a final step, let’s add the FormPolicy to our config file: @@ -620,6 +652,28 @@ business logic. Read more about this `here `_ to help you create them. @@ -720,7 +798,12 @@ Contextual questions You can also handle `contextual questions `_, like the user asking the question "Why do you need to know that". The user could ask this based on a certain slot -the bot has requested, and the response should differ for each slot. +the bot has requested, and the response should differ for each slot. For example: + +.. image:: /_static/images/contextual_interjection.png + :width: 240 + :alt: Contextual Interjection + :align: center To handle this, we need to make the ``requested_slot`` featurized, and assign it the categorical type: @@ -763,7 +846,7 @@ unfeaturized slots are only used for storing information. The stories for this s - sales_form - form{"name": null} -We’ll need to add the intent and utterances we just added to our domain: +We’ll need to add the intent and utterances we just added to our ``domain.yml`` file: .. code-block:: yaml @@ -774,7 +857,7 @@ We’ll need to add the intent and utterances we just added to our domain: - faq - explain - templates: + responses: utter_explain_why_budget: - text: We need to know your budget to recommend a subscription utter_explain_why_email: @@ -804,6 +887,24 @@ Then you can retrain your bot and test it again: Don’t forget to add a few end to end stories to your ``test_stories.md`` for testing as well. + +.. note:: + Here's a minimal checklist of files we modified to handle unexpected user input: + + - ``actions.py``: Define ``action_greet`` + - ``data/nlu.md``: Add training data for an ``explain`` intent + - ``domain.yml``: + - Map intent ``greet`` to ``action_greet_user`` + - Make ``requested_slot`` a categorical slot with all required slots as values + - Add the ``explain`` intent + - Add responses for contextual question interruptions + - ``data/stories.md``: + - Remove stories using mapped intents if you have them + - Add stories with FAQ & contextual interruptions in the middle of filling a form + + +.. _failing-gracefully: + Failing gracefully ^^^^^^^^^^^^^^^^^^ @@ -823,7 +924,7 @@ the TwoStageFallbackPolicy. You can enable it by adding the following to your co - name: TwoStageFallbackPolicy nlu_threshold: 0.8 -and adding the ``out_of_scope`` intent to your domain file: +and adding the ``out_of_scope`` intent to your ``domain.yml`` file: .. code-block:: yaml @@ -856,8 +957,7 @@ We define some intent mappings to make it more intuitive to the user what an int Out of scope intent """"""""""""""""""" -It is good practice to also handle questions you know your users may ask, but you don’t necessarily have a skill -implemented yet. +It is good practice to also handle questions you know your users may ask, but for which you haven't necessarily implemented a user goal yet. You can define an ``out_of_scope`` intent to handle generic out of scope requests, like "I’m hungry" and have the bot respond with a default message like "Sorry, I can’t handle that request": @@ -877,11 +977,11 @@ We’ll need to add NLU data for the ``out_of_scope`` intent as well: - Who’s the US President? - I need a job -And finally we’ll add a template to our domain file: +And finally we’ll add a response to our ``domain.yml`` file: .. 
code-block:: yaml - templates: + responses: utter_out_of_scope: - text: Sorry, I can’t handle that request. @@ -903,6 +1003,24 @@ let’s say the user asks "I want to apply for a job at Rasa", we can then reply * ask_job utter_job_not_handled +.. note:: + Here's a minimal checklist of files we modified to help our assistant fail gracefully: + + - ``data/nlu.md``: + - Add training data for the ``out_of_scope`` intent & any specific out of scope intents that you want to handle separately + - ``data/stories.md``: + - Add stories for any specific out of scope intents + - ``domain.yml``: + - Add the ``out_of_scope`` intent & any specific out of scope intents + - Add an ``utter_out_of_scope`` response & responses for any specific out of scope intents + - ``actions.py``: + - Customise ``ActionDefaultAskAffirmation`` to suggest intents for the user to choose from + - ``config.yml``: + - Add the TwoStageFallbackPolicy to the ``policies`` section + + +.. _more-complex-contextual-conversations: + More complex contextual conversations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -964,7 +1082,7 @@ question by storing this information in a slot. - utter_getstarted_new - utter_built_bot_before -For this to work, keep in mind that the slot has to be featurized in your domain +For this to work, keep in mind that the slot has to be featurized in your ``domain.yml`` file. This time we can use the ``text`` slot type, as we only care about whether the `slot was set or not `_. @@ -989,7 +1107,7 @@ always have a higher priority (read more `here `_ deals with +These types of unexpected user behaviors are something our `TEDPolicy `_ deals with very well. It can learn to bring the user back on track after some interjections during the main user goal the user is trying to complete. 
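A short aside before the conversation example that follows: the reason the ``text`` slot type is sufficient above is that a ``text`` slot is featurized only by whether it is set, never by its value. The sketch below illustrates that idea with an invented function name; it is not Rasa's actual featurization code.

.. code-block:: python

    from typing import List, Optional

    def featurize_text_slot(value: Optional[str]) -> List[float]:
        # Sketch: a `text` slot contributes a single set/unset signal
        # to the dialogue state; the actual string is ignored.
        return [1.0 if value is not None else 0.0]

    # Any non-empty answer produces the identical state, so one story
    # covers every possible wording of the user's answer.
    assert featurize_text_slot("yes, with Rasa 1.x") == featurize_text_slot("no")
    assert featurize_text_slot(None) == [0.0]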
For example, in the conversation below (extracted from a conversation on `Rasa X `__): @@ -1016,7 +1134,7 @@ in the conversation below (extracted from a conversation on `Rasa X =0.0.1,<0.1.0)", "cupy-cuda90 (>=5.0.0b4)"] cuda91 = ["thinc-gpu-ops (>=0.0.1,<0.1.0)", "cupy-cuda91 (>=5.0.0b4)"] cuda92 = ["thinc-gpu-ops (>=0.0.1,<0.1.0)", "cupy-cuda92 (>=5.0.0b4)"] +[[package]] +category = "main" +description = "Fast and Customizable Tokenizers" +name = "tokenizers" +optional = true +python-versions = "*" +version = "0.0.11" + [[package]] category = "dev" description = "Python Library for Tom's Obvious, Minimal Language" @@ -2895,23 +2911,31 @@ category = "main" description = "State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch" name = "transformers" optional = true -python-versions = "*" -version = "2.3.0" +python-versions = ">=3.5.0" +version = "2.4.1" [package.dependencies] boto3 = "*" +filelock = "*" numpy = "*" regex = "!=2019.12.17" requests = "*" sacremoses = "*" sentencepiece = "*" -tqdm = "*" +tokenizers = "0.0.11" +tqdm = ">=4.27" [package.extras] -all = ["pydantic", "uvicorn", "fastapi", "tensorflow", "torch"] -serving = ["pydantic", "uvicorn", "fastapi"] -serving-tf = ["pydantic", "uvicorn", "fastapi", "tensorflow"] -serving-torch = ["pydantic", "uvicorn", "fastapi", "torch"] +all = ["pydantic", "uvicorn", "fastapi", "starlette", "tensorflow", "torch"] +dev = ["pytest", "pytest-xdist", "black", "isort", "flake8", "mecab-python3", "scikit-learn", "tensorflow", "torch"] +docs = ["recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rtd-theme"] +mecab = ["mecab-python3"] +quality = ["black", "isort", "flake8"] +serving = ["pydantic", "uvicorn", "fastapi", "starlette"] +sklearn = ["scikit-learn"] +testing = ["pytest", "pytest-xdist"] +tf = ["tensorflow"] +torch = ["torch"] [[package]] category = "main" @@ -2977,11 +3001,10 @@ pytz = "*" [[package]] category = "main" description = "Ultra fast JSON encoder and decoder for Python" -marker = "sys_platform != \"win32\" and implementation_name == \"cpython\"" name = "ujson" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "2.0.2" +python-versions = "*" +version = "1.35" [[package]] category = "main" @@ -3155,7 +3178,7 @@ spacy = ["spacy"] transformers = ["transformers"] [metadata] -content-hash = "05dbb4ecffed3b4fa61ae0fe19212644e89b7f72999ca69619de22648b69cfa6" +content-hash = "23c99b1378288b2648e2a06866bd4e62b305362465c51a5114722467fdc74bf6" python-versions = "^3.6" [metadata.files] @@ -3473,6 +3496,10 @@ fbmessenger = [ {file = "fbmessenger-6.0.0-py2.py3-none-any.whl", hash = "sha256:82cffd6e2fe02bfcf8ed083c59bdddcfdaa594dd0040f0c49eabbaf0e58d974c"}, {file = "fbmessenger-6.0.0.tar.gz", hash = "sha256:6e42c4588a4c942547be228886278bbc7a084e0b34799c7e6ebd786129f021e6"}, ] +filelock = [ + {file = "filelock-3.0.12-py3-none-any.whl", hash = "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"}, + {file = "filelock-3.0.12.tar.gz", hash = "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59"}, +] flake8 = [ {file = "flake8-3.7.9-py2.py3-none-any.whl", hash = "sha256:49356e766643ad15072a789a20915d3c91dc89fd313ccd71802303fd67e4deca"}, {file = "flake8-3.7.9.tar.gz", hash = "sha256:45681a117ecc81e870cbf1262835ae4af5e7a8b08e40b944a8a6e6b895914cfb"}, @@ -3567,6 +3594,9 @@ greenlet = [ {file = "greenlet-0.4.15-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:8041e2de00e745c0e05a502d6e6db310db7faa7c979b3a5877123548a4c0b214"}, {file = 
"greenlet-0.4.15-cp37-cp37m-win32.whl", hash = "sha256:81fcd96a275209ef117e9ec91f75c731fa18dcfd9ffaa1c0adbdaa3616a86043"}, {file = "greenlet-0.4.15-cp37-cp37m-win_amd64.whl", hash = "sha256:37c9ba82bd82eb6a23c2e5acc03055c0e45697253b2393c9a50cef76a3985304"}, + {file = "greenlet-0.4.15-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:e538b8dae561080b542b0f5af64d47ef859f22517f7eca617bb314e0e03fd7ef"}, + {file = "greenlet-0.4.15-cp38-cp38-win32.whl", hash = "sha256:51155342eb4d6058a0ffcd98a798fe6ba21195517da97e15fca3db12ab201e6e"}, + {file = "greenlet-0.4.15-cp38-cp38-win_amd64.whl", hash = "sha256:7457d685158522df483196b16ec648b28f8e847861adb01a55d41134e7734122"}, {file = "greenlet-0.4.15.tar.gz", hash = "sha256:9416443e219356e3c31f1f918a91badf2e37acf297e2fa13d24d1cc2380f8fbc"}, ] grpcio = [ @@ -4482,6 +4512,11 @@ sanic-plugins-framework = [ ] scikit-learn = [ {file = "scikit-learn-0.22.2.post1.tar.gz", hash = "sha256:57538d138ba54407d21e27c306735cbd42a6aae0df6a5a30c7a6edde46b0017d"}, + {file = "scikit_learn-0.22.2.post1-cp35-cp35m-macosx_10_9_intel.whl", hash = "sha256:267ad874b54c67b479c3b45eb132ef4a56ab2b27963410624a413a4e2a3fc388"}, + {file = "scikit_learn-0.22.2.post1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:8ed66ab27b3d68e57bb1f315fc35e595a5c4a1f108c3420943de4d18fc40e615"}, + {file = "scikit_learn-0.22.2.post1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:4990f0e166292d2a0f0ee528233723bcfd238bfdb3ec2512a9e27f5695362f35"}, + {file = "scikit_learn-0.22.2.post1-cp35-cp35m-win32.whl", hash = "sha256:ddd3bf82977908ff69303115dd5697606e669d8a7eafd7d83bb153ef9e11bd5e"}, + {file = "scikit_learn-0.22.2.post1-cp35-cp35m-win_amd64.whl", hash = "sha256:349ba3d837fb3f7cb2b91486c43713e4b7de17f9e852f165049b1b7ac2f81478"}, {file = "scikit_learn-0.22.2.post1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:73207dca6e70f8f611f28add185cf3a793c8232a1722f21d82259560dc35cd50"}, {file = "scikit_learn-0.22.2.post1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:de9933297f8659ee3bb330eafdd80d74cd73d5dab39a9026b65a4156bc479063"}, {file = "scikit_learn-0.22.2.post1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:6043e2c4ccfc68328c331b0fc19691be8fb02bd76d694704843a23ad651de902"}, @@ -4714,6 +4749,21 @@ thinc = [ {file = "thinc-7.0.8-cp37-cp37m-win_amd64.whl", hash = "sha256:f27e0fe9b1e4be2eb0ff95112a9cbcd79e1614d25b8bae6f2e8e2b727c2a2fe6"}, {file = "thinc-7.0.8.tar.gz", hash = "sha256:5cdb72e8efec0e7b6efae09a09245d744f144114048f63f1ed4b63b8656d2aa4"}, ] +tokenizers = [ + {file = "tokenizers-0.0.11-cp35-cp35m-macosx_10_13_x86_64.whl", hash = "sha256:1385deb90ec76cbee59b50298c8d2dc5909cda080a706d263e4f81c8474ba53d"}, + {file = "tokenizers-0.0.11-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:a4d1ef6ee9221e7f9c1a4c122a15e93f0961977aaae2813b7b405c778728dcee"}, + {file = "tokenizers-0.0.11-cp35-cp35m-win_amd64.whl", hash = "sha256:a7f5e43674dd5b012ad29b79a32f0652ecfff3a3ed1c04f9073038c4bf63829d"}, + {file = "tokenizers-0.0.11-cp36-cp36m-macosx_10_13_x86_64.whl", hash = "sha256:a66ff87c32a221a126904d7ec972e7c8e0033486b24f8777c0f056aedbc09011"}, + {file = "tokenizers-0.0.11-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:bb44fa1b268d1bbdf2bb14cd82da6ffb93d19638157c77f9e17e246928f0233f"}, + {file = "tokenizers-0.0.11-cp36-cp36m-win_amd64.whl", hash = "sha256:82e8c3b13a66410358753b7e48776749935851cdb49a3d0c139a046178ec4f49"}, + {file = "tokenizers-0.0.11-cp37-cp37m-macosx_10_13_x86_64.whl", hash = "sha256:ce75c75430a3dfc33a10c90c1607d44b172c6d2ea19d586692b6cc9ba6ec5e14"}, + {file = 
"tokenizers-0.0.11-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:08e08027564194e16aa647d180837d292b2c9c5ef772fed15badcc88e2474a8f"}, + {file = "tokenizers-0.0.11-cp37-cp37m-win_amd64.whl", hash = "sha256:3ebe7f0bff9e30ab15dec4846c54c9085e02e47711eb7253d36a6777eadc2948"}, + {file = "tokenizers-0.0.11-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:7de28e0bebd0904b990560a1f14c3c5600da29be287e544bdf19e6970ea11d54"}, + {file = "tokenizers-0.0.11-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5ba2c6eaac2e8e0a2d839c0420d16707496b5e93b1454029d19487c5dd8c9b62"}, + {file = "tokenizers-0.0.11-cp38-cp38-win_amd64.whl", hash = "sha256:503418d5195ae1a483ced0257a0d2f4583456aa49bdfe0014c8605babf244ac5"}, + {file = "tokenizers-0.0.11.tar.gz", hash = "sha256:4b7c42b644a1c5705a59b14c53c84b50b8f0b9c0f5f952a8a91a350403e7615f"}, +] toml = [ {file = "toml-0.10.0-py2.7.egg", hash = "sha256:f1db651f9657708513243e61e6cc67d101a39bad662eaa9b5546f789338e07a3"}, {file = "toml-0.10.0-py2.py3-none-any.whl", hash = "sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"}, @@ -4743,8 +4793,8 @@ traitlets = [ {file = "traitlets-4.3.3.tar.gz", hash = "sha256:d023ee369ddd2763310e4c3eae1ff649689440d4ae59d7485eb4cfbbe3e359f7"}, ] transformers = [ - {file = "transformers-2.3.0-py3-none-any.whl", hash = "sha256:2c237b06d60bb7f17f6b9e1ab9c5d4530508a287bc16ec64b5f7bb11d99df717"}, - {file = "transformers-2.3.0.tar.gz", hash = "sha256:d881aca9ff1d0d9cf500bda47d1cbe1b87d4297af75f2e1b9cf7ac0293dd9c38"}, + {file = "transformers-2.4.1-py3-none-any.whl", hash = "sha256:c5e765d3fd1a654e27b0f675e1cc1e210139429cc00d1b816c3c21502ace941e"}, + {file = "transformers-2.4.1.tar.gz", hash = "sha256:995393b9ce764044287847792476101cd4e8377c756874df1116b221980749ad"}, ] twilio = [ {file = "twilio-6.26.3-py2.py3-none-any.whl", hash = "sha256:be51f482e8df17ae224263e0533ab68283d9a0158d5aaa58ac9ff35f954c8f3c"}, @@ -4787,13 +4837,7 @@ tzlocal = [ {file = "tzlocal-2.0.0.tar.gz", hash = "sha256:949b9dd5ba4be17190a80c0268167d7e6c92c62b30026cf9764caf3e308e5590"}, ] ujson = [ - {file = "ujson-2.0.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:1039b8857da6b4d6bdb4ca35a05c18e5013303e3a0da7252572dd8fc3352badc"}, - {file = "ujson-2.0.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:749c445ceff194615d8df90a530a9bb7c1741c547aacd31970182998fb670d17"}, - {file = "ujson-2.0.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:bb9a4636acf18d4698b26d3f28e274978e15b486dd56e68ad5f0c630e4c066c5"}, - {file = "ujson-2.0.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:940bc49addf7075e1895d46acb65c7d717d920d480af56091d4006d17eb8f1f6"}, - {file = "ujson-2.0.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:db264d95357ddad7318c39fec1f7480d13792c0a711755fa49d14d863cbceae3"}, - {file = "ujson-2.0.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:a42ea3d279d9fa4768dd96b58cfa8f99b46ca1b6e20db778309366ade92943d7"}, - {file = "ujson-2.0.2.tar.gz", hash = "sha256:91f30f0b6fd5cd4b43844a9c52febc80772aad6348a58369d0a08bd1b347a52c"}, + {file = "ujson-1.35.tar.gz", hash = "sha256:f66073e5506e91d204ab0c614a148d5aa938bdbf104751be66f8ad7a222f5f86"}, ] uritemplate = [ {file = "uritemplate-3.0.1-py2.py3-none-any.whl", hash = "sha256:07620c3f3f8eed1f12600845892b0e036a2420acf513c53f7de0abd911a5894f"}, diff --git a/pyproject.toml b/pyproject.toml index a55e346abf8d..54d5e3b8c5cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ exclude = "((.eggs | .git | .pytype | .pytest_cache | build | dist))" [tool.poetry] name = "rasa" -version = "1.8.1" 
+version = "1.8.2" description = "Open source machine learning framework to automate text- and voice-based conversations: NLU, dialogue management, connect to Slack, Facebook, and more - Create chatbots and voice assistants" authors = [ "Rasa Technologies GmbH ",] maintainers = [ "Tom Bocklisch ",] @@ -121,6 +121,7 @@ tensorflow-estimator = "2.1.0" tensorflow-probability = "~0.7" setuptools = ">=41.0.0" kafka-python = "^1.4" +ujson = "^1.35" [tool.poetry.dev-dependencies] pytest-cov = "^2.8.1" @@ -183,7 +184,7 @@ version = "~1.3.0" optional = true [tool.poetry.dependencies.transformers] -version = "~2.3.0" +version = "~2.4" optional = true [tool.poetry.dependencies.jieba] diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py index 7e91e8e6d594..193d3c71a267 100644 --- a/rasa/nlu/classifiers/embedding_intent_classifier.py +++ b/rasa/nlu/classifiers/embedding_intent_classifier.py @@ -124,7 +124,7 @@ def required_components(cls) -> List[Type[Component]]: # Dropout rate for encoder DROP_RATE: 0.2, # Sparsity of the weights in dense layers - WEIGHT_SPARSITY: 0.8, + WEIGHT_SPARSITY: 0.0, # If 'True' apply dropout to sparse tensors SPARSE_INPUT_DROPOUT: False, # ## Evaluation parameters diff --git a/rasa/nlu/utils/hugging_face/hf_transformers.py b/rasa/nlu/utils/hugging_face/hf_transformers.py index ad2a3c1e1de2..a40d926bce0c 100644 --- a/rasa/nlu/utils/hugging_face/hf_transformers.py +++ b/rasa/nlu/utils/hugging_face/hf_transformers.py @@ -23,7 +23,7 @@ class HFTransformersNLP(Component): - """Utility Component for interfacing between Transformers library. + """Utility Component for interfacing between Transformers library and Rasa OS. The transformers(https://github.com/huggingface/transformers) library is used to load pre-trained language models like BERT, GPT-2, etc. @@ -97,6 +97,16 @@ def required_packages(cls) -> List[Text]: return ["transformers"] def _lm_tokenize(self, text: Text) -> Tuple[List[int], List[Text]]: + """ + Pass the text through the tokenizer of the language model. + + Args: + text: Text to be tokenized. + + Returns: + List of token ids and token strings. + + """ split_token_ids = self.tokenizer.encode(text, add_special_tokens=False) split_token_strings = self.tokenizer.convert_ids_to_tokens(split_token_ids) @@ -106,6 +116,14 @@ def _lm_tokenize(self, text: Text) -> Tuple[List[int], List[Text]]: def _add_lm_specific_special_tokens( self, token_ids: List[List[int]] ) -> List[List[int]]: + """Add language model specific special tokens which were used during their training. + + Args: + token_ids: List of token ids for each example in the batch. + + Returns: + Augmented list of token ids for each example in the batch. + """ from rasa.nlu.utils.hugging_face.registry import ( model_special_tokens_pre_processors, ) @@ -119,6 +137,18 @@ def _add_lm_specific_special_tokens( def _lm_specific_token_cleanup( self, split_token_ids: List[int], token_strings: List[Text] ) -> Tuple[List[int], List[Text]]: + """Clean up special chars added by tokenizers of language models. + + Many language models add a special char in front/back of (some) words. We clean up those chars as they are not + needed once the features are already computed. + + Args: + split_token_ids: List of token ids received as output from the language model specific tokenizer. + token_strings: List of token strings received as output from the language model specific tokenizer. + + Returns: + Cleaned up token ids and token strings. 
+ """ from rasa.nlu.utils.hugging_face.registry import model_tokens_cleaners return model_tokens_cleaners[self.model_name](split_token_ids, token_strings) @@ -126,6 +156,14 @@ def _lm_specific_token_cleanup( def _post_process_sequence_embeddings( self, sequence_embeddings: np.ndarray ) -> Tuple[np.ndarray, np.ndarray]: + """Compute sentence level representations and sequence level representations for relevant tokens. + + Args: + sequence_embeddings: Sequence level dense features received as output from language model. + + Returns: + Sentence and sequence level representations. + """ from rasa.nlu.utils.hugging_face.registry import ( model_embeddings_post_processors, @@ -151,6 +189,21 @@ def _post_process_sequence_embeddings( def _tokenize_example( self, message: Message, attribute: Text ) -> Tuple[List[Token], List[int]]: + """Tokenize a single message example. + + Many language models add a special char in front of (some) words and split words into + sub-words. To ensure the entity start and end values matches the token values, + tokenize the text first using the whitespace tokenizer. If individual tokens + are split up into multiple tokens, we make sure that the start and end value + of the first and last respective tokens stay the same. + + Args: + message: Single message object to be processed. + attribute: Property of message to be processed, one of ``TEXT`` or ``RESPONSE``. + + Returns: + List of token strings and token ids for the corresponding attribute of the message. + """ tokens_in = self.whitespace_tokenizer.tokenize(message, attribute) @@ -177,6 +230,16 @@ def _tokenize_example( def _get_token_ids_for_batch( self, batch_examples: List[Message], attribute: Text ) -> Tuple[List[List[Token]], List[List[int]]]: + """Compute token ids and token strings for each example in batch. + + A token id is the id of that token in the vocabulary of the language model. + Args: + batch_examples: Batch of message objects for which tokens need to be computed. + attribute: Property of message to be processed, one of ``TEXT`` or ``RESPONSE``. + + Returns: + List of token strings and token ids for each example in the batch. + """ batch_token_ids = [] batch_tokens = [] @@ -192,6 +255,16 @@ def _get_token_ids_for_batch( @staticmethod def _compute_attention_mask(actual_sequence_lengths: List[int]) -> np.ndarray: + """Compute a mask for padding tokens. + + This mask will be used by the language model so that it does not attend to padding tokens. + + Args: + actual_sequence_lengths: List of length of each example without any padding + + Returns: + Computed attention mask, 0 for padding and 1 for non-padding tokens. + """ attention_mask = [] max_seq_length = max(actual_sequence_lengths) @@ -210,6 +283,14 @@ def _compute_attention_mask(actual_sequence_lengths: List[int]) -> np.ndarray: def _add_padding_to_batch( self, batch_token_ids: List[List[int]] ) -> Tuple[List[int], List[List[int]]]: + """Add padding so that all examples in the batch are of the same length. + + Args: + batch_token_ids: Batch of examples where each example is a non-padded list of token ids. + + Returns: + Padded batch with all examples of the same length. + """ padded_token_ids = [] # Compute max length across examples max_seq_len = 0 @@ -234,6 +315,15 @@ def _add_padding_to_batch( def _extract_nonpadded_embeddings( embeddings: np.ndarray, actual_sequence_lengths: List[int] ) -> np.ndarray: + """Use pre-computed non-padded lengths of each example to extract embeddings for non-padding tokens. 
+ + Args: + embeddings: sequence level representations for each example of the batch. + actual_sequence_lengths: non-padded lengths of each example of the batch. + + Returns: + Sequence level embeddings for only non-padding tokens of the batch. + """ nonpadded_sequence_embeddings = [] for index, embedding in enumerate(embeddings): unmasked_embedding = embedding[: actual_sequence_lengths[index]] @@ -244,6 +334,15 @@ def _extract_nonpadded_embeddings( def _compute_batch_sequence_features( self, batch_attention_mask: np.ndarray, padded_token_ids: List[List[int]] ) -> np.ndarray: + """Feed the padded batch to the language model. + + Args: + batch_attention_mask: Mask of 0s and 1s which indicate whether the token is a padding token or not. + padded_token_ids: Batch of token ids for each example. The batch is padded and hence can be fed at once. + + Returns: + Sequence level representations from the language model. + """ model_outputs = self.model( np.array(padded_token_ids), attention_mask=np.array(batch_attention_mask) ) @@ -257,6 +356,19 @@ def _compute_batch_sequence_features( def _get_model_features_for_batch( self, batch_token_ids: List[List[int]] ) -> Tuple[np.ndarray, np.ndarray]: + """Compute dense features of each example in the batch. + + We first add the special tokens corresponding to each language model. Next, we add appropriate padding + and compute a mask for that padding so that it doesn't affect the feature computation. The padded batch is next + fed to the language model and token level embeddings are computed. Using the pre-computed mask, embeddings for + non-padding tokens are extracted and subsequently sentence level embeddings are computed. + + Args: + batch_token_ids: List of token ids of each example in the batch. + + Returns: + Sentence and token level dense representations. + """ # Let's first add tokenizer specific special tokens to all examples batch_token_ids_augmented = self._add_lm_specific_special_tokens( batch_token_ids @@ -291,6 +403,15 @@ def _get_model_features_for_batch( def _get_docs_for_batch( self, batch_examples: List[Message], attribute: Text ) -> List[Dict[Text, Any]]: + """Compute language model docs for all examples in the batch. + + Args: + batch_examples: Batch of message objects for which language model docs need to be computed. + attribute: Property of message to be processed, one of ``TEXT`` or ``RESPONSE``. + + Returns: + List of language model docs for each message in batch. + """ batch_tokens, batch_token_ids = self._get_token_ids_for_batch( batch_examples, attribute @@ -321,6 +442,13 @@ def train( config: Optional[RasaNLUModelConfig] = None, **kwargs: Any, ) -> None: + """Compute tokens and dense features for each message in training data. + + Args: + training_data: NLU training data to be tokenized and featurized + config: NLU pipeline config consisting of all components. + + """ batch_size = 64 @@ -350,6 +478,11 @@ def train( batch_start_index += batch_size def process(self, message: Message, **kwargs: Any) -> None: + """Process an incoming message by computing its tokens and dense features. 
+ + Args: + message: Incoming message object + """ message.set( LANGUAGE_MODEL_DOCS[TEXT], diff --git a/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py b/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py index 8dfcd6f7c21b..2436e6c7814a 100644 --- a/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py +++ b/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py @@ -5,7 +5,15 @@ def cleanup_tokens( token_ids_string: List[Tuple[int, Text]], delimiter: Text ) -> Tuple[List[int], List[Text]]: - """Utility method to apply specific delimiter based cleanup on list of tokens""" + """Utility method to apply delimiter based cleanup on list of tokens. + + Args: + token_ids_string: List of tuples with each tuple containing (token id, token string). + delimiter: character/string to be cleaned from token strings. + + Returns: + Token ids and Token strings unpacked. + """ token_ids_string = [ (id, string.replace(delimiter, "")) for id, string in token_ids_string @@ -20,7 +28,14 @@ def cleanup_tokens( def bert_tokens_pre_processor(token_ids: List[int]) -> List[int]: - """Add BERT style special tokens(CLS and SEP)""" + """Add BERT style special tokens(CLS and SEP). + + Args: + token_ids: List of token ids without any special tokens. + + Returns: + List of token ids augmented with special tokens. + """ BERT_CLS_ID = 101 BERT_SEP_ID = 102 @@ -33,11 +48,27 @@ def bert_tokens_pre_processor(token_ids: List[int]) -> List[int]: def gpt_tokens_pre_processor(token_ids: List[int]) -> List[int]: + """Add GPT style special tokens(None). + + Args: + token_ids: List of token ids without any special tokens. + + Returns: + List of token ids augmented with special tokens. + """ + return token_ids def xlnet_tokens_pre_processor(token_ids: List[int]) -> List[int]: - """Add XLNET style special tokens""" + """Add XLNET style special tokens. + + Args: + token_ids: List of token ids without any special tokens. + + Returns: + List of token ids augmented with special tokens. + """ XLNET_CLS_ID = 3 XLNET_SEP_ID = 4 @@ -48,7 +79,14 @@ def xlnet_tokens_pre_processor(token_ids: List[int]) -> List[int]: def roberta_tokens_pre_processor(token_ids: List[int]) -> List[int]: - """Add RoBERTa style special tokens""" + """Add RoBERTa style special tokens. + + Args: + token_ids: List of token ids without any special tokens. + + Returns: + List of token ids augmented with special tokens. + """ ROBERTA_BEG_ID = 0 ROBERTA_END_ID = 2 @@ -59,7 +97,14 @@ def roberta_tokens_pre_processor(token_ids: List[int]) -> List[int]: def xlm_tokens_pre_processor(token_ids: List[int]) -> List[int]: - """Add RoBERTa style special tokens""" + """Add XLM style special tokens. + + Args: + token_ids: List of token ids without any special tokens. + + Returns: + List of token ids augmented with special tokens. + """ XLM_SEP_ID = 1 token_ids.insert(0, XLM_SEP_ID) @@ -71,10 +116,17 @@ def xlm_tokens_pre_processor(token_ids: List[int]) -> List[int]: def bert_embeddings_post_processor( sequence_embeddings: np.ndarray, ) -> Tuple[np.ndarray, np.ndarray]: - """Post process embeddings from BERT + """Post-process embeddings from BERT. by removing CLS and SEP embeddings and returning CLS token embedding as - sentence representation""" + sentence representation. + + Args: + sequence_embeddings: Sequence of token level embeddings received as output from BERT. + + Returns: + sentence level embedding and post-processed sequence level embedding. 
+ """ sentence_embedding = sequence_embeddings[0] post_processed_embedding = sequence_embeddings[1:-1] @@ -84,10 +136,17 @@ def bert_embeddings_post_processor( def gpt_embeddings_post_processor( sequence_embeddings: np.ndarray, ) -> Tuple[np.ndarray, np.ndarray]: - """Post process embeddings from GPT models + """Post-process embeddings from GPT models. by taking a mean over sequence embeddings and returning that as sentence - representation""" + representation. + + Args: + sequence_embeddings: Sequence of token level embeddings received as output from GPT. + + Returns: + sentence level embedding and post-processed sequence level embedding. + """ sentence_embedding = np.mean(sequence_embeddings, axis=0) post_processed_embedding = sequence_embeddings @@ -97,11 +156,18 @@ def gpt_embeddings_post_processor( def xlnet_embeddings_post_processor( sequence_embeddings: np.ndarray, ) -> Tuple[np.ndarray, np.ndarray]: - """Post process embeddings from XLNet models + """Post-process embeddings from XLNet models. by taking a mean over sequence embeddings and returning that as sentence representation. Remove last two time steps corresponding - to special tokens from the sequence embeddings.""" + to special tokens from the sequence embeddings. + + Args: + sequence_embeddings: Sequence of token level embeddings received as output from XLNet. + + Returns: + sentence level embedding and post-processed sequence level embedding. + """ post_processed_embedding = sequence_embeddings[:-2] sentence_embedding = np.mean(post_processed_embedding, axis=0) @@ -111,11 +177,18 @@ def xlnet_embeddings_post_processor( def roberta_embeddings_post_processor( sequence_embeddings: np.ndarray, ) -> Tuple[np.ndarray, np.ndarray]: - """Post process embeddings from Roberta models + """Post process embeddings from Roberta models. by taking a mean over sequence embeddings and returning that as sentence representation. Remove first and last time steps - corresponding to special tokens from the sequence embeddings.""" + corresponding to special tokens from the sequence embeddings. + + Args: + sequence_embeddings: Sequence of token level embeddings received as output from Roberta + + Returns: + sentence level embedding and post-processed sequence level embedding + """ post_processed_embedding = sequence_embeddings[1:-1] sentence_embedding = np.mean(post_processed_embedding, axis=0) @@ -130,7 +203,14 @@ def xlm_embeddings_post_processor( by taking a mean over sequence embeddings and returning that as sentence representation. Remove first and last time steps - corresponding to special tokens from the sequence embeddings.""" + corresponding to special tokens from the sequence embeddings. + + Args: + sequence_embeddings: Sequence of token level embeddings received as output from XLM + + Returns: + sentence level embedding and post-processed sequence level embedding + """ post_processed_embedding = sequence_embeddings[1:-1] sentence_embedding = np.mean(post_processed_embedding, axis=0) @@ -140,30 +220,66 @@ def xlm_embeddings_post_processor( def bert_tokens_cleaner( token_ids: List[int], token_strings: List[Text] ) -> Tuple[List[int], List[Text]]: - """Clean up tokens with the extra delimiters(##) BERT adds while breaking a token - into sub-tokens""" + """Token cleanup method for BERT. + + Clean up tokens with the extra delimiters(##) BERT adds while breaking a token into sub-tokens. + + Args: + token_ids: List of token ids received as output from BERT Tokenizer. 
+ token_strings: List of token strings received as output from BERT Tokenizer. + + Returns: + Cleaned token ids and token strings. + """ return cleanup_tokens(list(zip(token_ids, token_strings)), "##") def openaigpt_tokens_cleaner( token_ids: List[int], token_strings: List[Text] ) -> Tuple[List[int], List[Text]]: - """Clean up tokens with the extra delimiters(</w>) OpenAIGPT adds while breaking a - token into sub-tokens""" + """Token cleanup method for GPT. + + Clean up tokens with the extra delimiters(</w>) OpenAIGPT adds while breaking a token into sub-tokens. + + Args: + token_ids: List of token ids received as output from GPT Tokenizer. + token_strings: List of token strings received as output from GPT Tokenizer. + + Returns: + Cleaned token ids and token strings. + """ return cleanup_tokens(list(zip(token_ids, token_strings)), "</w>") def gpt2_tokens_cleaner( token_ids: List[int], token_strings: List[Text] ) -> Tuple[List[int], List[Text]]: - """Clean up tokens with the extra delimiters(Ġ) GPT2 adds while breaking a token - into sub-tokens""" + """Token cleanup method for GPT2. + + Clean up tokens with the extra delimiters(Ġ) GPT2 adds while breaking a token into sub-tokens. + + Args: + token_ids: List of token ids received as output from GPT2 Tokenizer. + token_strings: List of token strings received as output from GPT2 Tokenizer. + + Returns: + Cleaned token ids and token strings. + """ return cleanup_tokens(list(zip(token_ids, token_strings)), "Ġ") def xlnet_tokens_cleaner( token_ids: List[int], token_strings: List[Text] ) -> Tuple[List[int], List[Text]]: - """Clean up tokens with the extra delimiters(▁) XLNet adds while breaking a token - into sub-tokens""" + """Token cleanup method for XLNet. + + Clean up tokens with the extra delimiters(▁) XLNet adds while breaking a token into sub-tokens. + + Args: + token_ids: List of token ids received as output from XLNet Tokenizer. + token_strings: List of token strings received as output from XLNet Tokenizer. + + Returns: + Cleaned token ids and token strings. + """ return cleanup_tokens(list(zip(token_ids, token_strings)), "▁") diff --git a/rasa/version.py b/rasa/version.py index adb5c42fd51b..841a3e4c049d 100644 --- a/rasa/version.py +++ b/rasa/version.py @@ -1,3 +1,3 @@ # this file will automatically be changed, # do not add anything but the version number here! -__version__ = "1.8.1" +__version__ = "1.8.2"
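As a closing illustration of the padding logic documented in ``HFTransformersNLP._compute_attention_mask`` above: the sketch below shows the intended shape of the mask. It is a simplified stand-in under the same contract (1 for real tokens, 0 for padding), not the code from ``hf_transformers.py``.

.. code-block:: python

    import numpy as np

    def compute_attention_mask(actual_sequence_lengths):
        """Sketch: mask is 1 for real tokens, 0 for padding, padded to the longest example."""
        max_len = max(actual_sequence_lengths)
        return np.array(
            [[1] * length + [0] * (max_len - length) for length in actual_sequence_lengths],
            dtype=np.int32,
        )

    print(compute_attention_mask([2, 5, 3]))
    # [[1 1 0 0 0]
    #  [1 1 1 1 1]
    #  [1 1 1 0 0]]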