Skip to content

Commit

Permalink
Merge pull request #7137 from RasaHQ/merge-2.0.x
Browse files Browse the repository at this point in the history
Merge 2.0.x to master
  • Loading branch information
rasabot authored Oct 30, 2020
2 parents fcdff3f + 83f2931 commit a789072
Show file tree
Hide file tree
Showing 15 changed files with 445 additions and 176 deletions.
18 changes: 18 additions & 0 deletions CHANGELOG.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,24 @@ https://github.com/RasaHQ/rasa/tree/master/changelog/ . -->

<!-- TOWNCRIER -->

## [2.0.3] - 2020-10-29


### Bugfixes
- [#7089](https://github.com/rasahq/rasa/issues/7089): Fix [ConveRTTokenizer](components.mdx#converttokenizer) failing because of wrong model URL by making the `model_url` parameter of `ConveRTTokenizer` mandatory.

Since the ConveRT model was taken [offline](https://github.com/RasaHQ/rasa/issues/6806), we can no longer use
the earlier public URL of the model. Additionally, since the licence for the model is unknown,
we cannot host it ourselves. Users can still use the component by setting `model_url` to a community/self-hosted
model URL or path to a local directory containing model files. For example:
```yaml
pipeline:
- name: ConveRTTokenizer
model_url: <remote/local path to model>
```
- [#7108](https://github.com/rasahq/rasa/issues/7108): Update example formbot to use `FormValidationAction` for slot validation
## [2.0.2] - 2020-10-22
Expand Down
2 changes: 1 addition & 1 deletion CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Code in the `rasa.shared` package is potentially re-used by downstream dependencies
# such as Rasa X. Hence, changes within this package require double checking.
/rasa/shared/ @backend
/rasa/shared/ @RasaHQ/backend
/docs/docs/prototype-an-assistant.mdx @ricwo @alwx
10 changes: 8 additions & 2 deletions docs/docs/components.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -453,10 +453,16 @@ word vectors in your pipeline.
"intent_split_symbol": "_"
# Regular expression to detect tokens
"token_pattern": None
# Remote URL of hosted model
"model_url": TF_HUB_MODULE_URL
# Remote URL/Local directory of model files (Required)
"model_url": None
```

:::note
Since the public URL of the ConveRT model was taken offline recently, it is now mandatory
to set the parameter `model_url` to a community/self-hosted URL or path to a local directory containing model files.

:::


### LanguageModelTokenizer

Expand Down
4 changes: 2 additions & 2 deletions docs/docs/playground.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,9 @@ responses:
What is your email address?
utter_subscribed:
- text: |
I've subscribed {email} to the newsletter!
Check your inbox at {email} in order to finish subscribing to the newsletter!
- text: |
You've been subscribed, the newsletter will be sent to {email}.
You're all set! Check your inbox at {email} to confirm your subscription.
```

</AssistantBuilder.Code>
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/setting-up-ci-cd.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ to your server as part of the continuous deployment process.

### Testing Your Assistant

Testing your trained model on [test stories](./testing-your-assistant.mdx#end-to-end-testing) is the best way to have confidence in how your assistant
Testing your trained model on [test stories](./testing-your-assistant.mdx#writing-test-stories) is the best way to have confidence in how your assistant
will act in certain situations. Written in a modified story
format, test stories allow you to provide entire conversations and test that, given certain
user input, your model will behave in the expected manner. This is especially
Expand Down
191 changes: 114 additions & 77 deletions docs/docs/testing-your-assistant.mdx

Large diffs are not rendered by default.

53 changes: 4 additions & 49 deletions examples/formbot/actions/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,48 +2,14 @@

from rasa_sdk import Tracker
from rasa_sdk.executor import CollectingDispatcher
from rasa_sdk.forms import FormAction
from rasa_sdk.forms import FormValidationAction


class RestaurantForm(FormAction):
"""Example of a custom form action."""
class ValidateRestaurantForm(FormValidationAction):
"""Example of a form validation action."""

def name(self) -> Text:
"""Unique identifier of the form."""

return "restaurant_form"

@staticmethod
def required_slots(tracker: Tracker) -> List[Text]:
"""A list of required slots that the form has to fill."""

return ["cuisine", "num_people", "outdoor_seating", "preferences", "feedback"]

def slot_mappings(self) -> Dict[Text, Union[Dict, List[Dict]]]:
"""A dictionary to map required slots to
- an extracted entity
- intent: value pairs
- a whole message
or a list of them, where a first match will be picked."""

return {
"cuisine": self.from_entity(entity="cuisine", not_intent="chitchat"),
"num_people": [
self.from_entity(
entity="number", intent=["inform", "request_restaurant"]
),
],
"outdoor_seating": [
self.from_entity(entity="seating"),
self.from_intent(intent="affirm", value=True),
self.from_intent(intent="deny", value=False),
],
"preferences": [
self.from_intent(intent="deny", value="no additional preferences"),
self.from_text(not_intent="affirm"),
],
"feedback": [self.from_entity(entity="feedback"), self.from_text()],
}
return "validate_restaurant_form"

@staticmethod
def cuisine_db() -> List[Text]:
Expand Down Expand Up @@ -127,14 +93,3 @@ def validate_outdoor_seating(
else:
# affirm/deny was picked up as True/False by the from_intent mapping
return {"outdoor_seating": value}

def submit(
self,
dispatcher: CollectingDispatcher,
tracker: Tracker,
domain: Dict[Text, Any],
) -> List[Dict]:
"""Define what the form has to do after all required slots are filled."""

dispatcher.utter_message(template="utter_submit")
return []
3 changes: 3 additions & 0 deletions examples/formbot/domain.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ forms:
entity: feedback
- type: from_text

actions:
- validate_restaurant_form

session_config:
session_expiration_time: 60 # value in minutes
carry_over_slots_to_new_session: true
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ exclude = "((.eggs | .git | .pytest_cache | build | dist))"

[tool.poetry]
name = "rasa"
version = "2.0.2"
version = "2.0.3"
description = "Open source machine learning framework to automate text- and voice-based conversations: NLU, dialogue management, connect to Slack, Facebook, and more - Create chatbots and voice assistants"
authors = [ "Rasa Technologies GmbH <[email protected]>",]
maintainers = [ "Tom Bocklisch <[email protected]>",]
Expand Down
124 changes: 119 additions & 5 deletions rasa/nlu/tokenizers/convert_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,27 @@
from rasa.nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer
from rasa.shared.nlu.training_data.message import Message
from rasa.utils import common
import rasa.nlu.utils
import rasa.utils.train_utils as train_utils
from rasa.exceptions import RasaException
import tensorflow as tf
import os


TF_HUB_MODULE_URL = (
# URL to the old remote location of the model which
# users might use. The model is no longer hosted here.
ORIGINAL_TF_HUB_MODULE_URL = (
"https://github.com/PolyAI-LDN/polyai-models/releases/download/v1.0/model.tar.gz"
)

# Warning: This URL is only intended for running pytests on ConveRT
# related components. This URL should not be allowed to be used by the user.
RESTRICTED_ACCESS_URL = "https://storage.googleapis.com/continuous-integration-model-storage/convert_tf2.tar.gz"


class ConveRTTokenizer(WhitespaceTokenizer):
"""Tokenizer using ConveRT model.
Loads the ConveRT(https://github.com/PolyAI-LDN/polyai-models#convert)
model from TFHub and computes sub-word tokens for dense
featurizable attributes of each message object.
Expand All @@ -30,25 +40,129 @@ class ConveRTTokenizer(WhitespaceTokenizer):
"intent_split_symbol": "_",
# Regular expression to detect tokens
"token_pattern": None,
# Remote URL of hosted model
"model_url": TF_HUB_MODULE_URL,
# Remote URL/Local path to model files
"model_url": None,
}

def __init__(self, component_config: Dict[Text, Any] = None) -> None:
"""Construct a new tokenizer using the WhitespaceTokenizer framework."""
"""Construct a new tokenizer using the WhitespaceTokenizer framework.
Args:
component_config: User configuration for the component
"""
super().__init__(component_config)

self.model_url = self.component_config.get("model_url", TF_HUB_MODULE_URL)
self.model_url = self._get_validated_model_url()

self.module = train_utils.load_tf_hub_model(self.model_url)

self.tokenize_signature = self.module.signatures["tokenize"]

@staticmethod
def _validate_model_files_exist(model_directory: Text) -> None:
    """Check that the essential model files exist inside `model_directory`.

    The directory is expected to contain `saved_model.pb` plus the
    `variables/` index file and its two data shards.

    Args:
        model_directory: Directory to investigate.

    Raises:
        RasaException: On the first expected file that is missing. The
            message names the missing file and lists every file that is
            required.
    """
    files_to_check = [
        os.path.join(model_directory, "saved_model.pb"),
        os.path.join(model_directory, "variables/variables.index"),
        os.path.join(model_directory, "variables/variables.data-00001-of-00002"),
        os.path.join(model_directory, "variables/variables.data-00000-of-00002"),
    ]

    for file_path in files_to_check:
        if not os.path.exists(file_path):
            # Adjacent single-line f-strings keep the message on one line; a
            # triple-quoted literal would leak source newlines/indentation
            # into the text shown to the user.
            raise RasaException(
                f"File {file_path} does not exist. "
                f"Re-check the files inside the directory {model_directory}. "
                f"It should contain the following model "
                f"files - [{', '.join(files_to_check)}]"
            )

def _get_validated_model_url(self) -> Text:
    """Validate the configured `model_url` and return a usable model location.

    The `model_url` parameter cannot be left empty. It can either be set to
    a remote URL where the model is hosted or to the path of a local
    directory containing the model files.

    Returns:
        The remote URL unchanged, or the absolute path of the local model
        directory.

    Raises:
        RasaException: If `model_url` is missing or empty, equals the
            original (no longer hosted) URL, equals the URL reserved for
            tests, points to a file, or is neither a well-formed URL nor an
            existing local directory. Also raised (by
            `_validate_model_files_exist`) if a local directory lacks the
            expected model files.
    """
    # Guidance appended to every error below; kept in one place so the
    # wording cannot drift between messages.
    hint = (
        "You can either use a community hosted URL or if you have a "
        "local copy of the model, pass the path to the directory "
        "containing the model files."
    )
    component_name = ConveRTTokenizer.__name__
    model_url = self.component_config.get("model_url")

    if not model_url:
        raise RasaException(
            f'Parameter "model_url" was not specified in the configuration '
            f'of "{component_name}". {hint}'
        )

    if model_url == ORIGINAL_TF_HUB_MODULE_URL:
        # Can't use the originally hosted URL
        raise RasaException(
            f'Parameter "model_url" of "{component_name}" was set to '
            f'"{model_url}" which does not contain the model any longer. '
            f"{hint}"
        )

    if model_url == RESTRICTED_ACCESS_URL:
        # Can't use the URL that is reserved for tests only
        raise RasaException(
            f'Parameter "model_url" of "{component_name}" was set to '
            f'"{model_url}" which is strictly reserved for pytests of '
            f"Rasa Open Source only. Due to licensing issues you are not "
            f"allowed to use the model from this URL. {hint}"
        )

    if os.path.isfile(model_url):
        # Definitely invalid since the specified path should be a directory
        raise RasaException(
            f'Parameter "model_url" of "{component_name}" was set to the '
            f"path of a file which is invalid. {hint}"
        )

    if rasa.nlu.utils.is_url(model_url):
        return model_url

    if os.path.isdir(model_url):
        # Looks like a local directory. Inspect the directory
        # to see if model files exist.
        self._validate_model_files_exist(model_url)
        # Convert the path to an absolute one since
        # TFHUB doesn't like relative paths
        return os.path.abspath(model_url)

    raise RasaException(
        f"{model_url} is neither a valid remote URL nor a local directory. "
        f"{hint}"
    )

@classmethod
def cache_key(
    cls, component_meta: Dict[Text, Any], model_metadata: Metadata
) -> Optional[Text]:
    """Build the key under which this component is cached.

    The key is the component name followed by a hash of its configuration,
    where the configuration is `cls.defaults` combined with `component_meta`
    via `common.update_existing_keys`.
    NOTE(review): presumably only keys already present in the defaults are
    taken from `component_meta` - confirm against `update_existing_keys`.

    Args:
        component_meta: Configuration for the component.
        model_metadata: Configuration for the whole pipeline (not used by
            this implementation).

    Returns:
        Key of the cache for future retrievals.
    """
    effective_config = common.update_existing_keys(cls.defaults, component_meta)
    config_hash = get_dict_hash(effective_config)
    return f"{cls.name}-{config_hash}"

Expand Down
23 changes: 18 additions & 5 deletions rasa/nlu/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,24 @@ def is_model_dir(model_dir: Text) -> bool:


# Regex adapted from https://stackoverflow.com/a/7160778/3001665
# Compiled once at import time rather than on every call to `is_url`.
_URL_REGEX = re.compile(
    r"^(?:http|ftp|file)s?://"  # http://, https://, ftp://, ftps:// or file://
    r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|"  # domain
    r"localhost|"  # localhost
    r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # or ip
    r"(?::\d+)?"  # optional port
    r"(?:/?|[/?]\S+)$",
    re.IGNORECASE,
)


def is_url(resource_name: Text) -> bool:
    """Check whether the url specified is a well formed one.

    A well formed URL has an `http`, `ftp` or `file` scheme (optionally
    followed by `s`), then a host (domain name, `localhost` or dotted IPv4
    address), an optional port and an optional path/query without
    whitespace. Because a host is required, host-less URLs such as
    `file:///some/path` are not considered well formed.

    Args:
        resource_name: Remote URL to validate.

    Returns:
        `True` if valid, otherwise `False`.
    """
    return _URL_REGEX.match(resource_name) is not None


Expand Down
2 changes: 1 addition & 1 deletion rasa/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# this file will automatically be changed,
# do not add anything but the version number here!
__version__ = "2.0.2"
__version__ = "2.0.3"
Loading

0 comments on commit a789072

Please sign in to comment.