From 80e3db2574fb6e5f14de00bf6236238890e60571 Mon Sep 17 00:00:00 2001 From: Vignesh Muthu <36441437+vigneshp826@users.noreply.github.com> Date: Thu, 21 Nov 2019 21:13:21 +0530 Subject: [PATCH 1/9] Convert multiple links in text to original content --- rasa/core/channels/slack.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rasa/core/channels/slack.py b/rasa/core/channels/slack.py index d3c66cd04860..6289ea96f9f5 100644 --- a/rasa/core/channels/slack.py +++ b/rasa/core/channels/slack.py @@ -210,17 +210,17 @@ def _sanitize_user_message(text, uids_to_remove) -> Text: ]: text = re.sub(regex, replacement, text) - """Find mailto or http links like <mailto:xyz@rasa.com|xyz@rasa.com> or '<http://url.com|url.com>in text and substitute it with original content + """Find multiple mailto or http links like <mailto:xyz@rasa.com|xyz@rasa.com> or '<http://url.com|url.com>in text and substitute it with original content """ - pattern = r"\<(mailto:|(http|https):\/\/).*\|.*\>" - match = re.search(pattern, text) + pattern = r"(\<(?:mailto|http|https):\/\/.*?\|.*?\>)" + match = re.findall(pattern, text) if match: - regex = match.group(0) - replacement = regex.split("|")[1] - replacement = replacement.replace(">", "") - text = text.replace(regex, replacement) + for remove in match: + replacement = remove.split("|")[1] + replacement = replacement.replace(">", "") + text = text.replace(remove, replacement) return text.strip() @staticmethod From b499984e429f4b561cd906a39e6f3b3c88132cbc Mon Sep 17 00:00:00 2001 From: Vignesh Muthu <36441437+vigneshp826@users.noreply.github.com> Date: Thu, 21 Nov 2019 21:21:08 +0530 Subject: [PATCH 2/9] Slack sanitization Multi links will be converted to original content --- CHANGELOG.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 757d6624fe72..63f0904f88d8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -21,6 +21,7 @@ Added - Add command line argument ``rasa x --config CONFIG``, to specify path to the policy and NLU pipeline configuration of your bot (default: ``config.yml``) + Changed
------- - Do not retrain the entire Core model if only the ``templates`` section of the domain is changed. @@ -37,6 +38,7 @@ Fixed - Fixed rasa init showing traceback error when user does Keyboard Interrupt before choosing a project path - ``CountVectorsFeaturizer`` featurizes intents only if its analyzer is set to ``word`` - fixed bug where facebooks generic template was not rendered when buttons were ``None`` +- Fixed issue in converting multi links in incoming message as part of Slack sanitization [1.4.5] - 2019-11-14 ^^^^^^^^^^^^^^^^^^^^ From f9758377e206113ca79bff7ae8e35ea0ae17533f Mon Sep 17 00:00:00 2001 From: Vignesh Muthu <36441437+vigneshp826@users.noreply.github.com> Date: Fri, 24 Jan 2020 15:43:08 +0530 Subject: [PATCH 3/9] Url conversion test case addition for converting garbled url's into actuals --- tests/core/test_channels.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/core/test_channels.py b/tests/core/test_channels.py index eeaa7ca4ea2e..218a90d679bb 100644 --- a/tests/core/test_channels.py +++ b/tests/core/test_channels.py @@ -468,6 +468,8 @@ def test_slack_message_sanitization(): target_message_1 = "You can sit here if you want" target_message_2 = "Hey, you can sit here if you want !" target_message_3 = "Hey, you can sit here if you want!" + target_message_4 = "convert garbled url to vicdb-f.net" + target_message_5 = "convert multiple garbled url to vicdb-f.net. Also eemdb-p.net" uid_token = f"<@{test_uid}>" raw_messages = [ @@ -483,6 +485,9 @@ def test_slack_message_sanitization(): "You can sit here{uid}if you want", "Hey {uid}, you can sit here if you want{uid}!", "Hey{uid} , you can sit here if you want {uid}!", + "convert garbled url to <http://vicdb-f.net|vicdb-f.net>", + "convert multiple garbled url to <http://vicdb-f.net|vicdb-f.net>. 
Also <http://eemdb-p.net|eemdb-p.net>", + ] ] @@ -493,6 +498,8 @@ def test_slack_message_sanitization(): target_message_1, target_message_2, target_message_3, + target_message_4, + target_message_5, ] sanitized_messages = [ @@ -512,7 +519,6 @@ def test_slack_message_sanitization(): == 0 ) - def test_slack_init_one_parameter(): from rasa.core.channels.slack import SlackInput From ede58c5913e8bd466a9d02070d5942e59657f7a7 Mon Sep 17 00:00:00 2001 From: vignesh Date: Fri, 24 Jan 2020 16:30:22 +0530 Subject: [PATCH 4/9] changelog for slack sanitization - conversion of Multiple links in message --- changelog/4817.improvement.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 changelog/4817.improvement.rst diff --git a/changelog/4817.improvement.rst b/changelog/4817.improvement.rst new file mode 100644 index 000000000000..71fb53e94139 --- /dev/null +++ b/changelog/4817.improvement.rst @@ -0,0 +1,2 @@ +Part of Slack sanitization: +Multiple garbled URL's in a string coming from slack will be converted into actual strings.
``Example: health check of <http://eemdb.net|eemdb.net> and <http://eemdb1.net|eemdb1.net> to health check of eemdb.net and eemdb1.net`` From 86f53248d10b9591558ff3b1ebeac617324b4cc3 Mon Sep 17 00:00:00 2001 From: vignesh Date: Fri, 24 Jan 2020 19:06:36 +0530 Subject: [PATCH 5/9] removed old changelog format --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 63f0904f88d8..195cab07a00a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -38,7 +38,7 @@ Fixed - Fixed rasa init showing traceback error when user does Keyboard Interrupt before choosing a project path - ``CountVectorsFeaturizer`` featurizes intents only if its analyzer is set to ``word`` - fixed bug where facebooks generic template was not rendered when buttons were ``None`` -- Fixed issue in converting multi links in incoming message as part of Slack sanitization + [1.4.5] - 2019-11-14 ^^^^^^^^^^^^^^^^^^^^ From 1fe68a2ec9e846fa1567e255aa69f7b327cc0597 Mon Sep 17 00:00:00 2001 From: vignesh Date: Fri, 24 Jan 2020 19:07:39 +0530 Subject: [PATCH 6/9] removed old changelog format --- CHANGELOG.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 195cab07a00a..5d936a04415a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -39,7 +39,6 @@ Fixed - ``CountVectorsFeaturizer`` featurizes intents only if its analyzer is set to ``word`` - fixed bug where facebooks generic template was not rendered when buttons were ``None`` - [1.4.5] - 2019-11-14 ^^^^^^^^^^^^^^^^^^^^ From 562e621f4e6516fa892ac2bc7e40a33e6cbfb69f Mon Sep 17 00:00:00 2001 From: vignesh Date: Fri, 24 Jan 2020 19:14:43 +0530 Subject: [PATCH 7/9] formatting the lines --- CHANGELOG.rst | 1 - tests/core/test_channels.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5d936a04415a..757d6624fe72 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -21,7 +21,6 @@ Added - Add command line argument ``rasa x --config CONFIG``, to specify path to the policy and NLU
pipeline configuration of your bot (default: ``config.yml``) - Changed ------- - Do not retrain the entire Core model if only the ``templates`` section of the domain is changed. diff --git a/tests/core/test_channels.py b/tests/core/test_channels.py index 218a90d679bb..13ab47d7347e 100644 --- a/tests/core/test_channels.py +++ b/tests/core/test_channels.py @@ -519,6 +519,7 @@ def test_slack_message_sanitization(): == 0 ) + def test_slack_init_one_parameter(): from rasa.core.channels.slack import SlackInput From 9738e76a370ee0add30ad1d11be945190e104f16 Mon Sep 17 00:00:00 2001 From: vignesh Date: Thu, 30 Jan 2020 17:20:06 +0530 Subject: [PATCH 8/9] removed empty space's using autopep8 command --- tests/core/test_channels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core/test_channels.py b/tests/core/test_channels.py index 13ab47d7347e..0d3b36a88572 100644 --- a/tests/core/test_channels.py +++ b/tests/core/test_channels.py @@ -487,7 +487,7 @@ def test_slack_message_sanitization(): "Hey{uid} , you can sit here if you want {uid}!", "convert garbled url to <http://vicdb-f.net|vicdb-f.net>", "convert multiple garbled url to <http://vicdb-f.net|vicdb-f.net>. Also <http://eemdb-p.net|eemdb-p.net>", - + ] ] From d44b02cea42048988aa87a481cb8171a393a5efe Mon Sep 17 00:00:00 2001 From: vignesh Date: Fri, 31 Jan 2020 19:36:27 +0530 Subject: [PATCH 9/9] formatted using black --- tests/core/test_channels.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/core/test_channels.py b/tests/core/test_channels.py index 0d3b36a88572..90b1e308be77 100644 --- a/tests/core/test_channels.py +++ b/tests/core/test_channels.py @@ -487,7 +487,6 @@ def test_slack_message_sanitization(): "Hey{uid} , you can sit here if you want {uid}!", "convert garbled url to <http://vicdb-f.net|vicdb-f.net>", "convert multiple garbled url to <http://vicdb-f.net|vicdb-f.net>. Also <http://eemdb-p.net|eemdb-p.net>", - ] ]