From 1837a4c0de3d98f81a9382286dedabfec0a39357 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=B0=D0=B2=D1=8B=D0=B4=D1=8C=D1=8F=D0=BD=20=D0=94?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D0=BB=20=D0=90=D0=BD=D0=B4=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=BA=D0=BE=D0=B2=D0=B8=D1=87?= <19140612@cab-wsm-0925543.igd_rostelecom> Date: Fri, 29 Dec 2023 11:21:37 +0300 Subject: [PATCH 1/5] PNLP-8004: mask numbers in bubbles and pronounceText --- core/message/from_message.py | 3 ++- core/utils/utils.py | 12 +++++++++- smart_kit/message/smartapp_to_message.py | 2 ++ .../test_utils/test_mask_numbers.py | 24 +++++++++++++++++++ 4 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 tests/core_tests/test_utils/test_mask_numbers.py diff --git a/core/message/from_message.py b/core/message/from_message.py index f9e0bb3f..971a88ac 100644 --- a/core/message/from_message.py +++ b/core/message/from_message.py @@ -11,7 +11,7 @@ import core.logging.logger_constants as log_const from core.logging.logger_utils import log from core.utils.masking_message import masking -from core.utils.utils import current_time_ms +from core.utils.utils import current_time_ms, mask_numbers from core.message.msg_validator import MessageValidator from smart_kit.configs import get_app_config @@ -253,6 +253,7 @@ def generate_new_callback_id(self) -> str: @property def masked_value(self) -> str: masked_data = masking(self.as_dict, self.masking_fields) + mask_numbers(masked_data) return json.dumps(masked_data, ensure_ascii=False) @property diff --git a/core/utils/utils.py b/core/utils/utils.py index f4aa15c3..efa98fb0 100644 --- a/core/utils/utils.py +++ b/core/utils/utils.py @@ -8,7 +8,7 @@ from collections import OrderedDict from math import isnan, isinf -from typing import Optional +from typing import Optional, Any, Dict from time import time from scenarios.user.user_model import User @@ -143,3 +143,13 @@ def deep_update_dict(original, update): elif isinstance(value, dict): deep_update_dict(value, update[key]) return update + + +def mask_numbers(masked_message: Dict[str, Any]): + items = masked_message.get("payload", {}).get("items", []) + for item in items: + if "bubble" in item: + item["bubble"]["text"] = re.sub(r"\d+(?:[.,]\d+)?", "*number*", item["bubble"]["text"]) + pronounce_text = masked_message.get("payload", {}).get("pronounceText") + if pronounce_text is not None: + masked_message["payload"]["pronounceText"] = re.sub(r"\d+(?:[.,]\d+)", "*number*", pronounce_text) diff --git a/smart_kit/message/smartapp_to_message.py b/smart_kit/message/smartapp_to_message.py index 067c2547..d4141cdf 100644 --- a/smart_kit/message/smartapp_to_message.py +++ b/smart_kit/message/smartapp_to_message.py @@ -6,6 +6,7 @@ from copy import copy from core.utils.masking_message import masking +from core.utils.utils import mask_numbers from smart_kit.utils import SmartAppToMessage_pb2 if TYPE_CHECKING: @@ -70,6 +71,7 @@ def as_protobuf_message(data_as_dict): @cached_property def masked_value(self): masked_data = masking(self.as_dict, self.masking_fields) + mask_numbers(masked_data) if self.command.loader == "json.dumps": return json.dumps(masked_data, ensure_ascii=False) elif self.command.loader == "protobuf": diff --git a/tests/core_tests/test_utils/test_mask_numbers.py b/tests/core_tests/test_utils/test_mask_numbers.py new file mode 100644 index 00000000..65f6f34e --- /dev/null +++ b/tests/core_tests/test_utils/test_mask_numbers.py @@ -0,0 +1,24 @@ +from unittest import TestCase + +from core.utils.utils import mask_numbers + + +class TestMaskNumbers(TestCase): + def test_1(self): + masked_message = { + "payload": { + "pronounceText": "номер телефона: +79990000000", + "items": [ + { + "bubble": { + "text": "номер телефона: +7 (999) 000-00-00" + } + } + ] + } + } + + mask_numbers(masked_message) + self.assertEqual(masked_message["payload"]["pronounceText"], "номер телефона: +*number*") + self.assertEqual(masked_message["payload"]["items"][0]["bubble"]["text"], + "номер телефона: +*number* (*number*) *number*-*number*-*number*") From 52665c7e36d3a0abbfd2e09164960d6a04975728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=B0=D0=B2=D1=8B=D0=B4=D1=8C=D1=8F=D0=BD=20=D0=94?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D0=BB=20=D0=90=D0=BD=D0=B4=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=BA=D0=BE=D0=B2=D0=B8=D1=87?= <19140612@cab-wsm-0925543.igd_rostelecom> Date: Fri, 29 Dec 2023 12:44:01 +0300 Subject: [PATCH 2/5] PNLP-8004: mask numbers in bubbles and pronounceText --- core/message/from_message.py | 3 +-- core/utils/utils.py | 7 +++++-- smart_kit/message/smartapp_to_message.py | 3 +-- .../core_tests/test_utils/test_mask_numbers.py | 17 +++++++++++++++-- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/core/message/from_message.py b/core/message/from_message.py index 971a88ac..f8da6f04 100644 --- a/core/message/from_message.py +++ b/core/message/from_message.py @@ -252,8 +252,7 @@ def generate_new_callback_id(self) -> str: @property def masked_value(self) -> str: - masked_data = masking(self.as_dict, self.masking_fields) - mask_numbers(masked_data) + masked_data = mask_numbers(masking(self.as_dict, self.masking_fields)) return json.dumps(masked_data, ensure_ascii=False) @property diff --git a/core/utils/utils.py b/core/utils/utils.py index efa98fb0..30f60b03 100644 --- a/core/utils/utils.py +++ b/core/utils/utils.py @@ -1,4 +1,5 @@ # coding=utf-8 +import copy import datetime import gc import json @@ -145,11 +146,13 @@ def deep_update_dict(original, update): return update -def mask_numbers(masked_message: Dict[str, Any]): +def mask_numbers(message: Dict[str, Any]) -> Dict[str, Any]: + masked_message = copy.deepcopy(message) items = masked_message.get("payload", {}).get("items", []) for item in items: if "bubble" in item: item["bubble"]["text"] = re.sub(r"\d+(?:[.,]\d+)?", "*number*", item["bubble"]["text"]) pronounce_text = masked_message.get("payload", {}).get("pronounceText") if pronounce_text is not None: - masked_message["payload"]["pronounceText"] = re.sub(r"\d+(?:[.,]\d+)", "*number*", pronounce_text) + masked_message["payload"]["pronounceText"] = re.sub(r"\d+(?:[.,]\d+)?", "*number*", pronounce_text) + return masked_message diff --git a/smart_kit/message/smartapp_to_message.py b/smart_kit/message/smartapp_to_message.py index d4141cdf..eac32a6f 100644 --- a/smart_kit/message/smartapp_to_message.py +++ b/smart_kit/message/smartapp_to_message.py @@ -70,8 +70,7 @@ def as_protobuf_message(data_as_dict): @cached_property def masked_value(self): - masked_data = masking(self.as_dict, self.masking_fields) - mask_numbers(masked_data) + masked_data = mask_numbers(masking(self.as_dict, self.masking_fields)) if self.command.loader == "json.dumps": return json.dumps(masked_data, ensure_ascii=False) elif self.command.loader == "protobuf": diff --git a/tests/core_tests/test_utils/test_mask_numbers.py b/tests/core_tests/test_utils/test_mask_numbers.py index 65f6f34e..288b2744 100644 --- a/tests/core_tests/test_utils/test_mask_numbers.py +++ b/tests/core_tests/test_utils/test_mask_numbers.py @@ -5,7 +5,7 @@ class TestMaskNumbers(TestCase): def test_1(self): - masked_message = { + message = { "payload": { "pronounceText": "номер телефона: +79990000000", "items": [ @@ -18,7 +18,20 @@ def test_1(self): } } - mask_numbers(masked_message) + masked_message = mask_numbers(message) self.assertEqual(masked_message["payload"]["pronounceText"], "номер телефона: +*number*") self.assertEqual(masked_message["payload"]["items"][0]["bubble"]["text"], "номер телефона: +*number* (*number*) *number*-*number*-*number*") + + self.assertEqual(message, { + "payload": { + "pronounceText": "номер телефона: +79990000000", + "items": [ + { + "bubble": { + "text": "номер телефона: +7 (999) 000-00-00" + } + } + ] + } + }) From 15882ee85bd56d5817407c4a797c3fde5d261573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=B0=D0=B2=D1=8B=D0=B4=D1=8C=D1=8F=D0=BD=20=D0=94?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D0=BB=20=D0=90=D0=BD=D0=B4=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=BA=D0=BE=D0=B2=D0=B8=D1=87?= <19140612@cab-wsm-0925543.igd_rostelecom> Date: Tue, 9 Jan 2024 11:08:05 +0300 Subject: [PATCH 3/5] PNLP-8004: mask numbers in bubbles and pronounceText --- core/message/from_message.py | 5 ++++- core/utils/utils.py | 2 +- smart_kit/message/smartapp_to_message.py | 5 ++++- smart_kit/template/static/configs/template_config.yml | 1 + 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/core/message/from_message.py b/core/message/from_message.py index f8da6f04..8f448169 100644 --- a/core/message/from_message.py +++ b/core/message/from_message.py @@ -15,6 +15,7 @@ from core.message.msg_validator import MessageValidator from smart_kit.configs import get_app_config +from smart_kit.configs import settings class Headers: @@ -252,7 +253,9 @@ def generate_new_callback_id(self) -> str: @property def masked_value(self) -> str: - masked_data = mask_numbers(masking(self.as_dict, self.masking_fields)) + mask_numbers_flag = settings.Settings()["template_settings"]["mask_numbers"] + masked_data = mask_numbers(masking(self.as_dict, self.masking_fields)) if mask_numbers_flag else \ + masking(self.as_dict, self.masking_fields) return json.dumps(masked_data, ensure_ascii=False) @property diff --git a/core/utils/utils.py b/core/utils/utils.py index 30f60b03..cd3e3b07 100644 --- a/core/utils/utils.py +++ b/core/utils/utils.py @@ -150,7 +150,7 @@ def mask_numbers(message: Dict[str, Any]) -> Dict[str, Any]: masked_message = copy.deepcopy(message) items = masked_message.get("payload", {}).get("items", []) for item in items: - if "bubble" in item: + if "bubble" in item and "text" and item["bubble"]: item["bubble"]["text"] = re.sub(r"\d+(?:[.,]\d+)?", "*number*", item["bubble"]["text"]) pronounce_text = masked_message.get("payload", {}).get("pronounceText") if pronounce_text is not None: diff --git a/smart_kit/message/smartapp_to_message.py b/smart_kit/message/smartapp_to_message.py index eac32a6f..0d964997 100644 --- a/smart_kit/message/smartapp_to_message.py +++ b/smart_kit/message/smartapp_to_message.py @@ -7,6 +7,7 @@ from core.utils.masking_message import masking from core.utils.utils import mask_numbers +from smart_kit.configs import settings from smart_kit.utils import SmartAppToMessage_pb2 if TYPE_CHECKING: @@ -70,7 +71,9 @@ def as_protobuf_message(data_as_dict): @cached_property def masked_value(self): - masked_data = mask_numbers(masking(self.as_dict, self.masking_fields)) + mask_numbers_flag = settings.Settings()["template_settings"]["mask_numbers"] + masked_data = mask_numbers(masking(self.as_dict, self.masking_fields)) if mask_numbers_flag else \ + masking(self.as_dict, self.masking_fields) if self.command.loader == "json.dumps": return json.dumps(masked_data, ensure_ascii=False) elif self.command.loader == "protobuf": diff --git a/smart_kit/template/static/configs/template_config.yml b/smart_kit/template/static/configs/template_config.yml index 6acdd793..b0fd5ff0 100644 --- a/smart_kit/template/static/configs/template_config.yml +++ b/smart_kit/template/static/configs/template_config.yml @@ -21,3 +21,4 @@ self_service_with_state_save_messages: true project_id: template-app-id consumer_topic: "app" kafka_message_key_recovery_log_level: "DEBUG" +mask_numbers: false From 13ab07de6a16ede1817194e1e878a7a045d40aae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=B0=D0=B2=D1=8B=D0=B4=D1=8C=D1=8F=D0=BD=20=D0=94?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D0=BB=20=D0=90=D0=BD=D0=B4=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=BA=D0=BE=D0=B2=D0=B8=D1=87?= <19140612@cab-wsm-0925543.igd_rostelecom> Date: Tue, 9 Jan 2024 11:25:49 +0300 Subject: [PATCH 4/5] PNLP-8004: mask numbers in bubbles and pronounceText --- core/message/from_message.py | 2 +- smart_kit/message/smartapp_to_message.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/message/from_message.py b/core/message/from_message.py index 8f448169..51b1c4cd 100644 --- a/core/message/from_message.py +++ b/core/message/from_message.py @@ -253,7 +253,7 @@ def generate_new_callback_id(self) -> str: @property def masked_value(self) -> str: - mask_numbers_flag = settings.Settings()["template_settings"]["mask_numbers"] + mask_numbers_flag = settings.Settings()["template_settings"].get("mask_numbers", False) masked_data = mask_numbers(masking(self.as_dict, self.masking_fields)) if mask_numbers_flag else \ masking(self.as_dict, self.masking_fields) return json.dumps(masked_data, ensure_ascii=False) diff --git a/smart_kit/message/smartapp_to_message.py b/smart_kit/message/smartapp_to_message.py index 0d964997..d6fc1a36 100644 --- a/smart_kit/message/smartapp_to_message.py +++ b/smart_kit/message/smartapp_to_message.py @@ -71,7 +71,7 @@ def as_protobuf_message(data_as_dict): @cached_property def masked_value(self): - mask_numbers_flag = settings.Settings()["template_settings"]["mask_numbers"] + mask_numbers_flag = settings.Settings()["template_settings"].get("mask_numbers", False) masked_data = mask_numbers(masking(self.as_dict, self.masking_fields)) if mask_numbers_flag else \ masking(self.as_dict, self.masking_fields) if self.command.loader == "json.dumps": From e4dd0160beb8d16d0d37e9004dfcf6b9deb8e0bb Mon Sep 17 00:00:00 2001 From: Dan1lD Date: Tue, 9 Jan 2024 11:28:54 +0300 Subject: [PATCH 5/5] fix typo --- core/utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/utils/utils.py b/core/utils/utils.py index cd3e3b07..abca12c5 100644 --- a/core/utils/utils.py +++ b/core/utils/utils.py @@ -150,7 +150,7 @@ def mask_numbers(message: Dict[str, Any]) -> Dict[str, Any]: masked_message = copy.deepcopy(message) items = masked_message.get("payload", {}).get("items", []) for item in items: - if "bubble" in item and "text" and item["bubble"]: + if "bubble" in item and "text" in item["bubble"]: item["bubble"]["text"] = re.sub(r"\d+(?:[.,]\d+)?", "*number*", item["bubble"]["text"]) pronounce_text = masked_message.get("payload", {}).get("pronounceText") if pronounce_text is not None: