From b81921ad1993ce6cb3d3f5a13984874c8327de60 Mon Sep 17 00:00:00 2001 From: mroberti Date: Wed, 24 Jun 2020 14:39:43 +0200 Subject: [PATCH 1/2] Resolve issue concerning multiple targets capabilities --- app/analyzers/word2vec.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/analyzers/word2vec.py b/app/analyzers/word2vec.py index 149b1070..9b130a39 100644 --- a/app/analyzers/word2vec.py +++ b/app/analyzers/word2vec.py @@ -226,7 +226,9 @@ def _add_doc_and_target_sentences_to_batch(self, - number of document not added to batch """ for target_sentence in target_sentences: - flattened_target_sentence = helpers.utils.flatten_sentence(target_sentence, sep_str='') + separators_without_special_char = re.sub(r'\\(.)', r'\1', self.model_settings["separators"]) + flattened_target_sentence = helpers.utils.flatten_sentence(target_sentence, + sep_str=separators_without_special_char) for aggregator_sentence in aggr_sentences: flattened_aggregator_sentence = helpers.utils.flatten_sentence(aggregator_sentence) From e5d6e6506e96b2925ef8750863ffa445d434053a Mon Sep 17 00:00:00 2001 From: mroberti Date: Wed, 24 Jun 2020 15:13:05 +0200 Subject: [PATCH 2/2] Add comment word2vec code --- app/analyzers/word2vec.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/analyzers/word2vec.py b/app/analyzers/word2vec.py index 9b130a39..ab2e7534 100644 --- a/app/analyzers/word2vec.py +++ b/app/analyzers/word2vec.py @@ -226,6 +226,8 @@ def _add_doc_and_target_sentences_to_batch(self, - number of document not added to batch """ for target_sentence in target_sentences: + # Remove the escape character '\' that escapes regex special characters. + # This escape character may be present in the regular expression contained in separators. separators_without_special_char = re.sub(r'\\(.)', r'\1', self.model_settings["separators"]) flattened_target_sentence = helpers.utils.flatten_sentence(target_sentence, sep_str=separators_without_special_char)