From e6ea7e97f44b3fef48cf524f505c10df2675084b Mon Sep 17 00:00:00 2001 From: ryanwdale Date: Fri, 23 Apr 2021 20:39:13 -0700 Subject: [PATCH] fix(clean): escape apostrophes in code exported by clean_dupl --- dataprep/clean/clean_duplication_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dataprep/clean/clean_duplication_utils.py b/dataprep/clean/clean_duplication_utils.py index 2139ed6ef..cb2bbb26a 100644 --- a/dataprep/clean/clean_duplication_utils.py +++ b/dataprep/clean/clean_duplication_utils.py @@ -219,9 +219,13 @@ def _create_replace_calls( df_name, col = self._df_name, f"'{self._col}'" replace_calls = [] for idx, cluster in enumerate(cluster_page): - cluster_repr = f"'{new_values[idx]}'" if do_merge[idx]: - cluster_vals = [f"'{val}'" for val, _ in cluster if f"'{val}'" != cluster_repr] + # create the string that all the values in the cluster will be set to + cluster_repr = new_values[idx].replace("'", "\\'") + cluster_repr = f"'{cluster_repr}'" + # create the strings to be replaced + cluster_vals = [val.replace("'", "\\'") for val, _ in cluster] + cluster_vals = [f"'{val}'" for val in cluster_vals if f"'{val}'" != cluster_repr] code = ( f"{df_name}[{col}] = {df_name}[{col}].replace" f"([{', '.join(cluster_vals)}], {cluster_repr})"