Skip to content

Commit

Permalink
fix(clean): escape apostrophes in code exported by clean_dupl
Browse files (browse the repository at this point in the history)
  • Loading branch information
ryanwdale committed Apr 24, 2021
1 parent 5df88aa commit e6ea7e9
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions dataprep/clean/clean_duplication_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -219,9 +219,13 @@ def _create_replace_calls(
df_name, col = self._df_name, f"'{self._col}'"
replace_calls = []
for idx, cluster in enumerate(cluster_page):
- cluster_repr = f"'{new_values[idx]}'"
if do_merge[idx]:
- cluster_vals = [f"'{val}'" for val, _ in cluster if f"'{val}'" != cluster_repr]
+ # create the string that all the values in the cluster will be set to
+ cluster_repr = new_values[idx].replace("'", "\\'")
+ cluster_repr = f"'{cluster_repr}'"
+ # create the strings to be replaced
+ cluster_vals = [val.replace("'", "\\'") for val, _ in cluster]
+ cluster_vals = [f"'{val}'" for val in cluster_vals if f"'{val}'" != cluster_repr]
code = (
f"{df_name}[{col}] = {df_name}[{col}].replace"
f"([{', '.join(cluster_vals)}], {cluster_repr})"
Expand Down

0 comments on commit e6ea7e9

Please sign in to comment.