From 8002b57019d6f741cf7919c3775a48f6f3f6884c Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Tue, 19 Sep 2023 16:36:12 +0100 Subject: [PATCH] refactor: replace `add_num_commas` & `num_add_commas` functions f-strings can format numbers to use a comma as a thousands separator. The `add_num_commas` & `num_add_commas` functions are now redundant. --- .../extract_transform/process_unicode.py | 3 +- .../extract_transform/process_wiki.py | 3 +- .../extract_transform/update_data.py | 35 ++----------------- src/scribe_data/utils.py | 32 +---------------- 4 files changed, 6 insertions(+), 67 deletions(-) diff --git a/src/scribe_data/extract_transform/process_unicode.py b/src/scribe_data/extract_transform/process_unicode.py index 5ed9b9b26..1e8083c3f 100644 --- a/src/scribe_data/extract_transform/process_unicode.py +++ b/src/scribe_data/extract_transform/process_unicode.py @@ -21,7 +21,6 @@ from scribe_data.extract_transform.emoji_utils import get_emoji_codes_to_ignore from scribe_data.load.update_utils import ( - add_num_commas, get_language_iso, get_path_from_et_dir, ) @@ -199,7 +198,7 @@ def gen_emoji_lexicon( if emojis_per_keyword and len(emojis) > emojis_per_keyword: emojis[:] = emojis[:emojis_per_keyword] - total_keywords = add_num_commas(num=len(keyword_dict)) + total_keywords = f"{len(keyword_dict):,}" if verbose: print( diff --git a/src/scribe_data/extract_transform/process_wiki.py b/src/scribe_data/extract_transform/process_wiki.py index 319258687..fbbe2de2a 100644 --- a/src/scribe_data/extract_transform/process_wiki.py +++ b/src/scribe_data/extract_transform/process_wiki.py @@ -22,7 +22,6 @@ from tqdm.auto import tqdm from scribe_data.utils import ( # get_android_data_path, get_desktop_data_path, - add_num_commas, get_ios_data_path, get_language_qid, get_language_words_to_ignore, @@ -142,7 +141,7 @@ def clean( ) print( - f"Randomly sampling {add_num_commas(len(selected_idxs))} {language.capitalize()} Wikipedia articles..." + f"Randomly sampling {len(selected_idxs):,} {language.capitalize()} Wikipedia articles..." ) texts = [texts[i] for i in selected_idxs] print("Random sampling finished.") diff --git a/src/scribe_data/extract_transform/update_data.py b/src/scribe_data/extract_transform/update_data.py index ca13fdac3..d2f508054 100644 --- a/src/scribe_data/extract_transform/update_data.py +++ b/src/scribe_data/extract_transform/update_data.py @@ -32,7 +32,6 @@ sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) from scribe_data.utils import ( - add_num_commas, check_and_return_command_line_args, get_ios_data_path, get_path_from_et_dir, @@ -240,34 +239,6 @@ json.dump(current_data, f, ensure_ascii=False, indent=0) -def num_add_commas(num): - """ - Adds commas to a numeric string for readability. - - Parameters - ---------- - num : int - An int to have commas added to. - - Returns - ------- - str_with_commas : str - The original number with commas to make it more readable. - """ - num_str = str(num) - - str_list = list(num_str) - str_list = str_list[::-1] - - str_list_with_commas = [ - f"{s}," if i % 3 == 0 and i != 0 else s for i, s in enumerate(str_list) - ] - - str_list_with_commas = str_list_with_commas[::-1] - - return "".join(str_list_with_commas) - - # Update data_table.txt current_data_df = pd.DataFrame( index=sorted(list(current_data.keys())), @@ -277,9 +248,9 @@ def num_add_commas(num): list(current_data_df.index), list(current_data_df.columns) ): if wt in current_data[lang].keys(): - current_data_df.loc[lang, wt] = num_add_commas(current_data[lang][wt]) + current_data_df.loc[lang, wt] = f"{current_data[lang][wt]:,}" elif wt == "translations": - current_data_df.loc[lang, wt] = num_add_commas(67652) + current_data_df.loc[lang, wt] = f"{67652:,}" current_data_df.index.name = "Languages" current_data_df.columns = [c.capitalize() for c in current_data_df.columns] @@ -342,7 +313,7 @@ def num_add_commas(num): elif data_added_dict[l][wt] == 1: # remove the s for label data_added_string += f" {data_added_dict[l][wt]} {wt[:-1]}," else: - data_added_string += f" {add_num_commas(data_added_dict[l][wt])} {wt}," + data_added_string += f" {data_added_dict[l][wt]:,} {wt}," data_added_string = data_added_string[:-1] # remove the last comma diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py index 3c7de07a8..e86acea65 100644 --- a/src/scribe_data/utils.py +++ b/src/scribe_data/utils.py @@ -17,8 +17,7 @@ get_ios_data_path, get_android_data_path, get_desktop_data_path, - check_command_line_args, - add_num_commas + check_command_line_args """ import ast @@ -390,32 +389,3 @@ def check_and_return_command_line_args( python {all_args[0]} '["comma_separated_sets_in_quotes"]' """ ) - - -def add_num_commas(num): - """ - Adds commas to a numeric string for readability. - - Parameters - ---------- - num : int or float - A number to have commas added to. - - Returns - ------- - str_with_commas : str - The original number with commas to make it more readable. - """ - num_str = str(num) - num_str_no_decimal = num_str.split(".")[0] - decimal = num_str.split(".")[1] if "." in num_str else None - - str_list = num_str_no_decimal[::-1] - str_list_with_commas = [ - f"{s}," if i % 3 == 0 and i != 0 else s for i, s in enumerate(str_list) - ] - - str_list_with_commas = str_list_with_commas[::-1] - str_with_commas = "".join(str_list_with_commas) - - return str_with_commas if decimal is None else f"{str_with_commas}.{decimal}"