Skip to content

Commit

Permalink
refactor: replace add_num_commas & num_add_commas functions
Browse files Browse the repository at this point in the history
f-strings can format numbers to use a comma as a thousands separator. The
`add_num_commas` & `num_add_commas` functions are now redundant.
  • Loading branch information
m-charlton committed Sep 19, 2023
1 parent b32c0e1 commit 8002b57
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 67 deletions.
3 changes: 1 addition & 2 deletions src/scribe_data/extract_transform/process_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

from scribe_data.extract_transform.emoji_utils import get_emoji_codes_to_ignore
from scribe_data.load.update_utils import (
add_num_commas,
get_language_iso,
get_path_from_et_dir,
)
Expand Down Expand Up @@ -199,7 +198,7 @@ def gen_emoji_lexicon(
if emojis_per_keyword and len(emojis) > emojis_per_keyword:
emojis[:] = emojis[:emojis_per_keyword]

total_keywords = add_num_commas(num=len(keyword_dict))
total_keywords = f"{len(keyword_dict):,}"

if verbose:
print(
Expand Down
3 changes: 1 addition & 2 deletions src/scribe_data/extract_transform/process_wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from tqdm.auto import tqdm

from scribe_data.utils import ( # get_android_data_path, get_desktop_data_path,
add_num_commas,
get_ios_data_path,
get_language_qid,
get_language_words_to_ignore,
Expand Down Expand Up @@ -142,7 +141,7 @@ def clean(
)

print(
f"Randomly sampling {add_num_commas(len(selected_idxs))} {language.capitalize()} Wikipedia articles..."
f"Randomly sampling {len(selected_idxs):,} {language.capitalize()} Wikipedia articles..."
)
texts = [texts[i] for i in selected_idxs]
print("Random sampling finished.")
Expand Down
35 changes: 3 additions & 32 deletions src/scribe_data/extract_transform/update_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import (
add_num_commas,
check_and_return_command_line_args,
get_ios_data_path,
get_path_from_et_dir,
Expand Down Expand Up @@ -240,34 +239,6 @@
json.dump(current_data, f, ensure_ascii=False, indent=0)


def num_add_commas(num):
    """
    Adds commas to an integer for readability.

    Parameters
    ----------
        num : int
            An int to have commas added to.

    Returns
    -------
        str_with_commas : str
            The original number with commas to make it more readable.
    """
    num_str = str(num)

    # Split off a leading sign so it is never counted as a digit; otherwise
    # -123 would come out as "-,123".
    sign = num_str[0] if num_str.startswith(("-", "+")) else ""
    digits = num_str[len(sign):]

    # Slice from the right in steps of three so that only the leftmost group
    # can be shorter than three characters.
    groups = [digits[max(end - 3, 0):end] for end in range(len(digits), 0, -3)]

    return sign + ",".join(reversed(groups))


# Update data_table.txt
current_data_df = pd.DataFrame(
index=sorted(list(current_data.keys())),
Expand All @@ -277,9 +248,9 @@ def num_add_commas(num):
list(current_data_df.index), list(current_data_df.columns)
):
if wt in current_data[lang].keys():
current_data_df.loc[lang, wt] = num_add_commas(current_data[lang][wt])
current_data_df.loc[lang, wt] = f"{current_data[lang][wt]:,}"
elif wt == "translations":
current_data_df.loc[lang, wt] = num_add_commas(67652)
current_data_df.loc[lang, wt] = f"{67652:,}"

current_data_df.index.name = "Languages"
current_data_df.columns = [c.capitalize() for c in current_data_df.columns]
Expand Down Expand Up @@ -342,7 +313,7 @@ def num_add_commas(num):
elif data_added_dict[l][wt] == 1: # remove the s for label
data_added_string += f" {data_added_dict[l][wt]} {wt[:-1]},"
else:
data_added_string += f" {add_num_commas(data_added_dict[l][wt])} {wt},"
data_added_string += f" {data_added_dict[l][wt]:,} {wt},"

data_added_string = data_added_string[:-1] # remove the last comma

Expand Down
32 changes: 1 addition & 31 deletions src/scribe_data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
get_ios_data_path,
get_android_data_path,
get_desktop_data_path,
check_command_line_args,
add_num_commas
check_command_line_args
"""

import ast
Expand Down Expand Up @@ -390,32 +389,3 @@ def check_and_return_command_line_args(
python {all_args[0]} '["comma_separated_sets_in_quotes"]'
"""
)


def add_num_commas(num):
    """
    Adds commas to a number for readability.

    Only the part before the decimal point is grouped; any fractional part is
    appended unchanged.

    Parameters
    ----------
        num : int or float
            A number to have commas added to.

    Returns
    -------
        str_with_commas : str
            The original number with commas to make it more readable.
    """
    num_str = str(num)

    # Split off a leading sign so it is never counted as a digit; otherwise
    # -123.5 would come out as "-,123.5".
    sign = num_str[0] if num_str.startswith(("-", "+")) else ""

    # partition yields empty strings for the separator and the fractional part
    # when there is no decimal point, so no separate "has decimal" branch is needed.
    whole, point, fraction = num_str[len(sign):].partition(".")

    # Slice from the right in steps of three so that only the leftmost group
    # can be shorter than three characters.
    groups = [whole[max(end - 3, 0):end] for end in range(len(whole), 0, -3)]

    return sign + ",".join(reversed(groups)) + point + fraction

0 comments on commit 8002b57

Please sign in to comment.