Skip to content

Commit

Permalink
Merge pull request #486 from OmarAI2003/convert-CamelCase-to-snake_case
Browse files Browse the repository at this point in the history
Convert camel case to snake case
  • Loading branch information
andrewtavis authored Nov 11, 2024
2 parents 04e0955 + ad11f2e commit a0e0692
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 23 deletions.
1 change: 1 addition & 0 deletions docs/source/scribe_data/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ Options:
- ``-o, --overwrite``: Whether to overwrite existing files (default: False).
- ``-a, --all``: Get all languages and data types. Can be combined with ``-dt`` to get all languages for a specific data type, or with ``-lang`` to get all data types for a specific language.
- ``-i, --interactive``: Run in interactive mode.
- ``-ic, --identifier-case``: The case format for identifiers in the output data, either ``camel`` or ``snake`` (default: camel).

Examples:

Expand Down
94 changes: 79 additions & 15 deletions src/scribe_data/cli/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@
DEFAULT_SQLITE_EXPORT_DIR,
DEFAULT_TSV_EXPORT_DIR,
get_language_iso,
camel_to_snake,
)


# MARK: JSON


Expand All @@ -45,6 +47,7 @@ def convert_to_json(
input_file: str,
output_dir: str = None,
overwrite: bool = False,
identifier_case: str = "camel",
) -> None:
"""
Convert a CSV/TSV file to JSON.
Expand All @@ -69,6 +72,9 @@ def convert_to_json(
overwrite : bool
Whether to overwrite existing files.
identifier_case : str
The case format for identifiers. Default is "camel".
Returns
-------
None
Expand Down Expand Up @@ -118,15 +124,22 @@ def convert_to_json(
elif len(keys) == 2:
# Handle Case: { key: value }.
for row in rows:
key = row[keys[0]]
key = (
camel_to_snake(row[keys[0]])
if identifier_case == "snake"
else row[keys[0]]
)
value = row[keys[1]]
data[key] = value

elif len(keys) > 2:
if all(col in first_row for col in ["emoji", "is_base", "rank"]):
# Handle Case: { key: [ { emoji: ..., is_base: ..., rank: ... }, { emoji: ..., is_base: ..., rank: ... } ] }.
for row in rows:
key = row.get(reader.fieldnames[0])
if identifier_case == "snake":
key = camel_to_snake(row.get(reader.fieldnames[0]))
else:
key = row.get(reader.fieldnames[0])
emoji = row.get("emoji", "").strip()
is_base = (
row.get("is_base", "false").strip().lower() == "true"
Expand All @@ -143,7 +156,14 @@ def convert_to_json(
else:
# Handle Case: { key: { value1: ..., value2: ... } }.
for row in rows:
data[row[keys[0]]] = {k: row[k] for k in keys[1:]}
data[row[keys[0]]] = {
(
camel_to_snake(k)
if identifier_case == "snake"
else k
): row[k]
for k in keys[1:]
}

except (IOError, csv.Error) as e:
print(f"Error reading '{input_file_path}': {e}")
Expand Down Expand Up @@ -181,6 +201,7 @@ def convert_to_csv_or_tsv(
input_file: str,
output_dir: str = None,
overwrite: bool = False,
identifier_case: str = "camel",
) -> None:
"""
Convert a JSON File to CSV/TSV file.
Expand All @@ -205,6 +226,9 @@ def convert_to_csv_or_tsv(
overwrite : bool
Whether to overwrite existing files.
identifier_case : str
The case format for identifiers. Default is "camel".
Returns
-------
None
Expand Down Expand Up @@ -265,7 +289,16 @@ def convert_to_csv_or_tsv(
if isinstance(data[first_key], dict):
# Handle case: { key: { value1: ..., value2: ... } }.
columns = sorted(next(iter(data.values())).keys())
writer.writerow([dtype[:-1]] + columns)
header = [
camel_to_snake(dtype[:-1])
if identifier_case == "snake"
else dtype[:-1]
]
header += [
camel_to_snake(col) if identifier_case == "snake" else col
for col in columns
]
writer.writerow(header)

for key, value in data.items():
row = [key] + [value.get(col, "") for col in columns]
Expand All @@ -276,7 +309,11 @@ def convert_to_csv_or_tsv(
# Handle case: { key: [ { value1: ..., value2: ... } ] }.
if "emoji" in data[first_key][0]: # emoji specific case
columns = ["word", "emoji", "is_base", "rank"]
writer.writerow(columns)
writer.writerow(
[camel_to_snake(col) for col in columns]
if identifier_case == "snake"
else columns
)

for key, value in data.items():
for item in value:
Expand All @@ -288,7 +325,13 @@ def convert_to_csv_or_tsv(
]
writer.writerow(row)
else:
columns = [dtype[:-1]] + list(data[first_key][0].keys())
if identifier_case == "snake":
columns = [camel_to_snake(dtype[:-1])] + [
camel_to_snake(col)
for col in data[first_key][0].keys()
]
else:
writer.writerow(columns)
writer.writerow(columns)

for key, value in data.items():
Expand All @@ -300,20 +343,30 @@ def convert_to_csv_or_tsv(

elif all(isinstance(item, str) for item in data[first_key]):
# Handle case: { key: [value1, value2, ...] }.
writer.writerow(
[dtype[:-1]]
+ [
f"autosuggestion_{i+1}"
for i in range(len(data[first_key]))
]
)
header = [
camel_to_snake(dtype[:-1])
if identifier_case == "snake"
else dtype[:-1]
]
header += [
f"autosuggestion_{i+1}"
for i in range(len(data[first_key]))
]
writer.writerow(header)
for key, value in data.items():
row = [key] + value
writer.writerow(row)

else:
# Handle case: { key: value }.
writer.writerow([dtype[:-1], "value"])
writer.writerow(
[
camel_to_snake(dtype[:-1])
if identifier_case == "snake"
else dtype[:-1],
"value",
]
)
for key, value in data.items():
writer.writerow([key, value])

Expand All @@ -334,6 +387,7 @@ def convert_to_sqlite(
input_file: str = None,
output_dir: str = None,
overwrite: bool = False,
identifier_case: str = "snake",
) -> None:
"""
Converts a Scribe-Data output file to an SQLite file.
Expand All @@ -358,6 +412,9 @@ def convert_to_sqlite(
overwrite : bool
Whether to overwrite existing files.
identifier_case : str
The case format for identifiers. Default is "snake".
Returns
-------
A SQLite file saved in the given location.
Expand All @@ -383,7 +440,7 @@ def convert_to_sqlite(
if not output_dir.exists():
output_dir.mkdir(parents=True, exist_ok=True)

data_to_sqlite(languages, specific_tables)
data_to_sqlite(languages, specific_tables, identifier_case)

source_file = f"{get_language_iso(language).capitalize()}LanguageData.sqlite"
source_path = input_file.parent / source_file
Expand All @@ -410,6 +467,7 @@ def convert_wrapper(
input_file: str,
output_dir: str = None,
overwrite: bool = False,
identifier_case: str = "snake",
):
"""
Convert data to the specified output type: JSON, CSV/TSV, or SQLite.
Expand All @@ -434,6 +492,9 @@ def convert_wrapper(
overwrite : bool, optional
Whether to overwrite existing output files. Defaults to False.
identifier_case : str
The case format for identifiers. Default is "snake".
Returns
-------
None
Expand All @@ -452,6 +513,7 @@ def convert_wrapper(
input_file=input_file,
output_dir=output_dir,
overwrite=overwrite,
identifier_case=identifier_case,
)

elif output_type in {"csv", "tsv"}:
Expand All @@ -462,6 +524,7 @@ def convert_wrapper(
input_file=input_file,
output_dir=output_dir,
overwrite=overwrite,
identifier_case=identifier_case,
)

elif output_type == "sqlite":
Expand All @@ -472,6 +535,7 @@ def convert_wrapper(
input_file=input_file,
output_dir=output_dir,
overwrite=overwrite,
identifier_case=identifier_case,
)

else:
Expand Down
5 changes: 5 additions & 0 deletions src/scribe_data/cli/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def get_data(
outputs_per_entry: int = None,
all: bool = False,
interactive: bool = False,
identifier_case: str = "camel",
) -> None:
"""
Function for controlling the data get process for the CLI.
Expand Down Expand Up @@ -75,6 +76,9 @@ def get_data(
interactive : bool (default: False)
Whether it's running in interactive mode.
identifier_case : str
The case format for identifiers. Default is "camel".
Returns
-------
The requested data saved locally given file type and location arguments.
Expand Down Expand Up @@ -181,6 +185,7 @@ def get_data(
input_file=str(json_input_path),
output_dir=output_dir,
overwrite=overwrite,
identifier_case=identifier_case,
)

os.remove(json_input_path)
Expand Down
26 changes: 24 additions & 2 deletions src/scribe_data/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ def main() -> None:
get_parser.add_argument(
"-i", "--interactive", action="store_true", help="Run in interactive mode"
)
get_parser.add_argument(
"-ic",
"--identifier-case",
type=str,
choices=["camel", "snake"],
default="camel",
help="The case format for identifiers in the output data (default: camel).",
)

# MARK: Total

Expand Down Expand Up @@ -242,6 +250,14 @@ def main() -> None:
default=True,
help="Whether to keep the original file to be converted (default: True).",
)
convert_parser.add_argument(
"-ic",
"--identifier-case",
type=str,
choices=["camel", "snake"],
default="camel",
help="The case format for identifiers in the output data (default: camel).",
)

# MARK: Setup CLI

Expand Down Expand Up @@ -287,6 +303,7 @@ def main() -> None:
outputs_per_entry=args.outputs_per_entry,
overwrite=args.overwrite,
all=args.all,
identifier_case=args.identifier_case,
)

elif args.command in ["total", "t"]:
Expand All @@ -295,8 +312,12 @@ def main() -> None:

else:
total_wrapper(
language=args.language.lower() if args.language is not None else None,
data_type=args.data_type.lower() if args.data_type is not None else None,
language=args.language.lower()
if args.language is not None
else None,
data_type=args.data_type.lower()
if args.data_type is not None
else None,
all_bool=args.all,
)

Expand All @@ -308,6 +329,7 @@ def main() -> None:
input_file=args.input_file,
output_dir=args.output_dir,
overwrite=args.overwrite,
identifier_case=args.identifier_case,
)

else:
Expand Down
14 changes: 11 additions & 3 deletions src/scribe_data/load/data_to_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,16 @@
from scribe_data.utils import (
DEFAULT_JSON_EXPORT_DIR,
DEFAULT_SQLITE_EXPORT_DIR,
camel_to_snake,
get_language_iso,
list_all_languages,
)


def data_to_sqlite(
languages: Optional[List[str]] = None, specific_tables: Optional[List[str]] = None
languages: Optional[List[str]] = None,
specific_tables: Optional[List[str]] = None,
identifier_case: str = "camel",
) -> None:
PATH_TO_SCRIBE_DATA = Path(__file__).parent.parent

Expand Down Expand Up @@ -108,11 +111,16 @@ def create_table(data_type, cols):
Parameters
----------
data_type : str
The name of the table to be created
The name of the table to be created.
cols : list of strings
The names of columns for the new table
The names of columns for the new table.
"""
# Convert column names to snake_case if requested.
cols = [
camel_to_snake(col) if identifier_case == "snake" else col for col in cols
]

cursor.execute(
f"CREATE TABLE IF NOT EXISTS {data_type} ({' Text, '.join(cols)} Text, UNIQUE({cols[0]}))"
)
Expand Down
9 changes: 9 additions & 0 deletions src/scribe_data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import ast
import json
import re
from importlib import resources
from pathlib import Path
from typing import Any, Optional
Expand Down Expand Up @@ -649,3 +650,11 @@ def list_languages_with_metadata_for_data_type(language_metadata=_languages):
)

return sorted(current_languages, key=lambda x: x["name"])


# MARK: Case Conversion


def camel_to_snake(name: str) -> str:
    """
    Convert a camelCase or PascalCase identifier to snake_case.

    An underscore is inserted only at word boundaries, so runs of capitals
    (acronyms) stay together and names that already contain underscores do
    not gain doubled ones.

    Parameters
    ----------
        name : str
            The identifier to convert.

    Returns
    -------
        str
            The lower-cased identifier with words separated by underscores.

    Examples
    --------
    >>> camel_to_snake("femSingularNominative")
    'fem_singular_nominative'
    >>> camel_to_snake("lexemeID")
    'lexeme_id'
    >>> camel_to_snake("HTTPResponse")
    'http_response'
    """
    # Two zero-width boundaries are recognised:
    #   1. a lowercase letter or digit followed by a capital ("camel|Case");
    #   2. the end of an acronym, i.e. a capital followed by a capital that
    #      starts a new lowercase word ("HTTP|Response").
    # Splitting before every capital instead would break acronyms apart
    # ("lexemeID" -> "lexeme_i_d") and double existing underscores.
    word_boundary = r"(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])"
    return re.sub(word_boundary, "_", name).lower()
Loading

0 comments on commit a0e0692

Please sign in to comment.