Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added functionality to convert data to CSV/TSV and JSON and vice versa. #329

Closed
john-thuo1 wants to merge 25 commits into main from decouple_convert
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
bc76779
fixing IncompleteRead issue
axif0 Oct 3, 2024
c6b7cae
feat: Implement functionality to convert data to CSV/TSV and JSON
john-thuo1 Oct 12, 2024
74b522b
adding a sparql file in Hebrew/adjectives for hebrew adjectives
OmarAI2003 Oct 12, 2024
d71b304
Renaming the query file to /Hebrew/adjectives/query_adjectives.sparql…
OmarAI2003 Oct 12, 2024
919f6c8
simple sparql query for fetching all the 4274 hebrew adjectives on wi…
OmarAI2003 Oct 12, 2024
aef6592
Merge branch 'main' into decouple_convert
andrewtavis Oct 12, 2024
3270e86
Fix tests and minor updates to cli main
andrewtavis Oct 12, 2024
40ab3fc
Minor docstring fixes and making all args in cli main explicit
andrewtavis Oct 12, 2024
dd40ccd
fix - interactive bug
axif0 Oct 12, 2024
b982d17
Merge branch 'main' into decouple_convert
andrewtavis Oct 12, 2024
18f377d
Expand query to optionally return all forms of Hebrew adjectives
OmarAI2003 Oct 13, 2024
0c326b6
Add optional retrieval of Hebrew adjective forms by gender and number
OmarAI2003 Oct 13, 2024
1de9dd3
fix interactive cli command
axif0 Oct 13, 2024
df7fa75
Add filter not exist to remove construct forms and filter he
andrewtavis Oct 13, 2024
8614915
Remove selection of lexeme URI
andrewtavis Oct 13, 2024
e93f8fb
Merge pull request #333 from OmarAI2003/Heb-adjectives
andrewtavis Oct 13, 2024
43fcb55
Merge branch 'main' into IncompleteRead
andrewtavis Oct 13, 2024
50289a1
Spacing and removing unused import
andrewtavis Oct 13, 2024
419576e
Merge pull request #221 from axif0/IncompleteRead
andrewtavis Oct 13, 2024
3869e75
Minor edits to the interactive mode setup / functionality
andrewtavis Oct 13, 2024
c4da4e9
Merge pull request #334 from axif0/inter
andrewtavis Oct 13, 2024
1571ce3
Merge branch 'decouple_convert' of github.com:john-thuo1/Scribe-Data …
john-thuo1 Oct 13, 2024
a59be57
removed required on convert arg --output dir
john-thuo1 Oct 13, 2024
eb349a6
Update convert tests(sqlite)
john-thuo1 Oct 13, 2024
aa9433c
Update Default Directories for convert functions
john-thuo1 Oct 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
398 changes: 244 additions & 154 deletions src/scribe_data/cli/convert.py

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions src/scribe_data/cli/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def get_data(
overwrite: bool = False,
outputs_per_entry: int = None,
all: bool = False,
interactive: bool = False,
) -> None:
"""
Function for controlling the data get process for the CLI.
Expand All @@ -61,12 +62,15 @@ def get_data(
outputs_per_entry : int
How many outputs should be generated per data entry.

overwrite : bool
Whether to overwrite existing files (default: False).
overwrite : bool (default: False)
Whether to overwrite existing files.

all : bool
Get all languages and data types.

interactive : bool (default: False)
Whether it's running in interactive mode.

Returns
-------
The requested data saved locally given file type and location arguments.
Expand Down Expand Up @@ -125,6 +129,7 @@ def get_data(
data_type=data_type,
output_dir=output_dir,
overwrite=overwrite,
interactive=interactive,
)
subprocess_result = True

Expand All @@ -140,6 +145,8 @@ def get_data(
print(
f"Updated data was saved in: {Path(output_dir).resolve()}.",
)
if interactive:
return True

# The emoji keywords process has failed.
elif data_type in {"emoji-keywords", "emoji_keywords"}:
Expand Down
60 changes: 45 additions & 15 deletions src/scribe_data/cli/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,33 @@
-->
"""

import logging
from pathlib import Path
from typing import List

import questionary
from questionary import Choice
from rich import print as rprint
from rich.console import Console
from rich.logging import RichHandler
from rich.table import Table
from tqdm import tqdm

from scribe_data.cli.cli_utils import data_type_metadata, language_metadata
from scribe_data.cli.get import get_data
from scribe_data.cli.version import get_version_message
from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR

# MARK: Config Setup

# Configure the root logger at import time: INFO level, message-only format,
# and a single RichHandler so that Rich markup tags embedded in log messages
# (e.g. "[green]...[/green]") are rendered rather than printed literally.
logging.basicConfig(
level=logging.INFO,
format="%(message)s",
datefmt="[%X]",
handlers=[RichHandler(markup=True)], # Enable markup for colors
)
# Console instance used by this module for Rich output (tables, blank lines).
console = Console()
# Logger used by this module for per-export success/failure messages.
logger = logging.getLogger("rich")


class ScribeDataConfig:
Expand All @@ -60,9 +72,11 @@ def display_summary():
"""
Displays a summary of the interactive mode request to run.
"""
table = Table(title="Scribe-Data Configuration Summary")
table = Table(
title="Scribe-Data Request Configuration Summary", style="bright_white"
)

table.add_column("Setting", style="cyan")
table.add_column("Setting", style="bold cyan", no_wrap=True)
table.add_column("Value(s)", style="magenta")

table.add_row("Languages", ", ".join(config.selected_languages) or "None")
Expand All @@ -71,7 +85,9 @@ def display_summary():
table.add_row("Output Directory", str(config.output_dir))
table.add_row("Overwrite", "Yes" if config.overwrite else "No")

console.print(table)
console.print("\n")
console.print(table, justify="left")
console.print("\n")


def configure_settings():
Expand Down Expand Up @@ -107,7 +123,7 @@ def configure_settings():
rprint(
"[yellow]No language selected. Please select at least one option with space followed by enter.[/yellow]"
)
if questionary.confirm("Continue?").ask():
if questionary.confirm("Continue?", default=True).ask():
return configure_settings()

else:
Expand Down Expand Up @@ -135,7 +151,7 @@ def configure_settings():
rprint(
"[yellow]No data type selected. Please select at least one option with space followed by enter.[/yellow]"
)
if questionary.confirm("Continue?").ask():
if questionary.confirm("Continue?", default=True).ask():
return configure_settings()

if data_type_selected:
Expand Down Expand Up @@ -166,27 +182,40 @@ def run_request():
rprint("[bold red]Error: Please configure languages and data types.[/bold red]")
return

# MARK: Export Data
# Calculate total operations
total_operations = len(config.selected_languages) * len(config.selected_data_types)

with console.status("[bold green]Exporting data...[/bold green]") as status:
# MARK: Export Data
with tqdm(
total=total_operations,
desc="Exporting data",
unit="operation",
) as pbar:
for language in config.selected_languages:
for data_type in config.selected_data_types:
status.update(
f"[bold green]Exporting {language} {data_type} data...[/bold green]"
)
pbar.set_description(f"Exporting {language} {data_type} data")

get_data(
if get_data(
language=language,
data_type=data_type,
output_type=config.output_type,
output_dir=str(config.output_dir),
overwrite=config.overwrite,
all=config.output_type,
)
interactive=True,
):
logger.info(
f"[green]✔ Exported {language} {data_type} data.[/green]"
)

else:
logger.info(
f"[red]✘ Failed to export {language} {data_type} data.[/red]"
)

rprint(f"\n[green]✔[/green] Exported {language} {data_type} data.")
pbar.update(1)

rprint("[bold green]Data export completed successfully![/bold green]")
if config.overwrite:
rprint("[bold green]Data request completed successfully![/bold green]")


# MARK: Start
Expand Down Expand Up @@ -219,6 +248,7 @@ def start_interactive_mode():
break

else:
rprint("[bold cyan]Thank you for using Scribe-Data![/bold cyan]")
break


Expand Down
82 changes: 68 additions & 14 deletions src/scribe_data/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,13 @@

#!/usr/bin/env python3
import argparse
from pathlib import Path

from scribe_data.cli.convert import convert_to_csv_or_tsv, convert_to_sqlite
from scribe_data.cli.convert import (
convert_to_csv_or_tsv,
convert_to_json,
convert_to_sqlite,
)
from scribe_data.cli.get import get_data
from scribe_data.cli.interactive import start_interactive_mode
from scribe_data.cli.list import list_wrapper
Expand Down Expand Up @@ -179,22 +184,55 @@ def main() -> None:
epilog=CLI_EPILOG,
formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=60),
)
convert_parser._actions[0].help = "Show this help message and exit."

# Setting up the arguments for the convert command
convert_parser.add_argument(
"-lang",
"--language",
type=str,
required=True,
help="The language of the file to convert.",
)
convert_parser.add_argument(
"-f", "--file", type=str, help="The file to convert to a new type."
"-dt",
"--data-type",
type=str,
required=True,
help="The data type(s) of the file to convert (e.g., noun, verb).",
)
convert_parser.add_argument(
"-if",
"--input-file",
type=Path,
required=True,
help="The path to the input file to convert.",
)
convert_parser.add_argument(
"-ot",
"--output-type",
type=str,
choices=["json", "csv", "tsv", "sqlite"],
required=True,
help="The output file type.",
)
convert_parser.add_argument(
"-od",
"--output-dir",
type=Path,
help="The directory where the output file will be saved.",
)
convert_parser.add_argument(
"-o",
"--overwrite",
action="store_true",
help="Whether to overwrite existing files (default: False).",
)
convert_parser.add_argument(
"-ko",
"--keep-original",
action="store_false",
help="Whether to keep the file to be converted (default: True).",
action="store_true",
default=True,
help="Whether to keep the original file to be converted (default: True).",
)

# MARK: Setup CLI
Expand All @@ -210,7 +248,9 @@ def main() -> None:
return

if args.command in ["list", "l"]:
list_wrapper(args.language, args.data_type, args.all)
list_wrapper(
language=args.language, data_type=args.data_type, all_bool=args.all
)

elif args.command in ["get", "g"]:
if args.interactive:
Expand All @@ -233,18 +273,32 @@ def main() -> None:
elif args.command in ["convert", "c"]:
if args.output_type in ["csv", "tsv"]:
convert_to_csv_or_tsv(
args.language,
args.data_type,
args.output_dir,
args.overwrite,
language=args.language,
data_type=args.data_type,
output_type=args.output_type,
input_file=args.input_file,
output_dir=args.output_dir,
overwrite=args.overwrite,
)

elif args.output_type == "sqlite":
convert_to_sqlite(
args.language,
args.data_type,
args.output_dir,
args.overwrite,
language=args.language,
data_type=args.data_type,
output_type=args.output_type,
input_file=args.input_file,
output_dir=args.output_dir,
overwrite=args.overwrite,
)

elif args.output_type == "json":
convert_to_json(
language=args.language,
data_type=args.data_type,
output_type=args.output_type,
input_file=args.input_file,
output_dir=args.output_dir,
overwrite=args.overwrite,
)

else:
Expand Down
Loading