From 37e841310afa0745c826d52e1d934a71795452a6 Mon Sep 17 00:00:00 2001 From: Roman Isecke <136338424+rbiseck3@users.noreply.github.com> Date: Tue, 24 Oct 2023 08:57:33 -0400 Subject: [PATCH] refactor ingest CLI for better code reuse (#1846) ### Description Much of the current CLI code is copy-paste across subcommands. To alleviate this, most of the duplicate code was moved into base classes for src and destination connector commands. This also allows for code reuse when a destination command is called and it no longer has to jump through hoops to dynamically recreate what _would_ have been called by a source command. The reason everything can't live in a single BaseCmd class is due to the need for a dynamic map to the source command. This runs into a circular dependency issue if it was all in one class. By splitting it into a `BaseSrcCmd` and a `BaseDestCmd` class, this helps avoid that issue. --- CHANGELOG.md | 1 + unstructured/ingest/cli/__init__.py | 14 ++ unstructured/ingest/cli/base/__init__.py | 0 unstructured/ingest/cli/base/cmd.py | 19 +++ unstructured/ingest/cli/base/dest.py | 67 ++++++++ unstructured/ingest/cli/base/src.py | 58 +++++++ unstructured/ingest/cli/cli.py | 6 +- unstructured/ingest/cli/cmd_factory.py | 12 ++ unstructured/ingest/cli/cmds/__init__.py | 146 +++++++++++------- unstructured/ingest/cli/cmds/airtable.py | 36 +---- unstructured/ingest/cli/cmds/azure.py | 43 +----- .../ingest/cli/cmds/azure_cognitive_search.py | 46 +----- unstructured/ingest/cli/cmds/biomed.py | 36 +---- unstructured/ingest/cli/cmds/box.py | 43 +----- unstructured/ingest/cli/cmds/confluence.py | 36 +---- unstructured/ingest/cli/cmds/delta_table.py | 84 ++-------- unstructured/ingest/cli/cmds/discord.py | 36 +---- unstructured/ingest/cli/cmds/dropbox.py | 43 +----- unstructured/ingest/cli/cmds/elasticsearch.py | 36 +---- unstructured/ingest/cli/cmds/fsspec.py | 45 +----- unstructured/ingest/cli/cmds/gcs.py | 43 +----- unstructured/ingest/cli/cmds/github.py | 36 +---- unstructured/ingest/cli/cmds/gitlab.py | 36 +---- unstructured/ingest/cli/cmds/google_drive.py | 40 +---- unstructured/ingest/cli/cmds/jira.py | 36 +---- unstructured/ingest/cli/cmds/local.py | 40 +---- unstructured/ingest/cli/cmds/notion.py | 40 +---- unstructured/ingest/cli/cmds/onedrive.py | 40 +---- unstructured/ingest/cli/cmds/outlook.py | 40 +---- unstructured/ingest/cli/cmds/reddit.py | 36 +---- unstructured/ingest/cli/cmds/s3.py | 89 ++--------- unstructured/ingest/cli/cmds/salesforce.py | 40 +---- unstructured/ingest/cli/cmds/sharepoint.py | 40 +---- unstructured/ingest/cli/cmds/slack.py | 36 +---- unstructured/ingest/cli/cmds/wikipedia.py | 36 +---- unstructured/ingest/cli/utils.py | 54 ++----- 36 files changed, 433 insertions(+), 1056 deletions(-) create mode 100644 unstructured/ingest/cli/base/__init__.py create mode 100644 unstructured/ingest/cli/base/cmd.py create mode 100644 unstructured/ingest/cli/base/dest.py create mode 100644 unstructured/ingest/cli/base/src.py create mode 100644 unstructured/ingest/cli/cmd_factory.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bafbbf473..f294bfdd3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ ### Enhancements * **Duplicate CLI param check** Given that many of the options associated with the `Click` based cli ingest commands are added dynamically from a number of configs, a check was incorporated to make sure there were no duplicate entries to prevent new configs from overwriting already added options. +* **Ingest CLI refactor for better code reuse** Much of the ingest cli code can be templated and was a copy-paste across files, adding potential risk. Code was refactored to use a base class which had much of the shared code templated. ### Features diff --git a/unstructured/ingest/cli/__init__.py b/unstructured/ingest/cli/__init__.py index e69de29bb2..f3490ae22a 100644 --- a/unstructured/ingest/cli/__init__.py +++ b/unstructured/ingest/cli/__init__.py @@ -0,0 +1,14 @@ +import typing as t + +import click + +from unstructured.ingest.cli.cmds import base_dest_cmd_fns, base_src_cmd_fns + +src: t.List[click.Group] = [v().get_src_cmd() for v in base_src_cmd_fns] + +dest: t.List[click.Command] = [v().get_dest_cmd() for v in base_dest_cmd_fns] + +__all__ = [ + "src", + "dest", +] diff --git a/unstructured/ingest/cli/base/__init__.py b/unstructured/ingest/cli/base/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/unstructured/ingest/cli/base/cmd.py b/unstructured/ingest/cli/base/cmd.py new file mode 100644 index 0000000000..274358d0b4 --- /dev/null +++ b/unstructured/ingest/cli/base/cmd.py @@ -0,0 +1,19 @@ +import typing as t +from abc import ABC +from dataclasses import dataclass, field + +from unstructured.ingest.cli.interfaces import CliMixin +from unstructured.ingest.interfaces import BaseConfig + + +@dataclass +class BaseCmd(ABC): + cmd_name: str + cli_config: t.Optional[t.Type[BaseConfig]] = None + additional_cli_options: t.List[t.Type[CliMixin]] = field(default_factory=list) + addition_configs: t.Dict[str, t.Type[BaseConfig]] = field(default_factory=dict) + is_fsspec: bool = False + + @property + def cmd_name_key(self): + return self.cmd_name.replace("-", "_") diff --git a/unstructured/ingest/cli/base/dest.py b/unstructured/ingest/cli/base/dest.py new file mode 100644 index 0000000000..144aca651e --- /dev/null +++ b/unstructured/ingest/cli/base/dest.py @@ -0,0 +1,67 @@ +import logging +from dataclasses import dataclass + +import click + +from unstructured.ingest.cli.base.cmd import BaseCmd +from unstructured.ingest.cli.cmd_factory import get_src_cmd +from unstructured.ingest.cli.common import ( + log_options, +) +from unstructured.ingest.cli.interfaces import ( + CliFilesStorageConfig, +) +from unstructured.ingest.cli.utils import add_options, conform_click_options +from unstructured.ingest.logger import ingest_log_streaming_init, logger + + +@dataclass +class BaseDestCmd(BaseCmd): + def get_dest_runner(self, source_cmd: str, options: dict, parent_options: dict): + src_cmd_fn = get_src_cmd(cmd_name=source_cmd) + src_cmd = src_cmd_fn() + runner = src_cmd.get_source_runner(options=parent_options) + runner.writer_type = self.cmd_name_key + runner.writer_kwargs = options + return runner + + def check_dest_options(self, options: dict): + self.cli_config.from_dict(options) + + def dest(self, ctx: click.Context, **options): + if not ctx.parent: + raise click.ClickException("destination command called without a parent") + if not ctx.parent.info_name: + raise click.ClickException("parent command missing info name") + source_cmd = ctx.parent.info_name.replace("-", "_") + parent_options: dict = ctx.parent.params if ctx.parent else {} + conform_click_options(options) + verbose = parent_options.get("verbose", False) + ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) + log_options(parent_options, verbose=verbose) + log_options(options, verbose=verbose) + try: + self.check_dest_options(options=options) + runner = self.get_dest_runner( + source_cmd=source_cmd, + options=options, + parent_options=parent_options, + ) + runner.run(**parent_options) + except Exception as e: + logger.error(e, exc_info=True) + raise click.ClickException(str(e)) from e + + def get_dest_cmd(self) -> click.Command: + # Dynamically create the command without the use of click decorators + fn = self.dest + fn = click.pass_context(fn) + cmd: click.Group = click.command(fn) + cmd.name = self.cmd_name + cmd.invoke_without_command = True + options = [self.cli_config] if self.cli_config else [] + options += self.additional_cli_options + if self.is_fsspec and CliFilesStorageConfig not in options: + options.append(CliFilesStorageConfig) + add_options(cmd, extras=options, is_src=False) + return cmd diff --git a/unstructured/ingest/cli/base/src.py b/unstructured/ingest/cli/base/src.py new file mode 100644 index 0000000000..4caf645bde --- /dev/null +++ b/unstructured/ingest/cli/base/src.py @@ -0,0 +1,58 @@ +import logging +from dataclasses import dataclass + +import click + +from unstructured.ingest.cli.base.cmd import BaseCmd +from unstructured.ingest.cli.common import ( + log_options, +) +from unstructured.ingest.cli.interfaces import CliFilesStorageConfig +from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs +from unstructured.ingest.interfaces import FsspecConfig +from unstructured.ingest.logger import ingest_log_streaming_init, logger +from unstructured.ingest.runner import runner_map + + +@dataclass +class BaseSrcCmd(BaseCmd): + def get_source_runner(self, options: dict): + addition_configs = self.addition_configs + if self.is_fsspec and "fsspec_config" not in addition_configs: + addition_configs["fsspec_config"] = FsspecConfig + configs = extract_configs( + options, + validate=[self.cli_config] if self.cli_config else None, + extras=addition_configs, + ) + runner = runner_map[self.cmd_name_key] + return runner(**configs) # type: ignore + + def src(self, ctx: click.Context, **options): + if ctx.invoked_subcommand: + return + + conform_click_options(options) + verbose = options.get("verbose", False) + ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) + log_options(options, verbose=verbose) + try: + runner = self.get_source_runner(options=options) + runner.run(**options) + except Exception as e: + logger.error(e, exc_info=True) + raise click.ClickException(str(e)) from e + + def get_src_cmd(self) -> click.Group: + # Dynamically create the command without the use of click decorators + fn = self.src + fn = click.pass_context(fn) + cmd: click.Group = click.group(fn, cls=Group) + cmd.name = self.cmd_name + cmd.invoke_without_command = True + extra_options = [self.cli_config] if self.cli_config else [] + extra_options += self.additional_cli_options + if self.is_fsspec and CliFilesStorageConfig not in extra_options: + extra_options.append(CliFilesStorageConfig) + add_options(cmd, extras=extra_options) + return cmd diff --git a/unstructured/ingest/cli/cli.py b/unstructured/ingest/cli/cli.py index ba24aae5cf..166f2097c5 100644 --- a/unstructured/ingest/cli/cli.py +++ b/unstructured/ingest/cli/cli.py @@ -1,6 +1,6 @@ import click -import unstructured.ingest.cli.cmds as cli_cmds +from unstructured.ingest.cli import dest, src @click.group() @@ -11,9 +11,9 @@ def ingest(): def get_cmd() -> click.Command: cmd = ingest # Add all subcommands - for src_subcommand in cli_cmds.src: + for src_subcommand in src: # add destination subcommands - for dest_subcommand in cli_cmds.dest: + for dest_subcommand in dest: src_subcommand.add_command(dest_subcommand) cmd.add_command(src_subcommand) return cmd diff --git a/unstructured/ingest/cli/cmd_factory.py b/unstructured/ingest/cli/cmd_factory.py new file mode 100644 index 0000000000..3260828cbf --- /dev/null +++ b/unstructured/ingest/cli/cmd_factory.py @@ -0,0 +1,12 @@ +import typing as t + +from unstructured.ingest.cli.base.src import BaseSrcCmd +from unstructured.ingest.cli.cmds import base_src_cmd_fns + + +def get_src_cmd_map() -> t.Dict[str, t.Callable[[], BaseSrcCmd]]: + return {b().cmd_name_key: b for b in base_src_cmd_fns} + + +def get_src_cmd(cmd_name: str) -> t.Callable[[], BaseSrcCmd]: + return get_src_cmd_map()[cmd_name] diff --git a/unstructured/ingest/cli/cmds/__init__.py b/unstructured/ingest/cli/cmds/__init__.py index 9c5123ab52..ee494e4858 100644 --- a/unstructured/ingest/cli/cmds/__init__.py +++ b/unstructured/ingest/cli/cmds/__init__.py @@ -1,67 +1,97 @@ +from __future__ import annotations + +import collections import typing as t -import click +from unstructured.ingest.cli.base.src import BaseSrcCmd + +from .airtable import get_base_src_cmd as airtable_base_src_cmd +from .azure import get_base_src_cmd as azure_base_src_cmd +from .azure_cognitive_search import get_base_dest_cmd as azure_cognitive_search_base_dest_cmd +from .biomed import get_base_src_cmd as biomed_base_src_cmd +from .box import get_base_src_cmd as box_base_src_cmd +from .confluence import get_base_src_cmd as confluence_base_src_cmd +from .delta_table import get_base_dest_cmd as delta_table_dest_cmd +from .delta_table import get_base_src_cmd as delta_table_base_src_cmd +from .discord import get_base_src_cmd as discord_base_src_cmd +from .dropbox import get_base_src_cmd as dropbox_base_src_cmd +from .elasticsearch import get_base_src_cmd as elasticsearch_base_src_cmd +from .fsspec import get_base_src_cmd as fsspec_base_src_cmd +from .gcs import get_base_src_cmd as gcs_base_src_cmd +from .github import get_base_src_cmd as github_base_src_cmd +from .gitlab import get_base_src_cmd as gitlab_base_src_cmd +from .google_drive import get_base_src_cmd as google_drive_base_src_cmd +from .jira import get_base_src_cmd as jira_base_src_cmd +from .local import get_base_src_cmd as local_base_src_cmd +from .notion import get_base_src_cmd as notion_base_src_cmd +from .onedrive import get_base_src_cmd as onedrive_base_src_cmd +from .outlook import get_base_src_cmd as outlook_base_src_cmd +from .reddit import get_base_src_cmd as reddit_base_src_cmd +from .s3 import get_base_dest_cmd as s3_base_dest_cmd +from .s3 import get_base_src_cmd as s3_base_src_cmd +from .salesforce import get_base_src_cmd as salesforce_base_src_cmd +from .sharepoint import get_base_src_cmd as sharepoint_base_src_cmd +from .slack import get_base_src_cmd as slack_base_src_cmd +from .wikipedia import get_base_src_cmd as wikipedia_base_src_cmd + +if t.TYPE_CHECKING: + from unstructured.ingest.cli.base.dest import BaseDestCmd + +base_src_cmd_fns: t.List[t.Callable[[], BaseSrcCmd]] = [ + airtable_base_src_cmd, + azure_base_src_cmd, + biomed_base_src_cmd, + box_base_src_cmd, + confluence_base_src_cmd, + delta_table_base_src_cmd, + discord_base_src_cmd, + dropbox_base_src_cmd, + elasticsearch_base_src_cmd, + fsspec_base_src_cmd, + gcs_base_src_cmd, + github_base_src_cmd, + gitlab_base_src_cmd, + google_drive_base_src_cmd, + jira_base_src_cmd, + local_base_src_cmd, + notion_base_src_cmd, + onedrive_base_src_cmd, + outlook_base_src_cmd, + reddit_base_src_cmd, + salesforce_base_src_cmd, + sharepoint_base_src_cmd, + slack_base_src_cmd, + s3_base_src_cmd, + wikipedia_base_src_cmd, +] -from .airtable import get_source_cmd as airtable_src -from .azure import get_source_cmd as azure_src -from .azure_cognitive_search import get_dest_cmd as azure_cognitive_search_dest -from .biomed import get_source_cmd as biomed_src -from .box import get_source_cmd as box_src -from .confluence import get_source_cmd as confluence_src -from .delta_table import get_dest_cmd as delta_table_dest -from .delta_table import get_source_cmd as delta_table_src -from .discord import get_source_cmd as discord_src -from .dropbox import get_source_cmd as dropbox_src -from .elasticsearch import get_source_cmd as elasticsearch_src -from .fsspec import get_source_cmd as fsspec_src -from .gcs import get_source_cmd as gcs_src -from .github import get_source_cmd as github_src -from .gitlab import get_source_cmd as gitlab_src -from .google_drive import get_source_cmd as google_drive_src -from .jira import get_source_cmd as jira_src -from .local import get_source_cmd as local_src -from .notion import get_source_cmd as notion_src -from .onedrive import get_source_cmd as onedrive_src -from .outlook import get_source_cmd as outlook_src -from .reddit import get_source_cmd as reddit_src -from .s3 import get_dest_cmd as s3_dest -from .s3 import get_source_cmd as s3_src -from .salesforce import get_source_cmd as salesforce_src -from .sharepoint import get_source_cmd as sharepoint_src -from .slack import get_source_cmd as slack_src -from .wikipedia import get_source_cmd as wikipedia_src +# Make sure there are not overlapping names +src_cmd_names = [b().cmd_name for b in base_src_cmd_fns] +src_duplicates = [item for item, count in collections.Counter(src_cmd_names).items() if count > 1] +if src_duplicates: + raise ValueError( + "multiple base src commands defined with the same names: {}".format( + ", ".join(src_duplicates), + ), + ) -src: t.List[click.Group] = [ - airtable_src(), - azure_src(), - biomed_src(), - box_src(), - confluence_src(), - delta_table_src(), - discord_src(), - dropbox_src(), - elasticsearch_src(), - fsspec_src(), - gcs_src(), - github_src(), - gitlab_src(), - google_drive_src(), - jira_src(), - local_src(), - notion_src(), - onedrive_src(), - outlook_src(), - reddit_src(), - salesforce_src(), - sharepoint_src(), - slack_src(), - s3_src(), - wikipedia_src(), +base_dest_cmd_fns: t.List[t.Callable[[], "BaseDestCmd"]] = [ + s3_base_dest_cmd, + azure_cognitive_search_base_dest_cmd, + delta_table_dest_cmd, ] -dest: t.List[click.Command] = [azure_cognitive_search_dest(), s3_dest(), delta_table_dest()] +# Make sure there are not overlapping names +dest_cmd_names = [b().cmd_name for b in base_dest_cmd_fns] +dest_duplicates = [item for item, count in collections.Counter(dest_cmd_names).items() if count > 1] +if dest_duplicates: + raise ValueError( + "multiple base dest commands defined with the same names: {}".format( + ", ".join(dest_duplicates), + ), + ) __all__ = [ - "src", - "dest", + "base_src_cmd_fns", + "base_dest_cmd_fns", ] diff --git a/unstructured/ingest/cli/cmds/airtable.py b/unstructured/ingest/cli/cmds/airtable.py index d9dae6b40c..30993a6bb8 100644 --- a/unstructured/ingest/cli/cmds/airtable.py +++ b/unstructured/ingest/cli/cmds/airtable.py @@ -1,19 +1,13 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import AirtableRunner @dataclass @@ -69,28 +63,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="airtable", invoke_without_command=True, cls=Group) -@click.pass_context -def airtable_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=[AirtableCliConfig]) - runner = AirtableRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = airtable_source - add_options(cmd, extras=[AirtableCliConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="airtable", cli_config=AirtableCliConfig) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/azure.py b/unstructured/ingest/cli/cmds/azure.py index 10649df389..12537fe52a 100644 --- a/unstructured/ingest/cli/cmds/azure.py +++ b/unstructured/ingest/cli/cmds/azure.py @@ -1,20 +1,13 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( - CliFilesStorageConfig, CliMixin, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs -from unstructured.ingest.interfaces import BaseConfig, FsspecConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import AzureRunner +from unstructured.ingest.interfaces import BaseConfig @dataclass @@ -46,32 +39,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="azure", invoke_without_command=True, cls=Group) -@click.pass_context -def azure_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs( - options, - validate=[AzureCliConfig], - extras={"fsspec_config": FsspecConfig}, - ) - runner = AzureRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = azure_source - add_options(cmd, extras=[AzureCliConfig, CliFilesStorageConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="azure", cli_config=AzureCliConfig, is_fsspec=True) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/azure_cognitive_search.py b/unstructured/ingest/cli/cmds/azure_cognitive_search.py index ac11da932b..d27db28348 100644 --- a/unstructured/ingest/cli/cmds/azure_cognitive_search.py +++ b/unstructured/ingest/cli/cmds/azure_cognitive_search.py @@ -1,20 +1,12 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) from unstructured.ingest.cli.interfaces import ( CliMixin, ) -from unstructured.ingest.cli.utils import conform_click_options, orchestrate_runner from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger - -pass @dataclass @@ -53,35 +45,11 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.command(name="azure-cognitive-search") -@click.pass_context -def azure_cognitive_search_dest(ctx: click.Context, **options): - if not ctx.parent: - raise click.ClickException("destination command called without a parent") - if not ctx.parent.info_name: - raise click.ClickException("parent command missing info name") - source_cmd = ctx.parent.info_name.replace("-", "_") - parent_options: dict = ctx.parent.params if ctx.parent else {} - conform_click_options(options) - conform_click_options(parent_options) - verbose = parent_options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(parent_options, verbose=verbose) - log_options(options, verbose=verbose) - try: - orchestrate_runner( - source_cmd=source_cmd, - writer_type="azure_cognitive_search", - parent_options=parent_options, - options=options, - validate=[AzureCognitiveSearchCliWriteConfig], - ) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - +def get_base_dest_cmd(): + from unstructured.ingest.cli.base.dest import BaseDestCmd -def get_dest_cmd() -> click.Command: - cmd = azure_cognitive_search_dest - AzureCognitiveSearchCliWriteConfig.add_cli_options(cmd) - return cmd + cmd_cls = BaseDestCmd( + cmd_name="azure-cognitive-search", + cli_config=AzureCognitiveSearchCliWriteConfig, + ) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/biomed.py b/unstructured/ingest/cli/cmds/biomed.py index a1aaf1d3f0..d5c9eeb4de 100644 --- a/unstructured/ingest/cli/cmds/biomed.py +++ b/unstructured/ingest/cli/cmds/biomed.py @@ -1,19 +1,13 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import BiomedRunner @dataclass @@ -56,28 +50,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="biomed", invoke_without_command=True, cls=Group) -@click.pass_context -def biomed_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=[BiomedCliConfig]) - runner = BiomedRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = biomed_source - add_options(cmd, extras=[BiomedCliConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="biomed", cli_config=BiomedCliConfig) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/box.py b/unstructured/ingest/cli/cmds/box.py index bb16776254..35e3c58d9b 100644 --- a/unstructured/ingest/cli/cmds/box.py +++ b/unstructured/ingest/cli/cmds/box.py @@ -1,20 +1,13 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( - CliFilesStorageConfig, CliMixin, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs -from unstructured.ingest.interfaces import BaseConfig, FsspecConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import BoxRunner +from unstructured.ingest.interfaces import BaseConfig @dataclass @@ -33,32 +26,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="box", invoke_without_command=True, cls=Group) -@click.pass_context -def box_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs( - options, - validate=[BoxCliConfig], - extras={"fsspec_config": FsspecConfig}, - ) - runner = BoxRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = box_source - add_options(cmd, extras=[BoxCliConfig, CliFilesStorageConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="box", cli_config=BoxCliConfig, is_fsspec=True) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/confluence.py b/unstructured/ingest/cli/cmds/confluence.py index 6768c1744f..b84c179df3 100644 --- a/unstructured/ingest/cli/cmds/confluence.py +++ b/unstructured/ingest/cli/cmds/confluence.py @@ -1,20 +1,14 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, DelimitedString, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import ConfluenceRunner @dataclass @@ -74,28 +68,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="confluence", invoke_without_command=True, cls=Group) -@click.pass_context -def confluence_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=[ConfluenceCliConfig]) - runner = ConfluenceRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = confluence_source - add_options(cmd, extras=[ConfluenceCliConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="confluence", cli_config=ConfluenceCliConfig) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/delta_table.py b/unstructured/ingest/cli/cmds/delta_table.py index bb1beb2dbd..37627f91a1 100644 --- a/unstructured/ingest/cli/cmds/delta_table.py +++ b/unstructured/ingest/cli/cmds/delta_table.py @@ -1,25 +1,15 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, ) -from unstructured.ingest.cli.utils import ( - Group, - add_options, - conform_click_options, - extract_configs, - orchestrate_runner, -) from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import DeltaTableRunner + +CMD_NAME = "delta-table" @dataclass @@ -60,27 +50,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="delta-table", invoke_without_command=True, cls=Group) -@click.pass_context -def delta_table_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=[DeltaTableCliConfig]) - runner = DeltaTableRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - @dataclass class DeltaTableCliWriteConfig(BaseConfig, CliMixin): write_column: str @@ -108,42 +77,17 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.command(name="delta-table") -@click.pass_context -def delta_table_dest(ctx: click.Context, **options): - if not ctx.parent: - raise click.ClickException("destination command called without a parent") - if not ctx.parent.info_name: - raise click.ClickException("parent command missing info name") - source_cmd = ctx.parent.info_name.replace("-", "_") - parent_options: dict = ctx.parent.params if ctx.parent else {} - conform_click_options(options) - conform_click_options(parent_options) - verbose = parent_options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(parent_options, verbose=verbose) - log_options(options, verbose=verbose) - try: - orchestrate_runner( - source_cmd=source_cmd, - writer_type="delta_table", - parent_options=parent_options, - options=options, - validate=[DeltaTableCliWriteConfig], - ) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name=CMD_NAME, cli_config=DeltaTableCliConfig) + return cmd_cls -def get_dest_cmd() -> click.Command: - cmd = delta_table_dest - DeltaTableCliConfig.add_cli_options(cmd) - DeltaTableCliWriteConfig.add_cli_options(cmd) - return cmd +def get_base_dest_cmd(): + from unstructured.ingest.cli.base.dest import BaseDestCmd -def get_source_cmd() -> click.Group: - cmd = delta_table_source - add_options(cmd, extras=[DeltaTableCliConfig]) - return cmd + cmd_cls = BaseDestCmd( + cmd_name=CMD_NAME, + cli_config=DeltaTableCliConfig, + additional_cli_options=[DeltaTableCliWriteConfig], + ) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/discord.py b/unstructured/ingest/cli/cmds/discord.py index 1bf48e2d87..9bea878ac1 100644 --- a/unstructured/ingest/cli/cmds/discord.py +++ b/unstructured/ingest/cli/cmds/discord.py @@ -1,20 +1,14 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, DelimitedString, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import DiscordRunner @dataclass @@ -48,28 +42,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="discord", invoke_without_command=True, cls=Group) -@click.pass_context -def discord_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=[DiscordCliConfig]) - runner = DiscordRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = discord_source - add_options(cmd, extras=[DiscordCliConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="discord", cli_config=DiscordCliConfig) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/dropbox.py b/unstructured/ingest/cli/cmds/dropbox.py index b45e0254d7..13f21ca998 100644 --- a/unstructured/ingest/cli/cmds/dropbox.py +++ b/unstructured/ingest/cli/cmds/dropbox.py @@ -1,20 +1,13 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( - CliFilesStorageConfig, CliMixin, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs -from unstructured.ingest.interfaces import BaseConfig, FsspecConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import DropboxRunner +from unstructured.ingest.interfaces import BaseConfig @dataclass @@ -33,32 +26,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="dropbox", invoke_without_command=True, cls=Group) -@click.pass_context -def dropbox_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs( - options, - validate=[DropboxCliConfig], - extras={"fsspec_config": FsspecConfig}, - ) - runner = DropboxRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = dropbox_source - add_options(cmd, extras=[DropboxCliConfig, CliFilesStorageConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="dropbox", cli_config=DropboxCliConfig, is_fsspec=True) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/elasticsearch.py b/unstructured/ingest/cli/cmds/elasticsearch.py index c32daa1b71..245ff3a1c6 100644 --- a/unstructured/ingest/cli/cmds/elasticsearch.py +++ b/unstructured/ingest/cli/cmds/elasticsearch.py @@ -1,19 +1,13 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import ElasticSearchRunner @dataclass @@ -50,28 +44,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="elasticsearch", invoke_without_command=True, cls=Group) -@click.pass_context -def elasticsearch_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=[ElasticsearchCliConfig]) - runner = ElasticSearchRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = elasticsearch_source - add_options(cmd, extras=[ElasticsearchCliConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="elasticsearch", cli_config=ElasticsearchCliConfig) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec.py b/unstructured/ingest/cli/cmds/fsspec.py index 2464a2a92b..d081c45b01 100644 --- a/unstructured/ingest/cli/cmds/fsspec.py +++ b/unstructured/ingest/cli/cmds/fsspec.py @@ -1,43 +1,6 @@ -import logging +from unstructured.ingest.cli.base.src import BaseSrcCmd -import click -from unstructured.ingest.cli.common import ( - log_options, -) -from unstructured.ingest.cli.interfaces import ( - CliFilesStorageConfig, -) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs -from unstructured.ingest.interfaces import ( - FsspecConfig, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import FsspecRunner - - -@click.group(name="fsspec", invoke_without_command=True, cls=Group) -@click.pass_context -def fsspec_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, extras={"fsspec_config": FsspecConfig}) - runner = FsspecRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = fsspec_source - add_options(cmd, extras=[CliFilesStorageConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="fsspec", is_fsspec=True) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/gcs.py b/unstructured/ingest/cli/cmds/gcs.py index 5a07ff0d5a..ccec32491b 100644 --- a/unstructured/ingest/cli/cmds/gcs.py +++ b/unstructured/ingest/cli/cmds/gcs.py @@ -1,20 +1,13 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( - CliFilesStorageConfig, CliMixin, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs -from unstructured.ingest.interfaces import BaseConfig, FsspecConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import GCSRunner +from unstructured.ingest.interfaces import BaseConfig @dataclass @@ -35,32 +28,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="gcs", invoke_without_command=True, cls=Group) -@click.pass_context -def gcs_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs( - options, - validate=([GcsCliConfig]), - extras={"fsspec_config": FsspecConfig}, - ) - runner = GCSRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = gcs_source - add_options(cmd, extras=[GcsCliConfig, CliFilesStorageConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="gcs", cli_config=GcsCliConfig, is_fsspec=True) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/github.py b/unstructured/ingest/cli/cmds/github.py index 9b6b2e5909..5528acd82a 100644 --- a/unstructured/ingest/cli/cmds/github.py +++ b/unstructured/ingest/cli/cmds/github.py @@ -1,19 +1,13 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import GithubRunner @dataclass @@ -59,28 +53,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="github", invoke_without_command=True, cls=Group) -@click.pass_context -def github_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=([GithubCliConfig])) - runner = GithubRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = github_source - add_options(cmd, extras=[GithubCliConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="github", cli_config=GithubCliConfig) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/gitlab.py b/unstructured/ingest/cli/cmds/gitlab.py index ae82797935..b277a9413d 100644 --- a/unstructured/ingest/cli/cmds/gitlab.py +++ b/unstructured/ingest/cli/cmds/gitlab.py @@ -1,19 +1,13 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import GitlabRunner @dataclass @@ -59,28 +53,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="gitlab", invoke_without_command=True, cls=Group) -@click.pass_context -def gitlab_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=([GitlabCliConfig])) - runner = GitlabRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = gitlab_source - add_options(cmd, extras=[GitlabCliConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="gitlab", cli_config=GitlabCliConfig) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/google_drive.py b/unstructured/ingest/cli/cmds/google_drive.py index 3021bbe80c..758184e4ff 100644 --- a/unstructured/ingest/cli/cmds/google_drive.py +++ b/unstructured/ingest/cli/cmds/google_drive.py @@ -1,20 +1,14 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, CliRecursiveConfig, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import GoogleDriveRunner @dataclass @@ -48,28 +42,10 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="google-drive", invoke_without_command=True, cls=Group) -@click.pass_context -def google_drive_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=([GoogleDriveCliConfig])) - runner = GoogleDriveRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = google_drive_source - add_options(cmd, extras=[GoogleDriveCliConfig, CliRecursiveConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd( + cmd_name="google-drive", + cli_config=GoogleDriveCliConfig, + additional_cli_options=[CliRecursiveConfig], + ) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/jira.py b/unstructured/ingest/cli/cmds/jira.py index 51a956d62b..539b60443a 100644 --- a/unstructured/ingest/cli/cmds/jira.py +++ b/unstructured/ingest/cli/cmds/jira.py @@ -1,20 +1,14 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, DelimitedString, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import JiraRunner @dataclass @@ -76,28 +70,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="jira", invoke_without_command=True, cls=Group) -@click.pass_context -def jira_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=([JiraCliConfig])) - runner = JiraRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = jira_source - add_options(cmd, extras=[JiraCliConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="jira", cli_config=JiraCliConfig) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/local.py b/unstructured/ingest/cli/cmds/local.py index d8af3e7446..66b76ce773 100644 --- a/unstructured/ingest/cli/cmds/local.py +++ b/unstructured/ingest/cli/cmds/local.py @@ -1,20 +1,14 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, CliRecursiveConfig, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import LocalRunner @dataclass @@ -42,28 +36,10 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="local", invoke_without_command=True, cls=Group) -@click.pass_context -def local_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=([LocalCliConfig])) - runner = LocalRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = local_source - add_options(cmd, extras=[LocalCliConfig, CliRecursiveConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd( + cmd_name="local", + cli_config=LocalCliConfig, + additional_cli_options=[CliRecursiveConfig], + ) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/notion.py b/unstructured/ingest/cli/cmds/notion.py index 917b6387c2..b149c339a3 100644 --- a/unstructured/ingest/cli/cmds/notion.py +++ b/unstructured/ingest/cli/cmds/notion.py @@ -1,21 +1,15 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, CliRecursiveConfig, DelimitedString, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import NotionRunner @dataclass @@ -51,28 +45,10 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="notion", invoke_without_command=True, cls=Group) -@click.pass_context -def notion_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=([NotionCliConfig])) - runner = NotionRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = notion_source - add_options(cmd, extras=[NotionCliConfig, CliRecursiveConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd( + cmd_name="notion", + cli_config=NotionCliConfig, + additional_cli_options=[CliRecursiveConfig], + ) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/onedrive.py b/unstructured/ingest/cli/cmds/onedrive.py index 4faf108114..ab947cd5d0 100644 --- a/unstructured/ingest/cli/cmds/onedrive.py +++ b/unstructured/ingest/cli/cmds/onedrive.py @@ -1,20 +1,14 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, CliRecursiveConfig, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import OneDriveRunner @dataclass @@ -70,28 +64,10 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="onedrive", invoke_without_command=True, cls=Group) -@click.pass_context -def onedrive_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=([OnedriveCliConfig])) - runner = OneDriveRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = onedrive_source - add_options(cmd, extras=[OnedriveCliConfig, CliRecursiveConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd( + cmd_name="onedrive", + cli_config=OnedriveCliConfig, + additional_cli_options=[CliRecursiveConfig], + ) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/outlook.py b/unstructured/ingest/cli/cmds/outlook.py index d08f1ebb35..ecc9148f3d 100644 --- a/unstructured/ingest/cli/cmds/outlook.py +++ b/unstructured/ingest/cli/cmds/outlook.py @@ -1,21 +1,15 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, CliRecursiveConfig, DelimitedString, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import OutlookRunner @dataclass @@ -71,28 +65,10 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="outlook", invoke_without_command=True, cls=Group) -@click.pass_context -def outlook_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=([OutlookCliConfig])) - runner = OutlookRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = outlook_source - add_options(cmd, extras=[OutlookCliConfig, CliRecursiveConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd( + cmd_name="outlook", + cli_config=OutlookCliConfig, + additional_cli_options=[CliRecursiveConfig], + ) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/reddit.py b/unstructured/ingest/cli/cmds/reddit.py index 9baf5495d5..1fc4623ac3 100644 --- a/unstructured/ingest/cli/cmds/reddit.py +++ b/unstructured/ingest/cli/cmds/reddit.py @@ -1,19 +1,13 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import RedditRunner @dataclass @@ -72,28 +66,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="reddit", invoke_without_command=True, cls=Group) -@click.pass_context -def reddit_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=([RedditCliConfig])) - runner = RedditRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = reddit_source - add_options(cmd, extras=[RedditCliConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="reddit", cli_config=RedditCliConfig) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/s3.py b/unstructured/ingest/cli/cmds/s3.py index d4d701c051..a2d4173c44 100644 --- a/unstructured/ingest/cli/cmds/s3.py +++ b/unstructured/ingest/cli/cmds/s3.py @@ -1,26 +1,15 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( - CliFilesStorageConfig, CliMixin, ) -from unstructured.ingest.cli.utils import ( - Group, - add_options, - conform_click_options, - extract_configs, - orchestrate_runner, -) -from unstructured.ingest.interfaces import BaseConfig, FsspecConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import S3Runner +from unstructured.ingest.interfaces import BaseConfig + +CMD_NAME = "s3" @dataclass @@ -48,69 +37,13 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="s3", invoke_without_command=True, cls=Group) -@click.pass_context -def s3_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - # Click sets all multiple fields as tuple, this needs to be updated to list - for k, v in options.items(): - if isinstance(v, tuple): - options[k] = list(v) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs( - options, - validate=[S3CliConfig], - extras={"fsspec_config": FsspecConfig}, - ) - s3_runner = S3Runner( - **configs, # type: ignore - ) - s3_runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -@click.command(name="s3") -@click.pass_context -def s3_dest(ctx: click.Context, **options): - if not ctx.parent: - raise click.ClickException("destination command called without a parent") - if not ctx.parent.info_name: - raise click.ClickException("parent command missing info name") - source_cmd = ctx.parent.info_name.replace("-", "_") - parent_options: dict = ctx.parent.params if ctx.parent else {} - conform_click_options(options) - verbose = parent_options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(parent_options, verbose=verbose) - log_options(options, verbose=verbose) - try: - orchestrate_runner( - source_cmd=source_cmd, - writer_type="s3", - parent_options=parent_options, - options=options, - validate=[S3CliConfig, CliFilesStorageConfig], - ) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - +def get_base_src_cmd(): + cmd_cls = BaseSrcCmd(cmd_name=CMD_NAME, cli_config=S3CliConfig, is_fsspec=True) + return cmd_cls -def get_dest_cmd() -> click.Command: - cmd = s3_dest - S3CliConfig.add_cli_options(cmd) - CliFilesStorageConfig.add_cli_options(cmd) - return cmd +def get_base_dest_cmd(): + from unstructured.ingest.cli.base.dest import BaseDestCmd -def get_source_cmd() -> click.Group: - cmd = s3_source - add_options(cmd, extras=[S3CliConfig, CliFilesStorageConfig]) - return cmd + cmd_cls = BaseDestCmd(cmd_name=CMD_NAME, cli_config=S3CliConfig, is_fsspec=True) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/salesforce.py b/unstructured/ingest/cli/cmds/salesforce.py index 35c82ccad6..56744c87bf 100644 --- a/unstructured/ingest/cli/cmds/salesforce.py +++ b/unstructured/ingest/cli/cmds/salesforce.py @@ -1,21 +1,15 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, CliRecursiveConfig, DelimitedString, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import SalesforceRunner @dataclass @@ -60,28 +54,10 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="salesforce", invoke_without_command=True, cls=Group) -@click.pass_context -def salesforce_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(options, validate=([SalesforceCliConfig])) - runner = SalesforceRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = salesforce_source - add_options(cmd, extras=[SalesforceCliConfig, CliRecursiveConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd( + cmd_name="salesforce", + cli_config=SalesforceCliConfig, + additional_cli_options=[CliRecursiveConfig], + ) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/sharepoint.py b/unstructured/ingest/cli/cmds/sharepoint.py index 3f28a08588..6c2fa0ca44 100644 --- a/unstructured/ingest/cli/cmds/sharepoint.py +++ b/unstructured/ingest/cli/cmds/sharepoint.py @@ -1,20 +1,14 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, CliRecursiveConfig, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import SharePointRunner @dataclass @@ -69,28 +63,10 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="sharepoint", invoke_without_command=True, cls=Group) -@click.pass_context -def sharepoint_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(data=options, validate=[SharepointCliConfig]) - sharepoint_runner = SharePointRunner( - **configs, # type: ignore - ) - sharepoint_runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = sharepoint_source - add_options(cmd, extras=[SharepointCliConfig, CliRecursiveConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd( + cmd_name="sharepoint", + cli_config=SharepointCliConfig, + additional_cli_options=[CliRecursiveConfig], + ) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/slack.py b/unstructured/ingest/cli/cmds/slack.py index a1798c6ff9..2ef4b51679 100644 --- a/unstructured/ingest/cli/cmds/slack.py +++ b/unstructured/ingest/cli/cmds/slack.py @@ -1,20 +1,14 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, DelimitedString, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import SlackRunner @dataclass @@ -59,28 +53,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="slack", invoke_without_command=True, cls=Group) -@click.pass_context -def slack_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(data=options, validate=[SlackCliConfig]) - sharepoint_runner = SlackRunner( - **configs, # type: ignore - ) - sharepoint_runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = slack_source - add_options(cmd, extras=[SlackCliConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="slack", cli_config=SlackCliConfig) + return cmd_cls diff --git a/unstructured/ingest/cli/cmds/wikipedia.py b/unstructured/ingest/cli/cmds/wikipedia.py index 5ee8dc7b94..e42ec8714f 100644 --- a/unstructured/ingest/cli/cmds/wikipedia.py +++ b/unstructured/ingest/cli/cmds/wikipedia.py @@ -1,19 +1,13 @@ -import logging import typing as t from dataclasses import dataclass import click -from unstructured.ingest.cli.common import ( - log_options, -) +from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.interfaces import ( CliMixin, ) -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import WikipediaRunner @dataclass @@ -41,28 +35,6 @@ def get_cli_options() -> t.List[click.Option]: return options -@click.group(name="wikipedia", invoke_without_command=True, cls=Group) -@click.pass_context -def wikipedia_source(ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - configs = extract_configs(data=options, validate=[WikipediaCliConfig]) - runner = WikipediaRunner( - **configs, # type: ignore - ) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_source_cmd() -> click.Group: - cmd = wikipedia_source - add_options(cmd, extras=[WikipediaCliConfig]) - return cmd +def get_base_src_cmd() -> BaseSrcCmd: + cmd_cls = BaseSrcCmd(cmd_name="wikipedia", cli_config=WikipediaCliConfig) + return cmd_cls diff --git a/unstructured/ingest/cli/utils.py b/unstructured/ingest/cli/utils.py index faebdb5228..12083585c1 100644 --- a/unstructured/ingest/cli/utils.py +++ b/unstructured/ingest/cli/utils.py @@ -13,35 +13,7 @@ CliReadConfig, CliRetryStrategyConfig, ) -from unstructured.ingest.interfaces import BaseConfig, FsspecConfig -from unstructured.ingest.runner import FsspecBaseRunner, runner_map - - -def orchestrate_runner( - source_cmd: str, - writer_type: str, - parent_options: dict, - options: dict, - validate: t.Optional[t.List[t.Type[BaseConfig]]] = None, -): - runner_cls = runner_map[source_cmd] - configs = extract_configs( - parent_options, - extras={"fsspec_config": FsspecConfig} - if issubclass(runner_cls, FsspecBaseRunner) - else None, - ) - for val in validate: - val.from_dict(options) - runner_cls = runner_map[source_cmd] - runner = runner_cls( - **configs, # type: ignore - writer_type=writer_type, - writer_kwargs=options, - ) - runner.run( - **parent_options, - ) +from unstructured.ingest.interfaces import BaseConfig def conform_click_options(options: dict): @@ -79,16 +51,20 @@ def extract_configs( return res -def add_options(cmd: click.Command, extras=t.List[t.Type[CliMixin]]) -> click.Command: - configs: t.List[t.Type[CliMixin]] = [ - CliPartitionConfig, - CliReadConfig, - CliEmbeddingConfig, - CliChunkingConfig, - CliProcessorConfig, - CliPermissionsConfig, - CliRetryStrategyConfig, - ] +def add_options(cmd: click.Command, extras=t.List[t.Type[CliMixin]], is_src=True) -> click.Command: + configs: t.List[t.Type[CliMixin]] = ( + [ + CliPartitionConfig, + CliReadConfig, + CliEmbeddingConfig, + CliChunkingConfig, + CliProcessorConfig, + CliPermissionsConfig, + CliRetryStrategyConfig, + ] + if is_src + else [] + ) configs.extend(extras) for config in configs: try: