Skip to content

Commit

Permalink
restructure fsspec files
Browse files: browse the repository at this point in the history
  • Loading branch information
rbiseck3 committed Dec 7, 2023
1 parent dc319e6 commit 57033a9
Show file tree
Hide file tree
Showing 24 changed files with 68 additions and 49 deletions.
24 changes: 12 additions & 12 deletions unstructured/ingest/cli/cmds/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,25 @@
from unstructured.ingest.cli.base.src import BaseSrcCmd

from .airtable import get_base_src_cmd as airtable_base_src_cmd
from .azure import get_base_dest_cmd as azure_base_dest_cmd
from .azure import get_base_src_cmd as azure_base_src_cmd
from .azure_cognitive_search import get_base_dest_cmd as azure_cognitive_search_base_dest_cmd
from .biomed import get_base_src_cmd as biomed_base_src_cmd
from .box import get_base_dest_cmd as box_base_dest_cmd
from .box import get_base_src_cmd as box_base_src_cmd
from .confluence import get_base_src_cmd as confluence_base_src_cmd
from .delta_table import get_base_dest_cmd as delta_table_dest_cmd
from .delta_table import get_base_src_cmd as delta_table_base_src_cmd
from .discord import get_base_src_cmd as discord_base_src_cmd
from .dropbox import get_base_dest_cmd as dropbox_base_dest_cmd
from .dropbox import get_base_src_cmd as dropbox_base_src_cmd
from .elasticsearch import get_base_src_cmd as elasticsearch_base_src_cmd
from .fsspec import get_base_dest_cmd as fsspec_base_dest_cmd
from .fsspec import get_base_src_cmd as fsspec_base_src_cmd
from .gcs import get_base_dest_cmd as gcs_base_dest_cmd
from .gcs import get_base_src_cmd as gcs_base_src_cmd
from .fsspec.azure import get_base_dest_cmd as azure_base_dest_cmd
from .fsspec.azure import get_base_src_cmd as azure_base_src_cmd
from .fsspec.box import get_base_dest_cmd as box_base_dest_cmd
from .fsspec.box import get_base_src_cmd as box_base_src_cmd
from .fsspec.dropbox import get_base_dest_cmd as dropbox_base_dest_cmd
from .fsspec.dropbox import get_base_src_cmd as dropbox_base_src_cmd
from .fsspec.fsspec import get_base_dest_cmd as fsspec_base_dest_cmd
from .fsspec.fsspec import get_base_src_cmd as fsspec_base_src_cmd
from .fsspec.gcs import get_base_dest_cmd as gcs_base_dest_cmd
from .fsspec.gcs import get_base_src_cmd as gcs_base_src_cmd
from .fsspec.s3 import get_base_dest_cmd as s3_base_dest_cmd
from .fsspec.s3 import get_base_src_cmd as s3_base_src_cmd
from .github import get_base_src_cmd as github_base_src_cmd
from .gitlab import get_base_src_cmd as gitlab_base_src_cmd
from .google_drive import get_base_src_cmd as google_drive_base_src_cmd
Expand All @@ -35,8 +37,6 @@
from .outlook import get_base_src_cmd as outlook_base_src_cmd
from .pinecone import get_base_dest_cmd as pinecone_base_dest_cmd
from .reddit import get_base_src_cmd as reddit_base_src_cmd
from .s3 import get_base_dest_cmd as s3_base_dest_cmd
from .s3 import get_base_src_cmd as s3_base_src_cmd
from .salesforce import get_base_src_cmd as salesforce_base_src_cmd
from .sharepoint import get_base_src_cmd as sharepoint_base_src_cmd
from .slack import get_base_src_cmd as slack_base_src_cmd
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
from unstructured.ingest.cli.interfaces import (
CliConfig,
)
from unstructured.ingest.connector.azure import AzureWriteConfig, SimpleAzureBlobStorageConfig
from unstructured.ingest.connector.fsspec.azure import (
AzureWriteConfig,
SimpleAzureBlobStorageConfig,
)

CMD_NAME = "azure"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
from unstructured.ingest.cli.interfaces import (
CliConfig,
)
from unstructured.ingest.connector.box import BoxAccessConfig, BoxWriteConfig
from unstructured.ingest.connector.fsspec.box import BoxWriteConfig, SimpleBoxConfig

CMD_NAME = "box"


@dataclass
class BoxCliConfig(BoxAccessConfig, CliConfig):
class BoxCliConfig(SimpleBoxConfig, CliConfig):
@staticmethod
def get_cli_options() -> t.List[click.Option]:
options = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@
from unstructured.ingest.cli.interfaces import (
CliConfig,
)
from unstructured.ingest.connector.dropbox import (
DropboxAccessConfig,
from unstructured.ingest.connector.fsspec.dropbox import (
DropboxWriteConfig,
SimpleDropboxConfig,
)

CMD_NAME = "dropbox"


@dataclass
class DropboxCliConfig(DropboxAccessConfig, CliConfig):
class DropboxCliConfig(SimpleDropboxConfig, CliConfig):
@staticmethod
def get_cli_options() -> t.List[click.Option]:
options = [
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
CliConfig,
FileOrJson,
)
from unstructured.ingest.connector.gcs import GcsAccessConfig, GcsWriteConfig
from unstructured.ingest.connector.fsspec.gcs import GcsWriteConfig, SimpleGcsConfig

CMD_NAME = "gcs"


@dataclass
class GcsCliConfig(GcsAccessConfig, CliConfig):
class GcsCliConfig(SimpleGcsConfig, CliConfig):
@staticmethod
def get_cli_options() -> t.List[click.Option]:
help_string = """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from unstructured.ingest.cli.interfaces import (
CliConfig,
)
from unstructured.ingest.connector.s3 import S3WriteConfig, SimpleS3Config
from unstructured.ingest.connector.fsspec.s3 import S3WriteConfig, SimpleS3Config

CMD_NAME = "s3"

Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import typing as t
from dataclasses import dataclass

from unstructured.ingest.connector.fsspec import (
from unstructured.ingest.connector.fsspec.fsspec import (
FsspecDestinationConnector,
FsspecIngestDoc,
FsspecSourceConnector,
Expand Down Expand Up @@ -73,6 +73,10 @@ class AzureBlobStorageDestinationConnector(FsspecDestinationConnector):
connector_config: SimpleAzureBlobStorageConfig
write_config: AzureWriteConfig

# Override whose body only delegates to the parent class's initialize(); the
# override exists so the decorator below is applied to this connector's setup.
# NOTE(review): requires_dependencies is defined outside this view — presumably it
# fails early when adlfs or fsspec (the "azure" extra) is not installed; confirm.
@requires_dependencies(["adlfs", "fsspec"], extras="azure")
def initialize(self):
super().initialize()

@requires_dependencies(["adlfs"], extras="azure")
def check_connection(self):
from adlfs import AzureBlobFileSystem
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import typing as t
from dataclasses import dataclass

from unstructured.ingest.connector.fsspec import (
from unstructured.ingest.connector.fsspec.fsspec import (
FsspecDestinationConnector,
FsspecIngestDoc,
FsspecSourceConnector,
Expand Down Expand Up @@ -94,6 +94,10 @@ class BoxDestinationConnector(FsspecDestinationConnector):
connector_config: SimpleBoxConfig
write_config: BoxWriteConfig

# Override whose body only delegates to the parent class's initialize(); the
# override exists so the decorator below is applied to this connector's setup.
# NOTE(review): requires_dependencies is defined outside this view — presumably it
# fails early when boxfs or fsspec (the "box" extra) is not installed; confirm.
@requires_dependencies(["boxfs", "fsspec"], extras="box")
def initialize(self):
super().initialize()

@requires_dependencies(["boxfs"], extras="box")
def check_connection(self):
from boxfs import BoxFileSystem
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pathlib import Path
from typing import Type

from unstructured.ingest.connector.fsspec import (
from unstructured.ingest.connector.fsspec.fsspec import (
FsspecDestinationConnector,
FsspecIngestDoc,
FsspecSourceConnector,
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dataclasses import dataclass
from typing import Type

from unstructured.ingest.connector.fsspec import (
from unstructured.ingest.connector.fsspec.fsspec import (
FsspecDestinationConnector,
FsspecIngestDoc,
FsspecSourceConnector,
Expand Down Expand Up @@ -47,6 +47,10 @@ def get_file(self):
class GcsSourceConnector(FsspecSourceConnector):
connector_config: SimpleGcsConfig

# Override whose body only delegates to the parent class's initialize(); the
# override exists so the decorator below is applied to this connector's setup.
# NOTE(review): requires_dependencies is defined outside this view — presumably it
# fails early when gcsfs or fsspec (the "gcs" extra) is not installed; confirm.
@requires_dependencies(["gcsfs", "fsspec"], extras="gcs")
def initialize(self):
super().initialize()

def __post_init__(self):
self.ingest_doc_cls: Type[GcsIngestDoc] = GcsIngestDoc

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dataclasses import dataclass
from typing import Type

from unstructured.ingest.connector.fsspec import (
from unstructured.ingest.connector.fsspec.fsspec import (
FsspecDestinationConnector,
FsspecIngestDoc,
FsspecSourceConnector,
Expand Down Expand Up @@ -56,3 +56,7 @@ def __post_init__(self):
class S3DestinationConnector(FsspecDestinationConnector):
connector_config: SimpleS3Config
write_config: S3WriteConfig

# Override whose body only delegates to the parent class's initialize(); the
# override exists so the decorator below is applied to this connector's setup.
# NOTE(review): requires_dependencies is defined outside this view — presumably it
# fails early when s3fs or fsspec (the "s3" extra) is not installed; confirm.
@requires_dependencies(["s3fs", "fsspec"], extras="s3")
def initialize(self):
super().initialize()
10 changes: 5 additions & 5 deletions unstructured/ingest/connector/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@
from typing import Dict, Type, cast

from unstructured.ingest.connector.airtable import AirtableIngestDoc
from unstructured.ingest.connector.azure import AzureBlobStorageIngestDoc
from unstructured.ingest.connector.biomed import BiomedIngestDoc
from unstructured.ingest.connector.box import BoxIngestDoc
from unstructured.ingest.connector.confluence import ConfluenceIngestDoc
from unstructured.ingest.connector.delta_table import DeltaTableIngestDoc
from unstructured.ingest.connector.discord import DiscordIngestDoc
from unstructured.ingest.connector.dropbox import DropboxIngestDoc
from unstructured.ingest.connector.elasticsearch import (
ElasticsearchIngestDoc,
ElasticsearchIngestDocBatch,
)
from unstructured.ingest.connector.gcs import GcsIngestDoc
from unstructured.ingest.connector.fsspec.azure import AzureBlobStorageIngestDoc
from unstructured.ingest.connector.fsspec.box import BoxIngestDoc
from unstructured.ingest.connector.fsspec.dropbox import DropboxIngestDoc
from unstructured.ingest.connector.fsspec.gcs import GcsIngestDoc
from unstructured.ingest.connector.fsspec.s3 import S3IngestDoc
from unstructured.ingest.connector.github import GitHubIngestDoc
from unstructured.ingest.connector.gitlab import GitLabIngestDoc
from unstructured.ingest.connector.google_drive import GoogleDriveIngestDoc
Expand All @@ -27,7 +28,6 @@
from unstructured.ingest.connector.onedrive import OneDriveIngestDoc
from unstructured.ingest.connector.outlook import OutlookIngestDoc
from unstructured.ingest.connector.reddit import RedditIngestDoc
from unstructured.ingest.connector.s3 import S3IngestDoc
from unstructured.ingest.connector.salesforce import SalesforceIngestDoc
from unstructured.ingest.connector.sharepoint import SharepointIngestDoc
from unstructured.ingest.connector.slack import SlackIngestDoc
Expand Down
12 changes: 6 additions & 6 deletions unstructured/ingest/runner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@
from typing import Type

from .airtable import AirtableRunner
from .azure import AzureRunner
from .base_runner import Runner
from .biomed import BiomedRunner
from .box import BoxRunner
from .confluence import ConfluenceRunner
from .delta_table import DeltaTableRunner
from .discord import DiscordRunner
from .dropbox import DropboxRunner
from .elasticsearch import ElasticSearchRunner
from .fsspec import FsspecRunner
from .gcs import GCSRunner
from .fsspec.azure import AzureRunner
from .fsspec.box import BoxRunner
from .fsspec.dropbox import DropboxRunner
from .fsspec.fsspec import FsspecRunner
from .fsspec.gcs import GCSRunner
from .fsspec.s3 import S3Runner
from .github import GithubRunner
from .gitlab import GitlabRunner
from .google_drive import GoogleDriveRunner
Expand All @@ -23,7 +24,6 @@
from .onedrive import OneDriveRunner
from .outlook import OutlookRunner
from .reddit import RedditRunner
from .s3 import S3Runner
from .salesforce import SalesforceRunner
from .sharepoint import SharePointRunner
from .slack import SlackRunner
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from unstructured.ingest.runner.utils import update_download_dir_remote_url

if t.TYPE_CHECKING:
from unstructured.ingest.connector.azure import SimpleAzureBlobStorageConfig
from unstructured.ingest.connector.fsspec.azure import SimpleAzureBlobStorageConfig


@dataclass
Expand All @@ -23,7 +23,7 @@ def update_read_config(self):
)

def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
from unstructured.ingest.connector.azure import (
from unstructured.ingest.connector.fsspec.azure import (
AzureBlobStorageSourceConnector,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from unstructured.ingest.runner.utils import update_download_dir_remote_url

if t.TYPE_CHECKING:
from unstructured.ingest.connector.box import SimpleBoxConfig
from unstructured.ingest.connector.fsspec.box import SimpleBoxConfig


class BoxRunner(Runner):
Expand All @@ -25,7 +25,7 @@ def run(
logger=logger,
)

from unstructured.ingest.connector.box import BoxSourceConnector
from unstructured.ingest.connector.fsspec.box import BoxSourceConnector

source_doc_connector = BoxSourceConnector( # type: ignore
read_config=self.read_config,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from unstructured.ingest.runner.utils import update_download_dir_remote_url

if t.TYPE_CHECKING:
from unstructured.ingest.connector.dropbox import SimpleDropboxConfig
from unstructured.ingest.connector.fsspec.dropbox import SimpleDropboxConfig


@dataclass
Expand All @@ -23,7 +23,7 @@ def update_read_config(self):
)

def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
from unstructured.ingest.connector.dropbox import (
from unstructured.ingest.connector.fsspec.dropbox import (
DropboxSourceConnector,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from unstructured.ingest.runner.utils import update_download_dir_remote_url

if t.TYPE_CHECKING:
from unstructured.ingest.connector.fsspec import SimpleFsspecConfig
from unstructured.ingest.connector.fsspec.fsspec import SimpleFsspecConfig


class FsspecRunner(Runner):
Expand All @@ -31,7 +31,7 @@ def update_read_config(self):
)

def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
from unstructured.ingest.connector.fsspec import (
from unstructured.ingest.connector.fsspec.fsspec import (
FsspecSourceConnector,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from unstructured.ingest.runner.utils import update_download_dir_remote_url

if t.TYPE_CHECKING:
from unstructured.ingest.connector.gcs import SimpleGcsConfig
from unstructured.ingest.connector.fsspec.gcs import SimpleGcsConfig


@dataclass
Expand All @@ -23,6 +23,6 @@ def update_read_config(self):
)

def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
from unstructured.ingest.connector.gcs import GcsSourceConnector
from unstructured.ingest.connector.fsspec.gcs import GcsSourceConnector

return GcsSourceConnector
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from unstructured.ingest.runner.utils import update_download_dir_remote_url

if t.TYPE_CHECKING:
from unstructured.ingest.connector.s3 import SimpleS3Config
from unstructured.ingest.connector.fsspec.s3 import SimpleS3Config


@dataclass
Expand All @@ -23,6 +23,6 @@ def update_read_config(self):
)

def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
from unstructured.ingest.connector.s3 import S3SourceConnector
from unstructured.ingest.connector.fsspec.s3 import S3SourceConnector

return S3SourceConnector

0 comments on commit 57033a9

Please sign in to comment.