Skip to content

Commit

Permalink
lint fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
rbiseck3 committed Oct 4, 2023
1 parent c01da19 commit eaf6746
Show file tree
Hide file tree
Showing 38 changed files with 61 additions and 200 deletions.
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/airtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def airtable_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=[AirtableCliConfig])
runner = Airtable(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def azure_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=[AzureCliConfig])
runner = Azure(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/biomed.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def biomed_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=[BiomedCliConfig])
runner = Biomed(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/box.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def box_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=[BoxCliConfig])
runner = Box(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/confluence.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def confluence_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=[ConfluenceCliConfig])
runner = Confluence(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/delta_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def delta_table_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=[DeltaTableCliConfig])
runner = DeltaTable(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/discord.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def discord_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=[DiscordCliConfig])
runner = Discord(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/dropbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def dropbox_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=[DropboxCliConfig])
runner = Dropbox(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def elasticsearch_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=[ElasticsearchCliConfig])
runner = ElasticSearch(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def fsspec_source(ctx: click.Context, **options):
try:
configs = extract_configs(options)
runner = Fsspec(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def gcs_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=([GcsCliConfig]))
runner = GCS(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def github_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=([GithubCliConfig]))
runner = Github(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/gitlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def gitlab_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=([GitlabCliConfig]))
runner = Gitlab(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/google_drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def google_drive_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=([GoogleDriveCliConfig]))
runner = GoogleDrive(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/jira.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def jira_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=([JiraCliConfig]))
runner = Jira(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def local_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=([LocalCliConfig]))
runner = Local(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/notion.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def notion_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=([NotionCliConfig]))
runner = Notion(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/onedrive.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def onedrive_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=([OnedriveCliConfig]))
runner = OneDrive(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/outlook.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def outlook_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=([OutlookCliConfig]))
runner = Outlook(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/reddit.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def reddit_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=([RedditCliConfig]))
runner = Reddit(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
4 changes: 2 additions & 2 deletions unstructured/ingest/cli/cmds/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def s3_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=[S3CliConfig])
s3_runner = S3(
**configs,
**configs, # type: ignore
)
s3_runner.run(**options)
except Exception as e:
Expand All @@ -79,7 +79,7 @@ def s3_dest(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=[S3CliConfig])
s3_runner = S3(
**configs,
**configs, # type: ignore
writer_type="s3",
writer_kwargs=options,
)
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/salesforce.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def salesforce_source(ctx: click.Context, **options):
try:
configs = extract_configs(options, validate=([SalesforceCliConfig]))
runner = Salesforce(
**configs,
**configs, # type: ignore
)
runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/sharepoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def sharepoint_source(ctx: click.Context, **options):
try:
configs = extract_configs(data=options, validate=[SharepointCliConfig])
sharepoint_runner = SharePoint(
**configs,
**configs, # type: ignore
)
sharepoint_runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def slack_source(ctx: click.Context, **options):
try:
configs = extract_configs(data=options, validate=[SlackCliConfig])
sharepoint_runner = Slack(
**configs,
**configs, # type: ignore
)
sharepoint_runner.run(**options)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion unstructured/ingest/cli/cmds/wikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def wikipedia_source(ctx: click.Context, **options):
try:
configs = extract_configs(data=options, validate=[WikipediaCliConfig])
sharepoint_runner = Wikipedia(
**configs,
**configs, # type: ignore
)
sharepoint_runner.run(**options)
except Exception as e:
Expand Down
6 changes: 5 additions & 1 deletion unstructured/ingest/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ def conform_click_options(options: dict):
options[k] = list(v)


def extract_configs(data: dict, validate: t.List[t.Type[BaseConfig]]) -> t.Dict[str, BaseConfig]:
def extract_configs(
data: dict,
validate: t.Optional[t.List[t.Type[BaseConfig]]] = None,
) -> t.Dict[str, BaseConfig]:
validate = validate if validate else []
res = {
"read_config": CliReadConfig.from_dict(data),
"partition_config": CliPartitionConfig.from_dict(data),
Expand Down
4 changes: 2 additions & 2 deletions unstructured/ingest/connector/biomed.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def urls_to_metadata(urls):
download_filepath=(Path(self.read_config.download_dir) / local_path)
.resolve()
.as_posix(),
output_filepath=(Path(self.partition_config.output_dir) / local_path)
output_filepath=(Path(self.processor_config.output_dir) / local_path)
.resolve()
.as_posix(),
),
Expand Down Expand Up @@ -246,7 +246,7 @@ def traverse(path, download_dir, output_dir):
.resolve()
.as_posix(),
output_filepath=(
Path(self.partition_config.output_dir) / local_path
Path(self.processor_config.output_dir) / local_path
)
.resolve()
.as_posix(),
Expand Down
8 changes: 2 additions & 6 deletions unstructured/ingest/connector/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import typing as t
from contextlib import suppress
from dataclasses import dataclass, field
from pathlib import Path
from pathlib import Path, PurePath

from unstructured.ingest.error import SourceConnectionError
from unstructured.ingest.interfaces import (
Expand Down Expand Up @@ -246,11 +246,7 @@ def write(self, docs: t.List[BaseIngestDoc]) -> None:
for doc in docs:
s3_file_path = doc.base_filename
s3_folder = self.connector_config.path
if s3_folder[-1] != "/":
s3_folder = f"{s3_file_path}/"
if s3_file_path[0] == "/":
s3_file_path = s3_file_path[1:]

s3_output_path = s3_folder + s3_file_path
s3_output_path = str(PurePath(s3_folder, s3_file_path)) if s3_file_path else s3_folder
logger.debug(f"Uploading {doc._output_filename} -> {s3_output_path}")
fs.put_file(lpath=doc._output_filename, rpath=s3_output_path)
Empty file.
70 changes: 0 additions & 70 deletions unstructured/ingest/doc_processor/generalized.py

This file was deleted.

3 changes: 2 additions & 1 deletion unstructured/ingest/pipeline/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .doc_factory import DocFactory
from .interfaces import PipelineContext
from .interfaces import PipelineContext, ReformatNode
from .partition import Partitioner
from .pipeline import Pipeline
from .reformat.chunking import Chunker
Expand All @@ -16,4 +16,5 @@
"Pipeline",
"Writer",
"Chunker",
"ReformatNode",
]
2 changes: 1 addition & 1 deletion unstructured/ingest/pipeline/doc_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class DocFactory(DocFactoryNode):
def initialize(self):
self.source_doc_connector.initialize()

def run(self) -> t.Iterable[str]:
def run(self, *args, **kwargs) -> t.Iterable[str]:
docs = self.source_doc_connector.get_ingest_docs()
json_docs = [doc.to_json() for doc in docs]
return json_docs
Loading

0 comments on commit eaf6746

Please sign in to comment.