Skip to content

Commit

Permalink
Add GCS protocol in the structured dataset
Browse files (browse the repository at this point in the history)
Signed-off-by: Kevin Su <[email protected]>
  • Loading branch information
pingsutw committed Mar 1, 2022
1 parent 9477e1f commit 945a0cf
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 2 deletions.
3 changes: 2 additions & 1 deletion flytekit/types/structured/basic_dfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from flytekit.models.literals import StructuredDatasetMetadata
from flytekit.models.types import StructuredDatasetType
from flytekit.types.structured.structured_dataset import (
GCS,
LOCAL,
PARQUET,
S3,
Expand Down Expand Up @@ -106,7 +107,7 @@ def decode(
return pq.read_table(local_dir)


for protocol in [LOCAL, S3]: # Should we add GCS
for protocol in [LOCAL, S3, GCS]:
StructuredDatasetTransformerEngine.register(PandasToParquetEncodingHandler(protocol), default_for_type=True)
StructuredDatasetTransformerEngine.register(ParquetToPandasDecodingHandler(protocol), default_for_type=True)
StructuredDatasetTransformerEngine.register(ArrowToParquetEncodingHandler(protocol), default_for_type=True)
Expand Down
1 change: 1 addition & 0 deletions flytekit/types/structured/structured_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
# Protocols
BIGQUERY = "bq"
S3 = "s3"
GCS = "gs"
LOCAL = "/"

# For specifying the storage formats of StructuredDatasets. It's just a string, nothing fancy.
Expand Down
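5 changes: 4 additions & 1 deletion [file path missing from this capture; presumably a module inside the plugins/flytekit-data-fsspec package]
Original file line number Diff line number Diff line change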
@@ -1,7 +1,7 @@
import importlib

from flytekit import USE_STRUCTURED_DATASET, StructuredDatasetTransformerEngine, logger
from flytekit.types.structured.structured_dataset import S3
from flytekit.types.structured.structured_dataset import GCS, S3

from .persist import FSSpecPersistence

Expand All @@ -18,3 +18,6 @@ def _register(protocol: str):

if importlib.util.find_spec("s3fs"):
_register(S3)

if importlib.util.find_spec("gcsfs"):
_register(GCS)
1 change: 1 addition & 0 deletions plugins/flytekit-data-fsspec/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
extras_require={
# https://github.com/fsspec/filesystem_spec/blob/master/setup.py#L36
"aws": ["s3fs>=2021.7.0"],
"gcp": ["gcsfs>=2021.7.0"],
},
license="apache2",
python_requires=">=3.7",
Expand Down

0 comments on commit 945a0cf

Please sign in to comment.