diff --git a/components/caption_images/Dockerfile b/components/caption_images/Dockerfile index 9713a5785..49aa50657 100644 --- a/components/caption_images/Dockerfile +++ b/components/caption_images/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component diff --git a/components/chunk_text/Dockerfile b/components/chunk_text/Dockerfile index f734b49a2..35e7cc91f 100644 --- a/components/chunk_text/Dockerfile +++ b/components/chunk_text/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component diff --git a/components/download_images/Dockerfile b/components/download_images/Dockerfile index f734b49a2..35e7cc91f 100644 --- a/components/download_images/Dockerfile +++ b/components/download_images/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component diff --git 
a/components/embed_images/Dockerfile b/components/embed_images/Dockerfile index 77e9ccdbe..bbc2fedf3 100644 --- a/components/embed_images/Dockerfile +++ b/components/embed_images/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/embed_text/Dockerfile b/components/embed_text/Dockerfile index fd1dbc22f..5c1eea4ed 100644 --- a/components/embed_text/Dockerfile +++ b/components/embed_text/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component diff --git a/components/embedding_based_laion_retrieval/Dockerfile b/components/embedding_based_laion_retrieval/Dockerfile index 4642b9b8b..72525d884 100644 --- a/components/embedding_based_laion_retrieval/Dockerfile +++ b/components/embedding_based_laion_retrieval/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component 
folder WORKDIR /component/src diff --git a/components/filter_image_resolution/Dockerfile b/components/filter_image_resolution/Dockerfile index 4642b9b8b..72525d884 100644 --- a/components/filter_image_resolution/Dockerfile +++ b/components/filter_image_resolution/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/image_cropping/Dockerfile b/components/image_cropping/Dockerfile index 4642b9b8b..72525d884 100644 --- a/components/image_cropping/Dockerfile +++ b/components/image_cropping/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/image_resolution_extraction/Dockerfile b/components/image_resolution_extraction/Dockerfile index 4642b9b8b..72525d884 100644 --- a/components/image_resolution_extraction/Dockerfile +++ b/components/image_resolution_extraction/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install 
fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/index_weaviate/Dockerfile b/components/index_weaviate/Dockerfile index d212df5d3..41aa27b5c 100644 --- a/components/index_weaviate/Dockerfile +++ b/components/index_weaviate/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/language_filter/Dockerfile b/components/language_filter/Dockerfile index 563dd87a1..8290240da 100644 --- a/components/language_filter/Dockerfile +++ b/components/language_filter/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/language_filter/tests/language_filter_component_test.py b/components/language_filter/tests/language_filter_component_test.py index 1cfc0d0ce..7f6adcd36 100644 --- a/components/language_filter/tests/language_filter_component_test.py +++ b/components/language_filter/tests/language_filter_component_test.py @@ -1,6 +1,6 @@ """Unit test for language filter component.""" import pandas as pd -from fondant.component_spec import ComponentSpec +from fondant.core.component_spec 
import ComponentSpec from components.language_filter.src.main import LanguageFilterComponent diff --git a/components/load_from_files/Dockerfile b/components/load_from_files/Dockerfile index 4642b9b8b..72525d884 100644 --- a/components/load_from_files/Dockerfile +++ b/components/load_from_files/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/load_from_hf_hub/Dockerfile b/components/load_from_hf_hub/Dockerfile index 4642b9b8b..72525d884 100644 --- a/components/load_from_hf_hub/Dockerfile +++ b/components/load_from_hf_hub/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/load_from_hf_hub/src/main.py b/components/load_from_hf_hub/src/main.py index 55e562237..b978a96af 100644 --- a/components/load_from_hf_hub/src/main.py +++ b/components/load_from_hf_hub/src/main.py @@ -6,7 +6,7 @@ import dask.dataframe as dd import pandas as pd from fondant.component import DaskLoadComponent -from fondant.component_spec import ComponentSpec +from fondant.core.component_spec import ComponentSpec logger = logging.getLogger(__name__) diff --git a/components/load_from_parquet/Dockerfile 
b/components/load_from_parquet/Dockerfile index 4642b9b8b..72525d884 100644 --- a/components/load_from_parquet/Dockerfile +++ b/components/load_from_parquet/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/load_from_parquet/src/main.py b/components/load_from_parquet/src/main.py index cfc104740..ddd338552 100644 --- a/components/load_from_parquet/src/main.py +++ b/components/load_from_parquet/src/main.py @@ -6,7 +6,7 @@ import dask.dataframe as dd import pandas as pd from fondant.component import DaskLoadComponent -from fondant.component_spec import ComponentSpec +from fondant.core.component_spec import ComponentSpec logger = logging.getLogger(__name__) diff --git a/components/minhash_generator/Dockerfile b/components/minhash_generator/Dockerfile index 4642b9b8b..72525d884 100644 --- a/components/minhash_generator/Dockerfile +++ b/components/minhash_generator/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/minhash_generator/tests/component_test.py b/components/minhash_generator/tests/component_test.py index b446a704c..76c1da411 100644 --- a/components/minhash_generator/tests/component_test.py +++ 
b/components/minhash_generator/tests/component_test.py @@ -1,6 +1,6 @@ """Unit test for minhash generation component.""" import pandas as pd -from fondant.component_spec import ComponentSpec +from fondant.core.component_spec import ComponentSpec from components.minhash_generator.src.main import MinHashGeneratorComponent diff --git a/components/prompt_based_laion_retrieval/Dockerfile b/components/prompt_based_laion_retrieval/Dockerfile index 4642b9b8b..72525d884 100644 --- a/components/prompt_based_laion_retrieval/Dockerfile +++ b/components/prompt_based_laion_retrieval/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/resize_images/Dockerfile b/components/resize_images/Dockerfile index 563dd87a1..8290240da 100644 --- a/components/resize_images/Dockerfile +++ b/components/resize_images/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/segment_images/Dockerfile b/components/segment_images/Dockerfile index c93dca10d..7985cfcb2 100644 --- a/components/segment_images/Dockerfile +++ b/components/segment_images/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install 
Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/text_length_filter/Dockerfile b/components/text_length_filter/Dockerfile index 4642b9b8b..72525d884 100644 --- a/components/text_length_filter/Dockerfile +++ b/components/text_length_filter/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/text_length_filter/tests/text_length_filter_test.py b/components/text_length_filter/tests/text_length_filter_test.py index 74107a8f8..fbbbb1aba 100644 --- a/components/text_length_filter/tests/text_length_filter_test.py +++ b/components/text_length_filter/tests/text_length_filter_test.py @@ -1,6 +1,6 @@ """Unit test for text length filter component.""" import pandas as pd -from fondant.component_spec import ComponentSpec +from fondant.core.component_spec import ComponentSpec from components.text_length_filter.src.main import TextLengthFilterComponent diff --git a/components/text_normalization/Dockerfile b/components/text_normalization/Dockerfile index b1b0947bf..b4f330990 100644 --- a/components/text_normalization/Dockerfile +++ b/components/text_normalization/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to 
leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component diff --git a/components/write_to_hf_hub/Dockerfile b/components/write_to_hf_hub/Dockerfile index e94e95ba1..833bc8e21 100644 --- a/components/write_to_hf_hub/Dockerfile +++ b/components/write_to_hf_hub/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt # Install Fondant # This is split from other requirements to leverage caching ARG FONDANT_VERSION=main -RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} +RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} # Set the working directory to the component folder WORKDIR /component/src diff --git a/components/write_to_hf_hub/src/main.py b/components/write_to_hf_hub/src/main.py index 939d81c89..0ed01b961 100644 --- a/components/write_to_hf_hub/src/main.py +++ b/components/write_to_hf_hub/src/main.py @@ -10,7 +10,7 @@ import huggingface_hub from datasets.features.features import generate_from_arrow_type from fondant.component import DaskWriteComponent -from fondant.component_spec import ComponentSpec +from fondant.core.component_spec import ComponentSpec from PIL import Image logger = logging.getLogger(__name__) diff --git a/pyproject.toml b/pyproject.toml index 91675f85d..1b1660b88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,26 +42,31 @@ classifiers = [ [tool.poetry.dependencies] python = ">= 3.8, <3.11" -dask = {extras = ["dataframe", "distributed", "diagnostics"], version = ">= 2023.4.1"} + +fsspec = ">= 2023.4.0" importlib-resources = { version = ">= 1.3", python = "<3.9" } jsonschema = ">= 4.18" pyarrow = ">= 11.0.0" -fsspec = { version = ">= 2023.4.0", 
optional = true} +dask = { version = ">= 2023.4.1", extras = ["dataframe", "distributed", "diagnostics"], optional = true } + gcsfs = { version = ">= 2023.10.0", optional = true } s3fs = { version = ">= 2023.4.0", optional = true } adlfs = { version = ">= 2023.4.0", optional = true } + docker = {version = ">= 6.1.3", optional = true } kfp = { version = "2.3.0", optional = true, extras =["kubernetes"] } -pandas = { version = ">= 1.3.5", optional = true } google-cloud-aiplatform = { version = "1.34.0", optional = true} [tool.poetry.extras] -aws = ["fsspec", "s3fs"] -azure = ["fsspec", "adlfs"] -gcp = ["fsspec", "gcsfs"] -kfp = ["kfp"] -vertex = ["kfp", "google-cloud-aiplatform"] +component = ["dask"] + +aws = ["s3fs"] +azure = ["adlfs"] +gcp = ["gcsfs"] + +kfp = ["docker", "kfp"] +vertex = ["docker", "kfp", "google-cloud-aiplatform"] docker = ["docker"] [tool.poetry.group.test.dependencies] diff --git a/src/fondant/component/__init__.py b/src/fondant/component/__init__.py index 9a37e1d2d..7aac00958 100644 --- a/src/fondant/component/__init__.py +++ b/src/fondant/component/__init__.py @@ -1,3 +1,14 @@ +try: + import dask # noqa: F401 +except ImportError: + msg = ( + "You need to install fondant using the `component` extra to develop or run a component. " + "You can install it with `pip install fondant[component]`" + ) + raise SystemExit( + msg, + ) + from .component import ( # noqa BaseComponent, Component,