diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml index 50ca7c35c..38bc56cce 100644 --- a/.github/workflows/pipeline.yaml +++ b/.github/workflows/pipeline.yaml @@ -22,26 +22,11 @@ jobs: run: | pip install --upgrade pip pip install poetry==1.4.0 - poetry install --all-extras - - name: Test with pre-commit - run: poetry run pre-commit run --all-files --show-diff-on-failure - - name: Check licenses - run: | - poetry export -f requirements.txt --output /tmp/requirements.txt - poetry run liccheck -s license_strategy.ini -r /tmp/requirements.txt -l PARANOID - - name: Shadow-run pylint - # Check if pylint catches errors not caught by our pre-commit checks to validate switch - # to ruff - run: | - pip install pylint==2.16.4 - pylint fondant --exit-zero - - name: Test with pytest - run: | - poetry run pytest tests --cov fondant --cov-report term-missing + poetry install --all-extras --with test + - name: Test with tox + run: poetry run tox - name: Coveralls - run: | - pip install "coveralls<4" - coveralls --service github + run: poetry run coveralls --service github env: COVERALLS_PARALLEL: true GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/components/caption_images/src/main.py b/components/caption_images/src/main.py index 7404f0687..72f4a5fe4 100644 --- a/components/caption_images/src/main.py +++ b/components/caption_images/src/main.py @@ -6,11 +6,10 @@ import numpy as np import pandas as pd import torch +from fondant.component import PandasTransformComponent from PIL import Image from transformers import BatchEncoding, BlipForConditionalGeneration, BlipProcessor -from fondant.component import PandasTransformComponent - logger = logging.getLogger(__name__) diff --git a/components/download_images/src/main.py b/components/download_images/src/main.py index 017001e0d..c5dd07376 100644 --- a/components/download_images/src/main.py +++ b/components/download_images/src/main.py @@ -11,9 +11,8 @@ import urllib import dask.dataframe as dd -from resizer import Resizer - from fondant.component import DaskTransformComponent +from resizer import Resizer logger = logging.getLogger(__name__) diff --git a/components/embedding_based_laion_retrieval/src/main.py b/components/embedding_based_laion_retrieval/src/main.py index 1c8e1672c..4a4e55204 100644 --- a/components/embedding_based_laion_retrieval/src/main.py +++ b/components/embedding_based_laion_retrieval/src/main.py @@ -7,7 +7,6 @@ import pandas as pd from clip_client import ClipClient, Modality - from fondant.component import PandasTransformComponent logger = logging.getLogger(__name__) diff --git a/components/filter_comments/src/main.py b/components/filter_comments/src/main.py index aa2698657..6925afe66 100644 --- a/components/filter_comments/src/main.py +++ b/components/filter_comments/src/main.py @@ -5,9 +5,8 @@ import logging import dask.dataframe as dd -from utils.text_extraction import get_comments_to_code_ratio - from fondant.component import DaskTransformComponent +from utils.text_extraction import get_comments_to_code_ratio logger = logging.getLogger(__name__) diff --git a/components/filter_image_resolution/src/main.py b/components/filter_image_resolution/src/main.py index 3d9e83566..c59ca5a60 100644 --- a/components/filter_image_resolution/src/main.py +++ b/components/filter_image_resolution/src/main.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd - from fondant.component import PandasTransformComponent logger = logging.getLogger(__name__) diff --git a/components/filter_line_length/src/main.py b/components/filter_line_length/src/main.py index 9401af8ac..a2792b38c 100644 --- a/components/filter_line_length/src/main.py +++ b/components/filter_line_length/src/main.py @@ -2,7 +2,6 @@ import logging import dask.dataframe as dd - from fondant.component import DaskTransformComponent logger = logging.getLogger(__name__) diff --git a/components/image_cropping/src/main.py b/components/image_cropping/src/main.py index 85445169f..318c4d6b9 100644 --- a/components/image_cropping/src/main.py +++ b/components/image_cropping/src/main.py @@ -5,11 +5,10 @@ import dask.dataframe as dd import numpy as np +from fondant.component import DaskTransformComponent from image_crop import remove_borders from PIL import Image -from fondant.component import DaskTransformComponent - logger = logging.getLogger(__name__) diff --git a/components/image_embedding/src/main.py b/components/image_embedding/src/main.py index 99758af1c..05b71f7cb 100644 --- a/components/image_embedding/src/main.py +++ b/components/image_embedding/src/main.py @@ -6,11 +6,10 @@ import numpy as np import pandas as pd import torch +from fondant.component import PandasTransformComponent from PIL import Image from transformers import CLIPProcessor, CLIPVisionModelWithProjection -from fondant.component import PandasTransformComponent - logger = logging.getLogger(__name__) diff --git a/components/image_resolution_extraction/src/main.py b/components/image_resolution_extraction/src/main.py index e1be245c2..89e230348 100644 --- a/components/image_resolution_extraction/src/main.py +++ b/components/image_resolution_extraction/src/main.py @@ -6,7 +6,6 @@ import imagesize import numpy as np import pandas as pd - from fondant.component import PandasTransformComponent logger = logging.getLogger(__name__) diff --git a/components/language_filter/src/main.py b/components/language_filter/src/main.py index 5e52def65..c2e3dcc02 100644 --- a/components/language_filter/src/main.py +++ b/components/language_filter/src/main.py @@ -3,7 +3,6 @@ import fasttext import pandas as pd - from fondant.component import PandasTransformComponent logger = logging.getLogger(__name__) diff --git a/components/language_filter/tests/language_filter_component_test.py b/components/language_filter/tests/language_filter_component_test.py index 633b3ecee..60abe7fc9 100644 --- a/components/language_filter/tests/language_filter_component_test.py +++ b/components/language_filter/tests/language_filter_component_test.py @@ -1,8 +1,8 @@ """Unit test for language filter component.""" import pandas as pd +from fondant.component_spec import ComponentSpec from components.language_filter.src.main import LanguageFilterComponent -from fondant.component_spec import ComponentSpec def test_run_component_test(): diff --git a/components/load_from_hf_hub/src/main.py b/components/load_from_hf_hub/src/main.py index cc0bb3a39..03a1e4c8b 100644 --- a/components/load_from_hf_hub/src/main.py +++ b/components/load_from_hf_hub/src/main.py @@ -3,7 +3,6 @@ import typing as t import dask.dataframe as dd - from fondant.component import LoadComponent logger = logging.getLogger(__name__) diff --git a/components/pii_redaction/src/main.py b/components/pii_redaction/src/main.py index e05a6aaa0..98237dc05 100644 --- a/components/pii_redaction/src/main.py +++ b/components/pii_redaction/src/main.py @@ -4,11 +4,10 @@ import logging import dask.dataframe as dd +from fondant.component import DaskTransformComponent from pii_detection import scan_pii from pii_redaction import redact_pii -from fondant.component import DaskTransformComponent - logger = logging.getLogger(__name__) diff --git a/components/prompt_based_laion_retrieval/src/main.py b/components/prompt_based_laion_retrieval/src/main.py index 6dbc39a57..d1e057b0b 100644 --- a/components/prompt_based_laion_retrieval/src/main.py +++ b/components/prompt_based_laion_retrieval/src/main.py @@ -6,7 +6,6 @@ import pandas as pd from clip_client import ClipClient, Modality - from fondant.component import PandasTransformComponent logger = logging.getLogger(__name__) diff --git a/components/segment_images/src/main.py b/components/segment_images/src/main.py index 89e9193ef..c552cf028 100644 --- a/components/segment_images/src/main.py +++ b/components/segment_images/src/main.py @@ -6,12 +6,11 @@ import numpy as np import pandas as pd import torch +from fondant.component import PandasTransformComponent from palette import palette from PIL import Image from transformers import AutoModelForSemanticSegmentation, BatchFeature, SegformerImageProcessor -from fondant.component import PandasTransformComponent - logger = logging.getLogger(__name__) diff --git a/components/text_normalization/src/main.py b/components/text_normalization/src/main.py index 8959944ae..716273410 100644 --- a/components/text_normalization/src/main.py +++ b/components/text_normalization/src/main.py @@ -5,7 +5,6 @@ from typing import List import pandas as pd - from fondant.component import PandasTransformComponent logger = logging.getLogger(__name__) diff --git a/components/write_to_hf_hub/src/main.py b/components/write_to_hf_hub/src/main.py index c3022b234..bbe35d883 100644 --- a/components/write_to_hf_hub/src/main.py +++ b/components/write_to_hf_hub/src/main.py @@ -9,9 +9,8 @@ # Define the schema for the struct using PyArrow import huggingface_hub from datasets.features.features import generate_from_arrow_type -from PIL import Image - from fondant.component import WriteComponent +from PIL import Image logger = logging.getLogger(__name__) diff --git a/fondant/components b/fondant/components deleted file mode 120000 index 6c2e490e0..000000000 --- a/fondant/components +++ /dev/null @@ -1 +0,0 @@ -../components/ \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index ac4a1e241..3ec4519b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,10 +55,10 @@ pandas = { version = ">= 1.3.5", optional = true } pipelines = ["kfp", "kubernetes"] [tool.poetry.group.test.dependencies] -liccheck = "^0.7.3" pre-commit = "^3.1.1" -pytest = "^7.2.2" -pytest-cov = "^4.1.0" +tox = "^4.6.4" +tox-gh-actions = "^3.1.3" +coveralls = "^3.3.1" [tool.poetry.group.docs.dependencies] mkdocs-material = "^9.1.8" diff --git a/scripts/pre-build.sh b/scripts/pre-build.sh index a81307f07..5ef895a5f 100755 --- a/scripts/pre-build.sh +++ b/scripts/pre-build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# This script copies the components/ directory to fondant/components, replacing the symlink +# This script copies the components/ directory to src/fondant/components, replacing the symlink # It should be run before building the fondant package' # This script makes changes to the local files, which should not be committed to git set -e @@ -8,6 +8,6 @@ scripts_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) root_path=$(dirname "$scripts_path") pushd "$root_path" -rm fondant/components -cp -r components/ fondant/ +rm -rf src/fondant/components +cp -r components/ src/fondant/ popd diff --git a/fondant/__init__.py b/src/fondant/__init__.py similarity index 100% rename from fondant/__init__.py rename to src/fondant/__init__.py diff --git a/fondant/cli.py b/src/fondant/cli.py similarity index 100% rename from fondant/cli.py rename to src/fondant/cli.py diff --git a/fondant/compiler.py b/src/fondant/compiler.py similarity index 100% rename from fondant/compiler.py rename to src/fondant/compiler.py diff --git a/fondant/component.py b/src/fondant/component.py similarity index 100% rename from fondant/component.py rename to src/fondant/component.py diff --git a/fondant/component_spec.py b/src/fondant/component_spec.py similarity index 100% rename from fondant/component_spec.py rename to src/fondant/component_spec.py diff --git a/fondant/data_io.py b/src/fondant/data_io.py similarity index 100% rename from fondant/data_io.py rename to src/fondant/data_io.py diff --git a/fondant/exceptions.py b/src/fondant/exceptions.py similarity index 100% rename from fondant/exceptions.py rename to src/fondant/exceptions.py diff --git a/fondant/explorer.py b/src/fondant/explorer.py similarity index 100% rename from fondant/explorer.py rename to src/fondant/explorer.py diff --git a/fondant/import_utils.py b/src/fondant/import_utils.py similarity index 100% rename from fondant/import_utils.py rename to src/fondant/import_utils.py diff --git a/fondant/manifest.py b/src/fondant/manifest.py similarity index 100% rename from fondant/manifest.py rename to src/fondant/manifest.py diff --git a/fondant/pipeline.py b/src/fondant/pipeline.py similarity index 100% rename from fondant/pipeline.py rename to src/fondant/pipeline.py diff --git a/fondant/runner.py b/src/fondant/runner.py similarity index 100% rename from fondant/runner.py rename to src/fondant/runner.py diff --git a/fondant/schema.py b/src/fondant/schema.py similarity index 100% rename from fondant/schema.py rename to src/fondant/schema.py diff --git a/fondant/schemas/common.json b/src/fondant/schemas/common.json similarity index 100% rename from fondant/schemas/common.json rename to src/fondant/schemas/common.json diff --git a/fondant/schemas/component_spec.json b/src/fondant/schemas/component_spec.json similarity index 100% rename from fondant/schemas/component_spec.json rename to src/fondant/schemas/component_spec.json diff --git a/fondant/schemas/manifest.json b/src/fondant/schemas/manifest.json similarity index 100% rename from fondant/schemas/manifest.json rename to src/fondant/schemas/manifest.json diff --git a/tests/test_cli.py b/tests/test_cli.py index f121bd353..8b539986c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3,7 +3,6 @@ from unittest.mock import patch import pytest - from fondant.cli import ImportFromStringError, compile, pipeline_from_string, run from fondant.pipeline import Pipeline diff --git a/tests/test_compiler.py b/tests/test_compiler.py index 689a0c19e..70dd40c88 100644 --- a/tests/test_compiler.py +++ b/tests/test_compiler.py @@ -3,7 +3,6 @@ import pytest import yaml - from fondant.compiler import DockerCompiler from fondant.pipeline import ComponentOp, Pipeline diff --git a/tests/test_component.py b/tests/test_component.py index fc1f75da3..e6b0f2576 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -8,7 +8,6 @@ import pandas as pd import pytest import yaml - from fondant.component import ( Component, DaskTransformComponent, diff --git a/tests/test_component_specs.py b/tests/test_component_specs.py index 698e7bd38..56499515d 100644 --- a/tests/test_component_specs.py +++ b/tests/test_component_specs.py @@ -6,7 +6,6 @@ import pytest import yaml - from fondant.component_spec import ComponentSpec, ComponentSubset, KubeflowComponentSpec from fondant.exceptions import InvalidComponentSpec from fondant.schema import Type diff --git a/tests/test_data_io.py b/tests/test_data_io.py index 650e73d97..7d42b2e1d 100644 --- a/tests/test_data_io.py +++ b/tests/test_data_io.py @@ -2,7 +2,6 @@ import dask.dataframe as dd import pytest - from fondant.component_spec import ComponentSpec from fondant.data_io import DaskDataLoader, DaskDataWriter from fondant.manifest import Manifest diff --git a/tests/test_import_utils.py b/tests/test_import_utils.py index 63120b05c..23283c4eb 100644 --- a/tests/test_import_utils.py +++ b/tests/test_import_utils.py @@ -3,7 +3,6 @@ from unittest import mock import pytest - from fondant.import_utils import ( is_datasets_available, is_kfp_available, diff --git a/tests/test_manifest.py b/tests/test_manifest.py index b39786195..68fb9f364 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -3,7 +3,6 @@ from pathlib import Path import pytest - from fondant.exceptions import InvalidManifest from fondant.manifest import Field, Index, Manifest, Subset, Type diff --git a/tests/test_manifest_evolution.py b/tests/test_manifest_evolution.py index bec99c280..29afabe87 100644 --- a/tests/test_manifest_evolution.py +++ b/tests/test_manifest_evolution.py @@ -3,7 +3,6 @@ import pytest import yaml - from fondant.component_spec import ComponentSpec from fondant.manifest import Manifest diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 221616372..d68e44efb 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -3,7 +3,6 @@ import pytest import yaml - from fondant.exceptions import InvalidPipelineDefinition from fondant.pipeline import ComponentOp, ComponentSpec, Pipeline diff --git a/tests/test_schema.py b/tests/test_schema.py index 7d69331a7..7af7055e5 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -1,6 +1,5 @@ import pyarrow as pa import pytest - from fondant.exceptions import InvalidTypeSchema from fondant.schema import Type diff --git a/tox.ini b/tox.ini new file mode 100644 index 000000000..244e6ebbc --- /dev/null +++ b/tox.ini @@ -0,0 +1,43 @@ +[tox] +isolated_build = True +envlist = + pre-commit + check-licenses + py{38,39,310,311} + +[gh-actions] +python = + 3.8: py38 + 3.9: py39 + 3.10: pre-commit,check-licenses, py310 + 3.11: py311 + +[testenv:pre-commit] +allowlist_externals= + pre-commit +commands=pre-commit run --all-files --show-diff-on-failure + +[testenv:check-licenses] +deps= + liccheck>=0.7.3,<0.8 +commands= + poetry lock + poetry install --all-extras + poetry show + poetry export -f requirements.txt --output /tmp/requirements.txt + poetry run liccheck -s license_strategy.ini -r /tmp/requirements.txt -l PARANOID + +[testenv] +setenv=PYTHONPATH = {toxinidir}:{toxinidir} +deps= + pytest>=7.4.0,<8 + pytest-cov>=4.1.0,<5 +allowlist_externals= + poetry + /usr/bin/bash +commands= + bash ./scripts/pre-build.sh + poetry lock + poetry install --all-extras + poetry show + poetry run python -m pytest tests --cov fondant --cov-report term-missing