Skip to content

Commit

Permalink
Remove pyarrow as a direct dependency (#2228)
Browse files Browse the repository at this point in the history
Signed-off-by: Thomas J. Fan <[email protected]>
  • Loading branch information
thomasjpfan authored Jul 29, 2024
1 parent 955ae33 commit 11faf39
Show file tree
Hide file tree
Showing 11 changed files with 16 additions and 13 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/pythonbuild.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
run: |
pip install uv
make setup-global-uv
uv pip uninstall --system pandas
uv pip uninstall --system pandas pyarrow
uv pip freeze
- name: Test with coverage
run: |
Expand Down Expand Up @@ -98,7 +98,7 @@ jobs:
run: |
pip install uv
make setup-global-uv
uv pip uninstall --system pandas
uv pip uninstall --system pandas pyarrow
uv pip freeze
- name: Run extras unit tests with coverage
# Skip this step if running on python 3.12 due to https://github.com/tensorflow/tensorflow/issues/62003
Expand Down
1 change: 1 addition & 0 deletions dev-requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ autoflake
pillow
numpy
pandas
pyarrow
scikit-learn
types-requests
prometheus-client
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ dependencies = [
"marshmallow-jsonschema>=0.12.0",
"mashumaro>=3.11",
"protobuf!=4.25.0",
"pyarrow",
"pygments",
"python-json-logger>=2.0.0",
"pytimeparse>=1.1.8",
Expand Down
3 changes: 2 additions & 1 deletion tests/flytekit/unit/core/test_type_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from typing import List, Optional, Type

import mock
import pyarrow as pa
import pytest
import typing_extensions
from dataclasses_json import DataClassJsonMixin, dataclass_json
Expand Down Expand Up @@ -1408,9 +1407,11 @@ class UnsupportedEnumValues(Enum):
BLUE = 3


@pytest.mark.skipif("pyarrow" not in sys.modules, reason="pyarrow is not installed.")
@pytest.mark.skipif("pandas" not in sys.modules, reason="Pandas is not installed.")
def test_structured_dataset_type():
import pandas as pd
import pyarrow as pa
from pandas._testing import assert_frame_equal

name = "Name"
Expand Down
3 changes: 2 additions & 1 deletion tests/flytekit/unit/deck/test_renderer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import sys

import pyarrow as pa
import pytest

from flytekit.deck.renderer import DEFAULT_MAX_COLS, DEFAULT_MAX_ROWS, ArrowRenderer, TopFrameRenderer


@pytest.mark.skipif("pyarrow" not in sys.modules, reason="Pyarrow is not installed.")
@pytest.mark.skipif("pandas" not in sys.modules, reason="Pandas is not installed.")
@pytest.mark.parametrize(
"rows, cols, max_rows, expected_max_rows, max_cols, expected_max_cols",
Expand All @@ -23,6 +23,7 @@
)
def test_renderer(rows, cols, max_rows, expected_max_rows, max_cols, expected_max_cols):
import pandas as pd
import pyarrow as pa

df = pd.DataFrame({f"abc-{k}": list(range(rows)) for k in range(cols)})
pa_df = pa.Table.from_pandas(df)
Expand Down
4 changes: 2 additions & 2 deletions tests/flytekit/unit/lazy_module/test_lazy_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@


def test_lazy_module():
mod = lazy_module("pyarrow")
assert mod.__name__ == "pyarrow"
mod = lazy_module("click")
assert mod.__name__ == "click"
mod = lazy_module("fake_module")
assert isinstance(mod, LazyModule)
with pytest.raises(ImportError, match="Module fake_module is not yet installed."):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import sys
import typing

import pyarrow as pa
import pytest
from typing_extensions import Annotated

from flytekit import kwtypes, task


@pytest.mark.skipif("pyarrow" not in sys.modules, reason="Pyarrow is not installed.")
@pytest.mark.skipif("pandas" not in sys.modules, reason="Pandas is not installed.")
def test_structured_dataset_wf():
import pandas as pd
import pyarrow as pa

cols = kwtypes(Name=str, Age=int)
subset_cols = kwtypes(Name=str)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from collections import OrderedDict

import google.cloud.bigquery
import pyarrow as pa
import pytest
from fsspec.utils import get_protocol
from typing_extensions import Annotated
Expand Down Expand Up @@ -34,6 +33,7 @@
)

pd = pytest.importorskip("pandas")
pa = pytest.importorskip("pyarrow")

my_cols = kwtypes(w=typing.Dict[str, typing.Dict[str, int]], x=typing.List[typing.List[int]], y=int, z=str)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import typing

import mock
import pyarrow as pa
import pytest

from flytekit.core import context_manager
Expand All @@ -17,6 +16,7 @@
)

pd = pytest.importorskip("pandas")
pa = pytest.importorskip("pyarrow")
my_cols = kwtypes(w=typing.Dict[str, typing.Dict[str, int]], x=typing.List[typing.List[int]], y=int, z=str)
fields = [("some_int", pa.int32()), ("some_string", pa.string())]
arrow_schema = pa.schema(fields)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from dataclasses import dataclass

import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
import pytest
from typing_extensions import Annotated

Expand All @@ -24,6 +22,8 @@
)

pd = pytest.importorskip("pandas")
pa = pytest.importorskip("pyarrow")
pq = pytest.importorskip("pyarrow.parquet")

PANDAS_PATH = FlyteContextManager.current_context().file_access.get_random_local_directory()
NUMPY_PATH = FlyteContextManager.current_context().file_access.get_random_local_directory()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from dataclasses import dataclass

import pyarrow as pa
import pytest
from typing_extensions import Annotated

from flytekit import FlyteContextManager, StructuredDataset, kwtypes, task, workflow

pd = pytest.importorskip("pandas")
pa = pytest.importorskip("pyarrow")

PANDAS_PATH = FlyteContextManager.current_context().file_access.get_random_local_directory()
NUMPY_PATH = FlyteContextManager.current_context().file_access.get_random_local_directory()
Expand Down

0 comments on commit 11faf39

Please sign in to comment.