Skip to content

Commit

Permalink
fix: make pyarrow an optional dependency post-3.20.0 yanked release (
Browse files Browse the repository at this point in the history
…#1879)

* fix: make `pyarrow` an optional dependency again

* install older version of pyarrow

* fix for older tqdm

* remove many pragma: NO COVERs
  • Loading branch information
tswast authored Mar 28, 2024
1 parent 7dfee0c commit 21714e1
Show file tree
Hide file tree
Showing 21 changed files with 126 additions and 100 deletions.
18 changes: 7 additions & 11 deletions google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import pandas # type: ignore

pandas_import_exception = None
except ImportError as exc: # pragma: NO COVER
except ImportError as exc:
pandas = None
pandas_import_exception = exc
else:
Expand All @@ -44,25 +44,21 @@
date_dtype_name = db_dtypes.DateDtype.name
time_dtype_name = db_dtypes.TimeDtype.name
db_dtypes_import_exception = None
except ImportError as exc: # pragma: NO COVER
except ImportError as exc:
db_dtypes = None
db_dtypes_import_exception = exc
date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype

pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True)
from pyarrow import ArrowTypeError # type: ignore # noqa: E402

_BIGNUMERIC_SUPPORT = False
if pyarrow is not None: # pragma: NO COVER
_BIGNUMERIC_SUPPORT = True
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()

try:
    # _BaseGeometry is used to detect shapely objects in `bq_to_arrow_array`
from shapely.geometry.base import BaseGeometry as _BaseGeometry # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
# No shapely, use NoneType for _BaseGeometry as a placeholder.
_BaseGeometry = type(None)
else:
# We don't have any unit test sessions that install shapely but not pandas.
if pandas is not None: # pragma: NO COVER

def _to_wkb():
Expand Down Expand Up @@ -309,10 +305,10 @@ def bq_to_arrow_array(series, bq_field):
if field_type_upper in schema._STRUCT_TYPES:
return pyarrow.StructArray.from_pandas(series, type=arrow_type)
return pyarrow.Array.from_pandas(series, type=arrow_type)
except ArrowTypeError: # pragma: NO COVER
except pyarrow.ArrowTypeError:
msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray"""
_LOGGER.error(msg)
raise ArrowTypeError(msg)
raise pyarrow.ArrowTypeError(msg)


def get_column_or_index(dataframe, name):
Expand Down
4 changes: 2 additions & 2 deletions google/cloud/bigquery/_pyarrow_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

try:
import pyarrow # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
pyarrow = None


Expand Down Expand Up @@ -49,7 +49,7 @@ def pyarrow_timestamp():
_BQ_TO_ARROW_SCALARS = {}
_ARROW_SCALAR_IDS_TO_BQ = {}

if pyarrow: # pragma: NO COVER
if pyarrow:
    # This dictionary is duplicated in bigquery_storage/test/unit/test_reader.py
# When modifying it be sure to update it there as well.
# Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py
Expand Down
13 changes: 8 additions & 5 deletions google/cloud/bigquery/_tqdm_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@

try:
import tqdm # type: ignore
import tqdm.notebook as notebook # type: ignore

except ImportError: # pragma: NO COVER
except ImportError:
tqdm = None

try:
import tqdm.notebook as tqdm_notebook # type: ignore
except ImportError:
tqdm_notebook = None

if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud.bigquery import QueryJob
from google.cloud.bigquery.table import RowIterator
Expand All @@ -42,7 +45,7 @@

def get_progress_bar(progress_bar_type, description, total, unit):
"""Construct a tqdm progress bar object, if tqdm is installed."""
if tqdm is None:
if tqdm is None or tqdm_notebook is None and progress_bar_type == "tqdm_notebook":
if progress_bar_type is not None:
warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3)
return None
Expand All @@ -58,7 +61,7 @@ def get_progress_bar(progress_bar_type, description, total, unit):
unit=unit,
)
elif progress_bar_type == "tqdm_notebook":
return notebook.tqdm(
return tqdm_notebook.tqdm(
bar_format="{l_bar}{bar}|",
desc=description,
file=sys.stdout,
Expand Down
4 changes: 2 additions & 2 deletions google/cloud/bigquery/_versions_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def try_import(self, raise_if_error: bool = False) -> Any:
"""
try:
import pyarrow
except ImportError as exc: # pragma: NO COVER
except ImportError as exc:
if raise_if_error:
raise exceptions.LegacyPyarrowError(
"pyarrow package not found. Install pyarrow version >="
Expand Down Expand Up @@ -212,7 +212,7 @@ def try_import(self, raise_if_error: bool = False) -> Any:
"""
try:
import pandas
except ImportError as exc: # pragma: NO COVER
except ImportError as exc:
if raise_if_error:
raise exceptions.LegacyPandasError(
"pandas package not found. Install pandas version >="
Expand Down
7 changes: 1 addition & 6 deletions google/cloud/bigquery/job/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,9 @@

try:
import pandas # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
pandas = None

try:
import db_dtypes # type: ignore
except ImportError: # pragma: NO COVER
db_dtypes = None

if typing.TYPE_CHECKING: # pragma: NO COVER
# Assumption: type checks are only used by library developers and CI environments
# that have all optional dependencies installed, thus no conditional imports.
Expand Down
2 changes: 1 addition & 1 deletion google/cloud/bigquery/magics/magics.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
import IPython # type: ignore
from IPython import display # type: ignore
from IPython.core import magic_arguments # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
raise ImportError("This module can only be loaded in IPython.")

from google.api_core import client_info
Expand Down
6 changes: 3 additions & 3 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,17 @@

try:
import pandas # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
pandas = None

try:
import pyarrow # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
pyarrow = None

try:
import db_dtypes # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
db_dtypes = None

try:
Expand Down
15 changes: 8 additions & 7 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def default(session, install_extras=True):
install_target = ".[all]"
else:
install_target = "."
session.install("-e", install_target)
session.install("-e", install_target, "-c", constraints_path)
session.run("python", "-m", "pip", "freeze")

# Run py.test against the unit tests.
Expand Down Expand Up @@ -115,14 +115,15 @@ def unit(session):
def unit_noextras(session):
"""Run the unit test suite."""

# Install optional dependencies that are out-of-date.
# Install optional dependencies that are out-of-date to see that
# we fail gracefully.
# https://github.com/googleapis/python-bigquery/issues/933
# There is no pyarrow 1.0.0 package for Python 3.9.

#
# We only install this extra package on one of the two Python versions
# so that it continues to be an optional dependency.
# https://github.com/googleapis/python-bigquery/issues/1877
if session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
session.install("pyarrow>=3.0.0")
elif session.python == UNIT_TEST_PYTHON_VERSIONS[-1]:
session.install("pyarrow")
session.install("pyarrow==1.0.0")

default(session, install_extras=False)

Expand Down
1 change: 0 additions & 1 deletion samples/desktopapp/requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@ google-cloud-testutils==1.4.0
pytest===7.4.4; python_version == '3.7'
pytest==8.1.1; python_version >= '3.8'
mock==5.1.0
pyarrow>=3.0.0
2 changes: 1 addition & 1 deletion samples/snippets/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# samples/snippets should be runnable with no "extras"
google-cloud-testutils==1.4.0
pytest===7.4.4; python_version == '3.7'
pytest==8.1.1; python_version >= '3.8'
mock==5.1.0
pyarrow>=3.0.0
3 changes: 2 additions & 1 deletion samples/snippets/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
google-cloud-bigquery==3.19.0
# samples/snippets should be runnable with no "extras"
google-cloud-bigquery==3.19.0
1 change: 0 additions & 1 deletion testing/constraints-3.11.txt
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
pyarrow>=3.0.0
1 change: 0 additions & 1 deletion testing/constraints-3.12.txt
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
pyarrow>=3.0.0
4 changes: 2 additions & 2 deletions testing/constraints-3.7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ packaging==20.0.0
pandas==1.1.0
proto-plus==1.22.0
protobuf==3.19.5
pyarrow>=3.0.0
pyarrow==3.0.0
python-dateutil==2.7.3
requests==2.21.0
Shapely==1.8.4
six==1.13.0
tqdm==4.7.4
tqdm==4.7.4
40 changes: 14 additions & 26 deletions tests/unit/job/test_query_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,53 +19,38 @@

import pytest

from ..helpers import make_connection
from .helpers import _make_client
from .helpers import _make_job_resource

try:
from google.cloud import bigquery_storage
import google.cloud.bigquery_storage_v1.reader
import google.cloud.bigquery_storage_v1.services.big_query_read.client
except (ImportError, AttributeError): # pragma: NO COVER
except (ImportError, AttributeError):
bigquery_storage = None

try:
import pandas
except (ImportError, AttributeError): # pragma: NO COVER
pandas = None
try:
import shapely
except (ImportError, AttributeError): # pragma: NO COVER
except (ImportError, AttributeError):
shapely = None
try:
import geopandas
except (ImportError, AttributeError): # pragma: NO COVER
except (ImportError, AttributeError):
geopandas = None
try:
import tqdm
except (ImportError, AttributeError): # pragma: NO COVER
except (ImportError, AttributeError):
tqdm = None

try:
import importlib.metadata as metadata
except ImportError:
import importlib_metadata as metadata

from ..helpers import make_connection
from .helpers import _make_client
from .helpers import _make_job_resource

if pandas is not None:
PANDAS_INSTALLED_VERSION = metadata.version("pandas")
else:
PANDAS_INSTALLED_VERSION = "0.0.0"

pandas = pytest.importorskip("pandas")

try:
import pyarrow
import pyarrow.types
except ImportError: # pragma: NO COVER
except ImportError:
pyarrow = None

pandas = pytest.importorskip("pandas")


@pytest.fixture
def table_read_options_kwarg():
Expand Down Expand Up @@ -660,7 +645,10 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression():
)


@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="")
@pytest.mark.skipif(
pandas.__version__.startswith("2."),
    reason="pandas 2.0 changes some default dtypes and we haven't updated the test to account for those",
)
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
def test_to_dataframe_column_dtypes():
from google.cloud.bigquery.job import QueryJob as target_class
Expand Down
15 changes: 8 additions & 7 deletions tests/unit/test__pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@
import pandas
import pandas.api.types
import pandas.testing
except ImportError: # pragma: NO COVER
except ImportError:
pandas = None

try:
import geopandas
except ImportError: # pragma: NO COVER
except ImportError:
geopandas = None

import pytest
Expand All @@ -46,18 +46,19 @@
from google.cloud.bigquery import _pyarrow_helpers
from google.cloud.bigquery import _versions_helpers
from google.cloud.bigquery import schema
from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT

pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()

if pyarrow:
import pyarrow.parquet
import pyarrow.types
from pyarrow import ArrowTypeError # type: ignore # noqa: E402
else: # pragma: NO COVER

_BIGNUMERIC_SUPPORT = True
else:
# Mock out pyarrow when missing, because methods from pyarrow.types are
# used in test parameterization.
pyarrow = mock.Mock()
_BIGNUMERIC_SUPPORT = False

bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import()

Expand Down Expand Up @@ -572,9 +573,9 @@ def test_bq_to_arrow_array_w_conversion_fail(module_under_test): # pragma: NO C
series = pandas.Series(rows, name="test_col", dtype="object")
bq_field = schema.SchemaField("field_name", "STRING", mode="REPEATED")
exc_msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray"""
with pytest.raises(ArrowTypeError, match=exc_msg):
with pytest.raises(pyarrow.ArrowTypeError, match=exc_msg):
module_under_test.bq_to_arrow_array(series, bq_field)
raise ArrowTypeError(exc_msg)
raise pyarrow.ArrowTypeError(exc_msg)


@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"])
Expand Down
Loading

0 comments on commit 21714e1

Please sign in to comment.