From 2b36521e52f61f6a68e58e7c8c2f7bf2ed805cdc Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Sun, 6 Aug 2023 23:38:14 -0400 Subject: [PATCH] GH-36642: [Python][CI] Configure warnings as errors during pytest (#37018) ### Rationale for this change Warnings are constantly being introduced into the pyarrow tests. Let's try enforcing them as errors in an effort to keep the codebase healthy. ### What changes are included in this PR? * Fixed existing warnings * Set warnings as errors in CI ### Are these changes tested? Yes, ran pytests locally w/o warnings. ### Are there any user-facing changes? No * Closes: #36642 Authored-by: Dane Pitkin Signed-off-by: Sutou Kouhei --- dev/tasks/tasks.yml | 3 ++- docker-compose.yml | 1 + python/pyarrow/tests/parquet/conftest.py | 15 +++++++------ python/pyarrow/tests/parquet/test_dataset.py | 2 +- python/pyarrow/tests/strategies.py | 22 +++++++++++--------- python/pyarrow/tests/test_pandas.py | 5 ++++- python/pyarrow/tests/test_tensor.py | 7 +++++-- 7 files changed, 34 insertions(+), 21 deletions(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 73b793162d959..941506b9c2abc 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1253,6 +1253,7 @@ tasks: params: env: PYTHON: "{{ python_version }}" + PYTEST_ARGS: "-W error" image: conda-python {% endfor %} @@ -1265,7 +1266,7 @@ tasks: HYPOTHESIS_PROFILE: ci PYARROW_TEST_HYPOTHESIS: ON # limit to execute hypothesis tests only - PYTEST_ARGS: "-m hypothesis" + PYTEST_ARGS: "-m hypothesis -W error" image: conda-python-pandas test-conda-python-3.10-substrait: diff --git a/docker-compose.yml b/docker-compose.yml index fe98a30d0b92b..3bf346ef94173 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -835,6 +835,7 @@ services: shm_size: *shm-size environment: <<: [*common, *ccache, *sccache] + PYTEST_ARGS: # inherit volumes: *conda-volumes command: &python-conda-command [" diff --git a/python/pyarrow/tests/parquet/conftest.py b/python/pyarrow/tests/parquet/conftest.py index 1e75493cdae03..461c24af22aa9 100644 --- a/python/pyarrow/tests/parquet/conftest.py +++ b/python/pyarrow/tests/parquet/conftest.py @@ -29,9 +29,10 @@ def datadir(base_datadir): def s3_bucket(s3_server): boto3 = pytest.importorskip('boto3') botocore = pytest.importorskip('botocore') + s3_bucket_name = 'test-s3fs' host, port, access_key, secret_key = s3_server['connection'] - s3 = boto3.resource( + s3_client = boto3.client( 's3', endpoint_url='http://{}:{}'.format(host, port), aws_access_key_id=access_key, @@ -39,13 +40,15 @@ def s3_bucket(s3_server): config=botocore.client.Config(signature_version='s3v4'), region_name='us-east-1' ) - bucket = s3.Bucket('test-s3fs') + try: - bucket.create() + s3_client.create_bucket(Bucket=s3_bucket_name) except Exception: - # we get BucketAlreadyOwnedByYou error with fsspec handler - pass - return 'test-s3fs' + pass # we get BucketAlreadyOwnedByYou error with fsspec handler + finally: + s3_client.close() + + return s3_bucket_name @pytest.fixture diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index cd991617c9fa8..3e6ff49265c32 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -1316,7 +1316,7 @@ def _test_write_to_dataset_with_partitions(base_path, output_df[col] = output_df[col].astype('category') if schema: - expected_date_type = schema.field_by_name('date').type.to_pandas_dtype() + expected_date_type = schema.field('date').type.to_pandas_dtype() output_df["date"] = output_df["date"].astype(expected_date_type) tm.assert_frame_equal(output_df, input_df) diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 48f7e5381724a..bb88a4dcb7b2a 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -182,15 +182,17 @@ def struct_types(draw, item_strategy=primitive_types): def dictionary_types(key_strategy=None, value_strategy=None): - key_strategy = key_strategy or signed_integer_types - value_strategy = value_strategy or st.one_of( - bool_type, - integer_types, - st.sampled_from([pa.float32(), pa.float64()]), - binary_type, - string_type, - fixed_size_binary_type, - ) + if key_strategy is None: + key_strategy = signed_integer_types + if value_strategy is None: + value_strategy = st.one_of( + bool_type, + integer_types, + st.sampled_from([pa.float32(), pa.float64()]), + binary_type, + string_type, + fixed_size_binary_type, + ) return st.builds(pa.dictionary, key_strategy, value_strategy) @@ -368,7 +370,7 @@ def record_batches(draw, type, rows=None, max_fields=None): children = [draw(arrays(field.type, size=rows)) for field in schema] # TODO(kszucs): the names and schema arguments are not consistent with # Table.from_array's arguments - return pa.RecordBatch.from_arrays(children, names=schema) + return pa.RecordBatch.from_arrays(children, schema=schema) @st.composite diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 8bdc7253a4837..ef6ddd09933c9 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -2913,7 +2913,10 @@ def test_strided_data_import(self): 'f4', 'f8'] for type_name in numeric_dtypes: - cases.append(random_numbers.astype(type_name)) + # Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cases.append(random_numbers.astype(type_name)) # strings cases.append(np.array([random_ascii(10) for i in range(N * K)], diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py index aee46bc93690c..3e6a4ca8ed222 100644 --- a/python/pyarrow/tests/test_tensor.py +++ b/python/pyarrow/tests/test_tensor.py @@ -18,6 +18,7 @@ import os import sys import pytest +import warnings import weakref import numpy as np @@ -82,8 +83,10 @@ def test_tensor_base_object(): @pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) def test_tensor_numpy_roundtrip(dtype_str, arrow_type): dtype = np.dtype(dtype_str) - data = (100 * np.random.randn(10, 4)).astype(dtype) - + # Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + data = (100 * np.random.randn(10, 4)).astype(dtype) tensor = pa.Tensor.from_numpy(data) assert tensor.type == arrow_type