Skip to content

Commit

Permalink
GH-36642: [Python][CI] Configure warnings as errors during pytest (#37018)
Browse files Browse the repository at this point in the history

### Rationale for this change

Warnings are constantly being introduced into the pyarrow tests. Let's try enforcing them as errors in an effort to keep the codebase healthy.

### What changes are included in this PR?

* Fixed existing warnings
* Set warnings as errors in CI

### Are these changes tested?

Yes, ran pytests locally w/o warnings.

### Are there any user-facing changes?

No
* Closes: #36642

Authored-by: Dane Pitkin <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
danepitkin authored Aug 7, 2023
1 parent 3bb13da commit 2b36521
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 21 deletions.
3 changes: 2 additions & 1 deletion dev/tasks/tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1253,6 +1253,7 @@ tasks:
params:
env:
PYTHON: "{{ python_version }}"
PYTEST_ARGS: "-W error"
image: conda-python
{% endfor %}

Expand All @@ -1265,7 +1266,7 @@ tasks:
HYPOTHESIS_PROFILE: ci
PYARROW_TEST_HYPOTHESIS: ON
# limit to execute hypothesis tests only
PYTEST_ARGS: "-m hypothesis"
PYTEST_ARGS: "-m hypothesis -W error"
image: conda-python-pandas

test-conda-python-3.10-substrait:
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,7 @@ services:
shm_size: *shm-size
environment:
<<: [*common, *ccache, *sccache]
PYTEST_ARGS: # inherit
volumes: *conda-volumes
command: &python-conda-command
["
Expand Down
15 changes: 9 additions & 6 deletions python/pyarrow/tests/parquet/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,26 @@ def datadir(base_datadir):
def s3_bucket(s3_server):
    """Create (or reuse) the test S3 bucket and return its name.

    Uses a low-level boto3 client rather than the resource API so the
    underlying HTTP connection can be closed explicitly, avoiding
    ResourceWarning noise when pytest runs with warnings-as-errors.

    Parameters
    ----------
    s3_server : fixture
        Provides the minio/S3 connection tuple under the 'connection' key.

    Returns
    -------
    str
        The bucket name ('test-s3fs').
    """
    boto3 = pytest.importorskip('boto3')
    botocore = pytest.importorskip('botocore')
    s3_bucket_name = 'test-s3fs'

    host, port, access_key, secret_key = s3_server['connection']
    s3_client = boto3.client(
        's3',
        endpoint_url='http://{}:{}'.format(host, port),
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        config=botocore.client.Config(signature_version='s3v4'),
        region_name='us-east-1'
    )

    try:
        s3_client.create_bucket(Bucket=s3_bucket_name)
    except Exception:
        # Best-effort: we get BucketAlreadyOwnedByYou when the bucket
        # was already created via the fsspec handler.
        pass
    finally:
        # Always release the HTTP connection, even if creation failed.
        s3_client.close()

    return s3_bucket_name


@pytest.fixture
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/tests/parquet/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1316,7 +1316,7 @@ def _test_write_to_dataset_with_partitions(base_path,
output_df[col] = output_df[col].astype('category')

if schema:
expected_date_type = schema.field_by_name('date').type.to_pandas_dtype()
expected_date_type = schema.field('date').type.to_pandas_dtype()
output_df["date"] = output_df["date"].astype(expected_date_type)

tm.assert_frame_equal(output_df, input_df)
Expand Down
22 changes: 12 additions & 10 deletions python/pyarrow/tests/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,15 +182,17 @@ def struct_types(draw, item_strategy=primitive_types):


def dictionary_types(key_strategy=None, value_strategy=None):
    """Return a hypothesis strategy generating pa.dictionary() types.

    Parameters
    ----------
    key_strategy : hypothesis strategy, optional
        Strategy for the dictionary index type; defaults to the module's
        signed_integer_types.
    value_strategy : hypothesis strategy, optional
        Strategy for the dictionary value type; defaults to a mix of
        bool, integer, float32/64, binary, string and fixed-size binary.

    Notes
    -----
    Explicit ``is None`` checks are used instead of ``x or default`` so a
    caller-supplied strategy is never silently replaced by the default.
    """
    if key_strategy is None:
        key_strategy = signed_integer_types
    if value_strategy is None:
        value_strategy = st.one_of(
            bool_type,
            integer_types,
            st.sampled_from([pa.float32(), pa.float64()]),
            binary_type,
            string_type,
            fixed_size_binary_type,
        )
    return st.builds(pa.dictionary, key_strategy, value_strategy)


Expand Down Expand Up @@ -368,7 +370,7 @@ def record_batches(draw, type, rows=None, max_fields=None):
children = [draw(arrays(field.type, size=rows)) for field in schema]
# TODO(kszucs): the names and schema arguments are not consistent with
# Table.from_array's arguments
return pa.RecordBatch.from_arrays(children, names=schema)
return pa.RecordBatch.from_arrays(children, schema=schema)


@st.composite
Expand Down
5 changes: 4 additions & 1 deletion python/pyarrow/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2913,7 +2913,10 @@ def test_strided_data_import(self):
'f4', 'f8']

for type_name in numeric_dtypes:
cases.append(random_numbers.astype(type_name))
# Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning
with warnings.catch_warnings():
warnings.simplefilter("ignore")
cases.append(random_numbers.astype(type_name))

# strings
cases.append(np.array([random_ascii(10) for i in range(N * K)],
Expand Down
7 changes: 5 additions & 2 deletions python/pyarrow/tests/test_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import os
import sys
import pytest
import warnings
import weakref

import numpy as np
Expand Down Expand Up @@ -82,8 +83,10 @@ def test_tensor_base_object():
@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_tensor_numpy_roundtrip(dtype_str, arrow_type):
dtype = np.dtype(dtype_str)
data = (100 * np.random.randn(10, 4)).astype(dtype)

# Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning
with warnings.catch_warnings():
warnings.simplefilter("ignore")
data = (100 * np.random.randn(10, 4)).astype(dtype)
tensor = pa.Tensor.from_numpy(data)
assert tensor.type == arrow_type

Expand Down

0 comments on commit 2b36521

Please sign in to comment.