Skip to content

Commit

Permalink
GH-36642: [Python][CI] Configure warnings as errors during pytest (#37018)
Browse files Browse the repository at this point in the history

### Rationale for this change

Warnings are constantly being introduced into the pyarrow tests. Let's try enforcing them as errors in an effort to keep the codebase healthy.

### What changes are included in this PR?

* Fixed existing warnings
* Set warnings as errors in CI

### Are these changes tested?

Yes, ran pytests locally w/o warnings.

### Are there any user-facing changes?

No
* Closes: #36642

Authored-by: Dane Pitkin <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
danepitkin authored Aug 7, 2023
1 parent 3bb13da commit 2b36521
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 21 deletions.
3 changes: 2 additions & 1 deletion dev/tasks/tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1253,6 +1253,7 @@ tasks:
params:
env:
PYTHON: "{{ python_version }}"
PYTEST_ARGS: "-W error"
image: conda-python
{% endfor %}

Expand All @@ -1265,7 +1266,7 @@ tasks:
HYPOTHESIS_PROFILE: ci
PYARROW_TEST_HYPOTHESIS: ON
# limit to execute hypothesis tests only
PYTEST_ARGS: "-m hypothesis"
PYTEST_ARGS: "-m hypothesis -W error"
image: conda-python-pandas

test-conda-python-3.10-substrait:
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,7 @@ services:
shm_size: *shm-size
environment:
<<: [*common, *ccache, *sccache]
PYTEST_ARGS: # inherit
volumes: *conda-volumes
command: &python-conda-command
["
Expand Down
15 changes: 9 additions & 6 deletions python/pyarrow/tests/parquet/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,26 @@ def datadir(base_datadir):
def s3_bucket(s3_server):
    """Create (or reuse) the test S3 bucket and return its name.

    Uses a low-level boto3 client rather than the resource API so the
    underlying HTTP connection can be closed explicitly, avoiding
    ResourceWarning noise when pytest runs with warnings-as-errors.

    Parameters
    ----------
    s3_server : fixture
        Provides the minio/S3 connection tuple under the 'connection' key.

    Returns
    -------
    str
        The bucket name ('test-s3fs').
    """
    boto3 = pytest.importorskip('boto3')
    botocore = pytest.importorskip('botocore')
    s3_bucket_name = 'test-s3fs'

    host, port, access_key, secret_key = s3_server['connection']
    s3_client = boto3.client(
        's3',
        endpoint_url='http://{}:{}'.format(host, port),
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        config=botocore.client.Config(signature_version='s3v4'),
        region_name='us-east-1'
    )

    try:
        s3_client.create_bucket(Bucket=s3_bucket_name)
    except Exception:
        # Best-effort: we get BucketAlreadyOwnedByYou when the bucket
        # was already created via the fsspec handler.
        pass
    finally:
        # Always release the HTTP connection, even if creation failed.
        s3_client.close()

    return s3_bucket_name


@pytest.fixture
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/tests/parquet/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1316,7 +1316,7 @@ def _test_write_to_dataset_with_partitions(base_path,
output_df[col] = output_df[col].astype('category')

if schema:
expected_date_type = schema.field_by_name('date').type.to_pandas_dtype()
expected_date_type = schema.field('date').type.to_pandas_dtype()
output_df["date"] = output_df["date"].astype(expected_date_type)

tm.assert_frame_equal(output_df, input_df)
Expand Down
22 changes: 12 additions & 10 deletions python/pyarrow/tests/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,15 +182,17 @@ def struct_types(draw, item_strategy=primitive_types):


def dictionary_types(key_strategy=None, value_strategy=None):
    """Return a hypothesis strategy generating pa.dictionary() types.

    Parameters
    ----------
    key_strategy : hypothesis strategy, optional
        Strategy for the dictionary index type; defaults to the module's
        signed_integer_types.
    value_strategy : hypothesis strategy, optional
        Strategy for the dictionary value type; defaults to a mix of
        bool, integer, float32/64, binary, string and fixed-size binary.

    Notes
    -----
    Explicit ``is None`` checks are used instead of ``x or default`` so a
    caller-supplied strategy is never silently replaced by the default.
    """
    if key_strategy is None:
        key_strategy = signed_integer_types
    if value_strategy is None:
        value_strategy = st.one_of(
            bool_type,
            integer_types,
            st.sampled_from([pa.float32(), pa.float64()]),
            binary_type,
            string_type,
            fixed_size_binary_type,
        )
    return st.builds(pa.dictionary, key_strategy, value_strategy)


Expand Down Expand Up @@ -368,7 +370,7 @@ def record_batches(draw, type, rows=None, max_fields=None):
children = [draw(arrays(field.type, size=rows)) for field in schema]
# TODO(kszucs): the names and schema arguments are not consistent with
# Table.from_array's arguments
return pa.RecordBatch.from_arrays(children, names=schema)
return pa.RecordBatch.from_arrays(children, schema=schema)


@st.composite
Expand Down
5 changes: 4 additions & 1 deletion python/pyarrow/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2913,7 +2913,10 @@ def test_strided_data_import(self):
'f4', 'f8']

for type_name in numeric_dtypes:
cases.append(random_numbers.astype(type_name))
# Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning
with warnings.catch_warnings():
warnings.simplefilter("ignore")
cases.append(random_numbers.astype(type_name))

# strings
cases.append(np.array([random_ascii(10) for i in range(N * K)],
Expand Down
7 changes: 5 additions & 2 deletions python/pyarrow/tests/test_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import os
import sys
import pytest
import warnings
import weakref

import numpy as np
Expand Down Expand Up @@ -82,8 +83,10 @@ def test_tensor_base_object():
@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_tensor_numpy_roundtrip(dtype_str, arrow_type):
dtype = np.dtype(dtype_str)
data = (100 * np.random.randn(10, 4)).astype(dtype)

# Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning
with warnings.catch_warnings():
warnings.simplefilter("ignore")
data = (100 * np.random.randn(10, 4)).astype(dtype)
tensor = pa.Tensor.from_numpy(data)
assert tensor.type == arrow_type

Expand Down

0 comments on commit 2b36521

Please sign in to comment.