Skip to content

Commit

Permalink
Python 3.11 Updates (#981)
Browse files Browse the repository at this point in the history
* updates to envs

* linting fixes
  • Loading branch information
mmccarty authored Mar 14, 2024
1 parent fa5a583 commit b3954e9
Show file tree
Hide file tree
Showing 27 changed files with 83 additions and 36 deletions.
2 changes: 1 addition & 1 deletion ci/environment-docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ dependencies:
- numba
- numpy
- psutil
- python=3.10
- python=3.11
- sortedcontainers
- scikit-learn >=1.2.0
- scipy
Expand Down
2 changes: 1 addition & 1 deletion ci/environment-latest.yaml
1 change: 1 addition & 0 deletions dask_ml/compose/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
These estimators are useful for working with heterogeneous tabular data.
"""

from ._column_transformer import ColumnTransformer, make_column_transformer

__all__ = ["ColumnTransformer", "make_column_transformer"]
1 change: 1 addition & 0 deletions dask_ml/decomposition/extmath.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Extended math utilities.
"""

# Authors: Gael Varoquaux
# Alexandre Gramfort
# Alexandre T. Passos
Expand Down
1 change: 1 addition & 0 deletions dask_ml/feature_extraction/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Utilities for extracting features from data.
"""

from . import text

__all__ = ["text"]
1 change: 1 addition & 0 deletions dask_ml/feature_extraction/text.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Utilities to build feature vectors from text documents.
"""

import itertools

import dask
Expand Down
1 change: 1 addition & 0 deletions dask_ml/linear_model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""The ``dask_ml.linear_model`` module implements linear models for
classification and regression.
"""

from .glm import LinearRegression, LogisticRegression, PoissonRegression

__all__ = [
Expand Down
1 change: 1 addition & 0 deletions dask_ml/linear_model/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
"""

import dask.array as da
import dask.dataframe as dd
import numpy as np
Expand Down
3 changes: 2 additions & 1 deletion dask_ml/metrics/pairwise.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Daskified versions of sklearn.metrics.pairwise
"""

import warnings
from typing import Any, Callable, Dict, Optional, Tuple, Union

Expand Down Expand Up @@ -188,7 +189,7 @@ def sigmoid_kernel(
"rbf": rbf_kernel,
"linear": linear_kernel,
"polynomial": polynomial_kernel,
"sigmoid": sigmoid_kernel
"sigmoid": sigmoid_kernel,
# TODO:
# - cosine_similarity
# - laplacian
Expand Down
1 change: 1 addition & 0 deletions dask_ml/model_selection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
These estimators will operate in parallel. Their scalability depends
on the underlying estimators being used.
"""

from ._hyperband import HyperbandSearchCV
from ._incremental import IncrementalSearchCV, InverseDecaySearchCV
from ._search import GridSearchCV, RandomizedSearchCV, check_cv, compute_n_splits
Expand Down
9 changes: 6 additions & 3 deletions dask_ml/model_selection/_split.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Utilities for splitting datasets.
"""

import itertools
import logging
import numbers
Expand Down Expand Up @@ -436,9 +437,11 @@ def train_test_split(
if da.Array in types and types & {dd.Series, dd.DataFrame}:
if convert_mixed_types:
arrays = tuple(
x.to_dask_array(lengths=True)
if isinstance(x, (dd.Series, dd.DataFrame))
else x
(
x.to_dask_array(lengths=True)
if isinstance(x, (dd.Series, dd.DataFrame))
else x
)
for x in arrays
)
else:
Expand Down
1 change: 1 addition & 0 deletions dask_ml/preprocessing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Utilities for Preprocessing data.
"""

from ._block_transformer import BlockTransformer
from ._encoders import OneHotEncoder
from .data import (
Expand Down
1 change: 1 addition & 0 deletions dask_ml/wrappers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Meta-estimators for parallelizing estimators using the scikit-learn API."""

import logging
import warnings

Expand Down
1 change: 1 addition & 0 deletions dask_ml/xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
will be setup in distributed mode alongside your existing
``dask.distributed`` cluster.
"""

from dask_xgboost import * # noqa
9 changes: 6 additions & 3 deletions tests/compose/test_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@


@pytest.mark.skip(
reason="ValueError: Specifying the columns using strings is only supported for dataframes."
reason="ValueError: Specifying the columns using strings is only "
+ "supported for dataframes."
)
def test_column_transformer():
# Ordering of make_column_transformer was changed from
Expand Down Expand Up @@ -61,7 +62,8 @@ def test_column_transformer():


@pytest.mark.skip(
reason="ValueError: Specifying the columns using strings is only supported for dataframes."
reason="ValueError: Specifying the columns using strings is only "
+ "supported for dataframes."
)
def test_column_transformer_unk_chunksize():
names = ["a", "b", "c"]
Expand Down Expand Up @@ -93,7 +95,8 @@ def test_column_transformer_unk_chunksize():


@pytest.mark.skip(
reason="ValueError: Specifying the columns using strings is only supported for dataframes."
reason="ValueError: Specifying the columns using strings is only "
+ "supported for dataframes."
)
def test_sklearn_col_trans_disallows_hstack_then_block():
# Test that sklearn ColumnTransformer (to which dask-ml ColumnTransformer
Expand Down
3 changes: 2 additions & 1 deletion tests/linear_model/test_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,8 @@ def test_model_coef_dask_numpy(est, data, request):


# fmt: off
@pytest.mark.skip(reason="AssertionError: Not equal to tolerance rtol=0.001, atol=0.0002")
@pytest.mark.skip(
reason="AssertionError: Not equal to tolerance rtol=0.001, atol=0.0002")
@pytest.mark.parametrize("solver", ["newton", "lbfgs"])
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize(
Expand Down
3 changes: 2 additions & 1 deletion tests/metrics/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ def test_log_loss_shape(yhat):


@pytest.mark.skip(
reason="FutureWarning: The `needs_threshold` and `needs_proba` parameter are deprecated"
reason="FutureWarning: The `needs_threshold` and `needs_proba` "
+ "parameter are deprecated"
)
@pytest.mark.parametrize("y", [[0, 1, 1, 0], [0, 1, 2, 0]])
def test_log_loss_scoring(y):
Expand Down
3 changes: 2 additions & 1 deletion tests/metrics/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ def test_mse_squared(squared):


@pytest.mark.skip(
reason="InvalidParameterError: The 'multioutput' parameter of mean_squared_error must be a string among..."
reason="InvalidParameterError: The 'multioutput' parameter of mean_squared_error "
+ "must be a string among..."
)
@pytest.mark.parametrize("multioutput", ["uniform_average", None])
def test_regression_metrics_unweighted_average_multioutput(metric_pairs, multioutput):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ def test_hyperparameter_searcher_with_fit_params(cls, kwargs):


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning")
def test_grid_search_no_score():
Expand Down Expand Up @@ -156,7 +157,8 @@ def test_grid_search_no_score():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
def test_grid_search_score_method():
X, y = make_classification(n_samples=100, n_classes=2, flip_y=0.2, random_state=0)
Expand Down Expand Up @@ -187,7 +189,8 @@ def test_grid_search_score_method():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
def test_grid_search_groups():
# Check if ValueError (when groups is None) propagates to dcv.GridSearchCV
Expand Down Expand Up @@ -272,7 +275,8 @@ def test_return_train_score_warn():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning")
def test_classes__property():
Expand Down Expand Up @@ -415,7 +419,8 @@ def test_grid_search_bad_param_grid():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
def test_grid_search_sparse():
# Test that grid search works with both dense and sparse matrices
Expand All @@ -439,7 +444,8 @@ def test_grid_search_sparse():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
def test_grid_search_sparse_scoring():
X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
Expand Down Expand Up @@ -1010,7 +1016,8 @@ def test_search_cv_results_none_param():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning")
def test_grid_search_correct_score_results():
Expand Down Expand Up @@ -1221,7 +1228,8 @@ def test_grid_search_failing_classifier_raise():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
def test_search_train_scores_set_to_false():
X = np.arange(6).reshape(6, -1)
Expand Down
3 changes: 2 additions & 1 deletion tests/model_selection/test_incremental.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@


@pytest.mark.skip(
reason="yaml.representer.RepresenterError: ('cannot represent an object', 0.05006666249977221)"
reason="yaml.representer.RepresenterError: "
+ "('cannot represent an object', 0.05006666249977221)"
)
@gen_cluster(client=True, timeout=1000)
async def test_basic(c, s, a, b):
Expand Down
6 changes: 4 additions & 2 deletions tests/model_selection/test_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@


@pytest.mark.skip(
reason="InvalidParameterError: The 'shuffle' parameter of train_test_split must be an instance of 'bool' or an instance of 'numpy.bool_'"
reason="InvalidParameterError: The 'shuffle' parameter of train_test_split "
+ "must be an instance of 'bool' or an instance of 'numpy.bool_'"
)
def test_20_newsgroups():
data = fetch_20newsgroups()
Expand Down Expand Up @@ -246,7 +247,8 @@ def test_split_mixed():


@pytest.mark.skip(
reason="InvalidParameterError: The 'shuffle' parameter of train_test_split must be an instance of 'bool' or an instance of 'numpy.bool_'"
reason="InvalidParameterError: The 'shuffle' parameter of train_test_split must "
+ "be an instance of 'bool' or an instance of 'numpy.bool_'"
)
def test_split_3d_data():
X_3d = np.arange(1.0, 5001.0).reshape((100, 10, 5))
Expand Down
18 changes: 12 additions & 6 deletions tests/preprocessing/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ def test_inverse_transform(self):
assert_eq_ar(result, X)

@pytest.mark.skip(
reason=" TypeError: MinMaxScaler.__init__() got an unexpected keyword argument 'columns'"
reason=" TypeError: MinMaxScaler.__init__() got an unexpected keyword "
+ "argument 'columns'"
)
@pytest.mark.xfail(reason="removed columns")
def test_df_inverse_transform(self):
Expand All @@ -139,7 +140,8 @@ def test_df_inverse_transform(self):
assert_eq_df(result, df2)

@pytest.mark.skip(
reason="AssertionError: found values in 'a' and 'b' which differ by more than the allowed amount"
reason="AssertionError: found values in 'a' and 'b' which differ by more "
+ "than the allowed amount"
)
def test_df_values(self):
est1 = dpp.MinMaxScaler()
Expand All @@ -158,7 +160,8 @@ def test_df_values(self):
assert_eq_ar(result_ar, result_df)

@pytest.mark.skip(
reason=" TypeError: MinMaxScaler.__init__() got an unexpected keyword argument 'columns'"
reason=" TypeError: MinMaxScaler.__init__() got an unexpected keyword "
+ "argument 'columns'"
)
@pytest.mark.xfail(reason="removed columns")
def test_df_column_slice(self):
Expand Down Expand Up @@ -211,7 +214,8 @@ def test_inverse_transform(self):
assert_eq_ar(result, X)

@pytest.mark.skip(
reason="DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`"
reason="DeprecationWarning: np.find_common_type is deprecated. Please use "
+ "`np.result_type` or `np.promote_types`"
)
def test_df_values(self):
est1 = dpp.RobustScaler()
Expand Down Expand Up @@ -354,7 +358,8 @@ def test_raises(self):

class TestDummyEncoder:
@pytest.mark.skip(
reason='AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="A_a") are different'
reason="AssertionError: Attributes of "
+ 'DataFrame.iloc[:, 1] (column name="A_a") are different'
)
@pytest.mark.parametrize("daskify", [False, True])
@pytest.mark.parametrize("values", [True, False])
Expand Down Expand Up @@ -634,7 +639,8 @@ def test_transformer_params(self):

mark = pytest.mark.xfail(
DASK_EXPR_ENABLED,
reason="dask-expr: NotImplementedError in assert_eq_df(res_df.iloc[:, 1:], frame, check_dtype=False)",
reason="dask-expr: NotImplementedError in "
+ "assert_eq_df(res_df.iloc[:, 1:], frame, check_dtype=False)",
)

@pytest.mark.parametrize("daskify", [pytest.param(True, marks=mark), False])
Expand Down
4 changes: 3 additions & 1 deletion tests/test_incremental_pca.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Tests for Incremental PCA."""

import numpy as np
import pytest
from dask import array as da
Expand Down Expand Up @@ -143,7 +144,8 @@ def test_incremental_pca_inverse():

@pytest.mark.skip(reason="AssertionError: Regex pattern did not match.")
@pytest.mark.skip(
reason="InvalidParameterError: The 'min_batch_size' parameter of gen_batches must be an int in the range [0, inf). Got -1 instead."
reason="InvalidParameterError: The 'min_batch_size' parameter of "
+ "gen_batches must be an int in the range [0, inf). Got -1 instead."
)
def test_incremental_pca_validation():
# Test that n_components is >=1 and <= n_features.
Expand Down
Loading

0 comments on commit b3954e9

Please sign in to comment.