Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python 3.11 Updates #981

Merged
merged 2 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/environment-docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ dependencies:
- numba
- numpy
- psutil
- python=3.10
- python=3.11
- sortedcontainers
- scikit-learn >=1.2.0
- scipy
Expand Down
2 changes: 1 addition & 1 deletion ci/environment-latest.yaml
1 change: 1 addition & 0 deletions dask_ml/compose/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

These estimators are useful for working with heterogeneous tabular data.
"""

from ._column_transformer import ColumnTransformer, make_column_transformer

__all__ = ["ColumnTransformer", "make_column_transformer"]
1 change: 1 addition & 0 deletions dask_ml/decomposition/extmath.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Extended math utilities.
"""

# Authors: Gael Varoquaux
# Alexandre Gramfort
# Alexandre T. Passos
Expand Down
1 change: 1 addition & 0 deletions dask_ml/feature_extraction/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Utilities for extracting features from data.
"""

from . import text

__all__ = ["text"]
1 change: 1 addition & 0 deletions dask_ml/feature_extraction/text.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Utilities to build feature vectors from text documents.
"""

import itertools

import dask
Expand Down
1 change: 1 addition & 0 deletions dask_ml/linear_model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""The ``dask_ml.linear_model`` module implements linear models for
classification and regression.
"""

from .glm import LinearRegression, LogisticRegression, PoissonRegression

__all__ = [
Expand Down
1 change: 1 addition & 0 deletions dask_ml/linear_model/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
"""

import dask.array as da
import dask.dataframe as dd
import numpy as np
Expand Down
3 changes: 2 additions & 1 deletion dask_ml/metrics/pairwise.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Daskified versions of sklearn.metrics.pairwise
"""

import warnings
from typing import Any, Callable, Dict, Optional, Tuple, Union

Expand Down Expand Up @@ -188,7 +189,7 @@ def sigmoid_kernel(
"rbf": rbf_kernel,
"linear": linear_kernel,
"polynomial": polynomial_kernel,
"sigmoid": sigmoid_kernel
"sigmoid": sigmoid_kernel,
# TODO:
# - cosine_similarity
# - laplacian
Expand Down
1 change: 1 addition & 0 deletions dask_ml/model_selection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
These estimators will operate in parallel. Their scalability depends
on the underlying estimators being used.
"""

from ._hyperband import HyperbandSearchCV
from ._incremental import IncrementalSearchCV, InverseDecaySearchCV
from ._search import GridSearchCV, RandomizedSearchCV, check_cv, compute_n_splits
Expand Down
9 changes: 6 additions & 3 deletions dask_ml/model_selection/_split.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Utilities for splitting datasets.
"""

import itertools
import logging
import numbers
Expand Down Expand Up @@ -436,9 +437,11 @@ def train_test_split(
if da.Array in types and types & {dd.Series, dd.DataFrame}:
if convert_mixed_types:
arrays = tuple(
x.to_dask_array(lengths=True)
if isinstance(x, (dd.Series, dd.DataFrame))
else x
(
x.to_dask_array(lengths=True)
if isinstance(x, (dd.Series, dd.DataFrame))
else x
)
for x in arrays
)
else:
Expand Down
1 change: 1 addition & 0 deletions dask_ml/preprocessing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Utilties for Preprocessing data.
"""

from ._block_transformer import BlockTransformer
from ._encoders import OneHotEncoder
from .data import (
Expand Down
1 change: 1 addition & 0 deletions dask_ml/wrappers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Meta-estimators for parallelizing estimators using the scikit-learn API."""

import logging
import warnings

Expand Down
1 change: 1 addition & 0 deletions dask_ml/xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
will be setup in distributed mode alongside your existing
``dask.distributed`` cluster.
"""

from dask_xgboost import * # noqa
9 changes: 6 additions & 3 deletions tests/compose/test_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@


@pytest.mark.skip(
reason="ValueError: Specifying the columns using strings is only supported for dataframes."
reason="ValueError: Specifying the columns using strings is only "
+ "supported for dataframes."
)
def test_column_transformer():
# Ordering of make_column_transformer was changed from
Expand Down Expand Up @@ -61,7 +62,8 @@ def test_column_transformer():


@pytest.mark.skip(
reason="ValueError: Specifying the columns using strings is only supported for dataframes."
reason="ValueError: Specifying the columns using strings is only "
+ "supported for dataframes."
)
def test_column_transformer_unk_chunksize():
names = ["a", "b", "c"]
Expand Down Expand Up @@ -93,7 +95,8 @@ def test_column_transformer_unk_chunksize():


@pytest.mark.skip(
reason="ValueError: Specifying the columns using strings is only supported for dataframes."
reason="ValueError: Specifying the columns using strings is only "
+ "supported for dataframes."
)
def test_sklearn_col_trans_disallows_hstack_then_block():
# Test that sklearn ColumnTransformer (to which dask-ml ColumnTransformer
Expand Down
3 changes: 2 additions & 1 deletion tests/linear_model/test_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,8 @@ def test_model_coef_dask_numpy(est, data, request):


# fmt: off
@pytest.mark.skip(reason="AssertionError: Not equal to tolerance rtol=0.001, atol=0.0002")
@pytest.mark.skip(
reason="AssertionError: Not equal to tolerance rtol=0.001, atol=0.0002")
@pytest.mark.parametrize("solver", ["newton", "lbfgs"])
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize(
Expand Down
3 changes: 2 additions & 1 deletion tests/metrics/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ def test_log_loss_shape(yhat):


@pytest.mark.skip(
reason="FutureWarning: The `needs_threshold` and `needs_proba` parameter are deprecated"
reason="FutureWarning: The `needs_threshold` and `needs_proba` "
+ "parameter are deprecated"
)
@pytest.mark.parametrize("y", [[0, 1, 1, 0], [0, 1, 2, 0]])
def test_log_loss_scoring(y):
Expand Down
3 changes: 2 additions & 1 deletion tests/metrics/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ def test_mse_squared(squared):


@pytest.mark.skip(
reason="InvalidParameterError: The 'multioutput' parameter of mean_squared_error must be a string among..."
reason="InvalidParameterError: The 'multioutput' parameter of mean_squared_error "
+ "must be a string among..."
)
@pytest.mark.parametrize("multioutput", ["uniform_average", None])
def test_regression_metrics_unweighted_average_multioutput(metric_pairs, multioutput):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ def test_hyperparameter_searcher_with_fit_params(cls, kwargs):


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning")
def test_grid_search_no_score():
Expand Down Expand Up @@ -156,7 +157,8 @@ def test_grid_search_no_score():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
def test_grid_search_score_method():
X, y = make_classification(n_samples=100, n_classes=2, flip_y=0.2, random_state=0)
Expand Down Expand Up @@ -187,7 +189,8 @@ def test_grid_search_score_method():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
def test_grid_search_groups():
# Check if ValueError (when groups is None) propagates to dcv.GridSearchCV
Expand Down Expand Up @@ -272,7 +275,8 @@ def test_return_train_score_warn():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning")
def test_classes__property():
Expand Down Expand Up @@ -415,7 +419,8 @@ def test_grid_search_bad_param_grid():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
def test_grid_search_sparse():
# Test that grid search works with both dense and sparse matrices
Expand All @@ -439,7 +444,8 @@ def test_grid_search_sparse():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
def test_grid_search_sparse_scoring():
X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
Expand Down Expand Up @@ -1010,7 +1016,8 @@ def test_search_cv_results_none_param():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning")
def test_grid_search_correct_score_results():
Expand Down Expand Up @@ -1221,7 +1228,8 @@ def test_grid_search_failing_classifier_raise():


@pytest.mark.skip(
reason="FutureWarning: The default value of `dual` will change from `True` to `'auto'`"
reason="FutureWarning: The default value of `dual` will change from "
+ "`True` to `'auto'`"
)
def test_search_train_scores_set_to_false():
X = np.arange(6).reshape(6, -1)
Expand Down
3 changes: 2 additions & 1 deletion tests/model_selection/test_incremental.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@


@pytest.mark.skip(
reason="yaml.representer.RepresenterError: ('cannot represent an object', 0.05006666249977221)"
reason="yaml.representer.RepresenterError: "
+ "('cannot represent an object', 0.05006666249977221)"
)
@gen_cluster(client=True, timeout=1000)
async def test_basic(c, s, a, b):
Expand Down
6 changes: 4 additions & 2 deletions tests/model_selection/test_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@


@pytest.mark.skip(
reason="InvalidParameterError: The 'shuffle' parameter of train_test_split must be an instance of 'bool' or an instance of 'numpy.bool_'"
reason="InvalidParameterError: The 'shuffle' parameter of train_test_split "
+ "must be an instance of 'bool' or an instance of 'numpy.bool_'"
)
def test_20_newsgroups():
data = fetch_20newsgroups()
Expand Down Expand Up @@ -246,7 +247,8 @@ def test_split_mixed():


@pytest.mark.skip(
reason="InvalidParameterError: The 'shuffle' parameter of train_test_split must be an instance of 'bool' or an instance of 'numpy.bool_'"
reason="InvalidParameterError: The 'shuffle' parameter of train_test_split must "
+ "be an instance of 'bool' or an instance of 'numpy.bool_'"
)
def test_split_3d_data():
X_3d = np.arange(1.0, 5001.0).reshape((100, 10, 5))
Expand Down
18 changes: 12 additions & 6 deletions tests/preprocessing/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ def test_inverse_transform(self):
assert_eq_ar(result, X)

@pytest.mark.skip(
reason=" TypeError: MinMaxScaler.__init__() got an unexpected keyword argument 'columns'"
reason=" TypeError: MinMaxScaler.__init__() got an unexpected keyword "
+ "argument 'columns'"
)
@pytest.mark.xfail(reason="removed columns")
def test_df_inverse_transform(self):
Expand All @@ -139,7 +140,8 @@ def test_df_inverse_transform(self):
assert_eq_df(result, df2)

@pytest.mark.skip(
reason="AssertionError: found values in 'a' and 'b' which differ by more than the allowed amount"
reason="AssertionError: found values in 'a' and 'b' which differ by more "
+ "than the allowed amount"
)
def test_df_values(self):
est1 = dpp.MinMaxScaler()
Expand All @@ -158,7 +160,8 @@ def test_df_values(self):
assert_eq_ar(result_ar, result_df)

@pytest.mark.skip(
reason=" TypeError: MinMaxScaler.__init__() got an unexpected keyword argument 'columns'"
reason=" TypeError: MinMaxScaler.__init__() got an unexpected keyword "
+ "argument 'columns'"
)
@pytest.mark.xfail(reason="removed columns")
def test_df_column_slice(self):
Expand Down Expand Up @@ -211,7 +214,8 @@ def test_inverse_transform(self):
assert_eq_ar(result, X)

@pytest.mark.skip(
reason="DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`"
reason="DeprecationWarning: np.find_common_type is deprecated. Please use "
+ "`np.result_type` or `np.promote_types`"
)
def test_df_values(self):
est1 = dpp.RobustScaler()
Expand Down Expand Up @@ -354,7 +358,8 @@ def test_raises(self):

class TestDummyEncoder:
@pytest.mark.skip(
reason='AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="A_a") are different'
reason="AssertionError: Attributes of "
+ 'DataFrame.iloc[:, 1] (column name="A_a") are different'
)
@pytest.mark.parametrize("daskify", [False, True])
@pytest.mark.parametrize("values", [True, False])
Expand Down Expand Up @@ -634,7 +639,8 @@ def test_transformer_params(self):

mark = pytest.mark.xfail(
DASK_EXPR_ENABLED,
reason="dask-expr: NotImplementedError in assert_eq_df(res_df.iloc[:, 1:], frame, check_dtype=False)",
reason="dask-expr: NotImplementedError in "
+ "assert_eq_df(res_df.iloc[:, 1:], frame, check_dtype=False)",
)

@pytest.mark.parametrize("daskify", [pytest.param(True, marks=mark), False])
Expand Down
4 changes: 3 additions & 1 deletion tests/test_incremental_pca.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Tests for Incremental PCA."""

import numpy as np
import pytest
from dask import array as da
Expand Down Expand Up @@ -143,7 +144,8 @@ def test_incremental_pca_inverse():

@pytest.mark.skip(reason="AssertionError: Regex pattern did not match.")
@pytest.mark.skip(
reason="InvalidParameterError: The 'min_batch_size' parameter of gen_batches must be an int in the range [0, inf). Got -1 instead."
reason="InvalidParameterError: The 'min_batch_size' parameter of "
+ "gen_batches must be an int in the range [0, inf). Got -1 instead."
)
def test_incremental_pca_validation():
# Test that n_components is >=1 and <= n_features.
Expand Down
Loading
Loading