Skip to content

Commit

Permalink
fix compatibility with older scikit-learn
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb committed Apr 9, 2021
1 parent 1142fcc commit b05213b
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 8 deletions.
38 changes: 37 additions & 1 deletion dask_ml/metrics/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,50 @@ def mean_absolute_error(
return result


@derived_from(sklearn.metrics)
def mean_absolute_percentage_error(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
multioutput: Optional[str] = "uniform_average",
compute: bool = True,
) -> ArrayLike:
"""Mean absolute percentage error regression loss.
Note here that we do not represent the output as a percentage in range
[0, 100]. Instead, we represent it in range [0, 1/eps]. Read more in
https://scikit-learn.org/stable/modules/model_evaluation.html#mean-absolute-percentage-error
Parameters
----------
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
Ground truth (correct) target values.
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
Estimated target values.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
multioutput : {'raw_values', 'uniform_average'} or array-like
Defines aggregating of multiple output values.
Array-like value defines weights used to average errors.
If input is list then the shape must be (n_outputs,).
'raw_values' :
Returns a full set of errors in case of multioutput input.
'uniform_average' :
Errors of all outputs are averaged with uniform weight.
compute : bool
Whether to compute this result (default ``True``)
Returns
-------
loss : float or array-like of floats in the range [0, 1/eps]
If multioutput is 'raw_values', then mean absolute percentage error
is returned for each output separately.
If multioutput is 'uniform_average' or ``None``, then the
equally-weighted average of all output errors is returned.
MAPE output is non-negative floating point. The best value is 0.0.
But note the fact that bad predictions can lead to arbitrarily large
MAPE values, especially if some y_true values are very close to zero.
Note that we return a large value instead of `inf` when y_true is zero.
"""
_check_sample_weight(sample_weight)
epsilon = np.finfo(np.float64).eps
mape = abs(y_pred - y_true) / da.maximum(y_true, epsilon)
Expand Down
1 change: 1 addition & 0 deletions docs/source/modules/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ Regression Metrics
:toctree: generated/

metrics.mean_absolute_error
metrics.mean_absolute_percentage_error
metrics.mean_squared_error
metrics.mean_squared_log_error
metrics.r2_score
Expand Down
20 changes: 13 additions & 7 deletions tests/metrics/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,28 @@
import sklearn.metrics

import dask_ml.metrics
from dask_ml._compat import SK_024

_METRICS_TO_TEST = [
"mean_squared_error",
"mean_absolute_error",
"r2_score",
]

# mean_absolute_percentage_error() was added in scikit-learn 0.24.0
if SK_024:
_METRICS_TO_TEST.append("mean_absolute_percentage_error")


@pytest.fixture(
params=[
"mean_squared_error",
"mean_absolute_error",
"mean_absolute_percentage_error",
"r2_score",
]
params=_METRICS_TO_TEST
)
def metric_pairs(request):
"""Pairs of (dask-ml, sklearn) regression metrics.
* mean_squared_error
* mean_absolute_error
* mean_absolute_percentage_error
* mean_absolute_percentage_error (if scikit-learn >= 0.24.0)
* r2_score
"""
return (
Expand Down

0 comments on commit b05213b

Please sign in to comment.