diff --git a/python/metrics/setup.cfg b/python/metrics/setup.cfg
index 39f040a8..be27f038 100644
--- a/python/metrics/setup.cfg
+++ b/python/metrics/setup.cfg
@@ -15,7 +15,7 @@ project_urls =
     Source = https://github.com/NOAA-OWP/hydrotools/tree/main/python/metrics
    Tracker = https://github.com/NOAA-OWP/hydrotools/issues
 classifiers =
-    Development Status :: 3 - Alpha
+    Development Status :: 4 - Beta
     Intended Audience :: Education
     Intended Audience :: Science/Research
     License :: Free To Use But Restricted
diff --git a/python/metrics/src/hydrotools/metrics/_version.py b/python/metrics/src/hydrotools/metrics/_version.py
index 10aa336c..7b1e3120 100644
--- a/python/metrics/src/hydrotools/metrics/_version.py
+++ b/python/metrics/src/hydrotools/metrics/_version.py
@@ -1 +1 @@
-__version__ = "1.2.3"
+__version__ = "1.3.3"
diff --git a/python/metrics/src/hydrotools/metrics/metrics.py b/python/metrics/src/hydrotools/metrics/metrics.py
index 75235164..b8fcb469 100644
--- a/python/metrics/src/hydrotools/metrics/metrics.py
+++ b/python/metrics/src/hydrotools/metrics/metrics.py
@@ -19,24 +19,38 @@
 - percent_correct
 - base_chance
 - equitable_threat_score
-- mean_squared_error
+- mean_error
 - nash_sutcliffe_efficiency
 - kling_gupta_efficiency
+- volumetric_efficiency
+- mean_squared_error
+- root_mean_squared_error
+- mean_error_skill_score
+- coefficient_of_persistence
+- coefficient_of_extrapolation
 
 """
 
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
-from typing import Union, Mapping, MutableMapping
+from typing import Union
 from . import _validation as validate
+from functools import partial
 
-def mean_squared_error(
+def mean_error(
     y_true: npt.ArrayLike,
     y_pred: npt.ArrayLike,
+    power: float = 1.0,
     root: bool = False
     ) -> float:
-    """Compute the mean squared error, or optionally root mean squared error.
+    r"""Compute the mean error or deviation. The default is the mean absolute error.
+    The mean error is given by:
+
+    $$ME = \frac{1}{n}\sum_{i=1}^{n}\left| y_{s,i} - y_{o,i} \right|^{p}$$
+
+    Where $n$ is the length of each array, $y_{s,i}$ is the *ith* simulated or predicted value,
+    $y_{o,i}$ is the *ith* observed or true value, and $p$ is the exponent.
 
     Parameters
     ----------
@@ -44,31 +58,129 @@ def mean_squared_error(
         Ground truth (correct) target values, also called observations, measurements, or observed values.
     y_pred: array-like of shape (n_samples,), required
         Estimated target values, also called simulations or modeled values.
+    power: float, default 1.0
+        Exponent for each mean error summation value.
     root: bool, default False
-        When True, return the root mean squared error.
+        When True, return the root mean error.
 
     Returns
     -------
-    error: float
-        Mean squared error or root mean squared error.
+    mean_error: float
+        Mean error or root mean error.
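+
+    Examples
+    --------
+    A quick illustration with hypothetical values (these arrays are chosen
+    here for clarity and are not fixtures from the package's test suite):
+
+    >>> from hydrotools.metrics import metrics
+    >>> y_true = [1.0, 2.0, 3.0, 4.0]
+    >>> y_pred = [2.0, 4.0, 6.0, 8.0]
+    >>> print(metrics.mean_error(y_true, y_pred))
+    2.5
+    >>> print(metrics.mean_error(y_true, y_pred, power=2.0))
+    7.5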
 
     """
-    # Compute mean squared error
-    MSE = np.sum(np.subtract(y_true, y_pred) ** 2.0) / len(y_true)
+    # Compute mean error
+    _mean_error = np.sum(np.abs(np.subtract(y_true, y_pred)) ** power) / len(y_true)
 
-    # Return MSE, optionally return root mean squared error
+    # Return mean error, optionally return root mean error
     if root:
-        return np.sqrt(MSE)
-    return MSE
+        return np.sqrt(_mean_error)
+    return _mean_error
+
+mean_squared_error = partial(mean_error, power=2.0, root=False)
+mean_squared_error.__doc__ = """Partial of hydrotools.metrics.mean_error with
+a default power value of 2.0 and root set to False.
+
+See Also
+--------
+mean_error
+"""
+
+root_mean_squared_error = partial(mean_error, power=2.0, root=True)
+root_mean_squared_error.__doc__ = """Partial of hydrotools.metrics.mean_error with
+a default power value of 2.0 and root set to True.
+
+See Also
+--------
+mean_error
+"""
+
+def mean_error_skill_score(
+    y_true: npt.ArrayLike,
+    y_pred: npt.ArrayLike,
+    y_base: npt.ArrayLike,
+    power: float = 1.0,
+    normalized: bool = False
+    ) -> float:
+    r"""Compute a generic mean-error-based model skill score. The mean error skill score
+    is given by:
+
+    $$MESS = 1 - \frac{\sum_{i=1}^{n}\left| y_{p,i} - y_{o,i} \right|^{p}}{\sum_{i=1}^{n}\left| y_{b,i} - y_{o,i} \right|^{p}}$$
+
+    Where $n$ is the length of each array, $y_{p,i}$ is the *ith* simulated or predicted value,
+    $y_{b,i}$ is the *ith* baseline value, $y_{o,i}$ is the *ith* observed or true
+    value, and $p$ is the exponent.
+
+    Parameters
+    ----------
+    y_true: array-like of shape (n_samples,), required
+        Ground truth (correct) target values, also called observations, measurements, or observed values.
+    y_pred: array-like of shape (n_samples,), required
+        Estimated target values, also called simulations or modeled values.
+    y_base: array-like of shape (n_samples,), required
+        Baseline value(s) against which to assess skill of y_pred.
+    power: float, default 1.0
+        Exponent for each mean error summation value.
+    normalized: bool, default False
+        When True, normalize the final skill score using the method from
+        Nossent & Bauwens, 2012.
+
+    Returns
+    -------
+    score: float
+        Skill score of y_pred relative to y_base.
+
+    References
+    ----------
+    Nossent, J., & Bauwens, W. (2012, April). Application of a normalized
+        Nash-Sutcliffe efficiency to improve the accuracy of the Sobol' sensitivity
+        analysis of a hydrological model. In EGU General Assembly Conference
+        Abstracts (p. 237).
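+
+    Examples
+    --------
+    A quick illustration with hypothetical values. The baseline has a mean
+    absolute error of 1.0 and the predictions have a mean absolute error of
+    0.5, so the predictions score 0.5 relative to the baseline:
+
+    >>> y_true = [1.0, 2.0, 3.0, 4.0]
+    >>> y_pred = [1.5, 2.5, 3.5, 4.5]
+    >>> y_base = [0.0, 1.0, 2.0, 3.0]
+    >>> print(metrics.mean_error_skill_score(y_true, y_pred, y_base))
+    0.5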
+
+    """
+    # Compute components
+    numerator = mean_error(y_true, y_pred, power=power)
+    denominator = mean_error(y_true, y_base, power=power)
+
+    # Compute score, optionally normalize
+    if normalized:
+        return 1.0 / (1.0 + numerator/denominator)
+    return 1.0 - numerator/denominator
+
+volumetric_efficiency = partial(mean_error_skill_score, y_base=0.0, power=1.0)
+volumetric_efficiency.__doc__ = """Partial of hydrotools.metrics.mean_error_skill_score
+with a default y_base of 0.0 and a power value of 1.0. Volumetric efficiency ranges from
+-inf to 1.0, higher is better. According to the authors, volumetric efficiency indicates
+the "portion of water that arrives on time." Note that large over-predictions result in
+deeply negative values.
+
+See Also
+--------
+mean_error_skill_score
+
+References
+----------
+Criss, R. E., & Winston, W. E. (2008). Do Nash values have value? Discussion
+    and alternate proposals. Hydrological Processes: An International Journal,
+    22(14), 2723-2725.
+"""
 
 def nash_sutcliffe_efficiency(
     y_true: npt.ArrayLike,
     y_pred: npt.ArrayLike,
     log: bool = False,
+    power: float = 2.0,
     normalized: bool = False
     ) -> float:
     """Compute the Nash-Sutcliffe model efficiency coefficient (NSE), also called the
-    mean squared error skill score or the R^2 (coefficient of determination) regression score.
+    mean squared error skill score or the R^2 (coefficient of determination) regression score.
+    The NSE compares model errors to observed variance. The default NSE ranges from -inf to 1.0,
+    higher is better. A score of 0.0 indicates the model is as good a predictor as the mean of
+    observations. A score of 1.0 indicates the model exactly matches the observations.
+
+    The "normalized" Nash-Sutcliffe model efficiency re-scales the NSE to a range from 0.0 to 1.0.
+    In this case, a score of 0.5 indicates the model is as good a predictor as the mean of
+    observations. A score of 1.0 still indicates the model exactly matches the observations.
 
     Parameters
     ----------
@@ -79,6 +191,8 @@ def nash_sutcliffe_efficiency(
     log: bool, default False
         Apply numpy.log (natural logarithm) to y_true and y_pred
         before computing the NSE.
+    power: float, default 2.0
+        Exponent for each mean error summation value.
     normalized: bool, default False
         When True, normalize the final NSE value using the method from
         Nossent & Bauwens, 2012.
@@ -94,9 +208,9 @@
         conceptual models part I—A discussion of principles. Journal of
         hydrology, 10(3), 282-290.
     Nossent, J., & Bauwens, W. (2012, April). Application of a normalized
-        Nash-Sutcliffe efficiency to improve the accuracy of the Sobol'
-        sensitivity analysis of a hydrological model. In EGU General Assembly
-        Conference Abstracts (p. 237).
+        Nash-Sutcliffe efficiency to improve the accuracy of the Sobol' sensitivity
+        analysis of a hydrological model. In EGU General Assembly Conference
+        Abstracts (p. 237).
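+
+    Examples
+    --------
+    A quick illustration with hypothetical values. These predictions have a
+    mean squared error six times the variance of the observations, so the
+    NSE is 1 - 6 = -5:
+
+    >>> y_true = [1.0, 2.0, 3.0, 4.0]
+    >>> y_pred = [2.0, 4.0, 6.0, 8.0]
+    >>> print(metrics.nash_sutcliffe_efficiency(y_true, y_pred))
+    -5.0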
 
     """
     # Raise if not 1-D arrays
@@ -110,14 +224,151 @@ def nash_sutcliffe_efficiency(
         y_true = np.log(y_true)
         y_pred = np.log(y_pred)
 
-    # Compute components
-    numerator = mean_squared_error(y_true, y_pred)
-    denominator = mean_squared_error(y_true, np.mean(y_true))
+    # Compute score
+    return mean_error_skill_score(y_true, y_pred, np.mean(y_true),
+        power=power, normalized=normalized)
 
-    # Compute score, optionally normalize
-    if normalized:
-        return 1.0 / (1.0 + numerator/denominator)
-    return 1.0 - numerator/denominator
+def coefficient_of_persistence(
+    y_true: npt.ArrayLike,
+    y_pred: npt.ArrayLike,
+    lag: int = 1,
+    log: bool = False,
+    power: float = 2.0,
+    normalized: bool = False
+    ) -> float:
+    """Compute the coefficient of persistence (Kitanidis & Bras, 1980). The coefficient of
+    persistence compares the model to a recent observation, given some lag. This score assesses
+    the model's skill relative to assuming a previous observation does not change (persists).
+
+    In the default case, the *ith* modeled value is compared to the *(i-1)th* observed value.
+    The result is the mean squared error skill score using the *(i-1)th* observed values as a baseline.
+    The coefficient of persistence ranges from -inf to 1.0, higher is better. A score of 0.0 indicates
+    skill no better than assuming the last observation would persist. A perfect score is 1.0.
+
+    Parameters
+    ----------
+    y_true: array-like of shape (n_samples,), required
+        Ground truth (correct) target values, also called observations, measurements, or observed values.
+    y_pred: array-like of shape (n_samples,), required
+        Estimated target values, also called simulations or modeled values.
+    lag: int, default 1
+        Number of values by which to lag the baseline.
+    log: bool, default False
+        Apply numpy.log (natural logarithm) to y_true and y_pred
+        before computing the score.
+    power: float, default 2.0
+        Exponent for each mean error summation value.
+    normalized: bool, default False
+        When True, normalize the final score using the method from
+        Nossent & Bauwens, 2012.
+
+    Returns
+    -------
+    score: float
+        Coefficient of persistence.
+
+    See Also
+    --------
+    mean_error_skill_score: Generic method for computing model skill.
+
+    References
+    ----------
+    Kitanidis, P. K., & Bras, R. L. (1980). Real-time forecasting with a conceptual
+        hydrologic model: 2. Applications and results. Water Resources Research,
+        16(6), 1034-1044.
+    Nossent, J., & Bauwens, W. (2012, April). Application of a normalized
+        Nash-Sutcliffe efficiency to improve the accuracy of the Sobol' sensitivity
+        analysis of a hydrological model. In EGU General Assembly Conference
+        Abstracts (p. 237).
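+
+    Examples
+    --------
+    A quick illustration with hypothetical values. This model simply repeats
+    the previous observation, so it has no skill over the persistence
+    baseline:
+
+    >>> y_true = [1.0, 2.0, 3.0, 4.0]
+    >>> y_pred = [1.0, 1.0, 2.0, 3.0]
+    >>> print(metrics.coefficient_of_persistence(y_true, y_pred))
+    0.0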
+
+    """
+    # Raise if not 1-D arrays
+    validate.raise_for_non_vector(y_true, y_pred)
+
+    # Raise if not same shape
+    validate.raise_for_inconsistent_shapes(y_true, y_pred)
+
+    # Optionally transform components
+    if log:
+        y_true = np.log(y_true)
+        y_pred = np.log(y_pred)
+
+    # Compute baseline
+    y_base = np.roll(y_true, lag)
+
+    # Compute score
+    return mean_error_skill_score(y_true[lag:], y_pred[lag:], y_base[lag:],
+        power=power, normalized=normalized)
+
+def coefficient_of_extrapolation(
+    y_true: npt.ArrayLike,
+    y_pred: npt.ArrayLike,
+    log: bool = False,
+    power: float = 2.0,
+    normalized: bool = False
+    ) -> float:
+    """Compute the coefficient of extrapolation (Kitanidis & Bras, 1980). The coefficient of
+    extrapolation compares the model output to the last two values of the observations, assuming
+    the linear trend of these values will continue. In other words, the coefficient of
+    extrapolation is a skill score with baseline values $y_{b,i} = y_{o,i-1} + (y_{o,i-1} - y_{o,i-2})$.
+
+    The coefficient of extrapolation ranges from -inf to 1.0, higher is better. A score of 0.0 indicates
+    skill no better than assuming the difference between the last two observations will persist. A perfect
+    score is 1.0.
+
+    Parameters
+    ----------
+    y_true: array-like of shape (n_samples,), required
+        Ground truth (correct) target values, also called observations, measurements, or observed values.
+    y_pred: array-like of shape (n_samples,), required
+        Estimated target values, also called simulations or modeled values.
+    log: bool, default False
+        Apply numpy.log (natural logarithm) to y_true and y_pred
+        before computing the score.
+    power: float, default 2.0
+        Exponent for each mean error summation value.
+    normalized: bool, default False
+        When True, normalize the final score using the method from
+        Nossent & Bauwens, 2012.
+
+    Returns
+    -------
+    score: float
+        Coefficient of extrapolation.
+
+    See Also
+    --------
+    mean_error_skill_score: Generic method for computing model skill.
+
+    References
+    ----------
+    Kitanidis, P. K., & Bras, R. L. (1980). Real-time forecasting with a conceptual
+        hydrologic model: 2. Applications and results. Water Resources Research,
+        16(6), 1034-1044.
+    Nossent, J., & Bauwens, W. (2012, April). Application of a normalized
+        Nash-Sutcliffe efficiency to improve the accuracy of the Sobol' sensitivity
+        analysis of a hydrological model. In EGU General Assembly Conference
+        Abstracts (p. 237).
+
+    """
+    # Raise if not 1-D arrays
+    validate.raise_for_non_vector(y_true, y_pred)
+
+    # Raise if not same shape
+    validate.raise_for_inconsistent_shapes(y_true, y_pred)
+
+    # Optionally transform components
+    if log:
+        y_true = np.log(y_true)
+        y_pred = np.log(y_pred)
+
+    # Compute baseline
+    slope = np.diff(y_true)[:-1]
+    y_base = y_true[2:] + slope
+
+    # Compute score
+    return mean_error_skill_score(y_true[2:], y_pred[2:], y_base,
+        power=power, normalized=normalized)
 
 def kling_gupta_efficiency(
     y_true: npt.ArrayLike,
     y_pred: npt.ArrayLike,
@@ -126,7 +377,11 @@ def kling_gupta_efficiency(
     a_scale: float = 1.0,
     b_scale: float = 1.0
     ) -> float:
-    """Compute the Kling-Gupta model efficiency coefficient (KGE).
+    """Compute the Kling-Gupta model efficiency coefficient (KGE). The KGE is a
+    summary metric that combines the relative mean, relative variance, and linear
+    correlation between observed and simulated values. The final metric is computed
+    using the root sum of squares with optional scaling factors, similar to
+    computing distance in a 3-dimensional Euclidean space.
 
     Parameters
     ----------
@@ -164,23 +419,23 @@ def kling_gupta_efficiency(
     validate.raise_for_inconsistent_shapes(y_true, y_pred)
 
     # Pearson correlation coefficient
-    r = np.corrcoef(y_pred, y_true)[0,1]
+    linear_correlation = np.corrcoef(y_pred, y_true)[0,1]
 
     # Relative variability
-    a = np.std(y_pred) / np.std(y_true)
+    relative_variability = np.std(y_pred) / np.std(y_true)
 
     # Relative mean
-    b = np.mean(y_pred) / np.mean(y_true)
+    relative_mean = np.mean(y_pred) / np.mean(y_true)
 
     # Scaled Euclidean distance
-    EDs = np.sqrt(
-        (r_scale * (r - 1.0)) ** 2.0 +
-        (a_scale * (a - 1.0)) ** 2.0 +
-        (b_scale * (b - 1.0)) ** 2.0
+    euclidean_distance = np.sqrt(
+        (r_scale * (linear_correlation - 1.0)) ** 2.0 +
+        (a_scale * (relative_variability - 1.0)) ** 2.0 +
+        (b_scale * (relative_mean - 1.0)) ** 2.0
         )
 
     # Return KGE
-    return 1.0 - EDs
+    return 1.0 - euclidean_distance
 
 def compute_contingency_table(
     observed: npt.ArrayLike,
@@ -190,7 +445,14 @@
     false_negative_key: str = 'false_negative',
     true_negative_key: str = 'true_negative'
     ) -> pd.Series:
-    """Compute components of a contingency table.
+    """Compute components of a contingency table required for the evaluation of categorical
+    forecasts and simulations. Returns a pandas.Series indexed by table component. 'true_positive'
+    indicates the number of times the simulation correctly indicated True according to the
+    observations. 'false_positive' indicates the number of times the simulation incorrectly
+    indicated True according to the observations. 'false_negative' indicates the number of times
+    the simulation incorrectly indicated False according to the observations. 'true_negative'
+    indicates the number of times the simulation correctly indicated False according to the
+    observations.
 
     Parameters
     ----------
@@ -210,7 +472,19 @@
     Returns
     -------
     contingency_table: pandas.Series
-        pandas.Series of integer values keyed to pandas.Index([true_positive_key, false_positive_key, false_negative_key, true_negative_key])
+        pandas.Series of integer values keyed to pandas.Index([true_positive_key, false_positive_key,
+        false_negative_key, true_negative_key])
+
+    Examples
+    --------
+    >>> obs = [True, True, True, False, False, False, False, False, True, True]
+    >>> sim = [True, True, False, False, False, False, True, False, False, False]
+    >>> metrics.compute_contingency_table(obs, sim)
+    true_positive     2
+    false_positive    1
+    false_negative    3
+    true_negative     4
+    dtype: int64
 
     """
     # Raise if not 1-D arrays
@@ -239,7 +513,12 @@ def probability_of_detection(
     true_positive_key: str = 'true_positive',
     false_negative_key: str = 'false_negative'
     ) -> float:
-    """Compute probability of detection (POD).
+    """Compute probability of detection (POD), also called the "hit rate". POD
+    is the ratio of true positives to the total number of observed occurrences.
+    POD ranges from 0.0 to 1.0, higher is better. Note that this statistic is
+    easy to "hedge" if the model always indicates occurrence. This statistic
+    should be considered alongside some metric of false positives, like
+    probability of false alarm or threat score.
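+
+    In terms of contingency table components, POD = true_positive /
+    (true_positive + false_negative).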
 
     Parameters
     ----------
@@ -271,7 +550,9 @@ def probability_of_false_detection(
     false_positive_key: str = 'false_positive',
     true_negative_key: str = 'true_negative'
     ) -> float:
-    """Compute probability of false detection/false alarm rate (POFD/FARate).
+    """Compute probability of false detection/false alarm rate (POFD/FARate). POFD
+    indicates the portion of observed non-occurrences that were false alarms. POFD
+    ranges from 0.0 to 1.0, lower is better.
 
     Parameters
     ----------
@@ -303,7 +584,10 @@ def probability_of_false_alarm(
     true_positive_key: str = 'true_positive',
     false_positive_key: str = 'false_positive'
     ) -> float:
-    """Compute probability of false alarm/false alarm ratio (POFA/FARatio).
+    """Compute probability of false alarm/false alarm ratio (POFA/FARatio). POFA
+    indicates the portion of predictions or simulated values that were false alarms.
+    POFA ranges from 0.0 to 1.0, lower is better. The complement of POFA (1.0 - POFA)
+    is the "post-agreement" (PAG).
 
     Parameters
     ----------
@@ -336,7 +620,11 @@ def threat_score(
     false_positive_key: str = 'false_positive',
     false_negative_key: str = 'false_negative'
     ) -> float:
-    """Compute threat score/critical success index (TS/CSI).
+    """Compute threat score/critical success index (TS/CSI). CSI is the ratio of
+    true positives to the sum of true positives, false positives, and false
+    negatives. CSI ranges from 0.0 to 1.0, higher is better. CSI is sensitive to
+    event frequency; for rare events the equitable threat score may be more
+    suitable.
 
     Parameters
     ----------
@@ -372,7 +660,10 @@ def frequency_bias(
     false_positive_key: str = 'false_positive',
     false_negative_key: str = 'false_negative'
     ) -> float:
-    """Compute frequency bias (FBI).
+    """Compute frequency bias (FBI). FBI measures the tendency of the simulation
+    or forecast to over- or under-predict. FBI ranges from 0.0 to inf. A perfect
+    score is 1.0. Values less than 1.0 indicate under-prediction. Values greater
+    than 1.0 indicate over-prediction.
 
     Parameters
     ----------
@@ -409,7 +700,10 @@ def percent_correct(
     false_negative_key: str = 'false_negative',
     true_negative_key: str = 'true_negative'
     ) -> float:
-    """Compute percent correct (PC).
+    """Compute percent correct (PC). PC is the sum of both true positives and
+    true negatives compared to the total number of observations. PC is the portion
+    of correctly predicted occurrences and non-occurrences. PC ranges from 0.0 to
+    1.0, higher is better.
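+    That is, PC = (true_positive + true_negative) / (true_positive +
+    false_positive + false_negative + true_negative).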
 
     Parameters
     ----------
@@ -449,12 +743,15 @@ def base_chance(
     false_negative_key: str = 'false_negative',
     true_negative_key: str = 'true_negative'
     ) -> float:
-    """Compute base chance to hit (a_r).
+    """Compute base chance to hit (a_r). Base chance is the relative frequency of
+    occurrences. In other words, this is the probability of scoring a "hit" or true
+    positive by chance.
 
     Parameters
     ----------
     contingency_table: dict, pandas.DataFrame, or pandas.Series, required
-        Contingency table containing key-value pairs with the following keys: true_positive_key, false_positive_key, false_negative_key, true_negative_key; and int or float values
+        Contingency table containing key-value pairs with the following keys: true_positive_key, false_positive_key,
+        false_negative_key, true_negative_key; and int or float values
     true_positive_key: str, optional, default 'true_positive'
         Label to use for true positives.
     false_positive_key: str, optional, default 'false_positive'
@@ -487,7 +784,9 @@ def equitable_threat_score(
     false_negative_key: str = 'false_negative',
     true_negative_key: str = 'true_negative'
     ) -> float:
-    """Compute equitable threat score (ETS).
+    """Compute equitable threat score (ETS). Threat score/critical success index
+    tends to yield lower scores for rare events. ETS computes a threat score but
+    accounts for the relative frequency of scoring a true positive by chance.
 
     Parameters
     ----------
diff --git a/python/metrics/tests/test_metrics.py b/python/metrics/tests/test_metrics.py
index 8c5c42c8..baea6ae8 100644
--- a/python/metrics/tests/test_metrics.py
+++ b/python/metrics/tests/test_metrics.py
@@ -48,6 +48,24 @@
 n_true = [np.nan, np.nan, np.nan, np.nan]
 n_pred = [np.nan, np.nan, np.nan, np.nan]
 
+y_true_series = pd.Series(
+    data=y_true,
+    index=pd.date_range(
+        start="2020-01-01",
+        end="2020-01-04",
+        freq="1D"
+    )
+)
+
+y_pred_series = pd.Series(
+    data=y_pred,
+    index=pd.date_range(
+        start="2020-01-01",
+        end="2020-01-04",
+        freq="1D"
+    )
+)
+
 def test_compute_contingency_table():
     obs = pd.Series([True, False, False, True, True, True, False, False, False, False], dtype="category")
@@ -241,11 +259,15 @@ def test_equitable_threat_score():
     ETS = metrics.equitable_threat_score(nan_contigency_table)
     assert np.isnan(ETS)
 
-def test_mean_squared_error():
-    MSE = metrics.mean_squared_error(y_true, y_pred)
+def test_mean_absolute_error():
+    MAE = metrics.mean_error(y_true, y_pred)
+    assert MAE == 2.0
+
+def test_mean_error():
+    MSE = metrics.mean_error(y_true, y_pred, power=2.0)
     assert MSE == 5.0
 
-    RMSE = metrics.mean_squared_error(y_true, y_pred, root=True)
+    RMSE = metrics.mean_error(y_true, y_pred, power=2.0, root=True)
     assert RMSE == np.sqrt(5.0)
 
 def test_nash_sutcliffe_efficiency():
@@ -264,18 +286,18 @@
         np.exp(y_pred), log=True, normalized=True)
     assert NNSEL == 0.2
 
-def test_zero_mean_squared_error():
-    MSE = metrics.mean_squared_error(z_true, z_pred)
+def test_zero_mean_error():
+    MSE = metrics.mean_error(z_true, z_pred, power=2.0)
     assert MSE == 0.0
 
-    RMSE = metrics.mean_squared_error(z_true, z_pred, root=True)
+    RMSE = metrics.mean_error(z_true, z_pred, power=2.0, root=True)
     assert RMSE == 0.0
 
-def test_nan_mean_squared_error():
-    MSE = metrics.mean_squared_error(n_true, n_pred)
+def test_nan_mean_error():
+    MSE = metrics.mean_error(n_true, n_pred, power=2.0)
     assert np.isnan(MSE)
 
-    RMSE = metrics.mean_squared_error(n_true, n_pred, root=True)
+    RMSE = metrics.mean_error(n_true, n_pred, power=2.0, root=True)
     assert np.isnan(RMSE)
 
 def test_zero_nash_sutcliffe_efficiency():
@@ -337,3 +359,84 @@ def test_kling_gupta_efficiency():
         r_scale=0.5, a_scale=0.25, b_scale=0.25)
     expected = (1.0 - np.sqrt(9.0/128.0))
     assert np.isclose(KGE, expected)
+
+def test_coefficient_of_persistence():
+    # Default
+    COP = metrics.coefficient_of_persistence(y_true, y_pred)
+    expected = (1.0 - 11.0/3.0)
+    assert np.isclose(COP, expected)
+
+    # Test with series
+    COP = metrics.coefficient_of_persistence(y_true_series, y_pred_series)
+    expected = (1.0 - 11.0/3.0)
+    assert np.isclose(COP, expected)
+
+    # Identity
+    COP = metrics.coefficient_of_persistence(y_true, np.array(y_true) * 1.0)
+    expected = 1.0
+    assert np.isclose(COP, expected)
+
+    # Lag
+    COP = metrics.coefficient_of_persistence(y_true, y_pred, lag=2)
+    expected = -0.25
+    assert np.isclose(COP, expected)
+
+    # Power
+    COP = metrics.coefficient_of_persistence(y_true, y_pred, power=3)
+    expected = (1.0 - 29.0 / 3.0)
+    assert np.isclose(COP, expected)
+
+    # Normalized
+    COP = metrics.coefficient_of_persistence(y_true, y_pred, normalized=True)
+    expected = 1.0 / (2.0 - (1.0 - 11.0/3.0))
+    assert np.isclose(COP, expected)
+
+    # Log
+    COP = metrics.coefficient_of_persistence(y_true, y_pred, log=True)
+    expected = -2.09313723301667
+    assert np.isclose(COP, expected)
+
+def test_coefficient_of_extrapolation():
+    # Default
+    v = [1, 2, 4, 5]
+    COE = metrics.coefficient_of_extrapolation(v, y_pred)
+    expected = -3.0
+    assert np.isclose(COE, expected)
+
+    # Test with series
+    s = pd.Series(data=v, index=y_true_series.index)
+    COE = metrics.coefficient_of_extrapolation(s, y_pred_series)
+    expected = -3.0
+    assert np.isclose(COE, expected)
+
+    # Identity
+    COE = metrics.coefficient_of_extrapolation(y_true, np.array(y_true) * 1.0)
+    expected = 1.0
+    assert np.isclose(COE, expected)
+
+    # Power
+    COE = metrics.coefficient_of_extrapolation(v, y_pred, power=1.5)
+    expected = -1.82842712474619
+    assert np.isclose(COE, expected)
+
+    # Normalized
+    COE = metrics.coefficient_of_extrapolation(v, y_pred, normalized=True)
+    expected = 0.2
+    assert np.isclose(COE, expected)
+
+    # Log
+    COE = metrics.coefficient_of_extrapolation(v, y_pred, log=True)
+    expected = -2.19567503891363
+    assert np.isclose(COE, expected)
+
+def test_mean_squared_error():
+    MSE = metrics.mean_squared_error(y_true, y_pred)
+    assert MSE == 5.0
+
+def test_root_mean_squared_error():
+    RMSE = metrics.root_mean_squared_error(y_true, y_pred)
+    assert RMSE == np.sqrt(5.0)
+
+def test_volumetric_efficiency():
+    VE = metrics.volumetric_efficiency(y_true, y_pred)
+    assert np.isclose(VE, 0.2)
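+
+def test_mean_error_skill_score():
+    # Illustrative sketch of a direct test for the generic skill score; the
+    # arrays here are chosen for this example rather than taken from the
+    # fixtures above. The baseline has a mean absolute error of 1.0 and the
+    # predictions an error of 0.5, so the skill score is 0.5.
+    score = metrics.mean_error_skill_score(
+        [1.0, 2.0, 3.0, 4.0],
+        [1.5, 2.5, 3.5, 4.5],
+        [0.0, 1.0, 2.0, 3.0]
+    )
+    assert np.isclose(score, 0.5)
+
+    # Normalized variant from Nossent & Bauwens (2012): 1 / (1 + 0.5)
+    score = metrics.mean_error_skill_score(
+        [1.0, 2.0, 3.0, 4.0],
+        [1.5, 2.5, 3.5, 4.5],
+        [0.0, 1.0, 2.0, 3.0],
+        normalized=True
+    )
+    assert np.isclose(score, 2.0/3.0)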