diff --git a/python/metrics/src/hydrotools/metrics/_version.py b/python/metrics/src/hydrotools/metrics/_version.py
index 976498ab..0b2f79db 100644
--- a/python/metrics/src/hydrotools/metrics/_version.py
+++ b/python/metrics/src/hydrotools/metrics/_version.py
@@ -1 +1 @@
-__version__ = "1.0.3"
+__version__ = "1.1.3"
diff --git a/python/metrics/src/hydrotools/metrics/metrics.py b/python/metrics/src/hydrotools/metrics/metrics.py
index 295b0429..3bbc9119 100644
--- a/python/metrics/src/hydrotools/metrics/metrics.py
+++ b/python/metrics/src/hydrotools/metrics/metrics.py
@@ -11,6 +11,7 @@
 Functions
 ---------
  - compute_contingency_table
+ - convert_mapping_values
  - probability_of_detection
  - probability_of_false_detection
  - probability_of_false_alarm
@@ -27,7 +28,7 @@
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
-from typing import Union
+from typing import Union, Mapping, MutableMapping
 
 def mean_squared_error(
     y_true: npt.ArrayLike,
@@ -84,7 +85,7 @@ def nash_sutcliffe_efficiency(
     Returns
     -------
     score: float
-        Nash–Sutcliffe model efficiency coefficient
+        Nash-Sutcliffe model efficiency coefficient
 
     References
     ----------
@@ -153,6 +154,36 @@ def compute_contingency_table(
         true_negative_key : ctab.loc[False, False]
         })
 
+def convert_mapping_values(
+    mapping: Mapping[str, npt.DTypeLike],
+    converter: np.dtype = np.float64
+    ) -> MutableMapping:
+    """Convert mapping values to a consistent type. Primarily used to validate
+    contingency tables.
+
+    Parameters
+    ----------
+    mapping: dict-like, required
+        Input mapping with string keys and values that can be coerced into a
+        numpy data type.
+    converter: numpy.dtype, optional, default numpy.float64
+        Converter data type or function used to convert mapping values to a
+        consistent type.
+
+    Returns
+    -------
+    converted_mapping: dict-like, same type as mapping
+        New mapping with converted values.
+
+    """
+    # Populate new dictionary with converted values
+    d = {}
+    for key, value in dict(mapping).items():
+        d[key] = converter(value)
+
+    # Return new mapping with same type as original
+    return type(mapping)(d)
+
 def probability_of_detection(
     contingency_table: Union[dict, pd.DataFrame, pd.Series],
     true_positive_key: str = 'true_positive',
@@ -177,6 +208,10 @@ def probability_of_detection(
         Probability of detection.
 
     """
+    # Convert values to numpy scalars
+    contingency_table = convert_mapping_values(contingency_table)
+
+    # Compute
     a = contingency_table[true_positive_key]
     c = contingency_table[false_negative_key]
     return a / (a+c)
@@ -205,6 +240,10 @@ def probability_of_false_detection(
         Probability of false detection.
 
     """
+    # Convert values to numpy scalars
+    contingency_table = convert_mapping_values(contingency_table)
+
+    # Compute
     b = contingency_table[false_positive_key]
     d = contingency_table[true_negative_key]
     return b / (b+d)
@@ -233,6 +272,10 @@ def probability_of_false_alarm(
         Probability of false alarm.
 
     """
+    # Convert values to numpy scalars
+    contingency_table = convert_mapping_values(contingency_table)
+
+    # Compute
     b = contingency_table[false_positive_key]
     a = contingency_table[true_positive_key]
     return b / (b+a)
@@ -264,6 +307,10 @@ def threat_score(
         Threat score.
 
     """
+    # Convert values to numpy scalars
+    contingency_table = convert_mapping_values(contingency_table)
+
+    # Compute
     a = contingency_table[true_positive_key]
     b = contingency_table[false_positive_key]
     c = contingency_table[false_negative_key]
@@ -296,6 +343,10 @@ def frequency_bias(
         Frequency bias.
 
     """
+    # Convert values to numpy scalars
+    contingency_table = convert_mapping_values(contingency_table)
+
+    # Compute
     a = contingency_table[true_positive_key]
     b = contingency_table[false_positive_key]
     c = contingency_table[false_negative_key]
@@ -331,6 +382,10 @@ def percent_correct(
         Percent correct.
 
     """
+    # Convert values to numpy scalars
+    contingency_table = convert_mapping_values(contingency_table)
+
+    # Compute
     a = contingency_table[true_positive_key]
     b = contingency_table[false_positive_key]
     c = contingency_table[false_negative_key]
@@ -365,6 +420,10 @@ def base_chance(
         Base chance to hit by chance.
 
     """
+    # Convert values to numpy scalars
+    contingency_table = convert_mapping_values(contingency_table)
+
+    # Compute
     a = contingency_table[true_positive_key]
     b = contingency_table[false_positive_key]
     c = contingency_table[false_negative_key]
@@ -401,6 +460,10 @@ def equitable_threat_score(
         Equitable threat score.
 
     """
+    # Convert values to numpy scalars
+    contingency_table = convert_mapping_values(contingency_table)
+
+    # Compute
     a_r = base_chance(contingency_table,
         true_positive_key=true_positive_key,
         false_positive_key=false_positive_key,
diff --git a/python/metrics/tests/test_metrics.py b/python/metrics/tests/test_metrics.py
index 1e5c2b86..b7fadf3f 100644
--- a/python/metrics/tests/test_metrics.py
+++ b/python/metrics/tests/test_metrics.py
@@ -2,7 +2,6 @@
 from hydrotools.metrics import metrics
 import pandas as pd
-from math import isclose
 import numpy as np
 
 
 contigency_table = {
@@ -19,9 +18,36 @@
     'TN': 4
 }
 
+zero_contingency_table = {
+    'true_positive': 0,
+    'false_positive': 0,
+    'false_negative': 0,
+    'true_negative': 0
+}
+
+nan_contigency_table = {
+    'true_positive': np.nan,
+    'false_positive': np.nan,
+    'false_negative': np.nan,
+    'true_negative': np.nan
+}
+
+char_contigency_table = {
+    'true_positive': '1',
+    'false_positive': '2',
+    'false_negative': '3',
+    'true_negative': '4'
+}
+
 y_true = [1., 2., 3., 4.]
 y_pred = [4., 3., 2., 1.]
+z_true = [0., 0., 0., 0.]
+z_pred = [0., 0., 0., 0.]
+
+n_true = [np.nan, np.nan, np.nan, np.nan]
+n_pred = [np.nan, np.nan, np.nan, np.nan]
+
 
 
 def test_compute_contingency_table():
     obs = pd.Categorical([True, False, False, True, True, True, False, False, False, False])
@@ -57,20 +83,42 @@ def test_probability_of_detection():
         )
     assert POD == (1/4)
 
+    with pytest.warns(RuntimeWarning):
+        POD = metrics.probability_of_detection(zero_contingency_table)
+    assert np.isnan(POD)
+
+    POD = metrics.probability_of_detection(nan_contigency_table)
+    assert np.isnan(POD)
+
+    POD = metrics.probability_of_detection(char_contigency_table)
+    assert POD == (1/4)
+
 
 def test_probability_of_false_detection():
     POFD = metrics.probability_of_false_detection(contigency_table)
     assert POFD == (2/6)
 
+    POFD = metrics.probability_of_false_detection(char_contigency_table)
+    assert POFD == (2/6)
+
     POFD = metrics.probability_of_false_detection(alt_contigency_table,
         false_positive_key='FP',
         true_negative_key='TN'
         )
     assert POFD == (2/6)
 
+    with pytest.warns(RuntimeWarning):
+        POFD = metrics.probability_of_false_detection(zero_contingency_table)
+    assert np.isnan(POFD)
+
+    POFD = metrics.probability_of_false_detection(nan_contigency_table)
+    assert np.isnan(POFD)
+
 def test_probability_of_false_alarm():
     POFA = metrics.probability_of_false_alarm(contigency_table)
     assert POFA == (2/3)
 
+    POFA = metrics.probability_of_false_alarm(char_contigency_table)
+    assert POFA == (2/3)
     POFA = metrics.probability_of_false_alarm(alt_contigency_table,
         true_positive_key='TP',
@@ -78,10 +126,19 @@ def test_probability_of_false_alarm():
         )
     assert POFA == (2/3)
 
+    with pytest.warns(RuntimeWarning):
+        POFA = metrics.probability_of_false_alarm(zero_contingency_table)
+    assert np.isnan(POFA)
+
+    POFA = metrics.probability_of_false_alarm(nan_contigency_table)
+    assert np.isnan(POFA)
+
 
 def test_threat_score():
     TS = metrics.threat_score(contigency_table)
     assert TS == (1/6)
 
+    TS = metrics.threat_score(char_contigency_table)
+    assert TS == (1/6)
     TS = metrics.threat_score(alt_contigency_table,
         true_positive_key='TP',
@@ -90,10 +147,20 @@ def test_threat_score():
         )
     assert TS == (1/6)
 
+    with pytest.warns(RuntimeWarning):
+        TS = metrics.threat_score(zero_contingency_table)
+    assert np.isnan(TS)
+
+    TS = metrics.threat_score(nan_contigency_table)
+    assert np.isnan(TS)
+
 def test_frequency_bias():
     FBI = metrics.frequency_bias(contigency_table)
     assert FBI == (3/4)
 
+    FBI = metrics.frequency_bias(char_contigency_table)
+    assert FBI == (3/4)
+
     FBI = metrics.frequency_bias(alt_contigency_table,
         true_positive_key='TP',
         false_positive_key='FP',
@@ -101,10 +168,20 @@ def test_frequency_bias():
         )
     assert FBI == (3/4)
 
+    with pytest.warns(RuntimeWarning):
+        FBI = metrics.frequency_bias(zero_contingency_table)
+    assert np.isnan(FBI)
+
+    FBI = metrics.frequency_bias(nan_contigency_table)
+    assert np.isnan(FBI)
+
 def test_percent_correct():
     PC = metrics.percent_correct(contigency_table)
     assert PC == (5/10)
 
+    PC = metrics.percent_correct(char_contigency_table)
+    assert PC == (5/10)
+
     PC = metrics.percent_correct(alt_contigency_table,
         true_positive_key='TP',
         false_positive_key='FP',
@@ -113,10 +190,20 @@ def test_percent_correct():
         )
     assert PC == (5/10)
 
+    with pytest.warns(RuntimeWarning):
+        PC = metrics.percent_correct(zero_contingency_table)
+    assert np.isnan(PC)
+
+    PC = metrics.percent_correct(nan_contigency_table)
+    assert np.isnan(PC)
+
 def test_base_chance():
     a_r = metrics.base_chance(contigency_table)
     assert a_r == (12/10)
 
+    a_r = metrics.base_chance(char_contigency_table)
+    assert a_r == (12/10)
+
     a_r = metrics.base_chance(alt_contigency_table,
         true_positive_key='TP',
         false_positive_key='FP',
@@ -125,9 +212,19 @@ def test_base_chance():
         )
     assert a_r == (12/10)
 
+    with pytest.warns(RuntimeWarning):
+        a_r = metrics.base_chance(zero_contingency_table)
+    assert np.isnan(a_r)
+
+    a_r = metrics.base_chance(nan_contigency_table)
+    assert np.isnan(a_r)
+
 def test_equitable_threat_score():
     ETS = metrics.equitable_threat_score(contigency_table)
-    assert isclose(ETS, (-0.2/4.8), abs_tol=0.000001)
+    assert np.isclose(ETS, (-0.2/4.8), atol=0.000001)
+
+    ETS = metrics.equitable_threat_score(char_contigency_table)
+    assert np.isclose(ETS, (-0.2/4.8), atol=0.000001)
 
     ETS = metrics.equitable_threat_score(alt_contigency_table,
         true_positive_key='TP',
@@ -135,7 +232,14 @@ def test_equitable_threat_score():
         false_negative_key='FN',
         true_negative_key='TN'
         )
-    assert isclose(ETS, (-0.2/4.8), abs_tol=0.000001)
+    assert np.isclose(ETS, (-0.2/4.8), atol=0.000001)
+
+    with pytest.warns(RuntimeWarning):
+        ETS = metrics.equitable_threat_score(zero_contingency_table)
+    assert np.isnan(ETS)
+
+    ETS = metrics.equitable_threat_score(nan_contigency_table)
+    assert np.isnan(ETS)
 
 def test_mean_squared_error():
     MSE = metrics.mean_squared_error(y_true, y_pred)
@@ -159,4 +263,57 @@ def test_nash_sutcliffe_efficiency():
     NNSEL = metrics.nash_sutcliffe_efficiency(np.exp(y_true),
         np.exp(y_pred), log=True, normalized=True)
     assert NNSEL == 0.2
-    print(NNSEL)
+
+def test_zero_mean_squared_error():
+    MSE = metrics.mean_squared_error(z_true, z_pred)
+    assert MSE == 0.0
+
+    RMSE = metrics.mean_squared_error(z_true, z_pred, root=True)
+    assert RMSE == 0.0
+
+def test_nan_mean_squared_error():
+    MSE = metrics.mean_squared_error(n_true, n_pred)
+    assert np.isnan(MSE)
+
+    RMSE = metrics.mean_squared_error(n_true, n_pred, root=True)
+    assert np.isnan(RMSE)
+
+def test_zero_nash_sutcliffe_efficiency():
+    with pytest.warns(RuntimeWarning):
+        NSE = metrics.nash_sutcliffe_efficiency(z_true, z_pred)
+    assert np.isnan(NSE)
+
+    NNSE = metrics.nash_sutcliffe_efficiency(z_true, z_pred,
+        normalized=True)
+    assert np.isnan(NNSE)
+
+    NSEL = metrics.nash_sutcliffe_efficiency(np.exp(z_true),
+        np.exp(z_pred), log=True)
+    assert np.isnan(NSEL)
+
+    NNSEL = metrics.nash_sutcliffe_efficiency(np.exp(z_true),
+        np.exp(z_pred), log=True, normalized=True)
+    assert np.isnan(NNSEL)
+
+def test_nan_nash_sutcliffe_efficiency():
+    NSE = metrics.nash_sutcliffe_efficiency(n_true, n_pred)
+    assert np.isnan(NSE)
+
+    NNSE = metrics.nash_sutcliffe_efficiency(n_true, n_pred,
+        normalized=True)
+    assert np.isnan(NNSE)
+
+    NSEL = metrics.nash_sutcliffe_efficiency(np.exp(n_true),
+        np.exp(n_pred), log=True)
+    assert np.isnan(NSEL)
+
+    NNSEL = metrics.nash_sutcliffe_efficiency(np.exp(n_true),
+        np.exp(n_pred), log=True, normalized=True)
+    assert np.isnan(NNSEL)
+
+def test_convert_mapping_values():
+    char_series = pd.Series(char_contigency_table)
+    converted = metrics.convert_mapping_values(char_series)
+
+    assert converted.dtype == np.float64
+
\ No newline at end of file
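
Reviewer note (illustration only, not part of the patch): the sketch below shows how the new convert_mapping_values helper is expected to behave based on its docstring and the new char_contigency_table and test_convert_mapping_values cases above. String-valued contingency tables are coerced to numpy.float64 before a metric is computed, and the mapping type (dict or pandas.Series) is preserved.

# Usage sketch; mirrors the behaviour exercised by the new tests in this diff.
import numpy as np
import pandas as pd
from hydrotools.metrics import metrics

table = {
    'true_positive': '1',
    'false_positive': '2',
    'false_negative': '3',
    'true_negative': '4'
}

# Values are coerced with numpy.float64; the mapping type is preserved
converted = metrics.convert_mapping_values(table)
assert all(isinstance(v, np.float64) for v in converted.values())

# Metrics accept the string-valued table directly because they now call
# convert_mapping_values internally
assert metrics.probability_of_detection(table) == 1 / 4

# A pandas.Series round-trips as a Series with a float64 dtype
series = metrics.convert_mapping_values(pd.Series(table))
assert series.dtype == np.float64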