From 35d64f880c8b6081f50d3bf07bf35003707d972a Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 17:28:46 +0100 Subject: [PATCH 01/22] Refactor: Move common nominal functions to utils --- .../functional/nominal/cramers.py | 54 +++---------------- src/torchmetrics/functional/nominal/utils.py | 47 +++++++++++++++- 2 files changed, 53 insertions(+), 48 deletions(-) diff --git a/src/torchmetrics/functional/nominal/cramers.py b/src/torchmetrics/functional/nominal/cramers.py index 3b3a316d5d7..40e1e539044 100644 --- a/src/torchmetrics/functional/nominal/cramers.py +++ b/src/torchmetrics/functional/nominal/cramers.py @@ -19,55 +19,15 @@ from typing_extensions import Literal from torchmetrics.functional.classification.confusion_matrix import _multiclass_confusion_matrix_update -from torchmetrics.functional.nominal.utils import _handle_nan_in_data +from torchmetrics.functional.nominal.utils import ( + _compute_chi_squared, + _drop_empty_rows_and_cols, + _handle_nan_in_data, + _nominal_input_validation, +) from torchmetrics.utilities.prints import rank_zero_warn -def _cramers_input_validation(nan_strategy: str, nan_replace_value: Optional[Union[int, float]]) -> None: - if nan_strategy not in ["replace", "drop"]: - raise ValueError( - f"Argument `nan_strategy` is expected to be one of `['replace', 'drop']`, but got {nan_strategy}" - ) - if nan_strategy == "replace" and not isinstance(nan_replace_value, (int, float)): - raise ValueError( - "Argument `nan_replace` is expected to be of a type `int` or `float` when `nan_strategy = 'replace`, " - f"but got {nan_replace_value}" - ) - - -def _compute_expected_freqs(confmat: Tensor) -> Tensor: - """Compute the expected frequenceis from the provided confusion matrix.""" - margin_sum_rows, margin_sum_cols = confmat.sum(1), confmat.sum(0) - expected_freqs = torch.einsum("r, c -> rc", margin_sum_rows, margin_sum_cols) / confmat.sum() - return expected_freqs - - -def _compute_chi_squared(confmat: Tensor, bias_correction: bool) 
-> Tensor: - """Chi-square test of independenc of variables in a confusion matrix table. - - Adapted from: https://github.com/scipy/scipy/blob/v1.9.2/scipy/stats/contingency.py. - """ - expected_freqs = _compute_expected_freqs(confmat) - # Get degrees of freedom - df = expected_freqs.numel() - sum(expected_freqs.shape) + expected_freqs.ndim - 1 - if df == 0: - return torch.tensor(0.0, device=confmat.device) - - if df == 1 and bias_correction: - diff = expected_freqs - confmat - direction = diff.sign() - confmat += direction * torch.minimum(0.5 * torch.ones_like(direction), direction.abs()) - - return torch.sum((confmat - expected_freqs) ** 2 / expected_freqs) - - -def _drop_empty_rows_and_cols(confmat: Tensor) -> Tensor: - """Drop all rows and columns containing only zeros.""" - confmat = confmat[confmat.sum(1) != 0] - confmat = confmat[:, confmat.sum(0) != 0] - return confmat - - def _cramers_v_update( preds: Tensor, target: Tensor, @@ -210,7 +170,7 @@ def cramers_v_matrix( [0.0542, 0.0000, 0.0000, 1.0000, 0.1100], [0.1337, 0.0000, 0.0649, 0.1100, 1.0000]]) """ - _cramers_input_validation(nan_strategy, nan_replace_value) + _nominal_input_validation(nan_strategy, nan_replace_value) num_variables = matrix.shape[1] cramers_v_matrix_value = torch.ones(num_variables, num_variables, device=matrix.device) for i, j in itertools.combinations(range(num_variables), 2): diff --git a/src/torchmetrics/functional/nominal/utils.py b/src/torchmetrics/functional/nominal/utils.py index b106d5986e8..f685a36562c 100644 --- a/src/torchmetrics/functional/nominal/utils.py +++ b/src/torchmetrics/functional/nominal/utils.py @@ -11,13 +11,58 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Optional, Tuple +from typing import Optional, Tuple, Union import torch from torch import Tensor from typing_extensions import Literal +def _nominal_input_validation(nan_strategy: str, nan_replace_value: Optional[Union[int, float]]) -> None: + if nan_strategy not in ["replace", "drop"]: + raise ValueError( + f"Argument `nan_strategy` is expected to be one of `['replace', 'drop']`, but got {nan_strategy}" + ) + if nan_strategy == "replace" and not isinstance(nan_replace_value, (int, float)): + raise ValueError( + "Argument `nan_replace` is expected to be of a type `int` or `float` when `nan_strategy = 'replace`, " + f"but got {nan_replace_value}" + ) + + +def _compute_expected_freqs(confmat: Tensor) -> Tensor: + """Compute the expected frequenceis from the provided confusion matrix.""" + margin_sum_rows, margin_sum_cols = confmat.sum(1), confmat.sum(0) + expected_freqs = torch.einsum("r, c -> rc", margin_sum_rows, margin_sum_cols) / confmat.sum() + return expected_freqs + + +def _compute_chi_squared(confmat: Tensor, bias_correction: bool) -> Tensor: + """Chi-square test of independenc of variables in a confusion matrix table. + + Adapted from: https://github.com/scipy/scipy/blob/v1.9.2/scipy/stats/contingency.py. 
+ """ + expected_freqs = _compute_expected_freqs(confmat) + # Get degrees of freedom + df = expected_freqs.numel() - sum(expected_freqs.shape) + expected_freqs.ndim - 1 + if df == 0: + return torch.tensor(0.0, device=confmat.device) + + if df == 1 and bias_correction: + diff = expected_freqs - confmat + direction = diff.sign() + confmat += direction * torch.minimum(0.5 * torch.ones_like(direction), direction.abs()) + + return torch.sum((confmat - expected_freqs) ** 2 / expected_freqs) + + +def _drop_empty_rows_and_cols(confmat: Tensor) -> Tensor: + """Drop all rows and columns containing only zeros.""" + confmat = confmat[confmat.sum(1) != 0] + confmat = confmat[:, confmat.sum(0) != 0] + return confmat + + def _handle_nan_in_data( preds: Tensor, target: Tensor, From 9d5afb5a9609d0be076859c78cab9c2d422514a0 Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 17:38:38 +0100 Subject: [PATCH 02/22] Refactor: Refactor bias correction calculation into function and move to utils --- .../functional/nominal/cramers.py | 13 +++---- src/torchmetrics/functional/nominal/utils.py | 34 +++++++++++++++++++ 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/torchmetrics/functional/nominal/cramers.py b/src/torchmetrics/functional/nominal/cramers.py index 40e1e539044..469a2646f67 100644 --- a/src/torchmetrics/functional/nominal/cramers.py +++ b/src/torchmetrics/functional/nominal/cramers.py @@ -20,12 +20,13 @@ from torchmetrics.functional.classification.confusion_matrix import _multiclass_confusion_matrix_update from torchmetrics.functional.nominal.utils import ( + _compute_bias_corrected_values, _compute_chi_squared, _drop_empty_rows_and_cols, _handle_nan_in_data, _nominal_input_validation, + _unable_to_use_bias_correction_warning, ) -from torchmetrics.utilities.prints import rank_zero_warn def _cramers_v_update( @@ -70,15 +71,11 @@ def _cramers_v_compute(confmat: Tensor, bias_correction: bool) -> Tensor: n_rows, n_cols = confmat.shape if bias_correction: - 
phi_squared_corrected = torch.max( - torch.tensor(0.0, device=confmat.device), phi_squared - ((n_rows - 1) * (n_cols - 1)) / (cm_sum - 1) + phi_squared_corrected, rows_corrected, cols_corrected = _compute_bias_corrected_values( + phi_squared, n_rows, n_cols, cm_sum ) - rows_corrected = n_rows - (n_rows - 1) ** 2 / (cm_sum - 1) - cols_corrected = n_cols - (n_cols - 1) ** 2 / (cm_sum - 1) if min(rows_corrected, cols_corrected) == 1: - rank_zero_warn( - "Unable to compute Cramer's V using bias correction. Please consider to set `bias_correction=False`." - ) + _unable_to_use_bias_correction_warning(metric_name="Cramer's V") return torch.tensor(float("nan"), device=confmat.device) cramers_v_value = torch.sqrt(phi_squared_corrected / min(rows_corrected - 1, cols_corrected - 1)) else: diff --git a/src/torchmetrics/functional/nominal/utils.py b/src/torchmetrics/functional/nominal/utils.py index f685a36562c..7da19ee8077 100644 --- a/src/torchmetrics/functional/nominal/utils.py +++ b/src/torchmetrics/functional/nominal/utils.py @@ -17,6 +17,8 @@ from torch import Tensor from typing_extensions import Literal +from torchmetrics.utilities.prints import rank_zero_warn + def _nominal_input_validation(nan_strategy: str, nan_replace_value: Optional[Union[int, float]]) -> None: if nan_strategy not in ["replace", "drop"]: @@ -63,6 +65,32 @@ def _drop_empty_rows_and_cols(confmat: Tensor) -> Tensor: return confmat +def _compute_phi_squared_corrected( + phi_squared: Tensor, + n_rows: int, + n_cols: int, + confmat_sum: Tensor, +) -> Tensor: + """Compute bias-corrected Phi Squared.""" + return torch.max( + torch.tensor(0.0, device=phi_squared.device), phi_squared - ((n_rows - 1) * (n_cols - 1)) / (confmat_sum - 1) + ) + + +def _compute_rows_and_cols_corrected(n_rows: int, n_cols: int, confmat_sum: Tensor) -> Tuple[Tensor, Tensor]: + rows_corrected = n_rows - (n_rows - 1) ** 2 / (confmat_sum - 1) + cols_corrected = n_cols - (n_cols - 1) ** 2 / (confmat_sum - 1) + return rows_corrected, 
cols_corrected + + +def _compute_bias_corrected_values( + phi_squared: Tensor, n_rows: int, n_cols: int, confmat_sum: Tensor +) -> Tuple[Tensor, Tensor, Tensor]: + phi_squared_corrected = _compute_phi_squared_corrected(phi_squared, n_rows, n_cols, confmat_sum) + rows_corrected, cols_corrected = _compute_rows_and_cols_corrected(n_rows, n_cols, confmat_sum) + return phi_squared_corrected, rows_corrected, cols_corrected + + def _handle_nan_in_data( preds: Tensor, target: Tensor, @@ -91,3 +119,9 @@ def _handle_nan_in_data( return preds.nan_to_num(nan_replace_value), target.nan_to_num(nan_replace_value) rows_contain_nan = torch.logical_or(preds.isnan(), target.isnan()) return preds[~rows_contain_nan], target[~rows_contain_nan] + + +def _unable_to_use_bias_correction_warning(metric_name: str) -> None: + rank_zero_warn( + f"Unable to compute {metric_name} using bias correction. Please consider to set `bias_correction=False`." + ) From ef3467d84b099eb870a9103450f03f9e2b17ca11 Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 17:39:21 +0100 Subject: [PATCH 03/22] Add missing docstrings --- src/torchmetrics/functional/nominal/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/torchmetrics/functional/nominal/utils.py b/src/torchmetrics/functional/nominal/utils.py index 7da19ee8077..af0e3020c4e 100644 --- a/src/torchmetrics/functional/nominal/utils.py +++ b/src/torchmetrics/functional/nominal/utils.py @@ -78,6 +78,7 @@ def _compute_phi_squared_corrected( def _compute_rows_and_cols_corrected(n_rows: int, n_cols: int, confmat_sum: Tensor) -> Tuple[Tensor, Tensor]: + """Compute bias-corrected number of rows and columns.""" rows_corrected = n_rows - (n_rows - 1) ** 2 / (confmat_sum - 1) cols_corrected = n_cols - (n_cols - 1) ** 2 / (confmat_sum - 1) return rows_corrected, cols_corrected @@ -86,6 +87,7 @@ def _compute_rows_and_cols_corrected(n_rows: int, n_cols: int, confmat_sum: Tens def _compute_bias_corrected_values( phi_squared: Tensor, n_rows: int, 
n_cols: int, confmat_sum: Tensor ) -> Tuple[Tensor, Tensor, Tensor]: + """Compute bias-corrected Phi Squared and number of rows and columns.""" phi_squared_corrected = _compute_phi_squared_corrected(phi_squared, n_rows, n_cols, confmat_sum) rows_corrected, cols_corrected = _compute_rows_and_cols_corrected(n_rows, n_cols, confmat_sum) return phi_squared_corrected, rows_corrected, cols_corrected From 93ad3d2a6f64e0c57df9f4edbbc5160618c3e580 Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 18:40:13 +0100 Subject: [PATCH 04/22] Add Tschuprow's T --- docs/source/links.rst | 1 + docs/source/nominal/tschuprows_t.rst | 26 +++ src/torchmetrics/__init__.py | 3 +- src/torchmetrics/functional/__init__.py | 2 + .../functional/nominal/__init__.py | 1 + .../functional/nominal/cramers.py | 1 + .../functional/nominal/tschuprows.py | 183 ++++++++++++++++++ src/torchmetrics/nominal/__init__.py | 1 + src/torchmetrics/nominal/cramers.py | 5 +- src/torchmetrics/nominal/tschuprows.py | 102 ++++++++++ tests/unittests/nominal/test_tschuprows.py | 131 +++++++++++++ 11 files changed, 453 insertions(+), 3 deletions(-) create mode 100644 docs/source/nominal/tschuprows_t.rst create mode 100644 src/torchmetrics/functional/nominal/tschuprows.py create mode 100644 src/torchmetrics/nominal/tschuprows.py create mode 100644 tests/unittests/nominal/test_tschuprows.py diff --git a/docs/source/links.rst b/docs/source/links.rst index 687577ddcfa..4aa7f6e3a1a 100644 --- a/docs/source/links.rst +++ b/docs/source/links.rst @@ -97,3 +97,4 @@ .. _Kendall Rank Correlation Coefficient: https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient .. _The Treatment of Ties in Ranking Problems: https://www.jstor.org/stable/2332303 .. _LogCosh Error: https://arxiv.org/pdf/2101.10427.pdf +.. 
_Tschuprow's T: https://en.wikipedia.org/wiki/Tschuprow%27s_T diff --git a/docs/source/nominal/tschuprows_t.rst b/docs/source/nominal/tschuprows_t.rst new file mode 100644 index 00000000000..dafbea3f3e7 --- /dev/null +++ b/docs/source/nominal/tschuprows_t.rst @@ -0,0 +1,26 @@ +.. customcarditem:: + :header: Tschuprow's T + :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/tabular_classification.svg + :tags: Nominal + +############# +Tschuprow's T +############# + +Module Interface +________________ + +.. autoclass:: torchmetrics.TschuprowsT + :noindex: + +Functional Interface +____________________ + +.. autofunction:: torchmetrics.functional.tschuprows_t + :noindex: + +tschuprows_t_matrix +^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: torchmetrics.functional.nominal.tschuprows_t_matrix + :noindex: diff --git a/src/torchmetrics/__init__.py b/src/torchmetrics/__init__.py index 4906f5ee690..1086452c56f 100644 --- a/src/torchmetrics/__init__.py +++ b/src/torchmetrics/__init__.py @@ -53,7 +53,7 @@ UniversalImageQualityIndex, ) from torchmetrics.metric import Metric # noqa: E402 -from torchmetrics.nominal import CramersV # noqa: E402 +from torchmetrics.nominal import CramersV, TschuprowsT # noqa: E402 from torchmetrics.regression import ( # noqa: E402 ConcordanceCorrCoef, CosineSimilarity, @@ -186,6 +186,7 @@ "SymmetricMeanAbsolutePercentageError", "TotalVariation", "TranslationEditRate", + "TschuprowsT", "UniversalImageQualityIndex", "WeightedMeanAbsolutePercentageError", "WordErrorRate", diff --git a/src/torchmetrics/functional/__init__.py b/src/torchmetrics/functional/__init__.py index 60b538056ab..72965840270 100644 --- a/src/torchmetrics/functional/__init__.py +++ b/src/torchmetrics/functional/__init__.py @@ -43,6 +43,7 @@ from torchmetrics.functional.image.tv import total_variation from torchmetrics.functional.image.uqi import universal_image_quality_index from torchmetrics.functional.nominal.cramers import cramers_v +from 
torchmetrics.functional.nominal.tschuprows import tschuprows_t from torchmetrics.functional.pairwise.cosine import pairwise_cosine_similarity from torchmetrics.functional.pairwise.euclidean import pairwise_euclidean_distance from torchmetrics.functional.pairwise.linear import pairwise_linear_similarity @@ -165,6 +166,7 @@ "symmetric_mean_absolute_percentage_error", "total_variation", "translation_edit_rate", + "tschuprows_t", "universal_image_quality_index", "spectral_angle_mapper", "weighted_mean_absolute_percentage_error", diff --git a/src/torchmetrics/functional/nominal/__init__.py b/src/torchmetrics/functional/nominal/__init__.py index 415a7b842d5..b5c64e81686 100644 --- a/src/torchmetrics/functional/nominal/__init__.py +++ b/src/torchmetrics/functional/nominal/__init__.py @@ -12,3 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. from torchmetrics.functional.nominal.cramers import cramers_v, cramers_v_matrix # noqa: F401 +from torchmetrics.functional.nominal.tschuprows import tschuprows_t, tschuprows_t_matrix # noqa: F401 diff --git a/src/torchmetrics/functional/nominal/cramers.py b/src/torchmetrics/functional/nominal/cramers.py index 469a2646f67..2835207da1f 100644 --- a/src/torchmetrics/functional/nominal/cramers.py +++ b/src/torchmetrics/functional/nominal/cramers.py @@ -129,6 +129,7 @@ def cramers_v( >>> cramers_v(preds, target) tensor(0.5284) """ + _nominal_input_validation(nan_strategy, nan_replace_value) num_classes = len(torch.cat([preds, target]).unique()) confmat = _cramers_v_update(preds, target, num_classes, nan_strategy, nan_replace_value) return _cramers_v_compute(confmat, bias_correction) diff --git a/src/torchmetrics/functional/nominal/tschuprows.py b/src/torchmetrics/functional/nominal/tschuprows.py new file mode 100644 index 00000000000..09fe533cd0e --- /dev/null +++ b/src/torchmetrics/functional/nominal/tschuprows.py @@ -0,0 +1,183 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +from typing import Optional, Union + +import torch +from torch import Tensor +from typing_extensions import Literal + +from torchmetrics.functional.classification.confusion_matrix import _multiclass_confusion_matrix_update +from torchmetrics.functional.nominal.utils import ( + _compute_bias_corrected_values, + _compute_chi_squared, + _drop_empty_rows_and_cols, + _handle_nan_in_data, + _nominal_input_validation, + _unable_to_use_bias_correction_warning, +) + + +def _tschuprows_t_update( + preds: Tensor, + target: Tensor, + num_classes: int, + nan_strategy: Literal["replace", "drop"] = "replace", + nan_replace_value: Optional[Union[int, float]] = 0.0, +) -> Tensor: + """Computes the bins to update the confusion matrix with for Tschuprow's T calculation. 
+ + Args: + preds: 1D or 2D tensor of categorical (nominal) data + target: 1D or 2D tensor of categorical (nominal) data + num_classes: Integer specifying the number of classes + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` + + Returns: + Non-reduced confusion matrix + """ + preds = preds.argmax(1) if preds.ndim == 2 else preds + target = target.argmax(1) if target.ndim == 2 else target + preds, target = _handle_nan_in_data(preds, target, nan_strategy, nan_replace_value) + return _multiclass_confusion_matrix_update(preds, target, num_classes) + + +def _tschuprows_t_compute(confmat: Tensor, bias_correction: bool) -> Tensor: + """Compute Tschuprow's T statistic based on a pre-computed confusion matrix. + + Args: + confmat: Confusion matrix for observed data + bias_correction: Indication of whether to use bias correction. + + Returns: + Tschuprow's T statistic + """ + confmat = _drop_empty_rows_and_cols(confmat) + cm_sum = confmat.sum() + chi_squared = _compute_chi_squared(confmat, bias_correction) + phi_squared = chi_squared / cm_sum + n_rows, n_cols = confmat.shape + + if bias_correction: + phi_squared_corrected, rows_corrected, cols_corrected = _compute_bias_corrected_values( + phi_squared, n_rows, n_cols, cm_sum + ) + if min(rows_corrected, cols_corrected) == 1: + _unable_to_use_bias_correction_warning(metric_name="Tschuprow's T") + return torch.tensor(float("nan"), device=confmat.device) + tschuprows_t_value = torch.sqrt(phi_squared_corrected / torch.sqrt((rows_corrected - 1) * (cols_corrected - 1))) + else: + n_rows = torch.tensor(n_rows, device=phi_squared.device) + n_cols = torch.tensor(n_cols, device=phi_squared.device) + tschuprows_t_value = torch.sqrt(phi_squared / torch.sqrt((n_rows - 1) * (n_cols - 1))) + return tschuprows_t_value.clamp(0.0, 1.0) + + +def tschuprows_t( + preds: Tensor, + target: Tensor, + bias_correction: bool = True, + nan_strategy: 
Literal["replace", "drop"] = "replace", + nan_replace_value: Optional[Union[int, float]] = 0.0, +) -> Tensor: + r"""Compute `Tschuprow's T`_ statistic measuring the association between two categorical (nominal) data series. + + .. math:: + T = \sqrt{\frac{\chi^2 / n}{\sqrt{(r - 1) * (k - 1)}}} + + where + + .. math:: + \chi^2 = \sum_{i,j} \frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + + Tschuprow's T is a symmetric coefficient, i.e. + + .. math:: + T(preds, target) = T(target, preds) + + The output values lie in [0, 1]. + + Args: + preds: 1D or 2D tensor of categorical (nominal) data + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + target: 1D or 2D tensor of categorical (nominal) data + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + bias_correction: Indication of whether to use bias correction. + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` + + Returns: + Tschuprow's T statistic + + Example: + >>> from torchmetrics.functional import tschuprows_t + >>> _ = torch.manual_seed(42) + >>> preds = torch.randint(0, 4, (100,)) + >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) + >>> tschuprows_t(preds, target) + tensor(0.5284) + """ + _nominal_input_validation(nan_strategy, nan_replace_value) + num_classes = len(torch.cat([preds, target]).unique()) + confmat = _tschuprows_t_update(preds, target, num_classes, nan_strategy, nan_replace_value) + return _tschuprows_t_compute(confmat, bias_correction) + + +def tschuprows_t_matrix( + matrix: Tensor, + bias_correction: bool = True, + nan_strategy: Literal["replace", "drop"] = "replace", + nan_replace_value: Optional[Union[int, float]] = 0.0, +) -> Tensor: + r"""Compute `Tschuprow's T`_ statistic between a set of multiple variables. 
+ + This can serve as a convenient tool to compute Tschuprow's T statistic for analyses of correlation between + categorical variables in your dataset. + + Args: + matrix: A tensor of categorical (nominal) data, where: + - rows represent a number of data points + - columns represent a number of categorical (nominal) features + bias_correction: Indication of whether to use bias correction. + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` + + Returns: + Tschuprow's T statistic for a dataset of categorical variables + + Example: + >>> from torchmetrics.functional.nominal import tschuprows_t_matrix + >>> _ = torch.manual_seed(42) + >>> matrix = torch.randint(0, 4, (200, 5)) + >>> tschuprows_t_matrix(matrix) + tensor([[1.0000, 0.0637, 0.0000, 0.0542, 0.1337], + [0.0637, 1.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.0000, 0.0000, 0.0649], + [0.0542, 0.0000, 0.0000, 1.0000, 0.1100], + [0.1337, 0.0000, 0.0649, 0.1100, 1.0000]]) + """ + _nominal_input_validation(nan_strategy, nan_replace_value) + num_variables = matrix.shape[1] + tschuprows_t_matrix_matrix_value = torch.ones(num_variables, num_variables, device=matrix.device) + for i, j in itertools.combinations(range(num_variables), 2): + x, y = matrix[:, i], matrix[:, j] + num_classes = len(torch.cat([x, y]).unique()) + confmat = _tschuprows_t_update(x, y, num_classes, nan_strategy, nan_replace_value) + tschuprows_t_matrix_matrix_value[i, j] = tschuprows_t_matrix_matrix_value[j, i] = _tschuprows_t_compute( + confmat, bias_correction + ) + return tschuprows_t_matrix_matrix_value diff --git a/src/torchmetrics/nominal/__init__.py b/src/torchmetrics/nominal/__init__.py index c4f7d3d7208..d087dad052e 100644 --- a/src/torchmetrics/nominal/__init__.py +++ b/src/torchmetrics/nominal/__init__.py @@ -12,3 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from torchmetrics.nominal.cramers import CramersV # noqa: F401 +from torchmetrics.nominal.tschuprows import TschuprowsT # noqa: F401 diff --git a/src/torchmetrics/nominal/cramers.py b/src/torchmetrics/nominal/cramers.py index 167e788e5c8..1967f83a879 100644 --- a/src/torchmetrics/nominal/cramers.py +++ b/src/torchmetrics/nominal/cramers.py @@ -17,7 +17,8 @@ from torch import Tensor from typing_extensions import Literal -from torchmetrics.functional.nominal.cramers import _cramers_input_validation, _cramers_v_compute, _cramers_v_update +from torchmetrics.functional.nominal.cramers import _cramers_v_compute, _cramers_v_update +from torchmetrics.functional.nominal.utils import _nominal_input_validation from torchmetrics.metric import Metric @@ -76,7 +77,7 @@ def __init__( self.num_classes = num_classes self.bias_correction = bias_correction - _cramers_input_validation(nan_strategy, nan_replace_value) + _nominal_input_validation(nan_strategy, nan_replace_value) self.nan_strategy = nan_strategy self.nan_replace_value = nan_replace_value diff --git a/src/torchmetrics/nominal/tschuprows.py b/src/torchmetrics/nominal/tschuprows.py new file mode 100644 index 00000000000..c5a57c0a90e --- /dev/null +++ b/src/torchmetrics/nominal/tschuprows.py @@ -0,0 +1,102 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import Any, Optional, Union + +import torch +from torch import Tensor +from typing_extensions import Literal + +from torchmetrics.functional.nominal.tschuprows import _tschuprows_t_compute, _tschuprows_t_update +from torchmetrics.functional.nominal.utils import _nominal_input_validation +from torchmetrics.metric import Metric + + +class TschuprowsT(Metric): + r"""Compute `Tschuprow's T`_ statistic measuring the association between two categorical (nominal) data series. + + .. math:: + T = \sqrt{\frac{\chi^2 / n}{\sqrt{(r - 1) * (k - 1)}}} + + where + + .. math:: + \chi^2 = \sum_{i,j} \frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + + Tschuprow's T is a symmetric coefficient, i.e. + + .. math:: + T(preds, target) = T(target, preds) + + The output values lie in [0, 1]. + + Args: + num_classes: Integer specifying the number of classes + bias_correction: Indication of whether to use bias correction. + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` + kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
+ + Returns: + Tschuprow's T statistic + + Example: + >>> from torchmetrics import TschuprowsT + >>> _ = torch.manual_seed(42) + >>> preds = torch.randint(0, 4, (100,)) + >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) + >>> tschuprows_t = TschuprowsT(num_classes=5) + >>> tschuprows_t(preds, target) + tensor(0.5284) + """ + + full_state_update = False + is_differentiable = False + higher_is_better = False + confmat: Tensor + + def __init__( + self, + num_classes: int, + bias_correction: bool = True, + nan_strategy: Literal["replace", "drop"] = "replace", + nan_replace_value: Optional[Union[int, float]] = 0.0, + **kwargs: Any, + ): + super().__init__(**kwargs) + self.num_classes = num_classes + self.bias_correction = bias_correction + + _nominal_input_validation(nan_strategy, nan_replace_value) + self.nan_strategy = nan_strategy + self.nan_replace_value = nan_replace_value + + self.add_state("confmat", torch.zeros(num_classes, num_classes), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: + """Update state with predictions and targets. + + Args: + preds: 1D or 2D tensor of categorical (nominal) data + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + target: 1D or 2D tensor of categorical (nominal) data + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + """ + confmat = _tschuprows_t_update(preds, target, self.num_classes, self.nan_strategy, self.nan_replace_value) + self.confmat += confmat + + def compute(self) -> Tensor: + """Compute Tschuprow's T statistic.""" + return _tschuprows_t_compute(self.confmat, self.bias_correction) diff --git a/tests/unittests/nominal/test_tschuprows.py b/tests/unittests/nominal/test_tschuprows.py new file mode 100644 index 00000000000..c8f6560fa65 --- /dev/null +++ b/tests/unittests/nominal/test_tschuprows.py @@ -0,0 +1,131 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +import operator +from collections import namedtuple + +import pandas as pd +import pytest +import torch +from scipy.stats.contingency import association + +from torchmetrics.functional.nominal.tschuprows import tschuprows_t, tschuprows_t_matrix +from torchmetrics.nominal.tschuprows import TschuprowsT +from torchmetrics.utilities.imports import _compare_version +from unittests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester + +Input = namedtuple("Input", ["preds", "target"]) +NUM_CLASSES = 4 + +_input_default = Input( + preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), + target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_logits = Input( + preds=torch.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES), target=torch.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES) +) + +# No testing with replacing NaN's values is done as not supported in SciPy + + +@pytest.fixture +def _matrix_input(): + matrix = torch.cat( + [ + torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES * BATCH_SIZE, 1), dtype=torch.float), + torch.randint(high=NUM_CLASSES + 2, size=(NUM_BATCHES * BATCH_SIZE, 1), dtype=torch.float), + torch.randint(high=2, size=(NUM_BATCHES * BATCH_SIZE, 1), dtype=torch.float), + ], + dim=-1, + ) + return matrix + + +def _sk_tschuprows_t(preds, target): + preds = preds.argmax(1) if preds.ndim == 2 else preds + target = target.argmax(1) if target.ndim == 2 else 
target + preds, target = preds.numpy().astype(int), target.numpy().astype(int) + observed_values = pd.crosstab(preds, target) + + t = association(observed=observed_values, method="tschuprow") + return torch.tensor(t) + + +def _sk_tschuprows_t_matrix(matrix): + num_variables = matrix.shape[1] + tschuprows_t_matrix_value = torch.ones(num_variables, num_variables) + for i, j in itertools.combinations(range(num_variables), 2): + x, y = matrix[:, i], matrix[:, j] + tschuprows_t_matrix_value[i, j] = tschuprows_t_matrix_value[j, i] = _sk_tschuprows_t(x, y) + return tschuprows_t_matrix_value + + +@pytest.mark.skipif( + _compare_version("pandas", operator.lt, "1.3.2"), reason="`dython` package requires `pandas>=1.3.2`" +) +@pytest.mark.skipif( # TODO: testing on CUDA fails with pandas 1.3.5, and newer is not available for python 3.7 + torch.cuda.is_available(), reason="Tests fail on CUDA with the most up-to-date available pandas" +) +@pytest.mark.parametrize( + "preds, target", + [ + (_input_default.preds, _input_default.target), + (_input_logits.preds, _input_logits.target), + ], +) +class TestTschuprowsT(MetricTester): + atol = 1e-5 + + @pytest.mark.parametrize("ddp", [False, True]) + @pytest.mark.parametrize("dist_sync_on_step", [False, True]) + def test_tschuprows_ta(self, ddp, dist_sync_on_step, preds, target): + metric_args = {"bias_correction": False, "num_classes": NUM_CLASSES} + self.run_class_metric_test( + ddp=ddp, + dist_sync_on_step=dist_sync_on_step, + preds=preds, + target=target, + metric_class=TschuprowsT, + sk_metric=_sk_tschuprows_t, + metric_args=metric_args, + ) + + def test_tschuprows_t_functional(self, preds, target): + metric_args = {"bias_correction": False} + self.run_functional_metric_test( + preds, target, metric_functional=tschuprows_t, sk_metric=_sk_tschuprows_t, metric_args=metric_args + ) + + def test_tschuprows_t_differentiability(self, preds, target): + metric_args = {"bias_correction": False, "num_classes": NUM_CLASSES} + 
self.run_differentiability_test( + preds, + target, + metric_module=TschuprowsT, + metric_functional=tschuprows_t, + metric_args=metric_args, + ) + + +@pytest.mark.skipif( + _compare_version("pandas", operator.lt, "1.3.2"), reason="`dython` package requires `pandas>=1.3.2`" +) +@pytest.mark.skipif( # TODO: testing on CUDA fails with pandas 1.3.5, and newer is not available for python 3.7 + torch.cuda.is_available(), reason="Tests fail on CUDA with the most up-to-date available pandas" +) +def test_tschuprows_t_matrix(_matrix_input): + tm_score = tschuprows_t_matrix(_matrix_input, bias_correction=False) + reference_score = _sk_tschuprows_t_matrix(_matrix_input) + assert torch.allclose(tm_score, reference_score) From bd89f1996715de144cdfaf40b26f76dc7766c631 Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 19:04:34 +0100 Subject: [PATCH 05/22] Add Pearson's Contingency Coefficient --- docs/source/links.rst | 1 + .../pearsons_contingency_coefficient.rst | 26 +++ requirements/nominal_test.txt | 1 + src/torchmetrics/__init__.py | 3 +- src/torchmetrics/functional/__init__.py | 2 + .../functional/nominal/__init__.py | 4 + .../functional/nominal/cramers.py | 2 +- .../functional/nominal/pearson.py | 165 ++++++++++++++++++ .../functional/nominal/tschuprows.py | 8 +- src/torchmetrics/nominal/__init__.py | 1 + src/torchmetrics/nominal/cramers.py | 2 +- src/torchmetrics/nominal/pearson.py | 105 +++++++++++ src/torchmetrics/nominal/tschuprows.py | 2 +- tests/unittests/nominal/test_pearson.py | 133 ++++++++++++++ 14 files changed, 447 insertions(+), 8 deletions(-) create mode 100644 docs/source/nominal/pearsons_contingency_coefficient.rst create mode 100644 src/torchmetrics/functional/nominal/pearson.py create mode 100644 src/torchmetrics/nominal/pearson.py create mode 100644 tests/unittests/nominal/test_pearson.py diff --git a/docs/source/links.rst b/docs/source/links.rst index 4aa7f6e3a1a..b6aa3e8cb61 100644 --- a/docs/source/links.rst +++ b/docs/source/links.rst @@ 
-98,3 +98,4 @@ .. _The Treatment of Ties in Ranking Problems: https://www.jstor.org/stable/2332303 .. _LogCosh Error: https://arxiv.org/pdf/2101.10427.pdf .. _Tschuprow's T: https://en.wikipedia.org/wiki/Tschuprow%27s_T +.. _Pearson's Contingency Coefficient: https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/pearcont.htm diff --git a/docs/source/nominal/pearsons_contingency_coefficient.rst b/docs/source/nominal/pearsons_contingency_coefficient.rst new file mode 100644 index 00000000000..6715ade54d6 --- /dev/null +++ b/docs/source/nominal/pearsons_contingency_coefficient.rst @@ -0,0 +1,26 @@ +.. customcarditem:: + :header: Pearson's Contingency Coefficient + :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/tabular_classification.svg + :tags: Nominal + +################################# +Pearson's Contingency Coefficient +################################# + +Module Interface +________________ + +.. autoclass:: torchmetrics.PearsonsContingencyCoefficient + :noindex: + +Functional Interface +____________________ + +.. autofunction:: torchmetrics.functional.pearsons_contingency_coefficient + :noindex: + +pearsons_contingency_coefficient_matrix +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
autofunction:: torchmetrics.functional.nominal.pearsons_contingency_coefficient_matrix + :noindex: diff --git a/requirements/nominal_test.txt b/requirements/nominal_test.txt index ff0f19472d4..d7add64db38 100644 --- a/requirements/nominal_test.txt +++ b/requirements/nominal_test.txt @@ -1,2 +1,3 @@ pandas # cannot pin version due to numpy version incompatibility dython # todo: pin version, but some version resolution issue +scipy diff --git a/src/torchmetrics/__init__.py b/src/torchmetrics/__init__.py index 1086452c56f..6fe0bb8d8a8 100644 --- a/src/torchmetrics/__init__.py +++ b/src/torchmetrics/__init__.py @@ -53,7 +53,7 @@ UniversalImageQualityIndex, ) from torchmetrics.metric import Metric # noqa: E402 -from torchmetrics.nominal import CramersV, TschuprowsT # noqa: E402 +from torchmetrics.nominal import CramersV, PearsonsContingencyCoefficient, TschuprowsT # noqa: E402 from torchmetrics.regression import ( # noqa: E402 ConcordanceCorrCoef, CosineSimilarity, @@ -152,6 +152,7 @@ "MultioutputWrapper", "MultiScaleStructuralSimilarityIndexMeasure", "PearsonCorrCoef", + "PearsonsContingencyCoefficient", "PermutationInvariantTraining", "Perplexity", "Precision", diff --git a/src/torchmetrics/functional/__init__.py b/src/torchmetrics/functional/__init__.py index 72965840270..a5766c82381 100644 --- a/src/torchmetrics/functional/__init__.py +++ b/src/torchmetrics/functional/__init__.py @@ -43,6 +43,7 @@ from torchmetrics.functional.image.tv import total_variation from torchmetrics.functional.image.uqi import universal_image_quality_index from torchmetrics.functional.nominal.cramers import cramers_v +from torchmetrics.functional.nominal.pearson import pearsons_contingency_coefficient from torchmetrics.functional.nominal.tschuprows import tschuprows_t from torchmetrics.functional.pairwise.cosine import pairwise_cosine_similarity from torchmetrics.functional.pairwise.euclidean import pairwise_euclidean_distance @@ -132,6 +133,7 @@ "pairwise_linear_similarity", 
"pairwise_manhattan_distance", "pearson_corrcoef", + "pearsons_contingency_coefficient", "permutation_invariant_training", "perplexity", "pit_permutate", diff --git a/src/torchmetrics/functional/nominal/__init__.py b/src/torchmetrics/functional/nominal/__init__.py index b5c64e81686..f7588dbb7df 100644 --- a/src/torchmetrics/functional/nominal/__init__.py +++ b/src/torchmetrics/functional/nominal/__init__.py @@ -12,4 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. from torchmetrics.functional.nominal.cramers import cramers_v, cramers_v_matrix # noqa: F401 +from torchmetrics.functional.nominal.pearson import ( # noqa: F401 + pearsons_contingency_coefficient, + pearsons_contingency_coefficient_matrix, +) from torchmetrics.functional.nominal.tschuprows import tschuprows_t, tschuprows_t_matrix # noqa: F401 diff --git a/src/torchmetrics/functional/nominal/cramers.py b/src/torchmetrics/functional/nominal/cramers.py index 2835207da1f..2ef2299c0cb 100644 --- a/src/torchmetrics/functional/nominal/cramers.py +++ b/src/torchmetrics/functional/nominal/cramers.py @@ -93,7 +93,7 @@ def cramers_v( r"""Compute `Cramer's V`_ statistic measuring the association between two categorical (nominal) data series. .. math:: - V = \sqrt{\frac{\chi^2 / 2}{\min(r - 1, k - 1)}} + V = \sqrt{\frac{\chi^2 / n}{\min(r - 1, k - 1)}} where diff --git a/src/torchmetrics/functional/nominal/pearson.py b/src/torchmetrics/functional/nominal/pearson.py new file mode 100644 index 00000000000..6109b49037d --- /dev/null +++ b/src/torchmetrics/functional/nominal/pearson.py @@ -0,0 +1,165 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +from typing import Optional, Union + +import torch +from torch import Tensor +from typing_extensions import Literal + +from torchmetrics.functional.classification.confusion_matrix import _multiclass_confusion_matrix_update +from torchmetrics.functional.nominal.utils import ( + _compute_chi_squared, + _drop_empty_rows_and_cols, + _handle_nan_in_data, + _nominal_input_validation, +) + + +def _pearsons_contingency_coefficient_update( + preds: Tensor, + target: Tensor, + num_classes: int, + nan_strategy: Literal["replace", "drop"] = "replace", + nan_replace_value: Optional[Union[int, float]] = 0.0, +) -> Tensor: + """Computes the bins to update the confusion matrix with for Pearson's Contingency Coefficient calculation. + + Args: + preds: 1D or 2D tensor of categorical (nominal) data + target: 1D or 2D tensor of categorical (nominal) data + num_classes: Integer specifing the number of classes + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN`s when ``nan_strategy = 'replace``` + + Returns: + Non-reduced confusion matrix + """ + preds = preds.argmax(1) if preds.ndim == 2 else preds + target = target.argmax(1) if target.ndim == 2 else target + preds, target = _handle_nan_in_data(preds, target, nan_strategy, nan_replace_value) + return _multiclass_confusion_matrix_update(preds, target, num_classes) + + +def _pearsons_contingency_coefficient_compute(confmat: Tensor) -> Tensor: + """Compute Pearson's Contingency Coefficient based on a pre-computed confusion matrix. 
+ + Args: + confmat: Confusion matrix for observed data + + Returns: + Pearson's Contingency Coefficient + """ + confmat = _drop_empty_rows_and_cols(confmat) + cm_sum = confmat.sum() + chi_squared = _compute_chi_squared(confmat, bias_correction=False) + phi_squared = chi_squared / cm_sum + + tschuprows_t_value = torch.sqrt(phi_squared / (1 + phi_squared)) + return tschuprows_t_value.clamp(0.0, 1.0) + + +def pearsons_contingency_coefficient( + preds: Tensor, + target: Tensor, + nan_strategy: Literal["replace", "drop"] = "replace", + nan_replace_value: Optional[Union[int, float]] = 0.0, +) -> Tensor: + r"""Compute `Pearson's Contingency Coefficient`_ measuring the association between two categorical (nominal) + ata series. + + .. math:: + T = \sqrt{\frac{\chi^2 / n}{\frac{1 + \chi^2 / n}}} + + where + + .. math:: + \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + + Pearson's Contingency Coefficient is a symmetric coefficient, i.e. + + .. math:: + T(preds, target) = T(target, preds) + + The output values lies in [0, 1]. 
+ + Args: + preds: 1D or 2D tensor of categorical (nominal) data + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + target: 1D or 2D tensor of categorical (nominal) data + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` + + Returns: + Pearson's Contingency Coefficient + + Example: + >>> from torchmetrics.functional import pearsons_contingency_coefficient + >>> _ = torch.manual_seed(42) + >>> preds = torch.randint(0, 4, (100,)) + >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) + >>> pearsons_contingency_coefficient(preds, target) + tensor(0.6948) + """ + _nominal_input_validation(nan_strategy, nan_replace_value) + num_classes = len(torch.cat([preds, target]).unique()) + confmat = _pearsons_contingency_coefficient_update(preds, target, num_classes, nan_strategy, nan_replace_value) + return _pearsons_contingency_coefficient_compute(confmat) + + +def pearsons_contingency_coefficient_matrix( + matrix: Tensor, + nan_strategy: Literal["replace", "drop"] = "replace", + nan_replace_value: Optional[Union[int, float]] = 0.0, +) -> Tensor: + r"""Compute `Pearson's Contingency Coefficient`_ statistic between a set of multiple variables. + + This can serve as a convenient tool to compute Pearson's Contingency Coefficient for analyses + of correlation between categorical variables in your dataset. 
+ + Args: + matrix: A tensor of categorical (nominal) data, where: + - rows represent a number of data points + - columns represent a number of categorical (nominal) features + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` + + Returns: + Pearson's Contingency Coefficient statistic for a dataset of categorical variables + + Example: + >>> from torchmetrics.functional.nominal import pearsons_contingency_coefficient_matrix + >>> _ = torch.manual_seed(42) + >>> matrix = torch.randint(0, 4, (200, 5)) + >>> pearsons_contingency_coefficient_matrix(matrix) + tensor([[1.0000, 0.2326, 0.1959, 0.2262, 0.2989], + [0.2326, 1.0000, 0.1386, 0.1895, 0.1329], + [0.1959, 0.1386, 1.0000, 0.1840, 0.2335], + [0.2262, 0.1895, 0.1840, 1.0000, 0.2737], + [0.2989, 0.1329, 0.2335, 0.2737, 1.0000]]) + """ + _nominal_input_validation(nan_strategy, nan_replace_value) + num_variables = matrix.shape[1] + pearsons_cont_coef_matrix_value = torch.ones(num_variables, num_variables, device=matrix.device) + for i, j in itertools.combinations(range(num_variables), 2): + x, y = matrix[:, i], matrix[:, j] + num_classes = len(torch.cat([x, y]).unique()) + confmat = _pearsons_contingency_coefficient_update(x, y, num_classes, nan_strategy, nan_replace_value) + pearsons_cont_coef_matrix_value[i, j] = pearsons_cont_coef_matrix_value[ + j, i + ] = _pearsons_contingency_coefficient_compute(confmat) + return pearsons_cont_coef_matrix_value diff --git a/src/torchmetrics/functional/nominal/tschuprows.py b/src/torchmetrics/functional/nominal/tschuprows.py index 09fe533cd0e..4ba163db8fe 100644 --- a/src/torchmetrics/functional/nominal/tschuprows.py +++ b/src/torchmetrics/functional/nominal/tschuprows.py @@ -95,7 +95,7 @@ def tschuprows_t( r"""Compute `Tschuprow's T`_ statistic measuring the association between two categorical (nominal) data series. .. 
math:: - T = \sqrt{\frac{\chi^2 / 2}{\sqrt{(r - 1) * (k - 1)}}} + T = \sqrt{\frac{\chi^2 / n}{\sqrt{(r - 1) * (k - 1)}}} where @@ -172,12 +172,12 @@ def tschuprows_t_matrix( """ _nominal_input_validation(nan_strategy, nan_replace_value) num_variables = matrix.shape[1] - tschuprows_t_matrix_matrix_value = torch.ones(num_variables, num_variables, device=matrix.device) + tschuprows_t_matrix_value = torch.ones(num_variables, num_variables, device=matrix.device) for i, j in itertools.combinations(range(num_variables), 2): x, y = matrix[:, i], matrix[:, j] num_classes = len(torch.cat([x, y]).unique()) confmat = _tschuprows_t_update(x, y, num_classes, nan_strategy, nan_replace_value) - tschuprows_t_matrix_matrix_value[i, j] = tschuprows_t_matrix_matrix_value[j, i] = _tschuprows_t_compute( + tschuprows_t_matrix_value[i, j] = tschuprows_t_matrix_value[j, i] = _tschuprows_t_compute( confmat, bias_correction ) - return tschuprows_t_matrix_matrix_value + return tschuprows_t_matrix_value diff --git a/src/torchmetrics/nominal/__init__.py b/src/torchmetrics/nominal/__init__.py index d087dad052e..2049c2239ac 100644 --- a/src/torchmetrics/nominal/__init__.py +++ b/src/torchmetrics/nominal/__init__.py @@ -12,4 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. from torchmetrics.nominal.cramers import CramersV # noqa: F401 +from torchmetrics.nominal.pearson import PearsonsContingencyCoefficient # noqa: F401 from torchmetrics.nominal.tschuprows import TschuprowsT # noqa: F401 diff --git a/src/torchmetrics/nominal/cramers.py b/src/torchmetrics/nominal/cramers.py index 1967f83a879..d8ce35343ba 100644 --- a/src/torchmetrics/nominal/cramers.py +++ b/src/torchmetrics/nominal/cramers.py @@ -26,7 +26,7 @@ class CramersV(Metric): r"""Compute `Cramer's V`_ statistic measuring the association between two categorical (nominal) data series. .. 
math:: - V = \sqrt{\frac{\chi^2 / 2}{\min(r - 1, k - 1)}} + V = \sqrt{\frac{\chi^2 / n}{\min(r - 1, k - 1)}} where diff --git a/src/torchmetrics/nominal/pearson.py b/src/torchmetrics/nominal/pearson.py new file mode 100644 index 00000000000..3bd6efd563f --- /dev/null +++ b/src/torchmetrics/nominal/pearson.py @@ -0,0 +1,105 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional, Union + +import torch +from torch import Tensor +from typing_extensions import Literal + +from torchmetrics.functional.nominal.pearson import ( + _pearsons_contingency_coefficient_compute, + _pearsons_contingency_coefficient_update, +) +from torchmetrics.functional.nominal.utils import _nominal_input_validation +from torchmetrics.metric import Metric + + +class PearsonsContingencyCoefficient(Metric): + r"""Compute `Pearson's Contingency Coefficient`_ statistic measuring the association between two categorical + (nominal) data series. + + .. math:: + T = \sqrt{\frac{\chi^2 / n}{\frac{1 + \chi^2 / n}}} + + where + + .. math:: + \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + + Pearson's Contingency Coefficient is a symmetric coefficient, i.e. + + .. math:: + T(preds, target) = T(target, preds) + + The output values lies in [0, 1]. 
+ + Args: + num_classes: Integer specifing the number of classes + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` + kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. + + Returns: + Pearson's Contingency Coefficient statistic + + Example: + >>> from torchmetrics import PearsonsContingencyCoefficient + >>> _ = torch.manual_seed(42) + >>> preds = torch.randint(0, 4, (100,)) + >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) + >>> pearsons_contingency_coefficient = PearsonsContingencyCoefficient(num_classes=5) + >>> pearsons_contingency_coefficient(preds, target) + tensor(0.6948) + """ + + full_state_update = False + is_differentiable = False + higher_is_better = False + confmat: Tensor + + def __init__( + self, + num_classes: int, + nan_strategy: Literal["replace", "drop"] = "replace", + nan_replace_value: Optional[Union[int, float]] = 0.0, + **kwargs: Any, + ): + super().__init__(**kwargs) + self.num_classes = num_classes + + _nominal_input_validation(nan_strategy, nan_replace_value) + self.nan_strategy = nan_strategy + self.nan_replace_value = nan_replace_value + + self.add_state("confmat", torch.zeros(num_classes, num_classes), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: + """Update state with predictions and targets. 
+ + Args: + preds: 1D or 2D tensor of categorical (nominal) data + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + target: 1D or 2D tensor of categorical (nominal) data + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + """ + confmat = _pearsons_contingency_coefficient_update( + preds, target, self.num_classes, self.nan_strategy, self.nan_replace_value + ) + self.confmat += confmat + + def compute(self) -> Tensor: + """Computer Pearson's Contingency Coefficient statistic.""" + return _pearsons_contingency_coefficient_compute(self.confmat) diff --git a/src/torchmetrics/nominal/tschuprows.py b/src/torchmetrics/nominal/tschuprows.py index c5a57c0a90e..008778b2c43 100644 --- a/src/torchmetrics/nominal/tschuprows.py +++ b/src/torchmetrics/nominal/tschuprows.py @@ -26,7 +26,7 @@ class TschuprowsT(Metric): r"""Compute `Tschuprow's T`_ statistic measuring the association between two categorical (nominal) data series. .. math:: - T = \sqrt{\frac{\chi^2 / 2}{\sqrt{(r - 1) * (k - 1)}}} + T = \sqrt{\frac{\chi^2 / n}{\sqrt{(r - 1) * (k - 1)}}} where diff --git a/tests/unittests/nominal/test_pearson.py b/tests/unittests/nominal/test_pearson.py new file mode 100644 index 00000000000..558c784d140 --- /dev/null +++ b/tests/unittests/nominal/test_pearson.py @@ -0,0 +1,133 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import itertools +import operator +from collections import namedtuple + +import pandas as pd +import pytest +import torch +from scipy.stats.contingency import association + +from torchmetrics.functional.nominal.pearson import ( + pearsons_contingency_coefficient, + pearsons_contingency_coefficient_matrix, +) +from torchmetrics.nominal.pearson import PearsonsContingencyCoefficient +from torchmetrics.utilities.imports import _compare_version +from unittests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester + +Input = namedtuple("Input", ["preds", "target"]) +NUM_CLASSES = 4 + +_input_default = Input( + preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), + target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_logits = Input( + preds=torch.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES), target=torch.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES) +) + +# No testing with replacing NaN's values is done as not supported in SciPy + + +@pytest.fixture +def _matrix_input(): + matrix = torch.cat( + [ + torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES * BATCH_SIZE, 1), dtype=torch.float), + torch.randint(high=NUM_CLASSES + 2, size=(NUM_BATCHES * BATCH_SIZE, 1), dtype=torch.float), + torch.randint(high=2, size=(NUM_BATCHES * BATCH_SIZE, 1), dtype=torch.float), + ], + dim=-1, + ) + return matrix + + +def _sk_pearsons_t(preds, target): + preds = preds.argmax(1) if preds.ndim == 2 else preds + target = target.argmax(1) if target.ndim == 2 else target + preds, target = preds.numpy().astype(int), target.numpy().astype(int) + observed_values = pd.crosstab(preds, target) + + t = association(observed=observed_values, method="pearson") + return torch.tensor(t) + + +def _sk_pearsons_t_matrix(matrix): + num_variables = matrix.shape[1] + pearsons_t_matrix_value = torch.ones(num_variables, num_variables) + for i, j in itertools.combinations(range(num_variables), 2): + x, y = matrix[:, i], matrix[:, j] + pearsons_t_matrix_value[i, j] = 
pearsons_t_matrix_value[j, i] = _sk_pearsons_t(x, y) + return pearsons_t_matrix_value + + +@pytest.mark.skipif( + _compare_version("pandas", operator.lt, "1.3.2"), reason="`dython` package requires `pandas>=1.3.2`" +) +@pytest.mark.skipif( # TODO: testing on CUDA fails with pandas 1.3.5, and newer is not available for python 3.7 + torch.cuda.is_available(), reason="Tests fail on CUDA with the most up-to-date available pandas" +) +@pytest.mark.parametrize( + "preds, target", + [ + (_input_default.preds, _input_default.target), + (_input_logits.preds, _input_logits.target), + ], +) +class TestPearsonsContingencyCoefficient(MetricTester): + atol = 1e-5 + + @pytest.mark.parametrize("ddp", [False, True]) + @pytest.mark.parametrize("dist_sync_on_step", [False, True]) + def test_pearsons_ta(self, ddp, dist_sync_on_step, preds, target): + metric_args = {"num_classes": NUM_CLASSES} + self.run_class_metric_test( + ddp=ddp, + dist_sync_on_step=dist_sync_on_step, + preds=preds, + target=target, + metric_class=PearsonsContingencyCoefficient, + sk_metric=_sk_pearsons_t, + metric_args=metric_args, + ) + + def test_pearsons_t_functional(self, preds, target): + self.run_functional_metric_test( + preds, target, metric_functional=pearsons_contingency_coefficient, sk_metric=_sk_pearsons_t + ) + + def test_pearsons_t_differentiability(self, preds, target): + metric_args = {"num_classes": NUM_CLASSES} + self.run_differentiability_test( + preds, + target, + metric_module=PearsonsContingencyCoefficient, + metric_functional=pearsons_contingency_coefficient, + metric_args=metric_args, + ) + + +@pytest.mark.skipif( + _compare_version("pandas", operator.lt, "1.3.2"), reason="`dython` package requires `pandas>=1.3.2`" +) +@pytest.mark.skipif( # TODO: testing on CUDA fails with pandas 1.3.5, and newer is not available for python 3.7 + torch.cuda.is_available(), reason="Tests fail on CUDA with the most up-to-date available pandas" +) +def 
test_pearsons_contingency_coefficient_matrix(_matrix_input): + tm_score = pearsons_contingency_coefficient_matrix(_matrix_input) + reference_score = _sk_pearsons_t_matrix(_matrix_input) + assert torch.allclose(tm_score, reference_score) From 911fc9c815d8eb90d25714c8050f82df77301646 Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 19:09:12 +0100 Subject: [PATCH 06/22] Fix doctest for Tschuprow's T --- src/torchmetrics/functional/nominal/tschuprows.py | 2 +- src/torchmetrics/nominal/tschuprows.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/torchmetrics/functional/nominal/tschuprows.py b/src/torchmetrics/functional/nominal/tschuprows.py index 4ba163db8fe..4251e5a479a 100644 --- a/src/torchmetrics/functional/nominal/tschuprows.py +++ b/src/torchmetrics/functional/nominal/tschuprows.py @@ -129,7 +129,7 @@ def tschuprows_t( >>> preds = torch.randint(0, 4, (100,)) >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) >>> tschuprows_t(preds, target) - tensor(0.5284) + tensor(0.4930) """ _nominal_input_validation(nan_strategy, nan_replace_value) num_classes = len(torch.cat([preds, target]).unique()) diff --git a/src/torchmetrics/nominal/tschuprows.py b/src/torchmetrics/nominal/tschuprows.py index 008778b2c43..90e15f71a72 100644 --- a/src/torchmetrics/nominal/tschuprows.py +++ b/src/torchmetrics/nominal/tschuprows.py @@ -57,7 +57,7 @@ class TschuprowsT(Metric): >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) >>> tschuprows_t = TschuprowsT(num_classes=5) >>> tschuprows_t(preds, target) - tensor(0.5284) + tensor(0.4930) """ full_state_update = False From 963c6df013ce8e319246b5007d7f39fd58429b6b Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 19:16:45 +0100 Subject: [PATCH 07/22] pearson: Fix math in docstring --- src/torchmetrics/functional/nominal/pearson.py | 4 ++-- src/torchmetrics/nominal/pearson.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/src/torchmetrics/functional/nominal/pearson.py b/src/torchmetrics/functional/nominal/pearson.py index 6109b49037d..56dfe79e334 100644 --- a/src/torchmetrics/functional/nominal/pearson.py +++ b/src/torchmetrics/functional/nominal/pearson.py @@ -80,7 +80,7 @@ def pearsons_contingency_coefficient( ata series. .. math:: - T = \sqrt{\frac{\chi^2 / n}{\frac{1 + \chi^2 / n}}} + Pearson = \sqrt{\frac{\chi^2 / n}{1 + \chi^2 / n}} where @@ -90,7 +90,7 @@ def pearsons_contingency_coefficient( Pearson's Contingency Coefficient is a symmetric coefficient, i.e. .. math:: - T(preds, target) = T(target, preds) + Pearson(preds, target) = Pearson(target, preds) The output values lies in [0, 1]. diff --git a/src/torchmetrics/nominal/pearson.py b/src/torchmetrics/nominal/pearson.py index 3bd6efd563f..6c2f398ec66 100644 --- a/src/torchmetrics/nominal/pearson.py +++ b/src/torchmetrics/nominal/pearson.py @@ -30,7 +30,7 @@ class PearsonsContingencyCoefficient(Metric): (nominal) data series. .. math:: - T = \sqrt{\frac{\chi^2 / n}{\frac{1 + \chi^2 / n}}} + Pearson = \sqrt{\frac{\chi^2 / n}{1 + \chi^2 / n}} where @@ -40,7 +40,7 @@ class PearsonsContingencyCoefficient(Metric): Pearson's Contingency Coefficient is a symmetric coefficient, i.e. .. math:: - T(preds, target) = T(target, preds) + Pearson(preds, target) = Pearson(target, preds) The output values lies in [0, 1]. 
From d69056374b67164390e6aa4c507fa67ea06742cb Mon Sep 17 00:00:00 2001 From: Nicki Skafte Detlefsen Date: Mon, 14 Nov 2022 19:30:42 +0100 Subject: [PATCH 08/22] changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6da9e6eed7..ba42f7f6327 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `CramersV` to the new nominal package ([#1298](https://github.com/Lightning-AI/metrics/pull/1298)) +- Added `PearsonsContingencyCoefficient` and `TschuprowsT` to nominal package ([#1334](https://github.com/Lightning-AI/metrics/pull/1334)) + + ### Changed - Changed `MeanAveragePrecision` to vectorize `_find_best_gt_match` operation ([#1259](https://github.com/Lightning-AI/metrics/pull/1259)) From 1f1b0a3b7fb2634294ee39885226bd27825d1626 Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 20:00:12 +0100 Subject: [PATCH 09/22] Apply suggestions from code reviews --- requirements/nominal_test.txt | 2 +- src/torchmetrics/functional/nominal/cramers.py | 7 ++----- src/torchmetrics/functional/nominal/pearson.py | 8 +++----- src/torchmetrics/functional/nominal/tschuprows.py | 7 ++----- src/torchmetrics/nominal/cramers.py | 13 +++++-------- src/torchmetrics/nominal/pearson.py | 12 +++++------- src/torchmetrics/nominal/tschuprows.py | 13 +++++-------- tests/unittests/nominal/test_pearson.py | 12 ++++++------ tests/unittests/nominal/test_tschuprows.py | 12 ++++++------ 9 files changed, 35 insertions(+), 51 deletions(-) diff --git a/requirements/nominal_test.txt b/requirements/nominal_test.txt index d7add64db38..0c35d6a74ab 100644 --- a/requirements/nominal_test.txt +++ b/requirements/nominal_test.txt @@ -1,3 +1,3 @@ pandas # cannot pin version due to numpy version incompatibility dython # todo: pin version, but some version resolution issue -scipy +scipy>=1.0.0 diff --git a/src/torchmetrics/functional/nominal/cramers.py 
b/src/torchmetrics/functional/nominal/cramers.py index 2ef2299c0cb..810af791059 100644 --- a/src/torchmetrics/functional/nominal/cramers.py +++ b/src/torchmetrics/functional/nominal/cramers.py @@ -100,12 +100,9 @@ def cramers_v( .. math:: \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} - Cramer's V is a symmetric coefficient, i.e. + Cramer's V is a symmetric coefficient, i.e. :math:`V(preds, target) = V(target, preds)`. - .. math:: - V(preds, target) = V(target, preds) - - The output values lies in [0, 1]. + The output values lies in [0, 1] with 1 meaning the perfect association. Args: preds: 1D or 2D tensor of categorical (nominal) data diff --git a/src/torchmetrics/functional/nominal/pearson.py b/src/torchmetrics/functional/nominal/pearson.py index 56dfe79e334..fc027d9a763 100644 --- a/src/torchmetrics/functional/nominal/pearson.py +++ b/src/torchmetrics/functional/nominal/pearson.py @@ -77,7 +77,7 @@ def pearsons_contingency_coefficient( nan_replace_value: Optional[Union[int, float]] = 0.0, ) -> Tensor: r"""Compute `Pearson's Contingency Coefficient`_ measuring the association between two categorical (nominal) - ata series. + data series. .. math:: Pearson = \sqrt{\frac{\chi^2 / n}{1 + \chi^2 / n}} @@ -88,11 +88,9 @@ def pearsons_contingency_coefficient( \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} Pearson's Contingency Coefficient is a symmetric coefficient, i.e. + :math:`Pearson(preds, target) = Pearson(target, preds)`. - .. math:: - Pearson(preds, target) = Pearson(target, preds) - - The output values lies in [0, 1]. + The output values lies in [0, 1] with 1 meaning the perfect association. 
Args: preds: 1D or 2D tensor of categorical (nominal) data diff --git a/src/torchmetrics/functional/nominal/tschuprows.py b/src/torchmetrics/functional/nominal/tschuprows.py index 4251e5a479a..8b8600f8d55 100644 --- a/src/torchmetrics/functional/nominal/tschuprows.py +++ b/src/torchmetrics/functional/nominal/tschuprows.py @@ -102,12 +102,9 @@ def tschuprows_t( .. math:: \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} - Tschuprow's T is a symmetric coefficient, i.e. + Tschuprow's T is a symmetric coefficient, i.e. :math:`T(preds, target) = T(target, preds)`. - .. math:: - T(preds, target) = T(target, preds) - - The output values lies in [0, 1]. + The output values lies in [0, 1] with 1 meaning the perfect association. Args: preds: 1D or 2D tensor of categorical (nominal) data diff --git a/src/torchmetrics/nominal/cramers.py b/src/torchmetrics/nominal/cramers.py index d8ce35343ba..8770795ec56 100644 --- a/src/torchmetrics/nominal/cramers.py +++ b/src/torchmetrics/nominal/cramers.py @@ -33,12 +33,9 @@ class CramersV(Metric): .. math:: \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} - Cramer's V is a symmetric coefficient, i.e. + Cramer's V is a symmetric coefficient, i.e. :math:`V(preds, target) = V(target, preds)`. - .. math:: - V(preds, target) = V(target, preds) - - The output values lies in [0, 1]. + The output values lies in [0, 1] with 1 meaning the perfect association. 
Args: num_classes: Integer specifing the number of classes @@ -60,9 +57,9 @@ class CramersV(Metric): tensor(0.5284) """ - full_state_update = False - is_differentiable = False - higher_is_better = False + full_state_update: bool = False + is_differentiable: bool = False + higher_is_better: bool = False confmat: Tensor def __init__( diff --git a/src/torchmetrics/nominal/pearson.py b/src/torchmetrics/nominal/pearson.py index 6c2f398ec66..519f4fb2109 100644 --- a/src/torchmetrics/nominal/pearson.py +++ b/src/torchmetrics/nominal/pearson.py @@ -38,11 +38,9 @@ class PearsonsContingencyCoefficient(Metric): \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} Pearson's Contingency Coefficient is a symmetric coefficient, i.e. + :math:`Pearson(preds, target) = Pearson(target, preds)`. - .. math:: - Pearson(preds, target) = Pearson(target, preds) - - The output values lies in [0, 1]. + The output values lies in [0, 1] with 1 meaning the perfect association. Args: num_classes: Integer specifing the number of classes @@ -63,9 +61,9 @@ class PearsonsContingencyCoefficient(Metric): tensor(0.6948) """ - full_state_update = False - is_differentiable = False - higher_is_better = False + full_state_update: bool = False + is_differentiable: bool = False + higher_is_better: bool = False confmat: Tensor def __init__( diff --git a/src/torchmetrics/nominal/tschuprows.py b/src/torchmetrics/nominal/tschuprows.py index 90e15f71a72..d1bf2e674ae 100644 --- a/src/torchmetrics/nominal/tschuprows.py +++ b/src/torchmetrics/nominal/tschuprows.py @@ -33,12 +33,9 @@ class TschuprowsT(Metric): .. math:: \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} - Tschuprow's T is a symmetric coefficient, i.e. + Tschuprow's T is a symmetric coefficient, i.e. :math:`T(preds, target) = T(target, preds)`. - .. math:: - T(preds, target) = T(target, preds) - - The output values lies in [0, 1]. 
+ The output values lies in [0, 1] with 1 meaning the perfect association. Args: num_classes: Integer specifing the number of classes @@ -60,9 +57,9 @@ class TschuprowsT(Metric): tensor(0.4930) """ - full_state_update = False - is_differentiable = False - higher_is_better = False + full_state_update: bool = False + is_differentiable: bool = False + higher_is_better: bool = False confmat: Tensor def __init__( diff --git a/tests/unittests/nominal/test_pearson.py b/tests/unittests/nominal/test_pearson.py index 558c784d140..b628a7cd406 100644 --- a/tests/unittests/nominal/test_pearson.py +++ b/tests/unittests/nominal/test_pearson.py @@ -56,7 +56,7 @@ def _matrix_input(): return matrix -def _sk_pearsons_t(preds, target): +def _pd_pearsons_t(preds, target): preds = preds.argmax(1) if preds.ndim == 2 else preds target = target.argmax(1) if target.ndim == 2 else target preds, target = preds.numpy().astype(int), target.numpy().astype(int) @@ -66,12 +66,12 @@ def _sk_pearsons_t(preds, target): return torch.tensor(t) -def _sk_pearsons_t_matrix(matrix): +def _pd_pearsons_t_matrix(matrix): num_variables = matrix.shape[1] pearsons_t_matrix_value = torch.ones(num_variables, num_variables) for i, j in itertools.combinations(range(num_variables), 2): x, y = matrix[:, i], matrix[:, j] - pearsons_t_matrix_value[i, j] = pearsons_t_matrix_value[j, i] = _sk_pearsons_t(x, y) + pearsons_t_matrix_value[i, j] = pearsons_t_matrix_value[j, i] = _pd_pearsons_t(x, y) return pearsons_t_matrix_value @@ -101,13 +101,13 @@ def test_pearsons_ta(self, ddp, dist_sync_on_step, preds, target): preds=preds, target=target, metric_class=PearsonsContingencyCoefficient, - sk_metric=_sk_pearsons_t, + sk_metric=_pd_pearsons_t, metric_args=metric_args, ) def test_pearsons_t_functional(self, preds, target): self.run_functional_metric_test( - preds, target, metric_functional=pearsons_contingency_coefficient, sk_metric=_sk_pearsons_t + preds, target, metric_functional=pearsons_contingency_coefficient, 
sk_metric=_pd_pearsons_t ) def test_pearsons_t_differentiability(self, preds, target): @@ -129,5 +129,5 @@ def test_pearsons_t_differentiability(self, preds, target): ) def test_pearsons_contingency_coefficient_matrix(_matrix_input): tm_score = pearsons_contingency_coefficient_matrix(_matrix_input) - reference_score = _sk_pearsons_t_matrix(_matrix_input) + reference_score = _pd_pearsons_t_matrix(_matrix_input) assert torch.allclose(tm_score, reference_score) diff --git a/tests/unittests/nominal/test_tschuprows.py b/tests/unittests/nominal/test_tschuprows.py index c8f6560fa65..c079bfdffd8 100644 --- a/tests/unittests/nominal/test_tschuprows.py +++ b/tests/unittests/nominal/test_tschuprows.py @@ -53,7 +53,7 @@ def _matrix_input(): return matrix -def _sk_tschuprows_t(preds, target): +def _pd_tschuprows_t(preds, target): preds = preds.argmax(1) if preds.ndim == 2 else preds target = target.argmax(1) if target.ndim == 2 else target preds, target = preds.numpy().astype(int), target.numpy().astype(int) @@ -63,12 +63,12 @@ def _sk_tschuprows_t(preds, target): return torch.tensor(t) -def _sk_tschuprows_t_matrix(matrix): +def _pd_tschuprows_t_matrix(matrix): num_variables = matrix.shape[1] tschuprows_t_matrix_value = torch.ones(num_variables, num_variables) for i, j in itertools.combinations(range(num_variables), 2): x, y = matrix[:, i], matrix[:, j] - tschuprows_t_matrix_value[i, j] = tschuprows_t_matrix_value[j, i] = _sk_tschuprows_t(x, y) + tschuprows_t_matrix_value[i, j] = tschuprows_t_matrix_value[j, i] = _pd_tschuprows_t(x, y) return tschuprows_t_matrix_value @@ -98,14 +98,14 @@ def test_tschuprows_ta(self, ddp, dist_sync_on_step, preds, target): preds=preds, target=target, metric_class=TschuprowsT, - sk_metric=_sk_tschuprows_t, + sk_metric=_pd_tschuprows_t, metric_args=metric_args, ) def test_tschuprows_t_functional(self, preds, target): metric_args = {"bias_correction": False} self.run_functional_metric_test( - preds, target, metric_functional=tschuprows_t, 
sk_metric=_sk_tschuprows_t, metric_args=metric_args + preds, target, metric_functional=tschuprows_t, sk_metric=_pd_tschuprows_t, metric_args=metric_args ) def test_tschuprows_t_differentiability(self, preds, target): @@ -127,5 +127,5 @@ def test_tschuprows_t_differentiability(self, preds, target): ) def test_tschuprows_t_matrix(_matrix_input): tm_score = tschuprows_t_matrix(_matrix_input, bias_correction=False) - reference_score = _sk_tschuprows_t_matrix(_matrix_input) + reference_score = _pd_tschuprows_t_matrix(_matrix_input) assert torch.allclose(tm_score, reference_score) From b63f186bfec503b5aff134df873fd569a76ef295 Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 20:05:06 +0100 Subject: [PATCH 10/22] Fix mypy issues --- src/torchmetrics/functional/nominal/cramers.py | 4 ++-- src/torchmetrics/functional/nominal/tschuprows.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/torchmetrics/functional/nominal/cramers.py b/src/torchmetrics/functional/nominal/cramers.py index 810af791059..15fa481e029 100644 --- a/src/torchmetrics/functional/nominal/cramers.py +++ b/src/torchmetrics/functional/nominal/cramers.py @@ -74,10 +74,10 @@ def _cramers_v_compute(confmat: Tensor, bias_correction: bool) -> Tensor: phi_squared_corrected, rows_corrected, cols_corrected = _compute_bias_corrected_values( phi_squared, n_rows, n_cols, cm_sum ) - if min(rows_corrected, cols_corrected) == 1: + if torch.min(rows_corrected, cols_corrected) == 1: _unable_to_use_bias_correction_warning(metric_name="Cramer's V") return torch.tensor(float("nan"), device=confmat.device) - cramers_v_value = torch.sqrt(phi_squared_corrected / min(rows_corrected - 1, cols_corrected - 1)) + cramers_v_value = torch.sqrt(phi_squared_corrected / torch.min(rows_corrected - 1, cols_corrected - 1)) else: cramers_v_value = torch.sqrt(phi_squared / min(n_rows - 1, n_cols - 1)) return cramers_v_value.clamp(0.0, 1.0) diff --git a/src/torchmetrics/functional/nominal/tschuprows.py 
b/src/torchmetrics/functional/nominal/tschuprows.py index 8b8600f8d55..30be52ac16b 100644 --- a/src/torchmetrics/functional/nominal/tschuprows.py +++ b/src/torchmetrics/functional/nominal/tschuprows.py @@ -74,14 +74,14 @@ def _tschuprows_t_compute(confmat: Tensor, bias_correction: bool) -> Tensor: phi_squared_corrected, rows_corrected, cols_corrected = _compute_bias_corrected_values( phi_squared, n_rows, n_cols, cm_sum ) - if min(rows_corrected, cols_corrected) == 1: + if torch.min(rows_corrected, cols_corrected) == 1: _unable_to_use_bias_correction_warning(metric_name="Tschuprow's T") return torch.tensor(float("nan"), device=confmat.device) tschuprows_t_value = torch.sqrt(phi_squared_corrected / torch.sqrt((rows_corrected - 1) * (cols_corrected - 1))) else: - n_rows = torch.tensor(n_rows, device=phi_squared.device) - n_cols = torch.tensor(n_cols, device=phi_squared.device) - tschuprows_t_value = torch.sqrt(phi_squared / torch.sqrt((n_rows - 1) * (n_cols - 1))) + n_rows_tensor = torch.tensor(n_rows, device=phi_squared.device) + n_cols_tensor = torch.tensor(n_cols, device=phi_squared.device) + tschuprows_t_value = torch.sqrt(phi_squared / torch.sqrt((n_rows_tensor - 1) * (n_cols_tensor - 1))) return tschuprows_t_value.clamp(0.0, 1.0) From be2ecc123778921c53575d01c04029749e7c8362 Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 20:06:51 +0100 Subject: [PATCH 11/22] Add functional matrix module to functional __init__ --- src/torchmetrics/functional/__init__.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/torchmetrics/functional/__init__.py b/src/torchmetrics/functional/__init__.py index a5766c82381..fd988b38ad0 100644 --- a/src/torchmetrics/functional/__init__.py +++ b/src/torchmetrics/functional/__init__.py @@ -42,9 +42,12 @@ ) from torchmetrics.functional.image.tv import total_variation from torchmetrics.functional.image.uqi import universal_image_quality_index -from torchmetrics.functional.nominal.cramers import 
cramers_v -from torchmetrics.functional.nominal.pearson import pearsons_contingency_coefficient -from torchmetrics.functional.nominal.tschuprows import tschuprows_t +from torchmetrics.functional.nominal.cramers import cramers_v, cramers_v_matrix +from torchmetrics.functional.nominal.pearson import ( + pearsons_contingency_coefficient, + pearsons_contingency_coefficient_matrix, +) +from torchmetrics.functional.nominal.tschuprows import tschuprows_t, tschuprows_t_matrix from torchmetrics.functional.pairwise.cosine import pairwise_cosine_similarity from torchmetrics.functional.pairwise.euclidean import pairwise_euclidean_distance from torchmetrics.functional.pairwise.linear import pairwise_linear_similarity @@ -106,6 +109,7 @@ "confusion_matrix", "cosine_similarity", "cramers_v", + "cramers_v_matrix", "tweedie_deviance_score", "dice_score", "dice", @@ -134,6 +138,7 @@ "pairwise_manhattan_distance", "pearson_corrcoef", "pearsons_contingency_coefficient", + "pearsons_contingency_coefficient_matrix", "permutation_invariant_training", "perplexity", "pit_permutate", @@ -169,6 +174,7 @@ "total_variation", "translation_edit_rate", "tschuprows_t", + "tschuprows_t_matrix", "universal_image_quality_index", "spectral_angle_mapper", "weighted_mean_absolute_percentage_error", From 2945c66deb0329f7c419e2a9cecd552b99611336 Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 20:13:26 +0100 Subject: [PATCH 12/22] Add missing info to docs --- src/torchmetrics/functional/nominal/cramers.py | 3 +++ src/torchmetrics/functional/nominal/pearson.py | 3 +++ src/torchmetrics/functional/nominal/tschuprows.py | 3 +++ src/torchmetrics/nominal/cramers.py | 3 +++ src/torchmetrics/nominal/pearson.py | 3 +++ src/torchmetrics/nominal/tschuprows.py | 3 +++ 6 files changed, 18 insertions(+) diff --git a/src/torchmetrics/functional/nominal/cramers.py b/src/torchmetrics/functional/nominal/cramers.py index 15fa481e029..652d413cea8 100644 --- a/src/torchmetrics/functional/nominal/cramers.py +++ 
b/src/torchmetrics/functional/nominal/cramers.py @@ -100,6 +100,9 @@ def cramers_v( .. math:: \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` + represent frequencies of values in ``preds`` and ``target``, respectively. + Cramer's V is a symmetric coefficient, i.e. :math:`V(preds, target) = V(target, preds)`. The output values lies in [0, 1] with 1 meaning the perfect association. diff --git a/src/torchmetrics/functional/nominal/pearson.py b/src/torchmetrics/functional/nominal/pearson.py index fc027d9a763..bf77a234c3d 100644 --- a/src/torchmetrics/functional/nominal/pearson.py +++ b/src/torchmetrics/functional/nominal/pearson.py @@ -87,6 +87,9 @@ def pearsons_contingency_coefficient( .. math:: \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` + represent frequencies of values in ``preds`` and ``target``, respectively. + Pearson's Contingency Coefficient is a symmetric coefficient, i.e. :math:`Pearson(preds, target) = Pearson(target, preds)`. diff --git a/src/torchmetrics/functional/nominal/tschuprows.py b/src/torchmetrics/functional/nominal/tschuprows.py index 30be52ac16b..0450bc5f5cb 100644 --- a/src/torchmetrics/functional/nominal/tschuprows.py +++ b/src/torchmetrics/functional/nominal/tschuprows.py @@ -102,6 +102,9 @@ def tschuprows_t( .. math:: \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` + represent frequencies of values in ``preds`` and ``target``, respectively. + Tschuprow's T is a symmetric coefficient, i.e. :math:`T(preds, target) = T(target, preds)`. 
The output values lies in [0, 1] with 1 meaning the perfect association. diff --git a/src/torchmetrics/nominal/cramers.py b/src/torchmetrics/nominal/cramers.py index 8770795ec56..94fc8385e7e 100644 --- a/src/torchmetrics/nominal/cramers.py +++ b/src/torchmetrics/nominal/cramers.py @@ -33,6 +33,9 @@ class CramersV(Metric): .. math:: \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` + represent frequencies of values in ``preds`` and ``target``, respectively. + Cramer's V is a symmetric coefficient, i.e. :math:`V(preds, target) = V(target, preds)`. The output values lies in [0, 1] with 1 meaning the perfect association. diff --git a/src/torchmetrics/nominal/pearson.py b/src/torchmetrics/nominal/pearson.py index 519f4fb2109..c732331506f 100644 --- a/src/torchmetrics/nominal/pearson.py +++ b/src/torchmetrics/nominal/pearson.py @@ -37,6 +37,9 @@ class PearsonsContingencyCoefficient(Metric): .. math:: \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` + represent frequencies of values in ``preds`` and ``target``, respectively. + Pearson's Contingency Coefficient is a symmetric coefficient, i.e. :math:`Pearson(preds, target) = Pearson(target, preds)`. diff --git a/src/torchmetrics/nominal/tschuprows.py b/src/torchmetrics/nominal/tschuprows.py index d1bf2e674ae..083cbaf904f 100644 --- a/src/torchmetrics/nominal/tschuprows.py +++ b/src/torchmetrics/nominal/tschuprows.py @@ -33,6 +33,9 @@ class TschuprowsT(Metric): .. 
math:: \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` + represent frequencies of values in ``preds`` and ``target``, respectively. + Tschuprow's T is a symmetric coefficient, i.e. :math:`T(preds, target) = T(target, preds)`. The output values lies in [0, 1] with 1 meaning the perfect association. From f46f8fe9e94fc49a5603c774d6e4b257c9eaf5dd Mon Sep 17 00:00:00 2001 From: stancld Date: Mon, 14 Nov 2022 20:27:56 +0100 Subject: [PATCH 13/22] Try to hack reqs for oldest CI config --- requirements/nominal_test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/nominal_test.txt b/requirements/nominal_test.txt index 0c35d6a74ab..1e25abf697e 100644 --- a/requirements/nominal_test.txt +++ b/requirements/nominal_test.txt @@ -1,3 +1,3 @@ pandas # cannot pin version due to numpy version incompatibility dython # todo: pin version, but some version resolution issue -scipy>=1.0.0 +scipy # cannot pin version due to some version conflicts with `oldest` CI configuration From 16d484e40b29fc1b25d82223fba99944c4660ba9 Mon Sep 17 00:00:00 2001 From: Daniel Stancl <46073029+stancld@users.noreply.github.com> Date: Tue, 15 Nov 2022 10:52:44 +0100 Subject: [PATCH 14/22] Apply suggestions from code review Co-authored-by: Nicki Skafte Detlefsen --- src/torchmetrics/nominal/cramers.py | 2 +- src/torchmetrics/nominal/pearson.py | 7 ++++++- src/torchmetrics/nominal/tschuprows.py | 7 ++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/torchmetrics/nominal/cramers.py b/src/torchmetrics/nominal/cramers.py index 94fc8385e7e..bbdccc76509 100644 --- a/src/torchmetrics/nominal/cramers.py +++ b/src/torchmetrics/nominal/cramers.py @@ -62,7 +62,7 @@ class CramersV(Metric): full_state_update: bool = False is_differentiable: bool = False - higher_is_better: bool = False + 
higher_is_better: bool = True confmat: Tensor def __init__( diff --git a/src/torchmetrics/nominal/pearson.py b/src/torchmetrics/nominal/pearson.py index c732331506f..bc9b099b716 100644 --- a/src/torchmetrics/nominal/pearson.py +++ b/src/torchmetrics/nominal/pearson.py @@ -51,6 +51,11 @@ class PearsonsContingencyCoefficient(Metric): nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. +Raises: + ValueError: + If `nan_strategy` is not one of `'replace'` and `'drop'` + ValueError: + If `nan_strategy` is equal to `'replace'` and `nan_replace_value` is not an `int` or `float` Returns: Pearson's Contingency Coefficient statistic @@ -66,7 +71,7 @@ class PearsonsContingencyCoefficient(Metric): full_state_update: bool = False is_differentiable: bool = False - higher_is_better: bool = False + higher_is_better: bool = True confmat: Tensor def __init__( diff --git a/src/torchmetrics/nominal/tschuprows.py b/src/torchmetrics/nominal/tschuprows.py index 083cbaf904f..fbd878e8a4f 100644 --- a/src/torchmetrics/nominal/tschuprows.py +++ b/src/torchmetrics/nominal/tschuprows.py @@ -50,6 +50,11 @@ class TschuprowsT(Metric): Returns: Tschuprow's T statistic +Raises: + ValueError: + If `nan_strategy` is not one of `'replace'` and `'drop'` + ValueError: + If `nan_strategy` is equal to `'replace'` and `nan_replace_value` is not an `int` or `float` Example: >>> from torchmetrics import TschuprowsT >>> _ = torch.manual_seed(42) @@ -62,7 +67,7 @@ class TschuprowsT(Metric): full_state_update: bool = False is_differentiable: bool = False - higher_is_better: bool = False + higher_is_better: bool = True confmat: Tensor def __init__( From d21cf37445f90dd425c9fd91dcd01db462438ae6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Nov 2022 09:53:17 +0000 Subject: [PATCH 15/22] [pre-commit.ci] auto fixes from pre-commit.com hooks 
for more information, see https://pre-commit.ci --- src/torchmetrics/nominal/pearson.py | 80 +++++++++++++------------- src/torchmetrics/nominal/tschuprows.py | 76 ++++++++++++------------ 2 files changed, 78 insertions(+), 78 deletions(-) diff --git a/src/torchmetrics/nominal/pearson.py b/src/torchmetrics/nominal/pearson.py index bc9b099b716..65c7ad84dd1 100644 --- a/src/torchmetrics/nominal/pearson.py +++ b/src/torchmetrics/nominal/pearson.py @@ -27,46 +27,46 @@ class PearsonsContingencyCoefficient(Metric): r"""Compute `Pearson's Contingency Coefficient`_ statistic measuring the association between two categorical - (nominal) data series. - - .. math:: - Pearson = \sqrt{\frac{\chi^2 / n}{1 + \chi^2 / n}} - - where - - .. math:: - \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} - - where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` - represent frequencies of values in ``preds`` and ``target``, respectively. - - Pearson's Contingency Coefficient is a symmetric coefficient, i.e. - :math:`Pearson(preds, target) = Pearson(target, preds)`. - - The output values lies in [0, 1] with 1 meaning the perfect association. - - Args: - num_classes: Integer specifing the number of classes - nan_strategy: Indication of whether to replace or drop ``NaN`` values - nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
- -Raises: - ValueError: - If `nan_strategy` is not one of `'replace'` and `'drop'` - ValueError: - If `nan_strategy` is equal to `'replace'` and `nan_replace_value` is not an `int` or `float` - Returns: - Pearson's Contingency Coefficient statistic - - Example: - >>> from torchmetrics import PearsonsContingencyCoefficient - >>> _ = torch.manual_seed(42) - >>> preds = torch.randint(0, 4, (100,)) - >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) - >>> pearsons_contingency_coefficient = PearsonsContingencyCoefficient(num_classes=5) - >>> pearsons_contingency_coefficient(preds, target) - tensor(0.6948) + (nominal) data series. + + .. math:: + Pearson = \sqrt{\frac{\chi^2 / n}{1 + \chi^2 / n}} + + where + + .. math:: + \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + + where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` + represent frequencies of values in ``preds`` and ``target``, respectively. + + Pearson's Contingency Coefficient is a symmetric coefficient, i.e. + :math:`Pearson(preds, target) = Pearson(target, preds)`. + + The output values lies in [0, 1] with 1 meaning the perfect association. + + Args: + num_classes: Integer specifing the number of classes + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` + kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
+ + Raises: + ValueError: + If `nan_strategy` is not one of `'replace'` and `'drop'` + ValueError: + If `nan_strategy` is equal to `'replace'` and `nan_replace_value` is not an `int` or `float` + Returns: + Pearson's Contingency Coefficient statistic + + Example: + >>> from torchmetrics import PearsonsContingencyCoefficient + >>> _ = torch.manual_seed(42) + >>> preds = torch.randint(0, 4, (100,)) + >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) + >>> pearsons_contingency_coefficient = PearsonsContingencyCoefficient(num_classes=5) + >>> pearsons_contingency_coefficient(preds, target) + tensor(0.6948) """ full_state_update: bool = False diff --git a/src/torchmetrics/nominal/tschuprows.py b/src/torchmetrics/nominal/tschuprows.py index fbd878e8a4f..5a87b9edddc 100644 --- a/src/torchmetrics/nominal/tschuprows.py +++ b/src/torchmetrics/nominal/tschuprows.py @@ -25,44 +25,44 @@ class TschuprowsT(Metric): r"""Compute `Tschuprow's T`_ statistic measuring the association between two categorical (nominal) data series. - .. math:: - T = \sqrt{\frac{\chi^2 / n}{\sqrt{(r - 1) * (k - 1)}}} - - where - - .. math:: - \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} - - where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` - represent frequencies of values in ``preds`` and ``target``, respectively. - - Tschuprow's T is a symmetric coefficient, i.e. :math:`T(preds, target) = T(target, preds)`. - - The output values lies in [0, 1] with 1 meaning the perfect association. - - Args: - num_classes: Integer specifing the number of classes - bias_correction: Indication of whether to use bias correction. - nan_strategy: Indication of whether to replace or drop ``NaN`` values - nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
- - Returns: - Tschuprow's T statistic - -Raises: - ValueError: - If `nan_strategy` is not one of `'replace'` and `'drop'` - ValueError: - If `nan_strategy` is equal to `'replace'` and `nan_replace_value` is not an `int` or `float` - Example: - >>> from torchmetrics import TschuprowsT - >>> _ = torch.manual_seed(42) - >>> preds = torch.randint(0, 4, (100,)) - >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) - >>> tschuprows_t = TschuprowsT(num_classes=5) - >>> tschuprows_t(preds, target) - tensor(0.4930) + .. math:: + T = \sqrt{\frac{\chi^2 / n}{\sqrt{(r - 1) * (k - 1)}}} + + where + + .. math:: + \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + + where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` + represent frequencies of values in ``preds`` and ``target``, respectively. + + Tschuprow's T is a symmetric coefficient, i.e. :math:`T(preds, target) = T(target, preds)`. + + The output values lies in [0, 1] with 1 meaning the perfect association. + + Args: + num_classes: Integer specifing the number of classes + bias_correction: Indication of whether to use bias correction. + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` + kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
+ + Returns: + Tschuprow's T statistic + + Raises: + ValueError: + If `nan_strategy` is not one of `'replace'` and `'drop'` + ValueError: + If `nan_strategy` is equal to `'replace'` and `nan_replace_value` is not an `int` or `float` + Example: + >>> from torchmetrics import TschuprowsT + >>> _ = torch.manual_seed(42) + >>> preds = torch.randint(0, 4, (100,)) + >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) + >>> tschuprows_t = TschuprowsT(num_classes=5) + >>> tschuprows_t(preds, target) + tensor(0.4930) """ full_state_update: bool = False From 98f0d8f5672ca1c941f1f1792ad49ac7eef72535 Mon Sep 17 00:00:00 2001 From: stancld Date: Tue, 15 Nov 2022 10:58:34 +0100 Subject: [PATCH 16/22] Fix docstring indentation --- src/torchmetrics/nominal/pearson.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torchmetrics/nominal/pearson.py b/src/torchmetrics/nominal/pearson.py index 65c7ad84dd1..2d599c7c49b 100644 --- a/src/torchmetrics/nominal/pearson.py +++ b/src/torchmetrics/nominal/pearson.py @@ -27,7 +27,7 @@ class PearsonsContingencyCoefficient(Metric): r"""Compute `Pearson's Contingency Coefficient`_ statistic measuring the association between two categorical - (nominal) data series. + (nominal) data series. .. 
math:: Pearson = \sqrt{\frac{\chi^2 / n}{1 + \chi^2 / n}} From 4fd16eaa87fe6e78b49b8c8d53f59b7554e5d463 Mon Sep 17 00:00:00 2001 From: stancld Date: Tue, 15 Nov 2022 11:36:04 +0100 Subject: [PATCH 17/22] Fix docstring indentation --- src/torchmetrics/nominal/cramers.py | 6 +++ src/torchmetrics/nominal/pearson.py | 53 +++++++++++++------------- src/torchmetrics/nominal/tschuprows.py | 51 +++++++++++++------------ 3 files changed, 59 insertions(+), 51 deletions(-) diff --git a/src/torchmetrics/nominal/cramers.py b/src/torchmetrics/nominal/cramers.py index bbdccc76509..31b378fc5ca 100644 --- a/src/torchmetrics/nominal/cramers.py +++ b/src/torchmetrics/nominal/cramers.py @@ -50,6 +50,12 @@ class CramersV(Metric): Returns: Cramer's V statistic + Raises: + ValueError: + If `nan_strategy` is not one of `'replace'` and `'drop'` + ValueError: + If `nan_strategy` is equal to `'replace'` and `nan_replace_value` is not an `int` or `float` + Example: >>> from torchmetrics import CramersV >>> _ = torch.manual_seed(42) diff --git a/src/torchmetrics/nominal/pearson.py b/src/torchmetrics/nominal/pearson.py index 2d599c7c49b..f1b95091343 100644 --- a/src/torchmetrics/nominal/pearson.py +++ b/src/torchmetrics/nominal/pearson.py @@ -29,44 +29,45 @@ class PearsonsContingencyCoefficient(Metric): r"""Compute `Pearson's Contingency Coefficient`_ statistic measuring the association between two categorical (nominal) data series. - .. math:: - Pearson = \sqrt{\frac{\chi^2 / n}{1 + \chi^2 / n}} + .. math:: + Pearson = \sqrt{\frac{\chi^2 / n}{1 + \chi^2 / n}} - where + where - .. math:: - \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + .. math:: + \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} - where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` - represent frequencies of values in ``preds`` and ``target``, respectively. 
+ where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed + with :math:`A_i, B_j` represent frequencies of values in ``preds`` and ``target``, respectively. - Pearson's Contingency Coefficient is a symmetric coefficient, i.e. - :math:`Pearson(preds, target) = Pearson(target, preds)`. + Pearson's Contingency Coefficient is a symmetric coefficient, i.e. + :math:`Pearson(preds, target) = Pearson(target, preds)`. - The output values lies in [0, 1] with 1 meaning the perfect association. + The output values lies in [0, 1] with 1 meaning the perfect association. - Args: - num_classes: Integer specifing the number of classes - nan_strategy: Indication of whether to replace or drop ``NaN`` values - nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. + Args: + num_classes: Integer specifing the number of classes + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` + kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
+ + Returns: + Pearson's Contingency Coefficient statistic Raises: ValueError: If `nan_strategy` is not one of `'replace'` and `'drop'` ValueError: If `nan_strategy` is equal to `'replace'` and `nan_replace_value` is not an `int` or `float` - Returns: - Pearson's Contingency Coefficient statistic - - Example: - >>> from torchmetrics import PearsonsContingencyCoefficient - >>> _ = torch.manual_seed(42) - >>> preds = torch.randint(0, 4, (100,)) - >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) - >>> pearsons_contingency_coefficient = PearsonsContingencyCoefficient(num_classes=5) - >>> pearsons_contingency_coefficient(preds, target) - tensor(0.6948) + + Example: + >>> from torchmetrics import PearsonsContingencyCoefficient + >>> _ = torch.manual_seed(42) + >>> preds = torch.randint(0, 4, (100,)) + >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) + >>> pearsons_contingency_coefficient = PearsonsContingencyCoefficient(num_classes=5) + >>> pearsons_contingency_coefficient(preds, target) + tensor(0.6948) """ full_state_update: bool = False diff --git a/src/torchmetrics/nominal/tschuprows.py b/src/torchmetrics/nominal/tschuprows.py index 5a87b9edddc..2e1b40fd261 100644 --- a/src/torchmetrics/nominal/tschuprows.py +++ b/src/torchmetrics/nominal/tschuprows.py @@ -25,44 +25,45 @@ class TschuprowsT(Metric): r"""Compute `Tschuprow's T`_ statistic measuring the association between two categorical (nominal) data series. - .. math:: - T = \sqrt{\frac{\chi^2 / n}{\sqrt{(r - 1) * (k - 1)}}} + .. math:: + T = \sqrt{\frac{\chi^2 / n}{\sqrt{(r - 1) * (k - 1)}}} - where + where - .. math:: - \chi^2 = \sum_{i,j} \ frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} + .. 
math:: + \chi^2 = \sum_{i,j} \frac{\left(n_{ij} - \frac{n_{i.} n_{.j}}{n}\right)^2}{\frac{n_{i.} n_{.j}}{n}} - where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed with :math:`A_i, B_j` - represent frequencies of values in ``preds`` and ``target``, respectively. + where :math:`n_{ij}` denotes the number of times the values :math:`(A_i, B_j)` are observed + with :math:`A_i, B_j` representing frequencies of values in ``preds`` and ``target``, respectively. - Tschuprow's T is a symmetric coefficient, i.e. :math:`T(preds, target) = T(target, preds)`. + Tschuprow's T is a symmetric coefficient, i.e. :math:`T(preds, target) = T(target, preds)`. - The output values lies in [0, 1] with 1 meaning the perfect association. + The output values lie in [0, 1] with 1 meaning the perfect association. - Args: - num_classes: Integer specifing the number of classes - bias_correction: Indication of whether to use bias correction. - nan_strategy: Indication of whether to replace or drop ``NaN`` values - nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. + Args: + num_classes: Integer specifying the number of classes + bias_correction: Indication of whether to use bias correction. + nan_strategy: Indication of whether to replace or drop ``NaN`` values + nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` + kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
- Returns: - Tschuprow's T statistic + Returns: + Tschuprow's T statistic Raises: ValueError: If `nan_strategy` is not one of `'replace'` and `'drop'` ValueError: If `nan_strategy` is equal to `'replace'` and `nan_replace_value` is not an `int` or `float` - Example: - >>> from torchmetrics import TschuprowsT - >>> _ = torch.manual_seed(42) - >>> preds = torch.randint(0, 4, (100,)) - >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) - >>> tschuprows_t = TschuprowsT(num_classes=5) - >>> tschuprows_t(preds, target) - tensor(0.4930) + + Example: + >>> from torchmetrics import TschuprowsT + >>> _ = torch.manual_seed(42) + >>> preds = torch.randint(0, 4, (100,)) + >>> target = torch.round(preds + torch.randn(100)).clamp(0, 4) + >>> tschuprows_t = TschuprowsT(num_classes=5) + >>> tschuprows_t(preds, target) + tensor(0.4930) """ full_state_update: bool = False From e5bc89e3e83ee701b48135014ab5310bff061c84 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Tue, 15 Nov 2022 12:22:54 +0100 Subject: [PATCH 18/22] Apply suggestions from code review --- CHANGELOG.md | 6 +++--- src/torchmetrics/functional/nominal/pearson.py | 13 ++++++++++--- .../functional/nominal/tschuprows.py | 10 ++++++++-- src/torchmetrics/nominal/pearson.py | 16 ++++++++++------ src/torchmetrics/nominal/tschuprows.py | 16 ++++++++++------ 5 files changed, 41 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba42f7f6327..7814780fbf4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,9 +27,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `CramersV` to the new nominal package ([#1298](https://github.com/Lightning-AI/metrics/pull/1298)) - - -- Added `PearsonsContingencyCoefficient` and `TschuprowsT` to nominal package ([#1334](https://github.com/Lightning-AI/metrics/pull/1334)) + * `CramersV` ([#1298](https://github.com/Lightning-AI/metrics/pull/1298)) + * 
`PearsonsContingencyCoefficient` ([#1334](https://github.com/Lightning-AI/metrics/pull/1334)) + * `TschuprowsT` ([#1334](https://github.com/Lightning-AI/metrics/pull/1334)) ### Changed diff --git a/src/torchmetrics/functional/nominal/pearson.py b/src/torchmetrics/functional/nominal/pearson.py index bf77a234c3d..7678a43793d 100644 --- a/src/torchmetrics/functional/nominal/pearson.py +++ b/src/torchmetrics/functional/nominal/pearson.py @@ -96,12 +96,16 @@ def pearsons_contingency_coefficient( The output values lies in [0, 1] with 1 meaning the perfect association. Args: - preds: 1D or 2D tensor of categorical (nominal) data + preds: 1D or 2D tensor of categorical (nominal) data: + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) - target: 1D or 2D tensor of categorical (nominal) data + + target: 1D or 2D tensor of categorical (nominal) data: + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) + nan_strategy: Indication of whether to replace or drop ``NaN`` values nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` @@ -134,8 +138,10 @@ def pearsons_contingency_coefficient_matrix( Args: matrix: A tensor of categorical (nominal) data, where: + - rows represent a number of data points - columns represent a number of categorical (nominal) features + nan_strategy: Indication of whether to replace or drop ``NaN`` values nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` @@ -160,7 +166,8 @@ def pearsons_contingency_coefficient_matrix( x, y = matrix[:, i], matrix[:, j] num_classes = len(torch.cat([x, y]).unique()) confmat = _pearsons_contingency_coefficient_update(x, y, num_classes, nan_strategy, nan_replace_value) + val = _pearsons_contingency_coefficient_compute(confmat) pearsons_cont_coef_matrix_value[i, j] = pearsons_cont_coef_matrix_value[ j, i - ] = _pearsons_contingency_coefficient_compute(confmat) + ] = val return pearsons_cont_coef_matrix_value diff --git 
a/src/torchmetrics/functional/nominal/tschuprows.py b/src/torchmetrics/functional/nominal/tschuprows.py index 0450bc5f5cb..e3579becefe 100644 --- a/src/torchmetrics/functional/nominal/tschuprows.py +++ b/src/torchmetrics/functional/nominal/tschuprows.py @@ -110,12 +110,16 @@ def tschuprows_t( The output values lies in [0, 1] with 1 meaning the perfect association. Args: - preds: 1D or 2D tensor of categorical (nominal) data + preds: 1D or 2D tensor of categorical (nominal) data: + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) - target: 1D or 2D tensor of categorical (nominal) data + + target: 1D or 2D tensor of categorical (nominal) data: + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) + bias_correction: Indication of whether to use bias correction. nan_strategy: Indication of whether to replace or drop ``NaN`` values nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` @@ -150,8 +154,10 @@ def tschuprows_t_matrix( Args: matrix: A tensor of categorical (nominal) data, where: + - rows represent a number of data points - columns represent a number of categorical (nominal) features + bias_correction: Indication of whether to use bias correction. nan_strategy: Indication of whether to replace or drop ``NaN`` values nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` diff --git a/src/torchmetrics/nominal/pearson.py b/src/torchmetrics/nominal/pearson.py index f1b95091343..6de46f11eb5 100644 --- a/src/torchmetrics/nominal/pearson.py +++ b/src/torchmetrics/nominal/pearson.py @@ -95,12 +95,16 @@ def update(self, preds: Tensor, target: Tensor) -> None: """Update state with predictions and targets. 
Args: - preds: 1D or 2D tensor of categorical (nominal) data - - 1D shape: (batch_size,) - - 2D shape: (batch_size, num_classes) - target: 1D or 2D tensor of categorical (nominal) data - - 1D shape: (batch_size,) - - 2D shape: (batch_size, num_classes) + preds: 1D or 2D tensor of categorical (nominal) data: + + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + + target: 1D or 2D tensor of categorical (nominal) data: + + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + """ confmat = _pearsons_contingency_coefficient_update( preds, target, self.num_classes, self.nan_strategy, self.nan_replace_value diff --git a/src/torchmetrics/nominal/tschuprows.py b/src/torchmetrics/nominal/tschuprows.py index 2e1b40fd261..8d980bc2f0a 100644 --- a/src/torchmetrics/nominal/tschuprows.py +++ b/src/torchmetrics/nominal/tschuprows.py @@ -93,12 +93,16 @@ def update(self, preds: Tensor, target: Tensor) -> None: """Update state with predictions and targets. Args: - preds: 1D or 2D tensor of categorical (nominal) data - - 1D shape: (batch_size,) - - 2D shape: (batch_size, num_classes) - target: 1D or 2D tensor of categorical (nominal) data - - 1D shape: (batch_size,) - - 2D shape: (batch_size, num_classes) + preds: 1D or 2D tensor of categorical (nominal) data: + + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + + target: 1D or 2D tensor of categorical (nominal) data: + + - 1D shape: (batch_size,) + - 2D shape: (batch_size, num_classes) + """ confmat = _tschuprows_t_update(preds, target, self.num_classes, self.nan_strategy, self.nan_replace_value) self.confmat += confmat From 8e648366e91944ff9a84a0188e3a4ff5797fa28a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Nov 2022 11:23:28 +0000 Subject: [PATCH 19/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/torchmetrics/functional/nominal/pearson.py | 
16 +++++++--------- .../functional/nominal/tschuprows.py | 12 ++++++------ src/torchmetrics/nominal/pearson.py | 7 +++---- src/torchmetrics/nominal/tschuprows.py | 7 +++---- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/torchmetrics/functional/nominal/pearson.py b/src/torchmetrics/functional/nominal/pearson.py index 7678a43793d..43702b746cb 100644 --- a/src/torchmetrics/functional/nominal/pearson.py +++ b/src/torchmetrics/functional/nominal/pearson.py @@ -97,15 +97,15 @@ def pearsons_contingency_coefficient( Args: preds: 1D or 2D tensor of categorical (nominal) data: - + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) - + target: 1D or 2D tensor of categorical (nominal) data: - + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) - + nan_strategy: Indication of whether to replace or drop ``NaN`` values nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` @@ -138,10 +138,10 @@ def pearsons_contingency_coefficient_matrix( Args: matrix: A tensor of categorical (nominal) data, where: - + - rows represent a number of data points - columns represent a number of categorical (nominal) features - + nan_strategy: Indication of whether to replace or drop ``NaN`` values nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` @@ -167,7 +167,5 @@ def pearsons_contingency_coefficient_matrix( num_classes = len(torch.cat([x, y]).unique()) confmat = _pearsons_contingency_coefficient_update(x, y, num_classes, nan_strategy, nan_replace_value) val = _pearsons_contingency_coefficient_compute(confmat) - pearsons_cont_coef_matrix_value[i, j] = pearsons_cont_coef_matrix_value[ - j, i - ] = val + pearsons_cont_coef_matrix_value[i, j] = pearsons_cont_coef_matrix_value[j, i] = val return pearsons_cont_coef_matrix_value diff --git a/src/torchmetrics/functional/nominal/tschuprows.py b/src/torchmetrics/functional/nominal/tschuprows.py index e3579becefe..bb80f227dbd 100644 --- 
a/src/torchmetrics/functional/nominal/tschuprows.py +++ b/src/torchmetrics/functional/nominal/tschuprows.py @@ -111,15 +111,15 @@ def tschuprows_t( Args: preds: 1D or 2D tensor of categorical (nominal) data: - + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) - + target: 1D or 2D tensor of categorical (nominal) data: - + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) - + bias_correction: Indication of whether to use bias correction. nan_strategy: Indication of whether to replace or drop ``NaN`` values nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` @@ -154,10 +154,10 @@ def tschuprows_t_matrix( Args: matrix: A tensor of categorical (nominal) data, where: - + - rows represent a number of data points - columns represent a number of categorical (nominal) features - + bias_correction: Indication of whether to use bias correction. nan_strategy: Indication of whether to replace or drop ``NaN`` values nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'`` diff --git a/src/torchmetrics/nominal/pearson.py b/src/torchmetrics/nominal/pearson.py index 6de46f11eb5..5d7411b4e0d 100644 --- a/src/torchmetrics/nominal/pearson.py +++ b/src/torchmetrics/nominal/pearson.py @@ -96,15 +96,14 @@ def update(self, preds: Tensor, target: Tensor) -> None: Args: preds: 1D or 2D tensor of categorical (nominal) data: - + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) - + target: 1D or 2D tensor of categorical (nominal) data: - + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) - """ confmat = _pearsons_contingency_coefficient_update( preds, target, self.num_classes, self.nan_strategy, self.nan_replace_value diff --git a/src/torchmetrics/nominal/tschuprows.py b/src/torchmetrics/nominal/tschuprows.py index 8d980bc2f0a..4a0327fb348 100644 --- a/src/torchmetrics/nominal/tschuprows.py +++ b/src/torchmetrics/nominal/tschuprows.py @@ -94,15 +94,14 @@ def update(self, preds: Tensor, 
target: Tensor) -> None: Args: preds: 1D or 2D tensor of categorical (nominal) data: - + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) - + target: 1D or 2D tensor of categorical (nominal) data: - + - 1D shape: (batch_size,) - 2D shape: (batch_size, num_classes) - """ confmat = _tschuprows_t_update(preds, target, self.num_classes, self.nan_strategy, self.nan_replace_value) self.confmat += confmat From 915abca62686d3444471ab8e288c9ddd1e48f1ff Mon Sep 17 00:00:00 2001 From: stancld Date: Tue, 15 Nov 2022 19:26:58 +0100 Subject: [PATCH 20/22] Add doc example for function --- src/torchmetrics/functional/nominal/utils.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/torchmetrics/functional/nominal/utils.py b/src/torchmetrics/functional/nominal/utils.py index af0e3020c4e..97523d6f29c 100644 --- a/src/torchmetrics/functional/nominal/utils.py +++ b/src/torchmetrics/functional/nominal/utils.py @@ -59,7 +59,22 @@ def _compute_chi_squared(confmat: Tensor, bias_correction: bool) -> Tensor: def _drop_empty_rows_and_cols(confmat: Tensor) -> Tensor: - """Drop all rows and columns containing only zeros.""" + """Drop all rows and columns containing only zeros. 
+ + Example: + >>> import torch + >>> from torchmetrics.funcitonal.nominal.utils import _drop_empty_rows_and_cols + >>> _ = torch.manual_seed(22) + >>> matrix = torch.randint(10, size=(3, 3)) + >>> matrix[1, :] = matrix[:, 1] = 0 + >>> matrix + tensor([[9, 0, 6], + [0, 0, 0], + [2, 0, 8]]) + >>> _drop_empty_rows_and_cols(matrix) + tensor([[9, 6], + [2, 8]]) + """ confmat = confmat[confmat.sum(1) != 0] confmat = confmat[:, confmat.sum(0) != 0] return confmat From 6df6bc1f16481de36cbe2a58334adf32c27ea7ce Mon Sep 17 00:00:00 2001 From: stancld Date: Tue, 15 Nov 2022 19:39:55 +0100 Subject: [PATCH 21/22] Fix a typo --- src/torchmetrics/functional/nominal/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torchmetrics/functional/nominal/utils.py b/src/torchmetrics/functional/nominal/utils.py index 97523d6f29c..b02f533e02c 100644 --- a/src/torchmetrics/functional/nominal/utils.py +++ b/src/torchmetrics/functional/nominal/utils.py @@ -63,7 +63,7 @@ def _drop_empty_rows_and_cols(confmat: Tensor) -> Tensor: Example: >>> import torch - >>> from torchmetrics.funcitonal.nominal.utils import _drop_empty_rows_and_cols + >>> from torchmetrics.functional.nominal.utils import _drop_empty_rows_and_cols >>> _ = torch.manual_seed(22) >>> matrix = torch.randint(10, size=(3, 3)) >>> matrix[1, :] = matrix[:, 1] = 0 From b094e9bfa6fd823f585f4cf6ca84c9415fe95a60 Mon Sep 17 00:00:00 2001 From: stancld Date: Tue, 15 Nov 2022 20:10:12 +0100 Subject: [PATCH 22/22] Re-trigger CI