Commit 536939f
Rename confidence to agreement
hagenw committed Aug 23, 2024
1 parent 0f347bb commit 536939f
Showing 4 changed files with 54 additions and 54 deletions.
6 changes: 3 additions & 3 deletions audpsychometric/__init__.py
@@ -2,11 +2,11 @@
from audpsychometric.core import datasets
from audpsychometric.core.datasets import list_datasets
from audpsychometric.core.datasets import read_dataset
-from audpsychometric.core.gold_standard import confidence_categorical
-from audpsychometric.core.gold_standard import confidence_numerical
+from audpsychometric.core.gold_standard import agreement_categorical
+from audpsychometric.core.gold_standard import agreement_numerical
from audpsychometric.core.gold_standard import evaluator_weighted_estimator
from audpsychometric.core.gold_standard import mode
-from audpsychometric.core.gold_standard import rater_confidence_pearson
+from audpsychometric.core.gold_standard import rater_agreement_pearson
import audpsychometric.core.reliability
from audpsychometric.core.reliability import congeneric_reliability
from audpsychometric.core.reliability import cronbachs_alpha
64 changes: 32 additions & 32 deletions audpsychometric/core/gold_standard.py
@@ -6,14 +6,14 @@
import audmetric


-def confidence_categorical(
+def agreement_categorical(
ratings: typing.Sequence,
*,
axis: int = 1,
) -> typing.Union[float, np.ndarray]:
r"""Confidence score for categorical ratings.
-The confidence for categorical data
+The agreement for categorical data
is given by the fraction of raters per item
with the rating being equal to that of the gold standard
as given by :func:`audpsychometric.mode`.
Expand All @@ -23,33 +23,33 @@ def confidence_categorical(
ratings: ratings.
When given as a 1-dimensional array,
it is treated as a row vector
-axis: axis along which the confidences are computed.
+axis: axis along which the agreement is computed.
A value of ``1``
assumes stimuli as rows
and raters as columns
Returns:
-categorical confidence score
+categorical agreement score(s)
Examples:
->>> confidence_categorical([0, 1])
+>>> agreement_categorical([0, 1])
0.5
->>> confidence_categorical(["a", "b"])
+>>> agreement_categorical(["a", "b"])
0.5
->>> confidence_categorical([1, 1, np.nan])
+>>> agreement_categorical([1, 1, np.nan])
1.0
"""
ratings = np.atleast_2d(np.array(ratings))

-def _confidence(x):
+def _agreement(x):
x = _remove_empty(x)
return np.sum(x == _mode(x)) / len(x)

-return _value_or_array(np.apply_along_axis(_confidence, axis, ratings))
+return _value_or_array(np.apply_along_axis(_agreement, axis, ratings))
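As a reading aid for the renamed function, here is a minimal self-contained sketch of what agreement_categorical computes: the fraction of raters per item whose rating equals the modal rating. It is illustrative only; the pandas dropna/mode calls stand in for the package's _remove_empty and _mode helpers, whose tie-breaking may differ.

import numpy as np
import pandas as pd

def agreement_categorical_sketch(ratings, axis=1):
    # One row per stimulus, one column per rater (for axis=1)
    ratings = np.atleast_2d(np.asarray(ratings))

    def _agreement(x):
        x = pd.Series(x).dropna()          # drop NaN/None ratings
        mode = x.mode().iloc[0]            # most frequent rating, first on ties
        return (x == mode).sum() / len(x)  # share of raters matching the mode

    scores = np.apply_along_axis(_agreement, axis, ratings)
    return scores.item() if scores.size == 1 else scores

agreement_categorical_sketch(["a", "b"])  # 0.5, matching the doctest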


-def confidence_numerical(
+def agreement_numerical(
ratings: typing.Sequence,
minimum: float,
maximum: float,
@@ -59,7 +59,7 @@ def confidence_numerical(
r"""Confidence score for numerical ratings.
.. math::
-\text{confidence}(\text{ratings}) =
+\text{agreement}(\text{ratings}) =
\max(
0, 1 - \frac{\text{std}(\text{ratings})}
{\text{maximum} - \frac{1}{2} (\text{minimum} + \text{maximum})}
@@ -73,22 +73,22 @@
it is treated as a row vector
minimum: lower limit of possible rating value
maximum: upper limit of possible rating value
-axis: axis along which the confidences are computed.
+axis: axis along which the agreement is computed.
A value of ``1``
assumes stimuli as rows
and raters as columns
Returns:
-numerical confidence score(s)
+numerical agreement score(s)
Examples:
->>> confidence_numerical([0, 1], 0, 1)
+>>> agreement_numerical([0, 1], 0, 1)
0.0
->>> confidence_numerical([0, 1], 0, 2)
+>>> agreement_numerical([0, 1], 0, 2)
0.5
->>> confidence_numerical([0, 0], 0, 1)
+>>> agreement_numerical([0, 0], 0, 1)
1.0
->>> confidence_numerical([0, np.nan], 0, 1)
+>>> agreement_numerical([0, np.nan], 0, 1)
nan
"""
@@ -140,11 +140,11 @@ def evaluator_weighted_estimator(
"""
ratings = np.array(ratings)
-confidences = rater_confidence_pearson(ratings, axis=axis)
+agreements = rater_agreement_pearson(ratings, axis=axis)
# Ensure columns represent different raters
if axis == 0:
ratings = ratings.T
-return _value_or_array(np.inner(ratings, confidences) / np.sum(confidences))
+return _value_or_array(np.inner(ratings, agreements) / np.sum(agreements))
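Only the tail of evaluator_weighted_estimator is visible above, but the shown return line is enough to replay the weighting on the doctest matrix used further down for rater_agreement_pearson; the EWE numbers in the last comment are rounded and added here for illustration.

import numpy as np
import audpsychometric

ratings = np.array([[1, 1, 0],
                    [2, 2, 1],
                    [2, 2, 2]])

# Per-rater weights: agreement with the mean rating of all other raters
weights = audpsychometric.rater_agreement_pearson(ratings)
# array([0.94491118, 0.94491118, 0.8660254 ]), per the doctest below

# EWE per stimulus: agreement-weighted mean of the raters' scores
ewe = np.inner(ratings, weights) / np.sum(weights)
# approximately [0.686, 1.686, 2.0]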


def mode(
@@ -197,37 +197,37 @@ def mode(
)


-def rater_confidence_pearson(
+def rater_agreement_pearson(
ratings: typing.Sequence,
*,
axis: int = 1,
) -> np.ndarray:
"""Calculate rater confidences.
"""Calculate rater agreements.
-Calculate the confidence of a rater
+Calculate the agreement of a rater
by the correlation of a rater
with the mean score of all other raters.
-This should not be confused with the confidence value
+This should not be confused with the agreement value
that relates to a rated stimulus,
-e.g. :func:`audpsychometric.confidence_numerical`.
+e.g. :func:`audpsychometric.agreement_numerical`.
Args:
ratings: ratings.
Has to contain more than one rater
and more than one stimulus
-axis: axis along which the rater confidence is computed.
+axis: axis along which the rater agreement is computed.
A value of ``1``
assumes stimuli as rows
and raters as columns
Returns:
-rater confidences
+rater agreements
Examples:
->>> rater_confidence_pearson([[1, 1, 0], [2, 2, 1]])
+>>> rater_agreement_pearson([[1, 1, 0], [2, 2, 1]])
array([1., 1., 1.])
->>> rater_confidence_pearson([[1, 1, 0], [2, 2, 1], [2, 2, 2]])
+>>> rater_agreement_pearson([[1, 1, 0], [2, 2, 1], [2, 2, 2]])
array([0.94491118, 0.94491118, 0.8660254 ])
"""
@@ -241,17 +241,17 @@ def rater_confidence_pearson(
# which miss ratings for one rater or more
ratings = ratings[:, ~np.isnan(ratings).any(axis=0)]

-# Calculate confidence as Pearson Correlation Coefficient
+# Calculate agreement as Pearson Correlation Coefficient
# between the raters' ratings
# and the average ratings of all other raters
-confidences = []
+agreements = []
for n in range(ratings.shape[1]):
ratings_selected_rater = ratings[:, n]
average_ratings_other_raters = np.delete(ratings, n, axis=1).mean(axis=1)
-confidences.append(
+agreements.append(
audmetric.pearson_cc(ratings_selected_rater, average_ratings_other_raters)
)
-return np.array(confidences)
+return np.array(agreements)
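The doctest values above can be sanity-checked without audmetric: assuming audmetric.pearson_cc matches numpy's Pearson definition, np.corrcoef reproduces them.

import numpy as np

ratings = np.array([[1, 1, 0],
                    [2, 2, 1],
                    [2, 2, 2]], dtype=float)

agreements = []
for n in range(ratings.shape[1]):
    rater = ratings[:, n]
    others = np.delete(ratings, n, axis=1).mean(axis=1)
    # Pearson correlation of this rater with the mean of the rest
    agreements.append(np.corrcoef(rater, others)[0, 1])

print(np.array(agreements))  # [0.94491118 0.94491118 0.8660254 ]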


def _value_or_array(values: np.ndarray) -> typing.Union[float, np.ndarray]:
6 changes: 3 additions & 3 deletions docs/api-src/audpsychometric.rst
@@ -56,11 +56,11 @@ Gold Standard Calculation
:toctree:
:nosignatures:

-confidence_categorical
-confidence_numerical
+agreement_categorical
+agreement_numerical
evaluator_weighted_estimator
mode
-rater_confidence_pearson
+rater_agreement_pearson
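After the rename, the documented entry points are exercised like this (a usage sketch; the return values are taken from the doctests in gold_standard.py above):

import audpsychometric

audpsychometric.agreement_categorical(["a", "b"])    # 0.5
audpsychometric.agreement_numerical([0, 1], 0, 2)    # 0.5
audpsychometric.rater_agreement_pearson([[1, 1, 0], [2, 2, 1]])
# array([1., 1., 1.])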


Demo Datasets
32 changes: 16 additions & 16 deletions tests/test_gold_standard.py
@@ -66,26 +66,26 @@ def to_list_array_frame_series(
([None, np.nan, 1], 1, 1.0),
],
)
-def test_confidence_categorical(ratings, axis, expected):
-"""Test confidence for categorical ratings.
+def test_agreement_categorical(ratings, axis, expected):
+"""Test agreement for categorical ratings.
Args:
ratings: ratings as list
-axis: axis along which to compute confidence
-expected: expected confidence score(s)
+axis: axis along which to compute agreement
+expected: expected agreement score(s)
"""
for x in to_list_array_frame_series(ratings):
np.testing.assert_equal(
-audpsychometric.confidence_categorical(x, axis=axis),
+audpsychometric.agreement_categorical(x, axis=axis),
expected,
)


-# The expected confidence value for this test
+# The expected agreement value for this test
# can be calculated by:
#
-# def confidence(rating, minimum, maximum):
+# def agreement(rating, minimum, maximum):
# max_std = (maximum - minimum) / 2
# std = np.std(rating)
# std_norm = np.clip(std/max_std, 0, 1)
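The collapsed lines cut this helper off after std_norm; a complete version, assuming the final step simply returns 1 - std_norm in line with the agreement_numerical docstring formula, would read:

import numpy as np

def agreement(rating, minimum, maximum):
    max_std = (maximum - minimum) / 2
    std = np.std(rating)
    std_norm = np.clip(std / max_std, 0, 1)
    return 1 - std_norm  # assumed closing line, not shown in the hunk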
@@ -130,31 +130,31 @@ def test_confidence_categorical(ratings, axis, expected):
),
],
)
-def test_confidence_numerical(ratings, minimum, maximum, axis, expected):
-"""Test confidence for numerical ratings.
+def test_agreement_numerical(ratings, minimum, maximum, axis, expected):
+"""Test agreement for numerical ratings.
If only a vector is given for ``ratings``,
it should be treated as a column vector.
A value of ``0`` for ``axis``
-should compute the confidence scores along rows.
+should compute the agreement scores along rows.
Args:
ratings: ratings as list
minimum: lower limit of ratings
maximum: upper limit of ratings
-axis: axis along which to compute confidence
-expected: expected confidence score(s)
+axis: axis along which to compute agreement
+expected: expected agreement score(s)
"""
for x in to_list_array_frame_series(ratings):
np.testing.assert_equal(
-audpsychometric.confidence_numerical(x, minimum, maximum, axis=axis),
+audpsychometric.agreement_numerical(x, minimum, maximum, axis=axis),
expected,
)


-def test_rater_confidence_pearson(df_holzinger_swineford):
-"""Test rater confidence."""
+def test_rater_agreement_pearson(df_holzinger_swineford):
+"""Test rater agreement."""
# there is a very unreliable rater in this set with .24
expected = np.array(
[
@@ -170,7 +170,7 @@ def test_rater_confidence_pearson(df_holzinger_swineford):
],
)
np.testing.assert_allclose(
-audpsychometric.rater_confidence_pearson(df_holzinger_swineford),
+audpsychometric.rater_agreement_pearson(df_holzinger_swineford),
expected,
)

