From 1e9ba61822113aa791980adf5e3e570b21345abc Mon Sep 17 00:00:00 2001
From: Michel Lang <michellang@gmail.com>
Date: Tue, 29 Oct 2019 22:06:46 +0100
Subject: [PATCH] added bacc

---
 DESCRIPTION                   |   1 +
 NAMESPACE                     |   1 +
 NEWS.md                       |   1 +
 R/classif_bacc.R              |  47 ++++++++++++
 R/roxygen.R                   |   4 ++
 inst/references.bib           | 132 ++++++++++++++++++----------------
 man/acc.Rd                    |   4 +-
 man/bacc.Rd                   |  73 +++++++++++++++++++
 man/ce.Rd                     |   2 +-
 man/classif_params.Rd         |   5 ++
 man/logloss.Rd                |   2 +-
 man/measures.Rd               |   2 +-
 tests/testthat/test_classif.R |  18 +++++
 13 files changed, 225 insertions(+), 67 deletions(-)
 create mode 100644 R/classif_bacc.R
 create mode 100644 man/bacc.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 7277db96..de38f51f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -46,6 +46,7 @@ Collate:
     'binary_tp.R'
     'binary_tpr.R'
     'classif_acc.R'
+    'classif_bacc.R'
     'classif_ce.R'
     'classif_logloss.R'
     'confusion_matrix.R'
diff --git a/NAMESPACE b/NAMESPACE
index bd4e6bf9..c4dc1aed 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(acc)
 export(auc)
+export(bacc)
 export(bias)
 export(ce)
 export(confusion_matrix)
diff --git a/NEWS.md b/NEWS.md
index 6893d051..f6fd2859 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,7 @@
 
 * Fixed some tests which stochastically failed.
 * The name / title of the measure is now also stored in the meta data.
+* Added new measure `bacc` (Balanced Accuracy).
 
 # mlr3measures 0.1.0
 
diff --git a/R/classif_bacc.R b/R/classif_bacc.R
new file mode 100644
index 00000000..99f94df5
--- /dev/null
+++ b/R/classif_bacc.R
@@ -0,0 +1,47 @@
+#' @title Balanced Accuracy
+#'
+#' @description
+#' Computes the weighted balanced accuracy, suitable for imbalanced data sets.
+#' It is defined analogously to the definition in [sklearn](https://scikit-learn.org/).
+#'
+#' First, the sample weights \eqn{w} are normalized per class:
+#' \deqn{
+#'  \hat{w}_i = \frac{w_i}{\sum_j 1(t_j = t_i) w_j}.
+#' }{
+#'  w_hat[i] = w[i] / sum((t == t[i]) * w).
+#' }
+#' The balanced accuracy is calculated as
+#' \deqn{
+#'  \frac{1}{\sum_i \hat{w}_i} \sum_i 1(r_i = t_i) \hat{w}_i.
+#' }{
+#'  1 / sum(w_hat) * sum((r == t) * w_hat).
+#' }
+#'
+#' @references
+#' \cite{brodersen_2010}
+#'
+#' \cite{guyon_2015}
+#'
+#' @templateVar mid bacc
+#' @template classif_template
+#'
+#' @inheritParams classif_params
+#' @template classif_example
+#' @export
+bacc = function(truth, response, sample_weights = NULL, ...) {
+  assert_classif(truth, response = response)
+  if (is.null(sample_weights)) {
+    sample_weights = rep(1, length(truth))
+  } else { # one weight per observation; a shorter vector would be silently recycled below
+    assert_numeric(sample_weights, lower = 0, any.missing = FALSE, len = length(truth))
+  }
+  # rescale the weights so that the weights within each class of `truth` sum to 1
+  label_weights = vapply(split(sample_weights, truth), sum, NA_real_)
+  sample_weights = sample_weights / label_weights[truth]
+  sample_weights[is.na(sample_weights)] = 0 # 0 / 0 = NaN if a class has total weight 0
+  # weighted share of correct predictions, using the per-class normalized weights
+  sum((truth == response) * sample_weights) / sum(sample_weights)
+}
+
+#' @include measures.R
+add_measure(bacc, "Balanced Accuracy", "classif", 0, 1, FALSE)
diff --git a/R/roxygen.R b/R/roxygen.R
index d46b01b4..05519808 100644
--- a/R/roxygen.R
+++ b/R/roxygen.R
@@ -50,6 +50,10 @@ NULL
 #'   Matrix of predicted probabilities, each column is a vector of probabilities for a
 #'   specific class label.
 #'   Columns must be named with levels of `truth`.
+#' @param sample_weights :: `numeric()`\cr
+#'   Non-negative sample weights.
+#'   Must have the same length as `truth`.
+#'   Defaults to equal sample weights.
 #' @param na_value :: `numeric(1)`\cr
 #'   Value that should be returned if the measure is not defined for the input
 #'   (as described in the note). Default is `NaN`.
diff --git a/inst/references.bib b/inst/references.bib
index 99003a65..64f3bf0b 100644
--- a/inst/references.bib
+++ b/inst/references.bib
@@ -1,71 +1,79 @@
-@article{rosset_2006,
-    doi = {10.1007/s10115-006-0037-3},
-    year = {2006},
-    month = oct,
-    publisher = {Springer Science and Business Media {LLC}},
-    volume = {12},
-    number = {3},
-    pages = {331--353},
-    author = {Saharon Rosset and Claudia Perlich and Bianca Zadrozny},
-    title = {Ranking-based evaluation of regression models},
-    journal = {Knowledge and Information Systems}
+@inproceedings{brodersen_2010,
+	title        = {The Balanced Accuracy and Its Posterior Distribution},
+	author       = {Kay Henning Brodersen and Cheng Soon Ong and Klaas Enno Stephan and Joachim M. Buhmann},
+	year         = 2010,
+	booktitle    = {2010 20th International Conference on Pattern Recognition},
+	publisher    = {{IEEE}},
+	doi          = {10.1109/icpr.2010.764}
 }
-
 @article{de_myttenaere_2016,
-    title={Mean Absolute Percentage Error for regression models},
-    volume={192},
-    ISSN={0925-2312},
-    DOI={10.1016/j.neucom.2015.12.114},
-    journal={Neurocomputing},
-    publisher={Elsevier BV},
-    author={de Myttenaere, Arnaud and Golden, Boris and Le Grand, Bénédicte and Rossi, Fabrice},
-    year={2016},
-    month={Jun},
-    pages={38–48}
+	title        = {Mean Absolute Percentage Error for regression models},
+	author       = {de Myttenaere, Arnaud and Golden, Boris and Le Grand, Bénédicte and Rossi, Fabrice},
+	year         = 2016,
+	journal      = {Neurocomputing},
+	publisher    = {Elsevier BV},
+	volume       = 192,
+	pages        = {38–48},
+	doi          = {10.1016/j.neucom.2015.12.114},
+	issn         = {0925-2312}
 }
-
-@article{matthews_1975,
-    doi = {10.1016/0005-2795(75)90109-9},
-    year = {1975},
-    month = oct,
-    publisher = {Elsevier {BV}},
-    volume = {405},
-    number = {2},
-    pages = {442--451},
-    author = {Brian W. Matthews},
-    title = {Comparison of the predicted and observed secondary structure of T4 phage lysozyme},
-    journal = {Biochimica et Biophysica Acta ({BBA}) - Protein Structure}
+@inproceedings{guyon_2015,
+	title        = {Design of the 2015 {ChaLearn} {AutoML} challenge},
+	author       = {Isabelle Guyon and Kristin Bennett and Gavin Cawley and Hugo Jair Escalante and Sergio Escalera and  Tin Kam Ho and Nuria Macia and Bisakha Ray and Mehreen Saeed and Alexander Statnikov and Evelyne Viegas},
+	year         = 2015,
+	booktitle    = {2015 International Joint Conference on Neural Networks ({IJCNN})},
+	publisher    = {{IEEE}},
+	doi          = {10.1109/ijcnn.2015.7280767}
 }
-
-@article{sasaki_2007,
-    title        = {The truth of the F-measure},
-    author       = {Sasaki, Yutaka and others},
-    year         = 2007,
-    journal      = {Teach Tutor mater},
-    volume       = 1,
-    number       = 5,
-    pages        = {1--5},
-    url          = {https://www.cs.odu.edu/~mukka/cs795sum10dm/Lecturenotes/Day3/F-measure-YS-26Oct07.pdf}
+@article{matthews_1975,
+	title        = {Comparison of the predicted and observed secondary structure of T4 phage lysozyme},
+	author       = {Brian W. Matthews},
+	year         = 1975,
+	journal      = {Biochimica et Biophysica Acta ({BBA}) - Protein Structure},
+	publisher    = {Elsevier {BV}},
+	volume       = 405,
+	number       = 2,
+	pages        = {442--451},
+	doi          = {10.1016/0005-2795(75)90109-9}
 }
-
 @book{rijsbergen_1979,
-    author = {Rijsbergen, C. J. Van},
-    title = {Information Retrieval},
-    year = {1979},
-    isbn = {0408709294},
-    edition = {2nd},
-    publisher = {Butterworth-Heinemann},
-    address = {Newton, MA, USA},
+	title        = {Information Retrieval},
+	author       = {Rijsbergen, C. J. Van},
+	year         = 1979,
+	publisher    = {Butterworth-Heinemann},
+	address      = {Newton, MA, USA},
+	isbn         = {0408709294},
+	edition      = {2nd}
+}
+@article{rosset_2006,
+	title        = {Ranking-based evaluation of regression models},
+	author       = {Saharon Rosset and Claudia Perlich and Bianca Zadrozny},
+	year         = 2006,
+	journal      = {Knowledge and Information Systems},
+	publisher    = {Springer Science and Business Media {LLC}},
+	volume       = 12,
+	number       = 3,
+	pages        = {331--353},
+	doi          = {10.1007/s10115-006-0037-3}
+}
+@article{sasaki_2007,
+	title        = {The truth of the F-measure},
+	author       = {Sasaki, Yutaka and others},
+	year         = 2007,
+	journal      = {Teach Tutor mater},
+	volume       = 1,
+	number       = 5,
+	pages        = {1--5},
+	url          = {https://www.cs.odu.edu/~mukka/cs795sum10dm/Lecturenotes/Day3/F-measure-YS-26Oct07.pdf}
 }
-
 @article{youden_1950,
-    doi = {10.1002/1097-0142(1950)3:1<32::aid-cncr2820030106>3.0.co;2-3},
-    year = {1950},
-    publisher = {Wiley},
-    volume = {3},
-    number = {1},
-    pages = {32--35},
-    author = {W. J. Youden},
-    title = {Index for rating diagnostic tests},
-    journal = {Cancer}
+	title        = {Index for rating diagnostic tests},
+	author       = {W. J. Youden},
+	year         = 1950,
+	journal      = {Cancer},
+	publisher    = {Wiley},
+	volume       = 3,
+	number       = 1,
+	pages        = {32--35},
+	doi          = {10.1002/1097-0142(1950)3:1<32::aid-cncr2820030106>3.0.co;2-3}
 }
diff --git a/man/acc.Rd b/man/acc.Rd
index d7a26d7f..15359923 100644
--- a/man/acc.Rd
+++ b/man/acc.Rd
@@ -46,8 +46,8 @@ response = factor(sample(lvls, 10, replace = TRUE), levels = lvls)
 acc(truth, response)
 }
 \seealso{
-Other Classification Measures: \code{\link{ce}},
-  \code{\link{logloss}}
+Other Classification Measures: \code{\link{bacc}},
+  \code{\link{ce}}, \code{\link{logloss}}
 }
 \concept{Classification Measures}
 \concept{classification_measure}
diff --git a/man/bacc.Rd b/man/bacc.Rd
new file mode 100644
index 00000000..3283a14f
--- /dev/null
+++ b/man/bacc.Rd
@@ -0,0 +1,73 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/classif_bacc.R
+\name{bacc}
+\alias{bacc}
+\title{Balanced Accuracy}
+\usage{
+bacc(truth, response, sample_weights = NULL, ...)
+}
+\arguments{
+\item{truth}{:: \code{factor()}\cr
+True (observed) labels.
+Must have the same levels and length as \code{response}.}
+
+\item{response}{:: \code{factor()}\cr
+Predicted response labels.
+Must have the same levels and length as \code{truth}.}
+
+\item{sample_weights}{:: \code{numeric()}\cr
+Non-negative sample weights.
+Must have the same length as \code{truth}.
+Defaults to equal sample weights.}
+
+\item{...}{:: \code{any}\cr
+Additional arguments. Currently ignored.}
+}
+\value{
+Performance value as \code{numeric(1)}.
+}
+\description{
+Computes the weighted balanced accuracy, suitable for imbalanced data sets.
+It is defined analogously to the definition in \href{https://scikit-learn.org/}{sklearn}.
+
+First, the sample weights \eqn{w} are normalized per class:
+\deqn{
+ \hat{w}_i = \frac{w_i}{\sum_j 1(t_j = t_i) w_j}.
+}{
+ w_hat[i] = w[i] / sum((t == t[i]) * w).
+}
+The balanced accuracy is calculated as
+\deqn{
+ \frac{1}{\sum_i \hat{w}_i} \sum_i 1(r_i = t_i) \hat{w}_i.
+}{
+ 1 / sum(w_hat) * sum((r == t) * w_hat).
+}
+}
+\section{Meta Information}{
+
+\itemize{
+\item Type: \code{"classif"}
+\item Range: \eqn{[0, 1]}{[0, 1]}
+\item Minimize: \code{FALSE}
+\item Required prediction: \code{response}
+}
+}
+
+\examples{
+set.seed(1)
+lvls = c("a", "b", "c")
+truth = factor(sample(lvls, 10, replace = TRUE), levels = lvls)
+response = factor(sample(lvls, 10, replace = TRUE), levels = lvls)
+bacc(truth, response)
+}
+\references{
+\cite{brodersen_2010}
+
+\cite{guyon_2015}
+}
+\seealso{
+Other Classification Measures: \code{\link{acc}},
+  \code{\link{ce}}, \code{\link{logloss}}
+}
+\concept{Classification Measures}
+\concept{classification_measure}
diff --git a/man/ce.Rd b/man/ce.Rd
index 579f8513..9fb6b3a2 100644
--- a/man/ce.Rd
+++ b/man/ce.Rd
@@ -47,7 +47,7 @@ ce(truth, response)
 }
 \seealso{
 Other Classification Measures: \code{\link{acc}},
-  \code{\link{logloss}}
+  \code{\link{bacc}}, \code{\link{logloss}}
 }
 \concept{Classification Measures}
 \concept{classification_measure}
diff --git a/man/classif_params.Rd b/man/classif_params.Rd
index 0f9e5c25..f0cf77a9 100644
--- a/man/classif_params.Rd
+++ b/man/classif_params.Rd
@@ -17,6 +17,11 @@ Matrix of predicted probabilities, each column is a vector of probabilities for
 specific class label.
 Columns must be named with levels of \code{truth}.}
 
+\item{sample_weights}{:: \code{numeric()}\cr
+Non-negative sample weights.
+Must have the same length as \code{truth}.
+Defaults to equal sample weights.}
+
 \item{na_value}{:: \code{numeric(1)}\cr
 Value that should be returned if the measure is not defined for the input
 (as described in the note). Default is \code{NaN}.}
diff --git a/man/logloss.Rd b/man/logloss.Rd
index f068ae61..55ae3a84 100644
--- a/man/logloss.Rd
+++ b/man/logloss.Rd
@@ -54,7 +54,7 @@ logloss(truth, prob)
 }
 \seealso{
 Other Classification Measures: \code{\link{acc}},
-  \code{\link{ce}}
+  \code{\link{bacc}}, \code{\link{ce}}
 }
 \concept{Classification Measures}
 \concept{classification_measure}
diff --git a/man/measures.Rd b/man/measures.Rd
index e120b37b..5245834b 100644
--- a/man/measures.Rd
+++ b/man/measures.Rd
@@ -4,7 +4,7 @@
 \name{measures}
 \alias{measures}
 \title{Measure Registry}
-\format{An object of class \code{environment} of length 44.}
+\format{An object of class \code{environment} of length 45.}
 \usage{
 measures
 }
diff --git a/tests/testthat/test_classif.R b/tests/testthat/test_classif.R
index 0f8e5c34..c5287c9e 100644
--- a/tests/testthat/test_classif.R
+++ b/tests/testthat/test_classif.R
@@ -70,6 +70,24 @@ test_that("tests from Metrics", {
   # expect_equal(kappa, 0.624536446425734)
 })
 
+test_that("bacc", {
+  truth = factor(c("a", "a", "b", "b"), levels = c("a", "b"))
+  response = factor(c("a", "a", "b", "a"), levels = c("a", "b"))
+  expect_equal(bacc(truth, response), 0.75) # per-class hit rates: a = 2/2, b = 1/2
+  expect_equal(bacc(truth, response, sample_weights = c(0.25, 0.25, 0.25, 0.25)), 0.75)
+  expect_equal(bacc(truth, response, sample_weights = c(0.25, 0.25, 0.25, 1)), 0.6)
+  # imbalanced binary case: five observations of class "a", one of class "b"
+  truth = factor(c("a", "a", "a", "a", "a", "b"), levels = c("a", "b"))
+  response = factor(c("a", "a", "a", "a", "b", "b"), levels = c("a", "b"))
+  expect_equal(bacc(truth, response), 0.9) # per-class hit rates: a = 4/5, b = 1/1
+  expect_equal(bacc(truth, response, sample_weights = c(0, 0, 0, 0, 0, 1)), 1)
+  expect_equal(bacc(truth, response, sample_weights = c(0, 0, 0, 0, 0.5, 0.5)), 0.5)
+  # three-class case; the result is rounded before comparison
+  truth = factor(c("c", "a", "a", "a", "a", "b"), levels = c("a", "b", "c"))
+  response = factor(c("c", "a", "a", "a", "b", "b"), levels = c("a", "b", "c"))
+  expect_equal(round(bacc(truth, response), 3), 0.917)
+})
+
 # test_that("ber", {
 #   truth = factor(c("a", "a", "b", "b", "c", "c"), levels = c("a", "b", "c"))
 #   response = factor(c("a", "a", "b", "b", "c", "c"), levels = c("a", "b", "c"))