Skip to content

Commit

Permalink
added bacc
Browse files Browse the repository at this point in the history
  • Loading branch information
mllg committed Oct 29, 2019
1 parent 3278db5 commit 1e9ba61
Show file tree
Hide file tree
Showing 13 changed files with 225 additions and 67 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Collate:
'binary_tp.R'
'binary_tpr.R'
'classif_acc.R'
'classif_bacc.R'
'classif_ce.R'
'classif_logloss.R'
'confusion_matrix.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(acc)
export(auc)
export(bacc)
export(bias)
export(ce)
export(confusion_matrix)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

* Fixed some tests which stochastically failed.
* The name / title of the measure is now also stored in the meta data.
* Added new measure `bacc` (Balanced Accuracy).

# mlr3measures 0.1.0

Expand Down
47 changes: 47 additions & 0 deletions R/classif_bacc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#' @title Balanced Accuracy
#'
#' @description
#' Computes the weighted balanced accuracy, suitable for imbalanced data sets.
#' It is defined analogously to the definition in [sklearn](https://scikit-learn.org/).
#'
#' First, the sample weights \eqn{w} are normalized per class, i.e. each weight
#' is divided by the total weight of all observations sharing the same true label:
#' \deqn{
#'   \hat{w}_i = \frac{w_i}{\sum_j 1(t_j = t_i) w_j}.
#' }{
#'   w_hat[i] = w[i] / sum((t == t[i]) * w).
#' }
#' The balanced accuracy is calculated as
#' \deqn{
#'   \frac{1}{\sum_i \hat{w}_i} \sum_i 1(r_i = t_i) \hat{w}_i.
#' }{
#'   1 / sum(w_hat) * sum((r == t) * w_hat).
#' }
#'
#' Classes whose total weight is zero do not contribute to the score.
#' If all sample weights are zero, the result is `NaN`.
#'
#' @references
#' \cite{brodersen_2010}
#'
#' \cite{guyon_2015}
#'
#' @templateVar mid bacc
#' @template classif_template
#'
#' @inheritParams classif_params
#' @template classif_example
#' @export
bacc = function(truth, response, sample_weights = NULL, ...) {
  assert_classif(truth, response = response)
  if (is.null(sample_weights)) {
    sample_weights = rep(1, length(truth))
  } else {
    # Enforce one weight per observation; without the length check a
    # mismatched vector would be silently recycled by split() and `/` below.
    assert_numeric(sample_weights, lower = 0, any.missing = FALSE,
      len = length(truth))
  }

  # Total weight per true class, in the order of levels(truth).
  label_weights = vapply(split(sample_weights, truth), sum, NA_real_)

  # Indexing with the factor uses its integer codes, i.e. selects the total
  # weight of each observation's own class.
  sample_weights = sample_weights / label_weights[truth]

  # 0 / 0 (class with zero total weight) yields NaN; such observations
  # get a normalized weight of 0.
  sample_weights[is.na(sample_weights)] = 0

  sum((truth == response) * sample_weights) / sum(sample_weights)
}

# Presumably registers `bacc` in the package's table of measures.
# NOTE(review): the positional arguments look like title, task type, lower
# bound (0), upper bound (1) and minimize (FALSE) -- confirm against the
# definition of add_measure() in measures.R.
#' @include measures.R
add_measure(bacc, "Balanced Accuracy", "classif", 0, 1, FALSE)
4 changes: 4 additions & 0 deletions R/roxygen.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ NULL
#' Matrix of predicted probabilities, each column is a vector of probabilities for a
#' specific class label.
#' Columns must be named with levels of `truth`.
#' @param sample_weights :: `numeric()`\cr
#' Non-negative sample weights.
#' Must have the same length as `truth`.
#' Defaults to equal sample weights.
#' @param na_value :: `numeric(1)`\cr
#' Value that should be returned if the measure is not defined for the input
#' (as described in the note). Default is `NaN`.
Expand Down
132 changes: 70 additions & 62 deletions inst/references.bib
Original file line number Diff line number Diff line change
@@ -1,71 +1,79 @@
@article{rosset_2006,
doi = {10.1007/s10115-006-0037-3},
year = {2006},
month = oct,
publisher = {Springer Science and Business Media {LLC}},
volume = {12},
number = {3},
pages = {331--353},
author = {Saharon Rosset and Claudia Perlich and Bianca Zadrozny},
title = {Ranking-based evaluation of regression models},
journal = {Knowledge and Information Systems}
@inproceedings{brodersen_2010,
title = {The Balanced Accuracy and Its Posterior Distribution},
author = {Kay Henning Brodersen and Cheng Soon Ong and Klaas Enno Stephan and Joachim M. Buhmann},
year = 2010,
booktitle = {2010 20th International Conference on Pattern Recognition},
publisher = {{IEEE}},
doi = {10.1109/icpr.2010.764}
}

@article{de_myttenaere_2016,
title={Mean Absolute Percentage Error for regression models},
volume={192},
ISSN={0925-2312},
DOI={10.1016/j.neucom.2015.12.114},
journal={Neurocomputing},
publisher={Elsevier BV},
author={de Myttenaere, Arnaud and Golden, Boris and Le Grand, Bénédicte and Rossi, Fabrice},
year={2016},
month={Jun},
pages={38–48}
title = {Mean Absolute Percentage Error for regression models},
author = {de Myttenaere, Arnaud and Golden, Boris and Le Grand, Bénédicte and Rossi, Fabrice},
year = 2016,
journal = {Neurocomputing},
publisher = {Elsevier BV},
volume = 192,
pages = {38--48},
doi = {10.1016/j.neucom.2015.12.114},
issn = {0925-2312}
}

@article{matthews_1975,
doi = {10.1016/0005-2795(75)90109-9},
year = {1975},
month = oct,
publisher = {Elsevier {BV}},
volume = {405},
number = {2},
pages = {442--451},
author = {Brian W. Matthews},
title = {Comparison of the predicted and observed secondary structure of T4 phage lysozyme},
journal = {Biochimica et Biophysica Acta ({BBA}) - Protein Structure}
@inproceedings{guyon_2015,
title = {Design of the 2015 {ChaLearn} {AutoML} challenge},
author = {Isabelle Guyon and Kristin Bennett and Gavin Cawley and Hugo Jair Escalante and Sergio Escalera and Tin Kam Ho and Nuria Macia and Bisakha Ray and Mehreen Saeed and Alexander Statnikov and Evelyne Viegas},
year = 2015,
booktitle = {2015 International Joint Conference on Neural Networks ({IJCNN})},
publisher = {{IEEE}},
doi = {10.1109/ijcnn.2015.7280767}
}

@article{sasaki_2007,
title = {The truth of the F-measure},
author = {Sasaki, Yutaka and others},
year = 2007,
journal = {Teach Tutor mater},
volume = 1,
number = 5,
pages = {1--5},
url = {https://www.cs.odu.edu/~mukka/cs795sum10dm/Lecturenotes/Day3/F-measure-YS-26Oct07.pdf}
@article{matthews_1975,
title = {Comparison of the predicted and observed secondary structure of T4 phage lysozyme},
author = {Brian W. Matthews},
year = 1975,
journal = {Biochimica et Biophysica Acta ({BBA}) - Protein Structure},
publisher = {Elsevier {BV}},
volume = 405,
number = 2,
pages = {442--451},
doi = {10.1016/0005-2795(75)90109-9}
}

@book{rijsbergen_1979,
author = {Rijsbergen, C. J. Van},
title = {Information Retrieval},
year = {1979},
isbn = {0408709294},
edition = {2nd},
publisher = {Butterworth-Heinemann},
address = {Newton, MA, USA},
title = {Information Retrieval},
author = {Rijsbergen, C. J. Van},
year = 1979,
publisher = {Butterworth-Heinemann},
address = {Newton, MA, USA},
isbn = {0408709294},
edition = {2nd}
}
@article{rosset_2006,
title = {Ranking-based evaluation of regression models},
author = {Saharon Rosset and Claudia Perlich and Bianca Zadrozny},
year = 2006,
journal = {Knowledge and Information Systems},
publisher = {Springer Science and Business Media {LLC}},
volume = 12,
number = 3,
pages = {331--353},
doi = {10.1007/s10115-006-0037-3}
}
@article{sasaki_2007,
title = {The truth of the F-measure},
author = {Sasaki, Yutaka and others},
year = 2007,
journal = {Teach Tutor mater},
volume = 1,
number = 5,
pages = {1--5},
url = {https://www.cs.odu.edu/~mukka/cs795sum10dm/Lecturenotes/Day3/F-measure-YS-26Oct07.pdf}
}

@article{youden_1950,
doi = {10.1002/1097-0142(1950)3:1<32::aid-cncr2820030106>3.0.co;2-3},
year = {1950},
publisher = {Wiley},
volume = {3},
number = {1},
pages = {32--35},
author = {W. J. Youden},
title = {Index for rating diagnostic tests},
journal = {Cancer}
title = {Index for rating diagnostic tests},
author = {W. J. Youden},
year = 1950,
journal = {Cancer},
publisher = {Wiley},
volume = 3,
number = 1,
pages = {32--35},
doi = {10.1002/1097-0142(1950)3:1<32::aid-cncr2820030106>3.0.co;2-3}
}
4 changes: 2 additions & 2 deletions man/acc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

73 changes: 73 additions & 0 deletions man/bacc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/ce.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions man/classif_params.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/logloss.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/measures.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions tests/testthat/test_classif.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,24 @@ test_that("tests from Metrics", {
# expect_equal(kappa, 0.624536446425734)
})

test_that("bacc", {
  # Small helper to build factors with an explicit level set.
  as_labels = function(values, lvls) factor(values, levels = lvls)

  # Two balanced classes, one misclassified "b".
  two_classes = c("a", "b")
  truth = as_labels(c("a", "a", "b", "b"), two_classes)
  response = as_labels(c("a", "a", "b", "a"), two_classes)
  expect_equal(bacc(truth, response), 0.75)
  # Uniform weights must not change the result.
  expect_equal(bacc(truth, response, sample_weights = c(0.25, 0.25, 0.25, 0.25)), 0.75)
  # Up-weighting the misclassified observation lowers the score.
  expect_equal(bacc(truth, response, sample_weights = c(0.25, 0.25, 0.25, 1)), 0.6)

  # Imbalanced classes, one misclassified "a".
  truth = as_labels(c("a", "a", "a", "a", "a", "b"), two_classes)
  response = as_labels(c("a", "a", "a", "a", "b", "b"), two_classes)
  expect_equal(bacc(truth, response), 0.9)
  # Only the (correct) "b" observation carries weight.
  expect_equal(bacc(truth, response, sample_weights = c(0, 0, 0, 0, 0, 1)), 1)
  # Weight split between the misclassified "a" and the correct "b".
  expect_equal(bacc(truth, response, sample_weights = c(0, 0, 0, 0, 0.5, 0.5)), 0.5)

  # Three classes.
  three_classes = c("a", "b", "c")
  truth = as_labels(c("c", "a", "a", "a", "a", "b"), three_classes)
  response = as_labels(c("c", "a", "a", "a", "b", "b"), three_classes)
  expect_equal(round(bacc(truth, response), 3), 0.917)
})

# test_that("ber", {
# truth = factor(c("a", "a", "b", "b", "c", "c"), levels = c("a", "b", "c"))
# response = factor(c("a", "a", "b", "b", "c", "c"), levels = c("a", "b", "c"))
Expand Down

0 comments on commit 1e9ba61

Please sign in to comment.