diff --git a/R/eval_forecasts_quantile.R b/R/eval_forecasts_quantile.R
index 6395ea7fa..745aff1e1 100644
--- a/R/eval_forecasts_quantile.R
+++ b/R/eval_forecasts_quantile.R
@@ -117,8 +117,12 @@ eval_forecasts_quantile <- function(data,
     quantile_data[, quantile_coverage := (true_value <= prediction)]
   }

-  # merge only if something was computed
-  if (any(c("aem", "quantile_coverage") %in% metrics)) {
+  # merge metrics computed on quantile data (i.e. aem, quantile_coverage) back
+  # into metrics computed on range data. One important side effect of this is
+  # that it controls whether we count the median twice for the interval score
+  # (row is then duplicated) or only once. However, merge only needs to happen
+  # if we computed either the interval score or the aem or quantile coverage
+  if (any(c("aem", "interval_score", "quantile_coverage") %in% metrics)) {
     # delete unnecessary columns before merging back
     keep_cols <- unique(c(by, "quantile", "aem", "quantile_coverage",
                           "boundary", "range"))
@@ -183,7 +187,7 @@
   }

   # if neither quantile nor range are in summarise_by, remove coverage and quantile_coverage
-  if (!("range" %in% summarise_by)) {
+  if (!("range" %in% summarise_by) & ("coverage" %in% colnames(res))) {
     res[, c("coverage") := NULL]
   }
   if (!("quantile" %in% summarise_by) & "quantile_coverage" %in% names(res)) {
diff --git a/man/interval_score.Rd b/man/interval_score.Rd
index 5ae6e1dfb..75b6153fd 100644
--- a/man/interval_score.Rd
+++ b/man/interval_score.Rd
@@ -75,6 +75,7 @@ interval_score(true_values = true_values,
                upper = upper,
                interval_range = interval_range)

+# example with missing values and separate results
 interval_score(true_values = c(true_values, NA),
                lower = c(lower, NA),
                upper = c(NA, upper),
diff --git a/tests/testthat/test-eval_forecasts.R b/tests/testthat/test-eval_forecasts.R
index 3c0e6caab..34c1d64b6 100644
--- a/tests/testthat/test-eval_forecasts.R
+++ b/tests/testthat/test-eval_forecasts.R
@@ -81,6 +81,21 @@ test_that("function produces output even if only some metrics are chosen", {
                TRUE)
 })

+test_that("WIS is the same with other metrics omitted or included", {
+  range_example_wide <- data.table::setDT(scoringutils::range_example_data_wide)
+  range_example <- scoringutils::range_wide_to_long(range_example_wide)
+
+  eval <- scoringutils::eval_forecasts(range_example,
+                                       summarise_by = c("model", "range"),
+                                       metrics = "interval_score")
+
+  eval2 <- scoringutils::eval_forecasts(range_example,
+                                        summarise_by = c("model", "range"))
+
+  expect_equal(sum(eval$interval_score),
+               sum(eval2$interval_score))
+})
+