diff --git a/NAMESPACE b/NAMESPACE index ad4dbe690..846c98b15 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -58,7 +58,6 @@ export(get_metrics) export(get_pairwise_comparisons) export(get_pit) export(interval_coverage) -export(interval_coverage_deviation) export(is_forecast) export(is_forecast_binary) export(is_forecast_nominal) diff --git a/NEWS.md b/NEWS.md index 806b047b9..d6820db7d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -70,7 +70,7 @@ of our [original](https://doi.org/10.48550/arXiv.2205.07090) `scoringutils` pape ### Renamed functions - The function `find_duplicates()` was renamed to `get_duplicate_forecasts()`. -- Renamed `interval_coverage_quantile()` and `interval_coverage_dev_quantile()` to `interval_coverage()` and `interval_coverage_deviation()`, respectively. +- Renamed `interval_coverage_quantile()` to `interval_coverage()`. - "range" was consistently renamed to "interval_range" in the code. The "range"-format (which was mostly used internally) was renamed to "interval"-format - Renamed `correlation()` to `get_correlations()` and `plot_correlation()` to `plot_correlations()` - `pit()` was renamed to `get_pit()` and converted to an S3 method. @@ -83,6 +83,7 @@ of our [original](https://doi.org/10.48550/arXiv.2205.07090) `scoringutils` pape - Removed the function `merge_pred_and_obs()` that was used to merge two separate data frames with forecasts and observations. We moved its contents to a new "Deprecated functions"-vignette. - Removed `interval_coverage_sample()` as users are now expected to convert to a quantile format first before scoring. - Function `set_forecast_unit()` was deleted. Instead there is now a `forecast_unit` argument in `as_forecast_()` as well as in `get_duplicate_forecasts()`. +- Removed `interval_coverage_dev_quantile()`. Users can still access the difference between nominal and actual interval coverage using `get_coverage()`. ### Function changes - `bias_quantile()` changed the way it handles forecasts where the median is missing: The median is now imputed by linear interpolation between the innermost quantiles. Previously, we imputed the median by simply taking the mean of the innermost quantiles. diff --git a/R/default-scoring-rules.R b/R/default-scoring-rules.R index fd31bc865..0f9aae29f 100644 --- a/R/default-scoring-rules.R +++ b/R/default-scoring-rules.R @@ -226,7 +226,6 @@ get_metrics.forecast_sample <- function(x, select = NULL, exclude = NULL, ...) { #' - "interval_coverage_90" = purrr::partial( #' interval_coverage, interval_range = 90 #' ) -#' - "interval_coverage_deviation" = [interval_coverage_deviation()], #' - "ae_median" = [ae_median_quantile()] #' #' Note: The `interval_coverage_90` scoring rule is created by modifying @@ -255,7 +254,6 @@ get_metrics.forecast_quantile <- function(x, select = NULL, exclude = NULL, ...) interval_coverage_90 = purrr::partial( interval_coverage, interval_range = 90 ), - interval_coverage_deviation = interval_coverage_deviation, ae_median = ae_median_quantile ) select_metrics(all, select, exclude) diff --git a/R/metrics-quantile.R b/R/metrics-quantile.R index e5a93ce4f..3d9c76b9a 100644 --- a/R/metrics-quantile.R +++ b/R/metrics-quantile.R @@ -294,103 +294,6 @@ interval_coverage <- function(observed, predicted, } -#' @title Interval coverage deviation (for quantile-based forecasts) -#' @description -#' Check the agreement between desired and actual interval coverage -#' of a forecast. -#' -#' The function is similar to [interval_coverage()], -#' but takes all provided prediction intervals into account and -#' compares nominal interval coverage (i.e. the desired interval coverage) with -#' the actual observed interval coverage. -#' -#' A central symmetric prediction interval is defined by a lower and an -#' upper bound formed by a pair of predictive quantiles. For example, a 50% -#' prediction interval is formed by the 0.25 and 0.75 quantiles of the -#' predictive distribution. Ideally, a forecaster should aim to cover about -#' 50% of all observed values with their 50% prediction intervals, 90% of all -#' observed values with their 90% prediction intervals, and so on. -#' -#' For every prediction interval, the deviation is computed as the difference -#' between the observed interval coverage and the nominal interval coverage -#' For a single observed value and a single prediction interval, coverage is -#' always either 0 or 1 (`FALSE` or `TRUE`). This is not the case for a single -#' observed value and multiple prediction intervals, -#' but it still doesn't make that much -#' sense to compare nominal (desired) coverage and actual coverage for a single -#' observation. In that sense coverage deviation only really starts to make -#' sense as a metric when averaged across multiple observations). -#' -#' Positive values of interval coverage deviation are an indication for -#' underconfidence, i.e. the forecaster could likely have issued a narrower -#' forecast. Negative values are an indication for overconfidence, i.e. the -#' forecasts were too narrow. -#' -#' \deqn{ -#' \textrm{interval coverage deviation} = -#' \mathbf{1}(\textrm{observed value falls within interval}) - -#' \textrm{nominal interval coverage} -#' }{ -#' interval coverage deviation = -#' 1(observed value falls within interval) - nominal interval coverage -#' } -#' The interval coverage deviation is then averaged across all prediction -#' intervals. The median is ignored when computing coverage deviation. -#' @inheritParams wis -#' @importFrom cli cli_warn -#' @return -#' A numeric vector of length n with the interval coverage deviation -#' for each forecast (with the forecast itself comprising one or multiple -#' prediction intervals). -#' @inheritSection illustration-input-metric-quantile Input format -#' @export -#' @keywords metric -#' @examples -#' observed <- c(1, -15, 22) -#' predicted <- rbind( -#' c(-1, 0, 1, 2, 3), -#' c(-2, 1, 2, 2, 4), -#' c(-2, 0, 3, 3, 4) -#' ) -#' quantile_level <- c(0.1, 0.25, 0.5, 0.75, 0.9) -#' interval_coverage_deviation(observed, predicted, quantile_level) -interval_coverage_deviation <- function(observed, predicted, quantile_level) { - assert_input_quantile(observed, predicted, quantile_level) - - # transform available quantile_levels into central interval ranges - available_ranges <- unique(get_range_from_quantile(quantile_level)) - - # check if all necessary quantile_levels are available - necessary_quantiles <- unique( - c((100 - available_ranges) / 2, 100 - (100 - available_ranges) / 2) / 100 - ) - if (!all(necessary_quantiles %in% quantile_level)) { - #nolint start: keyword_quote_linter object_usage_linter - missing <- necessary_quantiles[!necessary_quantiles %in% quantile_level] - cli_warn( - c( - "x" = "To compute interval coverage deviation, all quantiles must form - central symmetric prediction intervals.", - "i" = "Missing quantiles: {.val {missing}}. Returning {.val {NA}}." - ) - ) - #nolint end - return(NA) - } - - reformatted <- quantile_to_interval( - observed, predicted, quantile_level - )[interval_range != 0] - reformatted[, interval_coverage := (observed >= lower) & (observed <= upper)] - reformatted[, interval_coverage_deviation := - interval_coverage - interval_range / 100] - out <- reformatted[, .( - interval_coverage_deviation = mean(interval_coverage_deviation) - ), by = "forecast_id"] - return(out$interval_coverage_deviation) -} - - #' @title Determines bias of quantile forecasts #' #' @description diff --git a/man/get_metrics.forecast_quantile.Rd b/man/get_metrics.forecast_quantile.Rd index dd8647db3..9740cb44d 100644 --- a/man/get_metrics.forecast_quantile.Rd +++ b/man/get_metrics.forecast_quantile.Rd @@ -30,7 +30,6 @@ For quantile-based forecasts, the default scoring rules are: \item "interval_coverage_90" = purrr::partial( interval_coverage, interval_range = 90 ) -\item "interval_coverage_deviation" = \code{\link[=interval_coverage_deviation]{interval_coverage_deviation()}}, \item "ae_median" = \code{\link[=ae_median_quantile]{ae_median_quantile()}} } diff --git a/man/interval_coverage_deviation.Rd b/man/interval_coverage_deviation.Rd deleted file mode 100644 index 8a480ef32..000000000 --- a/man/interval_coverage_deviation.Rd +++ /dev/null @@ -1,90 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/metrics-quantile.R -\name{interval_coverage_deviation} -\alias{interval_coverage_deviation} -\title{Interval coverage deviation (for quantile-based forecasts)} -\usage{ -interval_coverage_deviation(observed, predicted, quantile_level) -} -\arguments{ -\item{observed}{Numeric vector of size n with the observed values.} - -\item{predicted}{Numeric nxN matrix of predictive -quantiles, n (number of rows) being the number of forecasts (corresponding -to the number of observed values) and N -(number of columns) the number of quantiles per forecast. -If \code{observed} is just a single number, then predicted can just be a -vector of size N.} - -\item{quantile_level}{Vector of of size N with the quantile levels -for which predictions were made.} -} -\value{ -A numeric vector of length n with the interval coverage deviation -for each forecast (with the forecast itself comprising one or multiple -prediction intervals). -} -\description{ -Check the agreement between desired and actual interval coverage -of a forecast. - -The function is similar to \code{\link[=interval_coverage]{interval_coverage()}}, -but takes all provided prediction intervals into account and -compares nominal interval coverage (i.e. the desired interval coverage) with -the actual observed interval coverage. - -A central symmetric prediction interval is defined by a lower and an -upper bound formed by a pair of predictive quantiles. For example, a 50\% -prediction interval is formed by the 0.25 and 0.75 quantiles of the -predictive distribution. Ideally, a forecaster should aim to cover about -50\% of all observed values with their 50\% prediction intervals, 90\% of all -observed values with their 90\% prediction intervals, and so on. - -For every prediction interval, the deviation is computed as the difference -between the observed interval coverage and the nominal interval coverage -For a single observed value and a single prediction interval, coverage is -always either 0 or 1 (\code{FALSE} or \code{TRUE}). This is not the case for a single -observed value and multiple prediction intervals, -but it still doesn't make that much -sense to compare nominal (desired) coverage and actual coverage for a single -observation. In that sense coverage deviation only really starts to make -sense as a metric when averaged across multiple observations). - -Positive values of interval coverage deviation are an indication for -underconfidence, i.e. the forecaster could likely have issued a narrower -forecast. Negative values are an indication for overconfidence, i.e. the -forecasts were too narrow. - -\deqn{ -\textrm{interval coverage deviation} = -\mathbf{1}(\textrm{observed value falls within interval}) - -\textrm{nominal interval coverage} -}{ -interval coverage deviation = -1(observed value falls within interval) - nominal interval coverage -} -The interval coverage deviation is then averaged across all prediction -intervals. The median is ignored when computing coverage deviation. -} -\section{Input format}{ -\if{html}{ - \out{
} - \figure{metrics-quantile.png}{options: style="width:750px;max-width:100\%;"} - \out{
} -} -\if{latex}{ - \figure{metrics-quantile.png} -} -} - -\examples{ -observed <- c(1, -15, 22) -predicted <- rbind( - c(-1, 0, 1, 2, 3), - c(-2, 1, 2, 2, 4), - c(-2, 0, 3, 3, 4) -) -quantile_level <- c(0.1, 0.25, 0.5, 0.75, 0.9) -interval_coverage_deviation(observed, predicted, quantile_level) -} -\keyword{metric} diff --git a/tests/testthat/_snaps/plot_correlation/plot-correlation.svg b/tests/testthat/_snaps/plot_correlation/plot-correlation.svg index fbe1da1fb..a8adc3e89 100644 --- a/tests/testthat/_snaps/plot_correlation/plot-correlation.svg +++ b/tests/testthat/_snaps/plot_correlation/plot-correlation.svg @@ -20,152 +20,130 @@ - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -1 -0.94 -1 -0.28 --0.03 -1 -0.46 -0.32 -0.15 -1 -0.11 -0.22 --0.35 -0.11 -1 --0.21 --0.15 --0.21 --0.09 -0.01 -1 --0.41 --0.32 --0.36 --0.09 -0.1 -0.37 -1 --0.34 --0.25 --0.33 --0.12 -0.06 -0.85 -0.64 -1 -0.99 -0.9 -0.34 -0.54 -0.1 --0.25 --0.41 --0.38 -1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +1 +0.94 +1 +0.28 +-0.03 +1 +0.46 +0.32 +0.15 +1 +0.11 +0.22 +-0.35 +0.11 +1 +-0.21 +-0.15 +-0.21 +-0.09 +0.01 +1 +-0.41 +-0.32 +-0.36 +-0.09 +0.1 +0.37 +1 +0.99 +0.9 +0.34 +0.54 +0.1 +-0.25 +-0.41 +1 - -wis -overprediction -underprediction -dispersion -bias -interval_coverage_50 -interval_coverage_90 -interval_coverage_deviation -ae_median - - - - - - - - - - - - - - - - - - - -ae_median -interval_coverage_deviation -interval_coverage_90 -interval_coverage_50 -bias -dispersion -underprediction -overprediction -wis -Correlation - - - - - - - -0.0 -0.5 -1.0 -plot__correlation + +wis +overprediction +underprediction +dispersion +bias +interval_coverage_50 +interval_coverage_90 +ae_median + + + + + + + + + + + + + + + + + +ae_median +interval_coverage_90 +interval_coverage_50 +bias +dispersion +underprediction +overprediction +wis +Correlation + + + + + + + +0.0 +0.5 +1.0 +plot__correlation diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index afa4d137c..1ef6389f4 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -2,13 +2,12 @@ data.table::setDTthreads(2) # restricts number of cores used on CRAN metrics_no_cov <- get_metrics( example_quantile, - exclude = c("interval_coverage_50", "interval_coverage_90", - "interval_coverage_deviation") + exclude = c("interval_coverage_50", "interval_coverage_90") ) metrics_no_cov_no_ae <- get_metrics( example_quantile, exclude = c("interval_coverage_50", "interval_coverage_90", - "interval_coverage_deviation", "ae_median") + "ae_median") ) example_quantile_df <- as.data.frame(na.omit(example_quantile)) diff --git a/tests/testthat/test-metrics-quantile.R b/tests/testthat/test-metrics-quantile.R index 6214db5d8..16b9bfe17 100644 --- a/tests/testthat/test-metrics-quantile.R +++ b/tests/testthat/test-metrics-quantile.R @@ -671,30 +671,6 @@ test_that("interval_coverage_quantile throws a warning when a required quantile }) -# ============================================================================ # -# `interval_coverage_deviation` ============================================== # -# ============================================================================ # -test_that("interval_coverage_deviation works", { - existing_ranges <- unique(get_range_from_quantile(quantile_level)) - expect_equal(existing_ranges, c(80, 50, 0)) - - cov_50 <- interval_coverage(observed, predicted, quantile_level, interval_range = c(50)) - cov_80 <- interval_coverage(observed, predicted, quantile_level, interval_range = c(80)) - manual <- 0.5 * (cov_50 - 0.5) + 0.5 * (cov_80 - 0.8) - - expect_equal( - interval_coverage_deviation(observed, predicted, quantile_level), - manual - ) - expect_warning( - interval_coverage_deviation( - observed, predicted, c(quantile_level[-4], 0.76) - ), - "all quantiles must form central symmetric prediction intervals" - ) -}) - - # ============================================================================ # # `bias_quantile` ============================================================ # # ============================================================================ #