diff --git a/NAMESPACE b/NAMESPACE index 4db2d9ce6..6a8e5338f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,9 +21,9 @@ S3method(get_metrics,forecast_point) S3method(get_metrics,forecast_quantile) S3method(get_metrics,forecast_sample) S3method(get_metrics,scores) -S3method(get_pit,default) -S3method(get_pit,forecast_quantile) -S3method(get_pit,forecast_sample) +S3method(get_pit_histogram,default) +S3method(get_pit_histogram,forecast_quantile) +S3method(get_pit_histogram,forecast_sample) S3method(head,forecast) S3method(print,forecast) S3method(score,default) @@ -56,7 +56,7 @@ export(get_forecast_counts) export(get_forecast_unit) export(get_metrics) export(get_pairwise_comparisons) -export(get_pit) +export(get_pit_histogram) export(interval_coverage) export(is_forecast) export(is_forecast_binary) @@ -72,13 +72,12 @@ export(mad_sample) export(new_forecast) export(overprediction_quantile) export(overprediction_sample) -export(pit_sample) +export(pit_histogram_sample) export(plot_correlations) export(plot_forecast_counts) export(plot_heatmap) export(plot_interval_coverage) export(plot_pairwise_comparisons) -export(plot_pit) export(plot_quantile_coverage) export(plot_wis) export(quantile_score) @@ -103,6 +102,7 @@ importFrom(checkmate,assert_data_table) importFrom(checkmate,assert_disjunct) importFrom(checkmate,assert_factor) importFrom(checkmate,assert_function) +importFrom(checkmate,assert_int) importFrom(checkmate,assert_list) importFrom(checkmate,assert_logical) importFrom(checkmate,assert_matrix) @@ -115,9 +115,7 @@ importFrom(checkmate,assert_vector) importFrom(checkmate,check_atomic_vector) importFrom(checkmate,check_function) importFrom(checkmate,check_matrix) -importFrom(checkmate,check_number) importFrom(checkmate,check_numeric) -importFrom(checkmate,check_set_equal) importFrom(checkmate,check_vector) importFrom(checkmate,test_atomic_vector) importFrom(checkmate,test_list) @@ -138,6 +136,7 @@ importFrom(data.table,as.data.table) importFrom(data.table,copy) importFrom(data.table,data.table) importFrom(data.table,dcast) +importFrom(data.table,fcase) importFrom(data.table,is.data.table) importFrom(data.table,melt) importFrom(data.table,nafill) @@ -150,7 +149,6 @@ importFrom(data.table,setorderv) importFrom(ggplot2,.data) importFrom(ggplot2,`%+replace%`) importFrom(ggplot2,aes) -importFrom(ggplot2,after_stat) importFrom(ggplot2,coord_cartesian) importFrom(ggplot2,coord_flip) importFrom(ggplot2,element_blank) @@ -159,7 +157,6 @@ importFrom(ggplot2,element_text) importFrom(ggplot2,facet_grid) importFrom(ggplot2,facet_wrap) importFrom(ggplot2,geom_col) -importFrom(ggplot2,geom_histogram) importFrom(ggplot2,geom_line) importFrom(ggplot2,geom_linerange) importFrom(ggplot2,geom_polygon) @@ -175,7 +172,6 @@ importFrom(ggplot2,scale_fill_gradient) importFrom(ggplot2,scale_fill_gradient2) importFrom(ggplot2,scale_fill_manual) importFrom(ggplot2,scale_y_continuous) -importFrom(ggplot2,stat) importFrom(ggplot2,theme) importFrom(ggplot2,theme_light) importFrom(ggplot2,theme_minimal) @@ -187,9 +183,7 @@ importFrom(purrr,partial) importFrom(scoringRules,crps_sample) importFrom(scoringRules,dss_sample) importFrom(scoringRules,logs_sample) -importFrom(stats,as.formula) importFrom(stats,cor) -importFrom(stats,density) importFrom(stats,mad) importFrom(stats,median) importFrom(stats,na.omit) diff --git a/NEWS.md b/NEWS.md index a4899aa66..a96c9bac4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,6 +20,7 @@ of our [original](https://doi.org/10.48550/arXiv.2205.07090) `scoringutils` pape - Users can now also use their own scoring rules (making use of the `metrics` argument, which takes in a named list of functions). Default scoring rules can be accessed using the function `get_metrics()`, which is a a generic with S3 methods for each forecast type. It returns a named list of scoring rules suitable for the respective forecast object. For example, you could call `get_metrics(example_quantile)`. Column names of scores in the output of `score()` correspond to the names of the scoring rules (i.e. the names of the functions in the list of metrics). - Instead of supplying arguments to `score()` to manipulate individual scoring rules users should now manipulate the metric list being supplied using `purrr::partial()` and `select_metric()`. See `?score()` for more information. - the CRPS is now reported as decomposition into dispersion, overprediction and underprediction. + - functionality to calculate the Probability Integral Transform (PIT) has been deprecated and replaced by functionality to calculate PIT histograms, using the `get_pit_histogram()` function; as part of this change, nonrandomised PITs can now be calculated for count data, and this is is done by default ### Creating a forecast object - The `as_forecast_()` functions create a forecast object and validates it. They also allow users to rename/specify required columns and specify the forecast unit in a single step, taking over the functionality of `set_forecast_unit()` in most cases. See `?as_forecast()` for more information. @@ -73,7 +74,6 @@ of our [original](https://doi.org/10.48550/arXiv.2205.07090) `scoringutils` pape - Renamed `interval_coverage_quantile()` to `interval_coverage()`. - "range" was consistently renamed to "interval_range" in the code. The "range"-format (which was mostly used internally) was renamed to "interval"-format - Renamed `correlation()` to `get_correlations()` and `plot_correlation()` to `plot_correlations()` -- `pit()` was renamed to `get_pit()` and converted to an S3 method. ### Deleted functions - Removed abs_error and squared_error from the package in favour of `Metrics::ae` and `Metrics::se`.`get_duplicate_forecasts()` now sorts outputs according to the forecast unit, making it easier to spot duplicates. In addition, there is a `counts` option that allows the user to display the number of duplicates for each forecast unit, rather than the raw duplicated rows. @@ -84,6 +84,7 @@ of our [original](https://doi.org/10.48550/arXiv.2205.07090) `scoringutils` pape - Removed `interval_coverage_sample()` as users are now expected to convert to a quantile format first before scoring. - Function `set_forecast_unit()` was deleted. Instead there is now a `forecast_unit` argument in `as_forecast_()` as well as in `get_duplicate_forecasts()`. - Removed `interval_coverage_dev_quantile()`. Users can still access the difference between nominal and actual interval coverage using `get_coverage()`. +- `pit()`, `pit_sample()` and `plot_pit()` have been removed and replaced by functionality to create PIT histograms (`pit_histogram_sampel()` and `get_pit_histogram()`) ### Function changes - `bias_quantile()` changed the way it handles forecasts where the median is missing: The median is now imputed by linear interpolation between the innermost quantiles. Previously, we imputed the median by simply taking the mean of the innermost quantiles. diff --git a/R/class-forecast-quantile.R b/R/class-forecast-quantile.R index 152121ee1..94972fece 100644 --- a/R/class-forecast-quantile.R +++ b/R/class-forecast-quantile.R @@ -175,27 +175,57 @@ get_metrics.forecast_quantile <- function(x, select = NULL, exclude = NULL, ...) } -#' @rdname get_pit +#' @rdname get_pit_histogram #' @importFrom stats na.omit #' @importFrom data.table `:=` as.data.table #' @export -get_pit.forecast_quantile <- function(forecast, by, ...) { +get_pit_histogram.forecast_quantile <- function(forecast, num_bins = NULL, + breaks = NULL, by, ...) { + assert_number(num_bins, lower = 1, null.ok = TRUE) + assert_numeric(breaks, lower = 0, upper = 1, null.ok = TRUE) forecast <- clean_forecast(forecast, copy = TRUE, na.omit = TRUE) forecast <- as.data.table(forecast) + present_quantiles <- unique(c(0, forecast$quantile_level, 1)) + present_quantiles <- round(present_quantiles, 10) + if (!is.null(breaks)) { + quantiles <- unique(c(0, breaks, 1)) + } else if (is.null(num_bins) || num_bins == "auto") { + quantiles <- present_quantiles + } else { + quantiles <- seq(0, 1, 1 / num_bins) + } + ## avoid rounding errors + quantiles <- round(quantiles, 10) + diffs <- round(diff(quantiles), 10) + + if (length(setdiff(quantiles, present_quantiles)) > 0) { + cli::cli_warn( + "Some requested quantiles are missing in the forecast. ", + "The PIT histogram will be based on the quantiles present in the forecast." + ) + } + + forecast <- forecast[quantile_level %in% quantiles] forecast[, quantile_coverage := (observed <= predicted)] + quantile_coverage <- forecast[, .(quantile_coverage = mean(quantile_coverage)), by = c(unique(c(by, "quantile_level")))] - quantile_coverage <- quantile_coverage[ + + bins <- sprintf("[%s,%s)", quantiles[-length(quantiles)], quantiles[-1]) + mids <- (quantiles[-length(quantiles)] + quantiles[-1]) / 2 + + pit_histogram <- quantile_coverage[ order(quantile_level), .( - quantile_level = c(quantile_level, 1), - pit_value = diff(c(0, quantile_coverage, 1)) + density = diff(c(0, quantile_coverage, 1)) / diffs, + bin = bins, + mid = mids ), by = c(get_forecast_unit(quantile_coverage)) ] - return(quantile_coverage[]) + return(pit_histogram[]) } diff --git a/R/class-forecast-sample.R b/R/class-forecast-sample.R index bb4ea569f..a0983bd7d 100644 --- a/R/class-forecast-sample.R +++ b/R/class-forecast-sample.R @@ -165,31 +165,53 @@ get_metrics.forecast_sample <- function(x, select = NULL, exclude = NULL, ...) { } -#' @rdname get_pit -#' @importFrom stats na.omit +#' @rdname get_pit_histogram #' @importFrom data.table `:=` as.data.table dcast -#' @inheritParams pit_sample +#' @importFrom checkmate assert_int assert_numeric +#' @inheritParams pit_histogram_sample +#' @seealso [pit_histogram_sample()] #' @export -get_pit.forecast_sample <- function(forecast, by, n_replicates = 100, ...) { +get_pit_histogram.forecast_sample <- function(forecast, num_bins = 10, + breaks = NULL, by, integers = c( + "nonrandom", "random", "ignore" + ), n_replicates = NULL, ...) { + integers <- match.arg(integers) + assert_int(num_bins, lower = 1, null.ok = FALSE) + assert_numeric(breaks, lower = 0, upper = 1, null.ok = TRUE) forecast <- clean_forecast(forecast, copy = TRUE, na.omit = TRUE) forecast <- as.data.table(forecast) - # if prediction type is not quantile, calculate PIT values based on samples + if (is.null(breaks)) { + quantiles <- seq(0, 1, 1 / num_bins) + } else { + quantiles <- unique(c(0, breaks, 1)) + } + forecast_wide <- data.table::dcast( forecast, ... ~ paste0("InternalSampl_", sample_id), value.var = "predicted" ) - pit <- forecast_wide[, .(pit_value = pit_sample( - observed = observed, - predicted = as.matrix(.SD) - )), + bins <- sprintf("[%s,%s)", quantiles[-length(quantiles)], quantiles[-1]) + mids <- (quantiles[-length(quantiles)] + quantiles[-1]) / 2 + + pit_histogram <- forecast_wide[, .( + density = pit_histogram_sample( + observed = observed, + predicted = as.matrix(.SD), + quantiles = quantiles, + integers = integers, + n_replicates = n_replicates + ), + bin = bins, + mid = mids + ), by = by, .SDcols = grepl("InternalSampl_", names(forecast_wide), fixed = TRUE) ] - return(pit[]) + return(pit_histogram[]) } diff --git a/R/get-coverage.R b/R/get-coverage.R index d16fc6239..a7287f976 100644 --- a/R/get-coverage.R +++ b/R/get-coverage.R @@ -112,7 +112,7 @@ get_coverage <- function(forecast, by = "model") { #' Default is "model". #' @return ggplot object with a plot of interval coverage #' @importFrom ggplot2 ggplot scale_colour_manual scale_fill_manual .data -#' facet_wrap facet_grid geom_polygon geom_line +#' facet_wrap facet_grid geom_polygon geom_line xlab ylab #' @importFrom checkmate assert_subset #' @importFrom data.table dcast #' @export diff --git a/R/get-pit-histogram.R b/R/get-pit-histogram.R new file mode 100644 index 000000000..7aeab2459 --- /dev/null +++ b/R/get-pit-histogram.R @@ -0,0 +1,65 @@ +#' @title Probability integral transformation histogram +#' +#' @description +#' Generate a Probability Integral Transformation (PIT) histogram for +#' validated forecast objects. +#' +#' See the examples for how to plot the result of this function. +#' +#' @inherit score params +#' @param num_bins The number of bins in the PIT histogram. For sample-based +#' forecasts, the default is 10 bins. For quantile-based forecasts, the +#' default is one bin for each available quantile. +#' You can control the number of bins by supplying a number. This is fine for +#' sample-based pit histograms, but may fail for quantile-based formats. In +#' this case it is preferred to supply explicit breaks points using the +#' `breaks` argument. +#' @param breaks Numeric vector with the break points for the bins in the +#' PIT histogram. This is preferred when creating a PIT histogram based on +#' quantile-based data. Default is `NULL` and breaks will be determined by +#' `num_bins`. If `breaks` is used, `num_bins` will be ignored. +#' 0 and 1 will always be added as left and right bounds, respectively. +#' @param by Character vector with the columns according to which the +#' PIT values shall be grouped. If you e.g. have the columns 'model' and +#' 'location' in the input data and want to have a PIT histogram for +#' every model and location, specify `by = c("model", "location")`. +#' @inheritParams pit_histogram_sample +#' @return A data.table with density values for each bin in the PIT histogram. +#' @examples +#' library("ggplot2") +#' +#' example <- as_forecast_sample(example_sample_continuous) +#' result <- get_pit_histogram(example, by = "model") +#' ggplot(result, aes(x = mid, y = density)) + +#' geom_col() + +#' facet_wrap(. ~ model) + +#' labs(x = "Quantile", "Density") +#' +#' # example with quantile data +#' example <- as_forecast_quantile(example_quantile) +#' result <- get_pit_histogram(example, by = "model") +#' ggplot(result, aes(x = mid, y = density)) + +#' geom_col() + +#' facet_wrap(. ~ model) + +#' labs(x = "Quantile", "Density") +#' @export +#' @keywords scoring +#' @references +#' Sebastian Funk, Anton Camacho, Adam J. Kucharski, Rachel Lowe, +#' Rosalind M. Eggo, W. John Edmunds (2019) Assessing the performance of +#' real-time epidemic forecasts: A case study of Ebola in the Western Area +#' region of Sierra Leone, 2014-15, \doi{10.1371/journal.pcbi.1006785} +get_pit_histogram <- function(forecast, num_bins, breaks, by, + ...) { + UseMethod("get_pit_histogram") +} + + +#' @rdname get_pit_histogram +#' @importFrom cli cli_abort +#' @export +get_pit_histogram.default <- function(forecast, num_bins, breaks, by, ...) { + cli_abort(c( + "!" = "The input needs to be a valid forecast object represented as quantiles or samples." # nolint + )) +} diff --git a/R/get-pit.R b/R/get-pit.R deleted file mode 100644 index a93a68e45..000000000 --- a/R/get-pit.R +++ /dev/null @@ -1,177 +0,0 @@ -#' @title Probability integral transformation (data.frame version) -#' -#' @description -#' Compute the Probability Integral Transformation (PIT) for -#' validated forecast objects. -#' -#' @inherit score params -#' @param by Character vector with the columns according to which the -#' PIT values shall be grouped. If you e.g. have the columns 'model' and -#' 'location' in the input data and want to have a PIT histogram for -#' every model and location, specify `by = c("model", "location")`. -#' @inheritParams pit_sample -#' @return A data.table with PIT values according to the grouping specified in -#' `by`. -#' @examples -#' example <- as_forecast_sample(example_sample_continuous) -#' result <- get_pit(example, by = "model") -#' plot_pit(result) -#' -#' # example with quantile data -#' example <- as_forecast_quantile(example_quantile) -#' result <- get_pit(example, by = "model") -#' plot_pit(result) -#' @export -#' @keywords scoring -#' @references -#' Sebastian Funk, Anton Camacho, Adam J. Kucharski, Rachel Lowe, -#' Rosalind M. Eggo, W. John Edmunds (2019) Assessing the performance of -#' real-time epidemic forecasts: A case study of Ebola in the Western Area -#' region of Sierra Leone, 2014-15, \doi{10.1371/journal.pcbi.1006785} -get_pit <- function(forecast, by, ...) { - UseMethod("get_pit") -} - - -#' @rdname get_pit -#' @importFrom cli cli_abort -#' @export -get_pit.default <- function(forecast, by, ...) { - cli_abort(c( - "!" = "The input needs to be a valid forecast object represented as quantiles or samples." # nolint - )) -} - - -#' @title PIT histogram -#' -#' @description -#' Make a simple histogram of the probability integral transformed values to -#' visually check whether a uniform distribution seems likely. -#' -#' @param pit Either a vector with the PIT values, or a data.table as -#' produced by [get_pit()]. -#' @param num_bins The number of bins in the PIT histogram, default is "auto". -#' When `num_bins == "auto"`, [plot_pit()] will either display 10 bins, or it -#' will display a bin for each available quantile in case you passed in data in -#' a quantile-based format. -#' You can control the number of bins by supplying a number. This is fine for -#' sample-based pit histograms, but may fail for quantile-based formats. In this -#' case it is preferred to supply explicit breaks points using the `breaks` -#' argument. -#' @param breaks Numeric vector with the break points for the bins in the -#' PIT histogram. This is preferred when creating a PIT histogram based on -#' quantile-based data. Default is `NULL` and breaks will be determined by -#' `num_bins`. If `breaks` is used, `num_bins` will be ignored. -#' @importFrom stats as.formula -#' @importFrom ggplot2 geom_col -#' @importFrom stats density -#' @return A ggplot object with a histogram of PIT values -#' @examples -#' \dontshow{ -#' data.table::setDTthreads(2) # restricts number of cores used on CRAN -#' } -#' library(magrittr) # pipe operator -#' -#' # PIT histogram in vector based format -#' observed <- rnorm(30, mean = 1:30) -#' predicted <- replicate(200, rnorm(n = 30, mean = 1:30)) -#' pit <- pit_sample(observed, predicted) -#' plot_pit(pit) -#' -#' # quantile-based pit -#' pit <- example_quantile %>% -#' as_forecast_quantile() %>% -#' get_pit(by = "model") -#' plot_pit(pit, breaks = seq(0.1, 1, 0.1)) -#' -#' # sample-based pit -#' pit <- example_sample_discrete %>% -#' as_forecast_sample %>% -#' get_pit(by = "model") -#' plot_pit(pit) -#' @importFrom ggplot2 ggplot aes xlab ylab geom_histogram stat theme_light after_stat -#' @importFrom checkmate assert check_set_equal check_number -#' @export -plot_pit <- function(pit, - num_bins = "auto", - breaks = NULL) { - assert( - check_set_equal(num_bins, "auto"), - check_number(num_bins, lower = 1) - ) - assert_numeric(breaks, lower = 0, upper = 1, null.ok = TRUE) - - # vector-format is always sample-based, for data.frames there are two options - if ("quantile_level" %in% names(pit)) { - type <- "quantile-based" - } else { - type <- "sample-based" - } - - # use breaks if explicitly given, otherwise assign based on number of bins - if (!is.null(breaks)) { - plot_quantiles <- unique(c(0, breaks, 1)) - } else if (is.null(num_bins) || num_bins == "auto") { - # automatically set number of bins - if (type == "sample-based") { - num_bins <- 10 - width <- 1 / num_bins - plot_quantiles <- seq(0, 1, width) - } - if (type == "quantile-based") { - plot_quantiles <- unique(c(0, pit$quantile_level, 1)) - } - } else { - # if num_bins is explicitly given - width <- 1 / num_bins - plot_quantiles <- seq(0, 1, width) - } - - # function for data.frames - if (is.data.frame(pit)) { - facet_cols <- get_forecast_unit(pit) - formula <- as.formula(paste("~", paste(facet_cols, collapse = "+"))) - - # quantile version - if (type == "quantile-based") { - hist <- ggplot( - data = pit[quantile_level %in% plot_quantiles], - aes(x = quantile_level, y = pit_value) - ) + - geom_col(position = "dodge", colour = "grey") + - facet_wrap(formula) - } - - if (type == "sample-based") { - hist <- ggplot( - data = pit, - aes(x = pit_value) - ) + - geom_histogram( - aes(y = after_stat(width * density)), - breaks = plot_quantiles, - colour = "grey" - ) + - facet_wrap(formula) - } - } else { - # non data.frame version - hist <- ggplot( - data = data.frame(x = pit, stringsAsFactors = TRUE), - aes(x = x) - ) + - geom_histogram( - aes(y = after_stat(width * density)), - breaks = plot_quantiles, - colour = "grey" - ) - } - - hist <- hist + - xlab("PIT") + - ylab("Frequency") + - theme_scoringutils() - - return(hist) -} diff --git a/R/metrics-sample.R b/R/metrics-sample.R index c88c6f26e..23308a05b 100644 --- a/R/metrics-sample.R +++ b/R/metrics-sample.R @@ -447,27 +447,61 @@ mad_sample <- function(observed = NULL, predicted, ...) { #' #' In the case of discrete nonnegative outcomes such as incidence counts, #' the PIT is no longer uniform even when forecasts are ideal. -#' In that case a randomised PIT can be used instead: +#' In that case two methods are available ase described by Czado et al. (2007). +#' +#' By default, a nonrandomised PIT is calculated using the conditional +#' cumulative distribution function #' \deqn{ -#' u_t = P_t(k_t) + v * (P_t(k_t) - P_t(k_t - 1) ) +#' F(u) = +#' \begin{cases} +#' 0 & \text{if } v < P_t(k_t - 1) \\ +#' (v - P_t(k_t - 1)) / (P_t(k_t) - P_t(k_t - 1)) & \text{if } P_t(k_t - 1) \leq v < P_t(k_t) \\ +#' 1 & \text{if } v \geq P_t(k_t) +#' \end{cases} #' } #' #' where \eqn{k_t} is the observed count, \eqn{P_t(x)} is the predictive -#' cumulative probability of observing incidence k at time t, -#' \eqn{P_t (-1) = 0} by definition and v is standard uniform and independent -#' of k. If \eqn{P_t} is the true cumulative -#' probability distribution, then \eqn{u_t} is standard uniform. +#' cumulative probability of observing incidence \eqn{k} at time \eqn{t} and +#' \eqn{P_t (-1) = 0} by definition. +#' Values of the PIT histogram are then created by averaging over the \eqn{n} +#' predictions, +#' +#' \deqn{ +# \bar{F}(u) = \frac{i = 1}{n} \sum_{i=1}^{n} F^{(i)}(u) +#' } +#' +#' And calculating the value at each bin between quantile \eqn{q_i} and quantile +#' \eqn{q_{i + 1}} as +#' +#' \deqn{ +# \bar{F}(q_i) - \bar{F}(q_{i + 1}) +#' } +#' +#' Alternatively, a randomised PIT can be used instead. In this case, the PIT is +#' \deqn{ +#' u_t = P_t(k_t) + v * (P_t(k_t) - P_t(k_t - 1)) +#' } #' -#' @param n_replicates The number of draws for the randomised PIT for -#' discrete predictions. Will be ignored if forecasts are continuous. +#' where \eqn{v} is standard uniform and independent of \eqn{k}. The values of +#' the PIT histogram are then calculated by binning the $u_t$ values as above. +#' +#' @param quantiles A vector of quantiles between which to calculate the PIT. +#' @param integers How to handle integer forecasts (count data). This is based +#' on methods described Czado et al. (2007). If "nonrandom" (default) the +#' function will use the non-randomised PIT method. If "random", will use the +#' randomised PIT method. If "ignore", will treat integer forecasts as if they +#' were continuous. +#' @param n_replicates The number of draws for the randomised PIT for discrete +#' predictions. Will be ignored if forecasts are continuous or `integers` is +#' not set to `random`. #' @inheritParams ae_median_sample -#' @return A vector with PIT-values. For continuous forecasts, the vector will -#' correspond to the length of `observed`. For integer forecasts, a -#' randomised PIT will be returned of length -#' `length(observed) * n_replicates`. -#' @seealso [get_pit()] +#' @inheritParams get_pit_histogram +#' @return A vector with PIT histogram densities for the bins corresponding +#' to the given quantiles. +#' @seealso [get_pit_histogram()] #' @importFrom stats runif -#' @importFrom cli cli_abort cli_inform +#' @importFrom data.table fcase +#' @importFrom cli cli_warn cli_abort #' @examples #' \dontshow{ #' data.table::setDTthreads(2) # restricts number of cores used on CRAN @@ -476,14 +510,20 @@ mad_sample <- function(observed = NULL, predicted, ...) { #' ## continuous predictions #' observed <- rnorm(20, mean = 1:20) #' predicted <- replicate(100, rnorm(n = 20, mean = 1:20)) -#' pit <- pit_sample(observed, predicted) -#' plot_pit(pit) +#' pit <- pit_histogram_sample(observed, predicted, quantiles = seq(0, 1, 0.1)) #' #' ## integer predictions #' observed <- rpois(20, lambda = 1:20) #' predicted <- replicate(100, rpois(n = 20, lambda = 1:20)) -#' pit <- pit_sample(observed, predicted, n_replicates = 30) -#' plot_pit(pit) +#' pit <- pit_histogram_sample(observed, predicted, quantiles = seq(0, 1, 0.1)) +#' +#' ## integer predictions, randomised PIT +#' observed <- rpois(20, lambda = 1:20) +#' predicted <- replicate(100, rpois(n = 20, lambda = 1:20)) +#' pit <- pit_histogram_sample( +#' observed, predicted, quantiles = seq(0, 1, 0.1), +#' integers = "random", n_replicates = 30 +#' ) #' @export #' @references #' Claudia Czado, Tilmann Gneiting Leonhard Held (2009) Predictive model @@ -494,30 +534,59 @@ mad_sample <- function(observed = NULL, predicted, ...) { #' real-time epidemic forecasts: A case study of Ebola in the Western Area #' region of Sierra Leone, 2014-15, \doi{10.1371/journal.pcbi.1006785} #' @keywords metric -pit_sample <- function(observed, - predicted, - n_replicates = 100) { +pit_histogram_sample <- function(observed, + predicted, + quantiles, + integers = c("nonrandom", "random", "ignore"), + n_replicates = NULL) { assert_input_sample(observed = observed, predicted = predicted) - assert_number(n_replicates) + integers <- match.arg(integers) + assert_number( + n_replicates, null.ok = (integers != "random"), + .var.name = paste("n_replicates with `integers` = ", integers) + ) if (is.vector(predicted)) { predicted <- matrix(predicted, nrow = 1) } + if (integers != "random" && !is.null(n_replicates)) { + cli::cli_warn("`n_replicates` is ignored when `integers` is not `random`") + } + # calculate PIT-values ------------------------------------------------------- n_pred <- ncol(predicted) - # calculate emipirical cumulative distribution function as + # calculate empirical cumulative distribution function as # Portion of (y_observed <= y_predicted) p_x <- rowSums(predicted <= observed) / n_pred # PIT calculation is different for integer and continuous predictions - if (get_type(predicted) == "integer") { + predicted <- round(predicted) + if (get_type(predicted) == "integer" && integers != "ignore") { p_xm1 <- rowSums(predicted <= (observed - 1)) / n_pred - pit_values <- as.vector( - replicate(n_replicates, p_xm1 + runif(1) * (p_x - p_xm1)) - ) + if (integers == "random") { + pit_values <- as.vector( + replicate(n_replicates, p_xm1 + runif(1) * (p_x - p_xm1)) + ) + } else { + f_bar <- function(u) { + f <- fcase( + u <= p_xm1, 0, + u >= p_x, 1, + default = (u - p_xm1) / (p_x - p_xm1) + ) + mean(f) + } + pit_histogram <- diff(vapply(quantiles, f_bar, numeric(1))) / + diff(quantiles) + } } else { pit_values <- p_x } - return(pit_values) + + if (integers != "nonrandom") { + pit_histogram <- hist(pit_values, breaks = quantiles, plot = FALSE)$density + } + + return(pit_histogram) } diff --git a/R/plot-wis.R b/R/plot-wis.R index 26a17c933..305476537 100644 --- a/R/plot-wis.R +++ b/R/plot-wis.R @@ -16,7 +16,7 @@ #' @return A ggplot object showing a contributions from the three components of #' the weighted interval score. #' @importFrom ggplot2 ggplot aes geom_linerange facet_wrap labs -#' scale_fill_discrete coord_flip +#' scale_fill_discrete coord_flip geom_col #' theme theme_light unit guides guide_legend .data #' @importFrom data.table melt #' @importFrom checkmate assert_subset assert_logical diff --git a/inst/manuscript/manuscript.Rmd b/inst/manuscript/manuscript.Rmd index d4be90edc..5ecc7025a 100644 --- a/inst/manuscript/manuscript.Rmd +++ b/inst/manuscript/manuscript.Rmd @@ -334,9 +334,11 @@ Users can obtain PIT histograms based on validated forecast objects using the fu ```{r pit-plots, fig.pos = "!h", fig.cap="PIT histograms of all models stratified by forecast target. Histograms should ideally be uniform. A u-shape usually indicates overconfidence (forecasts are too narrow), a hump-shaped form indicates underconfidence (forecasts are too uncertain) and a triangle-shape indicates bias.", fig.width = 8, fig.height=4} example_sample_continuous |> as_forecast_sample() |> - get_pit(by = c("model", "target_type")) |> - plot_pit() + - facet_grid(target_type ~ model) + get_pit_histogram(by = c("model", "target_type")) |> + ggplot(aes(x = mid, y = density)) + + geom_col() + + facet_grid(target_type ~ model) + + labs(x = "Quantile", "Density") ``` It is, in theory, possible to conduct a formal test for probabilistic calibration, for example by employing an Anderson-Darling test on the uniformity of PIT values. In practice, this can be difficult as forecasts, and therefore PIT values as well, are often correlated. Personal experience suggests that the Anderson-Darling test is often too quick to reject the null hypothesis of uniformity. @@ -773,7 +775,7 @@ df <- data.table(observed = rep(truth, each = n_samples), as_forecast_sample() res <- score(df) -pit <- get_pit(df, by = "model") +pit <- get_pit_histogram(df, by = "model") df[, model := factor(`model`, levels = c("Pred: N(0, 1)", "Pred: N(0.5, 1)", @@ -803,9 +805,12 @@ pred_hist <- df |> # create pit plots ------------------------------------------------------------- -pit_plots <- plot_pit(pit) + +pit_plots <- pit |> + ggplot(aes(x = mid, y = density)) + + geom_col() + facet_wrap(~ model, nrow = 1) + - theme_scoringutils() + theme_scoringutils() + + labs(y = "Density", x = "Quantile") # create interval and quantile coverage plots ---------------------------------- # create coverage plots by transforming to quantile format first diff --git a/man/get_pit.Rd b/man/get_pit.Rd deleted file mode 100644 index bba4a675d..000000000 --- a/man/get_pit.Rd +++ /dev/null @@ -1,60 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/class-forecast-quantile.R, -% R/class-forecast-sample.R, R/get-pit.R -\name{get_pit.forecast_quantile} -\alias{get_pit.forecast_quantile} -\alias{get_pit.forecast_sample} -\alias{get_pit} -\alias{get_pit.default} -\title{Probability integral transformation (data.frame version)} -\usage{ -\method{get_pit}{forecast_quantile}(forecast, by, ...) - -\method{get_pit}{forecast_sample}(forecast, by, n_replicates = 100, ...) - -get_pit(forecast, by, ...) - -\method{get_pit}{default}(forecast, by, ...) -} -\arguments{ -\item{forecast}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} - -\item{by}{Character vector with the columns according to which the -PIT values shall be grouped. If you e.g. have the columns 'model' and -'location' in the input data and want to have a PIT histogram for -every model and location, specify \code{by = c("model", "location")}.} - -\item{...}{Currently unused. You \emph{cannot} pass additional arguments to scoring -functions via \code{...}. See the \emph{Customising metrics} section below for -details on how to use \code{\link[purrr:partial]{purrr::partial()}} to pass arguments to individual -metrics.} - -\item{n_replicates}{The number of draws for the randomised PIT for -discrete predictions. Will be ignored if forecasts are continuous.} -} -\value{ -A data.table with PIT values according to the grouping specified in -\code{by}. -} -\description{ -Compute the Probability Integral Transformation (PIT) for -validated forecast objects. -} -\examples{ -example <- as_forecast_sample(example_sample_continuous) -result <- get_pit(example, by = "model") -plot_pit(result) - -# example with quantile data -example <- as_forecast_quantile(example_quantile) -result <- get_pit(example, by = "model") -plot_pit(result) -} -\references{ -Sebastian Funk, Anton Camacho, Adam J. Kucharski, Rachel Lowe, -Rosalind M. Eggo, W. John Edmunds (2019) Assessing the performance of -real-time epidemic forecasts: A case study of Ebola in the Western Area -region of Sierra Leone, 2014-15, \doi{10.1371/journal.pcbi.1006785} -} -\keyword{scoring} diff --git a/man/get_pit_histogram.Rd b/man/get_pit_histogram.Rd new file mode 100644 index 000000000..074aa4940 --- /dev/null +++ b/man/get_pit_histogram.Rd @@ -0,0 +1,101 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/class-forecast-quantile.R, +% R/class-forecast-sample.R, R/get-pit-histogram.R +\name{get_pit_histogram.forecast_quantile} +\alias{get_pit_histogram.forecast_quantile} +\alias{get_pit_histogram.forecast_sample} +\alias{get_pit_histogram} +\alias{get_pit_histogram.default} +\title{Probability integral transformation histogram} +\usage{ +\method{get_pit_histogram}{forecast_quantile}(forecast, num_bins = NULL, breaks = NULL, by, ...) + +\method{get_pit_histogram}{forecast_sample}( + forecast, + num_bins = 10, + breaks = NULL, + by, + integers = c("nonrandom", "random", "ignore"), + n_replicates = NULL, + ... +) + +get_pit_histogram(forecast, num_bins, breaks, by, ...) + +\method{get_pit_histogram}{default}(forecast, num_bins, breaks, by, ...) +} +\arguments{ +\item{forecast}{A forecast object (a validated data.table with predicted and +observed values, see \code{\link[=as_forecast]{as_forecast()}}).} + +\item{num_bins}{The number of bins in the PIT histogram. For sample-based +forecasts, the default is 10 bins. For quantile-based forecasts, the +default is one bin for each available quantile. +You can control the number of bins by supplying a number. This is fine for +sample-based pit histograms, but may fail for quantile-based formats. In +this case it is preferred to supply explicit breaks points using the +\code{breaks} argument.} + +\item{breaks}{Numeric vector with the break points for the bins in the +PIT histogram. This is preferred when creating a PIT histogram based on +quantile-based data. Default is \code{NULL} and breaks will be determined by +\code{num_bins}. If \code{breaks} is used, \code{num_bins} will be ignored. +0 and 1 will always be added as left and right bounds, respectively.} + +\item{by}{Character vector with the columns according to which the +PIT values shall be grouped. If you e.g. have the columns 'model' and +'location' in the input data and want to have a PIT histogram for +every model and location, specify \code{by = c("model", "location")}.} + +\item{...}{Currently unused. You \emph{cannot} pass additional arguments to scoring +functions via \code{...}. See the \emph{Customising metrics} section below for +details on how to use \code{\link[purrr:partial]{purrr::partial()}} to pass arguments to individual +metrics.} + +\item{integers}{How to handle integer forecasts (count data). This is based +on methods described Czado et al. (2007). If "nonrandom" (default) the +function will use the non-randomised PIT method. If "random", will use the +randomised PIT method. If "ignore", will treat integer forecasts as if they +were continuous.} + +\item{n_replicates}{The number of draws for the randomised PIT for discrete +predictions. Will be ignored if forecasts are continuous or \code{integers} is +not set to \code{random}.} +} +\value{ +A data.table with density values for each bin in the PIT histogram. +} +\description{ +Generate a Probability Integral Transformation (PIT) histogram for +validated forecast objects. + +See the examples for how to plot the result of this function. +} +\examples{ +library("ggplot2") + +example <- as_forecast_sample(example_sample_continuous) +result <- get_pit_histogram(example, by = "model") +ggplot(result, aes(x = mid, y = density)) + + geom_col() + + facet_wrap(. ~ model) + + labs(x = "Quantile", "Density") + +# example with quantile data +example <- as_forecast_quantile(example_quantile) +result <- get_pit_histogram(example, by = "model") +ggplot(result, aes(x = mid, y = density)) + + geom_col() + + facet_wrap(. ~ model) + + labs(x = "Quantile", "Density") +} +\references{ +Sebastian Funk, Anton Camacho, Adam J. Kucharski, Rachel Lowe, +Rosalind M. Eggo, W. John Edmunds (2019) Assessing the performance of +real-time epidemic forecasts: A case study of Ebola in the Western Area +region of Sierra Leone, 2014-15, \doi{10.1371/journal.pcbi.1006785} +} +\seealso{ +\code{\link[=pit_histogram_sample]{pit_histogram_sample()}} +} +\keyword{scoring} diff --git a/man/pit_sample.Rd b/man/pit_histogram_sample.Rd similarity index 53% rename from man/pit_sample.Rd rename to man/pit_histogram_sample.Rd index a9e7effff..9b1d9f94e 100644 --- a/man/pit_sample.Rd +++ b/man/pit_histogram_sample.Rd @@ -1,10 +1,16 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/metrics-sample.R -\name{pit_sample} -\alias{pit_sample} +\name{pit_histogram_sample} +\alias{pit_histogram_sample} \title{Probability integral transformation for counts} \usage{ -pit_sample(observed, predicted, n_replicates = 100) +pit_histogram_sample( + observed, + predicted, + quantiles, + integers = c("nonrandom", "random", "ignore"), + n_replicates = NULL +) } \arguments{ \item{observed}{A vector with observed values of size n} @@ -13,14 +19,21 @@ pit_sample(observed, predicted, n_replicates = 100) the number of data points and N (number of columns) the number of Monte Carlo samples. Alternatively, \code{predicted} can just be a vector of size n.} -\item{n_replicates}{The number of draws for the randomised PIT for -discrete predictions. Will be ignored if forecasts are continuous.} +\item{quantiles}{A vector of quantiles between which to calculate the PIT.} + +\item{integers}{How to handle integer forecasts (count data). This is based +on methods described Czado et al. (2007). If "nonrandom" (default) the +function will use the non-randomised PIT method. If "random", will use the +randomised PIT method. If "ignore", will treat integer forecasts as if they +were continuous.} + +\item{n_replicates}{The number of draws for the randomised PIT for discrete +predictions. Will be ignored if forecasts are continuous or \code{integers} is +not set to \code{random}.} } \value{ -A vector with PIT-values. For continuous forecasts, the vector will -correspond to the length of \code{observed}. For integer forecasts, a -randomised PIT will be returned of length -\code{length(observed) * n_replicates}. +A vector with PIT histogram densities for the bins corresponding +to the given quantiles. } \description{ Uses a Probability integral transformation (PIT) (or a @@ -49,16 +62,41 @@ In that case, the probabilities \eqn{u_t} are distributed uniformly. In the case of discrete nonnegative outcomes such as incidence counts, the PIT is no longer uniform even when forecasts are ideal. -In that case a randomised PIT can be used instead: +In that case two methods are available ase described by Czado et al. (2007). + +By default, a nonrandomised PIT is calculated using the conditional +cumulative distribution function \deqn{ -u_t = P_t(k_t) + v * (P_t(k_t) - P_t(k_t - 1) ) + F(u) = + \begin{cases} + 0 & \text{if } v < P_t(k_t - 1) \\ + (v - P_t(k_t - 1)) / (P_t(k_t) - P_t(k_t - 1)) & \text{if } P_t(k_t - 1) \leq v < P_t(k_t) \\ + 1 & \text{if } v \geq P_t(k_t) + \end{cases} } where \eqn{k_t} is the observed count, \eqn{P_t(x)} is the predictive -cumulative probability of observing incidence k at time t, -\eqn{P_t (-1) = 0} by definition and v is standard uniform and independent -of k. If \eqn{P_t} is the true cumulative -probability distribution, then \eqn{u_t} is standard uniform. +cumulative probability of observing incidence \eqn{k} at time \eqn{t} and +\eqn{P_t (-1) = 0} by definition. +Values of the PIT histogram are then created by averaging over the \eqn{n} +predictions, + +\deqn{ +} + +And calculating the value at each bin between quantile \eqn{q_i} and quantile +\eqn{q_{i + 1}} as + +\deqn{ +} + +Alternatively, a randomised PIT can be used instead. In this case, the PIT is +\deqn{ + u_t = P_t(k_t) + v * (P_t(k_t) - P_t(k_t - 1)) +} + +where \eqn{v} is standard uniform and independent of \eqn{k}. The values of +the PIT histogram are then calculated by binning the $u_t$ values as above. } \examples{ \dontshow{ @@ -68,14 +106,20 @@ probability distribution, then \eqn{u_t} is standard uniform. ## continuous predictions observed <- rnorm(20, mean = 1:20) predicted <- replicate(100, rnorm(n = 20, mean = 1:20)) -pit <- pit_sample(observed, predicted) -plot_pit(pit) +pit <- pit_histogram_sample(observed, predicted, quantiles = seq(0, 1, 0.1)) ## integer predictions observed <- rpois(20, lambda = 1:20) predicted <- replicate(100, rpois(n = 20, lambda = 1:20)) -pit <- pit_sample(observed, predicted, n_replicates = 30) -plot_pit(pit) +pit <- pit_histogram_sample(observed, predicted, quantiles = seq(0, 1, 0.1)) + +## integer predictions, randomised PIT +observed <- rpois(20, lambda = 1:20) +predicted <- replicate(100, rpois(n = 20, lambda = 1:20)) +pit <- pit_histogram_sample( + observed, predicted, quantiles = seq(0, 1, 0.1), + integers = "random", n_replicates = 30 +) } \references{ Claudia Czado, Tilmann Gneiting Leonhard Held (2009) Predictive model @@ -86,6 +130,6 @@ real-time epidemic forecasts: A case study of Ebola in the Western Area region of Sierra Leone, 2014-15, \doi{10.1371/journal.pcbi.1006785} } \seealso{ -\code{\link[=get_pit]{get_pit()}} +\code{\link[=get_pit_histogram]{get_pit_histogram()}} } \keyword{metric} diff --git a/man/plot_pit.Rd b/man/plot_pit.Rd deleted file mode 100644 index f439e68bc..000000000 --- a/man/plot_pit.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get-pit.R -\name{plot_pit} -\alias{plot_pit} -\title{PIT histogram} -\usage{ -plot_pit(pit, num_bins = "auto", breaks = NULL) -} -\arguments{ -\item{pit}{Either a vector with the PIT values, or a data.table as -produced by \code{\link[=get_pit]{get_pit()}}.} - -\item{num_bins}{The number of bins in the PIT histogram, default is "auto". -When \code{num_bins == "auto"}, \code{\link[=plot_pit]{plot_pit()}} will either display 10 bins, or it -will display a bin for each available quantile in case you passed in data in -a quantile-based format. -You can control the number of bins by supplying a number. This is fine for -sample-based pit histograms, but may fail for quantile-based formats. In this -case it is preferred to supply explicit breaks points using the \code{breaks} -argument.} - -\item{breaks}{Numeric vector with the break points for the bins in the -PIT histogram. This is preferred when creating a PIT histogram based on -quantile-based data. Default is \code{NULL} and breaks will be determined by -\code{num_bins}. If \code{breaks} is used, \code{num_bins} will be ignored.} -} -\value{ -A ggplot object with a histogram of PIT values -} -\description{ -Make a simple histogram of the probability integral transformed values to -visually check whether a uniform distribution seems likely. -} -\examples{ -\dontshow{ - data.table::setDTthreads(2) # restricts number of cores used on CRAN -} -library(magrittr) # pipe operator - -# PIT histogram in vector based format -observed <- rnorm(30, mean = 1:30) -predicted <- replicate(200, rnorm(n = 30, mean = 1:30)) -pit <- pit_sample(observed, predicted) -plot_pit(pit) - -# quantile-based pit -pit <- example_quantile \%>\% - as_forecast_quantile() \%>\% - get_pit(by = "model") -plot_pit(pit, breaks = seq(0.1, 1, 0.1)) - -# sample-based pit -pit <- example_sample_discrete \%>\% - as_forecast_sample \%>\% - get_pit(by = "model") -plot_pit(pit) -} diff --git a/tests/testthat/_snaps/get-pit/plot-pit-integer.svg b/tests/testthat/_snaps/get-pit/plot-pit-integer.svg deleted file mode 100644 index cf1798eb2..000000000 --- a/tests/testthat/_snaps/get-pit/plot-pit-integer.svg +++ /dev/null @@ -1,185 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -UMass-MechBayes - - - - - - - - - -epiforecasts-EpiNow2 - - - - - - - - - -EuroCOVIDhub-baseline - - - - - - - - - -EuroCOVIDhub-ensemble - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - -0.00 -0.05 -0.10 -0.15 -0.20 - - - - - - -0.00 -0.05 -0.10 -0.15 -0.20 - - - - - -PIT -Frequency -plot_pit_integer - - diff --git a/tests/testthat/_snaps/get-pit/plot-pit-quantile-2.svg b/tests/testthat/_snaps/get-pit/plot-pit-quantile-2.svg deleted file mode 100644 index 7e3b1a2f6..000000000 --- a/tests/testthat/_snaps/get-pit/plot-pit-quantile-2.svg +++ /dev/null @@ -1,241 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -UMass-MechBayes - - - - - - - - - -epiforecasts-EpiNow2 - - - - - - - - - -EuroCOVIDhub-baseline - - - - - - - - - -EuroCOVIDhub-ensemble - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - -0.00 -0.03 -0.06 -0.09 -0.12 - - - - - - -0.00 -0.03 -0.06 -0.09 -0.12 - - - - - -PIT -Frequency -plot_pit_quantile_2 - - diff --git a/tests/testthat/_snaps/get-pit/plot-pit-quantile.svg b/tests/testthat/_snaps/get-pit/plot-pit-quantile.svg deleted file mode 100644 index 898d67745..000000000 --- a/tests/testthat/_snaps/get-pit/plot-pit-quantile.svg +++ /dev/null @@ -1,173 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -UMass-MechBayes - - - - - - - - - -epiforecasts-EpiNow2 - - - - - - - - - -EuroCOVIDhub-baseline - - - - - - - - - -EuroCOVIDhub-ensemble - - - - - - - -0.25 -0.50 -0.75 -1.00 - - - - - -0.25 -0.50 -0.75 -1.00 - -0.00 -0.03 -0.06 -0.09 -0.12 - - - - - - -0.00 -0.03 -0.06 -0.09 -0.12 - - - - - -PIT -Frequency -plot_pit_quantile - - diff --git a/tests/testthat/_snaps/get-pit/plot-pit-sample.svg b/tests/testthat/_snaps/get-pit/plot-pit-sample.svg deleted file mode 100644 index 9b6096925..000000000 --- a/tests/testthat/_snaps/get-pit/plot-pit-sample.svg +++ /dev/null @@ -1,66 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.05 -0.10 -0.15 -0.20 - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -PIT -Frequency -plot_pit_sample - - diff --git a/tests/testthat/test-class-forecast-quantile.R b/tests/testthat/test-class-forecast-quantile.R index ecff751f7..46daaa9b6 100644 --- a/tests/testthat/test-class-forecast-quantile.R +++ b/tests/testthat/test-class-forecast-quantile.R @@ -355,12 +355,12 @@ test_that("get_metrics.forecast_quantile() works as expected", { # ============================================================================== -# get_pit.forecast_quantile() +# get_pit_histogram.forecast_quantile() # ============================================================================== -test_that("get_pit.forecast_quantile() works as expected", { - pit_quantile <- get_pit(example_quantile, by = "model") +test_that("get_pit_histogram.forecast_quantile() works as expected", { + pit_quantile <- get_pit_histogram(example_quantile, by = "model") - expect_equal(names(pit_quantile), c("model", "quantile_level", "pit_value")) + expect_equal(names(pit_quantile), c("model", "density", "bin", "mid")) expect_s3_class(pit_quantile, c("data.table", "data.frame"), exact = TRUE) # check printing works diff --git a/tests/testthat/test-class-forecast-sample.R b/tests/testthat/test-class-forecast-sample.R index 5a5715e31..f695c6a6f 100644 --- a/tests/testthat/test-class-forecast-sample.R +++ b/tests/testthat/test-class-forecast-sample.R @@ -57,14 +57,14 @@ test_that("get_metrics.forecast_sample() works as expected", { # ============================================================================== -# get_pit.forecast_sample() +# get_pit_histogram.forecast_sample() # ============================================================================== -test_that("get_pit.forecast_sample() works as expected", { - pit_continuous <- get_pit(example_sample_continuous, by = c("model", "target_type")) - pit_integer <- get_pit(example_sample_discrete, by = c("model", "location")) +test_that("get_pit_histogram.forecast_sample() works as expected", { + pit_continuous <- get_pit_histogram(example_sample_continuous, by = c("model", "target_type")) + pit_integer <- get_pit_histogram(example_sample_discrete, by = c("model", "location")) - expect_equal(names(pit_continuous), c("model", "target_type", "pit_value")) - expect_equal(names(pit_integer), c("model", "location", "pit_value")) + expect_equal(names(pit_continuous), c("model", "target_type", "density", "bin", "mid")) + expect_equal(names(pit_integer), c("model", "location", "density", "bin", "mid")) # check printing works expect_output(print(pit_continuous)) diff --git a/tests/testthat/test-get-pit.R b/tests/testthat/test-get-pit.R deleted file mode 100644 index 86aea7b5c..000000000 --- a/tests/testthat/test-get-pit.R +++ /dev/null @@ -1,40 +0,0 @@ -# ============================================================================== -# plot_pit() -# ============================================================================== -test_that("plot_pit() works as expected with quantile forecasts", { - pit <- example_quantile %>% - na.omit() %>% - as_forecast_quantile() %>% - get_pit(by = "model") - p <- plot_pit(pit, breaks = seq(0.1, 1, 0.1)) - expect_s3_class(p, "ggplot") - skip_on_cran() - vdiffr::expect_doppelganger("plot_pit_quantile", p) - - p2 <- plot_pit(pit) - expect_s3_class(p2, "ggplot") - skip_on_cran() - vdiffr::expect_doppelganger("plot_pit_quantile_2", p2) -}) - -test_that("plot_pit() works as expected with integer forecasts", { - set.seed(587) - pit <- example_sample_discrete %>% - na.omit() %>% - as_forecast_sample() %>% - get_pit(by = "model") - p <- plot_pit(pit) - expect_s3_class(p, "ggplot") - skip_on_cran() - vdiffr::expect_doppelganger("plot_pit_integer", p) -}) - -test_that("plot_pit() works as expected with sample forecasts", { - observed <- rnorm(30, mean = 1:30) - predicted <- replicate(200, rnorm(n = 30, mean = 1:30)) - pit <- pit_sample(observed, predicted) - p <- plot_pit(pit) - expect_s3_class(p, "ggplot") - skip_on_cran() - vdiffr::expect_doppelganger("plot_pit_sample", p) -}) diff --git a/tests/testthat/test-metrics-sample.R b/tests/testthat/test-metrics-sample.R index 4476ac1bb..915ef848c 100644 --- a/tests/testthat/test-metrics-sample.R +++ b/tests/testthat/test-metrics-sample.R @@ -216,47 +216,68 @@ test_that("function throws an error when missing 'predicted'", { # ============================================================================ # -# pit_sample() +# pit_histogram_sample() # ============================================================================ # -test_that("pit_sample() function throws an error when missing args", { +test_that("pit_histogram_sample() function throws an error when missing args", { observed <- rpois(10, lambda = 1:10) predicted <- replicate(50, rpois(n = 10, lambda = 1:10)) expect_error( - pit_sample(predicted = predicted), + pit_histogram_sample(predicted = predicted), 'argument "observed" is missing, with no default' ) expect_error( - pit_sample(observed = observed), + pit_histogram_sample(observed = observed), 'argument "predicted" is missing, with no default' ) + + expect_error( + pit_histogram_sample(predicted = predicted, observed = observed), + 'argument "quantiles" is missing, with no default' + ) + + expect_error( + pit_histogram_sample( + predicted = predicted, observed = observed, + quantiles = seq(0, 1, by = 0.1), integers = "random" + ), + "Assertion on 'n_replicates with `integers` = random' failed:" + ) }) -test_that("pit_sample() function works for integer observed and predicted", { +test_that("pit_histogram_sample() function works for integer observed and predicted", { observed <- rpois(10, lambda = 1:10) predicted <- replicate(10, rpois(10, lambda = 1:10)) - output <- pit_sample( + output <- pit_histogram_sample( observed = observed, predicted = predicted, - n_replicates = 56 + quantiles = seq(0, 1, by = 0.1) ) expect_equal( length(output), - 560 + 10 ) checkmate::expect_class(output, "numeric") + + output2 <- pit_histogram_sample( + observed = observed, + predicted = predicted, + quantiles = seq(0, 1, by = 0.1), + integers = "random", + n_replicates = 56 + ) }) -test_that("pit_sample() function works for continuous observed and predicted", { +test_that("pit_histogram_sample() function works for continuous observed and predicted", { observed <- rnorm(10) predicted <- replicate(10, rnorm(10)) - output <- pit_sample( + output <- pit_histogram_sample( observed = observed, predicted = predicted, - n_replicates = 56 + quantiles = seq(0, 1, by = 0.1) ) expect_equal( length(output), @@ -264,34 +285,43 @@ test_that("pit_sample() function works for continuous observed and predicted", { ) }) -test_that("pit_sample() works with a single observvation", { +test_that("pit_histogram_sample() works with a single observvation", { expect_no_condition( - output <- pit_sample(observed = 2.5, predicted = 1.5:10.5) + output <- pit_histogram_sample( + observed = 2.5, predicted = 1.5:10.5, quantiles = seq(0, 1, by = 0.1) + ) ) - expect_equal(length(output), 1) + expect_equal(length(output), 10) # test discrete case expect_no_condition( - output2 <- pit_sample( - observed = 3, predicted = 1:10, n_replicates = 24 + output2 <- pit_histogram_sample( + observed = 3, predicted = 1:10, quantiles = seq(0, 1, by = 0.1) ) ) - expect_equal(length(output2), 24) + expect_equal(length(output2), 10) }) -test_that("pit_sample() throws an error if inputs are wrong", { +test_that("pit_histogram_sample() throws an error if inputs are wrong", { observed <- 1.5:20.5 predicted <- replicate(100, 1.5:20.5) # expect an error if predicted cannot be coerced to a matrix expect_error( - pit_sample(observed, function(x) {}), + pit_histogram_sample(observed, function(x) {}), "Assertion on 'predicted' failed: Must be of type 'matrix'" ) # expect an error if the number of rows in predicted does not match the length of observed expect_error( - pit_sample(observed, predicted[1:10, ]), + pit_histogram_sample(observed, predicted[1:10, ]), "Assertion on 'predicted' failed: Must have exactly 20 rows, but has 10 rows." ) + + expect_warning( + pit_histogram_sample( + observed, predicted, quantiles = seq(0, 1, by = 0.1), n_replicates = 10 + ), + "`n_replicates` is ignored when `integers` is not `random`" + ) })