Merge pull request #157 from epiforecasts/master

update branch from master
epiforecasts · Nov 24, 2021 · 42f4d8b · 42f4d8b
2 parents 9160039 + ba120ae
commit 42f4d8b
Show file tree

Hide file tree

Showing 19 changed files with 3,601 additions and 51 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -12,6 +12,11 @@ Authors@R: c(
            role = c("aut"),
            email = "[email protected]",
            comment = c(ORCID = "0000-0001-8057-8037")), 
+    person(given = "Hugo",
+           family = "Gruson",
+           role = c("aut"),
+           email = "[email protected]",
+           comment = c(ORCID = "https://orcid.org/0000-0002-4094-1476")),
     person(given = "Johannes Bracher",
            role = c("ctb"),
            email = "[email protected]",
@@ -65,7 +70,8 @@ Imports:
 Suggests: 
     testthat,
     knitr,
-    rmarkdown
+    rmarkdown,
+    vdiffr
 RoxygenNote: 7.1.1
 URL: https://github.com/epiforecasts/scoringutils, https://epiforecasts.io/scoringutils/
 BugReports: https://github.com/epiforecasts/scoringutils/issues

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,6 +4,7 @@ S3method(print,scoringutils_check)
 export(abs_error)
 export(ae_median_quantile)
 export(ae_median_sample)
+export(available_metrics)
 export(bias)
 export(brier_score)
 export(check_forecasts)

diff --git a/R/bias.R b/R/bias.R
@@ -87,7 +87,7 @@ bias <- function(true_values, predictions) {
   # ============================================
 
   ## check whether continuous or integer
-  if (all.equal(as.vector(predictions), as.integer(predictions)) != TRUE) {
+  if (!isTRUE(all.equal(as.vector(predictions), as.integer(predictions)))) {
     continuous_predictions <- TRUE
   } else {
     continuous_predictions <- FALSE

diff --git a/R/eval_forecasts.R b/R/eval_forecasts.R
@@ -86,7 +86,8 @@
 #' may want to include 'range', 'quantile' or 'sample', to summarise by
 #' range, quantile or sample.
 #' @param metrics the metrics you want to have in the output. If `NULL` (the
-#' default), all available metrics will be computed.
+#' default), all available metrics will be computed. For a list of available
+#' metrics see [available_metrics()].
 #' @param quantiles numeric vector of quantiles to be returned when summarising.
 #' Instead of just returning a mean, quantiles will be returned for the
 #' groups specified through `summarise_by`. By default, no quantiles are
@@ -242,7 +243,7 @@ eval_forecasts <- function(data = NULL,
   }
 
   # check metrics to be computed
-  available_metrics <- list_of_avail_metrics()
+  available_metrics <- available_metrics()
   if (is.null(metrics)) {
     metrics <- available_metrics
   } else {
@@ -259,13 +260,13 @@ eval_forecasts <- function(data = NULL,
   if (any(grepl("lower", names(data))) | "boundary" %in% names(data) |
       "quantile" %in% names(data) | "range" %in% names(data)) {
     prediction_type <- "quantile"
-  } else if (all.equal(data$prediction, as.integer(data$prediction)) == TRUE) {
+  } else if (isTRUE(all.equal(data$prediction, as.integer(data$prediction)))) {
     prediction_type <- "integer"
   } else {
     prediction_type <- "continuous"
   }
 
-  if (all.equal(data$true_value, as.integer(data$true_value)) == TRUE) {
+  if (isTRUE(all.equal(data$true_value, as.integer(data$true_value)))) {
     if (all(data$true_value %in% c(0,1)) && all(data$prediction >= 0) && all(data$prediction <= 1)) {
       target_type = "binary"
     } else {

diff --git a/R/eval_forecasts_continuous_integer.R b/R/eval_forecasts_continuous_integer.R
@@ -43,7 +43,7 @@ eval_forecasts_sample <- function(data,
                                   pit_plots) {
 
   if (missing(prediction_type)) {
-    if (all.equal(data$prediction, as.integer(data$prediction)) == TRUE) {
+    if (isTRUE(all.equal(data$prediction, as.integer(data$prediction)))) {
       prediction_type <- "integer"
     } else {
       prediction_type <- "continuous"

diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R
@@ -75,7 +75,7 @@ pairwise_comparison <- function(scores,
   # usually, by = NULL should be fine and only needs to be specified if there
   # are additional columns that are not metrics and not related to the unit of observation
   if (is.null(by)) {
-    all_metrics <- list_of_avail_metrics()
+    all_metrics <- available_metrics()
     by <- setdiff(names(scores), c(all_metrics, "model"))
   }
 

diff --git a/R/pit.R b/R/pit.R
@@ -58,7 +58,7 @@
 #' number of Monte Carlo samples
 #' @param plot logical. If `TRUE`, a histogram of the PIT values will be
 #' returned as well
-#' @param num_bins the number of bins in the PIT histogram (if `plot == TRUE`)
+#' @param num_bins the number of bins in the PIT histogram (if `plot = TRUE`).
 #' If not given, the square root of n will be used
 #' @param n_replicates the number of tests to perform,
 #' each time re-randomising the PIT
@@ -148,7 +148,7 @@ pit <- function(true_values,
 
   # check data type ------------------------------------------------------------
   # check whether continuous or integer
-  if (all.equal(as.vector(predictions), as.integer(predictions)) != TRUE) {
+  if (!isTRUE(all.equal(as.vector(predictions), as.integer(predictions)))) {
     continuous_predictions <- TRUE
   } else {
     continuous_predictions <- FALSE
@@ -230,7 +230,7 @@ pit <- function(true_values,
 #' \item `data`: the input data.frame (not including rows where prediction is `NA`),
 #' with added columns `pit_p_val` and `pit_sd`
 #' \item `hist_PIT` a plot object with the PIT histogram. Only returned
-#' if `plot == TRUE`. Call
+#' if `plot = TRUE`. Call
 #' `plot(PIT(...)$hist_PIT)` to display the histogram.
 #' \item `p_values`: all p_values generated from the Anderson-Darling tests on the
 #' (randomised) PIT. Only returned if `full_output = TRUE`

diff --git a/R/plot.R b/R/plot.R
@@ -61,7 +61,7 @@ score_table <- function(summarised_scores,
   # identify metrics -----------------------------------------------------------
   # identify metrics by looking at which of the available column names
   # are metrics. All other variables are treated as identifier variables
-  all_metrics <- list_of_avail_metrics()
+  all_metrics <- available_metrics()
 
   metrics <- names(summarised_scores)[names(summarised_scores) %in% all_metrics]
   id_vars <- names(summarised_scores)[!(names(summarised_scores) %in% all_metrics)]
@@ -196,7 +196,7 @@ correlation_plot <- function(scores,
                              select_metrics = NULL) {
 
   # define possible metrics
-  all_metrics <- list_of_avail_metrics()
+  all_metrics <- available_metrics()
 
   # find metrics present
   metrics <- names(scores)[names(scores) %in% all_metrics]
@@ -695,6 +695,7 @@ plot_predictions <- function(data = NULL,
   colnames <- colnames(forecasts)
   if ("sample" %in% colnames) {
     forecasts <- scoringutils::sample_to_range_long(forecasts,
+                                                    range = range,
                                                     keep_quantile_col = FALSE)
   } else if ("quantile" %in% colnames) {
     forecasts <- scoringutils::quantile_to_range_long(forecasts,
@@ -711,28 +712,28 @@ plot_predictions <- function(data = NULL,
     intervals[, quantile := NULL]
   }
 
-  # if there isn't any data to plot, return NULL
-  if (nrow(intervals) == 0) {
-    return(NULL)
-  }
-
-  # pivot wider and convert range to a factor
-  intervals <- data.table::dcast(intervals, ... ~ boundary,
-                                 value.var = "prediction")
-  intervals[, range := as.factor(range)]
+  pal <- grDevices::colorRampPalette(c("lightskyblue1", "steelblue3"))
 
-  # plot prediciton rnages
-  plot <- ggplot2::ggplot(intervals, ggplot2::aes(x = !!ggplot2::sym(x))) +
-    ggplot2::geom_ribbon(ggplot2::aes(ymin = lower, ymax = upper,
-                                      group = range, fill = range),
-                         alpha = 0.4) +
+  plot <- ggplot2::ggplot(data = data, aes(x = !!ggplot2::sym(x))) +
     ggplot2::scale_colour_manual("",values = c("black", "steelblue4")) +
-    ggplot2::scale_fill_manual("range", values = c("steelblue3",
-                                                   "lightskyblue3",
-                                                   "lightskyblue2",
-                                                   "lightskyblue1")) +
+    ggplot2::scale_fill_manual(name = "range", values = pal(length(range))) +
     ggplot2::theme_light()
 
+  if (nrow(intervals) != 0) {
+    # pivot wider and convert range to a factor
+    intervals <- data.table::dcast(intervals, ... ~ boundary,
+                                   value.var = "prediction")
+    intervals[, range := factor(range,
+                                levels = sort(unique(range), decreasing = TRUE),
+                                ordered = TRUE)]
+
+    # plot prediction ranges
+    plot <- plot +
+      ggplot2::geom_ribbon(data = intervals,
+                           ggplot2::aes(ymin = lower, ymax = upper,
+                                        group = range, fill = range))
+  }
+
   # add median in a different colour
   if (0 %in% range) {
     select_median <- (forecasts$range %in% 0 & forecasts$boundary == "lower")
@@ -746,6 +747,20 @@ plot_predictions <- function(data = NULL,
       }
   }
 
+  # add true_values
+  if (nrow(truth_data) > 0) {
+    plot <- plot +
+      ggplot2::geom_point(data = truth_data,
+                          ggplot2::aes(y = true_value, colour = "actual"),
+                          size = 0.5) +
+      ggplot2::geom_line(data = truth_data,
+                         ggplot2::aes(y = true_value, colour = "actual"),
+                         lwd = 0.2)
+  }
+
+  plot <- plot +
+    ggplot2::labs(x = xlab, y = ylab)
+
   # facet if specified by the user
   if (!is.null(facet_formula)) {
     if (facet_wrap_or_grid == "facet_wrap") {
@@ -757,19 +772,6 @@ plot_predictions <- function(data = NULL,
     }
   }
 
-  # add true_values
-  if (nrow(truth_data) > 0) {
-    plot <- plot +
-      ggplot2::labs(x = xlab, y = ylab)
-
-    plot <- plot +
-      ggplot2::geom_point(data = truth_data,
-                          ggplot2::aes(y = true_value, colour = "actual"),
-                          size = 0.5) +
-      ggplot2::geom_line(data = truth_data,
-                         ggplot2::aes(y = true_value, colour = "actual"),
-                         lwd = 0.2)
-  }
   return(plot)
 }
 

diff --git a/R/utils.R b/R/utils.R
@@ -136,7 +136,12 @@ globalVariables(c("..index",
                   "g"))
 
 
-list_of_avail_metrics <- function() {
+#' @title Available metrics in scoringutils
+#'
+#' @return A vector with the names of all available metrics
+#' @export
+
+available_metrics <- function() {
   available_metrics <- c("ae_point", "aem", "log_score", "sharpness", "bias", "dss", "crps",
                          "coverage", "coverage_deviation", "quantile_coverage",
                          "pit_p_val", "pit_sd","interval_score",

diff --git a/man/available_metrics.Rd b/man/available_metrics.Rd
diff --git a/man/eval_forecasts.Rd b/man/eval_forecasts.Rd
diff --git a/man/eval_forecasts_binary.Rd b/man/eval_forecasts_binary.Rd
diff --git a/man/eval_forecasts_sample.Rd b/man/eval_forecasts_sample.Rd
diff --git a/man/pit.Rd b/man/pit.Rd
diff --git a/man/pit_df.Rd b/man/pit_df.Rd