epiforecasts · nikosbosse · Mar 26, 2022 · Mar 18, 2022 · Mar 22, 2022 · Mar 22, 2022
diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,5 @@ inst/manuscript/manuscript.log
 inst/manuscript/manuscript.pdf
 inst/manuscript/manuscript.tex
 inst/manuscript/manuscript_files/
-docs
+docs
+..bfg-report/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -65,7 +65,7 @@ Config/Needs/website:
     amirmasoudabdol/preferably
 Config/testthat/edition: 3
 RoxygenNote: 7.1.2
-URL: https://github.com/epiforecasts/scoringutils, https://epiforecasts.io/scoringutils/
+URL: https://epiforecasts.io/scoringutils/, https://github.com/epiforecasts/scoringutils
 BugReports: https://github.com/epiforecasts/scoringutils/issues
 VignetteBuilder: knitr
 Depends: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -76,7 +76,6 @@ importFrom(ggplot2,geom_polygon)
 importFrom(ggplot2,geom_text)
 importFrom(ggplot2,geom_tile)
 importFrom(ggplot2,ggplot)
-importFrom(ggplot2,ggtitle)
 importFrom(ggplot2,guide_legend)
 importFrom(ggplot2,guides)
 importFrom(ggplot2,labs)

diff --git a/R/data.R b/R/data.R
@@ -3,6 +3,9 @@
 #' A data set with predictions for COVID-19 cases and deaths submitted to the
 #' European Forecast Hub.
 #'
+#' The data was created using the script create-example-data.R in the inst/
+#' folder (or the top-level folder of an installed package).
+#'
 #' @format A data frame with
 #' \describe{
 #'   \item{location}{the country for which a prediction was made}
@@ -25,6 +28,9 @@
 #' A data set with continuous predictions for COVID-19 cases and deaths
 #' constructed from data submitted to the European Forecast Hub.
 #'
+#' The data was created using the script create-example-data.R in the inst/
+#' folder (or the top-level folder of an installed package).
+#'
 #' @format A data frame with 13,429 rows and 10 columns:
 #' \describe{
 #'   \item{location}{the country for which a prediction was made}
@@ -47,6 +53,9 @@
 #' A data set with integer predictions for COVID-19 cases and deaths
 #' constructed from data submitted to the European Forecast Hub.
 #'
+#' The data was created using the script create-example-data.R in the inst/
+#' folder (or the top-level folder of an installed package).
+#'
 #' @format A data frame with 13,429 rows and 10 columns:
 #' \describe{
 #'   \item{location}{the country for which a prediction was made}
@@ -75,6 +84,9 @@
 #' This should not be understood as sound statistical practice, but rather
 #' as a practical way to create an example data set.
 #'
+#' The data was created using the script create-example-data.R in the inst/
+#' folder (or the top-level folder of an installed package).
+#'
 #' @format A data frame with 346 rows and 10 columns:
 #' \describe{
 #'   \item{location}{the country for which a prediction was made}
@@ -96,6 +108,9 @@
 #' A data set with quantile predictions for COVID-19 cases and deaths
 #' submitted to the European Forecast Hub.
 #'
+#' The data was created using the script create-example-data.R in the inst/
+#' folder (or the top-level folder of an installed package).
+#'
 #' @format A data frame with 7,581 rows and 9 columns:
 #' \describe{
 #'   \item{location}{the country for which a prediction was made}
@@ -116,6 +131,9 @@
 #' A data set with truth values for COVID-19 cases and deaths
 #' submitted to the European Forecast Hub.
 #'
+#' The data was created using the script create-example-data.R in the inst/
+#' folder (or the top-level folder of an installed package).
+#'
 #' @format A data frame with 140 rows and 5 columns:
 #' \describe{
 #'   \item{location}{the country for which a prediction was made}
@@ -131,5 +149,9 @@
 #'
 #' A data set with summary information on selected metrics implemented in
 #' \pkg{scoringutils}
+#'
+#' The data was created using the script create-metric-tables.R in the inst/
+#' folder (or the top-level folder of an installed package).
+#'
 #' @keywords info
-"metrics_summary"
+"metrics"
diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R
@@ -202,32 +202,13 @@ pairwise_comparison_one_group <- function(scores,
   )]
 
   # calculate relative skill as geometric mean
-  # small theta is again better. If a baseline is given, exclude it
-  # from the computation of the geometric mean
-  # maybe there is a more elegant way to do this
-  if (!is.null(baseline)) {
-    result_without_baseline <- data.table::copy(result)
-    # filter out all ratios where compare_against is the baseline
-    result_without_baseline <- result_without_baseline[
-      compare_against != baseline
-    ]
-    result_without_baseline[, `:=`(theta = geom_mean_helper(ratio)),
-      by = "model"
-    ]
-    # merge back to retain the ratios even for comparisons with the baseline
-    result <- merge(result, result_without_baseline, all.x = TRUE)
-    # avoid mixture of NA and NaN which can cause problems downstream
-    result[is.na(theta), theta := NA_real_]
-    # remove NAs form merge in the thetas
-    result[, theta := unique(na.omit(theta)), by = "model"]
-  } else {
-    result[, `:=`(
-        theta = geom_mean_helper(ratio),
-        rel_to_baseline = NA_real_
-      ),
-      by = "model"
-    ]
-  }
+  # small theta is again better (assuming that the score is negatively oriented)
+  result[, `:=`(
+    theta = geom_mean_helper(ratio),
+    rel_to_baseline = NA_real_
+  ),
+  by = "model"
+  ]
 
   if (!is.null(baseline)) {
     baseline_theta <- unique(result[model == baseline, ]$theta)

diff --git a/R/plot.R b/R/plot.R
@@ -741,7 +741,7 @@ plot_quantile_coverage <- function(scores,
 #' @importFrom data.table as.data.table setnames rbindlist
 #' @importFrom stats reorder
 #' @importFrom ggplot2 labs coord_cartesian facet_wrap facet_grid theme
-#' element_text element_blank ggtitle
+#' element_text element_blank
 #' @export
 #'
 #' @examples
@@ -992,8 +992,7 @@ plot_pairwise_comparison <- function(comparison_result,
           hjust = 1, color = "brown4"
         ),
         axis.text.y = element_text(color = "steelblue4")
-      ) +
-      ggtitle("Pairwise comparisons - ratio of mean scores (for overlapping forecast sets)")
+      )
   }
 
   return(plot)

diff --git a/R/score.R b/R/score.R
@@ -12,7 +12,7 @@
 #' before scoring.
 #'
 #' To obtain a quick overview of the evaluation metrics used, have a look at the
-#' [metrics_summary] data included in the package.
+#' [metrics] data included in the package.
 #'
 #' @param data A data.frame or data.table with the predictions and observations.
 #' The following columns need to be present:

diff --git a/R/scoringutils.R b/R/scoringutils.R
@@ -1,53 +1,2 @@
-#' @title scoringutils
-#'
-#' @description
-#' This package is designed to help with assessing the quality of predictions.
-#' It provides a collection of proper scoring rules and metrics as well that
-#' can be accessed independently or collectively through a higher-level wrapper
-#' function.
-#'
-#' Predictions can be either probabilistic forecasts (generally predictive
-#' samples generated by Markov Chain Monte Carlo procedures), quantile
-#' forecasts or point forecasts. The true values can be either continuous,
-#' integer, or binary.
-#'
-#' A collection of different metrics and scoring rules can be accessed through
-#' the function [score()]. Given a data.frame of the
-#' correct form the function will automatically figure out the type of
-#' prediction and true values and return appropriate scoring metrics.
-#'
-#' The package also has a lot of default visualisation based on the output
-#' created by [score()].
-#'
-#' - [plot_score_table()]
-#' - [plot_correlation()]
-#' - [plot_wis()]
-#' - [plot_ranges()]
-#' - [plot_heatmap()]
-#' - [plot_predictions()]
-#' - [plot_interval_coverage()]
-#' - [plot_quantile_coverage()]
-#'
-#' Alternatively, the following functions can be accessed directly:
-#'
-#' - [brier_score()]
-#' - [pit()]
-#' - [bias_sample()]
-#' - [bias_quantile()]
-#' - [bias_range()]
-#' - [mad_sample()]
-#' - [crps_sample()]
-#' - [logs_sample()]
-#' - [dss_sample()]
-#' - [ae_median_sample()]
-#'
-#' Predictions can be evaluated in a lot of different formats. If you want to
-#' convert from one format to the other, the following helper functions can
-#' do that for you:
-#'
-#' - [sample_to_quantile()]
-#'
-#' @docType package
-#' @name scoringutils
-
-NULL
+#' @keywords internal
+"_PACKAGE"
diff --git a/R/utils.R b/R/utils.R
@@ -49,7 +49,7 @@ globalVariables(c(
   "mean_scores_ratio",
   "metric",
   "metrics_select",
-  "metrics_summary",
+  "metrics",
   "model",
   "n_obs",
   "n_obs wis_component_name",
@@ -88,7 +88,7 @@ globalVariables(c(
 #' @keywords info
 
 available_metrics <- function() {
-  return(unique(metrics_summary$Name))
+  return(unique(metrics$Name))
 }
 
 #' @title Simple permutation test

diff --git a/README.md b/README.md
@@ -110,19 +110,17 @@ example_quantile %>%
   kable()
 #> The following messages were produced when checking inputs:
 #> 1.  Some values for `prediction` are NA in the data provided and the corresponding rows were removed. This may indicate a problem if unexpected.
-#> Warning in any(sign(scores[[metric]] < 0)): coercing argument of type 'double'
-#> to logical
 ```
 
 | model                 | target_type | interval_score | dispersion | underprediction | overprediction | coverage_deviation |    bias | ae_median | coverage_50 | coverage_90 | relative_skill | scaled_rel_skill |
 |:----------------------|:------------|---------------:|-----------:|----------------:|---------------:|-------------------:|--------:|----------:|------------:|------------:|---------------:|-----------------:|
-| EuroCOVIDhub-baseline | Cases       |          28000 |       4100 |         10000.0 |        14000.0 |             -0.110 |  0.0980 |     38000 |        0.33 |        0.82 |           1.20 |              1.6 |
-| EuroCOVIDhub-baseline | Deaths      |            160 |         91 |             2.1 |           66.0 |              0.120 |  0.3400 |       230 |        0.66 |        1.00 |           1.90 |              3.8 |
-| EuroCOVIDhub-ensemble | Cases       |          18000 |       3700 |          4200.0 |        10000.0 |             -0.098 | -0.0560 |     24000 |        0.39 |        0.80 |           0.74 |              1.0 |
-| EuroCOVIDhub-ensemble | Deaths      |             41 |         30 |             4.1 |            7.1 |              0.200 |  0.0730 |        53 |        0.88 |        1.00 |           0.50 |              1.0 |
-| UMass-MechBayes       | Deaths      |             53 |         27 |            17.0 |            9.0 |             -0.023 | -0.0220 |        78 |        0.46 |        0.88 |           0.63 |              1.2 |
-| epiforecasts-EpiNow2  | Cases       |          21000 |       5700 |          3300.0 |        12000.0 |             -0.067 | -0.0790 |     28000 |        0.47 |        0.79 |           0.86 |              1.2 |
-| epiforecasts-EpiNow2  | Deaths      |             67 |         32 |            16.0 |           19.0 |             -0.043 | -0.0051 |       100 |        0.42 |        0.91 |           0.83 |              1.6 |
+| EuroCOVIDhub-baseline | Cases       |          28000 |       4100 |         10000.0 |        14000.0 |             -0.110 |  0.0980 |     38000 |        0.33 |        0.82 |           1.30 |              1.6 |
+| EuroCOVIDhub-baseline | Deaths      |            160 |         91 |             2.1 |           66.0 |              0.120 |  0.3400 |       230 |        0.66 |        1.00 |           2.30 |              3.8 |
+| EuroCOVIDhub-ensemble | Cases       |          18000 |       3700 |          4200.0 |        10000.0 |             -0.098 | -0.0560 |     24000 |        0.39 |        0.80 |           0.82 |              1.0 |
+| EuroCOVIDhub-ensemble | Deaths      |             41 |         30 |             4.1 |            7.1 |              0.200 |  0.0730 |        53 |        0.88 |        1.00 |           0.60 |              1.0 |
+| UMass-MechBayes       | Deaths      |             53 |         27 |            17.0 |            9.0 |             -0.023 | -0.0220 |        78 |        0.46 |        0.88 |           0.75 |              1.3 |
+| epiforecasts-EpiNow2  | Cases       |          21000 |       5700 |          3300.0 |        12000.0 |             -0.067 | -0.0790 |     28000 |        0.47 |        0.79 |           0.95 |              1.2 |
+| epiforecasts-EpiNow2  | Deaths      |             67 |         32 |            16.0 |           19.0 |             -0.043 | -0.0051 |       100 |        0.42 |        0.91 |           0.98 |              1.6 |
 
 `scoringutils` contains additional functionality to summarise these
 scores at different levels, to visualise them, and to explore the

diff --git a/data/metrics.rda b/data/metrics.rda
diff --git a/data/metrics_summary.rda b/data/metrics_summary.rda