diff --git a/R/eval_forecasts_quantile.R b/R/eval_forecasts_quantile.R
index 6395ea7fa..745aff1e1 100644
--- a/R/eval_forecasts_quantile.R
+++ b/R/eval_forecasts_quantile.R
@@ -117,8 +117,12 @@ eval_forecasts_quantile <- function(data,
     quantile_data[, quantile_coverage := (true_value <= prediction)]
   }

-  # merge only if something was computed
-  if (any(c("aem", "quantile_coverage") %in% metrics)) {
+  # merge metrics computed on quantile data (i.e. aem, quantile_coverage) back
+  # into metrics computed on range data. One important side effect of this is
+  # that it controls whether we count the median twice for the interval score
+  # (row is then duplicated) or only once. However, merge only needs to happen
+  # if we computed either the interval score or the aem or quantile coverage
+  if (any(c("aem", "interval_score", "quantile_coverage") %in% metrics)) {
     # delete unnecessary columns before merging back
     keep_cols <- unique(c(by, "quantile", "aem", "quantile_coverage",
                           "boundary", "range"))
@@ -183,7 +187,7 @@
   }

   # if neither quantile nor range are in summarise_by, remove coverage and quantile_coverage
-  if (!("range" %in% summarise_by)) {
+  if (!("range" %in% summarise_by) & ("coverage" %in% colnames(res))) {
     res[, c("coverage") := NULL]
   }
   if (!("quantile" %in% summarise_by) & "quantile_coverage" %in% names(res)) {
diff --git a/man/interval_score.Rd b/man/interval_score.Rd
index 5ae6e1dfb..75b6153fd 100644
--- a/man/interval_score.Rd
+++ b/man/interval_score.Rd
@@ -75,6 +75,7 @@ interval_score(true_values = true_values,
                upper = upper,
                interval_range = interval_range)

+# example with missing values and separate results
 interval_score(true_values = c(true_values, NA),
                lower = c(lower, NA),
                upper = c(NA, upper),
diff --git a/tests/testthat/test-eval_forecasts.R b/tests/testthat/test-eval_forecasts.R
index 3c0e6caab..34c1d64b6 100644
--- a/tests/testthat/test-eval_forecasts.R
+++ b/tests/testthat/test-eval_forecasts.R
@@ -81,6 +81,21 @@ test_that("function produces output even if only some metrics are chosen", {
                TRUE)
 })

+test_that("WIS is the same with other metrics omitted or included", {
+  range_example_wide <- data.table::setDT(scoringutils::range_example_data_wide)
+  range_example <- scoringutils::range_wide_to_long(range_example_wide)
+
+  eval <- scoringutils::eval_forecasts(range_example,
+                                       summarise_by = c("model", "range"),
+                                       metrics = "interval_score")
+
+  eval2 <- scoringutils::eval_forecasts(range_example,
+                                        summarise_by = c("model", "range"))
+
+  expect_equal(sum(eval$interval_score),
+               sum(eval2$interval_score))
+})
+