From b12cb14ba652d1fee9368594090512d25a079cab Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Sun, 1 Dec 2024 18:05:53 +0000 Subject: [PATCH 01/20] break up `create_clean_reported_cases()` --- NAMESPACE | 2 + NEWS.md | 4 + R/create.R | 62 ++------- R/estimate_infections.R | 25 ++++ R/estimate_secondary.R | 23 ++++ R/preprocessing.R | 153 +++++++++++++++++++++- _pkgdown.yml | 7 + man/add_breakpoints.Rd | 30 +++++ man/add_horizon.Rd | 31 +++++ man/apply_zero_threshold.Rd | 41 ++++++ man/create_clean_reported_cases.Rd | 9 +- man/fill_missing.Rd | 2 +- man/filter_leading_zeros.Rd | 41 ++++++ tests/testthat/test-estimate_infections.R | 9 ++ tests/testthat/test-estimate_secondary.R | 9 ++ 15 files changed, 387 insertions(+), 61 deletions(-) create mode 100644 man/add_breakpoints.Rd create mode 100644 man/add_horizon.Rd create mode 100644 man/apply_zero_threshold.Rd create mode 100644 man/filter_leading_zeros.Rd diff --git a/NAMESPACE b/NAMESPACE index 3ca6f47d8..b28f13928 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,7 @@ export(LogNormal) export(NonParametric) export(Normal) export(R_to_growth) +export(add_breakpoints) export(adjust_infection_to_report) export(apply_tolerance) export(backcalc_opts) @@ -64,6 +65,7 @@ export(extract_inits) export(extract_samples) export(extract_stan_param) export(fill_missing) +export(filter_leading_zeros) export(fix_dist) export(fix_parameters) export(forecast_infections) diff --git a/NEWS.md b/NEWS.md index 28df3b2a2..f806ed533 100644 --- a/NEWS.md +++ b/NEWS.md @@ -19,6 +19,10 @@ - A bug was fixed where an internal function for applying a default cdf cutoff failed due to a difference a vector length issue. By @jamesmbaazam in #858 and reviewed by @sbfnk. - All parameters have been changed to the new parameter interface. By @sbfnk in #871 and reviewed by @seabbs. 
+## Package changes + +- The internal functions `create_clean_reported_cases()` has been broken up into several functions, with relevant ones `filter_leading_zeros()`, `add_breakpoints()` and `apply_zero_threshold()` exposed to the user. By @sbfnk in # and reviewed by @. + ## Documentation - Brought the docs on `alpha_sd` up to date with the code change from prior PR #853. By @zsusswein in #862 and reviewed by @jamesmbaazam. diff --git a/R/create.R b/R/create.R index ed4524003..ade71b2e4 100644 --- a/R/create.R +++ b/R/create.R @@ -1,5 +1,5 @@ #' Create Clean Reported Cases -#' @description `r lifecycle::badge("stable")` +#' @description `r lifecycle::badge("deprecated")` #' Filters leading zeros, completes dates, and applies an optional threshold at #' which point 0 cases are replaced with a user supplied value (defaults to #' `NA`). @@ -12,16 +12,12 @@ #' number of cases based on the 7-day average. If the average is above this #' threshold then the zero is replaced using `fill`. #' -#' @param fill Numeric, defaults to NA. Value to use to replace NA values or -#' zeroes that are flagged because the 7-day average is above the -#' `zero_threshold`. If the default NA is used then dates with NA values or with -#' 7-day averages above the `zero_threshold` will be skipped in model fitting. -#' If this is set to 0 then the only effect is to replace NA values with 0. +#' @param fill Deprecated; zero dates with 7-day averages above the +#' `zero_threshold` will be skipped in model fitting. #' @param add_breakpoints Logical, defaults to TRUE. Should a breakpoint column #' be added to the data frame if it does not exist. 
#' #' @inheritParams estimate_infections -#' @importFrom data.table copy merge.data.table setorder setDT frollsum #' @return A cleaned data frame of reported cases #' @keywords internal #' @examples @@ -33,55 +29,15 @@ create_clean_reported_cases <- function(data, horizon = 0, zero_threshold = Inf, fill = NA_integer_, add_breakpoints = TRUE) { - reported_cases <- data.table::setDT(data) - reported_cases_grid <- data.table::copy(reported_cases)[, - .(date = seq(min(date), max(date) + horizon, by = "days")) - ] - - reported_cases <- data.table::merge.data.table( - reported_cases, reported_cases_grid, - by = "date", all.y = TRUE - ) - - if (is.null(reported_cases$breakpoint) && add_breakpoints) { - reported_cases$breakpoint <- 0 + reported_cases <- add_horizon(data, horizon = horizon) + if (add_breakpoints) { + reported_cases <- add_breakpoints(reported_cases) } - if (!is.null(reported_cases$breakpoint)) { - reported_cases[is.na(breakpoint), breakpoint := 0] - } - reported_cases <- data.table::setorder(reported_cases, date) - ## Filter out 0 reported cases from the beginning of the data if (filter_leading_zeros) { - reported_cases <- reported_cases[order(date)][ - date >= min(date[confirm[!is.na(confirm)] > 0]) - ] - } - # Calculate `average_7_day` which for rows with `confirm == 0` - # (the only instance where this is being used) equates to the 7-day - # right-aligned moving average at the previous data point. 
- reported_cases <- - reported_cases[ - , - `:=`(average_7_day = ( - data.table::frollsum(confirm, n = 8, na.rm = TRUE) - ) / 7 - ) - ] - # Check case counts preceding zero case counts and set to 7 day average if - # average over last 7 days is greater than a threshold - if (!is.infinite(zero_threshold)) { - reported_cases <- reported_cases[ - confirm == 0 & average_7_day > zero_threshold, - confirm := NA_integer_ - ] - } - reported_cases[is.na(confirm), confirm := fill] - reported_cases[, "average_7_day" := NULL] - ## set accumulate to FALSE in added rows - if ("accumulate" %in% colnames(reported_cases)) { - reported_cases[is.na(accumulate), accumulate := FALSE] + reported_cases <- filter_leading_zeros(reported_cases) } - return(reported_cases) + reported_cases <- apply_zero_threshold(reported_cases, zero_threshold) + return(reported_cases[]) } #' Create complete cases diff --git a/R/estimate_infections.R b/R/estimate_infections.R index 1b4fff209..6f64f8394 100644 --- a/R/estimate_infections.R +++ b/R/estimate_infections.R @@ -140,6 +140,20 @@ estimate_infections <- function(data, "estimate_infections(data)" ) } + if (!missing(filter_leading_zeros)) { + lifecycle::deprecate_warn( + "1.7.0", + "estimate_infections(filter_leading_zeros)", + "filter_leading_zeros()" + ) + } + if (!missing(zero_threshold)) { + lifecycle::deprecate_warn( + "1.7.0", + "estimate_infections(zero_threshold)", + "apply_zero_threshold()" + ) + } # Validate inputs check_reports_valid(data, model = "estimate_infections") assert_class(generation_time, "generation_time_opts") @@ -184,6 +198,17 @@ estimate_infections <- function(data, ) # Fill missing dates reported_cases <- default_fill_missing_obs(data, obs, "confirm") + # Check initial zeros to check for deprecated filter zero functionality + if (reported_cases[date == min(date), "confirm"] == 0) { + cli_warn(c( + "!" = "Filtering initial zero observations in the data. This + functionality will be removed in future versions of EpiNow2. 
In order + to retain the default behaviour and filter initial zero observations + use the {.fn filter_leading_zeros()} function on the data before + calling {.fn estimate_infections()}." + )) + } + # Create clean and complete cases reported_cases <- create_clean_reported_cases( reported_cases, horizon, diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index 13054e956..0473acd20 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -172,6 +172,20 @@ estimate_secondary <- function(data, "estimate_secondary(data)" ) } + if (!missing(filter_leading_zeros)) { + lifecycle::deprecate_warn( + "1.7.0", + "estimate_secondary(filter_leading_zeros)", + "filter_leading_zeros()" + ) + } + if (!missing(zero_threshold)) { + lifecycle::deprecate_warn( + "1.7.0", + "estimate_secondary(zero_threshold)", + "apply_zero_threshold()" + ) + } # Validate the inputs check_reports_valid(data, model = "estimate_secondary") assert_class(secondary, "secondary_opts") @@ -200,6 +214,15 @@ estimate_secondary <- function(data, secondary_reports_dirty <- reports[, list(date, confirm = secondary, accumulate)] + if (secondary_reports_dirty[date == min(date), "confirm"] == 0) { + cli_warn( + "!" = "Filtering initial zero observations in the data. This + functionality will be removed in future versions of EpiNow2. In order + to retain the default behaviour and filter initial zero observations + use the {.fn filter_leading_zeros()} function on the data before + calling {.fn estimate_secondary()." + ) + } secondary_reports <- create_clean_reported_cases( secondary_reports_dirty, filter_leading_zeros = filter_leading_zeros, diff --git a/R/preprocessing.R b/R/preprocessing.R index a43171bad..31d833376 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -42,7 +42,7 @@ ##' using a data set that has multiple columns of hwich one of them ##' corresponds to observations that are to be processed here. ##' @param by Character vector. 
Name(s) of any additional column(s) where -##' missing data should be processed separately for each value in the column. +##' data processing should be done separately for each value in the column. ##' This is useful when using data representing e.g. multiple geographies. If ##' NULL (default) no such grouping is done. ##' @return a data.table with an `accumulate` column that indicates whether @@ -177,3 +177,154 @@ default_fill_missing_obs <- function(data, obs, obs_column) { } return(data) } + +##' Add missing values for future dates +##' +##' @param accumulate The number of days to accumulate when generating posterior +##' prediction, e.g. 7 for weekly accumulated forecasts. +##' @inheritParams add_horizon +##' @inheritParams estimate_infections +##' @importFrom data.table copy merge.data.table setDT +##' @return A data.table with missing values for future dates +##' @keywords internal +add_horizon <- function(data, horizon, accumulate = 1L, + obs_column = "confirm", by = NULL) { + assert_data_frame(data) + assert_character(obs_column) + assert_character(by, null.ok = TRUE) + assert_names( + colnames(data), + must.include = c("date", by, obs_column) + ) + assert_integerish(horizon, lower = 0) + assert_integerish(accumulate, lower = 1) + assert_date(data$date, any.missing = FALSE) + + reported_cases <- data.table::setDT(data) + if (horizon > 0) { + reported_cases_grid <- data.table::copy(reported_cases)[, + .(date = seq(max(date) + 1, max(date) + horizon, by = "days")), + by = by + ] + ## if we accumulate add the column + if (accumulate > 1 || "accumulate" %in% colnames(data)) { + reported_cases_grid[, accumulate := TRUE] + ## set accumulation to FALSE where appropriate + if (horizon >= accumulate) { + reported_cases_grid[ + as.integer(date - min(date) - 1) %% accumulate == 0, + accumulate := FALSE + ] + } + } + ## fill any missing columns + reported_cases_grid <- data.table::merge.data.table( + reported_cases, reported_cases_grid, + by = "date", all.y = TRUE + ) 
+ } + return(reported_cases[]) +} + +##' Add breakpoints to certain dates in a data set. +##' +##' @param dates A vector of dates to use as breakpoints. +##' @inheritParams estimate_infections +##' @return A data.table with `breakpoint` set to 1 on each of the specified +##' dates. +##' @export +##' @importFrom data.table setDT +##' @examples +##' reported_cases <- add_breakpoints(example_confirmed, as.Date("2020-03-26")) +add_breakpoints <- function(data, dates = as.Date(character(0))) { + assert_data_frame(data) + assert_names(colnames(data), must.include = "date") + assert_date(dates) + assert_date(data$date, any.missing = FALSE) + reported_cases <- data.table::setDT(data) + if (is.null(reported_cases$breakpoint)) { + reported_cases$breakpoint <- 0 + } + missing_dates <- setdiff(dates, data$date) + if (length(missing_dates) > 0) { + cli_abort("Breakpoint date{?s} not found in data: {.var {missing_dates}}") + } + reported_cases[date %in% dates, breakpoint := 1] + return(reported_cases) +} + +##' Filter leading zeros from a data set. +##' +##' @inheritParams estimate_infections +##' @inheritParams fill_missing +##' @return A data.table with leading zeros removed. +##' @export +##' @importFrom data.table setDT +##' @examples +##' cases <- data.table( +##' date = as.Date("2020-01-01") + 0:10, +##' confirm = c(0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) +##' ) +##' filter_leading_zeros(cases) +filter_leading_zeros <- function(data, obs_column = "confirm", by = NULL) { + assert_data_frame(data) + assert_character(obs_column) + assert_character(by, null.ok = TRUE) + assert_names( + colnames(data), + must.include = c("date", by, obs_column) + ) + reported_cases <- data.table::setDT(data) + reported_cases <- reported_cases[order(date)][ + date >= min(date[get(obs_column)[!is.na(get(obs_column))] > 0]) + ] + return(reported_cases[]) +} + +##' Converts zero case counts to NA (missing) if the 7-day average is above a +##' threshold. 
+##' +##' This function aims to detect spurious zeroes by comparing the 7-day average +##' of the case counts to a threshold. If the 7-day average is above the +##' threshold, the zero case count is replaced with NA. +##' +##' @param threshold Numeric, defaults to Inf. Indicates if detected zero cases +##' are meaningful by using a threshold number of cases based on the 7-day +##' average. If the average is above this threshold at the time of a zero +##' observation count then the zero is replaced with a missing (`NA`) count +##' and thus ignored in the likelihood. +##' +##' @inheritParams estimate_infections +##' @inheritParams fill_missing +##' @importFrom data.table setDT frollsum +##' @return A data.table with the zero threshold applied. +##' @author Sebastian Funk +apply_zero_threshold <- function(data, threshold = Inf, + obs_column = "confirm") { + assert_data_frame(data) + assert_numeric(threshold) + reported_cases <- data.table::setDT(data) + + # Calculate `average_7_day` which for rows with `confirm == 0` + # (the only instance where this is being used) equates to the 7-day + # right-aligned moving average at the previous data point. 
+ reported_cases <- + reported_cases[ + , + `:=`(average_7_day = ( + data.table::frollsum(get(obs_column), n = 8, na.rm = TRUE) + ) / 7 + ) + ] + # Check case counts preceding zero case counts and set to 7 day average if + # average over last 7 days is greater than a threshold + if (!is.infinite(threshold)) { + reported_cases <- reported_cases[ + get(obs_column) == 0 & average_7_day > threshold, + paste(obs_column) := NA_integer_ + ] + } + reported_cases[is.na(get(obs_column)), paste(obs_column) := NA_integer_] + reported_cases[, "average_7_day" := NULL] + return(reported_cases[]) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index ae09e56fe..370830c69 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -78,6 +78,13 @@ reference: contents: - contains("_opts") - opts_list + - title: Preprocess data + desc: Functions used for preprocessing data + contents: + - fill_missing + - add_breakpoints + - filter_leading_zeros + - apply_zero_threshold - title: Summarise Across Regions desc: Functions used for summarising across regions (designed for use with regional_epinow) contents: diff --git a/man/add_breakpoints.Rd b/man/add_breakpoints.Rd new file mode 100644 index 000000000..761d310d9 --- /dev/null +++ b/man/add_breakpoints.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R +\name{add_breakpoints} +\alias{add_breakpoints} +\title{Add breakpoints to certain dates in a data set.} +\usage{ +add_breakpoints(data, dates = as.Date(character(0))) +} +\arguments{ +\item{data}{A \verb{} of confirmed cases (confirm) by date (date). +\code{confirm} must be numeric and \code{date} must be in date format. Optionally +this can also have a logical \code{accumulate} column which indicates whether +data should be added to the next data point. This is useful when modelling +e.g. weekly incidence data. 
See also the \code{\link[=fill_missing]{fill_missing()}} function which +helps add the \code{accumulate} column with the desired properties when dealing +with non-daily data. If any accumulation is done this happens after +truncation as specified by the \code{truncation} argument.} + +\item{dates}{A vector of dates to use as breakpoints.} +} +\value{ +A data.table with \code{breakpoint} set to 1 on each of the specified +dates. +} +\description{ +Add breakpoints to certain dates in a data set. +} +\examples{ +reported_cases <- add_breakpoints(example_confirmed, as.Date("2020-03-26")) +} diff --git a/man/add_horizon.Rd b/man/add_horizon.Rd new file mode 100644 index 000000000..524cbc1b0 --- /dev/null +++ b/man/add_horizon.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R +\name{add_horizon} +\alias{add_horizon} +\title{Add missing values for future dates} +\usage{ +add_horizon(data, horizon, accumulate = 1L, obs_column = "confirm", by = NULL) +} +\arguments{ +\item{data}{A \verb{} of confirmed cases (confirm) by date (date). +\code{confirm} must be numeric and \code{date} must be in date format. Optionally +this can also have a logical \code{accumulate} column which indicates whether +data should be added to the next data point. This is useful when modelling +e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which +helps add the \code{accumulate} column with the desired properties when dealing +with non-daily data. If any accumulation is done this happens after +truncation as specified by the \code{truncation} argument.} + +\item{horizon}{Numeric, defaults to 7. Number of days into the future to +forecast.} + +\item{accumulate}{The number of days to accumulate when generating posterior +prediction, e.g. 
7 for weekly accumulated forecasts.} +} +\value{ +A data.table with missing values for future dates +} +\description{ +Add missing values for future dates +} +\keyword{internal} diff --git a/man/apply_zero_threshold.Rd b/man/apply_zero_threshold.Rd new file mode 100644 index 000000000..481ce188b --- /dev/null +++ b/man/apply_zero_threshold.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R +\name{apply_zero_threshold} +\alias{apply_zero_threshold} +\title{Converts zero case counts to NA (missing) if the 7-day average is above a +threshold.} +\usage{ +apply_zero_threshold(data, threshold = Inf, obs_column = "confirm") +} +\arguments{ +\item{data}{A \verb{} of confirmed cases (confirm) by date (date). +\code{confirm} must be numeric and \code{date} must be in date format. Optionally +this can also have a logical \code{accumulate} column which indicates whether +data should be added to the next data point. This is useful when modelling +e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which +helps add the \code{accumulate} column with the desired properties when dealing +with non-daily data. If any accumulation is done this happens after +truncation as specified by the \code{truncation} argument.} + +\item{threshold}{Numeric, defaults to Inf. Indicates if detected zero cases +are meaningful by using a threshold number of cases based on the 7-day +average. If the average is above this threshold at the time of a zero +observation count then the zero is replaced with a missing (\code{NA}) count +and thus ignored in the likelihood.} + +\item{obs_column}{Character (default: "confirm"). If given, only the column +specified here will be used for checking missingness. This is useful if +using a data set that has multiple columns of hwich one of them +corresponds to observations that are to be processed here.} +} +\value{ +A data.table with the zero threshold applied. 
+} +\description{ +This function aims to detect spurious zeroes by comparing the 7-day average +of the case counts to a threshold. If the 7-day average is above the +threshold, the zero case count is replaced with NA. +} +\author{ +Sebastian Funk +} diff --git a/man/create_clean_reported_cases.Rd b/man/create_clean_reported_cases.Rd index 949880c81..d973aef40 100644 --- a/man/create_clean_reported_cases.Rd +++ b/man/create_clean_reported_cases.Rd @@ -34,11 +34,8 @@ to Inf. Indicates if detected zero cases are meaningful by using a threshold number of cases based on the 7-day average. If the average is above this threshold then the zero is replaced using \code{fill}.} -\item{fill}{Numeric, defaults to NA. Value to use to replace NA values or -zeroes that are flagged because the 7-day average is above the -\code{zero_threshold}. If the default NA is used then dates with NA values or with -7-day averages above the \code{zero_threshold} will be skipped in model fitting. -If this is set to 0 then the only effect is to replace NA values with 0.} +\item{fill}{Deprecated; zero dates with 7-day averages above the +\code{zero_threshold} will be skipped in model fitting.} \item{add_breakpoints}{Logical, defaults to TRUE. Should a breakpoint column be added to the data frame if it does not exist.} @@ -47,7 +44,7 @@ be added to the data frame if it does not exist.} A cleaned data frame of reported cases } \description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Filters leading zeros, completes dates, and applies an optional threshold at which point 0 cases are replaced with a user supplied value (defaults to \code{NA}). 
diff --git a/man/fill_missing.Rd b/man/fill_missing.Rd index 2120c6ddf..9e37e0b02 100644 --- a/man/fill_missing.Rd +++ b/man/fill_missing.Rd @@ -52,7 +52,7 @@ using a data set that has multiple columns of hwich one of them corresponds to observations that are to be processed here.} \item{by}{Character vector. Name(s) of any additional column(s) where -missing data should be processed separately for each value in the column. +data processing should be done separately for each value in the column. This is useful when using data representing e.g. multiple geographies. If NULL (default) no such grouping is done.} } diff --git a/man/filter_leading_zeros.Rd b/man/filter_leading_zeros.Rd new file mode 100644 index 000000000..863bfdef3 --- /dev/null +++ b/man/filter_leading_zeros.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R +\name{filter_leading_zeros} +\alias{filter_leading_zeros} +\title{Filter leading zeros from a data set.} +\usage{ +filter_leading_zeros(data, obs_column = "confirm", by = NULL) +} +\arguments{ +\item{data}{A \verb{} of confirmed cases (confirm) by date (date). +\code{confirm} must be numeric and \code{date} must be in date format. Optionally +this can also have a logical \code{accumulate} column which indicates whether +data should be added to the next data point. This is useful when modelling +e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which +helps add the \code{accumulate} column with the desired properties when dealing +with non-daily data. If any accumulation is done this happens after +truncation as specified by the \code{truncation} argument.} + +\item{obs_column}{Character (default: "confirm"). If given, only the column +specified here will be used for checking missingness. 
This is useful if +using a data set that has multiple columns of hwich one of them +corresponds to observations that are to be processed here.} + +\item{by}{Character vector. Name(s) of any additional column(s) where +data processing should be done separately for each value in the column. +This is useful when using data representing e.g. multiple geographies. If +NULL (default) no such grouping is done.} +} +\value{ +A data.table with leading zeros removed. +} +\description{ +Filter leading zeros from a data set. +} +\examples{ +cases <- data.table( + date = as.Date("2020-01-01") + 0:10, + confirm = c(0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) +) +filter_leading_zeros(cases) +} diff --git a/tests/testthat/test-estimate_infections.R b/tests/testthat/test-estimate_infections.R index 549cfeef0..598e5fca0 100644 --- a/tests/testthat/test-estimate_infections.R +++ b/tests/testthat/test-estimate_infections.R @@ -164,3 +164,12 @@ test_that("estimate_infections works as expected with failing chains", { ) )) }) + +test_that("a warning is thrown when using deprecated functionality", { + suppressWarnings(expect_deprecated(estimate_infections( + reported_cases, filter_leading_zeros = TRUE, verbose = FALSE + ), "filter_leading_zeros")) + suppressWarnings(expect_deprecated(estimate_infections( + reported_cases, zero_threshold = 50, verbose = FALSE + ), "zero_threshold")) +}) diff --git a/tests/testthat/test-estimate_secondary.R b/tests/testthat/test-estimate_secondary.R index 669679507..58214ee91 100644 --- a/tests/testthat/test-estimate_secondary.R +++ b/tests/testthat/test-estimate_secondary.R @@ -249,3 +249,12 @@ test_that("estimate_secondary works with zero_threshold set", { expect_s3_class(out, "estimate_secondary") expect_named(out, c("predictions", "posterior", "data", "fit")) }) + +test_that("a warning is thrown when using deprecated functionality", { + suppressWarnings(expect_deprecated(estimate_secondary( + inc_cases, filter_leading_zeros = TRUE, verbose = FALSE + ), 
"filter_leading_zeros")) + suppressWarnings(expect_deprecated(estimate_secondary( + inc_cases, zero_threshold = 50, verbose = FALSE + ), "zero_threshold")) +}) From fefe4f019f92b1193f54c3f1fd4bf3a19f72307c Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 9 Dec 2024 14:18:15 +0000 Subject: [PATCH 02/20] remove author Co-authored-by: Sam Abbott --- R/preprocessing.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 31d833376..564d741ce 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -298,7 +298,6 @@ filter_leading_zeros <- function(data, obs_column = "confirm", by = NULL) { ##' @inheritParams fill_missing ##' @importFrom data.table setDT frollsum ##' @return A data.table with the zero threshold applied. -##' @author Sebastian Funk apply_zero_threshold <- function(data, threshold = Inf, obs_column = "confirm") { assert_data_frame(data) From d612b4cff3cefa9cb3108710600116ac13464927 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 9 Dec 2024 14:18:32 +0000 Subject: [PATCH 03/20] use data.frame Co-authored-by: Sam Abbott --- R/preprocessing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 564d741ce..e7f7ae8c1 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -261,7 +261,7 @@ add_breakpoints <- function(data, dates = as.Date(character(0))) { ##' @export ##' @importFrom data.table setDT ##' @examples -##' cases <- data.table( +##' cases <- data.frame( ##' date = as.Date("2020-01-01") + 0:10, ##' confirm = c(0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) ##' ) From 3b5ddf3cb8c0aaf8e1604ab14cf856ae9dc8b9d4 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 9 Dec 2024 14:19:34 +0000 Subject: [PATCH 04/20] add PR# and reviewer --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index f806ed533..2b527e4ce 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,7 +21,7 @@ ## Package changes -- The internal functions 
`create_clean_reported_cases()` has been broken up into several functions, with relevant ones `filter_leading_zeros()`, `add_breakpoints()` and `apply_zero_threshold()` exposed to the user. By @sbfnk in # and reviewed by @. +- The internal function `create_clean_reported_cases()` has been broken up into several functions, with relevant ones `filter_leading_zeros()`, `add_breakpoints()` and `apply_zero_threshold()` exposed to the user. By @sbfnk in #884 and reviewed by @seabbs. ## Documentation From 4339bf09d4f9aebc71c2d33be33863f4006a6849 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 9 Dec 2024 16:06:23 +0000 Subject: [PATCH 05/20] render docs --- man/apply_zero_threshold.Rd | 3 --- man/filter_leading_zeros.Rd | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/man/apply_zero_threshold.Rd b/man/apply_zero_threshold.Rd index 481ce188b..6bbf9041b 100644 --- a/man/apply_zero_threshold.Rd +++ b/man/apply_zero_threshold.Rd @@ -36,6 +36,3 @@ This function aims to detect spurious zeroes by comparing the 7-day average of the case counts to a threshold. If the 7-day average is above the threshold, the zero case count is replaced with NA. } -\author{ -Sebastian Funk -} diff --git a/man/filter_leading_zeros.Rd b/man/filter_leading_zeros.Rd index 863bfdef3..527f21e6c 100644 --- a/man/filter_leading_zeros.Rd +++ b/man/filter_leading_zeros.Rd @@ -33,7 +33,7 @@ A data.table with leading zeros removed. Filter leading zeros from a data set. 
} \examples{ -cases <- data.table( +cases <- data.frame( date = as.Date("2020-01-01") + 0:10, confirm = c(0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) ) From ac42ac148ffc2adda2e670c9eb1cd1f0448f84ff Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 9 Dec 2024 22:29:44 +0000 Subject: [PATCH 06/20] fix if statements --- R/estimate_infections.R | 3 ++- R/estimate_secondary.R | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/R/estimate_infections.R b/R/estimate_infections.R index 6f64f8394..5534b8837 100644 --- a/R/estimate_infections.R +++ b/R/estimate_infections.R @@ -199,7 +199,8 @@ estimate_infections <- function(data, # Fill missing dates reported_cases <- default_fill_missing_obs(data, obs, "confirm") # Check initial zeros to check for deprecated filter zero functionality - if (reported_cases[date == min(date), "confirm"] == 0) { + if (filter_leading_zeros && + reported_cases[date == min(date), "confirm"] == 0) { cli_warn(c( "!" = "Filtering initial zero observations in the data. This functionality will be removed in future versions of EpiNow2. In order diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index 0473acd20..9f9060cd4 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -214,13 +214,13 @@ estimate_secondary <- function(data, secondary_reports_dirty <- reports[, list(date, confirm = secondary, accumulate)] - if (secondary_reports_dirty[date == min(date), "confirm"] == 0) { + if (filter_leading_zeros && + secondary_reports_dirty[date == min(date), "secondary"] == 0) { cli_warn( "!" = "Filtering initial zero observations in the data. This functionality will be removed in future versions of EpiNow2. In order - to retain the default behaviour and filter initial zero observations - use the {.fn filter_leading_zeros()} function on the data before - calling {.fn estimate_secondary()." 
+ to filter initial zero observations use the {.fn filter_leading_zeros()} + function on the data before calling {.fn estimate_secondary()." ) } secondary_reports <- create_clean_reported_cases( From 4d83a505db38d5dc75b07d16253df3f8e726a596 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 9 Dec 2024 22:29:55 +0000 Subject: [PATCH 07/20] use rbind instead of merge --- R/preprocessing.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index e7f7ae8c1..3ac30511b 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -202,25 +202,25 @@ add_horizon <- function(data, horizon, accumulate = 1L, reported_cases <- data.table::setDT(data) if (horizon > 0) { - reported_cases_grid <- data.table::copy(reported_cases)[, + reported_cases_future <- data.table::copy(reported_cases)[, .(date = seq(max(date) + 1, max(date) + horizon, by = "days")), by = by ] ## if we accumulate add the column if (accumulate > 1 || "accumulate" %in% colnames(data)) { - reported_cases_grid[, accumulate := TRUE] + reported_cases_future[, accumulate := TRUE] ## set accumulation to FALSE where appropriate if (horizon >= accumulate) { - reported_cases_grid[ + reported_cases_future[ as.integer(date - min(date) - 1) %% accumulate == 0, accumulate := FALSE ] } } ## fill any missing columns - reported_cases_grid <- data.table::merge.data.table( - reported_cases, reported_cases_grid, - by = "date", all.y = TRUE + reported_cases <- rbind( + reported_cases, reported_cases_future, + fill = TRUE ) } return(reported_cases[]) From bb55fe4e3d94675e57bdc06fbe38d6fef892e594 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 11:07:34 +0000 Subject: [PATCH 08/20] check for not NA --- R/estimate_infections.R | 1 + R/estimate_secondary.R | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/estimate_infections.R b/R/estimate_infections.R index 5534b8837..1d5b5f004 100644 --- a/R/estimate_infections.R +++ 
b/R/estimate_infections.R @@ -200,6 +200,7 @@ estimate_infections <- function(data, reported_cases <- default_fill_missing_obs(data, obs, "confirm") # Check initial zeros to check for deprecated filter zero functionality if (filter_leading_zeros && + !is.na(reported_cases[date == min(date), "confirm"]) && reported_cases[date == min(date), "confirm"] == 0) { cli_warn(c( "!" = "Filtering initial zero observations in the data. This diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index 9f9060cd4..81f781dd4 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -215,7 +215,8 @@ estimate_secondary <- function(data, secondary_reports_dirty <- reports[, list(date, confirm = secondary, accumulate)] if (filter_leading_zeros && - secondary_reports_dirty[date == min(date), "secondary"] == 0) { + !is.na(reported_cases[date == min(date), "confirm"]) && + reported_cases[date == min(date), "confirm"] == 0) { cli_warn( "!" = "Filtering initial zero observations in the data. This functionality will be removed in future versions of EpiNow2. 
In order From 7e4e98fcb33a25679959c852a2e863fc708c05a5 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 11:07:44 +0000 Subject: [PATCH 09/20] set NA to zero --- R/preprocessing.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index 3ac30511b..8dc10bf82 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -250,6 +250,7 @@ add_breakpoints <- function(data, dates = as.Date(character(0))) { cli_abort("Breakpoint date{?s} not found in data: {.var {missing_dates}}") } reported_cases[date %in% dates, breakpoint := 1] + reported_cases[is.na(breakpoint), breakpoint := 0] return(reported_cases) } From aca1bc4279a0f8223dc82847db44b88763f3d361 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 11:07:53 +0000 Subject: [PATCH 10/20] silence deprecation warnings --- tests/testthat/test-estimate_secondary.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/testthat/test-estimate_secondary.R b/tests/testthat/test-estimate_secondary.R index 58214ee91..3d70b714c 100644 --- a/tests/testthat/test-estimate_secondary.R +++ b/tests/testthat/test-estimate_secondary.R @@ -224,6 +224,8 @@ test_that("estimate_secondary works with weigh_delay_priors = TRUE", { }) test_that("estimate_secondary works with filter_leading_zeros set", { + ## testing deprecated functionality + withr::local_options(lifecycle_verbosity = "quiet") modified_data <- inc_cases[1:10, secondary := 0] out <- estimate_secondary( modified_data, @@ -238,6 +240,8 @@ test_that("estimate_secondary works with filter_leading_zeros set", { }) test_that("estimate_secondary works with zero_threshold set", { + ## testing deprecated functionality + withr::local_options(lifecycle_verbosity = "quiet") modified_data <- inc_cases[sample(1:30, 10), primary := 0] out <- estimate_secondary( modified_data, From 59dc5eaffee0c4c0f2045b3bb491b8e7b6398395 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 11:33:34 +0000 Subject: [PATCH 
11/20] add global --- R/utilities.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index 5fb413fc5..3a73bacec 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -442,6 +442,6 @@ globalVariables( "..lowers", "..upper_CrI", "..uppers", "timing", "dataset", "last_confirm", "report_date", "secondary", "id", "conv", "meanlog", "primary", "scaled", "scaling", "sdlog", "lookup", "new_draw", ".draw", "p", "distribution", - "accumulate", "..present" + "accumulate", "..present", "reported_cases" ) ) From 6371442879c5644367ba3c883a636d46ad045165 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 11:58:55 +0000 Subject: [PATCH 12/20] call correct object --- R/estimate_secondary.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index 81f781dd4..c26899e81 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -215,8 +215,8 @@ estimate_secondary <- function(data, secondary_reports_dirty <- reports[, list(date, confirm = secondary, accumulate)] if (filter_leading_zeros && - !is.na(reported_cases[date == min(date), "confirm"]) && - reported_cases[date == min(date), "confirm"] == 0) { + !is.na(secondary_reports_dirty[date == min(date), "confirm"]) && + secondary_reports_dirty[date == min(date), "confirm"] == 0) { cli_warn( "!" = "Filtering initial zero observations in the data. This functionality will be removed in future versions of EpiNow2. 
In order From 316970e9494cd4d7c09b58bd6e6ffd6e8ccb5067 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 13:17:48 +0000 Subject: [PATCH 13/20] remove initial line --- tests/testthat/test-create_clean_reported_cases.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test-create_clean_reported_cases.R b/tests/testthat/test-create_clean_reported_cases.R index 44bb0ef78..3b737b271 100644 --- a/tests/testthat/test-create_clean_reported_cases.R +++ b/tests/testthat/test-create_clean_reported_cases.R @@ -1,4 +1,3 @@ - test_that("create_clean_reported_cases runs without errors", { expect_no_error(create_clean_reported_cases(example_confirmed, 7)) }) From 2e84f0e4d2038f5fc913f8ceadb64d821cfcc8dc Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 13:50:37 +0000 Subject: [PATCH 14/20] missing c --- R/estimate_secondary.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index c26899e81..78d1cf1ba 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -217,12 +217,12 @@ estimate_secondary <- function(data, if (filter_leading_zeros && !is.na(secondary_reports_dirty[date == min(date), "confirm"]) && secondary_reports_dirty[date == min(date), "confirm"] == 0) { - cli_warn( + cli_warn(c( "!" = "Filtering initial zero observations in the data. This functionality will be removed in future versions of EpiNow2. In order to filter initial zero observations use the {.fn filter_leading_zeros()} function on the data before calling {.fn estimate_secondary()." 
- ) + )) } secondary_reports <- create_clean_reported_cases( secondary_reports_dirty, From e3326c984d38388a7d9add12895fb3d8ca408957 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 14:14:14 +0000 Subject: [PATCH 15/20] Apply suggestions from code review Co-authored-by: James Azam --- R/preprocessing.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 8dc10bf82..a66e5ea52 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -282,14 +282,14 @@ filter_leading_zeros <- function(data, obs_column = "confirm", by = NULL) { return(reported_cases[]) } -##' Converts zero case counts to NA (missing) if the 7-day average is above a +##' Convert zero case counts to `NA` (missing) if the 7-day average is above a ##' threshold. ##' ##' This function aims to detect spurious zeroes by comparing the 7-day average ##' of the case counts to a threshold. If the 7-day average is above the -##' threshold, the zero case count is replaced with NA. +##' threshold, the zero case count is replaced with `NA`. ##' -##' @param threshold Numeric, defaults to Inf. Indicates if detected zero cases +##' @param threshold Numeric, defaults to `Inf`. Indicates if detected zero cases ##' are meaningful by using a threshold number of cases based on the 7-day ##' average. 
If the average is above this threshold at the time of a zero ##' observation count then the zero is replaced with a missing (`NA`) count From 98854970bf98435ef418263853f6dd41c7a0bdd2 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 14:16:41 +0000 Subject: [PATCH 16/20] fix docs --- R/preprocessing.R | 6 +++++- man/add_horizon.Rd | 22 ++++++++++++++-------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index a66e5ea52..9c1939156 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -180,9 +180,13 @@ default_fill_missing_obs <- function(data, obs, obs_column) { ##' Add missing values for future dates ##' +##' @param data Data frame with a `date` column. The other columns depend on the +##' model that the data are to be used, e.g. [estimate_infections()] or +##' [estimate_secondary()]. See the documentation there for the expected +##' format. ##' @param accumulate The number of days to accumulate when generating posterior ##' prediction, e.g. 7 for weekly accumulated forecasts. -##' @inheritParams add_horizon +##' @inheritParams fill_missing ##' @inheritParams estimate_infections ##' @importFrom data.table copy merge.data.table setDT ##' @return A data.table with missing values for future dates diff --git a/man/add_horizon.Rd b/man/add_horizon.Rd index 524cbc1b0..ca1ce27cf 100644 --- a/man/add_horizon.Rd +++ b/man/add_horizon.Rd @@ -7,20 +7,26 @@ add_horizon(data, horizon, accumulate = 1L, obs_column = "confirm", by = NULL) } \arguments{ -\item{data}{A \verb{} of confirmed cases (confirm) by date (date). -\code{confirm} must be numeric and \code{date} must be in date format. Optionally -this can also have a logical \code{accumulate} column which indicates whether -data should be added to the next data point. This is useful when modelling -e.g. weekly incidence data. 
See also the \code{\link[=fill_missing]{fill_missing()}} function which -helps add the \code{accumulate} column with the desired properties when dealing -with non-daily data. If any accumulation is done this happens after -truncation as specified by the \code{truncation} argument.} +\item{data}{Data frame with a \code{date} column. The other columns depend on the +model that the data are to be used, e.g. \code{\link[=estimate_infections]{estimate_infections()}} or +\code{\link[=estimate_secondary]{estimate_secondary()}}. See the documentation there for the expected +format.} \item{horizon}{Numeric, defaults to 7. Number of days into the future to forecast.} \item{accumulate}{The number of days to accumulate when generating posterior prediction, e.g. 7 for weekly accumulated forecasts.} + +\item{obs_column}{Character (default: "confirm"). If given, only the column +specified here will be used for checking missingness. This is useful if +using a data set that has multiple columns of which one of them +corresponds to observations that are to be processed here.} + +\item{by}{Character vector. Name(s) of any additional column(s) where +data processing should be done separately for each value in the column. +This is useful when using data representing e.g. multiple geographies. 
If +NULL (default) no such grouping is done.} } \value{ A data.table with missing values for future dates From c9d369341eff6b526281da4eac2c4a3bbe21c9d7 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 14:17:31 +0000 Subject: [PATCH 17/20] Update NEWS.md Co-authored-by: James Azam --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 2b527e4ce..29b8a2ebf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,7 +21,7 @@ ## Package changes -- The internal functions `create_clean_reported_cases()` has been broken up into several functions, with relevant ones `filter_leading_zeros()`, `add_breakpoints()` and `apply_zero_threshold()` exposed to the user. By @sbfnk in #884 and reviewed by @seabbs. +- The internal function `create_clean_reported_cases()` has been broken up into several functions, with relevant ones `filter_leading_zeros()`, `add_breakpoints()` and `apply_zero_threshold()` exposed to the user. By @sbfnk in #884 and reviewed by @seabbs and @jamesmbaazam. ## Documentation From 9c7d631bff0c935a8277b4ab14d564a94cea3483 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 14:25:51 +0000 Subject: [PATCH 18/20] re-format --- R/preprocessing.R | 10 +++++----- man/apply_zero_threshold.Rd | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 9c1939156..913300d7b 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -293,11 +293,11 @@ filter_leading_zeros <- function(data, obs_column = "confirm", by = NULL) { ##' of the case counts to a threshold. If the 7-day average is above the ##' threshold, the zero case count is replaced with `NA`. ##' -##' @param threshold Numeric, defaults to `Inf`. Indicates if detected zero cases -##' are meaningful by using a threshold number of cases based on the 7-day -##' average. 
If the average is above this threshold at the time of a zero -##' observation count then the zero is replaced with a missing (`NA`) count -##' and thus ignored in the likelihood. +##' @param threshold Numeric, defaults to `Inf`. Indicates if detected zero +##' cases are meaningful by using a threshold number of cases based on the +##' 7-day average. If the average is above this threshold at the time of a +##' zero observation count then the zero is replaced with a missing (`NA`) +##' count and thus ignored in the likelihood. ##' ##' @inheritParams estimate_infections ##' @inheritParams fill_missing diff --git a/man/apply_zero_threshold.Rd b/man/apply_zero_threshold.Rd index 6bbf9041b..4d123c9ce 100644 --- a/man/apply_zero_threshold.Rd +++ b/man/apply_zero_threshold.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/preprocessing.R \name{apply_zero_threshold} \alias{apply_zero_threshold} -\title{Converts zero case counts to NA (missing) if the 7-day average is above a +\title{Convert zero case counts to \code{NA} (missing) if the 7-day average is above a threshold.} \usage{ apply_zero_threshold(data, threshold = Inf, obs_column = "confirm") @@ -17,11 +17,11 @@ helps add the \code{accumulate} column with the desired properties when dealing with non-daily data. If any accumulation is done this happens after truncation as specified by the \code{truncation} argument.} -\item{threshold}{Numeric, defaults to Inf. Indicates if detected zero cases -are meaningful by using a threshold number of cases based on the 7-day -average. If the average is above this threshold at the time of a zero -observation count then the zero is replaced with a missing (\code{NA}) count -and thus ignored in the likelihood.} +\item{threshold}{Numeric, defaults to \code{Inf}. Indicates if detected zero +cases are meaningful by using a threshold number of cases based on the +7-day average. 
If the average is above this threshold at the time of a +zero observation count then the zero is replaced with a missing (\code{NA}) +count and thus ignored in the likelihood.} \item{obs_column}{Character (default: "confirm"). If given, only the column specified here will be used for checking missingness. This is useful if @@ -34,5 +34,5 @@ A data.table with the zero threshold applied. \description{ This function aims to detect spurious zeroes by comparing the 7-day average of the case counts to a threshold. If the 7-day average is above the -threshold, the zero case count is replaced with NA. +threshold, the zero case count is replaced with \code{NA}. } From 515ba695b1c1457754bd13a1802839e2227b23d5 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 16:52:01 +0000 Subject: [PATCH 19/20] remove empty test --- tests/testthat/test-create_clean_reported_cases.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/testthat/test-create_clean_reported_cases.R b/tests/testthat/test-create_clean_reported_cases.R index 3b737b271..2314a2adc 100644 --- a/tests/testthat/test-create_clean_reported_cases.R +++ b/tests/testthat/test-create_clean_reported_cases.R @@ -1,7 +1,3 @@ -test_that("create_clean_reported_cases runs without errors", { - expect_no_error(create_clean_reported_cases(example_confirmed, 7)) -}) - test_that("create_clean_reported_cases returns a data table", { result <- create_clean_reported_cases(example_confirmed, 7) expect_s3_class(result, "data.table") From a34df0cd5f709cd0419436db90bfd2f397923cec Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 10 Dec 2024 16:53:24 +0000 Subject: [PATCH 20/20] fix warning message --- R/estimate_secondary.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index 78d1cf1ba..09b635d7d 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -220,8 +220,8 @@ estimate_secondary <- function(data, cli_warn(c( "!" 
= "Filtering initial zero observations in the data. This functionality will be removed in future versions of EpiNow2. In order - to filter initial zero observations use the {.fn filter_leading_zeros()} - function on the data before calling {.fn estimate_secondary()." + to filter initial zero observations use the {.fn filter_leading_zeros} + function on the data before calling {.fn estimate_secondary}." )) } secondary_reports <- create_clean_reported_cases(