From 1b3d83f1b63af02146d0c20a8f72870d517d3f94 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Tue, 23 Jul 2024 16:30:24 -0400 Subject: [PATCH 01/14] Create dev_history.R --- dev/dev_history.R | 92 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 dev/dev_history.R diff --git a/dev/dev_history.R b/dev/dev_history.R new file mode 100644 index 0000000..cbd7f45 --- /dev/null +++ b/dev/dev_history.R @@ -0,0 +1,92 @@ +# Prepare for CRAN ---- + +# Update dependencies in DESCRIPTION +# install.packages('attachment', repos = 'https://thinkr-open.r-universe.dev') +attachment::att_amend_desc() + +# Check package coverage +covr::package_coverage() +covr::report() + +# Run tests +devtools::test() +testthat::test_dir("tests/testthat/") + +# Run examples 22 +devtools::run_examples() + +# autotest::autotest_package(test = TRUE) + +# Check package as CRAN using the correct CRAN repo +withr::with_options(list(repos = c(CRAN = "https://cloud.r-project.org/")), + {callr::default_repos() + rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"))}) +# devtools::check(args = c("--no-manual", "--as-cran")) + +# Check content +# install.packages('checkhelper', repos = 'https://thinkr-open.r-universe.dev') +# All functions must have either `@noRd` or an `@export`. +checkhelper::find_missing_tags() + +# Check that you let the house clean after the check, examples and tests +# If you used parallel testing, you may need to avoid it for the next check with `Config/testthat/parallel: false` in DESCRIPTION +all_files_remaining <- checkhelper::check_clean_userspace() +all_files_remaining +# If needed, set back parallel testing with `Config/testthat/parallel: true` in DESCRIPTION + +# Check spelling - No typo +# usethis::use_spell_check() +spelling::spell_check_package() + +# Check URL are correct +# install.packages('urlchecker', repos = 'https://r-lib.r-universe.dev') +urlchecker::url_check() +urlchecker::url_update() + +# check on other distributions +# _rhub v2 +rhub::rhub_setup() # Commit, push, merge +rhub::rhub_doctor() +rhub::rhub_platforms() +rhub::rhub_check() # launch manually + + +# _win devel CRAN +devtools::check_win_devel() +# _win release CRAN +devtools::check_win_release() +# _macos CRAN +# Need to follow the URL proposed to see the results +devtools::check_mac_release() + +# Check reverse dependencies +# remotes::install_github("r-lib/revdepcheck") +usethis::use_git_ignore("revdep/") +usethis::use_build_ignore("revdep/") + +devtools::revdep() +library(revdepcheck) +# In another session because Rstudio interactive change your config: +id <- rstudioapi::terminalExecute("Rscript -e 'revdepcheck::revdep_check(num_workers = 4)'") +rstudioapi::terminalKill(id) +# if [Exit Code] is not 0, there is a problem ! +# to see the problem: execute the command in a new terminal manually. + +# See outputs now available in revdep/ +revdep_details(revdep = "pkg") +revdep_summary() # table of results by package +revdep_report() +# Clean up when on CRAN +revdep_reset() + +# Update NEWS +# Bump version manually and add list of changes + +# Add comments for CRAN +usethis::use_cran_comments(open = rlang::is_interactive()) + +# Upgrade version number +usethis::use_version(which = c("patch", "minor", "major", "dev")[1]) + +# Verify you're ready for release, and release +devtools::release() From 696e7e5543472a755a923bc004d92ae51f81023f Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Tue, 23 Jul 2024 16:31:59 -0400 Subject: [PATCH 02/14] Create config_attachment.yaml --- dev/config_attachment.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 dev/config_attachment.yaml diff --git a/dev/config_attachment.yaml b/dev/config_attachment.yaml new file mode 100644 index 0000000..46e24ec --- /dev/null +++ b/dev/config_attachment.yaml @@ -0,0 +1,12 @@ +path.n: NAMESPACE +path.d: DESCRIPTION +dir.r: R +dir.v: vignettes +dir.t: tests +extra.suggests: ~ +pkg_ignore: ~ +document: yes +normalize: yes +inside_rmd: no +must.exist: yes +check_if_suggests_is_installed: yes From 02541dd2bcb36d7753d307f9987792f9cc202043 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Tue, 23 Jul 2024 16:33:40 -0400 Subject: [PATCH 03/14] Amend DESCRIPTION from package code parsing --- DESCRIPTION | 50 +++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a272f80..b39d1e6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,33 +1,37 @@ Package: distfromq Title: Reconstruct a Distribution from a Collection of Quantiles Version: 1.0.4 -Authors@R: - c(person("Evan", "Ray", , "elray@umass.edu", role = c("aut", "cre")), - person("Aaron", "Gerding", , "agerding@umass.edu", role = c("aut")), - person("Li", "Shandross", , "lshandross@umass.edu", role = c("ctb")), - person("Nick", "Reich", , "nick@umass.edu", role = c("ctb"))) -Description: Given a set of predictive quantiles from a distribution, estimate - the distribution and create `d`, `p`, `q`, and `r` functions to evaluate its - density function, distribution function, and quantile function, and generate - random samples. On the interior of the provided quantiles, an interpolation - method such as a monotonic cubic spline is used; the tails are approximated - by a location-scale family. +Authors@R: c( + person("Evan", "Ray", , "elray@umass.edu", role = c("aut", "cre")), + person("Aaron", "Gerding", , "agerding@umass.edu", role = "aut"), + person("Li", "Shandross", , "lshandross@umass.edu", role = "ctb"), + person("Nick", "Reich", , "nick@umass.edu", role = "ctb") + ) +Description: Given a set of predictive quantiles from a distribution, + estimate the distribution and create `d`, `p`, `q`, and `r` functions + to evaluate its density function, distribution function, and quantile + function, and generate random samples. On the interior of the provided + quantiles, an interpolation method such as a monotonic cubic spline is + used; the tails are approximated by a location-scale family. License: GPL (>= 3) -Encoding: UTF-8 -LazyData: true -Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 -Suggests: - knitr, - rmarkdown, - dplyr, - ggplot2, - testthat (>= 3.0.0) +URL: http://reichlab.io/distfromq/ Imports: checkmate, purrr, splines, + stats, + utils, zeallot -VignetteBuilder: knitr +Suggests: + dplyr, + ggplot2, + knitr, + rmarkdown, + testthat (>= 3.0.0) +VignetteBuilder: + knitr Config/testthat/edition: 3 -URL: http://reichlab.io/distfromq/ +Encoding: UTF-8 +LazyData: true +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.3.1 From 655642229b62ab352b87f9498a5de6ed9bb444b7 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Tue, 23 Jul 2024 17:18:00 -0400 Subject: [PATCH 04/14] Address no visible binding for global variable note (#25) --- R/ext.R | 2 ++ R/make_dist_fns.R | 1 + R/utils.R | 2 ++ 3 files changed, 5 insertions(+) diff --git a/R/ext.R b/R/ext.R index 3f2184c..cfa58dd 100644 --- a/R/ext.R +++ b/R/ext.R @@ -1,3 +1,5 @@ +a <- b <- NULL + #' Calculate location and scale parameters for a specified distribution so that #' it matches two specified quantiles #' diff --git a/R/make_dist_fns.R b/R/make_dist_fns.R index ad50a92..4a92d7a 100644 --- a/R/make_dist_fns.R +++ b/R/make_dist_fns.R @@ -4,6 +4,7 @@ #' @importFrom splines backSpline NULL +disc_weight <- disc_ps <- disc_qs <- cont_ps <- cont_qs <- disc_ps_range <- NULL #' Clean up ps and qs provided by user: handle missing and unsorted values #' diff --git a/R/utils.R b/R/utils.R index d87a2e3..f7df477 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,5 @@ +dup_run_starts <- dup_run_ends <- NULL + #' Identify duplicated values in a sorted numeric vector, where comparison is #' up to a specified numeric tolerance. If there is a run of values where each #' consecutive pair is closer together than the tolerance, all are labeled as From 53d2155a9d399b4ccbdf59cecbcbdb93cca080d5 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:40:34 -0400 Subject: [PATCH 05/14] Add lint workflow --- .github/workflows/lint.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/lint.yaml diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 0000000..c3a09d8 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,34 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: lint + +permissions: read-all + +jobs: + lint: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::lintr, local::. + needs: lint + + - name: Lint + run: lintr::lint_package() + shell: Rscript {0} + env: + LINTR_ERROR_ON_LINT: true From e2209043e704753cce28d2f3327bb01de90f84fd Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:30:10 -0400 Subject: [PATCH 06/14] Create .lintr --- .lintr | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .lintr diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..101e17a --- /dev/null +++ b/.lintr @@ -0,0 +1,5 @@ +linters: linters_with_defaults( + line_length_linter = line_length_linter(120L), + commented_code_linter = NULL, + object_name_linter = NULL + ) From 7aad3c932bd9387c6d81a95059ba365eddbd3191 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:30:17 -0400 Subject: [PATCH 07/14] Update .Rbuildignore --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index d8fd826..c1e1623 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -7,3 +7,4 @@ ^docs$ ^pkgdown$ ^\.github$ +^\.lintr$ From 7f91831326d2afda1eb24d838dca830bf67aba5e Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:30:49 -0400 Subject: [PATCH 08/14] Lint package --- R/ext.R | 110 +- R/int.R | 565 +++++----- R/make_dist_fns.R | 638 ++++++----- R/utils.R | 321 +++--- tests/testthat/test-ext.R | 14 +- tests/testthat/test-make_d_fn.R | 272 ++--- tests/testthat/test-make_p_fn_make_q_fn.R | 1102 +++++++++---------- tests/testthat/test-make_r_fn.R | 42 +- tests/testthat/test-split_disc_cont_ps_qs.R | 497 ++++----- tests/testthat/test-step_interpolate.R | 260 ++--- tests/testthat/test-unique_duplicated_tol.R | 118 +- vignettes/distfromq.Rmd | 563 +++++----- 12 files changed, 2243 insertions(+), 2259 deletions(-) diff --git a/R/ext.R b/R/ext.R index cfa58dd..3cab4fb 100644 --- a/R/ext.R +++ b/R/ext.R @@ -16,18 +16,18 @@ a <- b <- NULL #' @return named list with entries `"a"`, the location parameter, and `"b"`, the #' scale parameter calc_loc_scale_params <- function(ps, qs, dist) { - if (dist == "lnorm") { - if (any(qs <= 0.0)) { - stop("For dist = 'lnorm', all qs must be positive") - } - qs <- log(qs) - qdst <- qnorm - } else { - qdst <- get(paste0("q", dist)) + if (dist == "lnorm") { + if (any(qs <= 0.0)) { + stop("For dist = 'lnorm', all qs must be positive") } - b <- suppressWarnings((qs[2] - qs[1]) / (qdst(ps[2]) - qdst(ps[1]))) - a <- suppressWarnings(qs[1] - b * qdst(ps[1])) - return(list(a = a, b = b)) + qs <- log(qs) + qdst <- qnorm + } else { + qdst <- get(paste0("q", dist)) + } + b <- suppressWarnings((qs[2] - qs[1]) / (qdst(ps[2]) - qdst(ps[1]))) + a <- suppressWarnings(qs[1] - b * qdst(ps[1])) + return(list(a = a, b = b)) } @@ -49,25 +49,25 @@ calc_loc_scale_params <- function(ps, qs, dist) { #' specified location-scale family that has quantiles matching those in `ps` #' and `qs` d_ext_factory <- function(ps, qs, dist) { - c(a, b) %<-% calc_loc_scale_params(ps, qs, dist) + c(a, b) %<-% calc_loc_scale_params(ps, qs, dist) - if (dist == "lnorm") { - d_ext <- function(x, log = FALSE) { - return(dlnorm(x, meanlog = a, sdlog = b, log = log)) - } - } else { - ddst <- get(paste0("d", dist)) - d_ext <- function(x, log = FALSE) { - result <- ddst((x - a) / b, log = TRUE) - log(b) - if (log) { - return(result) - } else { - return(exp(result)) - } - } + if (dist == "lnorm") { + d_ext <- function(x, log = FALSE) { + return(dlnorm(x, meanlog = a, sdlog = b, log = log)) } + } else { + ddst <- get(paste0("d", dist)) + d_ext <- function(x, log = FALSE) { + result <- ddst((x - a) / b, log = TRUE) - log(b) + if (log) { + return(result) + } else { + return(exp(result)) + } + } + } - return(d_ext) + return(d_ext) } @@ -89,21 +89,21 @@ d_ext_factory <- function(ps, qs, dist) { #' distribution in the specified location-scale family that has quantiles #' matching those in `ps` and `qs` p_ext_factory <- function(ps, qs, dist) { - c(a, b) %<-% calc_loc_scale_params(ps, qs, dist) + c(a, b) %<-% calc_loc_scale_params(ps, qs, dist) - if (dist == "lnorm") { - p_ext <- function(q, log.p = FALSE) { - return(plnorm(q, meanlog = a, sdlog = b, log.p = log.p)) - } - } else { - pdst <- get(paste0("p", dist)) + if (dist == "lnorm") { + p_ext <- function(q, log.p = FALSE) { + return(plnorm(q, meanlog = a, sdlog = b, log.p = log.p)) + } + } else { + pdst <- get(paste0("p", dist)) - p_ext <- function(q, log.p = FALSE) { - return(pdst((q - a) / b, log.p = log.p)) - } + p_ext <- function(q, log.p = FALSE) { + return(pdst((q - a) / b, log.p = log.p)) } + } - return(p_ext) + return(p_ext) } @@ -124,25 +124,25 @@ p_ext_factory <- function(ps, qs, dist) { #' quantile function of the distribution in the specified location-scale #' family that has quantiles matching those in `ps` and `qs` q_ext_factory <- function(ps, qs, dist) { - c(a, b) %<-% calc_loc_scale_params(ps, qs, dist) + c(a, b) %<-% calc_loc_scale_params(ps, qs, dist) - if (dist == "lnorm") { - q_ext <- function(p) { - return(qlnorm(p, meanlog = a, sdlog = b)) - } - } else { - qdst <- get(paste0("q", dist)) + if (dist == "lnorm") { + q_ext <- function(p) { + return(qlnorm(p, meanlog = a, sdlog = b)) + } + } else { + qdst <- get(paste0("q", dist)) - if (b == 0) { - q_ext <- function(p) { - rep(a, length(p)) - } - } else { - q_ext <- function(p) { - return(a + b * qdst(p)) - } - } + if (b == 0) { + q_ext <- function(p) { + rep(a, length(p)) + } + } else { + q_ext <- function(p) { + return(a + b * qdst(p)) + } } + } - return(q_ext) + return(q_ext) } diff --git a/R/int.R b/R/int.R index e0cfdc5..1f7ae1b 100644 --- a/R/int.R +++ b/R/int.R @@ -1,18 +1,17 @@ #' Create a polySpline object representing a monotonic Hermite spline #' interpolating a given set of points. #' -#' @param x vector giving the x coordinates of the points to be -#' interpolated. Alternatively a single plotting structure can -#' be specified: see ‘xy.coords’. +#' @param x vector giving the x coordinates of the points to be interpolated. +#' Alternatively a single plotting structure can be specified: see ‘xy.coords’. #' -#' @param y vector giving the y coordinates of the points to be -#' interpolated. Must be increasing or decreasing for ‘method = "hyman"’. -#' Alternatively a single plotting structure can be specified: see ‘xy.coords’. +#' @param y vector giving the y coordinates of the points to be interpolated. +#' Must be increasing or decreasing for ‘method = "hyman"’. Alternatively a +#' single plotting structure can be specified: see ‘xy.coords’. #' -#' @param m (for ‘splinefunH()’) vector of _slopes_ \eqn{m_i}{m[i]} at the points -#' \eqn{(x_i,y_i)}{(x[i],y[i])}; these together determine the *H*ermite “spline” -#' which is piecewise cubic, (only) _once_ differentiable -#' continuously. +#' @param m (for ‘splinefunH()’) vector of _slopes_ \eqn{m_i}{m[i]} at the +#' points \eqn{(x_i,y_i)}{(x[i],y[i])}; these together determine the +#' *H*ermite “spline” which is piecewise cubic, (only) _once_ differentiable +#' continuously. #' #' @details This function essentially reproduces `stats::splinefunH`, but it #' returns a polynomial spline object as used in the `splines` package rather @@ -22,76 +21,77 @@ #' @return An object of class `polySpline` with the spline object, suitable for #' use with other functionality from the `splines` package. mono_Hermite_spline <- function(x, y, m) { - # For a degree 3 Hermite polynomial, let d be an interpolation parameter - # between 0 and delta[i] where delta[i] = x[i+1] - x[i] representing the - # position of x between x[i] and x[i+1], and t = d/delta[i]. - # g(d) = (2 t^3 - 3 t^2 + 1) y[i] + (t^3 - 2 t^2 + t) m[i] - # + (-2 t^3 + 3 t^2) y[i+1] + (t^3 - t^2) m[i+1] - # Note that g(0) = y[i], g(delta[i]) = y[i+1], - # g'(0) = m[i] / delta_i, and g'(delta[i]) = d/dt ... * dt/dd = m[i+1] / delta_i. - # We therefore need to adjust the slopes by the factor delta_i. - # - # Collecting like terms, we arrive at - # g(d) = y[i] + m[i] t + (-3 y[i] - 2 m[i] + 3 y[i+1] - m[i+1]) t^2 - # + (2 y[i] + m[i] - 2 y[i+1] + m[i+1]) t^3 - # = y[i] + m[i] d / delta[i] - # + (-3 y[i] - 2 m[i] + 3 y[i+1] - m[i+1]) (d / delta[i])^2 - # + (2 y[i] + m[i] - 2 y[i+1] + m[i+1]) (d / delta[i])^3 - # = c0 + c1 * d + c2 * d^2 + c3 * d^3, where - # - # c0 = y[i], - # c1 = m[i] / delta[i], - # c2 = (-3 y[i] - 2 m[i] + 3 y[i+1] - m[i+1]) / delta[i]^2, and - # c3 = (2 y[i] + m[i] - 2 y[i+1] + m[i+1]) / delta[i]^3 - n <- length(y) - x_i <- x[-n] - x_ip1 <- x[-1] - y_i <- y[-n] - y_ip1 <- y[-1] - delta <- x_ip1 - x_i - delta2 <- delta^2 - delta3 <- delta^3 - - # adjustments to m to ensure monotonicity; see steps 3 through 5 at - # https://en.wikipedia.org/wiki/Monotone_cubic_interpolation#Monotone_cubic_Hermite_interpolation - for (i in seq_along(x_i)) { - if (y_i[i] == y_ip1[i]) { - m[i] <- m[i+1] <- 0.0 - } + # For a degree 3 Hermite polynomial, let d be an interpolation parameter + # between 0 and delta[i] where delta[i] = x[i+1] - x[i] representing the + # position of x between x[i] and x[i+1], and t = d/delta[i]. + # g(d) = (2 t^3 - 3 t^2 + 1) y[i] + (t^3 - 2 t^2 + t) m[i] + # + (-2 t^3 + 3 t^2) y[i+1] + (t^3 - t^2) m[i+1] + # Note that g(0) = y[i], g(delta[i]) = y[i+1], + # g'(0) = m[i] / delta_i, and g'(delta[i]) = d/dt ... * dt/dd = m[i+1] / delta_i. + # We therefore need to adjust the slopes by the factor delta_i. + # + # Collecting like terms, we arrive at + # g(d) = y[i] + m[i] t + (-3 y[i] - 2 m[i] + 3 y[i+1] - m[i+1]) t^2 + # + (2 y[i] + m[i] - 2 y[i+1] + m[i+1]) t^3 + # = y[i] + m[i] d / delta[i] + # + (-3 y[i] - 2 m[i] + 3 y[i+1] - m[i+1]) (d / delta[i])^2 + # + (2 y[i] + m[i] - 2 y[i+1] + m[i+1]) (d / delta[i])^3 + # = c0 + c1 * d + c2 * d^2 + c3 * d^3, where + # + # c0 = y[i], + # c1 = m[i] / delta[i], + # c2 = (-3 y[i] - 2 m[i] + 3 y[i+1] - m[i+1]) / delta[i]^2, and + # c3 = (2 y[i] + m[i] - 2 y[i+1] + m[i+1]) / delta[i]^3 + n <- length(y) + x_i <- x[-n] + x_ip1 <- x[-1] + y_i <- y[-n] + y_ip1 <- y[-1] + delta <- x_ip1 - x_i + delta2 <- delta^2 + delta3 <- delta^3 + + # adjustments to m to ensure monotonicity; see steps 3 through 5 at + # https://en.wikipedia.org/wiki/Monotone_cubic_interpolation#Monotone_cubic_Hermite_interpolation + for (i in seq_along(x_i)) { + if (y_i[i] == y_ip1[i]) { + m[i] <- m[i + 1] <- 0.0 } - for (i in seq_along(x_i)) { - if (y_i[i] != y_ip1[i]) { - d <- (y_ip1[i] - y_i[i]) / (x_ip1[i] - x_i[i]) - alpha <- m[i] / d - beta <- m[i+1] / d - ab_sq <- alpha^2 + beta^2 - if (ab_sq > 9) { - tau <- 3 / sqrt(ab_sq) - m[i] <- tau * m[i] - m[i+1] <- tau * m[i+1] - } - } + } + for (i in seq_along(x_i)) { + if (y_i[i] != y_ip1[i]) { + d <- (y_ip1[i] - y_i[i]) / (x_ip1[i] - x_i[i]) + alpha <- m[i] / d + beta <- m[i + 1] / d + ab_sq <- alpha^2 + beta^2 + if (ab_sq > 9) { + tau <- 3 / sqrt(ab_sq) + m[i] <- tau * m[i] + m[i + 1] <- tau * m[i + 1] + } } - - m_i <- m[-n] - m_ip1 <- m[-1] - m_i <- m_i * delta - m_ip1 <- m_ip1 * delta - - ct_0 <- y_i - ct_1 <- m_i - ct_2 <- (-3 * y_i - 2 * m_i + 3 * y_ip1 - m_ip1) - ct_3 <- (2 * y_i + m_i - 2 * y_ip1 + m_ip1) - spl <- structure( - list( - knots = x, - coefficients = rbind( - cbind(ct_0, ct_1 / delta, ct_2 / delta2, ct_3 / delta3), - matrix(c(tail(y, 1), tail(m, 1), 0, 0), nrow = 1) - ) - ), - class = c("npolySpline", "polySpline", "spline")) - return(spl) + } + + m_i <- m[-n] + m_ip1 <- m[-1] + m_i <- m_i * delta + m_ip1 <- m_ip1 * delta + + ct_0 <- y_i + ct_1 <- m_i + ct_2 <- (-3 * y_i - 2 * m_i + 3 * y_ip1 - m_ip1) + ct_3 <- (2 * y_i + m_i - 2 * y_ip1 + m_ip1) + spl <- structure( + list( + knots = x, + coefficients = rbind( + cbind(ct_0, ct_1 / delta, ct_2 / delta2, ct_3 / delta3), + matrix(c(tail(y, 1), tail(m, 1), 0, 0), nrow = 1) + ) + ), + class = c("npolySpline", "polySpline", "spline") + ) + return(spl) } @@ -114,91 +114,91 @@ mono_Hermite_spline <- function(x, y, m) { #' the input data points. step_interp_factory <- function(x, y, cont_dir = c("right", "left"), increasing = TRUE) { - if ((length(x) != length(y)) || length(x) == 0) { - stop("x and y must have the same non-zero length") + if ((length(x) != length(y)) || length(x) == 0) { + stop("x and y must have the same non-zero length") + } + cont_dir <- match.arg(cont_dir) + + dup_inds <- duplicated(x) + if (sum(dup_inds) == 0) { + dup_x <- NULL + } else { + dup_x <- sort(x[dup_inds]) + } + interval_endpoints <- unique(c(min(x), dup_x, max(x))) + n_intervals <- length(interval_endpoints) - 1 + + interp_funs <- purrr::map( + seq_len(n_intervals), + function(i) { + l <- interval_endpoints[i] + u <- interval_endpoints[i + 1] + + l_ind <- which(x == l) + l_ind <- l_ind[which.max(y[l_ind])] + u_ind <- which(x == u) + u_ind <- u_ind[which.min(y[u_ind])] + int_inds <- which((x > l) & (x < u)) + inds <- c(l_ind, int_inds, u_ind) + + return(approxfun(x[inds], y[inds])) } - cont_dir <- match.arg(cont_dir) - - dup_inds <- duplicated(x) - if (sum(dup_inds) == 0) { - dup_x <- NULL - } else { - dup_x <- sort(x[dup_inds]) - } - interval_endpoints <- unique(c(min(x), dup_x, max(x))) - n_intervals <- length(interval_endpoints) - 1 - - interp_funs <- purrr::map( - seq_len(n_intervals), - function(i) { - l <- interval_endpoints[i] - u <- interval_endpoints[i + 1] - - l_ind <- which(x == l) - l_ind <- l_ind[which.max(y[l_ind])] - u_ind <- which(x == u) - u_ind <- u_ind[which.min(y[u_ind])] - int_inds <- which((x > l) & (x < u)) - inds <- c(l_ind, int_inds, u_ind) - - return(approxfun(x[inds], y[inds])) + ) + + # overall left/right endpoints and comparator functions for determining + # whether x values are out of bounds on each side, and the prediction + # at the endpoint that is not covered by half-open intervals + l <- interval_endpoints[1] + u <- tail(interval_endpoints, 1) + if (cont_dir == "right") { + l_comp <- `<` + r_comp <- `>=` + # approaching the right endpoint from the right in an increasing + # function, we should predict the max of the corresponding y's; + # in a decreasing function, predict the min of the y's + extr_endpoint <- u + extr_fn <- if (increasing) max else min + extr_y <- extr_fn(y[x == extr_endpoint]) + } else { + l_comp <- `<=` + r_comp <- `>` + # approaching the left endpoint from the left in an increasing + # function, we should predict the min of the corresponding y's; + # in a decreasing function, predict the max of the y's + extr_endpoint <- l + extr_fn <- if (increasing) min else max + extr_y <- extr_fn(y[x == extr_endpoint]) + } + + # function of x that performs linear interpolation with steps + step_interp <- function(x) { + result <- rep(NA_real_, length(x)) + + # prediction at endpoint not covered by half-open intervals + extr_inds <- (x == extr_endpoint) + result[extr_inds] <- extr_y + + # predictions by linear interpolation between jump points + if (n_intervals > 0) { + # identify which interval each x observation falls into (if any) + interval_ind <- rep(-1L, length(x)) + ib_inds <- !(l_comp(x, l) | r_comp(x, u)) + interval_ind[ib_inds] <- cut(x[ib_inds], + breaks = interval_endpoints, + labels = FALSE, + right = (cont_dir != "right")) + for (i in unique(interval_ind)) { + if (i > 0) { + x_inds <- (interval_ind == i) + result[x_inds] <- interp_funs[[i]](x[x_inds]) } - ) - - # overall left/right endpoints and comparator functions for determining - # whether x values are out of bounds on each side, and the prediction - # at the endpoint that is not covered by half-open intervals - l <- interval_endpoints[1] - u <- tail(interval_endpoints, 1) - if (cont_dir == "right") { - l_comp <- `<` - r_comp <- `>=` - # approaching the right endpoint from the right in an increasing - # function, we should predict the max of the corresponding y's; - # in a decreasing function, predict the min of the y's - extr_endpoint <- u - extr_fn <- if (increasing) max else min - extr_y <- extr_fn(y[x == extr_endpoint]) - } else { - l_comp <- `<=` - r_comp <- `>` - # approaching the left endpoint from the left in an increasing - # function, we should predict the min of the corresponding y's; - # in a decreasing function, predict the max of the y's - extr_endpoint <- l - extr_fn <- if (increasing) min else max - extr_y <- extr_fn(y[x == extr_endpoint]) + } } - # function of x that performs linear interpolation with steps - step_interp <- function(x) { - result <- rep(NA_real_, length(x)) - - # prediction at endpoint not covered by half-open intervals - extr_inds <- (x == extr_endpoint) - result[extr_inds] <- extr_y - - # predictions by linear interpolation between jump points - if (n_intervals > 0) { - # identify which interval each x observation falls into (if any) - interval_ind <- rep(-1L, length(x)) - ib_inds <- !(l_comp(x, l) | r_comp(x, u)) - interval_ind[ib_inds] <- cut(x[ib_inds], - breaks = interval_endpoints, - labels = FALSE, - right = (cont_dir != "right")) - for (i in unique(interval_ind)) { - if (i > 0) { - x_inds <- (interval_ind == i) - result[x_inds] <- interp_funs[[i]](x[x_inds]) - } - } - } + return(result) + } - return(result) - } - - return(step_interp) + return(step_interp) } @@ -241,13 +241,13 @@ step_interp_factory <- function(x, y, cont_dir = c("right", "left"), spline_cdf <- function(ps, qs, tail_dist, fn_type = c("d", "p", "q"), n_grid = 20) { - fn_type <- match.arg(fn_type) + fn_type <- match.arg(fn_type) - if (!is.null(n_grid)) { - return(spline_cdf_grid_interp(ps, qs, tail_dist, fn_type, n_grid)) - } else { - return(spline_cdf_direct(ps, qs, tail_dist, fn_type)) - } + if (!is.null(n_grid)) { + return(spline_cdf_grid_interp(ps, qs, tail_dist, fn_type, n_grid)) + } else { + return(spline_cdf_direct(ps, qs, tail_dist, fn_type)) + } } #' @rdname spline_cdf @@ -255,37 +255,35 @@ spline_cdf <- function(ps, qs, tail_dist, spline_cdf_grid_interp <- function(ps, qs, tail_dist, fn_type = c("d", "p", "q"), n_grid = 20) { - # augment originally provided ps and qs with new grid of qs - c(ps, qs) %<-% grid_augment_ps_qs(ps, qs, tail_dist, n_grid) - - # Create resulting function and return - if (fn_type == "d") { - # note that upstream, we have already validated that qs are all - # distinct and there are at least two qs - slopes <- diff(ps) / diff(qs) - slopes <- c(slopes, tail(slopes, 1)) - int_d_fn <- function(x, log = FALSE) { - result <- approx(x = qs, y = slopes, xout = x, - method = "constant", f = 0.0)$y - if (log) return(log(result)) else return(result) - } - return(int_d_fn) - } else if (fn_type == "p") { - step_interp <- step_interp_factory(x = qs, y = ps, - cont_dir = "right") - int_p_fn <- function(q, log.p = FALSE) { - result <- step_interp(q) - if (log.p) return(log(result)) else return(result) - } - return(int_p_fn) - } else if (fn_type == "q") { - step_interp <- step_interp_factory(x = ps, y = qs, - cont_dir = "left") - int_q_fn <- function(p) { - return(step_interp(p)) - } - return(int_q_fn) + # augment originally provided ps and qs with new grid of qs + c(ps, qs) %<-% grid_augment_ps_qs(ps, qs, tail_dist, n_grid) + + # Create resulting function and return + if (fn_type == "d") { + # note that upstream, we have already validated that qs are all + # distinct and there are at least two qs + slopes <- diff(ps) / diff(qs) + slopes <- c(slopes, tail(slopes, 1)) + int_d_fn <- function(x, log = FALSE) { + result <- approx(x = qs, y = slopes, xout = x, + method = "constant", f = 0.0)$y + if (log) return(log(result)) else return(result) + } + return(int_d_fn) + } else if (fn_type == "p") { + step_interp <- step_interp_factory(x = qs, y = ps, cont_dir = "right") + int_p_fn <- function(q, log.p = FALSE) { + result <- step_interp(q) + if (log.p) return(log(result)) else return(result) } + return(int_p_fn) + } else if (fn_type == "q") { + step_interp <- step_interp_factory(x = ps, y = qs, cont_dir = "left") + int_q_fn <- function(p) { + return(step_interp(p)) + } + return(int_q_fn) + } } #' Internal function that augments ps and qs by filling in a grid of @@ -294,36 +292,37 @@ spline_cdf_grid_interp <- function(ps, qs, tail_dist, #' by evaluating the spline output from `spline_cdf_direct` at the qs grid. #' @keywords internal grid_augment_ps_qs <- function(ps, qs, tail_dist, n_grid) { - n_grid <- as.integer(n_grid) - if (n_grid < 0) { - stop("If provided, `n_grid` must be a non-negative integer.") - } else if (n_grid == 0) { - # if the user just wants to linearly interpolate the given qs, ps - return(list(ps = ps, qs = qs)) + n_grid <- as.integer(n_grid) + if (n_grid < 0) { + stop("If provided, `n_grid` must be a non-negative integer.") + } else if (n_grid == 0) { + # if the user just wants to linearly interpolate the given qs, ps + return(list(ps = ps, qs = qs)) + } + + # get a function that evaluates the approximated cdf based on a spline + p_fn <- spline_cdf_direct(ps = ps, qs = qs, tail_dist = tail_dist, + fn_type = "p") + + # obtain a grid of intermediate points (between the provided qs) at + # which we will evaluate the spline approximation + q_grid <- purrr::map( + seq_len(length(qs) - 1), + function(i) { + new_q <- seq(from = qs[i], to = qs[i + 1], length = n_grid + 2L) + return(new_q[2:(n_grid + 1)]) } + ) + q_grid <- unlist(q_grid) - # get a function that evaluates the approximated cdf based on a spline - p_fn <- spline_cdf_direct(ps = ps, qs = qs, tail_dist = tail_dist, - fn_type = "p") - - # obtain a grid of intermediate points (between the provided qs) at - # which we will evaluate the spline approximation - q_grid <- purrr::map( - seq_len(length(qs) - 1), - function(i) { - new_q <- seq(from = qs[i], to = qs[i + 1], length = n_grid + 2L) - return(new_q[2:(n_grid + 1)]) - }) - q_grid <- unlist(q_grid) - - # evaluate spline-based cdf approximation at new q values, and - # append to the inputs. - p_grid <- p_fn(q_grid) + # evaluate spline-based cdf approximation at new q values, and + # append to the inputs. + p_grid <- p_fn(q_grid) - ps <- sort(c(ps, p_grid)) - qs <- sort(c(qs, q_grid)) + ps <- sort(c(ps, p_grid)) + qs <- sort(c(qs, q_grid)) - return(list(ps = ps, qs = qs)) + return(list(ps = ps, qs = qs)) } #' Internal function that constructs a monotonic Hermite spline interpolating @@ -332,74 +331,74 @@ grid_augment_ps_qs <- function(ps, qs, tail_dist, n_grid) { #' @rdname spline_cdf spline_cdf_direct <- function(ps, qs, tail_dist, fn_type = c("d", "p", "q")) { - # fit a monotonic spline to the qs and ps for the continuous part of the - # distribution to approximate the cdf on the interior - # the vector m that we assemble here has the target slopes of the spline - # at each data point (qs[i], ps[i]) - # on ends, slope of cdf approximation should match tail distribution pdf - # on interior, slope is the mean of the slopes of the adjacent segments - # note: there is probably room for improvement for this interior behavior, - # but this seems to be the standard strategy for monotonic splines - d_lower <- d_ext_factory(ps = head(ps, 2), qs = head(qs, 2), - dist = tail_dist) - m_lower <- d_lower(qs[1]) - d_upper <- d_ext_factory(ps = tail(ps, 2), qs = tail(qs, 2), - dist = tail_dist) - m_upper <- d_upper(tail(qs, 1)) - - m_segments <- diff(ps) / diff(qs) - n <- length(m_segments) - m_interior <- apply(cbind(m_segments[-1], m_segments[-n]), 1, mean) - - # in case of errors in evaluating density function at endpoints, - # repeat interior values - if (is.nan(m_lower) || !is.finite(m_lower)) { - if (n == 1) { - m_lower <- m_segments - } else { - m_lower <- m_interior[1] - } + # fit a monotonic spline to the qs and ps for the continuous part of the + # distribution to approximate the cdf on the interior + # the vector m that we assemble here has the target slopes of the spline + # at each data point (qs[i], ps[i]) + # on ends, slope of cdf approximation should match tail distribution pdf + # on interior, slope is the mean of the slopes of the adjacent segments + # note: there is probably room for improvement for this interior behavior, + # but this seems to be the standard strategy for monotonic splines + d_lower <- d_ext_factory(ps = head(ps, 2), qs = head(qs, 2), + dist = tail_dist) + m_lower <- d_lower(qs[1]) + d_upper <- d_ext_factory(ps = tail(ps, 2), qs = tail(qs, 2), + dist = tail_dist) + m_upper <- d_upper(tail(qs, 1)) + + m_segments <- diff(ps) / diff(qs) + n <- length(m_segments) + m_interior <- apply(cbind(m_segments[-1], m_segments[-n]), 1, mean) + + # in case of errors in evaluating density function at endpoints, + # repeat interior values + if (is.nan(m_lower) || !is.finite(m_lower)) { + if (n == 1) { + m_lower <- m_segments + } else { + m_lower <- m_interior[1] } - if (is.nan(m_upper) || !is.finite(m_upper)) { - if (n == 1) { - m_upper <- m_segments - } else { - m_upper <- tail(m_interior, 1) - } + } + if (is.nan(m_upper) || !is.finite(m_upper)) { + if (n == 1) { + m_upper <- m_segments + } else { + m_upper <- tail(m_interior, 1) } + } - m <- c(m_lower, m_interior, m_upper) + m <- c(m_lower, m_interior, m_upper) - interior_cdf_spline <- mono_Hermite_spline(x = qs, y = ps, m = m) + interior_cdf_spline <- mono_Hermite_spline(x = qs, y = ps, m = m) - # get a function that calculates the pdf, cdf, or quantile function - if (fn_type == "d") { - int_d_fn <- function(x, log = FALSE) { - result <- predict(interior_cdf_spline, x, deriv = 1)$y - if (log) { - return(log(result)) - } else { - return(result) - } - } - return(int_d_fn) - } else if (fn_type == "p") { - int_p_fn <- function(q, log.p = FALSE) { - result <- predict(interior_cdf_spline, q, deriv = 0)$y - - if (log.p) { - return(log(result)) - } else { - return(result) - } - } - return(int_p_fn) - } else if (fn_type == "q") { - interior_qf_spline <- backSpline(interior_cdf_spline) + # get a function that calculates the pdf, cdf, or quantile function + if (fn_type == "d") { + int_d_fn <- function(x, log = FALSE) { + result <- predict(interior_cdf_spline, x, deriv = 1)$y + if (log) { + return(log(result)) + } else { + return(result) + } + } + return(int_d_fn) + } else if (fn_type == "p") { + int_p_fn <- function(q, log.p = FALSE) { + result <- predict(interior_cdf_spline, q, deriv = 0)$y + + if (log.p) { + return(log(result)) + } else { + return(result) + } + } + return(int_p_fn) + } else if (fn_type == "q") { + interior_qf_spline <- backSpline(interior_cdf_spline) - int_q_fn <- function(p) { - return(predict(interior_qf_spline, p)$y) - } - return(int_q_fn) + int_q_fn <- function(p) { + return(predict(interior_qf_spline, p)$y) } + return(int_q_fn) + } } diff --git a/R/make_dist_fns.R b/R/make_dist_fns.R index 4a92d7a..8da6b92 100644 --- a/R/make_dist_fns.R +++ b/R/make_dist_fns.R @@ -13,25 +13,25 @@ disc_weight <- disc_ps <- disc_qs <- cont_ps <- cont_qs <- disc_ps_range <- NULL #' #' @return named list with entries `ps` and `qs` clean_ps_and_qs <- function(ps, qs) { - checkmate::assert_numeric(ps, lower=0, upper=1) - checkmate::assert_numeric(qs) - - if (length(ps) != length(qs)) { - stop("'ps' and 'qs' must have the same length.") - } - - # drop missing values for qs - na_idx <- is.na(qs) | is.na(ps) - if (any(na_idx)) { - ps <- ps[!na_idx] - qs <- qs[!na_idx] - } + checkmate::assert_numeric(ps, lower = 0, upper = 1) + checkmate::assert_numeric(qs) + + if (length(ps) != length(qs)) { + stop("'ps' and 'qs' must have the same length.") + } + + # drop missing values for qs + na_idx <- is.na(qs) | is.na(ps) + if (any(na_idx)) { + ps <- ps[!na_idx] + qs <- qs[!na_idx] + } - # sort ps and qs - ps <- sort(ps) - qs <- sort(qs) + # sort ps and qs + ps <- sort(ps) + qs <- sort(qs) - return(list(ps = ps, qs = qs)) + return(list(ps = ps, qs = qs)) } @@ -42,11 +42,11 @@ clean_ps_and_qs <- function(ps, qs) { #' @param ps vector of probability levels #' @param qs vector of quantile values correponding to ps #' @param interior_method method for interpolating the distribution on the -#' interior of the provided `qs`. This package provides one method for this, -#' `"spline_cdf"`. The user may also provide a custom function; see the -#' details for more. +#' interior of the provided `qs`. This package provides one method for this, +#' `"spline_cdf"`. The user may also provide a custom function; see the +#' details for more. #' @param interior_args an optional named list of arguments that are passed -#' on to the `interior_method` +#' on to the `interior_method` #' @param tail_dist name of parametric distribution for the tails #' @param dup_tol numeric tolerance for identifying duplicated values indicating #' a discrete component of the distribution. If there is a run of values where @@ -55,19 +55,19 @@ clean_ps_and_qs <- function(ps, qs) { #' tolerance. #' @param zero_tol numeric tolerance for identifying values in `qs` that are #' (approximately) zero. -#' +#' #' @details The default `interior_method`, `"spline_cdf"`, represents the -#' distribution as a sum of a discrete component at any points where there -#' are duplicated `qs` for multiple different `ps` and a continuous component -#' that is estimated by using a monotonic cubic spline that interpolates the -#' provided `(q, p)` pairs as an estimate of the cdf. The density function is -#' then obtained by differentiating this estimate of the cdf. -#' -#' Optionally, the user may provide another function that accepts arguments -#' `ps`, `qs`, `tail_dist`, and `fn_type` (which will be either `"d"`, `"p"`, -#' or `"q"`), and optionally additional named arguments to be specified via -#' `interior_args`. This function should return a function with arguments -#' `x`, `log` that evaluates the pdf or its logarithm. +#' distribution as a sum of a discrete component at any points where there +#' are duplicated `qs` for multiple different `ps` and a continuous component +#' that is estimated by using a monotonic cubic spline that interpolates the +#' provided `(q, p)` pairs as an estimate of the cdf. The density function is +#' then obtained by differentiating this estimate of the cdf. +#' +#' Optionally, the user may provide another function that accepts arguments +#' `ps`, `qs`, `tail_dist`, and `fn_type` (which will be either `"d"`, `"p"`, +#' or `"q"`), and optionally additional named arguments to be specified via +#' `interior_args`. This function should return a function with arguments +#' `x`, `log` that evaluates the pdf or its logarithm. #' #' @return a function with arguments `x` and `log` that can be used to evaluate #' the approximate density function (or its `log`) at the points `x`. @@ -77,68 +77,69 @@ make_d_fn <- function(ps, qs, interior_args = list(), tail_dist = "norm", dup_tol = 1e-6, zero_tol = 1e-12) { - interior_method <- match.arg(interior_method) - - c(ps, qs) %<-% clean_ps_and_qs(ps, qs) - - # if tail_dist is "lnorm", we treat quantiles of 0 as indicative of a - # discrete point mass at 0, using a set up like a hurdle model - is_hurdle <- tail_dist == "lnorm" - - # split ps and qs into discrete and continuous part, along with the - # weight given to the discrete part - c(disc_weight, disc_ps, disc_qs, cont_ps, cont_qs, disc_ps_range) %<-% - split_disc_cont_ps_qs(ps, qs, dup_tol = dup_tol, zero_tol = zero_tol, - is_hurdle = is_hurdle) - - # throw an error if there is any discontinuous part of the distribution: - # we cannot estimate a density function - if (disc_weight > 0) { - stop("make_d_fn requires the distribution to be continuous, but a discrete component was detected") + interior_method <- match.arg(interior_method) + + c(ps, qs) %<-% clean_ps_and_qs(ps, qs) + + # if tail_dist is "lnorm", we treat quantiles of 0 as indicative of a + # discrete point mass at 0, using a set up like a hurdle model + is_hurdle <- tail_dist == "lnorm" + + # split ps and qs into discrete and continuous part, along with the + # weight given to the discrete part + c(disc_weight, disc_ps, disc_qs, cont_ps, cont_qs, disc_ps_range) %<-% + split_disc_cont_ps_qs(ps, qs, dup_tol = dup_tol, zero_tol = zero_tol, + is_hurdle = is_hurdle) + + # throw an error if there is any discontinuous part of the distribution: + # we cannot estimate a density function + if (disc_weight > 0) { + stop("make_d_fn requires the distribution to be continuous, but a discrete component was detected") + } + + # approximate the pdf on the interior by interpolating quantiles + interior_args <- c( + list(ps = cont_ps, qs = cont_qs, tail_dist = tail_dist, fn_type = "d"), + interior_args + ) + interior_pdf <- do.call(interior_method, args = interior_args) + + # approximate the pdf in the lower tail by extrapolating from the two + # lowest quantiles within a location-scale family + lower_pdf <- d_ext_factory(head(cont_ps, 2), head(cont_qs, 2), tail_dist) + + # approximate the pdf in the upper tail by extrapolating from the two + # largest quantiles within a location-scale family + upper_pdf <- d_ext_factory(tail(cont_ps, 2), tail(cont_qs, 2), tail_dist) + + d_fn <- function(x, log = FALSE) { + checkmate::assert_numeric(x) + + # instantiate result + result <- rep(NA_real_, length(x)) + + # interior points + interior_idx <- (x >= cont_qs[1]) & (x <= tail(cont_qs, 1)) + if (any(interior_idx)) { + result[interior_idx] <- interior_pdf(x[interior_idx], log = log) } - # approximate the pdf on the interior by interpolating quantiles - interior_args <- c( - list(ps = cont_ps, qs = cont_qs, tail_dist = tail_dist, fn_type = "d"), - interior_args) - interior_pdf <- do.call(interior_method, args = interior_args) - - # approximate the pdf in the lower tail by extrapolating from the two - # lowest quantiles within a location-scale family - lower_pdf <- d_ext_factory(head(cont_ps, 2), head(cont_qs, 2), tail_dist) - - # approximate the pdf in the upper tail by extrapolating from the two - # largest quantiles within a location-scale family - upper_pdf <- d_ext_factory(tail(cont_ps, 2), tail(cont_qs, 2), tail_dist) - - d_fn <- function(x, log=FALSE) { - checkmate::assert_numeric(x) - - # instantiate result - result <- rep(NA_real_, length(x)) - - # interior points - interior_idx <- (x >= cont_qs[1]) & (x <= tail(cont_qs, 1)) - if (any(interior_idx)) { - result[interior_idx] <- interior_pdf(x[interior_idx], log = log) - } - - # lower points - lower_idx <- (x < cont_qs[1]) - if (any(lower_idx)) { - result[lower_idx] <- lower_pdf(x[lower_idx], log = log) - } - - # upper points - upper_idx <- (x > tail(cont_qs, 1)) - if (any(upper_idx)) { - result[upper_idx] <- upper_pdf(x[upper_idx], log = log) - } + # lower points + lower_idx <- (x < cont_qs[1]) + if (any(lower_idx)) { + result[lower_idx] <- lower_pdf(x[lower_idx], log = log) + } - return(result) + # upper points + upper_idx <- (x > tail(cont_qs, 1)) + if (any(upper_idx)) { + result[upper_idx] <- upper_pdf(x[upper_idx], log = log) } - return(d_fn) + return(result) + } + + return(d_fn) } @@ -150,11 +151,11 @@ make_d_fn <- function(ps, qs, #' @param ps vector of probability levels #' @param qs vector of quantile values correponding to ps #' @param interior_method method for interpolating the distribution on the -#' interior of the provided `qs`. This package provides one method for this, -#' `"spline_cdf"`. The user may also provide a custom function; see the -#' details for more. +#' interior of the provided `qs`. This package provides one method for this, +#' `"spline_cdf"`. The user may also provide a custom function; see the +#' details for more. #' @param interior_args an optional named list of arguments that are passed -#' on to the `interior_method` +#' on to the `interior_method` #' @param tail_dist name of parametric distribution for the tails #' @param dup_tol numeric tolerance for identifying duplicated values indicating #' a discrete component of the distribution. If there is a run of values where @@ -165,16 +166,16 @@ make_d_fn <- function(ps, qs, #' (approximately) zero. #' #' @details The default `interior_method`, `"spline_cdf"`, represents the -#' distribution as a sum of a discrete component at any points where there -#' are duplicated `qs` for multiple different `ps` and a continuous component -#' that is estimated by using a monotonic cubic spline that interpolates the -#' provided `(q, p)` pairs as an estimate of the cdf. +#' distribution as a sum of a discrete component at any points where there +#' are duplicated `qs` for multiple different `ps` and a continuous component +#' that is estimated by using a monotonic cubic spline that interpolates the +#' provided `(q, p)` pairs as an estimate of the cdf. #' -#' Optionally, the user may provide another function that accepts arguments -#' `ps`, `qs`, `tail_dist`, and `fn_type` (which will be either `"d"`, `"p"`, -#' or `"q"`), and optionally additional named arguments to be specified via -#' `interior_args`. This function should return a function with arguments -#' `x`, `log` that evaluates the pdf or its logarithm. +#' Optionally, the user may provide another function that accepts arguments +#' `ps`, `qs`, `tail_dist`, and `fn_type` (which will be either `"d"`, `"p"`, +#' or `"q"`), and optionally additional named arguments to be specified via +#' `interior_args`. This function should return a function with arguments +#' `x`, `log` that evaluates the pdf or its logarithm. #' #' @return a function with arguments `q` and `log.p` that can be used to #' evaluate the approximate cumulative distribution function (or its `log`) @@ -185,109 +186,105 @@ make_p_fn <- function(ps, qs, interior_args = list(), tail_dist = "norm", dup_tol = 1e-6, zero_tol = 1e-12) { - interior_method <- match.arg(interior_method) + interior_method <- match.arg(interior_method) - c(ps, qs) %<-% clean_ps_and_qs(ps, qs) + c(ps, qs) %<-% clean_ps_and_qs(ps, qs) - # if tail_dist is "lnorm", we treat quantiles of 0 as indicative of a - # discrete point mass at 0, using a set up like a hurdle model - is_hurdle <- tail_dist == "lnorm" + # if tail_dist is "lnorm", we treat quantiles of 0 as indicative of a + # discrete point mass at 0, using a set up like a hurdle model + is_hurdle <- tail_dist == "lnorm" - # split ps and qs into discrete and continuous part, along with the - # weight given to the discrete part - c(disc_weight, disc_ps, disc_qs, cont_ps, cont_qs, disc_ps_range) %<-% - split_disc_cont_ps_qs(ps, qs, dup_tol = dup_tol, zero_tol = zero_tol, - is_hurdle = is_hurdle) + # split ps and qs into discrete and continuous part, along with the + # weight given to the discrete part + c(disc_weight, disc_ps, disc_qs, cont_ps, cont_qs, disc_ps_range) %<-% + split_disc_cont_ps_qs(ps, qs, dup_tol = dup_tol, zero_tol = zero_tol, + is_hurdle = is_hurdle) - if (disc_weight < 1.0) { - # approximate the cdf on the interior by interpolating quantiles - interior_args <- c( - list(ps = cont_ps, qs = cont_qs, tail_dist = tail_dist, - fn_type = "p"), - interior_args) - interior_cdf <- do.call(interior_method, args = interior_args) - - # approximate the cdf in the lower tail by extrapolating from the two - # lowest quantiles within a location-scale family - if (min(cont_ps) > 0.0) { - lower_cdf <- p_ext_factory(head(cont_ps, 2), head(cont_qs, 2), - tail_dist) - } + if (disc_weight < 1.0) { + # approximate the cdf on the interior by interpolating quantiles + interior_args <- c( + list(ps = cont_ps, qs = cont_qs, tail_dist = tail_dist, fn_type = "p"), + interior_args + ) + interior_cdf <- do.call(interior_method, args = interior_args) - # approximate the pdf in the upper tail by extrapolating from the two - # largest quantiles within a location-scale family - if (max(cont_ps) < 1.0) { - upper_cdf <- p_ext_factory(tail(cont_ps, 2), tail(cont_qs, 2), - tail_dist) - } + # approximate the cdf in the lower tail by extrapolating from the two + # lowest quantiles within a location-scale family + if (min(cont_ps) > 0.0) { + lower_cdf <- p_ext_factory(head(cont_ps, 2), head(cont_qs, 2), tail_dist) } - p_fn <- function(q, log.p=FALSE) { - checkmate::assert_numeric(q) - - # instantiate result - result <- rep(0.0, length(q)) - log.p_direct <- FALSE - - if (disc_weight < 1.0) { - # directly use log.p in interior and exterior cdf methods? - # if possible, probably saves a little numerical precision in tails - # but we can't (easily) do this is if disc_weight > 0, in which case - # below we will be adding probabilities from a discrete component - log.p_direct <- (log.p && disc_weight == 0.0) - - # interior points - interior_idx <- (q >= cont_qs[1]) & (q <= tail(cont_qs, 1)) - if (any(interior_idx)) { - result[interior_idx] <- interior_cdf(q[interior_idx], - log.p = log.p_direct) - } - - # lower points - lower_idx <- (q < cont_qs[1]) - if (any(lower_idx)) { - if (min(cont_ps) > 0.0) { - result[lower_idx] <- lower_cdf(q[lower_idx], - log.p = log.p_direct) - } # else set to 0, which is how we initialized it - } - - # upper points - upper_idx <- (q > tail(cont_qs, 1)) - if (any(upper_idx)) { - if (max(cont_ps) < 1.0) { - result[upper_idx] <- upper_cdf(q[upper_idx], - log.p = log.p_direct) - } else { - result[upper_idx] <- 1.0 - } - } - - # adjust by weight for continuous component - result <- result * (1 - disc_weight) - } + # approximate the pdf in the upper tail by extrapolating from the two + # largest quantiles within a location-scale family + if (max(cont_ps) < 1.0) { + upper_cdf <- p_ext_factory(tail(cont_ps, 2), tail(cont_qs, 2), tail_dist) + } + } - # add discrete probabilities - for (i in seq_along(disc_ps)) { - inds <- (q >= disc_qs[i]) - result[inds] <- result[inds] + disc_ps[i] * disc_weight - } + p_fn <- function(q, log.p = FALSE) { + checkmate::assert_numeric(q) - # ensure result is within interval [0, 1] - result <- pmin(pmax(result, 0), 1) + # instantiate result + result <- rep(0.0, length(q)) + log.p_direct <- FALSE + + if (disc_weight < 1.0) { + # directly use log.p in interior and exterior cdf methods? + # if possible, probably saves a little numerical precision in tails + # but we can't (easily) do this is if disc_weight > 0, in which case + # below we will be adding probabilities from a discrete component + log.p_direct <- (log.p && disc_weight == 0.0) + + # interior points + interior_idx <- (q >= cont_qs[1]) & (q <= tail(cont_qs, 1)) + if (any(interior_idx)) { + result[interior_idx] <- interior_cdf(q[interior_idx], + log.p = log.p_direct) + } + + # lower points + lower_idx <- (q < cont_qs[1]) + if (any(lower_idx)) { + if (min(cont_ps) > 0.0) { + result[lower_idx] <- lower_cdf(q[lower_idx], log.p = log.p_direct) + } # else set to 0, which is how we initialized it + } - # return, handling log.p argument - if (log.p_direct) { - # result is already on log scale - return(result) - } else if (log.p) { - return(log(result)) + # upper points + upper_idx <- (q > tail(cont_qs, 1)) + if (any(upper_idx)) { + if (max(cont_ps) < 1.0) { + result[upper_idx] <- upper_cdf(q[upper_idx], log.p = log.p_direct) } else { - return(result) + result[upper_idx] <- 1.0 } + } + + # adjust by weight for continuous component + result <- result * (1 - disc_weight) + } + + # add discrete probabilities + for (i in seq_along(disc_ps)) { + inds <- (q >= disc_qs[i]) + result[inds] <- result[inds] + disc_ps[i] * disc_weight + } + + # ensure result is within interval [0, 1] + result <- pmin(pmax(result, 0), 1) + + # return, handling log.p argument + if (log.p_direct) { + # result is already on log scale + return(result) + } else if (log.p) { + return(log(result)) + } else { + return(result) } + } - return(p_fn) + return(p_fn) } @@ -299,11 +296,11 @@ make_p_fn <- function(ps, qs, #' @param ps vector of probability levels #' @param qs vector of quantile values correponding to ps #' @param interior_method method for interpolating the distribution on the -#' interior of the provided `qs`. This package provides one method for this, -#' `"spline_cdf"`. The user may also provide a custom function; see the -#' details for more. +#' interior of the provided `qs`. This package provides one method for this, +#' `"spline_cdf"`. The user may also provide a custom function; see the +#' details for more. #' @param interior_args an optional named list of arguments that are passed -#' on to the `interior_method` +#' on to the `interior_method` #' @param tail_dist name of parametric distribution for the tails #' @param dup_tol numeric tolerance for identifying duplicated values indicating #' a discrete component of the distribution. If there is a run of values where @@ -314,17 +311,17 @@ make_p_fn <- function(ps, qs, #' (approximately) zero. #' #' @details The default `interior_method`, `"spline_cdf"`, represents the -#' distribution as a sum of a discrete component at any points where there -#' are duplicated `qs` for multiple different `ps` and a continuous component -#' that is estimated by using a monotonic cubic spline that interpolates the -#' provided `(q, p)` pairs as an estimate of the cdf. The quantile function -#' is then obtained by inverting this estimate of the cdf. +#' distribution as a sum of a discrete component at any points where there +#' are duplicated `qs` for multiple different `ps` and a continuous component +#' that is estimated by using a monotonic cubic spline that interpolates the +#' provided `(q, p)` pairs as an estimate of the cdf. The quantile function +#' is then obtained by inverting this estimate of the cdf. #' -#' Optionally, the user may provide another function that accepts arguments -#' `ps`, `qs`, `tail_dist`, and `fn_type` (which will be either `"d"`, `"p"`, -#' or `"q"`), and optionally additional named arguments to be specified via -#' `interior_args`. This function should return a function with argument `p` -#' that evaluates the quantile function. +#' Optionally, the user may provide another function that accepts arguments +#' `ps`, `qs`, `tail_dist`, and `fn_type` (which will be either `"d"`, `"p"`, +#' or `"q"`), and optionally additional named arguments to be specified via +#' `interior_args`. This function should return a function with argument `p` +#' that evaluates the quantile function. #' #' @return a function with argument `p` that can be used to calculate quantiles #' of the approximated distribution at the probability levels `p`. @@ -335,110 +332,109 @@ make_q_fn <- function(ps, qs, interior_args = list(), tail_dist = "norm", dup_tol = 1e-6, zero_tol = 1e-12) { - interior_method <- match.arg(interior_method) + interior_method <- match.arg(interior_method) - c(ps, qs) %<-% clean_ps_and_qs(ps, qs) + c(ps, qs) %<-% clean_ps_and_qs(ps, qs) - # if tail_dist is "lnorm", we treat quantiles of 0 as indicative of a - # discrete point mass at 0, using a set up like a hurdle model - is_hurdle <- tail_dist == "lnorm" + # if tail_dist is "lnorm", we treat quantiles of 0 as indicative of a + # discrete point mass at 0, using a set up like a hurdle model + is_hurdle <- tail_dist == "lnorm" - # split ps and qs into discrete and continuous part, along with the - # weight given to the discrete part - c(disc_weight, disc_ps, disc_qs, cont_ps, cont_qs, disc_ps_range) %<-% - split_disc_cont_ps_qs(ps, qs, dup_tol = dup_tol, zero_tol = zero_tol, - is_hurdle = is_hurdle) + # split ps and qs into discrete and continuous part, along with the + # weight given to the discrete part + c(disc_weight, disc_ps, disc_qs, cont_ps, cont_qs, disc_ps_range) %<-% + split_disc_cont_ps_qs(ps, qs, dup_tol = dup_tol, zero_tol = zero_tol, + is_hurdle = is_hurdle) - if (disc_weight < 1.0) { - # approximate the pdf on the interior by interpolating quantiles - interior_args <- c( - list(ps = cont_ps, qs = cont_qs, tail_dist = tail_dist, - fn_type = "q"), - interior_args) - interior_qf <- do.call(interior_method, args = interior_args) - - # approximate the quantile function in the lower tail by extrapolating - # from the two lowest quantiles within a location-scale family - if (min(cont_ps) > 0.0) { - lower_qf <- q_ext_factory(head(cont_ps, 2), head(cont_qs, 2), - tail_dist) - } + if (disc_weight < 1.0) { + # approximate the pdf on the interior by interpolating quantiles + interior_args <- c( + list(ps = cont_ps, qs = cont_qs, tail_dist = tail_dist, fn_type = "q"), + interior_args + ) + interior_qf <- do.call(interior_method, args = interior_args) + + # approximate the quantile function in the lower tail by extrapolating + # from the two lowest quantiles within a location-scale family + if (min(cont_ps) > 0.0) { + lower_qf <- q_ext_factory(head(cont_ps, 2), head(cont_qs, 2), + tail_dist) + } - # approximate the quantile function in the upper tail by extrapolating - # from the two largest quantiles within a location-scale family - if (max(cont_ps) < 1.0) { - upper_qf <- q_ext_factory(tail(cont_ps, 2), tail(cont_qs, 2), - tail_dist) - } + # approximate the quantile function in the upper tail by extrapolating + # from the two largest quantiles within a location-scale family + if (max(cont_ps) < 1.0) { + upper_qf <- q_ext_factory(tail(cont_ps, 2), tail(cont_qs, 2), tail_dist) + } + } + + q_fn <- function(p) { + checkmate::assert_numeric(p, lower = 0, upper = 1) + + # instantiate result + result <- rep(NA_real_, length(p)) + + # discrete part of distribution + # each discrete component has an associated ((min_p, max_p), q) tuple + # where for any p in [min_p, max_p], the quantile is q + # along the way, we update the continuous probabilities by subtracting + # the discrete point mass probability + cont_inds <- rep(TRUE, length(p)) + cont_p <- p + for (i in seq_along(disc_ps_range)) { + dpr <- disc_ps_range[[i]] + inds <- (p >= dpr[1]) & (p <= dpr[2]) + result[inds] <- disc_qs[i] + cont_inds[inds] <- FALSE + + inds <- (p > dpr[2]) + cont_p[inds] <- cont_p[inds] - disc_ps[i] * disc_weight } - q_fn <- function(p) { - checkmate::assert_numeric(p, lower=0, upper=1) - - # instantiate result - result <- rep(NA_real_, length(p)) - - # discrete part of distribution - # each discrete component has an associated ((min_p, max_p), q) tuple - # where for any p in [min_p, max_p], the quantile is q - # along the way, we update the continuous probabilities by subtracting - # the discrete point mass probability - cont_inds <- rep(TRUE, length(p)) - cont_p <- p - for (i in seq_along(disc_ps_range)) { - dpr <- disc_ps_range[[i]] - inds <- (p >= dpr[1]) & (p <= dpr[2]) - result[inds] <- disc_qs[i] - cont_inds[inds] <- FALSE - - inds <- (p > dpr[2]) - cont_p[inds] <- cont_p[inds] - disc_ps[i] * disc_weight + # continuous part of the distribution + if (disc_weight < 1.0) { + cont_p <- cont_p / (1 - disc_weight) + # address potential floating point errors: for example, + # dividing by (1 - disc_weight) could take an input p = 1 to a value + # slightly greater than 1. here, we ensure that any input p's that + # were in [0, 1] are still in [0, 1] after adjustment + cont_p[p >= 0] <- pmax(cont_p[p >= 0], 0) + cont_p[p <= 1] <- pmin(cont_p[p <= 1], 1) + + # interior points + interior_idx <- cont_inds & (cont_p >= cont_ps[1]) & + (cont_p <= tail(cont_ps, 1)) + if (any(interior_idx)) { + result[interior_idx] <- interior_qf(cont_p[interior_idx]) + } + + # lower points + # no action required if min(cont_ps) == 0.0 because in that case, + # we will never have p < cont_ps[1]. This means the outer check + # is redundant, but may be helpful to have consistency + if (min(cont_ps) > 0.0) { + lower_idx <- cont_inds & (cont_p < cont_ps[1]) + if (any(lower_idx)) { + result[lower_idx] <- lower_qf(cont_p[lower_idx]) } - - # continuous part of the distribution - if (disc_weight < 1.0) { - cont_p <- cont_p / (1 - disc_weight) - # address potential floating point errors: for example, - # dividing by (1 - disc_weight) could take an input p = 1 to a value - # slightly greater than 1. here, we ensure that any input p's that - # were in [0, 1] are still in [0, 1] after adjustment - cont_p[p >= 0] <- pmax(cont_p[p >= 0], 0) - cont_p[p <= 1] <- pmin(cont_p[p <= 1], 1) - - # interior points - interior_idx <- cont_inds & (cont_p >= cont_ps[1]) & - (cont_p <= tail(cont_ps, 1)) - if (any(interior_idx)) { - result[interior_idx] <- interior_qf(cont_p[interior_idx]) - } - - # lower points - # no action required if min(cont_ps) == 0.0 because in that case, - # we will never have p < cont_ps[1]. This means the outer check - # is redundant, but may be helpful to have consistency - if (min(cont_ps) > 0.0) { - lower_idx <- cont_inds & (cont_p < cont_ps[1]) - if (any(lower_idx)) { - result[lower_idx] <- lower_qf(cont_p[lower_idx]) - } - } - - # upper points - # no action required if max(cont_ps) == 1.0 because in that case, - # we will never have p > tail(cont_ps, 1). This means the outer - # check is redundant, but may be helpful to have consistency - if (max(cont_ps) < 1.0) { - upper_idx <- cont_inds & (cont_p > tail(cont_ps, 1)) - if (any(upper_idx)) { - result[upper_idx] <- upper_qf(cont_p[upper_idx]) - } - } + } + + # upper points + # no action required if max(cont_ps) == 1.0 because in that case, + # we will never have p > tail(cont_ps, 1). This means the outer + # check is redundant, but may be helpful to have consistency + if (max(cont_ps) < 1.0) { + upper_idx <- cont_inds & (cont_p > tail(cont_ps, 1)) + if (any(upper_idx)) { + result[upper_idx] <- upper_qf(cont_p[upper_idx]) } - - return(result) + } } - return(q_fn) + return(result) + } + + return(q_fn) } @@ -449,11 +445,11 @@ make_q_fn <- function(ps, qs, #' @param ps vector of probability levels #' @param qs vector of quantile values correponding to ps #' @param interior_method method for interpolating the distribution on the -#' interior of the provided `qs`. This package provides one method for this, -#' `"spline_cdf"`. The user may also provide a custom function; see the -#' details for more. +#' interior of the provided `qs`. This package provides one method for this, +#' `"spline_cdf"`. The user may also provide a custom function; see the +#' details for more. #' @param interior_args an optional named list of arguments that are passed -#' on to the `interior_method` +#' on to the `interior_method` #' @param tail_dist name of parametric distribution for the tails #' @param dup_tol numeric tolerance for identifying duplicated values indicating #' a discrete component of the distribution. If there is a run of values where @@ -464,17 +460,17 @@ make_q_fn <- function(ps, qs, #' (approximately) zero. #' #' @details The default `interior_method`, `"spline_cdf"`, represents the -#' distribution as a sum of a discrete component at any points where there -#' are duplicated `qs` for multiple different `ps` and a continuous component -#' that is estimated by using a monotonic cubic spline that interpolates the -#' provided `(q, p)` pairs as an estimate of the cdf. The quantile function -#' is then obtained by inverting this estimate of the cdf. +#' distribution as a sum of a discrete component at any points where there +#' are duplicated `qs` for multiple different `ps` and a continuous component +#' that is estimated by using a monotonic cubic spline that interpolates the +#' provided `(q, p)` pairs as an estimate of the cdf. The quantile function +#' is then obtained by inverting this estimate of the cdf. #' -#' Optionally, the user may provide another function that accepts arguments -#' `ps`, `qs`, `tail_dist`, and `fn_type` (which will be either `"d"`, `"p"`, -#' or `"q"`), and optionally additional named arguments to be specified via -#' `interior_args`. This function should return a function with argument `p` -#' that evaluates the quantile function. +#' Optionally, the user may provide another function that accepts arguments +#' `ps`, `qs`, `tail_dist`, and `fn_type` (which will be either `"d"`, `"p"`, +#' or `"q"`), and optionally additional named arguments to be specified via +#' `interior_args`. This function should return a function with argument `p` +#' that evaluates the quantile function. #' #' @return a function with argument `n` that can be used to generate a sample of #' size `n` from the approximated distribution. @@ -484,14 +480,14 @@ make_r_fn <- function(ps, qs, interior_args = list(), tail_dist = "norm", dup_tol = 1e-6, zero_tol = 1e-12) { - interior_method <- match.arg(interior_method) - q_fn <- make_q_fn(ps, qs, interior_method, interior_args, tail_dist) + interior_method <- match.arg(interior_method) + q_fn <- make_q_fn(ps, qs, interior_method, interior_args, tail_dist) - r_fn <- function(n) { - checkmate::assert_integerish(n, lower=0, any.missing=FALSE, len=1) - u <- runif(n) - return(q_fn(u)) - } + r_fn <- function(n) { + checkmate::assert_integerish(n, lower = 0, any.missing = FALSE, len = 1) + u <- runif(n) + return(q_fn(u)) + } - return(r_fn) + return(r_fn) } diff --git a/R/utils.R b/R/utils.R index f7df477..8be6676 100644 --- a/R/utils.R +++ b/R/utils.R @@ -14,12 +14,12 @@ dup_run_starts <- dup_run_ends <- NULL #' @return a boolean vector of the same length as `x` #' @export duplicated_tol <- function(x, tol = 1e-6, incl_first = FALSE) { - if (!incl_first) { - return(c(FALSE, diff(x) < tol)) - } else { - diff_lt_tol <- diff(x) < tol - return(c(FALSE, diff_lt_tol) | c(diff_lt_tol, FALSE)) - } + if (!incl_first) { + return(c(FALSE, diff(x) < tol)) + } else { + diff_lt_tol <- diff(x) < tol + return(c(FALSE, diff_lt_tol) | c(diff_lt_tol, FALSE)) + } } @@ -28,51 +28,51 @@ duplicated_tol <- function(x, tol = 1e-6, incl_first = FALSE) { #' consecutive pair is closer together than the tolerance, all are labeled as #' corresponding to a single unique value even if not all values in the run are #' within the tolerance. -#' +#' #' @param x a numeric vector in which to identify duplicates #' @param tol numeric tolerance for identifying duplicates #' @param ties a function that is used to summarize groups of values that fall #' within the tolerance -#' +#' #' @return a numeric vector of the unique values in `x` #' @export unique_tol <- function(x, tol = 1e-6, ties = mean) { - dups_F <- duplicated_tol(x, tol = tol, incl_first = FALSE) - dups_T <- duplicated_tol(x, tol = tol, incl_first = TRUE) - if (any(dups_F)) { - c(dup_run_starts, dup_run_ends) %<-% get_dup_run_inds(dups_F) - dup_summaries <- purrr::map2_dbl( - dup_run_starts, dup_run_ends, - function(start, end) { - ties(x[start:end]) - } - ) - ux <- sort(c(x[!dups_T], dup_summaries)) - return(ux) - } else { - return(x) - } + dups_F <- duplicated_tol(x, tol = tol, incl_first = FALSE) + dups_T <- duplicated_tol(x, tol = tol, incl_first = TRUE) + if (any(dups_F)) { + c(dup_run_starts, dup_run_ends) %<-% get_dup_run_inds(dups_F) + dup_summaries <- purrr::map2_dbl( + dup_run_starts, dup_run_ends, + function(start, end) { + ties(x[start:end]) + } + ) + ux <- sort(c(x[!dups_T], dup_summaries)) + return(ux) + } else { + return(x) + } } #' Get indices of starts and ends of runs of duplicate values -#' -#' @param dups a boolean vector that would result from calling +#' +#' @param dups a boolean vector that would result from calling #' `duplicated_tol(..., incl_first = FALSE)` -#' +#' #' @return named list with entries `starts` giving indices of the first element #' in each sequence of runs of duplicate values and `ends` giving indices of #' the last element in each sequence of runs of duplicate values. get_dup_run_inds <- function(dups) { - if (any(dups)) { - dups_rle <- rle(dups) - num_runs <- length(dups_rle$lengths) - dup_run_starts <- cumsum(c(0, dups_rle$lengths[seq_len(num_runs - 1)]))[dups_rle$values] - dup_run_ends <- cumsum(dups_rle$lengths)[dups_rle$values] - return(list(starts = dup_run_starts, ends = dup_run_ends)) - } else { - return(list(starts = integer(), ends = integer())) - } + if (any(dups)) { + dups_rle <- rle(dups) + num_runs <- length(dups_rle$lengths) + dup_run_starts <- cumsum(c(0, dups_rle$lengths[seq_len(num_runs - 1)]))[dups_rle$values] + dup_run_ends <- cumsum(dups_rle$lengths)[dups_rle$values] + return(list(starts = dup_run_starts, ends = dup_run_ends)) + } else { + return(list(starts = integer(), ends = integer())) + } } @@ -113,142 +113,141 @@ get_dup_run_inds <- function(dups) { #' @export split_disc_cont_ps_qs <- function(ps, qs, dup_tol = 1e-6, zero_tol = 1e-12, is_hurdle = FALSE) { - # if zero counts as discrete and any qs are (approximately) zero, augment - # with an additional zero so that logic below based on duplicate qs works - if (is_hurdle && any(abs(qs) <= zero_tol)) { - zero_ind <- min(which(abs(qs) <= zero_tol)) - if (zero_ind == 1) { - ps <- c(0.0, ps) - } else { - ps <- sort(c(mean(ps[(zero_ind - 1):zero_ind]), ps)) - } - qs <- sort(c(0.0, qs)) + # if zero counts as discrete and any qs are (approximately) zero, augment + # with an additional zero so that logic below based on duplicate qs works + if (is_hurdle && any(abs(qs) <= zero_tol)) { + zero_ind <- min(which(abs(qs) <= zero_tol)) + if (zero_ind == 1) { + ps <- c(0.0, ps) + } else { + ps <- sort(c(mean(ps[(zero_ind - 1):zero_ind]), ps)) } + qs <- sort(c(0.0, qs)) + } + + # Isolate the discrete portion of the distribution: + # duplicated quantiles and the associated point mass probabilities + # on the interior, point mass probability is the range of ps with given q + # on the lower (upper) tail, point mass probability extends to 0 (1) + dup_q_inds_t <- duplicated_tol(qs, tol = dup_tol, incl_first = TRUE) + dup_q_inds_f <- duplicated_tol(qs, tol = dup_tol, incl_first = FALSE) - # Isolate the discrete portion of the distribution: - # duplicated quantiles and the associated point mass probabilities - # on the interior, point mass probability is the range of ps with given q - # on the lower (upper) tail, point mass probability extends to 0 (1) + # if first or last q is duplicated, ensure we get to p = 0 and p = 1 + recalc_dup_inds <- FALSE + if (dup_q_inds_t[1]) { + ps <- c(0.0, ps) + qs <- c(qs[1], qs) + recalc_dup_inds <- TRUE + } + if (tail(dup_q_inds_t, 1)) { + ps <- c(ps, 1.0) + qs <- c(qs, tail(qs, 1)) + recalc_dup_inds <- TRUE + } + if (recalc_dup_inds) { dup_q_inds_t <- duplicated_tol(qs, tol = dup_tol, incl_first = TRUE) dup_q_inds_f <- duplicated_tol(qs, tol = dup_tol, incl_first = FALSE) + } - # if first or last q is duplicated, ensure we get to p = 0 and p = 1 - recalc_dup_inds <- FALSE - if (dup_q_inds_t[1]) { - ps <- c(0.0, ps) - qs <- c(qs[1], qs) - recalc_dup_inds <- TRUE - } - if (tail(dup_q_inds_t, 1)) { - ps <- c(ps, 1.0) - qs <- c(qs, tail(qs, 1)) - recalc_dup_inds <- TRUE + disc_qs <- unique_tol(qs[dup_q_inds_t], tol = dup_tol) + c(dup_run_starts, dup_run_ends) %<-% get_dup_run_inds(dup_q_inds_f) + disc_ps_range <- purrr::map2( + dup_run_starts, dup_run_ends, + function(start, end) { + range_i <- range(ps[start:end]) + return(range_i) } - if (recalc_dup_inds) { - dup_q_inds_t <- duplicated_tol(qs, tol = dup_tol, incl_first = TRUE) - dup_q_inds_f <- duplicated_tol(qs, tol = dup_tol, incl_first = FALSE) - } - - disc_qs <- unique_tol(qs[dup_q_inds_t], tol = dup_tol) - c(dup_run_starts, dup_run_ends) %<-% get_dup_run_inds(dup_q_inds_f) - disc_ps_range <- purrr::map2( - dup_run_starts, dup_run_ends, - function(start, end) { - range_i <- range(ps[start:end]) - return(range_i) - }) - disc_ps_mass <- purrr::map_dbl( - disc_ps_range, - function(range_i) diff(range_i)) - disc_cum_ps <- cumsum(disc_ps_mass) + ) + disc_ps_mass <- purrr::map_dbl(disc_ps_range, function(range_i) diff(range_i)) + disc_cum_ps <- cumsum(disc_ps_mass) - # Short-circuit if there is insufficient information from which to estimate - # a continuous part of the distribution. - uq <- unique_tol(qs, tol = dup_tol) - if (length(uq) == 1L) { - # all qs are duplicates - # single point mass at that q value - return(list( - disc_weight = 1.0, - disc_ps = 1.0, - disc_qs = uq, - cont_ps = numeric(), - cont_qs = numeric(), - disc_ps_range = list(c(0.0, 1.0)) - )) - } else if (length(uq) == 2) { - # there are only two distinct qs and at least one is at a point mass - # we declare that both are at point masses - # assign probabilities based on the tails (to the left of the lower q - # and to the right of the upper q), and then allocate the mass in the - # "gap" between the point masses proportionally - if (length(disc_ps_mass) == 2) { - disc_ps_unnormalized <- disc_ps_mass - } else if (length(ps) == 2) { - disc_ps_unnormalized <- c(ps[1], 1 - ps[2]) - } else if (dup_q_inds_t[1]) { - disc_ps_unnormalized <- c(disc_ps_mass, 1 - tail(ps, 1)) - } else { - disc_ps_unnormalized <- c(ps[1], disc_ps_mass) - } - disc_ps <- disc_ps_unnormalized / sum(disc_ps_unnormalized) - return(list( - disc_weight = 1.0, - disc_ps = disc_ps, - disc_qs = uq, - cont_ps = numeric(), - cont_qs = numeric(), - disc_ps_range = list(c(0.0, disc_ps[1]), c(disc_ps[1], 1.0)) - )) + # Short-circuit if there is insufficient information from which to estimate + # a continuous part of the distribution. + uq <- unique_tol(qs, tol = dup_tol) + if (length(uq) == 1L) { + # all qs are duplicates + # single point mass at that q value + return(list( + disc_weight = 1.0, + disc_ps = 1.0, + disc_qs = uq, + cont_ps = numeric(), + cont_qs = numeric(), + disc_ps_range = list(c(0.0, 1.0)) + )) + } else if (length(uq) == 2) { + # there are only two distinct qs and at least one is at a point mass + # we declare that both are at point masses + # assign probabilities based on the tails (to the left of the lower q + # and to the right of the upper q), and then allocate the mass in the + # "gap" between the point masses proportionally + if (length(disc_ps_mass) == 2) { + disc_ps_unnormalized <- disc_ps_mass + } else if (length(ps) == 2) { + disc_ps_unnormalized <- c(ps[1], 1 - ps[2]) + } else if (dup_q_inds_t[1]) { + disc_ps_unnormalized <- c(disc_ps_mass, 1 - tail(ps, 1)) + } else { + disc_ps_unnormalized <- c(ps[1], disc_ps_mass) } + disc_ps <- disc_ps_unnormalized / sum(disc_ps_unnormalized) + return(list( + disc_weight = 1.0, + disc_ps = disc_ps, + disc_qs = uq, + cont_ps = numeric(), + cont_qs = numeric(), + disc_ps_range = list(c(0.0, disc_ps[1]), c(disc_ps[1], 1.0)) + )) + } - # remaining quantiles correspond to a continuous portion of the - # distribution; extract those ps and qs, and adjust the ps, - # removing any jumps due to point masses - # Note that we do keep the first instance of a duplicated q. - # That means that fits for the continuous portion of a distribution - # will see one (q, p) pair for the duplicated q - # - # An exception is qs of 0 when is_hurdle == TRUE, in which case we drop - # zero from the cont_qs; e.g. including (p=0, q=0) for a lognormal - # is not informative. - cont_ps_unadj <- ps[!dup_q_inds_f] - cont_qs <- uq - if (is_hurdle) { - nonzero_inds <- (abs(cont_qs) >= zero_tol) - cont_ps_unadj <- cont_ps_unadj[nonzero_inds] - cont_qs <- cont_qs[nonzero_inds] - } - cont_ps <- cont_ps_unadj - for (i in seq_along(disc_qs)) { - adj_inds <- (cont_qs > disc_qs[i]) - cont_ps[adj_inds] <- cont_ps[adj_inds] - disc_ps_mass[i] - } + # remaining quantiles correspond to a continuous portion of the + # distribution; extract those ps and qs, and adjust the ps, + # removing any jumps due to point masses + # Note that we do keep the first instance of a duplicated q. + # That means that fits for the continuous portion of a distribution + # will see one (q, p) pair for the duplicated q + # + # An exception is qs of 0 when is_hurdle == TRUE, in which case we drop + # zero from the cont_qs; e.g. including (p=0, q=0) for a lognormal + # is not informative. + cont_ps_unadj <- ps[!dup_q_inds_f] + cont_qs <- uq + if (is_hurdle) { + nonzero_inds <- (abs(cont_qs) >= zero_tol) + cont_ps_unadj <- cont_ps_unadj[nonzero_inds] + cont_qs <- cont_qs[nonzero_inds] + } + cont_ps <- cont_ps_unadj + for (i in seq_along(disc_qs)) { + adj_inds <- (cont_qs > disc_qs[i]) + cont_ps[adj_inds] <- cont_ps[adj_inds] - disc_ps_mass[i] + } - # adjust for the weight going to the discrete portion of the distribution - if (length(disc_cum_ps) > 0) { - disc_weight <- tail(disc_cum_ps, 1) - disc_ps_mass <- disc_ps_mass / disc_weight - cont_ps <- cont_ps / (1 - disc_weight) + # adjust for the weight going to the discrete portion of the distribution + if (length(disc_cum_ps) > 0) { + disc_weight <- tail(disc_cum_ps, 1) + disc_ps_mass <- disc_ps_mass / disc_weight + cont_ps <- cont_ps / (1 - disc_weight) - # address potential floating point errors: for example, - # dividing by (1 - disc_weight) could take an input p = 1 to a value - # slightly greater than 1. here, we ensure that any input p's that - # were in [0, 1] are still in [0, 1] after adjustment - inds_nonneg <- cont_ps_unadj >= 0 - cont_ps[inds_nonneg] <- pmax(cont_ps[inds_nonneg], 0) - inds_lte_1 <- cont_ps_unadj <= 1 - cont_ps[inds_lte_1] <- pmin(cont_ps[inds_lte_1], 1) - } else { - disc_weight <- 0.0 - } + # address potential floating point errors: for example, + # dividing by (1 - disc_weight) could take an input p = 1 to a value + # slightly greater than 1. here, we ensure that any input p's that + # were in [0, 1] are still in [0, 1] after adjustment + inds_nonneg <- cont_ps_unadj >= 0 + cont_ps[inds_nonneg] <- pmax(cont_ps[inds_nonneg], 0) + inds_lte_1 <- cont_ps_unadj <= 1 + cont_ps[inds_lte_1] <- pmin(cont_ps[inds_lte_1], 1) + } else { + disc_weight <- 0.0 + } - return(list( - disc_weight = disc_weight, - disc_ps = disc_ps_mass, - disc_qs = disc_qs, - cont_ps = cont_ps, - cont_qs = cont_qs, - disc_ps_range = disc_ps_range - )) + return(list( + disc_weight = disc_weight, + disc_ps = disc_ps_mass, + disc_qs = disc_qs, + cont_ps = cont_ps, + cont_qs = cont_qs, + disc_ps_range = disc_ps_range + )) } diff --git a/tests/testthat/test-ext.R b/tests/testthat/test-ext.R index 0acbbe6..14c7400 100644 --- a/tests/testthat/test-ext.R +++ b/tests/testthat/test-ext.R @@ -1,9 +1,9 @@ test_that("q_ext works, repeated upper quantiles", { - ps <- c(0.975, 0.99) - qs <- c(28, 28) - qext_fun <- q_ext_factory(ps = ps, qs = qs, dist = "norm") - expect_true( - isTRUE(all.equal(qext_fun(seq(from = 0.99, to = 1.0, length.out = 10)), - rep(28, 10))) - ) + ps <- c(0.975, 0.99) + qs <- c(28, 28) + qext_fun <- q_ext_factory(ps = ps, qs = qs, dist = "norm") + expect_true( + isTRUE(all.equal(qext_fun(seq(from = 0.99, to = 1.0, length.out = 10)), + rep(28, 10))) + ) }) diff --git a/tests/testthat/test-make_d_fn.R b/tests/testthat/test-make_d_fn.R index 1e46927..688b44a 100644 --- a/tests/testthat/test-make_d_fn.R +++ b/tests/testthat/test-make_d_fn.R @@ -18,178 +18,180 @@ test_that("make_d_fn works, continuous component; no point masses", { - ps <- seq(from = 0.1, to = 0.9, by = 0.1) - qs <- qnorm(ps, mean = 1, sd = 2) - test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) - - d_actual <- make_d_fn(ps, qs)(test_qs) - d_expected <- dnorm(test_qs, mean = 1, sd = 2) - # plot(test_qs, d_actual); lines(test_qs, d_expected) - expect_equal(d_actual, d_expected, tolerance = 1e-2) + ps <- seq(from = 0.1, to = 0.9, by = 0.1) + qs <- qnorm(ps, mean = 1, sd = 2) + test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) + + d_actual <- make_d_fn(ps, qs)(test_qs) + d_expected <- dnorm(test_qs, mean = 1, sd = 2) + # plot(test_qs, d_actual); lines(test_qs, d_expected) + expect_equal(d_actual, d_expected, tolerance = 1e-2) }) test_that("make_d_fn generates error, no continuous component; one point mass, single q", { - ps <- 0.1 - qs <- rep(0.0, length(ps)) - expect_error(make_d_fn(ps, qs)) + ps <- 0.1 + qs <- rep(0.0, length(ps)) + expect_error(make_d_fn(ps, qs)) }) test_that("make_d_fn generates error, no continuous component; one point mass, duplicated qs", { - ps <- seq(from = 0.1, to = 0.9, by = 0.1) - qs <- rep(0.0, length(ps)) - expect_error(make_d_fn(ps, qs)) + ps <- seq(from = 0.1, to = 0.9, by = 0.1) + qs <- rep(0.0, length(ps)) + expect_error(make_d_fn(ps, qs)) }) test_that("make_d_fn generates error, no continuous component; two point masses, both from duplicated qs", { - ps <- seq(from = 0.1, to = 0.9, by = 0.1) - qs <- c(rep(1.0, 3), rep(2.0, 6)) - expect_error(make_d_fn(ps, qs)) + ps <- seq(from = 0.1, to = 0.9, by = 0.1) + qs <- c(rep(1.0, 3), rep(2.0, 6)) + expect_error(make_d_fn(ps, qs)) }) -test_that("make_d_fn generates error, no continuous component; two point masses, non-zero, duplicated values first only", { - ps <- seq(from = 0.1, to = 0.4, by = 0.1) - qs <- c(rep(1.0, 3), rep(2.0, 1)) - expect_error(make_d_fn(ps, qs)) -}) +test_that("make_d_fn generates error, no continuous component; + two point masses, non-zero, duplicated values first only", { + ps <- seq(from = 0.1, to = 0.4, by = 0.1) + qs <- c(rep(1.0, 3), rep(2.0, 1)) + expect_error(make_d_fn(ps, qs)) + }) -test_that("make_d_fn generates error, no continuous component; two point masses, non-zero, duplicated values second only", { - ps <- seq(from = 0.3, to = 0.9, by = 0.1) - qs <- c(rep(1.0, 1), rep(2.0, 6)) - expect_error(make_d_fn(ps, qs)) -}) +test_that("make_d_fn generates error, no continuous component; +two point masses, non-zero, duplicated values second only", { + ps <- seq(from = 0.3, to = 0.9, by = 0.1) + qs <- c(rep(1.0, 1), rep(2.0, 6)) + expect_error(make_d_fn(ps, qs)) + }) test_that("make_d_fn generates error, no continuous component; is_hurdle with one zero and one non-zero", { - ps <- seq(from = 0.1, to = 0.2, by = 0.1) - qs <- c(1e-13, 2.0) - expect_error(make_d_fn(ps, qs, tail_dist = "lnorm")) + ps <- seq(from = 0.1, to = 0.2, by = 0.1) + qs <- c(1e-13, 2.0) + expect_error(make_d_fn(ps, qs, tail_dist = "lnorm")) }) test_that("make_d_fn generates error, continuous component; one point mass, duplicated qs", { - # mixture of a Normal(0,1) with weight 0.8 and - # a point mass at 0 with weight 0.2 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) - - # probabilities and quantiles for point mass at 0 - point_ps <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs <- rep(0.0, length(point_ps)) - adj_point_ps <- 0.5 * 0.8 + point_ps * 0.2 - - ps <- sort(c(adj_norm_ps, adj_point_ps)) - qs <- sort(c(norm_qs, point_qs)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - expect_error(make_d_fn(ps, qs)) + # mixture of a Normal(0,1) with weight 0.8 and + # a point mass at 0 with weight 0.2 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) + + # probabilities and quantiles for point mass at 0 + point_ps <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs <- rep(0.0, length(point_ps)) + adj_point_ps <- 0.5 * 0.8 + point_ps * 0.2 + + ps <- sort(c(adj_norm_ps, adj_point_ps)) + qs <- sort(c(norm_qs, point_qs)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + expect_error(make_d_fn(ps, qs)) }) test_that("make_d_fn generates error, continuous component; one point mass, is_hurdle with zero", { - # mixture of a LogNormal(0,1) with weight 0.8 and - # a point mass at 0 with weight 0.2 - - # probabilities and quantiles for lognormal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qlnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) - - # probabilities and quantiles for point mass at 0 - point_ps <- 1.0 - point_qs <- 0.0 - adj_point_ps <- 0.2 - - ps <- sort(c(adj_norm_ps, adj_point_ps)) - qs <- sort(c(norm_qs, point_qs)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - expect_error(make_d_fn(ps, qs, tail_dist = "lnorm")) + # mixture of a LogNormal(0,1) with weight 0.8 and + # a point mass at 0 with weight 0.2 + + # probabilities and quantiles for lognormal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qlnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) + + # probabilities and quantiles for point mass at 0 + point_ps <- 1.0 + point_qs <- 0.0 + adj_point_ps <- 0.2 + + ps <- sort(c(adj_norm_ps, adj_point_ps)) + qs <- sort(c(norm_qs, point_qs)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + expect_error(make_d_fn(ps, qs, tail_dist = "lnorm")) }) test_that("make_d_fn generates error, two point masses, both from duplicated qs", { - # mixture of a Normal(0,1) with weight 0.6, - # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) - - # probabilities and quantiles for point mass at 0 - point_ps_0 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_0 <- rep(0.0, length(point_ps_0)) - adj_point_ps_0 <- 0.5 * 0.6 + point_ps_0 * 0.3 - - # probabilities and quantiles for point mass at 1 - point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_1 <- rep(1.0, length(point_ps_1)) - adj_point_ps_1 <- pnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 - - ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) - qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - expect_error(make_d_fn(ps, qs)) + # mixture of a Normal(0,1) with weight 0.6, + # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) + + # probabilities and quantiles for point mass at 0 + point_ps_0 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_0 <- rep(0.0, length(point_ps_0)) + adj_point_ps_0 <- 0.5 * 0.6 + point_ps_0 * 0.3 + + # probabilities and quantiles for point mass at 1 + point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_1 <- rep(1.0, length(point_ps_1)) + adj_point_ps_1 <- pnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 + + ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) + qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + expect_error(make_d_fn(ps, qs)) }) test_that("make_d_fn generates error, two point masses, is_hurdle with one zero and one non-zero with duplicated qs", { - # mixture of a LogNormal(0,1) with weight 0.6, - # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qlnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) - - # probabilities and quantiles for point mass at 0 - point_ps_0 <- 1.0 - point_qs_0 <- 0.0 - adj_point_ps_0 <- 0.3 - - # probabilities and quantiles for point mass at 1 - point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_1 <- rep(1.0, length(point_ps_1)) - adj_point_ps_1 <- plnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 - - ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) - qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - expect_error(make_d_fn(ps, qs, tail_dist = "lnorm")) + # mixture of a LogNormal(0,1) with weight 0.6, + # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qlnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) + + # probabilities and quantiles for point mass at 0 + point_ps_0 <- 1.0 + point_qs_0 <- 0.0 + adj_point_ps_0 <- 0.3 + + # probabilities and quantiles for point mass at 1 + point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_1 <- rep(1.0, length(point_ps_1)) + adj_point_ps_1 <- plnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 + + ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) + qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + expect_error(make_d_fn(ps, qs, tail_dist = "lnorm")) }) test_that("make_d_fn errors with out-of-bounds or incorrectly typed ps, qs", { - testthat::expect_no_error(make_d_fn(ps=c(0.0, 0.5, 1.0), qs = 1:3)) - testthat::expect_error(make_d_fn(ps=c(-1, 0.5, 1.0), qs = 1:3), - "Assertion on 'ps' failed: Element 1 is not >= 0.") - testthat::expect_error(make_d_fn(ps=c(0.0, 0.5, 2.0), qs = 1:3), - "Assertion on 'ps' failed: Element 3 is not <= 1.") - testthat::expect_error(make_d_fn(ps=c(0.0, "a", 1.0), qs = 1:3), - "Assertion on 'ps' failed: Must be of type 'numeric', not 'character'.") - testthat::expect_error(make_d_fn(ps=c(0.0, 0.5, 1.0), qs = c(1, "a", 3)), - "Assertion on 'qs' failed: Must be of type 'numeric', not 'character'.") - testthat::expect_error(make_d_fn(ps=c(0.0, 0.5, 1.0), qs = 1:4), - "'ps' and 'qs' must have the same length.") + testthat::expect_no_error(make_d_fn(ps = c(0.0, 0.5, 1.0), qs = 1:3)) + testthat::expect_error(make_d_fn(ps = c(-1, 0.5, 1.0), qs = 1:3), + "Assertion on 'ps' failed: Element 1 is not >= 0.") + testthat::expect_error(make_d_fn(ps = c(0.0, 0.5, 2.0), qs = 1:3), + "Assertion on 'ps' failed: Element 3 is not <= 1.") + testthat::expect_error(make_d_fn(ps = c(0.0, "a", 1.0), qs = 1:3), + "Assertion on 'ps' failed: Must be of type 'numeric', not 'character'.") + testthat::expect_error(make_d_fn(ps = c(0.0, 0.5, 1.0), qs = c(1, "a", 3)), + "Assertion on 'qs' failed: Must be of type 'numeric', not 'character'.") + testthat::expect_error(make_d_fn(ps = c(0.0, 0.5, 1.0), qs = 1:4), + "'ps' and 'qs' must have the same length.") }) test_that("make_d_fn result errors with out-of-bounds or incorrectly typed argument x", { - d_fn <- make_d_fn(ps=c(0.01, 0.5, 0.99), qs = 1:3) - testthat::expect_no_error(d_fn(c(0, 1, 5))) - testthat::expect_error(d_fn("a"), - "Must be of type 'numeric', not 'character'.") + d_fn <- make_d_fn(ps = c(0.01, 0.5, 0.99), qs = 1:3) + testthat::expect_no_error(d_fn(c(0, 1, 5))) + testthat::expect_error(d_fn("a"), + "Must be of type 'numeric', not 'character'.") }) diff --git a/tests/testthat/test-make_p_fn_make_q_fn.R b/tests/testthat/test-make_p_fn_make_q_fn.R index 76d9eb9..d32fe2d 100644 --- a/tests/testthat/test-make_p_fn_make_q_fn.R +++ b/tests/testthat/test-make_p_fn_make_q_fn.R @@ -23,649 +23,621 @@ # - make_p_fn and make_q_fn are inverses test_that("make_p_fn, make_q_fn work, continuous component; no point masses", { - ps <- seq(from = 0.1, to = 0.9, by = 0.1) - qs <- qnorm(ps, mean = 1, sd = 2) + ps <- seq(from = 0.1, to = 0.9, by = 0.1) + qs <- qnorm(ps, mean = 1, sd = 2) - test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) - test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) + test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) + test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) - p_actual <- make_p_fn(ps, qs)(test_qs) - p_expected <- pnorm(test_qs, mean = 1, sd = 2) - # plot(test_qs, p_actual); lines(test_qs, p_expected) + p_actual <- make_p_fn(ps, qs)(test_qs) + p_expected <- pnorm(test_qs, mean = 1, sd = 2) + # plot(test_qs, p_actual); lines(test_qs, p_expected) - q_actual <- make_q_fn(ps, qs)(test_ps) - q_expected <- qnorm(test_ps, mean = 1, sd = 2) - # plot(test_ps, q_actual); lines(test_ps, q_expected) + q_actual <- make_q_fn(ps, qs)(test_ps) + q_expected <- qnorm(test_ps, mean = 1, sd = 2) + # plot(test_ps, q_actual); lines(test_ps, q_expected) - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - testthat::expect_equal(test_ps, make_p_fn(ps, qs)(q_actual), - tolerance = 1e-3) - testthat::expect_equal(test_qs, make_q_fn(ps, qs)(p_actual), - tolerance = 1e-3) + testthat::expect_equal(test_ps, make_p_fn(ps, qs)(q_actual), + tolerance = 1e-3) + testthat::expect_equal(test_qs, make_q_fn(ps, qs)(p_actual), + tolerance = 1e-3) }) test_that("make_p_fn, make_q_fn work, no continuous component; one point mass, single q", { - ps <- 0.1 - qs <- rep(0.0, length(ps)) + ps <- 0.1 + qs <- rep(0.0, length(ps)) - test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) - test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) + test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) + test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) - p_actual <- make_p_fn(ps, qs)(test_qs) - p_expected <- as.numeric(test_qs >= 0) - # plot(test_qs, p_actual); lines(test_qs, p_expected) + p_actual <- make_p_fn(ps, qs)(test_qs) + p_expected <- as.numeric(test_qs >= 0) + # plot(test_qs, p_actual); lines(test_qs, p_expected) - q_actual <- make_q_fn(ps, qs)(test_ps) - q_expected <- rep(0.0, length(test_ps)) - # plot(test_ps, q_actual); lines(test_ps, q_expected) + q_actual <- make_q_fn(ps, qs)(test_ps) + q_expected <- rep(0.0, length(test_ps)) + # plot(test_ps, q_actual); lines(test_ps, q_expected) - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - testthat::expect_equal(rep(1, length(test_ps)), - make_p_fn(ps, qs)(q_actual), - tolerance = 1e-3) - testthat::expect_equal(rep(0, length(test_qs)), - make_q_fn(ps, qs)(p_actual), - tolerance = 1e-3) + testthat::expect_equal(rep(1, length(test_ps)), + make_p_fn(ps, qs)(q_actual), + tolerance = 1e-3) + testthat::expect_equal(rep(0, length(test_qs)), + make_q_fn(ps, qs)(p_actual), + tolerance = 1e-3) }) test_that("make_p_fn, make_q_fn work, no continuous component; one point mass, duplicated qs", { - ps <- seq(from = 0.1, to = 0.9, by = 0.1) - qs <- rep(0.0, length(ps)) + ps <- seq(from = 0.1, to = 0.9, by = 0.1) + qs <- rep(0.0, length(ps)) - test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) - test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) + test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) + test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) - p_actual <- make_p_fn(ps, qs)(test_qs) - p_expected <- as.numeric(test_qs >= 0) - # plot(test_qs, p_actual); lines(test_qs, p_expected) + p_actual <- make_p_fn(ps, qs)(test_qs) + p_expected <- as.numeric(test_qs >= 0) + # plot(test_qs, p_actual); lines(test_qs, p_expected) - q_actual <- make_q_fn(ps, qs)(test_ps) - q_expected <- rep(0.0, length(test_ps)) - # plot(test_ps, q_actual); lines(test_ps, q_expected) + q_actual <- make_q_fn(ps, qs)(test_ps) + q_expected <- rep(0.0, length(test_ps)) + # plot(test_ps, q_actual); lines(test_ps, q_expected) - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - testthat::expect_equal(rep(1, length(test_ps)), - make_p_fn(ps, qs)(q_actual), - tolerance = 1e-3) - testthat::expect_equal(rep(0, length(test_qs)), - make_q_fn(ps, qs)(p_actual), - tolerance = 1e-3) + testthat::expect_equal(rep(1, length(test_ps)), + make_p_fn(ps, qs)(q_actual), + tolerance = 1e-3) + testthat::expect_equal(rep(0, length(test_qs)), + make_q_fn(ps, qs)(p_actual), + tolerance = 1e-3) }) test_that("make_p_fn, make_q_fn work, no continuous component; two point masses, both from duplicated qs", { - ps <- seq(from = 0.1, to = 0.9, by = 0.1) - qs <- c(rep(1.0, 3), rep(2.0, 6)) - - test_ps <- sort(c( - 1 / 3, 1.0, - seq(from = 0.01, to = 0.99, by = 0.01))) - test_qs <- c( - 1, 2, - qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) - - p_actual <- make_p_fn(ps, qs)(test_qs) - p_expected <- (1 / 3) * as.numeric(test_qs >= 1.0) + - (2 / 3) * as.numeric(test_qs >= 2.0) - # plot(test_qs, p_actual); lines(test_qs, p_expected) - - q_actual <- make_q_fn(ps, qs)(test_ps) - q_expected <- ifelse(test_ps <= 1 / 3, 1.0, 2.0) - # plot(test_ps, q_actual); lines(test_ps, q_expected) - - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - - testthat::expect_equal(c(rep(1 / 3, sum(test_ps <= 1 / 3)), - rep(1, sum(test_ps > 1 / 3))), - make_p_fn(ps, qs)(q_actual), - tolerance = 1e-3) - # Commenting this test out -- fails due to floating point precision - # testthat::expect_equal(..., - # make_q_fn(ps, qs)(p_actual), - # tolerance = 1e-3) + ps <- seq(from = 0.1, to = 0.9, by = 0.1) + qs <- c(rep(1.0, 3), rep(2.0, 6)) + + test_ps <- sort(c(1 / 3, 1.0, seq(from = 0.01, to = 0.99, by = 0.01))) + test_qs <- c(1, 2, qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) + + p_actual <- make_p_fn(ps, qs)(test_qs) + p_expected <- (1 / 3) * as.numeric(test_qs >= 1.0) + (2 / 3) * as.numeric(test_qs >= 2.0) + # plot(test_qs, p_actual); lines(test_qs, p_expected) + + q_actual <- make_q_fn(ps, qs)(test_ps) + q_expected <- ifelse(test_ps <= 1 / 3, 1.0, 2.0) + # plot(test_ps, q_actual); lines(test_ps, q_expected) + + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + + testthat::expect_equal(c(rep(1 / 3, sum(test_ps <= 1 / 3)), + rep(1, sum(test_ps > 1 / 3))), + make_p_fn(ps, qs)(q_actual), + tolerance = 1e-3) + # Commenting this test out -- fails due to floating point precision + # testthat::expect_equal(..., + # make_q_fn(ps, qs)(p_actual), + # tolerance = 1e-3) }) -test_that("make_p_fn, make_q_fn work, no continuous component; two point masses, non-zero, duplicated values first only", { - ps <- seq(from = 0.1, to = 0.4, by = 0.1) - qs <- c(rep(1.0, 3), rep(2.0, 1)) - - test_ps <- sort(c( - 1 / 3, 1.0, - seq(from = 0.01, to = 0.99, by = 0.01))) - test_qs <- c( - 1, 2, - qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) - - p_actual <- make_p_fn(ps, qs)(test_qs) - p_expected <- (1 / 3) * as.numeric(test_qs >= 1.0) + - (2 / 3) * as.numeric(test_qs >= 2.0) - # plot(test_qs, p_actual); lines(test_qs, p_expected) - - q_actual <- make_q_fn(ps, qs)(test_ps) - q_expected <- ifelse(test_ps <= 1 / 3, 1.0, 2.0) - # plot(test_ps, q_actual); lines(test_ps, q_expected) - - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - - testthat::expect_equal(c(rep(1 / 3, sum(test_ps <= 1 / 3)), - rep(1, sum(test_ps > 1 / 3))), - make_p_fn(ps, qs)(q_actual), - tolerance = 1e-3) - # Commenting this test out -- fails due to floating point precision - # testthat::expect_equal(..., - # make_q_fn(ps, qs)(p_actual), - # tolerance = 1e-3) -}) +test_that("make_p_fn, make_q_fn work, no continuous component; + two point masses, non-zero, duplicated values first only", { + ps <- seq(from = 0.1, to = 0.4, by = 0.1) + qs <- c(rep(1.0, 3), rep(2.0, 1)) + test_ps <- sort(c(1 / 3, 1.0, seq(from = 0.01, to = 0.99, by = 0.01))) + test_qs <- c(1, 2, qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) -test_that("make_p_fn, make_q_fn work, no continuous component; two point masses, non-zero, duplicated values second only", { - ps <- seq(from = 0.3, to = 0.9, by = 0.1) - qs <- c(rep(1.0, 1), rep(2.0, 6)) - - test_ps <- sort(c( - 1 / 3, 1.0, - seq(from = 0.01, to = 0.99, by = 0.01))) - test_qs <- c( - 1, 2, - qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) - - p_actual <- make_p_fn(ps, qs)(test_qs) - p_expected <- (1 / 3) * as.numeric(test_qs >= 1.0) + - (2 / 3) * as.numeric(test_qs >= 2.0) - # plot(test_qs, p_actual); lines(test_qs, p_expected) - - q_actual <- make_q_fn(ps, qs)(test_ps) - q_expected <- ifelse(test_ps <= 1 / 3, 1.0, 2.0) - # plot(test_ps, q_actual); lines(test_ps, q_expected) - - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - - testthat::expect_equal(c(rep(1 / 3, sum(test_ps <= 1 / 3)), - rep(1, sum(test_ps > 1 / 3))), - make_p_fn(ps, qs)(q_actual), - tolerance = 1e-3) - # Commenting this test out -- fails due to floating point precision - # testthat::expect_equal(..., - # make_q_fn(ps, qs)(p_actual), - # tolerance = 1e-3) -}) + p_actual <- make_p_fn(ps, qs)(test_qs) + p_expected <- (1 / 3) * as.numeric(test_qs >= 1.0) + (2 / 3) * as.numeric(test_qs >= 2.0) + # plot(test_qs, p_actual); lines(test_qs, p_expected) + + q_actual <- make_q_fn(ps, qs)(test_ps) + q_expected <- ifelse(test_ps <= 1 / 3, 1.0, 2.0) + # plot(test_ps, q_actual); lines(test_ps, q_expected) + + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + + testthat::expect_equal(c(rep(1 / 3, sum(test_ps <= 1 / 3)), + rep(1, sum(test_ps > 1 / 3))), + make_p_fn(ps, qs)(q_actual), + tolerance = 1e-3) + # Commenting this test out -- fails due to floating point precision + # testthat::expect_equal(..., + # make_q_fn(ps, qs)(p_actual), + # tolerance = 1e-3) + }) + + +test_that("make_p_fn, make_q_fn work, no continuous component; + two point masses, non-zero, duplicated values second only", { + ps <- seq(from = 0.3, to = 0.9, by = 0.1) + qs <- c(rep(1.0, 1), rep(2.0, 6)) + + test_ps <- sort(c(1 / 3, 1.0, seq(from = 0.01, to = 0.99, by = 0.01))) + test_qs <- c(1, 2, qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) + + p_actual <- make_p_fn(ps, qs)(test_qs) + p_expected <- (1 / 3) * as.numeric(test_qs >= 1.0) + (2 / 3) * as.numeric(test_qs >= 2.0) + # plot(test_qs, p_actual); lines(test_qs, p_expected) + + q_actual <- make_q_fn(ps, qs)(test_ps) + q_expected <- ifelse(test_ps <= 1 / 3, 1.0, 2.0) + # plot(test_ps, q_actual); lines(test_ps, q_expected) + + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + + testthat::expect_equal(c(rep(1 / 3, sum(test_ps <= 1 / 3)), + rep(1, sum(test_ps > 1 / 3))), + make_p_fn(ps, qs)(q_actual), + tolerance = 1e-3) + # Commenting this test out -- fails due to floating point precision + # testthat::expect_equal(..., + # make_q_fn(ps, qs)(p_actual), + # tolerance = 1e-3) + }) test_that("make_p_fn, make_q_fn work, no continuous component; is_hurdle with one zero and one non-zero", { - ps <- seq(from = 0.1, to = 0.2, by = 0.1) - qs <- c(1e-13, 2.0) - - test_ps <- sort(c( - 1 / 9, 1.0, - seq(from = 0.01, to = 0.99, by = 0.01))) - test_qs <- c( - 1, 2, - qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) - - p_actual <- make_p_fn(ps, qs, tail_dist = "lnorm")(test_qs) - p_expected <- (1 / 9) * as.numeric(test_qs >= 0.0) + - (8 / 9) * as.numeric(test_qs >= 2.0) - # plot(test_qs, p_actual); lines(test_qs, p_expected) - - q_actual <- make_q_fn(ps, qs, tail_dist = "lnorm")(test_ps) - q_expected <- ifelse(test_ps <= 1 / 9, 0.0, 2.0) - # plot(test_ps, q_actual); lines(test_ps, q_expected) - - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + ps <- seq(from = 0.1, to = 0.2, by = 0.1) + qs <- c(1e-13, 2.0) + + test_ps <- sort(c(1 / 9, 1.0, seq(from = 0.01, to = 0.99, by = 0.01))) + test_qs <- c(1, 2, qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) + + p_actual <- make_p_fn(ps, qs, tail_dist = "lnorm")(test_qs) + p_expected <- (1 / 9) * as.numeric(test_qs >= 0.0) + (8 / 9) * as.numeric(test_qs >= 2.0) + # plot(test_qs, p_actual); lines(test_qs, p_expected) + + q_actual <- make_q_fn(ps, qs, tail_dist = "lnorm")(test_ps) + q_expected <- ifelse(test_ps <= 1 / 9, 0.0, 2.0) + # plot(test_ps, q_actual); lines(test_ps, q_expected) + + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) }) test_that("make_p_fn, make_q_fn work, continuous component; one point mass, duplicated qs", { - # mixture of a Normal(0,1) with weight 0.8 and - # a point mass at 0 with weight 0.2 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) - - # probabilities and quantiles for point mass at 0 - point_ps <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs <- rep(0.0, length(point_ps)) - adj_point_ps <- 0.5 * 0.8 + point_ps * 0.2 - - ps <- sort(c(adj_norm_ps, adj_point_ps)) - qs <- sort(c(norm_qs, point_qs)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) - test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) - - p_actual <- make_p_fn(ps, qs)(test_qs) - p_expected <- 0.2 * as.numeric(test_qs >= 0) + 0.8 * pnorm(test_qs) - # plot(test_qs, p_actual); lines(test_qs, p_expected) - - q_actual <- make_q_fn(ps, qs)(test_ps) - q_expected <- c( - qnorm(test_ps[test_ps < 0.4] / 0.8), - rep(0.0, sum((test_ps >= 0.4) & (test_ps <= 0.6))), - qnorm((test_ps[test_ps > 0.6] - 0.2) / 0.8) - ) - # plot(test_ps, q_actual); lines(test_ps, q_expected) - - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - - testthat::expect_equal(make_p_fn(ps, qs)(q_actual), - ifelse(test_ps >= 0.4 & test_ps <= 0.6, - 0.6, - test_ps), - tolerance = 1e-3) - testthat::expect_equal(make_q_fn(ps, qs)(p_actual), - test_qs, - tolerance = 1e-3) + # mixture of a Normal(0,1) with weight 0.8 and + # a point mass at 0 with weight 0.2 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) + + # probabilities and quantiles for point mass at 0 + point_ps <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs <- rep(0.0, length(point_ps)) + adj_point_ps <- 0.5 * 0.8 + point_ps * 0.2 + + ps <- sort(c(adj_norm_ps, adj_point_ps)) + qs <- sort(c(norm_qs, point_qs)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) + test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) + + p_actual <- make_p_fn(ps, qs)(test_qs) + p_expected <- 0.2 * as.numeric(test_qs >= 0) + 0.8 * pnorm(test_qs) + # plot(test_qs, p_actual); lines(test_qs, p_expected) + + q_actual <- make_q_fn(ps, qs)(test_ps) + q_expected <- c( + qnorm(test_ps[test_ps < 0.4] / 0.8), + rep(0.0, sum((test_ps >= 0.4) & (test_ps <= 0.6))), + qnorm((test_ps[test_ps > 0.6] - 0.2) / 0.8) + ) + # plot(test_ps, q_actual); lines(test_ps, q_expected) + + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + + testthat::expect_equal(make_p_fn(ps, qs)(q_actual), + ifelse(test_ps >= 0.4 & test_ps <= 0.6, + 0.6, + test_ps), + tolerance = 1e-3) + testthat::expect_equal(make_q_fn(ps, qs)(p_actual), + test_qs, + tolerance = 1e-3) }) test_that("make_p_fn, make_q_fn work, continuous component; one point mass, is_hurdle with zero", { - # mixture of a LogNormal(0,1) with weight 0.8 and - # a point mass at 0 with weight 0.2 - - # probabilities and quantiles for lognormal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qlnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) - - # probabilities and quantiles for point mass at 0 - point_ps <- 1.0 - point_qs <- 0.0 - adj_point_ps <- 0.2 - - ps <- sort(c(adj_norm_ps, adj_point_ps)) - qs <- sort(c(norm_qs, point_qs)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) - test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) - - p_actual <- make_p_fn(ps, qs, tail_dist = "lnorm")(test_qs) - p_expected <- 0.2 * as.numeric(test_qs >= 0) + - 0.8 * plnorm(test_qs) - # note that regions where actual matches expected depend on - # tail family assumptions: - # plot(test_qs, p_actual); lines(test_qs, p_expected) - - q_actual <- make_q_fn(ps, qs, tail_dist = "lnorm")(test_ps) - q_expected <- c( - rep(0.0, sum(test_ps <= 0.2)), - qlnorm((test_ps[test_ps > 0.2] - 0.2) / 0.8) - ) - # note that regions where actual matches expected depend on - # tail family assumptions: - # plot(test_ps, q_actual); lines(test_ps, q_expected) - - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - - testthat::expect_equal(make_p_fn(ps, qs, tail_dist = "lnorm")(q_actual), - ifelse(test_ps <= 0.2, - 0.2, - test_ps), - tolerance = 1e-3) - testthat::expect_equal(make_q_fn(ps, qs, tail_dist = "lnorm")(p_actual), - ifelse(test_qs < 0, 0, test_qs), - tolerance = 1e-3) + # mixture of a LogNormal(0,1) with weight 0.8 and + # a point mass at 0 with weight 0.2 + + # probabilities and quantiles for lognormal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qlnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) + + # probabilities and quantiles for point mass at 0 + point_ps <- 1.0 + point_qs <- 0.0 + adj_point_ps <- 0.2 + + ps <- sort(c(adj_norm_ps, adj_point_ps)) + qs <- sort(c(norm_qs, point_qs)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) + test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) + + p_actual <- make_p_fn(ps, qs, tail_dist = "lnorm")(test_qs) + p_expected <- 0.2 * as.numeric(test_qs >= 0) + 0.8 * plnorm(test_qs) + # note that regions where actual matches expected depend on + # tail family assumptions: + # plot(test_qs, p_actual); lines(test_qs, p_expected) + + q_actual <- make_q_fn(ps, qs, tail_dist = "lnorm")(test_ps) + q_expected <- c( + rep(0.0, sum(test_ps <= 0.2)), + qlnorm((test_ps[test_ps > 0.2] - 0.2) / 0.8) + ) + # note that regions where actual matches expected depend on + # tail family assumptions: + # plot(test_ps, q_actual); lines(test_ps, q_expected) + + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + + testthat::expect_equal(make_p_fn(ps, qs, tail_dist = "lnorm")(q_actual), + ifelse(test_ps <= 0.2, 0.2, test_ps), + tolerance = 1e-3) + testthat::expect_equal(make_q_fn(ps, qs, tail_dist = "lnorm")(p_actual), + ifelse(test_qs < 0, 0, test_qs), + tolerance = 1e-3) }) test_that("make_p_fn, make_q_fn work, two point masses, both from duplicated qs", { - # mixture of a Normal(0,1) with weight 0.6, - # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) - - # probabilities and quantiles for point mass at 0 - point_ps_0 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_0 <- rep(0.0, length(point_ps_0)) - adj_point_ps_0 <- 0.5 * 0.6 + point_ps_0 * 0.3 - - # probabilities and quantiles for point mass at 1 - point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_1 <- rep(1.0, length(point_ps_1)) - adj_point_ps_1 <- pnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 - - ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) - qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) - test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) - - p_actual <- make_p_fn(ps, qs)(test_qs) - p_expected <- 0.3 * as.numeric(test_qs >= 0) + - 0.1 * as.numeric(test_qs >= 1) + - 0.6 * pnorm(test_qs) - # plot(test_qs, p_actual); lines(test_qs, p_expected) - - q_actual <- make_q_fn(ps, qs)(test_ps) - pcut1 <- 0.3 - pcut2 <- pnorm(1.0)*0.6 + 0.3 - q_expected <- c( - qnorm(test_ps[test_ps < pcut1] / 0.6), - rep(0.0, sum((test_ps >= 0.3) & (test_ps <= 0.6))), - qnorm((test_ps[(test_ps > 0.6) & (test_ps < pcut2)] - 0.3) / 0.6), - rep(1.0, sum((test_ps >= pcut2) & (test_ps <= pcut2 + 0.1))), - qnorm((test_ps[test_ps > pcut2 + 0.1] - 0.4) / 0.6) - ) - # plot(test_ps, q_actual); lines(test_ps, q_expected) - - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - - testthat::expect_equal(make_p_fn(ps, qs)(q_actual), - dplyr::case_when( - test_ps >= 0.3 & test_ps <= 0.6 ~ 0.6, - test_ps >= pcut2 & test_ps <= pcut2 + 0.1 ~ pcut2 + 0.1, - TRUE ~ test_ps), - tolerance = 1e-3) - testthat::expect_equal(make_q_fn(ps, qs)(p_actual), - test_qs, - tolerance = 1e-3) + # mixture of a Normal(0,1) with weight 0.6, + # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) + + # probabilities and quantiles for point mass at 0 + point_ps_0 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_0 <- rep(0.0, length(point_ps_0)) + adj_point_ps_0 <- 0.5 * 0.6 + point_ps_0 * 0.3 + + # probabilities and quantiles for point mass at 1 + point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_1 <- rep(1.0, length(point_ps_1)) + adj_point_ps_1 <- pnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 + + ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) + qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) + test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) + + p_actual <- make_p_fn(ps, qs)(test_qs) + p_expected <- 0.3 * as.numeric(test_qs >= 0) + 0.1 * as.numeric(test_qs >= 1) + 0.6 * pnorm(test_qs) + # plot(test_qs, p_actual); lines(test_qs, p_expected) + + q_actual <- make_q_fn(ps, qs)(test_ps) + pcut1 <- 0.3 + pcut2 <- pnorm(1.0) * 0.6 + 0.3 + q_expected <- c( + qnorm(test_ps[test_ps < pcut1] / 0.6), + rep(0.0, sum((test_ps >= 0.3) & (test_ps <= 0.6))), + qnorm((test_ps[(test_ps > 0.6) & (test_ps < pcut2)] - 0.3) / 0.6), + rep(1.0, sum((test_ps >= pcut2) & (test_ps <= pcut2 + 0.1))), + qnorm((test_ps[test_ps > pcut2 + 0.1] - 0.4) / 0.6) + ) + # plot(test_ps, q_actual); lines(test_ps, q_expected) + + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + + testthat::expect_equal(make_p_fn(ps, qs)(q_actual), + dplyr::case_when( + test_ps >= 0.3 & test_ps <= 0.6 ~ 0.6, + test_ps >= pcut2 & test_ps <= pcut2 + 0.1 ~ pcut2 + 0.1, + TRUE ~ test_ps + ), + tolerance = 1e-3) + testthat::expect_equal(make_q_fn(ps, qs)(p_actual), + test_qs, + tolerance = 1e-3) }) test_that("make_p_fn, make_q_fn work, two point masses, is_hurdle with one zero and one non-zero with duplicated qs", { - # mixture of a LogNormal(0,1) with weight 0.6, - # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qlnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) - - # probabilities and quantiles for point mass at 0 - point_ps_0 <- 1.0 - point_qs_0 <- 0.0 - adj_point_ps_0 <- 0.3 - - # probabilities and quantiles for point mass at 1 - point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_1 <- rep(1.0, length(point_ps_1)) - adj_point_ps_1 <- plnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 - - ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) - qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) - test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) - - p_actual <- make_p_fn(ps, qs, tail_dist = "lnorm")(test_qs) - p_expected <- 0.3 * as.numeric(test_qs >= 0) + - 0.1 * as.numeric(test_qs >= 1) + - 0.6 * plnorm(test_qs) - # note that regions where actual matches expected depend on - # tail family assumptions: - # plot(test_qs, p_actual); lines(test_qs, p_expected) - - q_actual <- make_q_fn(ps, qs, tail_dist = "lnorm")(test_ps) - pcut1 <- 0.3 - pcut2 <- plnorm(1.0) * 0.6 + 0.3 - q_expected <- c( - rep(0.0, sum(test_ps <= 0.3)), - qlnorm((test_ps[(test_ps > 0.3) & (test_ps < pcut2)] - 0.3) / 0.6), - rep(1.0, sum((test_ps >= pcut2) & (test_ps <= pcut2 + 0.1))), - qlnorm((test_ps[test_ps > pcut2 + 0.1] - 0.4) / 0.6) - ) - # note that regions where actual matches expected depend on - # tail family assumptions: - # plot(test_ps, q_actual); lines(test_ps, q_expected) - - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - - testthat::expect_equal(make_p_fn(ps, qs, tail_dist = "lnorm")(q_actual), - dplyr::case_when( - test_ps <= 0.3 ~ 0.3, - test_ps >= pcut2 & test_ps <= pcut2 + 0.1 ~ pcut2 + 0.1, - TRUE ~ test_ps), - tolerance = 1e-3) - testthat::expect_equal(make_q_fn(ps, qs, tail_dist = "lnorm")(p_actual), - ifelse(test_qs < 0, 0, test_qs), - tolerance = 1e-3) + # mixture of a LogNormal(0,1) with weight 0.6, + # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qlnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) + + # probabilities and quantiles for point mass at 0 + point_ps_0 <- 1.0 + point_qs_0 <- 0.0 + adj_point_ps_0 <- 0.3 + + # probabilities and quantiles for point mass at 1 + point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_1 <- rep(1.0, length(point_ps_1)) + adj_point_ps_1 <- plnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 + + ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) + qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + test_ps <- seq(from = 0.01, to = 0.99, by = 0.01) + test_qs <- qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2) + + p_actual <- make_p_fn(ps, qs, tail_dist = "lnorm")(test_qs) + p_expected <- 0.3 * as.numeric(test_qs >= 0) + 0.1 * as.numeric(test_qs >= 1) + 0.6 * plnorm(test_qs) + # note that regions where actual matches expected depend on + # tail family assumptions: + # plot(test_qs, p_actual); lines(test_qs, p_expected) + + q_actual <- make_q_fn(ps, qs, tail_dist = "lnorm")(test_ps) + pcut1 <- 0.3 + pcut2 <- plnorm(1.0) * 0.6 + 0.3 + q_expected <- c( + rep(0.0, sum(test_ps <= 0.3)), + qlnorm((test_ps[(test_ps > 0.3) & (test_ps < pcut2)] - 0.3) / 0.6), + rep(1.0, sum((test_ps >= pcut2) & (test_ps <= pcut2 + 0.1))), + qlnorm((test_ps[test_ps > pcut2 + 0.1] - 0.4) / 0.6) + ) + # note that regions where actual matches expected depend on + # tail family assumptions: + # plot(test_ps, q_actual); lines(test_ps, q_expected) + + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + + testthat::expect_equal(make_p_fn(ps, qs, tail_dist = "lnorm")(q_actual), + dplyr::case_when( + test_ps <= 0.3 ~ 0.3, + test_ps >= pcut2 & test_ps <= pcut2 + 0.1 ~ pcut2 + 0.1, + TRUE ~ test_ps + ), + tolerance = 1e-3) + testthat::expect_equal(make_q_fn(ps, qs, tail_dist = "lnorm")(p_actual), + ifelse(test_qs < 0, 0, test_qs), + tolerance = 1e-3) }) test_that("make_p_fn, make_q_fn well-behaved with near-equal quantiles", { - # probabilities and quantiles with a near-duplicate q - ps <- c(0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, - 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.975, 0.99) - qs <- c(0, 0, 2.99327779507948e-16, 4.94582028429876, 8.22187599370956, - 9.89992446804951, 11.2927083522741, 12.5925601513011, 13.8718728208768, - 15.0382200560432, 16.0565760032992, 16.9592610229454, 17.7662853190937, - 18.6751481876927, 19.7650748463216, 21.0314540241319, 22.5188110517322, - 24.3155280557975, 26.5953673396936, 30.2878452640675, 37.6971667663299, - 46.5930391567271, 52.2594821457131) - - test_ps <- seq(from = 0.001, to = 0.999, by = 0.001) - test_qs <- seq(from = 0.0, to = 100.0, length.out = 1001) - - p_actual <- make_p_fn(ps, qs)(test_qs) - q_actual <- make_q_fn(ps, qs)(test_ps) - - testthat::expect_true(all(p_actual >= 0.0)) - testthat::expect_true(all(p_actual <= 1.0)) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) > 0)) - # plot(test_qs, p_actual, type = "l"); points(qs, ps, pch = 20) - # lines(q_actual, test_ps, lty=2, col='red') - - testthat::expect_true(all(q_actual >= 0.0)) - testthat::expect_true(all(q_actual <= 10 * max(qs))) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - # plot(test_ps, q_actual, type = "l"); points(ps, qs, pch = 20) - - testthat::expect_equal(make_p_fn(ps, qs)(q_actual), - dplyr::case_when( - test_ps <= 0.05 ~ 0.05, - TRUE ~ test_ps), - tolerance = 1e-3) - testthat::expect_equal(make_q_fn(ps, qs)(p_actual), - ifelse(test_qs < 0, 0, test_qs), - tolerance = 1e-3) + # probabilities and quantiles with a near-duplicate q + ps <- c(.01, .025, seq(.05, .95, by = .05), .975, .99) + qs <- c(0, 0, 2.99327779507948e-16, 4.94582028429876, 8.22187599370956, + 9.89992446804951, 11.2927083522741, 12.5925601513011, 13.8718728208768, + 15.0382200560432, 16.0565760032992, 16.9592610229454, 17.7662853190937, + 18.6751481876927, 19.7650748463216, 21.0314540241319, 22.5188110517322, + 24.3155280557975, 26.5953673396936, 30.2878452640675, 37.6971667663299, + 46.5930391567271, 52.2594821457131) + + test_ps <- seq(from = 0.001, to = 0.999, by = 0.001) + test_qs <- seq(from = 0.0, to = 100.0, length.out = 1001) + + p_actual <- make_p_fn(ps, qs)(test_qs) + q_actual <- make_q_fn(ps, qs)(test_ps) + + testthat::expect_true(all(p_actual >= 0.0)) + testthat::expect_true(all(p_actual <= 1.0)) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) > 0)) + # plot(test_qs, p_actual, type = "l"); points(qs, ps, pch = 20) + # lines(q_actual, test_ps, lty=2, col='red') + + testthat::expect_true(all(q_actual >= 0.0)) + testthat::expect_true(all(q_actual <= 10 * max(qs))) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + # plot(test_ps, q_actual, type = "l"); points(ps, qs, pch = 20) + + testthat::expect_equal(make_p_fn(ps, qs)(q_actual), + dplyr::case_when(test_ps <= 0.05 ~ 0.05, TRUE ~ test_ps), + tolerance = 1e-3) + testthat::expect_equal(make_q_fn(ps, qs)(p_actual), + ifelse(test_qs < 0, 0, test_qs), + tolerance = 1e-3) }) test_that("make_p_fn, make_q_fn well-behaved with near-equal quantiles", { - # probabilities and quantiles with a near-duplicate q - ps <- c(0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, - 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.975, 0.99) - qs <- c(0, 0, 1.5e-6, 4.94582028429876, 8.22187599370956, - 9.89992446804951, 11.2927083522741, 12.5925601513011, 13.8718728208768, - 15.0382200560432, 16.0565760032992, 16.9592610229454, 17.7662853190937, - 18.6751481876927, 19.7650748463216, 21.0314540241319, 22.5188110517322, - 24.3155280557975, 26.5953673396936, 30.2878452640675, 37.6971667663299, - 46.5930391567271, 52.2594821457131) - - test_ps <- seq(from = 0.001, to = 0.999, by = 0.001) - test_qs <- seq(from = 0.0, to = 100.0, length.out = 1001) - - p_actual <- make_p_fn(ps, qs)(test_qs) - - q_actual <- make_q_fn(ps, qs)(test_ps) - q_fn <- make_q_fn(ps, qs) - - testthat::expect_true(all(p_actual >= 0.0)) - testthat::expect_true(all(p_actual <= 1.0)) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) > 0)) - - testthat::expect_true(all(q_actual >= 0.0)) - testthat::expect_true(all(q_actual <= 10 * max(qs))) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - - testthat::expect_equal(make_p_fn(ps, qs)(q_actual), - dplyr::case_when( - test_ps <= 0.025 ~ 0.025, - TRUE ~ test_ps), - tolerance = 1e-6) - testthat::expect_equal(make_q_fn(ps, qs)(p_actual), - test_qs, - tolerance = 1e-3) + # probabilities and quantiles with a near-duplicate q + ps <- c(.01, .025, seq(.05, .95, by = .05), .975, .99) + qs <- c(0, 0, 1.5e-6, 4.94582028429876, 8.22187599370956, + 9.89992446804951, 11.2927083522741, 12.5925601513011, 13.8718728208768, + 15.0382200560432, 16.0565760032992, 16.9592610229454, 17.7662853190937, + 18.6751481876927, 19.7650748463216, 21.0314540241319, 22.5188110517322, + 24.3155280557975, 26.5953673396936, 30.2878452640675, 37.6971667663299, + 46.5930391567271, 52.2594821457131) + + test_ps <- seq(from = 0.001, to = 0.999, by = 0.001) + test_qs <- seq(from = 0.0, to = 100.0, length.out = 1001) + + p_actual <- make_p_fn(ps, qs)(test_qs) + + q_actual <- make_q_fn(ps, qs)(test_ps) + q_fn <- make_q_fn(ps, qs) + + testthat::expect_true(all(p_actual >= 0.0)) + testthat::expect_true(all(p_actual <= 1.0)) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) > 0)) + + testthat::expect_true(all(q_actual >= 0.0)) + testthat::expect_true(all(q_actual <= 10 * max(qs))) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + + testthat::expect_equal(make_p_fn(ps, qs)(q_actual), + dplyr::case_when(test_ps <= 0.025 ~ 0.025, TRUE ~ test_ps), + tolerance = 1e-6) + testthat::expect_equal(make_q_fn(ps, qs)(p_actual), + test_qs, + tolerance = 1e-3) }) test_that("make_p_fn, make_q_fn well-behaved with near-zero quantiles", { - # probabilities and quantiles with a near-duplicate q - ps <- seq(0.1, 0.9, 0.2) - qs <- c(0, 0, 0, 1e-07, 1e-05) - - testthat::expect_no_error( - cdf_hat <- distfromq:::make_p_fn(ps, qs) - ) - testthat::expect_no_error( - qf_hat <- distfromq:::make_q_fn(ps, qs) - ) + # probabilities and quantiles with a near-duplicate q + ps <- seq(0.1, 0.9, 0.2) + qs <- c(0, 0, 0, 1e-07, 1e-05) + + testthat::expect_no_error( + cdf_hat <- distfromq:::make_p_fn(ps, qs) + ) + testthat::expect_no_error( + qf_hat <- distfromq:::make_q_fn(ps, qs) + ) }) test_that("make_p_fn, make_q_fn well-behaved with 3 duplicated values, one at zero, lnorm tail dist", { - ps <- seq(0.05, 0.95, 0.1) - qs <- c(rep(0, 4), rep(1, 3), rep(5, 3)) - - testthat::expect_no_error( - q_hat <- distfromq::make_q_fn( - ps = ps, - qs = qs, - dup_tol = 1e-06, - zero_tol = 1e-12, - tail_dist = "lnorm")(seq(from = 0, to = 1, length.out = 10000 + 2)[2:10000]) - ) - - testthat::expect_no_error( - p_hat <- distfromq::make_p_fn( - ps = ps, - qs = qs, - dup_tol = 1e-06, - zero_tol = 1e-12, - tail_dist = "lnorm")(seq(from = 0, to = 10, length.out = 10000)) - ) + ps <- seq(0.05, 0.95, 0.1) + qs <- c(rep(0, 4), rep(1, 3), rep(5, 3)) + + testthat::expect_no_error( + q_hat <- distfromq::make_q_fn( + ps = ps, + qs = qs, + dup_tol = 1e-06, + zero_tol = 1e-12, + tail_dist = "lnorm" + )(seq(from = 0, to = 1, length.out = 10000 + 2)[2:10000]) + ) + + testthat::expect_no_error( + p_hat <- distfromq::make_p_fn( + ps = ps, + qs = qs, + dup_tol = 1e-06, + zero_tol = 1e-12, + tail_dist = "lnorm" + )(seq(from = 0, to = 10, length.out = 10000)) + ) }) -test_that("make_p_fn, make_q_fn well-behaved: multiple duplicates and floating point issues with discrete adjustments", { - ps <- c(.01,.025, seq(.05,.95, by=.05), .975, .99) - qs <- c(0,0,0,0,3,6,8,8,9,11,12,13,17,21,25,27,29,30,31,33,37,52,61) +test_that("make_p_fn, make_q_fn well-behaved: multiple duplicates and + floating point issues with discrete adjustments", { + ps <- c(.01, .025, seq(.05, .95, by = .05), .975, .99) + qs <- c(0, 0, 0, 0, 3, 6, 8, 8, 9, 11, 12, 13, 17, 21, 25, 27, 29, 30, 31, 33, 37, 52, 61) - q_hat <- distfromq:::make_q_fn(ps, qs) - testthat::expect_identical(q_hat(c(0.99, 1)), c(61, Inf)) - - p_hat <- distfromq::make_p_fn(ps, qs) - testthat::expect_identical(p_hat(c(61, Inf)), c(0.99, 1.0)) -}) + q_hat <- distfromq:::make_q_fn(ps, qs) + testthat::expect_identical(q_hat(c(0.99, 1)), c(61, Inf)) + + p_hat <- distfromq::make_p_fn(ps, qs) + testthat::expect_identical(p_hat(c(61, Inf)), c(0.99, 1.0)) + }) test_that("make_p_fn result outputs values <= 1", { - ps <- c(0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, - 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.975, - 0.99) - qs <- c(0, 0, 0, 0.103331351093582, 0.206662702187163, 0.258328377733954, - 0.309994053280745, 0.309994053280745, 0.361659728827535, - 0.361659728827535, 0.413325404374326, 0.413325404374326, - 0.413325404374326, 0.464991079921117, 0.464991079921117, - 0.516656755467908, 0.516656755467908, 0.568322431014698, - 0.619988106561489, 0.723319457655071, 0.878316484295443, - 1.08497918648261, 1.29164188866977) - - result <- distfromq::make_p_fn(ps=ps, qs=qs)(25) - testthat::expect_true(result <= 1) + ps <- c(.01, .025, seq(.05, .95, by = .05), .975, .99) + qs <- c(0, 0, 0, 0.103331351093582, 0.206662702187163, 0.258328377733954, + 0.309994053280745, 0.309994053280745, 0.361659728827535, + 0.361659728827535, 0.413325404374326, 0.413325404374326, + 0.413325404374326, 0.464991079921117, 0.464991079921117, + 0.516656755467908, 0.516656755467908, 0.568322431014698, + 0.619988106561489, 0.723319457655071, 0.878316484295443, + 1.08497918648261, 1.29164188866977) + + result <- distfromq::make_p_fn(ps = ps, qs = qs)(25) + testthat::expect_true(result <= 1) }) test_that("make_p_fn and make_q_fn error with out-of-bounds or incorrectly typed ps, qs", { - testthat::expect_no_error(make_p_fn(ps=c(0.0, 0.5, 1.0), qs = 1:3)) - testthat::expect_error(make_p_fn(ps=c(-1, 0.5, 1.0), qs = 1:3), - "Assertion on 'ps' failed: Element 1 is not >= 0.") - testthat::expect_error(make_p_fn(ps=c(0.0, 0.5, 2.0), qs = 1:3), - "Assertion on 'ps' failed: Element 3 is not <= 1.") - testthat::expect_error(make_p_fn(ps=c(0.0, "a", 1.0), qs = 1:3), - "Assertion on 'ps' failed: Must be of type 'numeric', not 'character'.") - testthat::expect_error(make_p_fn(ps=c(0.0, 0.5, 1.0), qs = c(1, "a", 3)), - "Assertion on 'qs' failed: Must be of type 'numeric', not 'character'.") - testthat::expect_error(make_p_fn(ps=c(0.0, 0.5, 1.0), qs = 1:4), - "'ps' and 'qs' must have the same length.") - - testthat::expect_no_error(make_q_fn(ps=c(0.0, 0.5, 1.0), qs = 1:3)) - testthat::expect_error(make_q_fn(ps=c(-1, 0.5, 1.0), qs = 1:3), - "Assertion on 'ps' failed: Element 1 is not >= 0.") - testthat::expect_error(make_q_fn(ps=c(0.0, 0.5, 2.0), qs = 1:3), - "Assertion on 'ps' failed: Element 3 is not <= 1.") - testthat::expect_error(make_q_fn(ps=c(0.0, "a", 1.0), qs = 1:3), - "Assertion on 'ps' failed: Must be of type 'numeric', not 'character'.") - testthat::expect_error(make_q_fn(ps=c(0.0, 0.5, 1.0), qs = c(1, "a", 3)), - "Assertion on 'qs' failed: Must be of type 'numeric', not 'character'.") - testthat::expect_error(make_q_fn(ps=c(0.0, 0.5, 1.0), qs = 1:4), - "'ps' and 'qs' must have the same length.") + testthat::expect_no_error(make_p_fn(ps = c(0.0, 0.5, 1.0), qs = 1:3)) + testthat::expect_error(make_p_fn(ps = c(-1, 0.5, 1.0), qs = 1:3), + "Assertion on 'ps' failed: Element 1 is not >= 0.") + testthat::expect_error(make_p_fn(ps = c(0.0, 0.5, 2.0), qs = 1:3), + "Assertion on 'ps' failed: Element 3 is not <= 1.") + testthat::expect_error(make_p_fn(ps = c(0.0, "a", 1.0), qs = 1:3), + "Assertion on 'ps' failed: Must be of type 'numeric', not 'character'.") + testthat::expect_error(make_p_fn(ps = c(0.0, 0.5, 1.0), qs = c(1, "a", 3)), + "Assertion on 'qs' failed: Must be of type 'numeric', not 'character'.") + testthat::expect_error(make_p_fn(ps = c(0.0, 0.5, 1.0), qs = 1:4), + "'ps' and 'qs' must have the same length.") + + testthat::expect_no_error(make_q_fn(ps = c(0.0, 0.5, 1.0), qs = 1:3)) + testthat::expect_error(make_q_fn(ps = c(-1, 0.5, 1.0), qs = 1:3), + "Assertion on 'ps' failed: Element 1 is not >= 0.") + testthat::expect_error(make_q_fn(ps = c(0.0, 0.5, 2.0), qs = 1:3), + "Assertion on 'ps' failed: Element 3 is not <= 1.") + testthat::expect_error(make_q_fn(ps = c(0.0, "a", 1.0), qs = 1:3), + "Assertion on 'ps' failed: Must be of type 'numeric', not 'character'.") + testthat::expect_error(make_q_fn(ps = c(0.0, 0.5, 1.0), qs = c(1, "a", 3)), + "Assertion on 'qs' failed: Must be of type 'numeric', not 'character'.") + testthat::expect_error(make_q_fn(ps = c(0.0, 0.5, 1.0), qs = 1:4), + "'ps' and 'qs' must have the same length.") }) test_that("make_p_fn and make_q_fn results error with out-of-bounds or incorrectly typed argument", { - p_fn <- make_p_fn(ps=c(0.01, 0.5, 0.99), qs = 1:3) - testthat::expect_no_error(p_fn(c(0, 1, 5))) - testthat::expect_error(p_fn("a"), - "Must be of type 'numeric', not 'character'.") - - q_fn <- make_q_fn(ps=c(0.01, 0.5, 0.99), qs = 1:3) - testthat::expect_no_error(q_fn(c(0, 0.5, 1))) - testthat::expect_error(q_fn(c(-1, 0.5, 1.0)), - "Assertion on 'p' failed: Element 1 is not >= 0.") - testthat::expect_error(q_fn(c(0.0, 0.5, 2.0)), - "Assertion on 'p' failed: Element 3 is not <= 1.") - testthat::expect_error(q_fn("a"), - "Must be of type 'numeric', not 'character'.") + p_fn <- make_p_fn(ps = c(0.01, 0.5, 0.99), qs = 1:3) + testthat::expect_no_error(p_fn(c(0, 1, 5))) + testthat::expect_error(p_fn("a"), + "Must be of type 'numeric', not 'character'.") + + q_fn <- make_q_fn(ps = c(0.01, 0.5, 0.99), qs = 1:3) + testthat::expect_no_error(q_fn(c(0, 0.5, 1))) + testthat::expect_error(q_fn(c(-1, 0.5, 1.0)), + "Assertion on 'p' failed: Element 1 is not >= 0.") + testthat::expect_error(q_fn(c(0.0, 0.5, 2.0)), + "Assertion on 'p' failed: Element 3 is not <= 1.") + testthat::expect_error(q_fn("a"), + "Must be of type 'numeric', not 'character'.") }) diff --git a/tests/testthat/test-make_r_fn.R b/tests/testthat/test-make_r_fn.R index 3a0985f..d50a0aa 100644 --- a/tests/testthat/test-make_r_fn.R +++ b/tests/testthat/test-make_r_fn.R @@ -1,26 +1,26 @@ test_that("make_r_fn errors with out-of-bounds or incorrectly typed ps, qs", { - testthat::expect_no_error(make_r_fn(ps=c(0.0, 0.5, 1.0), qs = 1:3)) - testthat::expect_error(make_r_fn(ps=c(-1, 0.5, 1.0), qs = 1:3), - "Assertion on 'ps' failed: Element 1 is not >= 0.") - testthat::expect_error(make_r_fn(ps=c(0.0, 0.5, 2.0), qs = 1:3), - "Assertion on 'ps' failed: Element 3 is not <= 1.") - testthat::expect_error(make_r_fn(ps=c(0.0, "a", 1.0), qs = 1:3), - "Assertion on 'ps' failed: Must be of type 'numeric', not 'character'.") - testthat::expect_error(make_r_fn(ps=c(0.0, 0.5, 1.0), qs = c(1, "a", 3)), - "Assertion on 'qs' failed: Must be of type 'numeric', not 'character'.") - testthat::expect_error(make_r_fn(ps=c(0.0, 0.5, 1.0), qs = 1:4), - "'ps' and 'qs' must have the same length.") + testthat::expect_no_error(make_r_fn(ps = c(0.0, 0.5, 1.0), qs = 1:3)) + testthat::expect_error(make_r_fn(ps = c(-1, 0.5, 1.0), qs = 1:3), + "Assertion on 'ps' failed: Element 1 is not >= 0.") + testthat::expect_error(make_r_fn(ps = c(0.0, 0.5, 2.0), qs = 1:3), + "Assertion on 'ps' failed: Element 3 is not <= 1.") + testthat::expect_error(make_r_fn(ps = c(0.0, "a", 1.0), qs = 1:3), + "Assertion on 'ps' failed: Must be of type 'numeric', not 'character'.") + testthat::expect_error(make_r_fn(ps = c(0.0, 0.5, 1.0), qs = c(1, "a", 3)), + "Assertion on 'qs' failed: Must be of type 'numeric', not 'character'.") + testthat::expect_error(make_r_fn(ps = c(0.0, 0.5, 1.0), qs = 1:4), + "'ps' and 'qs' must have the same length.") }) test_that("make_r_fn result errors with out-of-bounds or incorrectly typed argument n", { - r_fn <- make_r_fn(ps=c(0.0, 0.5, 1.0), qs = 1:3) - testthat::expect_no_error(r_fn(5)) - testthat::expect_no_error(r_fn(1)) - testthat::expect_no_error(r_fn(0)) - testthat::expect_error(r_fn("a"), - "Must be of type 'integerish', not 'character'.") - testthat::expect_error(r_fn(-1), - "Assertion on 'n' failed: Element 1 is not >= 0.") - testthat::expect_error(r_fn(c(1L, 5L)), - "Assertion on 'n' failed: Must have length 1, but has length 2.") + r_fn <- make_r_fn(ps = c(0.0, 0.5, 1.0), qs = 1:3) + testthat::expect_no_error(r_fn(5)) + testthat::expect_no_error(r_fn(1)) + testthat::expect_no_error(r_fn(0)) + testthat::expect_error(r_fn("a"), + "Must be of type 'integerish', not 'character'.") + testthat::expect_error(r_fn(-1), + "Assertion on 'n' failed: Element 1 is not >= 0.") + testthat::expect_error(r_fn(c(1L, 5L)), + "Assertion on 'n' failed: Must have length 1, but has length 2.") }) diff --git a/tests/testthat/test-split_disc_cont_ps_qs.R b/tests/testthat/test-split_disc_cont_ps_qs.R index 7e4fc14..99cf71c 100644 --- a/tests/testthat/test-split_disc_cont_ps_qs.R +++ b/tests/testthat/test-split_disc_cont_ps_qs.R @@ -18,293 +18,296 @@ # mass probability right test_that("split_disc_cont_ps_qs works, continuous component; no point masses", { - ps <- seq(from = 0.1, to = 0.9, by = 0.1) - qs <- qnorm(ps) - expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( - disc_weight = 0.0, - disc_ps = numeric(), disc_qs = numeric(), - cont_ps = ps, cont_qs = qs, - disc_ps_range = list() - ) + ps <- seq(from = 0.1, to = 0.9, by = 0.1) + qs <- qnorm(ps) + expect_equal( + split_disc_cont_ps_qs(ps, qs), + list( + disc_weight = 0.0, + disc_ps = numeric(), disc_qs = numeric(), + cont_ps = ps, cont_qs = qs, + disc_ps_range = list() ) + ) }) test_that("split_disc_cont_ps_qs works, no continuous component; one point mass, single q", { - ps <- 0.1 - qs <- rep(0.0, length(ps)) - expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( - disc_weight = 1.0, - disc_ps = 1.0, disc_qs = 0.0, - cont_ps = numeric(), cont_qs = numeric(), - disc_ps_range = list(c(0.0, 1.0)) - ) + ps <- 0.1 + qs <- rep(0.0, length(ps)) + expect_equal( + split_disc_cont_ps_qs(ps, qs), + list( + disc_weight = 1.0, + disc_ps = 1.0, disc_qs = 0.0, + cont_ps = numeric(), cont_qs = numeric(), + disc_ps_range = list(c(0.0, 1.0)) ) + ) }) test_that("split_disc_cont_ps_qs works, no continuous component; one point mass, duplicated qs", { - ps <- seq(from = 0.1, to = 0.9, by = 0.1) - qs <- rep(0.0, length(ps)) - expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( - disc_weight = 1.0, - disc_ps = 1.0, disc_qs = 0.0, - cont_ps = numeric(), cont_qs = numeric(), - disc_ps_range = list(c(0.0, 1.0)) - ) + ps <- seq(from = 0.1, to = 0.9, by = 0.1) + qs <- rep(0.0, length(ps)) + expect_equal( + split_disc_cont_ps_qs(ps, qs), + list( + disc_weight = 1.0, + disc_ps = 1.0, disc_qs = 0.0, + cont_ps = numeric(), cont_qs = numeric(), + disc_ps_range = list(c(0.0, 1.0)) ) + ) }) test_that("split_disc_cont_ps_qs works, no continuous component; two point masses, both from duplicated qs", { - ps <- seq(from = 0.1, to = 0.9, by = 0.1) - qs <- c(rep(1.0, 3), rep(2.0, 6)) - expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( - disc_weight = 1.0, - disc_ps = c(3/9, 6/9), disc_qs = c(1.0, 2.0), - cont_ps = numeric(), cont_qs = numeric(), - disc_ps_range = list(c(0.0, 3/9), c(3/9, 1.0)) - ) + ps <- seq(from = 0.1, to = 0.9, by = 0.1) + qs <- c(rep(1.0, 3), rep(2.0, 6)) + expect_equal( + split_disc_cont_ps_qs(ps, qs), + list( + disc_weight = 1.0, + disc_ps = c(3 / 9, 6 / 9), disc_qs = c(1.0, 2.0), + cont_ps = numeric(), cont_qs = numeric(), + disc_ps_range = list(c(0.0, 3 / 9), c(3 / 9, 1.0)) ) + ) }) -test_that("split_disc_cont_ps_qs works, no continuous component; two point masses, non-zero, duplicated values first only", { - ps <- seq(from = 0.1, to = 0.4, by = 0.1) - qs <- c(rep(1.0, 3), rep(2.0, 1)) - expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( - disc_weight = 1.0, - disc_ps = c(3/9, 6/9), disc_qs = c(1.0, 2.0), - cont_ps = numeric(), cont_qs = numeric(), - disc_ps_range = list(c(0.0, 3/9), c(3/9, 1.0)) - ) - ) -}) - - -test_that("split_disc_cont_ps_qs works, no continuous component; two point masses, non-zero, duplicated values second only", { - ps <- seq(from = 0.3, to = 0.9, by = 0.1) - qs <- c(rep(1.0, 1), rep(2.0, 6)) - expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( - disc_weight = 1.0, - disc_ps = c(3/9, 6/9), disc_qs = c(1.0, 2.0), - cont_ps = numeric(), cont_qs = numeric(), - disc_ps_range = list(c(0.0, 3/9), c(3/9, 1.0)) - ) - ) -}) +test_that("split_disc_cont_ps_qs works, no continuous component; + two point masses, non-zero, duplicated values first only", { + ps <- seq(from = 0.1, to = 0.4, by = 0.1) + qs <- c(rep(1.0, 3), rep(2.0, 1)) + expect_equal( + split_disc_cont_ps_qs(ps, qs), + list( + disc_weight = 1.0, + disc_ps = c(3 / 9, 6 / 9), disc_qs = c(1.0, 2.0), + cont_ps = numeric(), cont_qs = numeric(), + disc_ps_range = list(c(0.0, 3 / 9), c(3 / 9, 1.0)) + ) + ) + }) + + +test_that("split_disc_cont_ps_qs works, no continuous component; + two point masses, non-zero, duplicated values second only", { + ps <- seq(from = 0.3, to = 0.9, by = 0.1) + qs <- c(rep(1.0, 1), rep(2.0, 6)) + expect_equal( + split_disc_cont_ps_qs(ps, qs), + list( + disc_weight = 1.0, + disc_ps = c(3 / 9, 6 / 9), disc_qs = c(1.0, 2.0), + cont_ps = numeric(), cont_qs = numeric(), + disc_ps_range = list(c(0.0, 3 / 9), c(3 / 9, 1.0)) + ) + ) + }) test_that("split_disc_cont_ps_qs works, no continuous component; is_hurdle with one zero and one non-zero", { - ps <- seq(from = 0.1, to = 0.2, by = 0.1) - qs <- c(1e-13, 2.0) - expect_equal( - split_disc_cont_ps_qs(ps, qs, is_hurdle = TRUE), - list( - disc_weight = 1.0, - disc_ps = c(1/9, 8/9), disc_qs = c(1e-13 / 2, 2.0), - cont_ps = numeric(), cont_qs = numeric(), - disc_ps_range = list(c(0.0, 1/9), c(1/9, 1.0)) - ) + ps <- seq(from = 0.1, to = 0.2, by = 0.1) + qs <- c(1e-13, 2.0) + expect_equal( + split_disc_cont_ps_qs(ps, qs, is_hurdle = TRUE), + list( + disc_weight = 1.0, + disc_ps = c(1 / 9, 8 / 9), disc_qs = c(1e-13 / 2, 2.0), + cont_ps = numeric(), cont_qs = numeric(), + disc_ps_range = list(c(0.0, 1 / 9), c(1 / 9, 1.0)) ) + ) }) test_that("split_disc_cont_ps_qs works, continuous component; one point mass, duplicated qs", { - # mixture of a Normal(0,1) with weight 0.8 and - # a point mass at 0 with weight 0.2 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) - - # probabilities and quantiles for point mass at 0 - point_ps <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs <- rep(0.0, length(point_ps)) - adj_point_ps <- 0.5 * 0.8 + point_ps * 0.2 - - ps <- sort(c(adj_norm_ps, adj_point_ps)) - qs <- sort(c(norm_qs, point_qs)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( - disc_weight = 0.2, - disc_ps = 1.0, - disc_qs = 0.0, - cont_ps = norm_ps, - cont_qs = norm_qs, - disc_ps_range = list(c(0.4, 0.6)) - ) + # mixture of a Normal(0, 1) with weight 0.8 and + # a point mass at 0 with weight 0.2 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) + + # probabilities and quantiles for point mass at 0 + point_ps <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs <- rep(0.0, length(point_ps)) + adj_point_ps <- 0.5 * 0.8 + point_ps * 0.2 + + ps <- sort(c(adj_norm_ps, adj_point_ps)) + qs <- sort(c(norm_qs, point_qs)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + expect_equal( + split_disc_cont_ps_qs(ps, qs), + list( + disc_weight = 0.2, + disc_ps = 1.0, + disc_qs = 0.0, + cont_ps = norm_ps, + cont_qs = norm_qs, + disc_ps_range = list(c(0.4, 0.6)) ) + ) }) test_that("split_disc_cont_ps_qs works, continuous component; one point mass, is_hurdle with zero", { - # mixture of a LogNormal(0,1) with weight 0.8 and - # a point mass at 0 with weight 0.2 - - # probabilities and quantiles for lognormal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qlnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) - - # probabilities and quantiles for point mass at 0 - point_ps <- 1.0 - point_qs <- 0.0 - adj_point_ps <- 0.2 - - ps <- sort(c(adj_norm_ps, adj_point_ps)) - qs <- sort(c(norm_qs, point_qs)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - expect_equal( - split_disc_cont_ps_qs(ps, qs, is_hurdle = TRUE), - list( - disc_weight = 0.2, - disc_ps = 1.0, - disc_qs = 0.0, - cont_ps = norm_ps, - cont_qs = norm_qs, - disc_ps_range = list(c(0.0, 0.2)) - ) + # mixture of a LogNormal(0,1) with weight 0.8 and + # a point mass at 0 with weight 0.2 + + # probabilities and quantiles for lognormal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qlnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) + + # probabilities and quantiles for point mass at 0 + point_ps <- 1.0 + point_qs <- 0.0 + adj_point_ps <- 0.2 + + ps <- sort(c(adj_norm_ps, adj_point_ps)) + qs <- sort(c(norm_qs, point_qs)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + expect_equal( + split_disc_cont_ps_qs(ps, qs, is_hurdle = TRUE), + list( + disc_weight = 0.2, + disc_ps = 1.0, + disc_qs = 0.0, + cont_ps = norm_ps, + cont_qs = norm_qs, + disc_ps_range = list(c(0.0, 0.2)) ) + ) }) test_that("split_disc_cont_ps_qs works, two point masses, both from duplicated qs", { - # mixture of a Normal(0,1) with weight 0.6, - # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) - - # probabilities and quantiles for point mass at 0 - point_ps_0 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_0 <- rep(0.0, length(point_ps_0)) - adj_point_ps_0 <- 0.5 * 0.6 + point_ps_0 * 0.3 - - # probabilities and quantiles for point mass at 1 - point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_1 <- rep(1.0, length(point_ps_1)) - adj_point_ps_1 <- pnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 - - ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) - qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( - disc_weight = 0.4, - disc_ps = c(0.75, 0.25), - disc_qs = c(0.0, 1.0), - cont_ps = sort(c(norm_ps, pnorm(1.0))), - cont_qs = sort(c(norm_qs, 1.0)), - disc_ps_range = list( - range(adj_point_ps_0), - range(adj_point_ps_1) - ) + # mixture of a Normal(0,1) with weight 0.6, + # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) + + # probabilities and quantiles for point mass at 0 + point_ps_0 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_0 <- rep(0.0, length(point_ps_0)) + adj_point_ps_0 <- 0.5 * 0.6 + point_ps_0 * 0.3 + + # probabilities and quantiles for point mass at 1 + point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_1 <- rep(1.0, length(point_ps_1)) + adj_point_ps_1 <- pnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 + + ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) + qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + expect_equal( + split_disc_cont_ps_qs(ps, qs), + list( + disc_weight = 0.4, + disc_ps = c(0.75, 0.25), + disc_qs = c(0.0, 1.0), + cont_ps = sort(c(norm_ps, pnorm(1.0))), + cont_qs = sort(c(norm_qs, 1.0)), + disc_ps_range = list( + range(adj_point_ps_0), + range(adj_point_ps_1) ) ) + ) }) -test_that("split_disc_cont_ps_qs works, two point masses, is_hurdle with one zero and one non-zero with duplicated qs", { - # mixture of a LogNormal(0,1) with weight 0.6, - # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qlnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) - - # probabilities and quantiles for point mass at 0 - point_ps_0 <- 1.0 - point_qs_0 <- 0.0 - adj_point_ps_0 <- 0.3 - - # probabilities and quantiles for point mass at 1 - point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_1 <- rep(1.0, length(point_ps_1)) - adj_point_ps_1 <- plnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 - - ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) - qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - expect_equal( - split_disc_cont_ps_qs(ps, qs, is_hurdle = TRUE), - list( - disc_weight = 0.4, - disc_ps = c(0.75, 0.25), - disc_qs = c(0.0, 1.0), - cont_ps = sort(c(norm_ps)), - cont_qs = sort(c(norm_qs)), - disc_ps_range = list( - c(0.0, 0.3), - range(adj_point_ps_1) - ) - ) - ) -}) +test_that("split_disc_cont_ps_qs works, two point masses, + is_hurdle with one zero and one non-zero with duplicated qs", { + # mixture of a LogNormal(0,1) with weight 0.6, + # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qlnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) + + # probabilities and quantiles for point mass at 0 + point_ps_0 <- 1.0 + point_qs_0 <- 0.0 + adj_point_ps_0 <- 0.3 + + # probabilities and quantiles for point mass at 1 + point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_1 <- rep(1.0, length(point_ps_1)) + adj_point_ps_1 <- plnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 + + ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) + qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + expect_equal( + split_disc_cont_ps_qs(ps, qs, is_hurdle = TRUE), + list( + disc_weight = 0.4, + disc_ps = c(0.75, 0.25), + disc_qs = c(0.0, 1.0), + cont_ps = sort(c(norm_ps)), + cont_qs = sort(c(norm_qs)), + disc_ps_range = list( + c(0.0, 0.3), + range(adj_point_ps_1) + ) + ) + ) + }) test_that("split_disc_cont_ps_qs fails, one discrete component mismatched ps", { - # mixture of a Normal(0,1) with weight 0.8 and - # a point mass at 0 with weight 0.2 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_ps <- norm_ps[norm_ps != 0.5] - norm_qs <- qnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) - - # probabilities and quantiles for point mass at 0 - point_ps <- seq(from = 0.2, to = 1.0, by = 0.1) - point_qs <- rep(0.0, length(point_ps)) - adj_point_ps <- 0.5 * 0.8 + point_ps * 0.2 - - ps <- sort(c(adj_norm_ps, adj_point_ps)) - qs <- sort(c(norm_qs, point_qs)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - # NOTE: ps does not include 0.4, the value at which we switch - # between the continuous and discrete distributions. This - # results in an underestimate of the weight of the discrete - # component. - - # expect_equal( - # split_disc_cont_ps_qs(ps, qs), - # list( - # disc_weight = 0.2, - # disc_ps = 1.0, - # disc_qs = 0.0, - # cont_ps = norm_ps, - # cont_qs = norm_qs - # ) - # ) + # mixture of a Normal(0,1) with weight 0.8 and + # a point mass at 0 with weight 0.2 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_ps <- norm_ps[norm_ps != 0.5] + norm_qs <- qnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) + + # probabilities and quantiles for point mass at 0 + point_ps <- seq(from = 0.2, to = 1.0, by = 0.1) + point_qs <- rep(0.0, length(point_ps)) + adj_point_ps <- 0.5 * 0.8 + point_ps * 0.2 + + ps <- sort(c(adj_norm_ps, adj_point_ps)) + qs <- sort(c(norm_qs, point_qs)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + # NOTE: ps does not include 0.4, the value at which we switch + # between the continuous and discrete distributions. This + # results in an underestimate of the weight of the discrete + # component. + + # expect_equal( + # split_disc_cont_ps_qs(ps, qs), + # list( + # disc_weight = 0.2, + # disc_ps = 1.0, + # disc_qs = 0.0, + # cont_ps = norm_ps, + # cont_qs = norm_qs + # ) + # ) }) diff --git a/tests/testthat/test-step_interpolate.R b/tests/testthat/test-step_interpolate.R index 41be9f5..d6ca013 100644 --- a/tests/testthat/test-step_interpolate.R +++ b/tests/testthat/test-step_interpolate.R @@ -1,146 +1,146 @@ test_that("step_interp works, no discrete component", { - ps <- seq(from = 0.1, to = 0.9, by = 0.1) - qs <- qnorm(ps) + ps <- seq(from = 0.1, to = 0.9, by = 0.1) + qs <- qnorm(ps) - step_interp_r <- step_interp_factory(x = ps, y = qs, cont_dir = "right") - step_interp_l <- step_interp_factory(x = ps, y = qs, cont_dir = "left") + step_interp_r <- step_interp_factory(x = ps, y = qs, cont_dir = "right") + step_interp_l <- step_interp_factory(x = ps, y = qs, cont_dir = "left") - expect_equal(step_interp_r(ps[1:8]), qs[1:8]) - expect_equal(step_interp_l(ps[2:9]), qs[2:9]) + expect_equal(step_interp_r(ps[1:8]), qs[1:8]) + expect_equal(step_interp_l(ps[2:9]), qs[2:9]) - expect_equal(step_interp_r(c(0.17, 0.32)), - c((1 - 0.7) * qs[1] + 0.7 * qs[2], - (1 - 0.2) * qs[3] + 0.2 * qs[4])) - expect_equal(step_interp_l(c(0.17, 0.32)), - c((1 - 0.7) * qs[1] + 0.7 * qs[2], - (1 - 0.2) * qs[3] + 0.2 * qs[4])) + expect_equal(step_interp_r(c(0.17, 0.32)), + c((1 - 0.7) * qs[1] + 0.7 * qs[2], + (1 - 0.2) * qs[3] + 0.2 * qs[4])) + expect_equal(step_interp_l(c(0.17, 0.32)), + c((1 - 0.7) * qs[1] + 0.7 * qs[2], + (1 - 0.2) * qs[3] + 0.2 * qs[4])) }) test_that("step_interp works, no continuous component", { - ps <- seq(from = 0.1, to = 0.9, by = 0.1) - qs <- rep(1.0, length(ps)) - - step_interp_r <- step_interp_factory(x = qs, y = ps, cont_dir = "right") - step_interp_l <- step_interp_factory(x = qs, y = ps, cont_dir = "left") - - test_qs <- c(0.99999, 1.0, 1.00001) - expect_equal( - step_interp_r(test_qs), - c(NA_real_, 0.9, NA_real_) - ) - expect_equal( - step_interp_l(test_qs), - c(NA_real_, 0.1, NA_real_) - ) + ps <- seq(from = 0.1, to = 0.9, by = 0.1) + qs <- rep(1.0, length(ps)) + + step_interp_r <- step_interp_factory(x = qs, y = ps, cont_dir = "right") + step_interp_l <- step_interp_factory(x = qs, y = ps, cont_dir = "left") + + test_qs <- c(0.99999, 1.0, 1.00001) + expect_equal( + step_interp_r(test_qs), + c(NA_real_, 0.9, NA_real_) + ) + expect_equal( + step_interp_l(test_qs), + c(NA_real_, 0.1, NA_real_) + ) }) test_that("step_interp works, one discrete component", { - # mixture of a Normal(0,1) with weight 0.8 and - # a point mass at 0 with weight 0.2 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) - - # probabilities and quantiles for point mass at 0 - point_ps <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs <- rep(0.0, length(point_ps)) - adj_point_ps <- 0.5 * 0.8 + point_ps * 0.2 - - ps <- sort(c(adj_norm_ps, adj_point_ps)) - qs <- sort(c(norm_qs, point_qs)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - step_interp_cdf_r <- step_interp_factory(x = qs, y = ps, cont_dir = "right") - step_interp_cdf_l <- step_interp_factory(x = qs, y = ps, cont_dir = "left") - - test_input_qs <- sort(c(range(qs), seq(from = min(qs) - 0.00001, - to = max(qs) + 0.00001, - length.out = 101))) - - # manual calculation for step_interp_cdf_r - expected <- rep(NA_real_, length(test_input_qs)) - for (i in seq_len(length(norm_qs) - 1)) { - qi <- norm_qs[i] - qip1 <- norm_qs[i + 1] - pi <- adj_norm_ps[i] - pip1 <- adj_norm_ps[i + 1] - if (qi == 0.0) { - pi <- pi + 0.2 - } - delta <- qip1 - qi - j <- which(test_input_qs >= qi & test_input_qs < qip1) - expected[j] <- pi + (test_input_qs[j] - qi) * (pip1 - pi) / (delta) + # mixture of a Normal(0,1) with weight 0.8 and + # a point mass at 0 with weight 0.2 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.8 + 0.2 * (norm_qs > 0.0) + + # probabilities and quantiles for point mass at 0 + point_ps <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs <- rep(0.0, length(point_ps)) + adj_point_ps <- 0.5 * 0.8 + point_ps * 0.2 + + ps <- sort(c(adj_norm_ps, adj_point_ps)) + qs <- sort(c(norm_qs, point_qs)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + step_interp_cdf_r <- step_interp_factory(x = qs, y = ps, cont_dir = "right") + step_interp_cdf_l <- step_interp_factory(x = qs, y = ps, cont_dir = "left") + + test_input_qs <- sort(c(range(qs), seq(from = min(qs) - 0.00001, + to = max(qs) + 0.00001, + length.out = 101))) + + # manual calculation for step_interp_cdf_r + expected <- rep(NA_real_, length(test_input_qs)) + for (i in seq_len(length(norm_qs) - 1)) { + qi <- norm_qs[i] + qip1 <- norm_qs[i + 1] + pi <- adj_norm_ps[i] + pip1 <- adj_norm_ps[i + 1] + if (qi == 0.0) { + pi <- pi + 0.2 } - expected[test_input_qs == 0.0] <- max(ps[qs == 0.0]) - expected[test_input_qs == max(qs)] <- max(ps) - expect_equal(step_interp_cdf_r(test_input_qs), expected, tolerance = 1e-8) - - # only change in expected output from step_interp_cdf_l is at the step - expected[test_input_qs == 0.0] <- min(ps[qs == 0.0]) - expect_equal(step_interp_cdf_l(test_input_qs), expected, tolerance = 1e-8) + delta <- qip1 - qi + j <- which(test_input_qs >= qi & test_input_qs < qip1) + expected[j] <- pi + (test_input_qs[j] - qi) * (pip1 - pi) / (delta) + } + expected[test_input_qs == 0.0] <- max(ps[qs == 0.0]) + expected[test_input_qs == max(qs)] <- max(ps) + expect_equal(step_interp_cdf_r(test_input_qs), expected, tolerance = 1e-8) + + # only change in expected output from step_interp_cdf_l is at the step + expected[test_input_qs == 0.0] <- min(ps[qs == 0.0]) + expect_equal(step_interp_cdf_l(test_input_qs), expected, tolerance = 1e-8) }) test_that("step_interp works, two discrete components", { - # mixture of a Normal(0,1) with weight 0.6, - # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) - - # probabilities and quantiles for point mass at 0 - point_ps_0 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_0 <- rep(0.0, length(point_ps_0)) - adj_point_ps_0 <- 0.5 * 0.6 + point_ps_0 * 0.3 - - # probabilities and quantiles for point mass at 1 - point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_1 <- rep(1.0, length(point_ps_1)) - adj_point_ps_1 <- pnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 - - ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) - qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - step_interp_cdf_r <- step_interp_factory(x = qs, y = ps, cont_dir = "right") - step_interp_cdf_l <- step_interp_factory(x = qs, y = ps, cont_dir = "left") - - test_input_qs <- sort(c(range(qs), seq(from = min(qs) - 0.00001, - to = max(qs) + 0.00001, - length.out = 101))) - - # manual calculation for step_interp_cdf_r - expected <- rep(NA_real_, length(test_input_qs)) - norm_qs <- sort(c(norm_qs, 1.0)) - adj_norm_ps <- sort(c(adj_norm_ps, pnorm(1.0) * 0.6 + 0.3)) - for (i in seq_len(length(norm_qs) - 1)) { - qi <- norm_qs[i] - qip1 <- norm_qs[i + 1] - pi <- adj_norm_ps[i] - pip1 <- adj_norm_ps[i + 1] - if (qi == 0.0) { - pi <- pi + 0.3 - } else if (qi == 1.0) { - pi <- pi + 0.1 - } - delta <- qip1 - qi - j <- which(test_input_qs >= qi & test_input_qs < qip1) - expected[j] <- pi + (test_input_qs[j] - qi) * (pip1 - pi) / (delta) + # mixture of a Normal(0,1) with weight 0.6, + # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) + + # probabilities and quantiles for point mass at 0 + point_ps_0 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_0 <- rep(0.0, length(point_ps_0)) + adj_point_ps_0 <- 0.5 * 0.6 + point_ps_0 * 0.3 + + # probabilities and quantiles for point mass at 1 + point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_1 <- rep(1.0, length(point_ps_1)) + adj_point_ps_1 <- pnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 + + ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) + qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + step_interp_cdf_r <- step_interp_factory(x = qs, y = ps, cont_dir = "right") + step_interp_cdf_l <- step_interp_factory(x = qs, y = ps, cont_dir = "left") + + test_input_qs <- sort(c(range(qs), seq(from = min(qs) - 0.00001, + to = max(qs) + 0.00001, + length.out = 101))) + + # manual calculation for step_interp_cdf_r + expected <- rep(NA_real_, length(test_input_qs)) + norm_qs <- sort(c(norm_qs, 1.0)) + adj_norm_ps <- sort(c(adj_norm_ps, pnorm(1.0) * 0.6 + 0.3)) + for (i in seq_len(length(norm_qs) - 1)) { + qi <- norm_qs[i] + qip1 <- norm_qs[i + 1] + pi <- adj_norm_ps[i] + pip1 <- adj_norm_ps[i + 1] + if (qi == 0.0) { + pi <- pi + 0.3 + } else if (qi == 1.0) { + pi <- pi + 0.1 } - expected[test_input_qs == 0.0] <- max(ps[qs == 0.0]) - expected[test_input_qs == 1.0] <- max(ps[qs == 1.0]) - expected[test_input_qs == max(qs)] <- max(ps) - expect_equal(step_interp_cdf_r(test_input_qs), expected, tolerance = 1e-8) - - # only change in expected output from step_interp_cdf_l is at the steps - expected[test_input_qs == 0.0] <- min(ps[qs == 0.0]) - expected[test_input_qs == 1.0] <- min(ps[qs == 1.0]) - expect_equal(step_interp_cdf_l(test_input_qs), expected, tolerance = 1e-8) + delta <- qip1 - qi + j <- which(test_input_qs >= qi & test_input_qs < qip1) + expected[j] <- pi + (test_input_qs[j] - qi) * (pip1 - pi) / (delta) + } + expected[test_input_qs == 0.0] <- max(ps[qs == 0.0]) + expected[test_input_qs == 1.0] <- max(ps[qs == 1.0]) + expected[test_input_qs == max(qs)] <- max(ps) + expect_equal(step_interp_cdf_r(test_input_qs), expected, tolerance = 1e-8) + + # only change in expected output from step_interp_cdf_l is at the steps + expected[test_input_qs == 0.0] <- min(ps[qs == 0.0]) + expected[test_input_qs == 1.0] <- min(ps[qs == 1.0]) + expect_equal(step_interp_cdf_l(test_input_qs), expected, tolerance = 1e-8) }) diff --git a/tests/testthat/test-unique_duplicated_tol.R b/tests/testthat/test-unique_duplicated_tol.R index 11c849a..f54fe0a 100644 --- a/tests/testthat/test-unique_duplicated_tol.R +++ b/tests/testthat/test-unique_duplicated_tol.R @@ -1,68 +1,74 @@ test_that("get_dup_run_inds, unique_tol and duplicated_tol work, first value duplicated", { - x <- c(1, 1 + 1e-7, 2, 3, 4, 4, 4, 5, 6, - 7 - 2e-7, 7 - 1e-7, 7 + 1e-7, 7 + 2e-7, 8, 9) + x <- c(1, 1 + 1e-7, 2, 3, 4, 4, 4, 5, 6, + 7 - 2e-7, 7 - 1e-7, 7 + 1e-7, 7 + 2e-7, 8, 9) - dxf <- duplicated_tol(x, incl_first = FALSE) - dxt <- duplicated_tol(x, incl_first = TRUE) - ux <- unique_tol(x) - dri <- get_dup_run_inds(dxf) - expect_true(all.equal( - dxf, - c(FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, - FALSE, TRUE, TRUE, TRUE, FALSE, FALSE))) - expect_true(all.equal( - dxt, - c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, - TRUE, TRUE, TRUE, TRUE, FALSE, FALSE))) - expect_true(all.equal(ux, c(1 + 5e-8, 2:9))) - expect_equal( - dri, - list(starts = c(1, 5, 10), ends = c(2, 7, 13)) - ) + dxf <- duplicated_tol(x, incl_first = FALSE) + dxt <- duplicated_tol(x, incl_first = TRUE) + ux <- unique_tol(x) + dri <- get_dup_run_inds(dxf) + expect_true(all.equal( + dxf, + c(FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, + FALSE, TRUE, TRUE, TRUE, FALSE, FALSE) + )) + expect_true(all.equal( + dxt, + c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, + TRUE, TRUE, TRUE, TRUE, FALSE, FALSE) + )) + expect_true(all.equal(ux, c(1 + 5e-8, 2:9))) + expect_equal( + dri, + list(starts = c(1, 5, 10), ends = c(2, 7, 13)) + ) }) test_that("get_dup_run_inds, unique_tol and duplicated_tol work, first value not duplicated", { - x <- c(0, 1, 2, 3, 4, 4, 4, 5, 6, - 7 - 2e-7, 7 - 1e-7, 7 + 1e-7, 7 + 2e-7, 8, 9) + x <- c(0, 1, 2, 3, 4, 4, 4, 5, 6, + 7 - 2e-7, 7 - 1e-7, 7 + 1e-7, 7 + 2e-7, 8, 9) - dxf <- duplicated_tol(x, incl_first = FALSE) - dxt <- duplicated_tol(x, incl_first = TRUE) - ux <- unique_tol(x) - dri <- get_dup_run_inds(dxf) - expect_true(all.equal( - dxf, - c(FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, - FALSE, TRUE, TRUE, TRUE, FALSE, FALSE))) - expect_true(all.equal( - dxt, - c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, - TRUE, TRUE, TRUE, TRUE, FALSE, FALSE))) - expect_true(all.equal(ux, 0:9)) - expect_equal( - dri, - list(starts = c(5, 10), ends = c(7, 13)) - ) + dxf <- duplicated_tol(x, incl_first = FALSE) + dxt <- duplicated_tol(x, incl_first = TRUE) + ux <- unique_tol(x) + dri <- get_dup_run_inds(dxf) + expect_true(all.equal( + dxf, + c(FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, + FALSE, TRUE, TRUE, TRUE, FALSE, FALSE) + )) + expect_true(all.equal( + dxt, + c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, + TRUE, TRUE, TRUE, TRUE, FALSE, FALSE) + )) + expect_true(all.equal(ux, 0:9)) + expect_equal( + dri, + list(starts = c(5, 10), ends = c(7, 13)) + ) }) test_that("get_dup_run_inds, unique_tol and duplicated_tol work, consecutive runs of different duplicated values", { - x <- c(rep(0, 5), rep(1, 4)) + x <- c(rep(0, 5), rep(1, 4)) - dxf <- duplicated_tol(x, incl_first = FALSE) - dxt <- duplicated_tol(x, incl_first = TRUE) - ux <- unique_tol(x) - dri <- get_dup_run_inds(dxf) - expect_true(all.equal( - dxf, - c(FALSE, TRUE, TRUE, TRUE, TRUE, - FALSE, TRUE, TRUE, TRUE))) - expect_true(all.equal( - dxt, - c(TRUE, TRUE, TRUE, TRUE, TRUE, - TRUE, TRUE, TRUE, TRUE))) - expect_true(all.equal(ux, 0:1)) - expect_equal( - dri, - list(starts = c(1, 6), ends = c(5, 9)) - ) + dxf <- duplicated_tol(x, incl_first = FALSE) + dxt <- duplicated_tol(x, incl_first = TRUE) + ux <- unique_tol(x) + dri <- get_dup_run_inds(dxf) + expect_true(all.equal( + dxf, + c(FALSE, TRUE, TRUE, TRUE, TRUE, + FALSE, TRUE, TRUE, TRUE) + )) + expect_true(all.equal( + dxt, + c(TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE) + )) + expect_true(all.equal(ux, 0:1)) + expect_equal( + dri, + list(starts = c(1, 6), ends = c(5, 9)) + ) }) diff --git a/vignettes/distfromq.Rmd b/vignettes/distfromq.Rmd index eba2069..2aee0ad 100644 --- a/vignettes/distfromq.Rmd +++ b/vignettes/distfromq.Rmd @@ -127,44 +127,45 @@ p_cauchy_approx <- make_p_fn(ps = quantile_probs, cdf_cauchy_approx <- p_cauchy_approx(x) dplyr::bind_rows( - data.frame( - x = x, - y = cdf_lognormal, - dist = "Log normal" - ), - data.frame( - x = x, - y = cdf_lognormal_approx, - dist = "Spline interpolation,\nlog-normal tails" - ), - data.frame( - x = x, - y = cdf_normal_approx, - dist = "Spline interpolation,\nnormal tails" - ), - data.frame( - x = x, - y = cdf_cauchy_approx, - dist = "Spline interpolation,\nCauchy tails" - ) + data.frame( + x = x, + y = cdf_lognormal, + dist = "Log normal" + ), + data.frame( + x = x, + y = cdf_lognormal_approx, + dist = "Spline interpolation,\nlog-normal tails" + ), + data.frame( + x = x, + y = cdf_normal_approx, + dist = "Spline interpolation,\nnormal tails" + ), + data.frame( + x = x, + y = cdf_cauchy_approx, + dist = "Spline interpolation,\nCauchy tails" + ) ) %>% - ggplot() + - geom_line( - mapping = aes(x = x, y = y, color = dist, linetype = dist), - size = 0.8) + - geom_point( - data = data.frame(q = q_lognormal, p = quantile_probs), - mapping = aes(x = q, y = p), - size = 1.2 - ) + - scale_color_viridis_d( - "Distribution", - end = 0.9 - ) + - scale_linetype_discrete("Distribution") + - ylab("Probability") + - xlab("") + - theme_bw() + ggplot() + + geom_line( + mapping = aes(x = x, y = y, color = dist, linetype = dist), + size = 0.8 + ) + + geom_point( + data = data.frame(q = q_lognormal, p = quantile_probs), + mapping = aes(x = q, y = p), + size = 1.2 + ) + + scale_color_viridis_d( + "Distribution", + end = 0.9 + ) + + scale_linetype_discrete("Distribution") + + ylab("Probability") + + xlab("") + + theme_bw() ``` The following illustrates differences in the density estimates. @@ -188,85 +189,87 @@ pdf_normal_approx <- d_normal_approx(x) pdf_cauchy_approx <- d_cauchy_approx(x) dplyr::bind_rows( - data.frame( - x = x, - y = pdf_lognormal, - dist = "Log normal" - ), - data.frame( - x = x, - y = pdf_lognormal_approx, - dist = "Spline interpolation,\nlog-normal tails" - ), - data.frame( - x = x, - y = pdf_normal_approx, - dist = "Spline interpolation,\nnormal tails" - ), - data.frame( - x = x, - y = pdf_cauchy_approx, - dist = "Spline interpolation,\nCauchy tails" - ) + data.frame( + x = x, + y = pdf_lognormal, + dist = "Log normal" + ), + data.frame( + x = x, + y = pdf_lognormal_approx, + dist = "Spline interpolation,\nlog-normal tails" + ), + data.frame( + x = x, + y = pdf_normal_approx, + dist = "Spline interpolation,\nnormal tails" + ), + data.frame( + x = x, + y = pdf_cauchy_approx, + dist = "Spline interpolation,\nCauchy tails" + ) ) %>% - ggplot() + - geom_vline( - data = data.frame(q = q_lognormal), - mapping = aes(xintercept = q), - size = 0.2 - ) + - geom_line( - mapping = aes(x = x, y = y, color = dist, linetype = dist), - size = 0.8) + - scale_color_viridis_d( - "Distribution", - end = 0.9 - ) + - scale_linetype_discrete("Distribution") + - ylab("Probability Density") + - xlab("") + - theme_bw() + ggplot() + + geom_vline( + data = data.frame(q = q_lognormal), + mapping = aes(xintercept = q), + size = 0.2 + ) + + geom_line( + mapping = aes(x = x, y = y, color = dist, linetype = dist), + size = 0.8 + ) + + scale_color_viridis_d( + "Distribution", + end = 0.9 + ) + + scale_linetype_discrete("Distribution") + + ylab("Probability Density") + + xlab("") + + theme_bw() ``` We emphasize that the density is a step function. We illustrate this by setting the `n_grid` parameter to the artificially low value of 1: ```{r} d_lognormal_approx_n_grid_1 <- make_d_fn(ps = quantile_probs, - qs = q_lognormal, - tail_dist = "lnorm", - interior_args = list(n_grid = 1)) + qs = q_lognormal, + tail_dist = "lnorm", + interior_args = list(n_grid = 1)) pdf_lognormal_approx_n_grid_1 <- d_lognormal_approx_n_grid_1(x) dplyr::bind_rows( - data.frame( - x = x, - y = pdf_lognormal_approx, - dist = "Spline interpolation,\n n_grid = 20" - ), - data.frame( - x = x, - y = pdf_lognormal_approx_n_grid_1, - dist = "Spline interpolation,\n n_grid = 1" - ) + data.frame( + x = x, + y = pdf_lognormal_approx, + dist = "Spline interpolation,\n n_grid = 20" + ), + data.frame( + x = x, + y = pdf_lognormal_approx_n_grid_1, + dist = "Spline interpolation,\n n_grid = 1" + ) ) %>% - ggplot() + - geom_vline( - data = data.frame(q = q_lognormal), - mapping = aes(xintercept = q), - size = 0.2 - ) + - geom_line( - mapping = aes(x = x, y = y, color = dist), - size = 0.8) + - scale_color_viridis_d( - "Distribution", - end = 0.7 - ) + - scale_linetype_discrete("Distribution") + - ylab("Probability Density") + - xlab("") + - theme_bw() + ggplot() + + geom_vline( + data = data.frame(q = q_lognormal), + mapping = aes(xintercept = q), + size = 0.2 + ) + + geom_line( + mapping = aes(x = x, y = y, color = dist), + size = 0.8 + ) + + scale_color_viridis_d( + "Distribution", + end = 0.7 + ) + + scale_linetype_discrete("Distribution") + + ylab("Probability Density") + + xlab("") + + theme_bw() ``` We can generate random deviates from the distribution estimates as follows: @@ -281,43 +284,43 @@ r_lognormal_approx <- make_r_fn(ps = quantile_probs, r_cauchy_approx <- make_r_fn(ps = quantile_probs, qs = q_lognormal, tail_dist = "cauchy") - -normal_approx_sample <- r_normal_approx(n=10000) -lognormal_approx_sample <- r_lognormal_approx(n=10000) -cauchy_approx_sample <- r_cauchy_approx(n=10000) + +normal_approx_sample <- r_normal_approx(n = 10000) +lognormal_approx_sample <- r_lognormal_approx(n = 10000) +cauchy_approx_sample <- r_cauchy_approx(n = 10000) bind_rows( - data.frame(x=normal_approx_sample, dist = "Spline interpolation,\nnormal tails"), - data.frame(x=lognormal_approx_sample, dist = "Spline interpolation,\nlog-normal tails"), - data.frame(x=cauchy_approx_sample, dist = "Spline interpolation,\nCauchy tails") + data.frame(x = normal_approx_sample, dist = "Spline interpolation,\nnormal tails"), + data.frame(x = lognormal_approx_sample, dist = "Spline interpolation,\nlog-normal tails"), + data.frame(x = cauchy_approx_sample, dist = "Spline interpolation,\nCauchy tails") ) %>% - ggplot() + - geom_density(mapping = aes(x = x, color = dist, linetype = dist)) + - scale_color_viridis_d( - "Distribution", - end = 0.9 - ) + - scale_linetype_discrete("Distribution") + - theme_bw() + ggplot() + + geom_density(mapping = aes(x = x, color = dist, linetype = dist)) + + scale_color_viridis_d( + "Distribution", + end = 0.9 + ) + + scale_linetype_discrete("Distribution") + + theme_bw() ``` The heavy tails of the Cauchy distribution dominate the display. Here we restrict the range of the horizontal axis: ```{r} bind_rows( - data.frame(x=normal_approx_sample, dist = "Spline interpolation,\nnormal tails"), - data.frame(x=lognormal_approx_sample, dist = "Spline interpolation,\nlog-normal tails"), - data.frame(x=cauchy_approx_sample, dist = "Spline interpolation,\nCauchy tails") + data.frame(x = normal_approx_sample, dist = "Spline interpolation,\nnormal tails"), + data.frame(x = lognormal_approx_sample, dist = "Spline interpolation,\nlog-normal tails"), + data.frame(x = cauchy_approx_sample, dist = "Spline interpolation,\nCauchy tails") ) %>% - ggplot() + - geom_density(mapping = aes(x = x, color = dist, linetype = dist)) + - scale_color_viridis_d( - "Distribution", - end = 0.9 - ) + - scale_linetype_discrete("Distribution") + - xlim(-100, 300) + - theme_bw() + ggplot() + + geom_density(mapping = aes(x = x, color = dist, linetype = dist)) + + scale_color_viridis_d( + "Distribution", + end = 0.9 + ) + + scale_linetype_discrete("Distribution") + + xlim(-100, 300) + + theme_bw() ``` Finally, we display quantile function estimates: @@ -342,39 +345,40 @@ quantiles_cauchy_approx <- q_cauchy_approx(ps) dplyr::bind_rows( - data.frame( - x = ps, - y = quantiles_lognormal, - dist = "Log normal" - ), - data.frame( - x = ps, - y = quantiles_normal_approx, - dist = "Spline interpolation,\nnormal tails" - ), - data.frame( - x = ps, - y = quantiles_lognormal_approx, - dist = "Spline interpolation,\nlognormal tails" - ), - data.frame( - x = ps, - y = quantiles_cauchy_approx, - dist = "Spline interpolation,\nCauchy tails" - ) + data.frame( + x = ps, + y = quantiles_lognormal, + dist = "Log normal" + ), + data.frame( + x = ps, + y = quantiles_normal_approx, + dist = "Spline interpolation,\nnormal tails" + ), + data.frame( + x = ps, + y = quantiles_lognormal_approx, + dist = "Spline interpolation,\nlognormal tails" + ), + data.frame( + x = ps, + y = quantiles_cauchy_approx, + dist = "Spline interpolation,\nCauchy tails" + ) ) %>% - ggplot() + - geom_line( - mapping = aes(x = x, y = y, color = dist, linetype = dist), - size = 0.8) + - scale_color_viridis_d( - "Distribution", - end = 0.9 - ) + - scale_linetype_discrete("Distribution") + - ylab("Quantile") + - xlab("Probability Level") + - theme_bw() + ggplot() + + geom_line( + mapping = aes(x = x, y = y, color = dist, linetype = dist), + size = 0.8 + ) + + scale_color_viridis_d( + "Distribution", + end = 0.9 + ) + + scale_linetype_discrete("Distribution") + + ylab("Quantile") + + xlab("Probability Level") + + theme_bw() ``` ## Example 2: lognormal tails with a point mass at 0 @@ -409,22 +413,23 @@ p_lognormal_approx <- make_p_fn(ps = ps, cdf_lognormal_approx <- p_lognormal_approx(x) data.frame( - x = x, - y = cdf_lognormal_approx + x = x, + y = cdf_lognormal_approx ) %>% - ggplot() + - geom_line( - mapping = aes(x = x, y = y), - size = 0.8) + - geom_point( - data = data.frame(q = qs, p = ps), - mapping = aes(x = q, y = p), - size = 1.2 - ) + - ylim(0, 1) + - ylab("Probability") + - xlab("") + - theme_bw() + ggplot() + + geom_line( + mapping = aes(x = x, y = y), + size = 0.8 + ) + + geom_point( + data = data.frame(q = qs, p = ps), + mapping = aes(x = q, y = p), + size = 1.2 + ) + + ylim(0, 1) + + ylab("Probability") + + xlab("") + + theme_bw() ``` As expected, the quantile function inverts the cdf. @@ -438,52 +443,54 @@ q_lognormal_approx <- make_q_fn(ps = ps, qf_lognormal_approx <- q_lognormal_approx(plot_ps) data.frame( - x = plot_ps, - y = qf_lognormal_approx + x = plot_ps, + y = qf_lognormal_approx ) %>% - ggplot() + - geom_line( - mapping = aes(x = x, y = y), - size = 0.8) + - geom_point( - data = data.frame(q = ps, p = qs), - mapping = aes(x = q, y = p), - size = 1.2 - ) + - xlim(0, 1) + - xlab("Probability") + - ylab("") + - theme_bw() + ggplot() + + geom_line( + mapping = aes(x = x, y = y), + size = 0.8 + ) + + geom_point( + data = data.frame(q = ps, p = qs), + mapping = aes(x = q, y = p), + size = 1.2 + ) + + xlim(0, 1) + + xlab("Probability") + + ylab("") + + theme_bw() ``` We can verify this by plotting the quantile function estimate with inverted axes on the same display as the cdf estimate: ```{r} dplyr::bind_rows( - data.frame( - x = x, - y = cdf_lognormal_approx, - method = "CDF Estimate" - ), - data.frame( - x = qf_lognormal_approx, - y = plot_ps, - method = "Flipped QF Estimate" - ) + data.frame( + x = x, + y = cdf_lognormal_approx, + method = "CDF Estimate" + ), + data.frame( + x = qf_lognormal_approx, + y = plot_ps, + method = "Flipped QF Estimate" + ) ) %>% - ggplot() + - geom_line( - mapping = aes(x = x, y = y, color = method, linetype = method), - size = 0.8) + - geom_point( - data = data.frame(q = qs, p = ps), - mapping = aes(x = q, y = p), - size = 1.2 - ) + - ylim(0, 1) + - ylab("Probability") + - xlab("") + - theme_bw() + ggplot() + + geom_line( + mapping = aes(x = x, y = y, color = method, linetype = method), + size = 0.8 + ) + + geom_point( + data = data.frame(q = qs, p = ps), + mapping = aes(x = q, y = p), + size = 1.2 + ) + + ylim(0, 1) + + ylab("Probability") + + xlab("") + + theme_bw() ``` @@ -495,8 +502,8 @@ r_fn <- make_r_fn(ps = ps, qs = qs, tail_dist = "lnorm") sampled_values_df <- data.frame(x = r_fn(10000)) ggplot(sampled_values_df) + - geom_histogram(mapping = aes(x = x), bins = 100) + - theme_bw() + geom_histogram(mapping = aes(x = x), bins = 100) + + theme_bw() mean(sampled_values_df$x == 0) ``` @@ -569,26 +576,26 @@ As described earlier, we use a piecewise linear representation of the CDF to avo ```{r} p_normal_approx_spline <- make_p_fn(ps = quantile_probs, - qs = quantile_values, - tail_dist = "norm", - interior_args = list(n_grid = NULL)) + qs = quantile_values, + tail_dist = "norm", + interior_args = list(n_grid = NULL)) q_normal_approx_spline <- make_q_fn(ps = quantile_probs, - qs = quantile_values, - tail_dist = "norm", - interior_args = list(n_grid = NULL)) + qs = quantile_values, + tail_dist = "norm", + interior_args = list(n_grid = NULL)) x <- seq(from = 0.0, to = 20.0, length = 5001) plot_df <- rbind( - data.frame( - x = x, - cdf = p_normal_approx(x), - type = "piecewise linear" - ), - data.frame( - x = x, - cdf = p_normal_approx_spline(x), - type = "spline" - ) + data.frame( + x = x, + cdf = p_normal_approx(x), + type = "piecewise linear" + ), + data.frame( + x = x, + cdf = p_normal_approx_spline(x), + type = "spline" + ) ) ggplot() + geom_line(data = plot_df, @@ -600,22 +607,22 @@ ggplot() + ```{r} ps <- seq(from = 0.0, to = 1.0, length = 5001) plot_df <- rbind( - data.frame( - p = ps, - qf = q_normal_approx(ps), - type = "piecewise linear" - ), - data.frame( - p = ps, - qf = q_normal_approx_spline(ps), - type = "spline" - ) + data.frame( + p = ps, + qf = q_normal_approx(ps), + type = "piecewise linear" + ), + data.frame( + p = ps, + qf = q_normal_approx_spline(ps), + type = "spline" + ) ) ggplot() + - geom_line(data = plot_df, - mapping = aes(x = p, y = qf, color = type, linetype = type)) + - geom_point(data = data.frame(x = quantile_probs, y = quantile_values), - mapping = aes(x = x, y = y)) + geom_line(data = plot_df, + mapping = aes(x = p, y = qf, color = type, linetype = type)) + + geom_point(data = data.frame(x = quantile_probs, y = quantile_values), + mapping = aes(x = x, y = y)) ``` We therefore recommend using the piecewise linear representation when using a quantile function or generating random deviates, which uses the quantile function in the background. @@ -632,29 +639,29 @@ p_normal_approx <- make_p_fn(ps = quantile_probs, qs = quantile_values, tail_dist = "norm") p_normal_approx_lin <- make_p_fn(ps = quantile_probs, - qs = quantile_values, - tail_dist = "norm", - interior_args = list(n_grid = 20)) + qs = quantile_values, + tail_dist = "norm", + interior_args = list(n_grid = 20)) q_normal_approx <- make_q_fn(ps = quantile_probs, qs = quantile_values, tail_dist = "norm") q_normal_approx_lin <- make_q_fn(ps = quantile_probs, - qs = quantile_values, - tail_dist = "norm", - interior_args = list(n_grid = 20)) + qs = quantile_values, + tail_dist = "norm", + interior_args = list(n_grid = 20)) x <- seq(from = 0.0, to = 20.0, length = 5001) plot_df <- rbind( - data.frame( - x = x, - cdf = p_normal_approx(x), - type = "spline" - ), - data.frame( - x = x, - cdf = p_normal_approx_lin(x), - type = "piecewise linear" - ) + data.frame( + x = x, + cdf = p_normal_approx(x), + type = "spline" + ), + data.frame( + x = x, + cdf = p_normal_approx_lin(x), + type = "piecewise linear" + ) ) ggplot() + geom_line(data = plot_df, @@ -667,20 +674,20 @@ ggplot() + ```{r} ps <- seq(from = 0.0, to = 1.0, length = 5001) plot_df <- rbind( - data.frame( - p = ps, - qf = q_normal_approx(ps), - type = "spline" - ), - data.frame( - p = ps, - qf = q_normal_approx_lin(ps), - type = "piecewise linear" - ) + data.frame( + p = ps, + qf = q_normal_approx(ps), + type = "spline" + ), + data.frame( + p = ps, + qf = q_normal_approx_lin(ps), + type = "piecewise linear" + ) ) ggplot() + - geom_line(data = plot_df, - mapping = aes(x = p, y = qf, color = type, linetype = type)) + - geom_point(data = data.frame(x = quantile_probs, y = quantile_values), - mapping = aes(x = x, y = y)) + geom_line(data = plot_df, + mapping = aes(x = p, y = qf, color = type, linetype = type)) + + geom_point(data = data.frame(x = quantile_probs, y = quantile_values), + mapping = aes(x = x, y = y)) ``` From 68a3267ffb941f47acd1d37b635563c13d80d212 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:47:27 -0400 Subject: [PATCH 09/14] Update mono_Hermite_spline.Rd --- man/mono_Hermite_spline.Rd | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/man/mono_Hermite_spline.Rd b/man/mono_Hermite_spline.Rd index a1885b6..cad29c1 100644 --- a/man/mono_Hermite_spline.Rd +++ b/man/mono_Hermite_spline.Rd @@ -8,17 +8,16 @@ interpolating a given set of points.} mono_Hermite_spline(x, y, m) } \arguments{ -\item{x}{vector giving the x coordinates of the points to be -interpolated. Alternatively a single plotting structure can -be specified: see ‘xy.coords’.} - -\item{y}{vector giving the y coordinates of the points to be -interpolated. Must be increasing or decreasing for ‘method = "hyman"’. +\item{x}{vector giving the x coordinates of the points to be interpolated. Alternatively a single plotting structure can be specified: see ‘xy.coords’.} -\item{m}{(for ‘splinefunH()’) vector of \emph{slopes} \eqn{m_i}{m[i]} at the points -\eqn{(x_i,y_i)}{(x[i],y[i])}; these together determine the \emph{H}ermite “spline” -which is piecewise cubic, (only) \emph{once} differentiable +\item{y}{vector giving the y coordinates of the points to be interpolated. +Must be increasing or decreasing for ‘method = "hyman"’. Alternatively a +single plotting structure can be specified: see ‘xy.coords’.} + +\item{m}{(for ‘splinefunH()’) vector of \emph{slopes} \eqn{m_i}{m[i]} at the +points \eqn{(x_i,y_i)}{(x[i],y[i])}; these together determine the +\emph{H}ermite “spline” which is piecewise cubic, (only) \emph{once} differentiable continuously.} } \value{ From 4583a76dde8da444669cdf73439c8c2c4029643d Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:53:11 -0400 Subject: [PATCH 10/14] Ignore dev folder --- .gitignore | 1 + dev/dev_history.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8480564..58fc63e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ inst/doc docs *.Rproj +/dev diff --git a/dev/dev_history.R b/dev/dev_history.R index cbd7f45..deab9f6 100644 --- a/dev/dev_history.R +++ b/dev/dev_history.R @@ -12,7 +12,7 @@ covr::report() devtools::test() testthat::test_dir("tests/testthat/") -# Run examples 22 +# Run examples devtools::run_examples() # autotest::autotest_package(test = TRUE) From d4338d28745c5cdd6004d60cd4b6eda86722e32a Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Sat, 27 Jul 2024 12:33:12 -0400 Subject: [PATCH 11/14] Fix indentation for `make_d_fn` test Co-authored-by: Evan Ray --- tests/testthat/test-make_d_fn.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-make_d_fn.R b/tests/testthat/test-make_d_fn.R index 688b44a..e527a77 100644 --- a/tests/testthat/test-make_d_fn.R +++ b/tests/testthat/test-make_d_fn.R @@ -56,7 +56,7 @@ test_that("make_d_fn generates error, no continuous component; test_that("make_d_fn generates error, no continuous component; -two point masses, non-zero, duplicated values second only", { + two point masses, non-zero, duplicated values second only", { ps <- seq(from = 0.3, to = 0.9, by = 0.1) qs <- c(rep(1.0, 1), rep(2.0, 6)) expect_error(make_d_fn(ps, qs)) From 04f30b690e0b64a6643934e3cc21416c0a2f5749 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Mon, 29 Jul 2024 10:49:59 -0400 Subject: [PATCH 12/14] Decrease code indentation --- tests/testthat/test-make_p_fn_make_q_fn.R | 152 ++++++++++---------- tests/testthat/test-split_disc_cont_ps_qs.R | 86 +++++------ 2 files changed, 123 insertions(+), 115 deletions(-) diff --git a/tests/testthat/test-make_p_fn_make_q_fn.R b/tests/testthat/test-make_p_fn_make_q_fn.R index d32fe2d..8ebafc6 100644 --- a/tests/testthat/test-make_p_fn_make_q_fn.R +++ b/tests/testthat/test-make_p_fn_make_q_fn.R @@ -139,70 +139,74 @@ test_that("make_p_fn, make_q_fn work, no continuous component; two point masses, }) -test_that("make_p_fn, make_q_fn work, no continuous component; - two point masses, non-zero, duplicated values first only", { - ps <- seq(from = 0.1, to = 0.4, by = 0.1) - qs <- c(rep(1.0, 3), rep(2.0, 1)) - - test_ps <- sort(c(1 / 3, 1.0, seq(from = 0.01, to = 0.99, by = 0.01))) - test_qs <- c(1, 2, qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) - - p_actual <- make_p_fn(ps, qs)(test_qs) - p_expected <- (1 / 3) * as.numeric(test_qs >= 1.0) + (2 / 3) * as.numeric(test_qs >= 2.0) - # plot(test_qs, p_actual); lines(test_qs, p_expected) - - q_actual <- make_q_fn(ps, qs)(test_ps) - q_expected <- ifelse(test_ps <= 1 / 3, 1.0, 2.0) - # plot(test_ps, q_actual); lines(test_ps, q_expected) - - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - - testthat::expect_equal(c(rep(1 / 3, sum(test_ps <= 1 / 3)), - rep(1, sum(test_ps > 1 / 3))), - make_p_fn(ps, qs)(q_actual), - tolerance = 1e-3) - # Commenting this test out -- fails due to floating point precision - # testthat::expect_equal(..., - # make_q_fn(ps, qs)(p_actual), - # tolerance = 1e-3) - }) - - -test_that("make_p_fn, make_q_fn work, no continuous component; - two point masses, non-zero, duplicated values second only", { - ps <- seq(from = 0.3, to = 0.9, by = 0.1) - qs <- c(rep(1.0, 1), rep(2.0, 6)) - - test_ps <- sort(c(1 / 3, 1.0, seq(from = 0.01, to = 0.99, by = 0.01))) - test_qs <- c(1, 2, qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) - - p_actual <- make_p_fn(ps, qs)(test_qs) - p_expected <- (1 / 3) * as.numeric(test_qs >= 1.0) + (2 / 3) * as.numeric(test_qs >= 2.0) - # plot(test_qs, p_actual); lines(test_qs, p_expected) - - q_actual <- make_q_fn(ps, qs)(test_ps) - q_expected <- ifelse(test_ps <= 1 / 3, 1.0, 2.0) - # plot(test_ps, q_actual); lines(test_ps, q_expected) - - testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) - - testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) - testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) - - testthat::expect_equal(c(rep(1 / 3, sum(test_ps <= 1 / 3)), - rep(1, sum(test_ps > 1 / 3))), - make_p_fn(ps, qs)(q_actual), - tolerance = 1e-3) - # Commenting this test out -- fails due to floating point precision - # testthat::expect_equal(..., - # make_q_fn(ps, qs)(p_actual), - # tolerance = 1e-3) - }) +test_that( + "make_p_fn, make_q_fn work, no continuous component; two point masses, non-zero, duplicated values first only", + { + ps <- seq(from = 0.1, to = 0.4, by = 0.1) + qs <- c(rep(1.0, 3), rep(2.0, 1)) + + test_ps <- sort(c(1 / 3, 1.0, seq(from = 0.01, to = 0.99, by = 0.01))) + test_qs <- c(1, 2, qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) + + p_actual <- make_p_fn(ps, qs)(test_qs) + p_expected <- (1 / 3) * as.numeric(test_qs >= 1.0) + (2 / 3) * as.numeric(test_qs >= 2.0) + # plot(test_qs, p_actual); lines(test_qs, p_expected) + + q_actual <- make_q_fn(ps, qs)(test_ps) + q_expected <- ifelse(test_ps <= 1 / 3, 1.0, 2.0) + # plot(test_ps, q_actual); lines(test_ps, q_expected) + + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + + testthat::expect_equal(c(rep(1 / 3, sum(test_ps <= 1 / 3)), + rep(1, sum(test_ps > 1 / 3))), + make_p_fn(ps, qs)(q_actual), + tolerance = 1e-3) + # Commenting this test out -- fails due to floating point precision + # testthat::expect_equal(..., + # make_q_fn(ps, qs)(p_actual), + # tolerance = 1e-3) + } +) + + +test_that( + "make_p_fn, make_q_fn work, no continuous component; two point masses, non-zero, duplicated values second only", + { + ps <- seq(from = 0.3, to = 0.9, by = 0.1) + qs <- c(rep(1.0, 1), rep(2.0, 6)) + + test_ps <- sort(c(1 / 3, 1.0, seq(from = 0.01, to = 0.99, by = 0.01))) + test_qs <- c(1, 2, qnorm(seq(from = 0.01, to = 0.99, by = 0.01), mean = 1, sd = 2)) + + p_actual <- make_p_fn(ps, qs)(test_qs) + p_expected <- (1 / 3) * as.numeric(test_qs >= 1.0) + (2 / 3) * as.numeric(test_qs >= 2.0) + # plot(test_qs, p_actual); lines(test_qs, p_expected) + + q_actual <- make_q_fn(ps, qs)(test_ps) + q_expected <- ifelse(test_ps <= 1 / 3, 1.0, 2.0) + # plot(test_ps, q_actual); lines(test_ps, q_expected) + + testthat::expect_equal(p_actual, p_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(p_actual[order(test_qs)]) >= 0)) + + testthat::expect_equal(q_actual, q_expected, tolerance = 1e-3) + testthat::expect_true(all(diff(q_actual[order(test_ps)]) >= 0)) + + testthat::expect_equal(c(rep(1 / 3, sum(test_ps <= 1 / 3)), + rep(1, sum(test_ps > 1 / 3))), + make_p_fn(ps, qs)(q_actual), + tolerance = 1e-3) + # Commenting this test out -- fails due to floating point precision + # testthat::expect_equal(..., + # make_q_fn(ps, qs)(p_actual), + # tolerance = 1e-3) + } +) test_that("make_p_fn, make_q_fn work, no continuous component; is_hurdle with one zero and one non-zero", { @@ -572,17 +576,19 @@ test_that("make_p_fn, make_q_fn well-behaved with 3 duplicated values, one at ze }) -test_that("make_p_fn, make_q_fn well-behaved: multiple duplicates and - floating point issues with discrete adjustments", { - ps <- c(.01, .025, seq(.05, .95, by = .05), .975, .99) - qs <- c(0, 0, 0, 0, 3, 6, 8, 8, 9, 11, 12, 13, 17, 21, 25, 27, 29, 30, 31, 33, 37, 52, 61) +test_that( + "make_p_fn, make_q_fn well-behaved: multiple duplicates and floating point issues with discrete adjustments", + { + ps <- c(.01, .025, seq(.05, .95, by = .05), .975, .99) + qs <- c(0, 0, 0, 0, 3, 6, 8, 8, 9, 11, 12, 13, 17, 21, 25, 27, 29, 30, 31, 33, 37, 52, 61) - q_hat <- distfromq:::make_q_fn(ps, qs) - testthat::expect_identical(q_hat(c(0.99, 1)), c(61, Inf)) + q_hat <- distfromq:::make_q_fn(ps, qs) + testthat::expect_identical(q_hat(c(0.99, 1)), c(61, Inf)) - p_hat <- distfromq::make_p_fn(ps, qs) - testthat::expect_identical(p_hat(c(61, Inf)), c(0.99, 1.0)) - }) + p_hat <- distfromq::make_p_fn(ps, qs) + testthat::expect_identical(p_hat(c(61, Inf)), c(0.99, 1.0)) + } +) test_that("make_p_fn result outputs values <= 1", { diff --git a/tests/testthat/test-split_disc_cont_ps_qs.R b/tests/testthat/test-split_disc_cont_ps_qs.R index 99cf71c..2957da0 100644 --- a/tests/testthat/test-split_disc_cont_ps_qs.R +++ b/tests/testthat/test-split_disc_cont_ps_qs.R @@ -91,7 +91,7 @@ test_that("split_disc_cont_ps_qs works, no continuous component; test_that("split_disc_cont_ps_qs works, no continuous component; - two point masses, non-zero, duplicated values second only", { + two point masses, non-zero, duplicated values second only", { ps <- seq(from = 0.3, to = 0.9, by = 0.1) qs <- c(rep(1.0, 1), rep(2.0, 6)) expect_equal( @@ -231,47 +231,49 @@ test_that("split_disc_cont_ps_qs works, two point masses, both from duplicated q }) -test_that("split_disc_cont_ps_qs works, two point masses, - is_hurdle with one zero and one non-zero with duplicated qs", { - # mixture of a LogNormal(0,1) with weight 0.6, - # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 - - # probabilities and quantiles for normal component - norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) - norm_qs <- qlnorm(norm_ps) - adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) - - # probabilities and quantiles for point mass at 0 - point_ps_0 <- 1.0 - point_qs_0 <- 0.0 - adj_point_ps_0 <- 0.3 - - # probabilities and quantiles for point mass at 1 - point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) - point_qs_1 <- rep(1.0, length(point_ps_1)) - adj_point_ps_1 <- plnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 - - ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) - qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) - dup_inds <- duplicated(ps) - ps <- ps[!dup_inds] - qs <- qs[!dup_inds] - - expect_equal( - split_disc_cont_ps_qs(ps, qs, is_hurdle = TRUE), - list( - disc_weight = 0.4, - disc_ps = c(0.75, 0.25), - disc_qs = c(0.0, 1.0), - cont_ps = sort(c(norm_ps)), - cont_qs = sort(c(norm_qs)), - disc_ps_range = list( - c(0.0, 0.3), - range(adj_point_ps_1) - ) - ) - ) - }) +test_that( + "split_disc_cont_ps_qs works, two point masses, is_hurdle with one zero and one non-zero with duplicated qs", + { + # mixture of a LogNormal(0,1) with weight 0.6, + # a point mass at 0 with weight 0.3, and a point mass at 1 with weight 0.1 + + # probabilities and quantiles for normal component + norm_ps <- seq(from = 0.1, to = 0.9, by = 0.1) + norm_qs <- qlnorm(norm_ps) + adj_norm_ps <- norm_ps * 0.6 + 0.3 * (norm_qs > 0.0) + 0.1 * (norm_qs > 1.0) + + # probabilities and quantiles for point mass at 0 + point_ps_0 <- 1.0 + point_qs_0 <- 0.0 + adj_point_ps_0 <- 0.3 + + # probabilities and quantiles for point mass at 1 + point_ps_1 <- seq(from = 0.0, to = 1.0, by = 0.1) + point_qs_1 <- rep(1.0, length(point_ps_1)) + adj_point_ps_1 <- plnorm(1.0) * 0.6 + 0.3 + point_ps_1 * 0.1 + + ps <- sort(c(adj_norm_ps, adj_point_ps_0, adj_point_ps_1)) + qs <- sort(c(norm_qs, point_qs_0, point_qs_1)) + dup_inds <- duplicated(ps) + ps <- ps[!dup_inds] + qs <- qs[!dup_inds] + + expect_equal( + split_disc_cont_ps_qs(ps, qs, is_hurdle = TRUE), + list( + disc_weight = 0.4, + disc_ps = c(0.75, 0.25), + disc_qs = c(0.0, 1.0), + cont_ps = sort(c(norm_ps)), + cont_qs = sort(c(norm_qs)), + disc_ps_range = list( + c(0.0, 0.3), + range(adj_point_ps_1) + ) + ) + ) + } +) test_that("split_disc_cont_ps_qs fails, one discrete component mismatched ps", { From 70a2a7ce7abd9afe96b10d3b6229d246b99670e3 Mon Sep 17 00:00:00 2001 From: Evan Ray Date: Thu, 15 Aug 2024 17:58:42 -0400 Subject: [PATCH 13/14] some more indentation cleanup in tests --- tests/testthat/test-make_d_fn.R | 32 ++++++----- tests/testthat/test-split_disc_cont_ps_qs.R | 64 +++++++++++---------- 2 files changed, 52 insertions(+), 44 deletions(-) diff --git a/tests/testthat/test-make_d_fn.R b/tests/testthat/test-make_d_fn.R index e527a77..83a4ee4 100644 --- a/tests/testthat/test-make_d_fn.R +++ b/tests/testthat/test-make_d_fn.R @@ -47,20 +47,24 @@ test_that("make_d_fn generates error, no continuous component; two point masses, }) -test_that("make_d_fn generates error, no continuous component; - two point masses, non-zero, duplicated values first only", { - ps <- seq(from = 0.1, to = 0.4, by = 0.1) - qs <- c(rep(1.0, 3), rep(2.0, 1)) - expect_error(make_d_fn(ps, qs)) - }) - - -test_that("make_d_fn generates error, no continuous component; - two point masses, non-zero, duplicated values second only", { - ps <- seq(from = 0.3, to = 0.9, by = 0.1) - qs <- c(rep(1.0, 1), rep(2.0, 6)) - expect_error(make_d_fn(ps, qs)) - }) +test_that( + "make_d_fn generates error, no continuous component; two point masses, non-zero, duplicated values first only", + { + ps <- seq(from = 0.1, to = 0.4, by = 0.1) + qs <- c(rep(1.0, 3), rep(2.0, 1)) + expect_error(make_d_fn(ps, qs)) + } +) + + +test_that( + "make_d_fn generates error, no continuous component; two point masses, non-zero, duplicated values second only", + { + ps <- seq(from = 0.3, to = 0.9, by = 0.1) + qs <- c(rep(1.0, 1), rep(2.0, 6)) + expect_error(make_d_fn(ps, qs)) + } +) test_that("make_d_fn generates error, no continuous component; is_hurdle with one zero and one non-zero", { diff --git a/tests/testthat/test-split_disc_cont_ps_qs.R b/tests/testthat/test-split_disc_cont_ps_qs.R index 2957da0..57097d7 100644 --- a/tests/testthat/test-split_disc_cont_ps_qs.R +++ b/tests/testthat/test-split_disc_cont_ps_qs.R @@ -74,36 +74,40 @@ test_that("split_disc_cont_ps_qs works, no continuous component; two point masse }) -test_that("split_disc_cont_ps_qs works, no continuous component; - two point masses, non-zero, duplicated values first only", { - ps <- seq(from = 0.1, to = 0.4, by = 0.1) - qs <- c(rep(1.0, 3), rep(2.0, 1)) - expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( - disc_weight = 1.0, - disc_ps = c(3 / 9, 6 / 9), disc_qs = c(1.0, 2.0), - cont_ps = numeric(), cont_qs = numeric(), - disc_ps_range = list(c(0.0, 3 / 9), c(3 / 9, 1.0)) - ) - ) - }) - - -test_that("split_disc_cont_ps_qs works, no continuous component; - two point masses, non-zero, duplicated values second only", { - ps <- seq(from = 0.3, to = 0.9, by = 0.1) - qs <- c(rep(1.0, 1), rep(2.0, 6)) - expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( - disc_weight = 1.0, - disc_ps = c(3 / 9, 6 / 9), disc_qs = c(1.0, 2.0), - cont_ps = numeric(), cont_qs = numeric(), - disc_ps_range = list(c(0.0, 3 / 9), c(3 / 9, 1.0)) - ) - ) - }) +test_that( + "split_disc_cont_ps_qs works, no continuous component; two point masses, non-zero, duplicated values first only", + { + ps <- seq(from = 0.1, to = 0.4, by = 0.1) + qs <- c(rep(1.0, 3), rep(2.0, 1)) + expect_equal( + split_disc_cont_ps_qs(ps, qs), + list( + disc_weight = 1.0, + disc_ps = c(3 / 9, 6 / 9), disc_qs = c(1.0, 2.0), + cont_ps = numeric(), cont_qs = numeric(), + disc_ps_range = list(c(0.0, 3 / 9), c(3 / 9, 1.0)) + ) + ) + } +) + + +test_that( + "split_disc_cont_ps_qs works, no continuous component; two point masses, non-zero, duplicated values second only", + { + ps <- seq(from = 0.3, to = 0.9, by = 0.1) + qs <- c(rep(1.0, 1), rep(2.0, 6)) + expect_equal( + split_disc_cont_ps_qs(ps, qs), + list( + disc_weight = 1.0, + disc_ps = c(3 / 9, 6 / 9), disc_qs = c(1.0, 2.0), + cont_ps = numeric(), cont_qs = numeric(), + disc_ps_range = list(c(0.0, 3 / 9), c(3 / 9, 1.0)) + ) + ) + } +) test_that("split_disc_cont_ps_qs works, no continuous component; is_hurdle with one zero and one non-zero", { From b8743c8afff1f5396481fbcec1a22cae35fcecef Mon Sep 17 00:00:00 2001 From: Evan Ray Date: Thu, 15 Aug 2024 18:03:35 -0400 Subject: [PATCH 14/14] indentation --- tests/testthat/test-split_disc_cont_ps_qs.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-split_disc_cont_ps_qs.R b/tests/testthat/test-split_disc_cont_ps_qs.R index 57097d7..be19669 100644 --- a/tests/testthat/test-split_disc_cont_ps_qs.R +++ b/tests/testthat/test-split_disc_cont_ps_qs.R @@ -80,13 +80,13 @@ test_that( ps <- seq(from = 0.1, to = 0.4, by = 0.1) qs <- c(rep(1.0, 3), rep(2.0, 1)) expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( + split_disc_cont_ps_qs(ps, qs), + list( disc_weight = 1.0, disc_ps = c(3 / 9, 6 / 9), disc_qs = c(1.0, 2.0), cont_ps = numeric(), cont_qs = numeric(), disc_ps_range = list(c(0.0, 3 / 9), c(3 / 9, 1.0)) - ) + ) ) } ) @@ -98,13 +98,13 @@ test_that( ps <- seq(from = 0.3, to = 0.9, by = 0.1) qs <- c(rep(1.0, 1), rep(2.0, 6)) expect_equal( - split_disc_cont_ps_qs(ps, qs), - list( + split_disc_cont_ps_qs(ps, qs), + list( disc_weight = 1.0, disc_ps = c(3 / 9, 6 / 9), disc_qs = c(1.0, 2.0), cont_ps = numeric(), cont_qs = numeric(), disc_ps_range = list(c(0.0, 3 / 9), c(3 / 9, 1.0)) - ) + ) ) } )