From ae23a4f52036fa20c8e931dd7a1611299e5169e2 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sat, 16 Nov 2024 11:01:07 -0500 Subject: [PATCH 01/11] test fixup --- R/broom.R | 81 +++++++++++++++++------------------------ inst/tinytest/test-by.R | 81 +++++++++++++++++++++-------------------- 2 files changed, 75 insertions(+), 87 deletions(-) diff --git a/R/broom.R b/R/broom.R index 4d77c764c..67d17ea98 100644 --- a/R/broom.R +++ b/R/broom.R @@ -9,86 +9,72 @@ generics::glance #' tidy helper -#' +#' #' @noRd #' @export tidy.comparisons <- function(x, ...) { - insight::check_if_installed("tibble") - out <- tibble::as_tibble(x) - if (!"term" %in% names(out)) { - lab <- seq_len(nrow(out)) - if ("group" %in% colnames(out) || is.character(attr(x, "by"))) { - tmp <- c("group", attr(x, "by")) - tmp <- Filter(function(j) j %in% colnames(x), tmp) - if (length(tmp) > 0) { - tmp <- do.call(paste, out[, tmp]) - if (anyDuplicated(tmp)) { - tmp <- paste(seq_len(nrow(out)), tmp) - } - lab <- tmp - } - } - out[["term"]] <- lab - } - return(out) + insight::check_if_installed("tibble") + out <- tibble::as_tibble(x) + return(out) } #' tidy helper -#' +#' #' @noRd #' @export tidy.slopes <- tidy.comparisons #' tidy helper -#' +#' #' @noRd #' @export tidy.predictions <- tidy.comparisons #' tidy helper -#' +#' #' @noRd #' @export tidy.hypotheses <- tidy.comparisons #' tidy helper -#' +#' #' @noRd #' @export tidy.marginalmeans <- function(x, ...) { - insight::check_if_installed("tibble") - tibble::as_tibble(x) + insight::check_if_installed("tibble") + tibble::as_tibble(x) } #' @noRd #' @export glance.slopes <- function(x, ...) { - insight::check_if_installed("insight") - insight::check_if_installed("modelsummary") - model <- tryCatch(attr(x, "model"), error = function(e) NULL) - if (is.null(model) || isTRUE(checkmate::check_string(model))) { - model <- tryCatch(attr(x, "call")[["model"]], error = function(e) NULL) - } - gl <- suppressMessages(suppressWarnings(try( - modelsummary::get_gof(model, ...), silent = TRUE))) - if (inherits(gl, "data.frame")) { - out <- data.frame(gl) - } else { - out <- NULL - } - vcov.type <- attr(x, "vcov.type") - if (is.null(out) && !is.null(vcov.type)) { - out <- data.frame("vcov.type" = vcov.type) - } else if (!is.null(out)) { - out[["vcov.type"]] <- vcov.type - } - out <- tibble::as_tibble(out) - return(out) + insight::check_if_installed("insight") + insight::check_if_installed("modelsummary") + model <- tryCatch(attr(x, "model"), error = function(e) NULL) + if (is.null(model) || isTRUE(checkmate::check_string(model))) { + model <- tryCatch(attr(x, "call")[["model"]], error = function(e) NULL) + } + gl <- suppressMessages(suppressWarnings(try( + modelsummary::get_gof(model, ...), + silent = TRUE))) + if (inherits(gl, "data.frame")) { + out <- data.frame(gl) + } else { + out <- NULL + } + vcov.type <- attr(x, "vcov.type") + if (is.null(out) && !is.null(vcov.type)) { + out <- data.frame("vcov.type" = vcov.type) + } else if (!is.null(out)) { + out[["vcov.type"]] <- vcov.type + } + out <- tibble::as_tibble(out) + return(out) } @@ -109,4 +95,5 @@ glance.hypotheses <- glance.slopes #' @noRd #' @export -glance.marginalmeans <- glance.slopes \ No newline at end of file +glance.marginalmeans <- glance.slopes + diff --git a/inst/tinytest/test-by.R b/inst/tinytest/test-by.R index fded001c3..8eebe4898 100644 --- a/inst/tinytest/test-by.R +++ b/inst/tinytest/test-by.R @@ -66,9 +66,10 @@ mod <- lm(mpg ~ factor(cyl) * hp + wt, data = dat) mar <- margins(mod, at = list(cyl = unique(dat$cyl))) mar <- data.frame(summary(mar)) mfx <- slopes( - mod, - by = "cyl", - newdata = datagrid(cyl = c(4, 6, 8), grid_type = "counterfactual")) + mod, + by = "cyl", + newdata = datagrid(cyl = c(4, 6, 8), grid_type = "counterfactual")) +mfx <- mfx[order(mfx$term, mfx$contrast), ] expect_equivalent(mfx$estimate, mar$AME) expect_equivalent(mfx$std.error, mar$SE, tolerance = 1e6) @@ -77,9 +78,9 @@ expect_equivalent(mfx$std.error, mar$SE, tolerance = 1e6) # issue #434 by with character precitors dat <- read.csv("https://vincentarelbundock.github.io/Rdatasets/csv/AER/Affairs.csv") mod <- glm( - affairs ~ children + gender + yearsmarried, - family = poisson, - data = dat) + affairs ~ children + gender + yearsmarried, + family = poisson, + data = dat) p <- predictions(mod, by = "children") expect_equivalent(nrow(p), 2) expect_false(anyNA(p$estimate)) @@ -97,8 +98,8 @@ cmp <- comparisons(mod, type = "probs", by = "group") expect_equivalent(nrow(cmp), 9) by <- data.frame( - group = c("3", "4", "5"), - by = c("(3,4)", "(3,4)", "(5)")) + group = c("3", "4", "5"), + by = c("(3,4)", "(3,4)", "(5)")) p1 <- predictions(mod, type = "probs") p2 <- predictions(mod, type = "probs", by = by) p3 <- predictions(mod, type = "probs", by = by, hypothesis = "sequential") @@ -113,26 +114,26 @@ cmp <- comparisons(mod, type = "probs", by = "am") expect_equivalent(nrow(cmp), 18) cmp <- comparisons( - mod, - variables = "am", - by = by, - type = "probs") + mod, + variables = "am", + by = by, + type = "probs") expect_equivalent(nrow(cmp), 2) cmp <- comparisons( - mod, - variables = "am", - by = by, - hypothesis = "sequential", - type = "probs") + mod, + variables = "am", + by = by, + hypothesis = "sequential", + type = "probs") expect_equivalent(nrow(cmp), 1) # Issue #481: warning on missing by categories mod <- nnet::multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE) by <- data.frame( - by = c("4", "5"), - group = 4:5) + by = c("4", "5"), + group = 4:5) expect_warning(comparisons(mod, variables = "mpg", newdata = "mean", by = by)) expect_warning(predictions(mod, newdata = "mean", by = by)) @@ -159,13 +160,13 @@ expect_equivalent(nrow(pre2), 96) dat <- mtcars mod <- glm(gear ~ cyl + am, family = poisson, data = dat) mfx <- avg_slopes( - mod, - by = c("cyl", "am"), - newdata = datagrid( - cyl = unique, - am = unique, - grid_type = "counterfactual")) |> - dplyr::arrange(term, cyl, am) + mod, + by = c("cyl", "am"), + newdata = datagrid( + cyl = unique, + am = unique, + grid_type = "counterfactual")) |> + dplyr::arrange(term, cyl, am) mar <- margins(mod, at = list(cyl = unique(dat$cyl), am = unique(dat$am))) mar <- summary(mar) # margins doesn't treat the binary am as binary automatically @@ -179,16 +180,16 @@ dat$cyl <- factor(dat$cyl) dat$am <- as.logical(dat$am) mod <- glm(gear ~ cyl + am, family = poisson, data = dat) mfx <- comparisons( - mod, - by = c("cyl", "am"), - newdata = datagrid( - cyl = unique, - am = unique, - grid_type = "counterfactual")) + mod, + by = c("cyl", "am"), + newdata = datagrid( + cyl = unique, + am = unique, + grid_type = "counterfactual")) mfx <- tidy(mfx) -mfx <- mfx[order(mfx$term, mfx$contrast, mfx$cyl, mfx$am),] +mfx <- mfx[order(mfx$term, mfx$contrast, mfx$cyl, mfx$am), ] mar <- margins(mod, at = list(cyl = unique(dat$cyl), am = unique(dat$am))) mar <- summary(mar) expect_equivalent(mfx$estimate, mar$AME, tolerance = tol) @@ -200,9 +201,9 @@ dat <- transform(mtcars, vs = vs, am = as.factor(am), cyl = as.factor(cyl)) mod <- lm(mpg ~ qsec + am + cyl, dat) fun <- \(hi, lo) mean(hi) / mean(lo) cmp1 <- comparisons(mod, variables = "cyl", comparison = fun, by = "am") |> - dplyr::arrange(am, contrast) + dplyr::arrange(am, contrast) cmp2 <- comparisons(mod, variables = "cyl", comparison = "ratioavg", by = "am") |> - dplyr::arrange(am, contrast) + dplyr::arrange(am, contrast) expect_equivalent(cmp1$estimate, cmp2$estimate) expect_equivalent(cmp1$std.error, cmp2$std.error) expect_equal(nrow(cmp1), 4) @@ -218,18 +219,18 @@ cmp2 <- comparisons(mod, variables = "am") %>% dplyr::group_by(cyl) %>% dplyr::summarize(estimate = mean(estimate), .groups = "keep") |> dplyr::ungroup() -cmp3 <- predictions(mod) |> - aggregate(estimate ~ am + cyl, FUN = mean) |> - aggregate(estimate ~ cyl, FUN = diff) +cmp3 <- predictions(mod) |> + aggregate(estimate ~ am + cyl, FUN = mean) |> + aggregate(estimate ~ cyl, FUN = diff) expect_equivalent(cmp1$estimate, cmp2$estimate) expect_equivalent(cmp1$estimate, cmp3$estimate) # Issue #1058 tmp <- mtcars -tmp <- tmp[c('mpg', 'cyl', 'hp')] +tmp <- tmp[c("mpg", "cyl", "hp")] tmp$cyl <- as.factor(tmp$cyl) # 3 levels -tmp$hp <- as.factor(tmp$hp) +tmp$hp <- as.factor(tmp$hp) bygrid <- datagrid(newdata = tmp, by = "cyl", hp = unique) expect_equivalent(nrow(bygrid), 23) From e94e3df188a138f282eb087bc49cb4a7fef5fe91 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sat, 16 Nov 2024 11:11:49 -0500 Subject: [PATCH 02/11] test_print --- .../print-comparisons_1focal_dataframe.txt | 11 +++++++++++ .../print-comparisons_1focal_datagrid.txt | 10 ++++++++++ .../_tinysnapshot/print-predictions_datagrid.txt | 13 +++++++++++++ .../_tinysnapshot/print-predictions_newdata.txt | 9 +++++++++ inst/tinytest/test-print.R | 15 ++++++--------- 5 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 inst/tinytest/_tinysnapshot/print-comparisons_1focal_dataframe.txt create mode 100644 inst/tinytest/_tinysnapshot/print-comparisons_1focal_datagrid.txt create mode 100644 inst/tinytest/_tinysnapshot/print-predictions_datagrid.txt create mode 100644 inst/tinytest/_tinysnapshot/print-predictions_newdata.txt diff --git a/inst/tinytest/_tinysnapshot/print-comparisons_1focal_dataframe.txt b/inst/tinytest/_tinysnapshot/print-comparisons_1focal_dataframe.txt new file mode 100644 index 000000000..3c490b349 --- /dev/null +++ b/inst/tinytest/_tinysnapshot/print-comparisons_1focal_dataframe.txt @@ -0,0 +1,11 @@ + + Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % hp + 5.28 1.08 4.89 <0.001 19.9 3.16 7.39 120 + 5.28 1.08 4.89 <0.001 19.9 3.16 7.39 120 + +Term: am +Type: response +Comparison: 1 - 0 +Columns: rowid, term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted_lo, predicted_hi, predicted, am, hp, mpg + + diff --git a/inst/tinytest/_tinysnapshot/print-comparisons_1focal_datagrid.txt b/inst/tinytest/_tinysnapshot/print-comparisons_1focal_datagrid.txt new file mode 100644 index 000000000..738871a34 --- /dev/null +++ b/inst/tinytest/_tinysnapshot/print-comparisons_1focal_datagrid.txt @@ -0,0 +1,10 @@ + + Term am hp Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % + am 0 120 5.28 1.08 4.89 <0.001 19.9 3.16 7.39 + am 1 120 5.28 1.08 4.89 <0.001 19.9 3.16 7.39 + +Type: response +Comparison: 1 - 0 +Columns: rowid, term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, am, hp, predicted_lo, predicted_hi, predicted, mpg + + diff --git a/inst/tinytest/_tinysnapshot/print-predictions_datagrid.txt b/inst/tinytest/_tinysnapshot/print-predictions_datagrid.txt new file mode 100644 index 000000000..ccdc52baa --- /dev/null +++ b/inst/tinytest/_tinysnapshot/print-predictions_datagrid.txt @@ -0,0 +1,13 @@ + + PClass SexCode Estimate Pr(>|z|) S 2.5 % 97.5 % + 1st 0 0.4641 0.538 0.9 0.3538 0.578 + 1st 1 0.9469 <0.001 28.5 0.8735 0.979 + 2nd 0 0.0663 <0.001 31.4 0.0301 0.140 + 2nd 1 0.8784 <0.001 27.4 0.7879 0.934 + 3rd 0 0.1146 <0.001 53.1 0.0740 0.173 + 3rd 1 0.4497 0.391 1.4 0.3400 0.565 + +Type: invlink(link) +Columns: rowid, estimate, p.value, s.value, conf.low, conf.high, Age, PClass, SexCode, Survived + + diff --git a/inst/tinytest/_tinysnapshot/print-predictions_newdata.txt b/inst/tinytest/_tinysnapshot/print-predictions_newdata.txt new file mode 100644 index 000000000..ffcea2d47 --- /dev/null +++ b/inst/tinytest/_tinysnapshot/print-predictions_newdata.txt @@ -0,0 +1,9 @@ + + Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % am hp + 19.5 0.739 26.4 <0.001 508.8 18.1 21.0 0 120 + 24.8 0.809 30.7 <0.001 683.5 23.2 26.4 1 120 + +Type: response +Columns: rowid, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, am, hp, mpg + + diff --git a/inst/tinytest/test-print.R b/inst/tinytest/test-print.R index 77cb04ee9..83da8bd6a 100644 --- a/inst/tinytest/test-print.R +++ b/inst/tinytest/test-print.R @@ -16,9 +16,6 @@ expect_snapshot_print(comparisons(mod, by = "gear"), "print-comparisons_by") dat <- read.csv("https://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Titanic.csv") m <- glm(Survived ~ Age * PClass * SexCode, data = dat, family = binomial) p <- predictions(m, newdata = datagrid(PClass = unique, SexCode = 0:1)) - -exit_file("TODO") - expect_snapshot_print(p, "print-predictions_datagrid") @@ -26,16 +23,16 @@ expect_snapshot_print(p, "print-predictions_datagrid") mod <- lm(mpg ~ hp + am, data = mtcars) expect_snapshot_print( - comparisons(mod, variables = "am", newdata = data.frame(am = 0:1, hp = 120)), - "print-comparisons_1focal_dataframe") + comparisons(mod, variables = "am", newdata = data.frame(am = 0:1, hp = 120)), + "print-comparisons_1focal_dataframe") expect_snapshot_print( - comparisons(mod, variables = "am", newdata = datagrid(am = 0:1, hp = 120)), - "print-comparisons_1focal_datagrid") + comparisons(mod, variables = "am", newdata = datagrid(am = 0:1, hp = 120)), + "print-comparisons_1focal_datagrid") expect_snapshot_print( - predictions(mod, newdata = data.frame(am = 0:1, hp = 120)), - "print-predictions_newdata") + predictions(mod, newdata = data.frame(am = 0:1, hp = 120)), + "print-predictions_newdata") rm(list = ls()) From d9a944bf7995b4781a458c645ba94c02b68c29df Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sat, 16 Nov 2024 11:18:57 -0500 Subject: [PATCH 03/11] bump --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bf459e6ce..ed3c776ba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: marginaleffects Title: Predictions, Comparisons, Slopes, Marginal Means, and Hypothesis Tests -Version: 0.23.0.5 +Version: 0.23.0.6 Authors@R: c(person(given = "Vincent", family = "Arel-Bundock", From c5116ab3bee28ae1d70ad31e5cb7e1f6844a2207 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sat, 16 Nov 2024 11:56:40 -0500 Subject: [PATCH 04/11] issue #1270 print by variable in comparisons --- R/print.R | 502 +++++++++--------- .../print-comparisons_by_and_variables.txt | 10 + inst/tinytest/test-print.R | 6 + 3 files changed, 267 insertions(+), 251 deletions(-) create mode 100644 inst/tinytest/_tinysnapshot/print-comparisons_by_and_variables.txt diff --git a/R/print.R b/R/print.R index b43a92b15..8da427019 100644 --- a/R/print.R +++ b/R/print.R @@ -17,14 +17,14 @@ knit_print.marginaleffects <- function(x, ...) { #' Print `marginaleffects` objects -#' +#' #' @description #' This function controls the text which is printed to the console when one of the core `marginalefffects` functions is called and the object is returned: `predictions()`, `comparisons()`, `slopes()`, `hypotheses()`, `avg_predictions()`, `avg_comparisons()`, `avg_slopes()`. -#' +#' #' All of those functions return standard data frames. Columns can be extracted by name, `predictions(model)$estimate`, and all the usual data manipulation functions work out-of-the-box: `colnames()`, `head()`, `subset()`, `dplyr::filter()`, `dplyr::arrange()`, etc. -#' +#' #' Some of the data columns are not printed by default. You can disable pretty printing and print the full results as a standard data frame using the `style` argument or by applying `as.data.frame()` on the object. See examples below. -#' +#' #' @param x An object produced by one of the `marginaleffects` package functions. #' @param style "summary", "data.frame", or "tinytable" #' @param digits The number of digits to display. @@ -41,11 +41,11 @@ knit_print.marginaleffects <- function(x, ...) { #' mod <- lm(mpg ~ hp + am + factor(gear), data = mtcars) #' p <- predictions(mod, by = c("am", "gear")) #' p -#' +#' #' subset(p, am == 1) -#' +#' #' print(p, style = "data.frame") -#' +#' #' data.frame(p) #' print.marginaleffects <- function(x, @@ -58,282 +58,285 @@ print.marginaleffects <- function(x, type = getOption("marginaleffects_print_type", default = TRUE), column_names = getOption("marginaleffects_print_column_names", default = TRUE), ...) { + checkmate::assert_number(digits) + checkmate::assert_number(topn) + checkmate::assert_number(nrows) + checkmate::assert_choice( + style, + choices = c("data.frame", "summary", "tinytable", "html", "latex", "markdown", "typst") + ) + + if (isTRUE(style == "data.frame")) { + print(as.data.frame(x)) + return(invisible(x)) + } - checkmate::assert_number(digits) - checkmate::assert_number(topn) - checkmate::assert_number(nrows) - checkmate::assert_choice( - style, - choices = c("data.frame", "summary", "tinytable", "html", "latex", "markdown", "typst") - ) - - if (isTRUE(style == "data.frame")) { - print(as.data.frame(x)) - return(invisible(x)) - } + out <- x - out <- x + nrows <- max(nrows, 2 * topn) - nrows <- max(nrows, 2 * topn) + if ("group" %in% colnames(out) && + all(out$group == "main_marginaleffects")) { + out$group <- NULL + } - if ("group" %in% colnames(out) && - all(out$group == "main_marginaleffects")) { - out$group <- NULL - } - - # subset before rounding so that digits match top and bottom rows - if (nrow(out) > nrows) { - out <- rbind(utils::head(out, topn), utils::tail(out, topn)) - splitprint <- TRUE + # subset before rounding so that digits match top and bottom rows + if (nrow(out) > nrows) { + out <- rbind(utils::head(out, topn), utils::tail(out, topn)) + splitprint <- TRUE + } else { + splitprint <- FALSE + } + + # round and replace NAs + ps <- c("p.value", "p.value.nonsup", "p.value.noninf", "p.value.equiv") + + for (i in seq_along(out)) { + if (colnames(out)[i] %in% ps) { + out[[i]] <- format.pval(out[[i]], digits = digits, eps = p_eps) + } else if (isTRUE("s.value" == colnames(out)[i])) { + out[[i]] <- sprintf("%.1f", out[[i]]) } else { - splitprint <- FALSE + out[[i]] <- format(out[[i]], digits = digits) } + } - # round and replace NAs - ps <- c("p.value", "p.value.nonsup", "p.value.noninf", "p.value.equiv") - - for (i in seq_along(out)) { - if (colnames(out)[i] %in% ps) { - out[[i]] <- format.pval(out[[i]], digits = digits, eps = p_eps) - } else if (isTRUE("s.value" == colnames(out)[i])) { - out[[i]] <- sprintf("%.1f", out[[i]]) - } else { - out[[i]] <- format(out[[i]], digits = digits) - } - } + if (is.null(attr(x, "conf_level"))) { + alpha <- NULL + } else { + alpha <- 100 * (1 - attr(x, "conf_level")) + } - if (is.null(attr(x, "conf_level"))) { - alpha <- NULL + # contrast is sometimes useless + if ("contrast" %in% colnames(out) && all(out$contrast == "")) { + out$contrast <- NULL + } + + statistic_label <- attr(x, "statistic_label") + if (is.null(statistic_label)) { + if (any(out[["df"]] < Inf)) { + statistic_label <- "t" } else { - alpha <- 100 * (1 - attr(x, "conf_level")) + statistic_label <- "z" } + } - # contrast is sometimes useless - if ("contrast" %in% colnames(out) && all(out$contrast == "")) { - out$contrast <- NULL - } + # rename + dict <- c( + "group" = "Group", + "term" = "Term", + "contrast" = "Contrast", + "hypothesis" = "Hypothesis", + "value" = "Value", + "by" = "By", + "estimate" = "Estimate", + "std.error" = "Std. Error", + "statistic" = statistic_label, + "p.value" = sprintf("Pr(>|%s|)", statistic_label), + "s.value" = "S", + "conf.low" = ifelse(is.null(alpha), + "CI low", + sprintf("%.1f %%", alpha / 2)), + "conf.high" = ifelse(is.null(alpha), + "CI high", + sprintf("%.1f %%", 100 - alpha / 2)), + "pred.low" = ifelse(is.null(alpha), + "Pred low", + sprintf("Pred. %.1f %%", alpha / 2)), + "pred.high" = ifelse(is.null(alpha), + "Pred high", + sprintf("Pred. %.1f %%", 100 - alpha / 2)), + "pred.set" = ifelse(is.null(alpha), + "Pred Set", + sprintf("Pred Set %.1f %%", 100 - alpha / 2)), + "p.value.nonsup" = "p (NonSup)", + "p.value.noninf" = "p (NonInf)", + "p.value.equiv" = "p (Equiv)", + "df" = "Df", + "df1" = "Df 1", + "df2" = "Df 2" + ) + + + if (inherits(x, "marginalmeans")) { + dict["estimate"] <- "Mean" + } - statistic_label <- attr(x, "statistic_label") - if (is.null(statistic_label)) { - if (any(out[["df"]] < Inf)) { - statistic_label <- "t" - } else { - statistic_label <- "z" - } - } + # Subset columns + idx <- c( + names(dict), + grep("^contrast_", colnames(x), value = TRUE)) - # rename - dict <- c( - "group" = "Group", - "term" = "Term", - "contrast" = "Contrast", - "hypothesis" = "Hypothesis", - "value" = "Value", - "by" = "By", - "estimate" = "Estimate", - "std.error" = "Std. Error", - "statistic" = statistic_label, - "p.value" = sprintf("Pr(>|%s|)", statistic_label), - "s.value" = "S", - "conf.low" = ifelse(is.null(alpha), - "CI low", - sprintf("%.1f %%", alpha / 2)), - "conf.high" = ifelse(is.null(alpha), - "CI high", - sprintf("%.1f %%", 100 - alpha / 2)), - "pred.low" = ifelse(is.null(alpha), - "Pred low", - sprintf("Pred. %.1f %%", alpha / 2)), - "pred.high" = ifelse(is.null(alpha), - "Pred high", - sprintf("Pred. %.1f %%", 100 - alpha / 2)), - "pred.set" = ifelse(is.null(alpha), - "Pred Set", - sprintf("Pred Set %.1f %%", 100 - alpha / 2)), - "p.value.nonsup" = "p (NonSup)", - "p.value.noninf" = "p (NonInf)", - "p.value.equiv" = "p (Equiv)", - "df" = "Df", - "df1" = "Df 1", - "df2" = "Df 2" - ) - - - if (inherits(x, "marginalmeans")) { - dict["estimate"] <- "Mean" - } + # explicitly given by user in `datagrid()` or `by` or `newdata` + nd <- attr(x, "newdata") + if (is.null(nd)) { + nd <- attr(x, "newdata_newdata") + } + explicit <- tmp <- c( + "by", + attr(x, "hypothesis_by"), + attr(nd, "variables_datagrid"), + attr(nd, "newdata_variables_datagrid"), + attr(x, "variables_datagrid"), + attr(x, "newdata_variables_datagrid") + ) + if (isTRUE(checkmate::check_character(attr(x, "by")))) { + bycols <- attr(x, "by") + tmp <- c(tmp, attr(x, "by")) + } else { + bycols <- NULL + } + idx <- c(idx[1:grep("by", idx)], tmp, idx[(grep("by", idx) + 1):length(idx)]) + if (isTRUE(attr(nd, "newdata_newdata_explicit")) || isTRUE(attr(nd, "newdata_explicit"))) { + idx <- c(idx, colnames(nd)) + } - # Subset columns - idx <- c( - names(dict), - grep("^contrast_", colnames(x), value = TRUE)) + # drop useless columns: rowid + useless <- c("rowid", "rowidcf") - # explicitly given by user in `datagrid()` or `by` or `newdata` - nd <- attr(x, "newdata") - if (is.null(nd)) { - nd <- attr(x, "newdata_newdata") - } - explicit <- tmp <- c("by", - attr(x, "hypothesis_by"), - attr(nd, "variables_datagrid"), - attr(nd, "newdata_variables_datagrid"), - attr(x, "variables_datagrid"), - attr(x, "newdata_variables_datagrid") - ) - if (isTRUE(checkmate::check_character(attr(x, "by")))) { - tmp <- c(tmp, attr(x, "by")) - } - idx <- c(idx[1:grep("by", idx)], tmp, idx[(grep("by", idx) + 1):length(idx)]) - if (isTRUE(attr(nd, "newdata_newdata_explicit")) || isTRUE(attr(nd, "newdata_explicit"))) { - idx <- c(idx, colnames(nd)) - } + # drop useless columns: dv + dv <- tryCatch( + unlist(insight::find_response(attr(x, "model"), combine = TRUE), use.names = FALSE), + error = function(e) NULL) + useless <- c(useless, dv) - # drop useless columns: rowid - useless <- c("rowid", "rowidcf") + # selection style + data.table::setDT(out) - # drop useless columns: dv - dv <- tryCatch( - unlist(insight::find_response(attr(x, "model"), combine = TRUE), use.names = FALSE), - error = function(e) NULL) - useless <- c(useless, dv) + if ("term" %in% colnames(out) && all(out$term == "cross")) { + out[["term"]] <- NULL + colnames(out) <- gsub("^contrast_", "C: ", colnames(out)) + idx <- c(grep("C: .*", colnames(out), value = TRUE), idx) + } - # selection style - data.table::setDT(out) + print_columns_text <- print_type_text <- print_term_text <- print_contrast_text <- NULL + print_omit <- getOption("marginaleffects_print_omit", default = NULL) - if ("term" %in% colnames(out) && all(out$term == "cross")) { - out[["term"]] <- NULL - colnames(out) <- gsub("^contrast_", "C: ", colnames(out)) - idx <- c(grep("C: .*", colnames(out), value = TRUE), idx) - } + # contrast and term can have long labels. Drop if not unique. + if (length(unique(out[["contrast"]])) == 1) { + print_contrast_text <- sprintf("Comparison: %s\n", out[["contrast"]][1]) + print_omit <- c(print_omit, "contrast") + } + te <- unique(out[["term"]]) + te <- setdiff(te, explicit) # ex: polynomials where both `variables="x"` and datagrid(x) + if (length(te) == 1 && length(bycols) == 0) { + print_omit <- c(print_omit, te) + print_term_text <- sprintf("Term: %s\n", out[["term"]][1]) + print_omit <- c(print_omit, "term") + } + + if (ncol(x) <= ncols && isTRUE(column_names)) { + print_columns_text <- paste("Columns:", paste(colnames(x), collapse = ", "), "\n") + } + if (isTRUE(type) && !is.null(attr(x, "type"))) { + print_type_text <- paste("Type: ", attr(x, "type"), "\n") + } - print_columns_text <- print_type_text <- print_term_text <- print_contrast_text <- NULL - print_omit <- getOption("marginaleffects_print_omit", default = NULL) + # drop useless columns + idx <- setdiff(unique(idx), c(useless, print_omit)) + idx <- intersect(idx, colnames(out)) + out <- out[, ..idx, drop = FALSE] - # contrast and term can have long labels. Drop if not unique. - if (length(unique(out[["contrast"]])) == 1) { - print_contrast_text <- sprintf("Comparison: %s\n", out[["contrast"]][1]) - print_omit <- c(print_omit, "contrast") - } - te <- unique(out[["term"]]) - te <- setdiff(te, explicit) # ex: polynomials where both `variables="x"` and datagrid(x) - if (length(te) == 1) { - print_omit <- c(print_omit, te) - print_term_text <- sprintf("Term: %s\n", out[["term"]][1]) - print_omit <- c(print_omit, "term") - } - if (ncol(x) <= ncols && isTRUE(column_names)) { - print_columns_text <- paste("Columns:", paste(colnames(x), collapse = ", "), "\n") - } - if (isTRUE(type) && !is.null(attr(x, "type"))) { - print_type_text <- paste("Type: ", attr(x, "type"), "\n") + for (i in seq_along(dict)) { + colnames(out)[colnames(out) == names(dict)[i]] <- dict[i] + } + + # recommend avg_*() + rec <- "" + if (isFALSE(attr(x, "by"))) { + if (inherits(x, "predictions")) { + rec <- "?avg_predictions and " + } else if (inherits(x, "comparisons")) { + rec <- "?avg_comparisons and " + } else if (inherits(x, "slopes")) { + rec <- "?avg_slopes and " } + } - # drop useless columns - idx <- setdiff(unique(idx), c(useless, print_omit)) - idx <- intersect(idx, colnames(out)) - out <- out[, ..idx, drop = FALSE] + # avoid infinite recursion by stripping marginaleffect.summary class + data.table::setDF(out) + if (style %in% c("tinytable", "html", "latex", "typst", "markdown")) { + insight::check_if_installed("tinytable") - for (i in seq_along(dict)) { - colnames(out)[colnames(out) == names(dict)[i]] <- dict[i] - } + tab <- as.data.frame(out) - # recommend avg_*() - rec <- "" - if (isFALSE(attr(x, "by"))) { - if (inherits(x, "predictions")) { - rec <- "?avg_predictions and " - } else if (inherits(x, "comparisons")) { - rec <- "?avg_comparisons and " - } else if (inherits(x, "slopes")) { - rec <- "?avg_slopes and " - } + if (isTRUE(splitprint)) { + tab <- rbind(utils::head(tab, topn), utils::tail(tab, topn)) } - # avoid infinite recursion by stripping marginaleffect.summary class - data.table::setDF(out) - - if (style %in% c("tinytable", "html", "latex", "typst", "markdown")) { - insight::check_if_installed("tinytable") - - tab <- as.data.frame(out) - - if (isTRUE(splitprint)) { - tab <- rbind(utils::head(tab, topn), utils::tail(tab, topn)) - } - - # at <- attributes(tab) - # attributes(tab) <- at[names(at) %in% c("row.names", "names", "class")] - - args <- list(x = tab) - notes <- c(print_type_text, print_columns_text) - if (!is.null(notes)) args$notes <- notes - tab <- do.call(tinytable::tt, args) - tab <- tinytable::format_tt(i = 0, tab, escape = TRUE) - if ("p.value" %in% colnames(tab)) { - tab <- tinytable::format_tt(tab, j = "p.value", escape = TRUE) - } - - if (isTRUE(splitprint)) { - msg <- "%s rows omitted" - msg <- sprintf(msg, nrow(x) - 2 * topn) - msg <- stats::setNames(list(topn + 1), msg) - tab <- tinytable::group_tt(tab, i = msg) - tab <- tinytable::style_tt(tab, i = topn + 1, align = "c") - } - - tab@output <- style - if (style == "tinytable") { - return(tab) - } - print(tab) - return(invisible(tab)) - } + # at <- attributes(tab) + # attributes(tab) <- at[names(at) %in% c("row.names", "names", "class")] - # head - cat("\n") - print_head <- attr(x, "print_head") - if (!is.null(print_head)) { - cat(print_head, "\n") + args <- list(x = tab) + notes <- c(print_type_text, print_columns_text) + if (!is.null(notes)) args$notes <- notes + tab <- do.call(tinytable::tt, args) + tab <- tinytable::format_tt(i = 0, tab, escape = TRUE) + if ("p.value" %in% colnames(tab)) { + tab <- tinytable::format_tt(tab, j = "p.value", escape = TRUE) } - # some commands do not generate average contrasts/mfx. E.g., `lnro` with `by` - if (splitprint) { - print(utils::head(out, n = topn), row.names = FALSE) - msg <- "--- %s rows omitted. See %s?print.marginaleffects ---" - msg <- sprintf(msg, nrow(x) - 2 * topn, rec) - cat(msg, "\n") - # remove colnames - tmp <- utils::capture.output(print(utils::tail(out, n = topn), row.names = FALSE)) - tmp <- paste(tmp[-1], collapse = "\n") - cat(tmp) - } else { - print(out, row.names = FALSE) + if (isTRUE(splitprint)) { + msg <- "%s rows omitted" + msg <- sprintf(msg, nrow(x) - 2 * topn) + msg <- stats::setNames(list(topn + 1), msg) + tab <- tinytable::group_tt(tab, i = msg) + tab <- tinytable::style_tt(tab, i = topn + 1, align = "c") } - cat("\n") - - - cat(print_term_text) - cat(print_type_text) - cat(print_contrast_text) - cat(print_columns_text) - cat("\n") - - ## This is tricky to extract nicely when transform_* are passed from avg_comparisons to comparisons. I could certainly figure it out, but at the same time, I don't think the print method should return information that is immediately visible from the call. This is different from `type`, where users often rely on the default value, which can change from model to model, so printing it is often - # if (!is.null(attr(x, "comparison_label"))) { - # cat("Pre-transformation: ", paste(attr(x, "comparison_label"), collapse = ""), "\n") - # } - # if (!is.null(attr(x, "transform_label"))) { - # cat("Post-transformation: ", paste(attr(x, "transform_label"), collapse = ""), "\n") - # } - - print_tail <- attr(x, "print_tail") - if (!is.null(print_tail)) { - cat(print_tail, "\n") + + tab@output <- style + if (style == "tinytable") { + return(tab) } + print(tab) + return(invisible(tab)) + } - return(invisible(x)) + # head + cat("\n") + print_head <- attr(x, "print_head") + if (!is.null(print_head)) { + cat(print_head, "\n") + } + + # some commands do not generate average contrasts/mfx. E.g., `lnro` with `by` + if (splitprint) { + print(utils::head(out, n = topn), row.names = FALSE) + msg <- "--- %s rows omitted. See %s?print.marginaleffects ---" + msg <- sprintf(msg, nrow(x) - 2 * topn, rec) + cat(msg, "\n") + # remove colnames + tmp <- utils::capture.output(print(utils::tail(out, n = topn), row.names = FALSE)) + tmp <- paste(tmp[-1], collapse = "\n") + cat(tmp) + } else { + print(out, row.names = FALSE) + } + cat("\n") + + + cat(print_term_text) + cat(print_type_text) + cat(print_contrast_text) + cat(print_columns_text) + cat("\n") + + ## This is tricky to extract nicely when transform_* are passed from avg_comparisons to comparisons. I could certainly figure it out, but at the same time, I don't think the print method should return information that is immediately visible from the call. This is different from `type`, where users often rely on the default value, which can change from model to model, so printing it is often + # if (!is.null(attr(x, "comparison_label"))) { + # cat("Pre-transformation: ", paste(attr(x, "comparison_label"), collapse = ""), "\n") + # } + # if (!is.null(attr(x, "transform_label"))) { + # cat("Post-transformation: ", paste(attr(x, "transform_label"), collapse = ""), "\n") + # } + + print_tail <- attr(x, "print_tail") + if (!is.null(print_tail)) { + cat(print_tail, "\n") + } + + return(invisible(x)) } #' @noRd @@ -367,6 +370,3 @@ knit_print.comparisons <- knit_print.marginaleffects #' @noRd #' @exportS3Method knitr::knit_print knit_print.slopes <- knit_print.marginaleffects - - - diff --git a/inst/tinytest/_tinysnapshot/print-comparisons_by_and_variables.txt b/inst/tinytest/_tinysnapshot/print-comparisons_by_and_variables.txt new file mode 100644 index 000000000..d616da34f --- /dev/null +++ b/inst/tinytest/_tinysnapshot/print-comparisons_by_and_variables.txt @@ -0,0 +1,10 @@ + + Term am Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % + am 0 45.7 20.2 2.26 0.0236 5.4 6.13 85.2 + am 1 51.3 23.4 2.20 0.0281 5.2 5.51 97.1 + +Type: response +Comparison: 1 - 0 +Columns: term, contrast, am, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high + + diff --git a/inst/tinytest/test-print.R b/inst/tinytest/test-print.R index 83da8bd6a..b1afd7242 100644 --- a/inst/tinytest/test-print.R +++ b/inst/tinytest/test-print.R @@ -35,4 +35,10 @@ expect_snapshot_print( "print-predictions_newdata") +# Issue #1270 +mod <- lm(hp ~ mpg * factor(am), mtcars) +cmp <- avg_comparisons(mod, variables = "am", by = "am") +expect_snapshot_print(cmp, "print-comparisons_by_and_variables") + + rm(list = ls()) From aa568604f750f74cf527f97313560c9f2038f25d Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sat, 16 Nov 2024 12:09:51 -0500 Subject: [PATCH 05/11] flag_time --- R/zzz.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/zzz.R b/R/zzz.R index 33c416239..43ce38521 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -9,7 +9,7 @@ # once every 24 hours last_time <- config_get("startup_message_time") if (inherits(last_time, "POSIXct")) { - flag_time <- abs(as.numeric(Sys.time() - last_time)) >= 24 * 60 * 60 + flag_time <- difftime(Sys.time(), last_time, units = "sec") >= 24 * 60 * 60 } else { flag_time <- TRUE } From beb92554336a2183ca5adbc76439a551ff12caa2 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sat, 16 Nov 2024 13:31:22 -0500 Subject: [PATCH 06/11] issue #1269 mice transform should be applied after pooling (#1275) --- R/comparisons.R | 762 +++++++++++++++++----------------- R/imputation.R | 97 +++-- inst/tinytest/test-pkg-mice.R | 49 ++- 3 files changed, 462 insertions(+), 446 deletions(-) diff --git a/R/comparisons.R b/R/comparisons.R index 34cae1a9d..8988e4237 100644 --- a/R/comparisons.R +++ b/R/comparisons.R @@ -162,10 +162,9 @@ #' #' # Adjusted Risk Ratio: Manual specification of the `comparison` #' avg_comparisons( -#' mod, -#' comparison = function(hi, lo) log(mean(hi) / mean(lo)), -#' transform = exp) -# +#' mod, +#' comparison = function(hi, lo) log(mean(hi) / mean(lo)), +#' transform = exp) #' # cross contrasts #' mod <- lm(mpg ~ factor(cyl) * factor(gear) + hp, data = mtcars) #' avg_comparisons(mod, variables = c("cyl", "gear"), cross = TRUE) @@ -177,31 +176,32 @@ #' mod <- lm(mpg ~ wt + drat, data = mtcars) #' #' comparisons( -#' mod, -#' newdata = "mean", -#' hypothesis = "wt = drat") +#' mod, +#' newdata = "mean", +#' hypothesis = "wt = drat") #' #' # same hypothesis test using row indices #' comparisons( -#' mod, -#' newdata = "mean", -#' hypothesis = "b1 - b2 = 0") +#' mod, +#' newdata = "mean", +#' hypothesis = "b1 - b2 = 0") #' #' # same hypothesis test using numeric vector of weights #' comparisons( -#' mod, -#' newdata = "mean", -#' hypothesis = c(1, -1)) +#' mod, +#' newdata = "mean", +#' hypothesis = c(1, -1)) #' #' # two custom contrasts using a matrix of weights -#' lc <- matrix(c( +#' lc <- matrix( +#' c( #' 1, -1, #' 2, 3), -#' ncol = 2) +#' ncol = 2) #' comparisons( -#' mod, -#' newdata = "mean", -#' hypothesis = lc) +#' mod, +#' newdata = "mean", +#' hypothesis = lc) #' #' # Effect of a 1 group-wise standard deviation change #' # First we calculate the SD in each group of `cyl` @@ -209,11 +209,11 @@ #' library(dplyr) #' mod <- lm(mpg ~ hp + factor(cyl), mtcars) #' tmp <- mtcars %>% -#' group_by(cyl) %>% -#' mutate(hp_sd = sd(hp)) -#' avg_comparisons(mod, -#' variables = list(hp = function(x) data.frame(x, x + tmp$hp_sd)), -#' by = "cyl") +#' group_by(cyl) %>% +#' mutate(hp_sd = sd(hp)) +#' avg_comparisons(mod, +#' variables = list(hp = function(x) data.frame(x, x + tmp$hp_sd)), +#' by = "cyl") #' #' # `by` argument #' mod <- lm(mpg ~ hp * am * vs, data = mtcars) @@ -225,8 +225,8 @@ #' library(nnet) #' mod <- multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE) #' by <- data.frame( -#' group = c("3", "4", "5"), -#' by = c("3,4", "3,4", "5")) +#' group = c("3", "4", "5"), +#' by = c("3,4", "3,4", "5")) #' comparisons(mod, type = "probs", by = by) #' #' @export @@ -248,348 +248,339 @@ comparisons <- function(model, eps = NULL, numderiv = "fdforward", ...) { - - dots <- list(...) - - # backward compatibility - if ("transform_post" %in% names(dots)) { - transform <- dots[["transform_post"]] - insight::format_warning("The `transform_post` argument is deprecated. Use `transform` instead.") - } - if ("transform_pre" %in% names(dots)) { - comparison <- dots[["transform_pre"]] - insight::format_warning("The `transform_pre` argument is deprecated. Use `comparison` instead.") - } - - # very early, before any use of newdata - # if `newdata` is a call to `typical` or `counterfactual`, insert `model` - scall <- rlang::enquo(newdata) - newdata <- sanitize_newdata_call(scall, newdata, model, by = by) - - # extracting modeldata repeatedly is slow. - # checking dots allows marginalmeans to pass modeldata to predictions. - if (isTRUE(by)) { - modeldata <- get_modeldata(model, - additional_variables = FALSE, - modeldata = dots[["modeldata"]], - wts = wts) - } else { - modeldata <- get_modeldata(model, - additional_variables = by, - modeldata = dots[["modeldata"]], - wts = wts) - } - - # build call: match.call() doesn't work well in *apply() - # after sanitize_newdata_call - call_attr <- c(list( - name = "comparisons", - model = model, - newdata = newdata, - variables = variables, - type = type, - vcov = vcov, - by = by, - conf_level = conf_level, - comparison = comparison, - transform = transform, - cross = cross, - wts = wts, - hypothesis = hypothesis, - equivalence = equivalence, - p_adjust = p_adjust, - df = df), - dots) - if ("modeldata" %in% names(dots)) { - call_attr[["modeldata"]] <- modeldata - } - call_attr <- do.call("call", call_attr) - - - # required by stubcols later, but might be overwritten - bycols <- NULL - - # sanity checks - sanity_dots(model, ...) - sanity_df(df, newdata) - conf_level <- sanitize_conf_level(conf_level, ...) - checkmate::assert_number(eps, lower = 1e-10, null.ok = TRUE) - numderiv <- sanitize_numderiv(numderiv) - sanity_equivalence_p_adjust(equivalence, p_adjust) - model <- sanitize_model( - model = model, - newdata = newdata, - wts = wts, - vcov = vcov, - by = by, - calling_function = "comparisons", - ...) - cross <- sanitize_cross(cross, variables, model) - type <- sanitize_type(model = model, type = type, calling_function = "comparisons") - sanity_comparison(comparison) - - # multiple imputation - if (inherits(model, c("mira", "amest"))) { - out <- process_imputation(model, call_attr) - return(out) - } - - # transforms - comparison_label <- transform_label <- NULL - if (is.function(comparison)) { - comparison_label <- deparse(substitute(comparison)) - } - if (is.function(transform)) { - transform_label <- deparse(substitute(transform)) - transform <- sanitize_transform(transform) - names(transform) <- transform_label - } else { - transform <- sanitize_transform(transform) - transform_label <- names(transform) - } - - marginalmeans <- isTRUE(checkmate::check_choice(newdata, choices = "marginalmeans")) - - - newdata <- sanitize_newdata( - model = model, - newdata = newdata, - modeldata = modeldata, - by = by, - wts = wts) - - # after sanitize_newdata - sanity_by(by, newdata) - - - # after sanity_by - newdata <- dedup_newdata( - model = model, - newdata = newdata, - wts = wts, - by = by, - cross = cross, - comparison = comparison) - if (isFALSE(wts) && "marginaleffects_wts_internal" %in% colnames(newdata)) { - wts <- "marginaleffects_wts_internal" + dots <- list(...) + + # very early, before any use of newdata + # if `newdata` is a call to `typical` or `counterfactual`, insert `model` + scall <- rlang::enquo(newdata) + newdata <- sanitize_newdata_call(scall, newdata, model, by = by) + + # extracting modeldata repeatedly is slow. + # checking dots allows marginalmeans to pass modeldata to predictions. + if (isTRUE(by)) { + modeldata <- get_modeldata(model, + additional_variables = FALSE, + modeldata = dots[["modeldata"]], + wts = wts) + } else { + modeldata <- get_modeldata(model, + additional_variables = by, + modeldata = dots[["modeldata"]], + wts = wts) + } + + # build call: match.call() doesn't work well in *apply() + # after sanitize_newdata_call + call_attr <- c( + list( + name = "comparisons", + model = model, + newdata = newdata, + variables = variables, + type = type, + vcov = vcov, + by = by, + conf_level = conf_level, + comparison = comparison, + transform = transform, + cross = cross, + wts = wts, + hypothesis = hypothesis, + equivalence = equivalence, + p_adjust = p_adjust, + df = df), + dots) + if ("modeldata" %in% names(dots)) { + call_attr[["modeldata"]] <- modeldata + } + call_attr <- do.call("call", call_attr) + + + # required by stubcols later, but might be overwritten + bycols <- NULL + + # sanity checks + sanity_dots(model, ...) + sanity_df(df, newdata) + conf_level <- sanitize_conf_level(conf_level, ...) + checkmate::assert_number(eps, lower = 1e-10, null.ok = TRUE) + numderiv <- sanitize_numderiv(numderiv) + sanity_equivalence_p_adjust(equivalence, p_adjust) + model <- sanitize_model( + model = model, + newdata = newdata, + wts = wts, + vcov = vcov, + by = by, + calling_function = "comparisons", + ...) + cross <- sanitize_cross(cross, variables, model) + type <- sanitize_type(model = model, type = type, calling_function = "comparisons") + sanity_comparison(comparison) + + # multiple imputation + if (inherits(model, c("mira", "amest"))) { + out <- process_imputation(model, call_attr) + return(out) + } + + # transforms + comparison_label <- transform_label <- NULL + if (is.function(comparison)) { + comparison_label <- deparse(substitute(comparison)) + } + if (is.function(transform)) { + transform_label <- deparse(substitute(transform)) + transform <- sanitize_transform(transform) + names(transform) <- transform_label + } else { + transform <- sanitize_transform(transform) + transform_label <- names(transform) + } + + marginalmeans <- isTRUE(checkmate::check_choice(newdata, choices = "marginalmeans")) + + + newdata <- sanitize_newdata( + model = model, + newdata = newdata, + modeldata = modeldata, + by = by, + wts = wts) + + # after sanitize_newdata + sanity_by(by, newdata) + + + # after sanity_by + newdata <- dedup_newdata( + model = model, + newdata = newdata, + wts = wts, + by = by, + cross = cross, + comparison = comparison) + if (isFALSE(wts) && "marginaleffects_wts_internal" %in% colnames(newdata)) { + wts <- "marginaleffects_wts_internal" + } + + # after sanitize_newdata + # after dedup_newdata + variables_list <- sanitize_variables( + model = model, + newdata = newdata, + modeldata = modeldata, + variables = variables, + cross = cross, + by = by, + comparison = comparison, + eps = eps) + + # get dof before transforming the vcov arg + # get_df() produces a weird warning on non lmerMod. We can skip them + # because get_vcov() will produce an informative error later. + if (inherits(model, "lmerMod") && (isTRUE(hush(vcov %in% c("satterthwaite", "kenward-roger"))))) { + # predict.lmerTest requires the DV + dv <- insight::find_response(model) + if (!dv %in% colnames(newdata)) { + newdata[[dv]] <- mean(insight::get_response(model)) } - # after sanitize_newdata - # after dedup_newdata - variables_list <- sanitize_variables( - model = model, - newdata = newdata, - modeldata = modeldata, - variables = variables, - cross = cross, - by = by, - comparison = comparison, - eps = eps) - - # get dof before transforming the vcov arg - # get_df() produces a weird warning on non lmerMod. We can skip them - # because get_vcov() will produce an informative error later. - if (inherits(model, "lmerMod") && (isTRUE(hush(vcov %in% c("satterthwaite", "kenward-roger"))))) { - # predict.lmerTest requires the DV - dv <- insight::find_response(model) - if (!dv %in% colnames(newdata)) { - newdata[[dv]] <- mean(insight::get_response(model)) - } - - if (!isTRUE(hush(is.infinite(df)))) { - insight::format_error('The `df` argument is not supported when `vcov` is "satterthwaite" or "kenward-roger".') - } - - # df_per_observation is an undocumented argument introduced in 0.18.4.7 to preserve backward incompatibility - df <- insight::get_df(model, type = vcov, data = newdata, df_per_observation = TRUE) + if (!isTRUE(hush(is.infinite(df)))) { + insight::format_error('The `df` argument is not supported when `vcov` is "satterthwaite" or "kenward-roger".') } - vcov_false <- isFALSE(vcov) - vcov.type <- get_vcov_label(vcov) - vcov <- get_vcov(model, vcov = vcov, type = type, ...) + # df_per_observation is an undocumented argument introduced in 0.18.4.7 to preserve backward incompatibility + df <- insight::get_df(model, type = vcov, data = newdata, df_per_observation = TRUE) + } - predictors <- variables_list$conditional + vcov_false <- isFALSE(vcov) + vcov.type <- get_vcov_label(vcov) + vcov <- get_vcov(model, vcov = vcov, type = type, ...) - ############### sanity checks are over + predictors <- variables_list$conditional - # Bootstrap - out <- inferences_dispatch( - INF_FUN = comparisons, - model = model, newdata = newdata, vcov = vcov, variables = variables, type = type, by = by, - conf_level = conf_level, - cross = cross, - comparison = comparison, transform = transform, wts = wts, hypothesis = hypothesis, eps = eps, ...) - if (!is.null(out)) { - return(out) - } + ############### sanity checks are over - # after inferences dispatch - tmp <- sanitize_hypothesis(hypothesis, ...) - hypothesis <- tmp$hypothesis - hypothesis_null <- tmp$hypothesis_null - - # compute contrasts and standard errors - args <- list(model = model, - newdata = newdata, - variables = predictors, - cross = cross, - marginalmeans = marginalmeans, - modeldata = modeldata) - dots[["modeldata"]] <- NULL # dont' pass twice - args <- c(args, dots) - contrast_data <- do.call("get_contrast_data", args) + # Bootstrap + out <- inferences_dispatch( + INF_FUN = comparisons, + model = model, newdata = newdata, vcov = vcov, variables = variables, type = type, by = by, + conf_level = conf_level, + cross = cross, + comparison = comparison, transform = transform, wts = wts, hypothesis = hypothesis, eps = eps, ...) + if (!is.null(out)) { + return(out) + } + + # after inferences dispatch + tmp <- sanitize_hypothesis(hypothesis, ...) + hypothesis <- tmp$hypothesis + hypothesis_null <- tmp$hypothesis_null + + # compute contrasts and standard errors + args <- list( + model = model, + newdata = newdata, + variables = predictors, + cross = cross, + marginalmeans = marginalmeans, + modeldata = modeldata) + dots[["modeldata"]] <- NULL # dont' pass twice + args <- c(args, dots) + contrast_data <- do.call("get_contrast_data", args) + + args <- list(model, + newdata = newdata, + variables = predictors, + type = type, + original = contrast_data[["original"]], + hi = contrast_data[["hi"]], + lo = contrast_data[["lo"]], + wts = contrast_data[["original"]][["marginaleffects_wts_internal"]], + by = by, + marginalmeans = marginalmeans, + cross = cross, + hypothesis = hypothesis, + modeldata = modeldata) + args <- c(args, dots) + mfx <- do.call("get_contrasts", args) + + hyp_by <- attr(mfx, "hypothesis_function_by") + + # bayesian posterior + if (!is.null(attr(mfx, "posterior_draws"))) { + draws <- attr(mfx, "posterior_draws") + J <- NULL + # standard errors via delta method + } else if (!vcov_false && isTRUE(checkmate::check_matrix(vcov))) { + idx <- intersect(colnames(mfx), c("group", "term", "contrast")) + idx <- mfx[, (idx), drop = FALSE] args <- list(model, - newdata = newdata, - variables = predictors, - type = type, - original = contrast_data[["original"]], - hi = contrast_data[["hi"]], - lo = contrast_data[["lo"]], - wts = contrast_data[["original"]][["marginaleffects_wts_internal"]], - by = by, - marginalmeans = marginalmeans, - cross = cross, - hypothesis = hypothesis, - modeldata = modeldata) + vcov = vcov, + type = type, + FUN = get_se_delta_contrasts, + newdata = newdata, + index = idx, + variables = predictors, + marginalmeans = marginalmeans, + hypothesis = hypothesis, + hi = contrast_data$hi, + lo = contrast_data$lo, + original = contrast_data$original, + by = by, + eps = eps, + cross = cross, + numderiv = numderiv) args <- c(args, dots) - mfx <- do.call("get_contrasts", args) - - hyp_by <- attr(mfx, "hypothesis_function_by") - - # bayesian posterior - if (!is.null(attr(mfx, "posterior_draws"))) { - draws <- attr(mfx, "posterior_draws") - J <- NULL - - # standard errors via delta method - } else if (!vcov_false && isTRUE(checkmate::check_matrix(vcov))) { - idx <- intersect(colnames(mfx), c("group", "term", "contrast")) - idx <- mfx[, (idx), drop = FALSE] - args <- list(model, - vcov = vcov, - type = type, - FUN = get_se_delta_contrasts, - newdata = newdata, - index = idx, - variables = predictors, - marginalmeans = marginalmeans, - hypothesis = hypothesis, - hi = contrast_data$hi, - lo = contrast_data$lo, - original = contrast_data$original, - by = by, - eps = eps, - cross = cross, - numderiv = numderiv) - args <- c(args, dots) - se <- do.call("get_se_delta", args) - J <- attr(se, "jacobian") - attr(se, "jacobian") <- NULL - mfx$std.error <- as.numeric(se) - draws <- NULL + se <- do.call("get_se_delta", args) + J <- attr(se, "jacobian") + attr(se, "jacobian") <- NULL + mfx$std.error <- as.numeric(se) + draws <- NULL # no standard error + } else { + J <- draws <- NULL + } + + # merge original data back in + if ((is.null(by) || isFALSE(by)) && "rowid" %in% colnames(mfx)) { + if ("rowid" %in% colnames(newdata)) { + idx <- c("rowid", "rowidcf", "term", "contrast", "by", setdiff(colnames(contrast_data$original), colnames(mfx))) + idx <- intersect(idx, colnames(contrast_data$original)) + tmp <- contrast_data$original[, ..idx, drop = FALSE] + # contrast_data is duplicated to compute contrasts for different terms or pairs + bycols <- intersect(colnames(tmp), colnames(mfx)) + idx <- duplicated(tmp, by = bycols) + tmp <- tmp[!idx] + mfx <- merge(mfx, tmp, all.x = TRUE, by = bycols, sort = FALSE) + # HACK: relies on NO sorting at ANY point } else { - J <- draws <- NULL - } - - # merge original data back in - if ((is.null(by) || isFALSE(by)) && "rowid" %in% colnames(mfx)) { - if ("rowid" %in% colnames(newdata)) { - idx <- c("rowid", "rowidcf", "term", "contrast", "by", setdiff(colnames(contrast_data$original), colnames(mfx))) - idx <- intersect(idx, colnames(contrast_data$original)) - tmp <- contrast_data$original[, ..idx, drop = FALSE] - # contrast_data is duplicated to compute contrasts for different terms or pairs - bycols <- intersect(colnames(tmp), colnames(mfx)) - idx <- duplicated(tmp, by = bycols) - tmp <- tmp[!idx] - mfx <- merge(mfx, tmp, all.x = TRUE, by = bycols, sort = FALSE) - # HACK: relies on NO sorting at ANY point - } else { - idx <- setdiff(colnames(contrast_data$original), colnames(mfx)) - mfx <- data.table(mfx, contrast_data$original[, ..idx]) - } - } - - # meta info - mfx <- get_ci( - mfx, - conf_level = conf_level, - df = df, - draws = draws, - estimate = "estimate", - null_hypothesis = hypothesis_null, - p_adjust = p_adjust, - model = model) - - # clean rows and columns - # WARNING: we cannot sort rows at the end because `get_hypothesis()` is - # applied in the middle, and it must already be sorted in the final order, - # otherwise, users cannot know for sure what is going to be the first and - # second rows, etc. - mfx <- sort_columns(mfx, newdata, by) - - # bayesian draws - attr(mfx, "posterior_draws") <- draws - - # equivalence tests - mfx <- equivalence(mfx, equivalence = equivalence, df = df, ...) - - # after draws attribute - mfx <- backtransform(mfx, transform) - - # save as attribute and not column - if (!all(is.na(mfx[["marginaleffects_wts_internal"]]))) { - marginaleffects_wts_internal <- mfx[["marginaleffects_wts_internal"]] - } else { - marginaleffects_wts_internal <- NULL + idx <- setdiff(colnames(contrast_data$original), colnames(mfx)) + mfx <- data.table(mfx, contrast_data$original[, ..idx]) } - mfx[["marginaleffects_wts_internal"]] <- NULL - - out <- mfx - - data.table::setDF(out) - - out <- set_marginaleffects_attributes( - out, - get_marginaleffects_attributes(newdata, include_regex = "^newdata.*class|explicit|matrix|levels")) - - # Global option for lean return object - lean = getOption("marginaleffects_lean", default = FALSE) - - # Only add (potentially large) attributes if lean is FALSE - if (isTRUE(lean)) { - for (a in setdiff(names(attributes(out)), c("names", "row.names", "class"))) attr(out, a) = NULL - attr(out, "lean") <- TRUE - } else { - # other attributes - attr(out, "newdata") <- newdata - attr(out, "call") <- call_attr - attr(out, "type") <- type - attr(out, "model_type") <- class(model)[1] - attr(out, "model") <- model - attr(out, "variables") <- predictors - attr(out, "jacobian") <- J - attr(out, "vcov") <- vcov - attr(out, "vcov.type") <- vcov.type - attr(out, "weights") <- marginaleffects_wts_internal - attr(out, "comparison") <- comparison - attr(out, "transform") <- transform[[1]] - attr(out, "comparison_label") <- comparison_label - attr(out, "hypothesis_by") <- hyp_by - attr(out, "transform_label") <- transform_label - attr(out, "conf_level") <- conf_level - attr(out, "by") <- by - - if (inherits(model, "brmsfit")) { - insight::check_if_installed("brms") - attr(out, "nchains") <- brms::nchains(model) - } + } + + # meta info + mfx <- get_ci( + mfx, + conf_level = conf_level, + df = df, + draws = draws, + estimate = "estimate", + null_hypothesis = hypothesis_null, + p_adjust = p_adjust, + model = model) + + # clean rows and columns + # WARNING: we cannot sort rows at the end because `get_hypothesis()` is + # applied in the middle, and it must already be sorted in the final order, + # otherwise, users cannot know for sure what is going to be the first and + # second rows, etc. + mfx <- sort_columns(mfx, newdata, by) + + # bayesian draws + attr(mfx, "posterior_draws") <- draws + + # equivalence tests + mfx <- equivalence(mfx, equivalence = equivalence, df = df, ...) + + # after draws attribute + mfx <- backtransform(mfx, transform) + + # save as attribute and not column + if (!all(is.na(mfx[["marginaleffects_wts_internal"]]))) { + marginaleffects_wts_internal <- mfx[["marginaleffects_wts_internal"]] + } else { + marginaleffects_wts_internal <- NULL + } + mfx[["marginaleffects_wts_internal"]] <- NULL + + out <- mfx + + data.table::setDF(out) + + out <- set_marginaleffects_attributes( + out, + get_marginaleffects_attributes(newdata, include_regex = "^newdata.*class|explicit|matrix|levels")) + + # Global option for lean return object + lean = getOption("marginaleffects_lean", default = FALSE) + + # Only add (potentially large) attributes if lean is FALSE + if (isTRUE(lean)) { + for (a in setdiff(names(attributes(out)), c("names", "row.names", "class"))) attr(out, a) = NULL + attr(out, "lean") <- TRUE + } else { + # other attributes + attr(out, "newdata") <- newdata + attr(out, "call") <- call_attr + attr(out, "type") <- type + attr(out, "model_type") <- class(model)[1] + attr(out, "model") <- model + attr(out, "variables") <- predictors + attr(out, "jacobian") <- J + attr(out, "vcov") <- vcov + attr(out, "vcov.type") <- vcov.type + attr(out, "weights") <- marginaleffects_wts_internal + attr(out, "comparison") <- comparison + attr(out, "transform") <- transform[[1]] + attr(out, "comparison_label") <- comparison_label + attr(out, "hypothesis_by") <- hyp_by + attr(out, "transform_label") <- transform_label + attr(out, "conf_level") <- conf_level + attr(out, "by") <- by + + if (inherits(model, "brmsfit")) { + insight::check_if_installed("brms") + attr(out, "nchains") <- brms::nchains(model) } + } - class(out) <- c("comparisons", class(out)) - return(out) + class(out) <- c("comparisons", class(out)) + return(out) } @@ -615,41 +606,40 @@ avg_comparisons <- function(model, eps = NULL, numderiv = "fdforward", ...) { - - # order of the first few paragraphs is important - # if `newdata` is a call to `typical` or `counterfactual`, insert `model` - scall <- rlang::enquo(newdata) - newdata <- sanitize_newdata_call(scall, newdata, model, by = by) - - # Bootstrap - out <- inferences_dispatch( - INF_FUN = avg_comparisons, - model = model, newdata = newdata, vcov = vcov, variables = variables, type = type, by = by, - cross = cross, - conf_level = conf_level, - comparison = comparison, transform = transform, wts = wts, hypothesis = hypothesis, eps = eps, ...) - if (!is.null(out)) { - return(out) - } - - out <- comparisons( - model = model, - newdata = newdata, - variables = variables, - type = type, - vcov = vcov, - by = by, - conf_level = conf_level, - comparison = comparison, - transform = transform, - cross = cross, - wts = wts, - hypothesis = hypothesis, - equivalence = equivalence, - p_adjust = p_adjust, - df = df, - eps = eps, - ...) - + # order of the first few paragraphs is important + # if `newdata` is a call to `typical` or `counterfactual`, insert `model` + scall <- rlang::enquo(newdata) + newdata <- sanitize_newdata_call(scall, newdata, model, by = by) + + # Bootstrap + out <- inferences_dispatch( + INF_FUN = avg_comparisons, + model = model, newdata = newdata, vcov = vcov, variables = variables, type = type, by = by, + cross = cross, + conf_level = conf_level, + comparison = comparison, transform = transform, wts = wts, hypothesis = hypothesis, eps = eps, ...) + if (!is.null(out)) { return(out) + } + + out <- comparisons( + model = model, + newdata = newdata, + variables = variables, + type = type, + vcov = vcov, + by = by, + conf_level = conf_level, + comparison = comparison, + transform = transform, + cross = cross, + wts = wts, + hypothesis = hypothesis, + equivalence = equivalence, + p_adjust = p_adjust, + df = df, + eps = eps, + ...) + + return(out) } diff --git a/R/imputation.R b/R/imputation.R index 6ed3c00f1..130f39940 100644 --- a/R/imputation.R +++ b/R/imputation.R @@ -1,49 +1,56 @@ process_imputation <- function(x, call_attr, marginal_means = FALSE) { - insight::check_if_installed("mice") + insight::check_if_installed("mice") - if (inherits(x, "mira")) { - x <- x$analyses - } else if (inherits(x, "amest")) { - x <- x - } + # issue #1269: transforms must be apply after pooling + tr <- call_attr[["transform"]] + call_attr[["transform"]] <- NULL - mfx_list <- vector("list", length(x)) - for (i in seq_along(x)) { - calltmp <- call_attr - calltmp[["model"]] <- x[[i]] + if (inherits(x, "mira")) { + x <- x$analyses + } else if (inherits(x, "amest")) { + x <- x + } - # not sure why but this breaks marginal_means on "modeldata specified twice" - if (isFALSE(marginal_means)) { - calltmp[["modeldata"]] <- get_modeldata( x[[i]], additional_variables = FALSE) - } + mfx_list <- vector("list", length(x)) + for (i in seq_along(x)) { + calltmp <- call_attr + calltmp[["model"]] <- x[[i]] - mfx_list[[i]] <- evalup(calltmp) - if (i == 1) { - out <- mfx_list[[1]] - } - mfx_list[[i]]$term <- seq_len(nrow(mfx_list[[i]])) - class(mfx_list[[i]]) <- c("marginaleffects_mids", class(mfx_list[[i]])) + # not sure why but this breaks marginal_means on "modeldata specified twice" + if (isFALSE(marginal_means)) { + calltmp[["modeldata"]] <- get_modeldata(x[[i]], additional_variables = FALSE) } - mipool <- mice::pool(mfx_list) - for (col in c("estimate", "statistic", "p.value", "conf.low", "conf.high")) { - if (col %in% colnames(out) && col %in% colnames(mipool$pooled)) { - out[[col]] <- mipool$pooled[[col]] - } else { - out[[col]] <- NULL - } + + mfx_list[[i]] <- evalup(calltmp) + if (i == 1) { + out <- mfx_list[[1]] } - if ("df" %in% colnames(mipool$pooled)) { - out$df <- mipool$pooled$df + mfx_list[[i]]$term <- seq_len(nrow(mfx_list[[i]])) + class(mfx_list[[i]]) <- c("marginaleffects_mids", class(mfx_list[[i]])) + } + mipool <- mice::pool(mfx_list) + for (col in c("estimate", "statistic", "p.value", "conf.low", "conf.high")) { + if (col %in% colnames(out) && col %in% colnames(mipool$pooled)) { + out[[col]] <- mipool$pooled[[col]] + } else { + out[[col]] <- NULL } - out$std.error <- sqrt(mipool$pooled$t) - out <- get_ci( - out, - vcov = call_attr[["vcov"]], - conf_level = call_attr[["conf_level"]], - df = mipool$pooled$df) - attr(out, "inferences") <- mipool - attr(out, "model") <- mice::pool(lapply(mfx_list, attr, "model")) - return(out) + } + if ("df" %in% colnames(mipool$pooled)) { + out$df <- mipool$pooled$df + } + out$std.error <- sqrt(mipool$pooled$t) + out <- get_ci( + out, + vcov = call_attr[["vcov"]], + conf_level = call_attr[["conf_level"]], + df = mipool$pooled$df) + + out <- backtransform(out, sanitize_transform(tr)) + + attr(out, "inferences") <- mipool + attr(out, "model") <- mice::pool(lapply(mfx_list, attr, "model")) + return(out) } @@ -52,12 +59,12 @@ process_imputation <- function(x, call_attr, marginal_means = FALSE) { #' @noRd #' @export tidy.marginaleffects_mids <- function(x, ...) { - if (!"std.error" %in% colnames(x)) { - insight::format_error('The output does not include a `std.error` column. Some models do not generate standard errors when estimates are backtransformed (e.g., GLM models). One solution is to use `type="response"` for those models.') - } - out <- as.data.frame(x[, c("estimate", "std.error")]) - out$term <- seq_len(nrow(out)) - return(out) + if (!"std.error" %in% colnames(x)) { + insight::format_error('The output does not include a `std.error` column. Some models do not generate standard errors when estimates are backtransformed (e.g., GLM models). One solution is to use `type="response"` for those models.') + } + out <- as.data.frame(x[, c("estimate", "std.error")]) + out$term <- seq_len(nrow(out)) + return(out) } @@ -66,5 +73,5 @@ tidy.marginaleffects_mids <- function(x, ...) { #' @noRd #' @export glance.marginaleffects_mids <- function(x, ...) { - data.frame() + data.frame() } diff --git a/inst/tinytest/test-pkg-mice.R b/inst/tinytest/test-pkg-mice.R index 44c3ab76d..b1625b5a8 100644 --- a/inst/tinytest/test-pkg-mice.R +++ b/inst/tinytest/test-pkg-mice.R @@ -16,33 +16,52 @@ expect_equivalent(nrow(mfx1), nrow(mfx2)) # Issue #711 -data <- structure(list(id = 1:37, trt = c("soc", "soc", "soc", "soc", -"soc", "soc", "soc", "soc", "soc", "soc", "soc", "soc", "soc", -"soc", "soc", "soc", "soc", "soc", "soc", "soc", "soc", "arm", -"arm", "arm", "arm", "arm", "arm", "arm", "arm", "arm", "arm", -"arm", "arm", "arm", "arm", "arm", "arm"), endp = structure(c(1L, -1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, -2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, -1L, 1L, 1L, 1L), levels = c("TRUE", "FALSE"), class = "factor")), row.names = c(NA, --37L), class = "data.frame") +data <- structure(list(id = 1:37, trt = c( + "soc", "soc", "soc", "soc", + "soc", "soc", "soc", "soc", "soc", "soc", "soc", "soc", "soc", + "soc", "soc", "soc", "soc", "soc", "soc", "soc", "soc", "arm", + "arm", "arm", "arm", "arm", "arm", "arm", "arm", "arm", "arm", + "arm", "arm", "arm", "arm", "arm", "arm"), endp = structure(c( + 1L, + 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, + 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, + 1L, 1L, 1L, 1L), levels = c("TRUE", "FALSE"), class = "factor")), row.names = c( + NA, + -37L), class = "data.frame") data$endp <- factor(data$endp, levels = c("TRUE", "FALSE")) data_miss <- data data_miss[c(1, 5, 7, 30), c("endp")] <- NA -imp <- suppressWarnings(mice::mice(data_miss, m = 20, method = "pmm", maxit = 50, seed = 1000, print = FALSE)) +imp <- suppressWarnings(mice::mice(data_miss, m = 20, method = "pmm", maxit = 50, seed = 1000, print = FALSE)) dat_mice <- complete(imp, "all") fit_logistic <- function(dat) { - mod <- glm(endp ~ trt, family = binomial(link = "logit"), data = dat) - out <- avg_slopes(mod, newdata = dat) - return(out) + mod <- glm(endp ~ trt, family = binomial(link = "logit"), data = dat) + out <- avg_slopes(mod, newdata = dat) + return(out) } mod_imputation <- suppressWarnings(lapply(dat_mice, fit_logistic)) manu <- suppressWarnings(summary(pool(mod_imputation), conf.int = TRUE)) -fit <- with(imp, glm(endp ~ trt, family = binomial(link = "logit"))) +fit <- with(imp, glm(endp ~ trt, family = binomial(link = "logit"))) auto <- suppressWarnings(avg_slopes(fit)) expect_equivalent(auto$estimate, manu$estimate) expect_equivalent(auto$std.error, manu$std.error, tolerance = 1e-6) +# Issue 1269: transforms must be apply after pooling +data("lalonde_mis", package = "cobalt") +imp <- mice(lalonde_mis, print = FALSE, seed = 48103) +fits <- with(imp, glm(treat ~ age + married, family = binomial)) +cmp1 <- suppressWarnings(avg_comparisons(fits, + variables = "married", + comparison = "lnratioavg", + transform = "exp")) +expect_equivalent(cmp1$estimate, 0.3380001, tol = 1e-6) +expect_equivalent(cmp1$conf.low, 0.2386019, tol = 1e-6) +cmp2 <- suppressWarnings(avg_comparisons(fits, + variables = "married", + comparison = "lnratioavg")) +expect_equivalent(cmp2$estimate, -1.084709, tol = 1e-6) +expect_equivalent(cmp2$conf.low, -1.432959, tol = 1e-6) -source("helpers.R") \ No newline at end of file +source("helpers.R") + From ae97dac088a6c851d2862370dd5ee92a5a531f4a Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Wed, 20 Nov 2024 20:38:31 -0500 Subject: [PATCH 07/11] Issue #1230 intercept-only models (#1278) * Issue #1230 * fixup --- DESCRIPTION | 2 +- NEWS.md | 1 + R/sanitize_newdata.R | 3 +- R/sanitize_variables.R | 667 ++++++++++++++++++------------------ inst/tinytest/test-bugfix.R | 9 + 5 files changed, 343 insertions(+), 339 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ed3c776ba..40d24b9fb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: marginaleffects Title: Predictions, Comparisons, Slopes, Marginal Means, and Hypothesis Tests -Version: 0.23.0.6 +Version: 0.23.0.7 Authors@R: c(person(given = "Vincent", family = "Arel-Bundock", diff --git a/NEWS.md b/NEWS.md index 8f68593b4..c1d785a3b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ Breaking change: Bugs: +* Intercept only model now works with `avg_predictions()`. Thanks to @vbrazao for report #1230. * `systemfit` models returned no standard errors when the same variables entered in different parts of the model. Thanks to @mronkko for report #1233. New features: diff --git a/R/sanitize_newdata.R b/R/sanitize_newdata.R index 5dfd7f6c3..0fff53443 100644 --- a/R/sanitize_newdata.R +++ b/R/sanitize_newdata.R @@ -279,7 +279,8 @@ dedup_newdata <- function(model, newdata, by, wts, comparison = "difference", cr vclass <- vclass[names(vclass) != dv] } - if ("rowid" %in% colnames(out)) { + # rowid is useless, except for intercept-only models, where we want to retain all rows + if ("rowid" %in% colnames(out) && ncol(out) > 1) { out[, "rowid" := NULL] } diff --git a/R/sanitize_variables.R b/R/sanitize_variables.R index 2f6b50af9..442a31986 100644 --- a/R/sanitize_variables.R +++ b/R/sanitize_variables.R @@ -2,377 +2,370 @@ # output: named list of lists where each element represents a variable with: name, value, function, label sanitize_variables <- function(variables, model, - newdata, # need for NumPyro where `find_variables()`` does not work + newdata, # need for NumPyro where `find_variables()`` does not work modeldata, comparison = NULL, by = NULL, cross = FALSE, calling_function = "comparisons", eps = NULL) { - - checkmate::assert( - checkmate::check_null(variables), - checkmate::check_character(variables, min.len = 1, names = "unnamed"), - checkmate::check_list(variables, min.len = 1, names = "unique"), - combine = "or") - - # extensions with no `get_data()` - if (is.null(modeldata) || nrow(modeldata) == 0) { - modeldata <- set_variable_class(newdata, model) - no_modeldata <- TRUE + checkmate::assert( + checkmate::check_null(variables), + checkmate::check_character(variables, min.len = 1, names = "unnamed"), + checkmate::check_list(variables, min.len = 1, names = "unique"), + combine = "or") + + # extensions with no `get_data()` + if (is.null(modeldata) || nrow(modeldata) == 0) { + modeldata <- set_variable_class(newdata, model) + no_modeldata <- TRUE + } else { + no_modeldata <- FALSE + } + + # variables is NULL: get all variable names from the model + if (is.null(variables)) { + # mhurdle names the variables weirdly + if (inherits(model, "mhurdle")) { + predictors <- insight::find_predictors(model, flatten = TRUE) + predictors <- list(conditional = predictors) } else { - no_modeldata <- FALSE + predictors <- insight::find_variables(model) } - # variables is NULL: get all variable names from the model - if (is.null(variables)) { - # mhurdle names the variables weirdly - if (inherits(model, "mhurdle")) { - predictors <- insight::find_predictors(model, flatten = TRUE) - predictors <- list(conditional = predictors) - } else { - predictors <- insight::find_variables(model) - } - - # unsupported models like pytorch - if (length(predictors) == 0 || (length(predictors) == 1 && names(predictors) == "response")) { - dv <- hush(unlist(insight::find_response(model, combine = FALSE), use.names = FALSE)) - predictors <- setdiff(hush(colnames(newdata)), c(dv, "rowid")) - } else { - known <- c("fixed", "conditional", "zero_inflated", "scale", "nonlinear") - if (any(known %in% names(predictors))) { - predictors <- predictors[known] - # sometimes triggered by multivariate brms models where we get nested - # list: predictors$gear$hp - } else { - predictors <- unlist(predictors, recursive = TRUE, use.names = FALSE) - predictors <- unique(predictors) - } - # flatten - predictors <- unique(unlist(predictors, recursive = TRUE, use.names = FALSE)) - } - + # unsupported models like pytorch + if (length(predictors) == 0 || (length(predictors) == 1 && names(predictors) == "response")) { + dv <- hush(unlist(insight::find_response(model, combine = FALSE), use.names = FALSE)) + predictors <- setdiff(hush(colnames(newdata)), c(dv, "rowid")) } else { - predictors <- variables + known <- c("fixed", "conditional", "zero_inflated", "scale", "nonlinear") + if (any(known %in% names(predictors))) { + predictors <- predictors[known] + # sometimes triggered by multivariate brms models where we get nested + # list: predictors$gear$hp + } else { + predictors <- unlist(predictors, recursive = TRUE, use.names = FALSE) + predictors <- unique(predictors) + } + # flatten + predictors <- unique(unlist(predictors, recursive = TRUE, use.names = FALSE)) } - - # character -> list - if (isTRUE(checkmate::check_character(predictors))) { - predictors <- stats::setNames(rep(list(NULL), length(predictors)), predictors) + } else { + predictors <- variables + } + + # character -> list + if (isTRUE(checkmate::check_character(predictors))) { + predictors <- stats::setNames(rep(list(NULL), length(predictors)), predictors) + } + + # reserved keywords + # Issue #697: we used to allow "group", as long as it wasn't in + # `variables`, but this created problems with automatic `by=TRUE`. Perhaps + # I could loosen this, but there are many interactions, and the lazy way is + # just to forbid entirely. + reserved <- c( + "rowid", "group", "term", "contrast", "estimate", + "std.error", "statistic", "conf.low", "conf.high", "p.value", + "p.value.nonsup", "p.value.noninf", "by") + # if no modeldata is available, we use `newdata`, but that often has a + # `rowid` column. This used to break the extensions.Rmd vignette. + if (no_modeldata) { + reserved <- setdiff(reserved, "rowid") + } + bad <- unique(intersect(c(names(predictors), colnames(modeldata)), reserved)) + if (length(bad) > 0) { + msg <- c( + "These variable names are forbidden to avoid conflicts with the outputs of `marginaleffects`:", + sprintf("%s", paste(sprintf('"%s"', bad), collapse = ", ")), + "Please rename your variables before fitting the model.") + insight::format_error(msg) + } + + # when comparisons() only inludes one focal predictor, we don't need to specify it in `newdata` + # when `variables` is numeric, we still need to include it, because in + # non-linear model the contrast depend on the starting value of the focal + # variable. + found <- colnames(newdata) + if (calling_function == "comparisons") { + v <- NULL + if (isTRUE(checkmate::check_string(variables))) { + v <- variables + } else if (isTRUE(checkmate::check_list(variables, len = 1, names = "named"))) { + v <- names(variables)[1] } - - # reserved keywords - # Issue #697: we used to allow "group", as long as it wasn't in - # `variables`, but this created problems with automatic `by=TRUE`. Perhaps - # I could loosen this, but there are many interactions, and the lazy way is - # just to forbid entirely. - reserved <- c( - "rowid", "group", "term", "contrast", "estimate", - "std.error", "statistic", "conf.low", "conf.high", "p.value", - "p.value.nonsup", "p.value.noninf", "by") - # if no modeldata is available, we use `newdata`, but that often has a - # `rowid` column. This used to break the extensions.Rmd vignette. - if (no_modeldata) { - reserved <- setdiff(reserved, "rowid") + flag <- get_variable_class(modeldata, variable = v, compare = "categorical") + if (!is.null(v) && isTRUE(flag)) { + found <- c(found, v) } - bad <- unique(intersect(c(names(predictors), colnames(modeldata)), reserved)) - if (length(bad) > 0) { - msg <- c( - "These variable names are forbidden to avoid conflicts with the outputs of `marginaleffects`:", - sprintf("%s", paste(sprintf('"%s"', bad), collapse = ", ")), - "Please rename your variables before fitting the model.") - insight::format_error(msg) + } + + + # matrix predictors + mc <- attr(newdata, "newdata_matrix_columns") + if (length(mc) > 0 && any(names(predictors) %in% mc)) { + predictors <- predictors[!names(predictors) %in% mc] + insight::format_warning("Matrix columns are not supported. Use the `variables` argument to specify valid predictors, or use a function like `drop()` to convert your matrix columns into vectors.") + } + + # missing variables + miss <- setdiff(names(predictors), found) + predictors <- predictors[!names(predictors) %in% miss] + if (length(miss) > 0) { + msg <- sprintf( + "These variables were not found: %s. Try specifying the `newdata` argument explicitly and make sure the missing variable is included.", + paste(miss, collapse = ", ")) + insight::format_warning(msg) + } + + # sometimes `insight` returns interaction component as if it were a constituent variable + idx <- !grepl(":", names(predictors)) + predictors <- predictors[idx] + + # anything left? + if (length(predictors) == 0) { + msg <- "There is no valid predictor variable. Please change the `variables` argument or supply a new data frame to the `newdata` argument." + insight::format_error(msg) + } + + # functions to values + # only for predictions; get_contrast_data_numeric handles this for comparisons() + # do this before NULL-to-defaults so we can fill it in with default in case of failure + if (calling_function == "predictions") { + for (v in names(predictors)) { + if (is.function(predictors[[v]])) { + tmp <- hush(predictors[[v]](modeldata[[v]])) + if (is.null(tmp)) { + msg <- sprintf("The `%s` function produced invalid output when applied to the dataset used to fit the model.", v) + insight::format_warning(msg) + } + predictors[[v]] <- hush(predictors[[v]](modeldata[[v]])) + } } - - # when comparisons() only inludes one focal predictor, we don't need to specify it in `newdata` - # when `variables` is numeric, we still need to include it, because in - # non-linear model the contrast depend on the starting value of the focal - # variable. - found <- colnames(newdata) - if (calling_function == "comparisons") { - v <- NULL - if (isTRUE(checkmate::check_string(variables))) { - v <- variables - } else if (isTRUE(checkmate::check_list(variables, len = 1, names = "named"))) { - v <- names(variables)[1] + } + + # NULL to defaults + for (v in names(predictors)) { + if (is.null(predictors[[v]])) { + if (get_variable_class(modeldata, v, "binary")) { + predictors[[v]] <- 0:1 + } else if (get_variable_class(modeldata, v, "numeric")) { + if (calling_function == "comparisons") { + predictors[[v]] <- 1 + } else if (calling_function == "predictions") { + v_unique <- unique(modeldata[[v]]) + if (length(v_unique) < 6) { + predictors[[v]] <- v_unique + } else { + predictors[[v]] <- stats::fivenum(modeldata[[v]]) + } } - flag <- get_variable_class(modeldata, variable = v, compare = "categorical") - if (!is.null(v) && isTRUE(flag)) { - found <- c(found, v) + } else { + if (calling_function == "comparisons") { + predictors[[v]] <- "reference" + } else if (calling_function == "predictions") { + # TODO: warning when this is too large. Here or elsewhere? + predictors[[v]] <- unique(modeldata[[v]]) } + } } - - - # matrix predictors - mc <- attr(newdata, "newdata_matrix_columns") - if (length(mc) > 0 && any(names(predictors) %in% mc)) { - predictors <- predictors[!names(predictors) %in% mc] - insight::format_warning("Matrix columns are not supported. Use the `variables` argument to specify valid predictors, or use a function like `drop()` to convert your matrix columns into vectors.") - } - - # missing variables - miss <- setdiff(names(predictors), found) - predictors <- predictors[!names(predictors) %in% miss] - if (length(miss) > 0) { - msg <- sprintf( - "These variables were not found: %s. Try specifying the `newdata` argument explicitly and make sure the missing variable is included.", - paste(miss, collapse = ", ")) - insight::format_warning(msg) - } - - # sometimes `insight` returns interaction component as if it were a constituent variable - idx <- !grepl(":", names(predictors)) - predictors <- predictors[idx] - - # anything left? - if (length(predictors) == 0) { - msg <- "There is no valid predictor variable. Please change the `variables` argument or supply a new data frame to the `newdata` argument." + } + + # shortcuts and validity + for (v in names(predictors)) { + if (isTRUE(checkmate::check_data_frame(predictors[[v]], nrows = nrow(newdata)))) { + # do nothing, but don't take the other validity check branches + } else if (get_variable_class(modeldata, v, "binary")) { + if (!isTRUE(checkmate::check_numeric(predictors[[v]])) || !is_binary(predictors[[v]])) { + msg <- sprintf("The `%s` variable is binary. The corresponding entry in the `variables` argument must be 0 or 1.", v) insight::format_error(msg) - } - - # functions to values - # only for predictions; get_contrast_data_numeric handles this for comparisons() - # do this before NULL-to-defaults so we can fill it in with default in case of failure - if (calling_function == "predictions") { - for (v in names(predictors)) { - if (is.function(predictors[[v]])) { - tmp <- hush(predictors[[v]](modeldata[[v]])) - if (is.null(tmp)) { - msg <- sprintf("The `%s` function produced invalid output when applied to the dataset used to fit the model.", v) - insight::format_warning(msg) - } - predictors[[v]] <- hush(predictors[[v]](modeldata[[v]])) - } + } + # get_contrast_data requires both levels + if (calling_function == "comparisons") { + if (length(predictors[[v]]) != 2) { + msg <- sprintf("The `%s` variable is binary. The corresponding entry in the `variables` argument must be a vector of length 2.", v) + insight::format_error(msg) } - } - - # NULL to defaults - for (v in names(predictors)) { - if (is.null(predictors[[v]])) { - if (get_variable_class(modeldata, v, "binary")) { - predictors[[v]] <- 0:1 - - } else if (get_variable_class(modeldata, v, "numeric")) { - if (calling_function == "comparisons") { - predictors[[v]] <- 1 - } else if (calling_function == "predictions") { - v_unique <- unique(modeldata[[v]]) - if (length(v_unique) < 6) { - predictors[[v]] <- v_unique - } else { - predictors[[v]] <- stats::fivenum(modeldata[[v]]) - } - } - - } else { - if (calling_function == "comparisons") { - predictors[[v]] <- "reference" - } else if (calling_function == "predictions") { - # TODO: warning when this is too large. Here or elsewhere? - predictors[[v]] <- unique(modeldata[[v]]) - } - } + } + } else if (get_variable_class(modeldata, v, "numeric")) { + if (calling_function == "comparisons") { + # For comparisons(), the string shortcuts are processed in contrast_data_* functions because we need fancy labels. + # Eventually it would be nice to consolidate, but that's a lot of work. + valid_str <- c("iqr", "minmax", "sd", "2sd") + flag <- isTRUE(checkmate::check_numeric(predictors[[v]], min.len = 1, max.len = 2)) || + isTRUE(checkmate::check_choice(predictors[[v]], choices = valid_str)) || + isTRUE(checkmate::check_function(predictors[[v]])) + if (!isTRUE(flag)) { + msg <- "The %s element of the `variables` argument is invalid." + msg <- sprintf(msg, v) + insight::format_error(msg) } - } - - # shortcuts and validity - for (v in names(predictors)) { - - if (isTRUE(checkmate::check_data_frame(predictors[[v]], nrows = nrow(newdata)))) { - # do nothing, but don't take the other validity check branches - - } else if (get_variable_class(modeldata, v, "binary")) { - if (!isTRUE(checkmate::check_numeric(predictors[[v]])) || !is_binary(predictors[[v]])) { - msg <- sprintf("The `%s` variable is binary. The corresponding entry in the `variables` argument must be 0 or 1.", v) - insight::format_error(msg) - } - # get_contrast_data requires both levels - if (calling_function == "comparisons") { - if (length(predictors[[v]]) != 2) { - msg <- sprintf("The `%s` variable is binary. The corresponding entry in the `variables` argument must be a vector of length 2.", v) - insight::format_error(msg) - } - } - - } else if (get_variable_class(modeldata, v, "numeric")) { - if (calling_function == "comparisons") { - # For comparisons(), the string shortcuts are processed in contrast_data_* functions because we need fancy labels. - # Eventually it would be nice to consolidate, but that's a lot of work. - valid_str <- c("iqr", "minmax", "sd", "2sd") - flag <- isTRUE(checkmate::check_numeric(predictors[[v]], min.len = 1, max.len = 2)) || - isTRUE(checkmate::check_choice(predictors[[v]], choices = valid_str)) || - isTRUE(checkmate::check_function(predictors[[v]])) - if (!isTRUE(flag)) { - msg <- "The %s element of the `variables` argument is invalid." - msg <- sprintf(msg, v) - insight::format_error(msg) - } - - } else if (calling_function == "predictions") { - # string shortcuts - if (identical(predictors[[v]], "iqr")) { - predictors[[v]] <- stats::quantile(modeldata[[v]], probs = c(0.25, 0.75), na.rm = TRUE) - } else if (identical(predictors[[v]], "minmax")) { - predictors[[v]] <- c(min(modeldata[[v]], na.rm = TRUE), max(modeldata[[v]], na.rm = TRUE)) - } else if (identical(predictors[[v]], "sd")) { - s <- stats::sd(modeldata[[v]], na.rm = TRUE) - m <- mean(modeldata[[v]], na.rm = TRUE) - predictors[[v]] <- c(m - s / 2, m + s / 2) - } else if (identical(predictors[[v]], "2sd")) { - s <- stats::sd(modeldata[[v]], na.rm = TRUE) - m <- mean(modeldata[[v]], na.rm = TRUE) - predictors[[v]] <- c(m - s, m + s) - } else if (identical(predictors[[v]], "threenum")) { - s <- stats::sd(modeldata[[v]], na.rm = TRUE) - m <- mean(modeldata[[v]], na.rm = TRUE) - predictors[[v]] <- c(m - s, m, m + s) - } else if (identical(predictors[[v]], "fivenum")) { - predictors[[v]] <- stats::fivenum - } else if (is.character(predictors[[v]])) { - msg <- sprintf('%s is a numeric variable. The summary shortcuts supported by the variables argument are: "iqr", "minmax", "sd", "2sd", "threenum", "fivenum".', v) - insight::format_error(msg) - } - } - - } else { - if (calling_function == "comparisons") { - valid <- c("reference", "sequential", "pairwise", "all", "revpairwise", "revsequential", "revreference") - # minmax needs an actual factor in the original data to guarantee correct order of levels. - if (is.factor(modeldata[[v]])) { - valid <- c(valid, "minmax") - } - flag1 <- checkmate::check_choice(predictors[[v]], choices = valid) - flag2 <- checkmate::check_vector(predictors[[v]], len = 2) - flag3 <- checkmate::check_data_frame(predictors[[v]], nrows = nrow(newdata), ncols = 2) - flag4 <- checkmate::check_function(predictors[[v]]) - flag5 <- checkmate::check_data_frame(predictors[[v]]) - if (!isTRUE(flag1) && !isTRUE(flag2) && !isTRUE(flag3) && !isTRUE(flag4) && !isTRUE(flag5)) { - msg <- "The %s element of the `variables` argument must be a vector of length 2 or one of: %s" - msg <- sprintf(msg, v, paste(valid, collapse = ", ")) - insight::format_error(msg) - } - - } else if (calling_function == "predictions") { - if (is.character(predictors[[v]]) || is.factor(predictors[[v]])) { - if (!all(as.character(predictors[[v]]) %in% as.character(modeldata[[v]]))) { - invalid <- intersect( - as.character(predictors[[v]]), - c("pairwise", "reference", "sequential", "revpairwise", "revreference", "revsequential")) - if (length(invalid) > 0) { - msg <- "These values are only supported by the `variables` argument in the `comparisons()` function: %s" - msg <- sprintf(msg, paste(invalid, collapse = ", ")) - } else { - msg <- "Some elements of the `variables` argument are not in their original data. Check this variable: %s" - msg <- sprintf(msg, v) - } - insight::format_error(msg) - } - } - } + } else if (calling_function == "predictions") { + # string shortcuts + if (identical(predictors[[v]], "iqr")) { + predictors[[v]] <- stats::quantile(modeldata[[v]], probs = c(0.25, 0.75), na.rm = TRUE) + } else if (identical(predictors[[v]], "minmax")) { + predictors[[v]] <- c(min(modeldata[[v]], na.rm = TRUE), max(modeldata[[v]], na.rm = TRUE)) + } else if (identical(predictors[[v]], "sd")) { + s <- stats::sd(modeldata[[v]], na.rm = TRUE) + m <- mean(modeldata[[v]], na.rm = TRUE) + predictors[[v]] <- c(m - s / 2, m + s / 2) + } else if (identical(predictors[[v]], "2sd")) { + s <- stats::sd(modeldata[[v]], na.rm = TRUE) + m <- mean(modeldata[[v]], na.rm = TRUE) + predictors[[v]] <- c(m - s, m + s) + } else if (identical(predictors[[v]], "threenum")) { + s <- stats::sd(modeldata[[v]], na.rm = TRUE) + m <- mean(modeldata[[v]], na.rm = TRUE) + predictors[[v]] <- c(m - s, m, m + s) + } else if (identical(predictors[[v]], "fivenum")) { + predictors[[v]] <- stats::fivenum + } else if (is.character(predictors[[v]])) { + msg <- sprintf('%s is a numeric variable. The summary shortcuts supported by the variables argument are: "iqr", "minmax", "sd", "2sd", "threenum", "fivenum".', v) + insight::format_error(msg) } - } - - # sometimes weights don't get extracted by `find_variables()` - w <- tryCatch(insight::find_weights(model), error = function(e) NULL) - w <- intersect(w, colnames(newdata)) - others <- w - - - # goals: - # allow multiple function types: slopes() uses both difference and dydx - # when comparison is defined, use that if it works or turn back to defaults - # predictors list elements: name, value, function, label - - if (is.null(comparison)) { - fun_numeric <- fun_categorical <- comparison_function_dict[["difference"]] - lab_numeric <- lab_categorical <- comparison_label_dict[["difference"]] - - } else if (is.function(comparison)) { - fun_numeric <- fun_categorical <- comparison - lab_numeric <- lab_categorical <- "custom" - - } else if (is.character(comparison)) { - # switch to the avg version when there is a `by` function - if ((isTRUE(checkmate::check_character(by)) || isTRUE(by)) && !isTRUE(grepl("avg$", comparison))) { - comparison <- paste0(comparison, "avg") + } + } else { + if (calling_function == "comparisons") { + valid <- c("reference", "sequential", "pairwise", "all", "revpairwise", "revsequential", "revreference") + # minmax needs an actual factor in the original data to guarantee correct order of levels. + if (is.factor(modeldata[[v]])) { + valid <- c(valid, "minmax") } - - # weights if user requests `avg` or automatically switched - if (isTRUE(grepl("avg$", comparison)) && "marginaleffects_wts_internal" %in% colnames(newdata)) { - comparison <- paste0(comparison, "wts") + flag1 <- checkmate::check_choice(predictors[[v]], choices = valid) + flag2 <- checkmate::check_vector(predictors[[v]], len = 2) + flag3 <- checkmate::check_data_frame(predictors[[v]], nrows = nrow(newdata), ncols = 2) + flag4 <- checkmate::check_function(predictors[[v]]) + flag5 <- checkmate::check_data_frame(predictors[[v]]) + if (!isTRUE(flag1) && !isTRUE(flag2) && !isTRUE(flag3) && !isTRUE(flag4) && !isTRUE(flag5)) { + msg <- "The %s element of the `variables` argument must be a vector of length 2 or one of: %s" + msg <- sprintf(msg, v, paste(valid, collapse = ", ")) + insight::format_error(msg) } - - fun_numeric <- fun_categorical <- comparison_function_dict[[comparison]] - lab_numeric <- lab_categorical <- comparison_label_dict[[comparison]] - if (isTRUE(grepl("dydxavgwts|eyexavgwts|dyexavgwts|eydxavgwts", comparison))) { - fun_categorical <- comparison_function_dict[["differenceavgwts"]] - lab_categorical <- comparison_label_dict[["differenceavgwts"]] - } else if (isTRUE(grepl("dydxavg|eyexavg|dyexavg|eydxavg", comparison))) { - fun_categorical <- comparison_function_dict[["differenceavg"]] - lab_categorical <- comparison_label_dict[["differenceavg"]] - } else if (isTRUE(grepl("dydx$|eyex$|dyex$|eydx$", comparison))) { - fun_categorical <- comparison_function_dict[["difference"]] - lab_categorical <- comparison_label_dict[["difference"]] + } else if (calling_function == "predictions") { + if (is.character(predictors[[v]]) || is.factor(predictors[[v]])) { + if (!all(as.character(predictors[[v]]) %in% as.character(modeldata[[v]]))) { + invalid <- intersect( + as.character(predictors[[v]]), + c("pairwise", "reference", "sequential", "revpairwise", "revreference", "revsequential")) + if (length(invalid) > 0) { + msg <- "These values are only supported by the `variables` argument in the `comparisons()` function: %s" + msg <- sprintf(msg, paste(invalid, collapse = ", ")) + } else { + msg <- "Some elements of the `variables` argument are not in their original data. Check this variable: %s" + msg <- sprintf(msg, v) + } + insight::format_error(msg) + } } - + } + } + } + + # sometimes weights don't get extracted by `find_variables()` + w <- tryCatch(insight::find_weights(model), error = function(e) NULL) + w <- intersect(w, colnames(newdata)) + others <- w + + + # goals: + # allow multiple function types: slopes() uses both difference and dydx + # when comparison is defined, use that if it works or turn back to defaults + # predictors list elements: name, value, function, label + + if (is.null(comparison)) { + fun_numeric <- fun_categorical <- comparison_function_dict[["difference"]] + lab_numeric <- lab_categorical <- comparison_label_dict[["difference"]] + } else if (is.function(comparison)) { + fun_numeric <- fun_categorical <- comparison + lab_numeric <- lab_categorical <- "custom" + } else if (is.character(comparison)) { + # switch to the avg version when there is a `by` function + if ((isTRUE(checkmate::check_character(by)) || isTRUE(by)) && !isTRUE(grepl("avg$", comparison))) { + comparison <- paste0(comparison, "avg") } - for (v in names(predictors)) { - if (get_variable_class(modeldata, v, "numeric") && !get_variable_class(modeldata, v, "binary")) { - fun <- fun_numeric - lab <- lab_numeric - } else { - fun <- fun_categorical - lab <- lab_categorical - } - predictors[[v]] <- list( - "name" = v, - "function" = fun, - "label" = lab, - "value" = predictors[[v]], - "comparison" = comparison) + # weights if user requests `avg` or automatically switched + if (isTRUE(grepl("avg$", comparison)) && "marginaleffects_wts_internal" %in% colnames(newdata)) { + comparison <- paste0(comparison, "wts") } - # epsilon for finite difference - for (v in names(predictors)) { - if (!is.null(eps)) { - predictors[[v]][["eps"]] <- eps - } else if (is.numeric(modeldata[[v]])) { - predictors[[v]][["eps"]] <- 1e-4 * diff(range(modeldata[[v]], na.rm = TRUE, finite = TRUE)) - } else { - predictors[[v]]["eps"] <- list(NULL) - } + fun_numeric <- fun_categorical <- comparison_function_dict[[comparison]] + lab_numeric <- lab_categorical <- comparison_label_dict[[comparison]] + if (isTRUE(grepl("dydxavgwts|eyexavgwts|dyexavgwts|eydxavgwts", comparison))) { + fun_categorical <- comparison_function_dict[["differenceavgwts"]] + lab_categorical <- comparison_label_dict[["differenceavgwts"]] + } else if (isTRUE(grepl("dydxavg|eyexavg|dyexavg|eydxavg", comparison))) { + fun_categorical <- comparison_function_dict[["differenceavg"]] + lab_categorical <- comparison_label_dict[["differenceavg"]] + } else if (isTRUE(grepl("dydx$|eyex$|dyex$|eydx$", comparison))) { + fun_categorical <- comparison_function_dict[["difference"]] + lab_categorical <- comparison_label_dict[["difference"]] } + } - # can't take the slope of an outcome, except in weird brms models (issue #1006) - if (!inherits(model, "brmsfit") || !isTRUE(length(model$formula$forms) > 1)) { - dv <- hush(unlist(insight::find_response(model, combine = FALSE), use.names = FALSE)) - # sometimes insight doesn't work - if (length(dv) > 0) { - predictors <- predictors[setdiff(names(predictors), dv)] - } + for (v in names(predictors)) { + if (get_variable_class(modeldata, v, "numeric") && !get_variable_class(modeldata, v, "binary")) { + fun <- fun_numeric + lab <- lab_numeric + } else { + fun <- fun_categorical + lab <- lab_categorical } - if (length(predictors) == 0) { - insight::format_error("There is no valid predictor variable. Please make sure your model includes predictors and use the `variables` argument.") + predictors[[v]] <- list( + "name" = v, + "function" = fun, + "label" = lab, + "value" = predictors[[v]], + "comparison" = comparison) + } + + # epsilon for finite difference + for (v in names(predictors)) { + if (!is.null(eps)) { + predictors[[v]][["eps"]] <- eps + } else if (is.numeric(modeldata[[v]])) { + predictors[[v]][["eps"]] <- 1e-4 * diff(range(modeldata[[v]], na.rm = TRUE, finite = TRUE)) + } else { + predictors[[v]]["eps"] <- list(NULL) } - - # interaction: get_contrasts() assumes there is only one function when interaction=TRUE - if (isTRUE(interaction)) { - for (p in predictors) { - flag <- !identical(p[["function"]], predictors[[1]][["function"]]) - if (flag) { - stop("When `interaction=TRUE` all variables must use the same contrast function.", - call. = FALSE) - } - } + } + + # can't take the slope of an outcome, except in weird brms models (issue #1006) + if (!inherits(model, "brmsfit") || !isTRUE(length(model$formula$forms) > 1)) { + dv <- hush(unlist(insight::find_response(model, combine = FALSE), use.names = FALSE)) + # sometimes insight doesn't work + if (length(dv) > 0) { + predictors <- predictors[setdiff(names(predictors), dv)] } + } + if (length(predictors) == 0) { + insight::format_error("There is no valid predictor variable. Please make sure your model includes predictors and use the `variables` argument.") + } + + # interaction: get_contrasts() assumes there is only one function when interaction=TRUE + if (isTRUE(interaction)) { + for (p in predictors) { + flag <- !identical(p[["function"]], predictors[[1]][["function"]]) + if (flag) { + stop("When `interaction=TRUE` all variables must use the same contrast function.", + call. = FALSE) + } + } + } + + # sort variables alphabetically + predictors <- predictors[sort(names(predictors))] + others <- others[sort(names(others))] - # sort variables alphabetically - predictors <- predictors[sort(names(predictors))] - others <- others[sort(names(others))] + # internal variables are not predictors + predictors <- predictors[!names(predictors) %in% c("marginaleffects_wts_internal", "rowid_dedup")] + if (length(predictors) == 0 && calling_function == "comparisons") { + insight::format_error("No valid predictor variable.") + } - # output - out <- list(conditional = predictors, others = others) + # output + out <- list(conditional = predictors, others = others) - return(out) + return(out) } diff --git a/inst/tinytest/test-bugfix.R b/inst/tinytest/test-bugfix.R index de2066acb..4f85131b3 100644 --- a/inst/tinytest/test-bugfix.R +++ b/inst/tinytest/test-bugfix.R @@ -150,6 +150,15 @@ expect_equivalent(p1$estimate, p2$estimate) expect_equivalent(p1$std.error, p2$std.error) +# Issue #1230 +mod <- lm(mpg ~ 1, mtcars) +p <- avg_predictions(mod) +expect_false(is.na(p$estimate)) +expect_error(avg_slopes(mod), "No valid predictor") +expect_error(avg_comparisons(mod), "No valid predictor") + + + source("helpers.R") rm(list = ls()) From 6e98044bbe0356252fd877f20725d8232f7f62bc Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sun, 24 Nov 2024 07:59:40 -0500 Subject: [PATCH 08/11] update man links --- R/comparisons.R | 2 +- R/hypotheses.R | 583 +++++++++++------------ R/plot_comparisons.R | 217 +++++---- R/plot_predictions.R | 271 ++++++----- R/plot_slopes.R | 90 ++-- R/predictions.R | 1004 +++++++++++++++++++-------------------- R/slopes.R | 258 +++++----- man/comparisons.Rd | 53 ++- man/hypotheses.Rd | 14 +- man/plot_comparisons.Rd | 2 +- man/plot_predictions.Rd | 2 +- man/plot_slopes.Rd | 2 +- man/predictions.Rd | 41 +- man/slopes.Rd | 42 +- 14 files changed, 1281 insertions(+), 1300 deletions(-) diff --git a/R/comparisons.R b/R/comparisons.R index 8988e4237..166da99a0 100644 --- a/R/comparisons.R +++ b/R/comparisons.R @@ -14,7 +14,7 @@ #' #' See the comparisons vignette and package website for worked examples and case studies: #' -#' * +#' * #' * #' #' @inheritParams slopes diff --git a/R/hypotheses.R b/R/hypotheses.R index 899b665e9..9abca9237 100644 --- a/R/hypotheses.R +++ b/R/hypotheses.R @@ -2,19 +2,19 @@ #' #' @description #' Uncertainty estimates are calculated as first-order approximate standard errors for linear or non-linear functions of a vector of random variables with known or estimated covariance matrix. In that sense, [`hypotheses`] emulates the behavior of the excellent and well-established [car::deltaMethod] and [car::linearHypothesis] functions, but it supports more models; requires fewer dependencies; expands the range of tests to equivalence and superiority/inferiority; and offers convenience features like robust standard errors. -#' +#' #' To learn more, read the hypothesis tests vignette, visit the #' package website, or scroll down this page for a full list of vignettes: -#' -#' * +#' +#' * #' * -#' +#' #' Warning #1: Tests are conducted directly on the scale defined by the `type` argument. For some models, it can make sense to conduct hypothesis or equivalence tests on the `"link"` scale instead of the `"response"` scale which is often the default. -#' +#' #' Warning #2: For hypothesis tests on objects produced by the `marginaleffects` package, it is safer to use the `hypothesis` argument of the original function. Using `hypotheses()` may not work in certain environments, in lists, or when working programmatically with *apply style functions. -#' +#' #' Warning #3: The tests assume that the `hypothesis` expression is (approximately) normally distributed, which for non-linear functions of the parameters may not be realistic. More reliable confidence intervals can be obtained using the \code{inferences()} function with `method = "boot"`. -#' +#' #' @inheritParams comparisons #' @param model Model object or object generated by the `comparisons()`, `slopes()`, or `predictions()` functions. #' @param joint Joint test of statistical significance. The null hypothesis value can be set using the `hypothesis` argument. @@ -30,107 +30,107 @@ #' The test statistic for the joint Wald test is calculated as (R * theta_hat - r)' * inv(R * V_hat * R') * (R * theta_hat - r) / Q, #' where theta_hat is the vector of estimated parameters, V_hat is the estimated covariance matrix, R is a Q x P matrix for testing Q hypotheses on P parameters, #' r is a Q x 1 vector for the null hypothesis, and Q is the number of rows in R. If the test is a Chi-squared test, the test statistic is not normalized. -#' +#' #' The p-value is then calculated based on either the F-distribution (for F-test) or the Chi-squared distribution (for Chi-squared test). -#' For the F-test, the degrees of freedom are Q and (n - P), where n is the sample size and P is the number of parameters. +#' For the F-test, the degrees of freedom are Q and (n - P), where n is the sample size and P is the number of parameters. #' For the Chi-squared test, the degrees of freedom are Q. #' #' @template equivalence #' @examples #' library(marginaleffects) #' mod <- lm(mpg ~ hp + wt + factor(cyl), data = mtcars) -#' +#' #' hypotheses(mod) -#' +#' #' # Test of equality between coefficients #' hypotheses(mod, hypothesis = "hp = wt") -#' +#' #' # Non-linear function #' hypotheses(mod, hypothesis = "exp(hp + wt) = 0.1") -#' +#' #' # Robust standard errors #' hypotheses(mod, hypothesis = "hp = wt", vcov = "HC3") -#' +#' #' # b1, b2, ... shortcuts can be used to identify the position of the #' # parameters of interest in the output of #' hypotheses(mod, hypothesis = "b2 = b3") -#' +#' #' # wildcard #' hypotheses(mod, hypothesis = "b* / b2 = 1") -#' +#' #' # term names with special characters have to be enclosed in backticks #' hypotheses(mod, hypothesis = "`factor(cyl)6` = `factor(cyl)8`") -#' +#' #' mod2 <- lm(mpg ~ hp * drat, data = mtcars) #' hypotheses(mod2, hypothesis = "`hp:drat` = drat") -#' +#' #' # predictions(), comparisons(), and slopes() #' mod <- glm(am ~ hp + mpg, data = mtcars, family = binomial) #' cmp <- comparisons(mod, newdata = "mean") #' hypotheses(cmp, hypothesis = "b1 = b2") -#' +#' #' mfx <- slopes(mod, newdata = "mean") #' hypotheses(cmp, hypothesis = "b2 = 0.2") -#' +#' #' pre <- predictions(mod, newdata = datagrid(hp = 110, mpg = c(30, 35))) #' hypotheses(pre, hypothesis = "b1 = b2") -#' +#' #' # The `hypothesis` argument can be used to compute standard errors for fitted values #' mod <- glm(am ~ hp + mpg, data = mtcars, family = binomial) -#' +#' #' f <- function(x) predict(x, type = "link", newdata = mtcars) #' p <- hypotheses(mod, hypothesis = f) #' head(p) -#' +#' #' f <- function(x) predict(x, type = "response", newdata = mtcars) #' p <- hypotheses(mod, hypothesis = f) #' head(p) -#' +#' #' # Complex aggregation #' # Step 1: Collapse predicted probabilities by outcome level, for each individual #' # Step 2: Take the mean of the collapsed probabilities by group and `cyl` #' library(dplyr) #' library(MASS) #' library(dplyr) -#' +#' #' dat <- transform(mtcars, gear = factor(gear)) #' mod <- polr(gear ~ factor(cyl) + hp, dat) -#' +#' #' aggregation_fun <- function(x) { -#' predictions(x, vcov = FALSE) |> -#' mutate(group = ifelse(group %in% c("3", "4"), "3 & 4", "5")) |> -#' summarize(estimate = sum(estimate), .by = c("rowid", "cyl", "group")) |> -#' summarize(estimate = mean(estimate), .by = c("cyl", "group")) |> -#' rename(term = cyl) +#' predictions(x, vcov = FALSE) |> +#' mutate(group = ifelse(group %in% c("3", "4"), "3 & 4", "5")) |> +#' summarize(estimate = sum(estimate), .by = c("rowid", "cyl", "group")) |> +#' summarize(estimate = mean(estimate), .by = c("cyl", "group")) |> +#' rename(term = cyl) #' } -#' +#' #' hypotheses(mod, hypothesis = aggregation_fun) -#' +#' #' # Equivalence, non-inferiority, and non-superiority tests #' mod <- lm(mpg ~ hp + factor(gear), data = mtcars) #' p <- predictions(mod, newdata = "median") #' hypotheses(p, equivalence = c(17, 18)) -#' +#' #' mfx <- avg_slopes(mod, variables = "hp") #' hypotheses(mfx, equivalence = c(-.1, .1)) -#' +#' #' cmp <- avg_comparisons(mod, variables = "gear", hypothesis = "pairwise") #' hypotheses(cmp, equivalence = c(0, 10)) -#' +#' #' # joint hypotheses: character vector #' model <- lm(mpg ~ as.factor(cyl) * hp, data = mtcars) #' hypotheses(model, joint = c("as.factor(cyl)6:hp", "as.factor(cyl)8:hp")) -#' +#' #' # joint hypotheses: regular expression #' hypotheses(model, joint = "cyl") -#' +#' #' # joint hypotheses: integer indices #' hypotheses(model, joint = 2:3) -#' +#' #' # joint hypotheses: different null hypotheses #' hypotheses(model, joint = 2:3, hypothesis = 1) #' hypotheses(model, joint = 2:3, hypothesis = 1:2) -#' +#' #' # joint hypotheses: marginaleffects object #' cmp <- avg_comparisons(model) #' hypotheses(cmp, joint = "cyl") @@ -147,33 +147,33 @@ hypotheses <- function( joint_test = "f", numderiv = "fdforward", ...) { - - hypothesis_is_formula <- isTRUE(checkmate::check_formula(hypothesis)) - - if (inherits(model, c("slopes", "comparisons", "predictions")) && isTRUE(attr(model, "lean"))) { - msg <- "The `hypotheses()` function cannot be called on a lean object. Please set `options(marginaleffects_lean = FALSE)`, re-run your model, and then try again." + hypothesis_is_formula <- isTRUE(checkmate::check_formula(hypothesis)) + + if (inherits(model, c("slopes", "comparisons", "predictions")) && isTRUE(attr(model, "lean"))) { + msg <- "The `hypotheses()` function cannot be called on a lean object. Please set `options(marginaleffects_lean = FALSE)`, re-run your model, and then try again." + insight::format_error(msg) + } + + if (isTRUE(attr(model, "hypotheses_call"))) { + msg <- "The `hypotheses()` function cannot be called twice on the same object." + insight::format_error(msg) + } + + dots <- list(...) + + # deprecation + if ("FUN" %in% names(dots)) { + msg <- "`FUN` is deprecated. Please supply your function to the `hypothesis` argument instead." + if (is.null(hypothesis)) { + hypothesis <- dots[["FUN"]] + insight::format_warning(msg) + } else { insight::format_error(msg) } - - if (isTRUE(attr(model, "hypotheses_call"))) { - msg <- "The `hypotheses()` function cannot be called twice on the same object." - insight::format_error(msg) - } - - dots <- list(...) - - # deprecation - if ("FUN" %in% names(dots)) { - msg <- "`FUN` is deprecated. Please supply your function to the `hypothesis` argument instead." - if (is.null(hypothesis)) { - hypothesis <- dots[["FUN"]] - insight::format_warning(msg) - } else { - insight::format_error(msg) - } - } - - call_attr <- c(list( + } + + call_attr <- c( + list( name = "hypotheses", model = model, hypothesis = hypothesis, @@ -184,260 +184,253 @@ hypotheses <- function( joint = joint, joint_test = joint_test, numderiv = numderiv), - dots) - if ("modeldata" %in% names(dots)) { - call_attr[["modeldata"]] <- dots[["modeldata"]] + dots) + if ("modeldata" %in% names(dots)) { + call_attr[["modeldata"]] <- dots[["modeldata"]] + } + call_attr <- do.call("call", call_attr) + + ## Bootstrap + # restore an already sanitized hypothesis if necessary + hypothesis <- + if (is.null(attr(hypothesis, "label"))) { + hypothesis + } else { + attr(hypothesis, "label") } - call_attr <- do.call("call", call_attr) - - ## Bootstrap - # restore an already sanitized hypothesis if necessary - hypothesis <- - if(is.null(attr(hypothesis, "label"))){ - hypothesis - } else{ - attr(hypothesis, "label") + + # Apply inferences method + out <- inferences_dispatch( + INF_FUN = hypotheses, + model = model, + hypothesis = hypothesis, + vcov = vcov, + conf_level = conf_level, + df = df, + equivalence = equivalence, + joint = joint, + joint_test = joint_test, + numderiv = numderiv, + ...) + if (!is.null(out)) { + return(out) + } + ## Done with Bootstrap + + if (!isFALSE(joint)) { + out <- joint_test(object = model, joint_index = joint, joint_test = joint_test, hypothesis = hypothesis, df = df, vcov = vcov) + return(out) + } + + # after joint_test + if (is.null(df)) df <- Inf + + args <- list( + conf_level = conf_level, + vcov = vcov, + df = df, + equivalence = equivalence) + + # keep this NULL in case `hypothesis` was used in the previous call + args[["hypothesis"]] <- hypothesis + + if (length(dots) > 0) { + args <- c(args, dots) + } + + xcall <- substitute(model) + + if (is.symbol(xcall)) { + model <- eval(xcall, envir = parent.frame()) + } else if (is.call(xcall)) { + internal <- c( + "predictions", "avg_predictions", "comparisons", + "avg_comparisons", "slopes", "avg_slopes", "marginal_means") + # mfx object + if (as.character(xcall)[[1]] %in% internal) { + args[["x"]] <- model + out <- do.call(recall, args) + if (!is.null(out)) { + class(out) <- c("hypotheses", class(out)) + return(out) } + # non-mfx object + } else { + model <- eval(xcall, envir = parent.frame()) + } + } - # Apply inferences method - out <- inferences_dispatch( - INF_FUN = hypotheses, - model=model, - hypothesis = hypothesis, - vcov = vcov, - conf_level = conf_level, - df = df, - equivalence = equivalence, - joint = joint, - joint_test = joint_test, - numderiv = numderiv, - ...) + # marginaleffects objects: recall() + if (inherits(model, c("predictions", "comparisons", "slopes", "marginalmeans"))) { + args[["x"]] <- attr(model, "call") + out <- do.call(recall, args) if (!is.null(out)) { + class(out) <- c("hypotheses", class(out)) return(out) } - ## Done with Bootstrap - - if (!isFALSE(joint)) { - out <- joint_test(object = model, joint_index = joint, joint_test = joint_test, hypothesis = hypothesis, df = df, vcov = vcov) - return(out) - } - - # after joint_test - if (is.null(df)) df <- Inf + } + + numderiv <- sanitize_numderiv(numderiv) + + # after re-evaluation + tmp <- sanitize_hypothesis(hypothesis, ...) + hypothesis <- tmp$hypothesis + hypothesis_null <- tmp$hypothesis_null + + vcov_false <- isFALSE(vcov) + vcov <- get_vcov(model = model, vcov = vcov) + vcov.type <- get_vcov_label(vcov = vcov) + + FUNouter <- function(model, hypothesis, newparams = NULL, ...) { + if (inherits(model, c("predictions", "slopes", "comparisons"))) { + out <- model + } else if (isTRUE(checkmate::check_numeric(model))) { + out <- data.frame(term = seq_along(out), estimate = out) + } else if (inherits(model, "data.frame")) { + out <- model + if (!all(c("term", "estimate") %in% colnames(out))) { + msg <- "`hypothesis` function must return a data.frame with two columns named `term` and `estimate`." + insight::format_error(msg) + } - args <- list( - conf_level = conf_level, - vcov = vcov, - df = df, - equivalence = equivalence) + # unknown model + } else if (!is.function(hypothesis)) { + out <- insight::get_parameters(model, ...) + if ("Component" %in% colnames(out) && !anyNA(out$Component)) { + out$Parameter <- sprintf("%s_%s", out$Component, out$Parameter) + } + idx <- intersect(colnames(model), c("term", "group", "estimate")) + colnames(out)[1:2] <- c("term", "estimate") - # keep this NULL in case `hypothesis` was used in the previous call - args[["hypothesis"]] <- hypothesis + # glmmTMB + if (!is.null(newparams)) { + out$estimate <- newparams + } + } else if (hypothesis_is_formula) { + beta <- get_coef(model) + out <- data.table::data.table(estimate = beta, term = names(beta)) - if (length(dots) > 0) { - args <- c(args, dots) + # unknown model but user-supplied hypothesis function + } else { + out <- model } - xcall <- substitute(model) - - if (is.symbol(xcall)) { - model <- eval(xcall, envir = parent.frame()) - - } else if (is.call(xcall)) { - internal <- c( - "predictions", "avg_predictions", "comparisons", - "avg_comparisons", "slopes", "avg_slopes", "marginal_means") - # mfx object - if (as.character(xcall)[[1]] %in% internal) { - args[["x"]] <- model - out <- do.call(recall, args) - if (!is.null(out)) { - class(out) <- c("hypotheses", class(out)) - return(out) - } - # non-mfx object - } else { - model <- eval(xcall, envir = parent.frame()) - } + tmp <- get_hypothesis(out, hypothesis = hypothesis) + out <- tmp$estimate + + if (!is.null(attr(tmp, "label"))) { + attr(out, "label") <- attr(tmp, "label") + } else if ("hypothesis" %in% colnames(tmp)) { + attr(out, "label") <- tmp$hypothesis + } else { + attr(out, "label") <- tmp$term } - # marginaleffects objects: recall() - if (inherits(model, c("predictions", "comparisons", "slopes", "marginalmeans"))) { - args[["x"]] <- attr(model, "call") - out <- do.call(recall, args) - if (!is.null(out)) { - class(out) <- c("hypotheses", class(out)) - return(out) - } + if ("group" %in% colnames(tmp)) { + attr(out, "grouplab") <- tmp[["group"]] } - numderiv <- sanitize_numderiv(numderiv) - - # after re-evaluation - tmp <- sanitize_hypothesis(hypothesis, ...) - hypothesis <- tmp$hypothesis - hypothesis_null <- tmp$hypothesis_null - - vcov_false <- isFALSE(vcov) - vcov <- get_vcov(model = model, vcov = vcov) - vcov.type <- get_vcov_label(vcov = vcov) - - FUNouter <- function(model, hypothesis, newparams = NULL, ...) { - - if (inherits(model, c("predictions", "slopes", "comparisons"))) { - out <- model - - } else if (isTRUE(checkmate::check_numeric(model))) { - out <- data.frame(term = seq_along(out), estimate = out) - - } else if (inherits(model, "data.frame")) { - out <- model - if (!all(c("term", "estimate") %in% colnames(out))) { - msg <- "`hypothesis` function must return a data.frame with two columns named `term` and `estimate`." - insight::format_error(msg) - } - - # unknown model - } else if (!is.function(hypothesis)) { - out <- insight::get_parameters(model, ...) - if ("Component" %in% colnames(out) && !anyNA(out$Component)) { - out$Parameter <- sprintf("%s_%s", out$Component, out$Parameter) - } - idx <- intersect(colnames(model), c("term", "group", "estimate")) - colnames(out)[1:2] <- c("term", "estimate") - - # glmmTMB - if (!is.null(newparams)) { - out$estimate <- newparams - } - - } else if (hypothesis_is_formula) { - beta <- get_coef(model) - out <- data.table::data.table(estimate = beta, term = names(beta)) - - # unknown model but user-supplied hypothesis function - } else { - out <- model - } - - tmp <- get_hypothesis(out, hypothesis = hypothesis) - out <- tmp$estimate - - if (!is.null(attr(tmp, "label"))) { - attr(out, "label") <- attr(tmp, "label") - } else if ("hypothesis" %in% colnames(tmp)) { - attr(out, "label") <- tmp$hypothesis - } else { - attr(out, "label") <- tmp$term - } - - if ("group" %in% colnames(tmp)) { - attr(out, "grouplab") <- tmp[["group"]] - } + return(out) + } - return(out) - } + b <- FUNouter(model = model, hypothesis = hypothesis, ...) - b <- FUNouter(model = model, hypothesis = hypothesis, ...) - - # bayesian posterior - if (!is.null(attr(b, "posterior_draws"))) { - draws <- attr(b, "posterior_draws") - J <- NULL - se <- rep(NA, length(b)) - - # standard errors via delta method - } else if (!vcov_false && isTRUE(checkmate::check_matrix(vcov))) { - args <- list(model = model, - vcov = vcov, - hypothesis = hypothesis, - FUN = FUNouter, - numderiv = numderiv) - args <- c(args, dots) - se <- do.call("get_se_delta", args) - J <- attr(se, "jacobian") - attr(se, "jacobian") <- NULL - draws <- NULL - - # no standard error - } else { - J <- draws <- NULL - se <- rep(NA, length(b)) - } + # bayesian posterior + if (!is.null(attr(b, "posterior_draws"))) { + draws <- attr(b, "posterior_draws") + J <- NULL + se <- rep(NA, length(b)) - hyplab <- attr(b, "label") - if (!is.null(hypothesis)) { - if (is.null(hyplab)) { - hyplab <- attr(hypothesis, "label") - } - if (!is.null(hyplab)) { - out <- data.frame( - term = hyplab, - estimate = b, - std.error = se) - } else { - out <- data.frame( - term = "custom", - estimate = b, - std.error = se) - } - } else { - if (!is.null(hyplab) && length(hyplab) == length(b)) { - out <- data.frame( - term = hyplab, - estimate = b, - std.error = se) - } else { - out <- data.frame( - term = paste0("b", seq_along(b)), - estimate = b, - std.error = se) - } + # standard errors via delta method + } else if (!vcov_false && isTRUE(checkmate::check_matrix(vcov))) { + args <- list( + model = model, + vcov = vcov, + hypothesis = hypothesis, + FUN = FUNouter, + numderiv = numderiv) + args <- c(args, dots) + se <- do.call("get_se_delta", args) + J <- attr(se, "jacobian") + attr(se, "jacobian") <- NULL + draws <- NULL + + # no standard error + } else { + J <- draws <- NULL + se <- rep(NA, length(b)) + } + + hyplab <- attr(b, "label") + if (!is.null(hypothesis)) { + if (is.null(hyplab)) { + hyplab <- attr(hypothesis, "label") } - - # Remove std.error column when not computing st.errors and in bootstrap - if(vcov_false) { - out$std.error <- NULL + if (!is.null(hyplab)) { + out <- data.frame( + term = hyplab, + estimate = b, + std.error = se) + } else { + out <- data.frame( + term = "custom", + estimate = b, + std.error = se) } - - out[["group"]] <- attr(b, "grouplab") - - out <- get_ci( - out, - conf_level = conf_level, - vcov = vcov, - draws = draws, - estimate = "estimate", - null_hypothesis = hypothesis_null, - df = df, - model = model, - ...) - - if (!is.null(equivalence)) { - out <- equivalence( - out, - df = df, - equivalence = equivalence) + } else { + if (!is.null(hyplab) && length(hyplab) == length(b)) { + out <- data.frame( + term = hyplab, + estimate = b, + std.error = se) + } else { + out <- data.frame( + term = paste0("b", seq_along(b)), + estimate = b, + std.error = se) } + } + + # Remove std.error column when not computing st.errors and in bootstrap + if (vcov_false) { + out$std.error <- NULL + } + + out[["group"]] <- attr(b, "grouplab") + + out <- get_ci( + out, + conf_level = conf_level, + vcov = vcov, + draws = draws, + estimate = "estimate", + null_hypothesis = hypothesis_null, + df = df, + model = model, + ...) + + if (!is.null(equivalence)) { + out <- equivalence( + out, + df = df, + equivalence = equivalence) + } - out <- sort_columns(out) + out <- sort_columns(out) - class(out) <- c("hypotheses", "deltamethod", class(out)) - attr(out, "posterior_draws") <- draws - attr(out, "model") <- model - attr(out, "model_type") <- class(model)[1] - attr(out, "jacobian") <- J - attr(out, "call") <- call_attr - attr(out, "vcov") <- vcov - attr(out, "vcov.type") <- vcov.type - attr(out, "conf_level") <- conf_level + class(out) <- c("hypotheses", "deltamethod", class(out)) + attr(out, "posterior_draws") <- draws + attr(out, "model") <- model + attr(out, "model_type") <- class(model)[1] + attr(out, "jacobian") <- J + attr(out, "call") <- call_attr + attr(out, "vcov") <- vcov + attr(out, "vcov.type") <- vcov.type + attr(out, "conf_level") <- conf_level - # Issue #1102: hypotheses() should not be called twice on the same object - attr(out, "hypotheses_call") <- TRUE + # Issue #1102: hypotheses() should not be called twice on the same object + attr(out, "hypotheses_call") <- TRUE - return(out) + return(out) } - - - diff --git a/R/plot_comparisons.R b/R/plot_comparisons.R index f34e0b1b8..8fdb8c94d 100644 --- a/R/plot_comparisons.R +++ b/R/plot_comparisons.R @@ -6,12 +6,12 @@ #' The `by` argument is used to plot marginal comparisons, that is, comparisons made on the original data, but averaged by subgroups. This is analogous to using the `by` argument in the `comparisons()` function. #' #' The `condition` argument is used to plot conditional comparisons, that is, comparisons made on a user-specified grid. This is analogous to using the `newdata` argument and `datagrid()` function in a `comparisons()` call. All variables whose values are not specified explicitly are treated as usual by `datagrid()`, that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the `condition` argument, or supply model-specific arguments to compute population-level estimates. See details below. -#' +#' #' See the "Plots" vignette and website for tutorials and information on how to customize plots: #' -#' * https://marginaleffects.com/vignettes/plot.html +#' * https://marginaleffects.com/bonus/plot.html #' * https://marginaleffects.com -#' +#' #' @param variables Name of the variable whose contrast we want to plot on the y-axis. #' @param draw `TRUE` returns a `ggplot2` plot. `FALSE` returns a `data.frame` of the underlying data. #' @inheritParams comparisons @@ -23,15 +23,15 @@ #' @export #' @examples #' mod <- lm(mpg ~ hp * drat * factor(am), data = mtcars) -#' +#' #' plot_comparisons(mod, variables = "hp", condition = "drat") #' #' plot_comparisons(mod, variables = "hp", condition = c("drat", "am")) -#' +#' #' plot_comparisons(mod, variables = "hp", condition = list("am", "drat" = 3:5)) -#' +#' #' plot_comparisons(mod, variables = "am", condition = list("hp", "drat" = range)) -#' +#' #' plot_comparisons(mod, variables = "am", condition = list("hp", "drat" = "threenum")) plot_comparisons <- function(model, variables = NULL, @@ -48,119 +48,116 @@ plot_comparisons <- function(model, gray = getOption("marginaleffects_plot_gray", default = FALSE), draw = TRUE, ...) { - - - dots <- list(...) - if ("effect" %in% names(dots)) { - if (is.null(variables)) { - variables <- dots[["effect"]] - } else { - insight::format_error("The `effect` argument has been renamed to `variables`.") - } - } - if ("transform_post" %in% names(dots)) { # backward compatibility - transform <- dots[["transform_post"]] + dots <- list(...) + if ("effect" %in% names(dots)) { + if (is.null(variables)) { + variables <- dots[["effect"]] + } else { + insight::format_error("The `effect` argument has been renamed to `variables`.") } + } + if ("transform_post" %in% names(dots)) { # backward compatibility + transform <- dots[["transform_post"]] + } - if (inherits(model, "mira") && is.null(newdata)) { - msg <- "Please supply a data frame to the `newdata` argument explicitly." - insight::format_error(msg) - } + if (inherits(model, "mira") && is.null(newdata)) { + msg <- "Please supply a data frame to the `newdata` argument explicitly." + insight::format_error(msg) + } - # order of the first few paragraphs is important - scall <- rlang::enquo(newdata) - newdata <- sanitize_newdata_call(scall, newdata, model, by = by) - if (!isFALSE(wts) && is.null(by)) { - insight::format_error("The `wts` argument requires a `by` argument.") - } + # order of the first few paragraphs is important + scall <- rlang::enquo(newdata) + newdata <- sanitize_newdata_call(scall, newdata, model, by = by) + if (!isFALSE(wts) && is.null(by)) { + insight::format_error("The `wts` argument requires a `by` argument.") + } - checkmate::assert_character(by, null.ok = TRUE, max.len = 3, min.len = 1, names = "unnamed") - if ((!is.null(condition) && !is.null(by)) || (is.null(condition) && is.null(by))) { - msg <- "One of the `condition` and `by` arguments must be supplied, but not both." - insight::format_error(msg) - } + checkmate::assert_character(by, null.ok = TRUE, max.len = 3, min.len = 1, names = "unnamed") + if ((!is.null(condition) && !is.null(by)) || (is.null(condition) && is.null(by))) { + msg <- "One of the `condition` and `by` arguments must be supplied, but not both." + insight::format_error(msg) + } - # sanity check - checkmate::assert( - checkmate::check_character(variables, names = "unnamed"), - checkmate::check_list(variables, names = "unique"), - .var.name = "variables") + # sanity check + checkmate::assert( + checkmate::check_character(variables, names = "unnamed"), + checkmate::check_list(variables, names = "unique"), + .var.name = "variables") - modeldata <- get_modeldata( - model, - additional_variables = c(names(condition), by), - wts = wts) + modeldata <- get_modeldata( + model, + additional_variables = c(names(condition), by), + wts = wts) - # mlr3 and tidymodels - if (is.null(modeldata) || nrow(modeldata) == 0) { - modeldata <- newdata - } + # mlr3 and tidymodels + if (is.null(modeldata) || nrow(modeldata) == 0) { + modeldata <- newdata + } - # conditional - if (!is.null(condition)) { - condition <- sanitize_condition(model, condition, variables, modeldata = modeldata) - v_x <- condition$condition1 - v_color <- condition$condition2 - v_facet_1 <- condition$condition3 - v_facet_2 <- condition$condition4 - datplot <- comparisons( - model, - newdata = condition$newdata, - type = type, - vcov = vcov, - conf_level = conf_level, - by = FALSE, - wts = wts, - variables = variables, - comparison = comparison, - transform = transform, - cross = FALSE, - modeldata = modeldata, - ...) - } + # conditional + if (!is.null(condition)) { + condition <- sanitize_condition(model, condition, variables, modeldata = modeldata) + v_x <- condition$condition1 + v_color <- condition$condition2 + v_facet_1 <- condition$condition3 + v_facet_2 <- condition$condition4 + datplot <- comparisons( + model, + newdata = condition$newdata, + type = type, + vcov = vcov, + conf_level = conf_level, + by = FALSE, + wts = wts, + variables = variables, + comparison = comparison, + transform = transform, + cross = FALSE, + modeldata = modeldata, + ...) + } - # marginal - if (!is.null(by)) { - newdata <- sanitize_newdata( - model = model, - newdata = newdata, - modeldata = modeldata, - by = by, - wts = wts) - datplot <- comparisons( - model, - by = by, - newdata = newdata, - type = type, - vcov = vcov, - conf_level = conf_level, - variables = variables, - wts = wts, - comparison = comparison, - transform = transform, - cross = FALSE, - modeldata = modeldata, - ...) - v_x <- by[[1]] - v_color <- hush(by[[2]]) - v_facet_1 <- hush(by[[3]]) - v_facet_2 <- hush(by[[4]]) - } + # marginal + if (!is.null(by)) { + newdata <- sanitize_newdata( + model = model, + newdata = newdata, + modeldata = modeldata, + by = by, + wts = wts) + datplot <- comparisons( + model, + by = by, + newdata = newdata, + type = type, + vcov = vcov, + conf_level = conf_level, + variables = variables, + wts = wts, + comparison = comparison, + transform = transform, + cross = FALSE, + modeldata = modeldata, + ...) + v_x <- by[[1]] + v_color <- hush(by[[2]]) + v_facet_1 <- hush(by[[3]]) + v_facet_2 <- hush(by[[4]]) + } - datplot <- plot_preprocess(datplot, v_x = v_x, v_color = v_color, v_facet_1 = v_facet_1, v_facet_2 = v_facet_2, condition = condition, modeldata = modeldata) + datplot <- plot_preprocess(datplot, v_x = v_x, v_color = v_color, v_facet_1 = v_facet_1, v_facet_2 = v_facet_2, condition = condition, modeldata = modeldata) - # return immediately if the user doesn't want a plot - if (isFALSE(draw)) { - out <- as.data.frame(datplot) - attr(out, "posterior_draws") <- attr(datplot, "posterior_draws") - return(out) - } - - # ggplot2 - insight::check_if_installed("ggplot2") - p <- plot_build(datplot, v_x = v_x, v_color = v_color, v_facet_1 = v_facet_1, v_facet_2 = v_facet_2, gray = gray, rug = rug, modeldata = modeldata) - p <- p + ggplot2::labs(x = v_x, y = sprintf("Comparison")) + # return immediately if the user doesn't want a plot + if (isFALSE(draw)) { + out <- as.data.frame(datplot) + attr(out, "posterior_draws") <- attr(datplot, "posterior_draws") + return(out) + } - return(p) -} + # ggplot2 + insight::check_if_installed("ggplot2") + p <- plot_build(datplot, v_x = v_x, v_color = v_color, v_facet_1 = v_facet_1, v_facet_2 = v_facet_2, gray = gray, rug = rug, modeldata = modeldata) + p <- p + ggplot2::labs(x = v_x, y = sprintf("Comparison")) + return(p) +} diff --git a/R/plot_predictions.R b/R/plot_predictions.R index efa7475c0..91566420c 100644 --- a/R/plot_predictions.R +++ b/R/plot_predictions.R @@ -6,17 +6,17 @@ #' The `by` argument is used to plot marginal predictions, that is, predictions made on the original data, but averaged by subgroups. This is analogous to using the `by` argument in the `predictions()` function. #' #' The `condition` argument is used to plot conditional predictions, that is, predictions made on a user-specified grid. This is analogous to using the `newdata` argument and `datagrid()` function in a `predictions()` call. All variables whose values are not specified explicitly are treated as usual by `datagrid()`, that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the `condition` argument, or supply model-specific arguments to compute population-level estimates. See details below. -#' +#' #' See the "Plots" vignette and website for tutorials and information on how to customize plots: #' -#' * https://marginaleffects.com/vignettes/plot.html +#' * https://marginaleffects.com/bonus/plot.html #' * https://marginaleffects.com -#' +#' #' @param condition Conditional predictions #' + Character vector (max length 4): Names of the predictors to display. #' + Named list (max length 4): List names correspond to predictors. List elements can be: #' - Numeric vector -#' - Function which returns a numeric vector or a set of unique categorical values +#' - Function which returns a numeric vector or a set of unique categorical values #' - Shortcut strings for common reference values: "minmax", "quartile", "threenum" #' + 1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid). #' + Numeric variables in positions 2 and 3 are summarized by Tukey's five numbers `?stats::fivenum` @@ -40,7 +40,7 @@ #' plot_predictions(mod, condition = c("hp", "wt")) #' #' plot_predictions(mod, condition = list("hp", wt = "threenum")) -#' +#' #' plot_predictions(mod, condition = list("hp", wt = range)) #' plot_predictions <- function(model, @@ -57,139 +57,134 @@ plot_predictions <- function(model, gray = getOption("marginaleffects_plot_gray", default = FALSE), draw = TRUE, ...) { - - dots <- list(...) - - checkmate::assert_number(points, lower = 0, upper = 1) - - if ("variables" %in% names(dots)) { - insight::format_error("The `variables` argument is not supported by this function.") - } - if ("effect" %in% names(dots)) { - insight::format_error("The `effect` argument is not supported by this function.") - } - if ("transform_post" %in% names(dots)) { # backward compatibility - transform <- dots[["transform_post"]] + dots <- list(...) + + checkmate::assert_number(points, lower = 0, upper = 1) + + if ("variables" %in% names(dots)) { + insight::format_error("The `variables` argument is not supported by this function.") + } + if ("effect" %in% names(dots)) { + insight::format_error("The `effect` argument is not supported by this function.") + } + if ("transform_post" %in% names(dots)) { # backward compatibility + transform <- dots[["transform_post"]] + } + + if (inherits(model, "mira") && is.null(newdata)) { + msg <- "Please supply a data frame to the `newdata` argument explicitly." + insight::format_error(msg) + } + + # order of the first few paragraphs is important + scall <- rlang::enquo(newdata) + newdata <- sanitize_newdata_call(scall, newdata, model, by = by) + if (!isFALSE(wts) && is.null(by)) { + insight::format_error("The `wts` argument requires a `by` argument.") + } + checkmate::assert_character(by, null.ok = TRUE) + + # sanity check + checkmate::assert_character(by, null.ok = TRUE, max.len = 4, min.len = 1, names = "unnamed") + if ((!is.null(condition) && !is.null(by)) || (is.null(condition) && is.null(by))) { + msg <- "One of the `condition` and `by` arguments must be supplied, but not both." + insight::format_error(msg) + } + + modeldata <- get_modeldata( + model, + additional_variables = c(names(condition), by), + wts = wts) + + # mlr3 and tidymodels + if (is.null(modeldata) || nrow(modeldata) == 0) { + modeldata <- newdata + } + + # conditional + if (!is.null(condition)) { + condition <- sanitize_condition(model, condition, variables = NULL, modeldata = modeldata) + v_x <- condition$condition1 + v_color <- condition$condition2 + v_facet_1 <- condition$condition3 + v_facet_2 <- condition$condition4 + datplot <- predictions( + model, + newdata = condition$newdata, + type = type, + vcov = vcov, + conf_level = conf_level, + transform = transform, + modeldata = modeldata, + wts = wts, + ...) + } + + # marginal + if (!isFALSE(by) && !is.null(by)) { # switched from NULL above + condition <- NULL + + newdata <- sanitize_newdata( + model = model, + newdata = newdata, + modeldata = modeldata, + by = by, + wts = wts) + + # tidymodels & mlr3 + if (is.null(modeldata)) { + modeldata <- newdata } - if (inherits(model, "mira") && is.null(newdata)) { - msg <- "Please supply a data frame to the `newdata` argument explicitly." - insight::format_error(msg) - } - - # order of the first few paragraphs is important - scall <- rlang::enquo(newdata) - newdata <- sanitize_newdata_call(scall, newdata, model, by = by) - if (!isFALSE(wts) && is.null(by)) { - insight::format_error("The `wts` argument requires a `by` argument.") - } - checkmate::assert_character(by, null.ok = TRUE) - - # sanity check - checkmate::assert_character(by, null.ok = TRUE, max.len = 4, min.len = 1, names = "unnamed") - if ((!is.null(condition) && !is.null(by)) || (is.null(condition) && is.null(by))) { - msg <- "One of the `condition` and `by` arguments must be supplied, but not both." - insight::format_error(msg) - } - - modeldata <- get_modeldata( - model, - additional_variables = c(names(condition), by), - wts = wts) - - # mlr3 and tidymodels - if (is.null(modeldata) || nrow(modeldata) == 0) { - modeldata <- newdata - } - - # conditional - if (!is.null(condition)) { - condition <- sanitize_condition(model, condition, variables = NULL, modeldata = modeldata) - v_x <- condition$condition1 - v_color <- condition$condition2 - v_facet_1 <- condition$condition3 - v_facet_2 <- condition$condition4 - datplot <- predictions( - model, - newdata = condition$newdata, - type = type, - vcov = vcov, - conf_level = conf_level, - transform = transform, - modeldata = modeldata, - wts = wts, - ...) - } - - # marginal - if (!isFALSE(by) && !is.null(by)) { # switched from NULL above - condition <- NULL - - newdata <- sanitize_newdata( - model = model, - newdata = newdata, - modeldata = modeldata, - by = by, - wts = wts) - - # tidymodels & mlr3 - if (is.null(modeldata)) { - modeldata <- newdata - } - - datplot <- predictions( - model, - by = by, - type = type, - vcov = vcov, - conf_level = conf_level, - wts = wts, - transform = transform, - newdata = newdata, - modeldata = modeldata, - ...) - v_x <- by[[1]] - v_color <- hush(by[[2]]) - v_facet_1 <- hush(by[[3]]) - v_facet_2 <- hush(by[[4]]) - } - - dv <- unlist(insight::find_response(model, combine = TRUE), use.names = FALSE)[1] - datplot <- plot_preprocess(datplot, v_x = v_x, v_color = v_color, v_facet_1 = v_facet_1, v_facet_2 = v_facet_2, condition = condition, modeldata = modeldata) - - # return immediately if the user doesn't want a plot - if (isFALSE(draw)) { - out <- as.data.frame(datplot) - attr(out, "posterior_draws") <- attr(datplot, "posterior_draws") - return(out) - } - - # ggplot2 - insight::check_if_installed("ggplot2") - p <- plot_build(datplot, - v_x = v_x, - v_color = v_color, - v_facet_1 = v_facet_1, - v_facet_2 = v_facet_2, - points = points, - modeldata = modeldata, - dv = dv, - rug = rug, - gray = gray) - - p <- p + ggplot2::labs( - x = v_x, - y = dv, - color = v_color, - fill = v_color, - linetype = v_color) - - # attach model data for each of use - attr(p, "modeldata") <- modeldata - - return(p) + datplot <- predictions( + model, + by = by, + type = type, + vcov = vcov, + conf_level = conf_level, + wts = wts, + transform = transform, + newdata = newdata, + modeldata = modeldata, + ...) + v_x <- by[[1]] + v_color <- hush(by[[2]]) + v_facet_1 <- hush(by[[3]]) + v_facet_2 <- hush(by[[4]]) + } + + dv <- unlist(insight::find_response(model, combine = TRUE), use.names = FALSE)[1] + datplot <- plot_preprocess(datplot, v_x = v_x, v_color = v_color, v_facet_1 = v_facet_1, v_facet_2 = v_facet_2, condition = condition, modeldata = modeldata) + + # return immediately if the user doesn't want a plot + if (isFALSE(draw)) { + out <- as.data.frame(datplot) + attr(out, "posterior_draws") <- attr(datplot, "posterior_draws") + return(out) + } + + # ggplot2 + insight::check_if_installed("ggplot2") + p <- plot_build(datplot, + v_x = v_x, + v_color = v_color, + v_facet_1 = v_facet_1, + v_facet_2 = v_facet_2, + points = points, + modeldata = modeldata, + dv = dv, + rug = rug, + gray = gray) + + p <- p + ggplot2::labs( + x = v_x, + y = dv, + color = v_color, + fill = v_color, + linetype = v_color) + + # attach model data for each of use + attr(p, "modeldata") <- modeldata + + return(p) } - - - - diff --git a/R/plot_slopes.R b/R/plot_slopes.R index 07d8ddf85..50579acd4 100644 --- a/R/plot_slopes.R +++ b/R/plot_slopes.R @@ -9,15 +9,15 @@ #' See the "Plots" vignette and website for tutorials and information on how to customize plots: #' -#' * https://marginaleffects.com/vignettes/plot.html +#' * https://marginaleffects.com/bonus/plot.html #' * https://marginaleffects.com -#' +#' #' @param variables Name of the variable whose marginal effect (slope) we want to plot on the y-axis. #' @param condition Conditional slopes #' + Character vector (max length 4): Names of the predictors to display. #' + Named list (max length 4): List names correspond to predictors. List elements can be: #' - Numeric vector -#' - Function which returns a numeric vector or a set of unique categorical values +#' - Function which returns a numeric vector or a set of unique categorical values #' - Shortcut strings for common reference values: "minmax", "quartile", "threenum" #' + 1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid). #' + Numeric variables in positions 2 and 3 are summarized by Tukey's five numbers `?stats::fivenum`. @@ -32,17 +32,17 @@ #' @examples #' library(marginaleffects) #' mod <- lm(mpg ~ hp * drat * factor(am), data = mtcars) -#' +#' #' plot_slopes(mod, variables = "hp", condition = "drat") #' #' plot_slopes(mod, variables = "hp", condition = c("drat", "am")) -#' +#' #' plot_slopes(mod, variables = "hp", condition = list("am", "drat" = 3:5)) -#' +#' #' plot_slopes(mod, variables = "am", condition = list("hp", "drat" = range)) #' #' plot_slopes(mod, variables = "am", condition = list("hp", "drat" = "threenum")) -#' +#' plot_slopes <- function(model, variables = NULL, condition = NULL, @@ -57,50 +57,48 @@ plot_slopes <- function(model, gray = getOption("marginaleffects_plot_gray", default = FALSE), draw = TRUE, ...) { - - dots <- list(...) - if ("effect" %in% names(dots)) { - if (is.null(variables)) { - variables <- dots[["effect"]] - } else { - insight::format_error("The `effect` argument has been renamed to `variables`.") - } + dots <- list(...) + if ("effect" %in% names(dots)) { + if (is.null(variables)) { + variables <- dots[["effect"]] + } else { + insight::format_error("The `effect` argument has been renamed to `variables`.") } + } - if (inherits(model, "mira") && is.null(newdata)) { - msg <- "Please supply a data frame to the `newdata` argument explicitly." - insight::format_error(msg) - } + if (inherits(model, "mira") && is.null(newdata)) { + msg <- "Please supply a data frame to the `newdata` argument explicitly." + insight::format_error(msg) + } - # order of the first few paragraphs is important - # if `newdata` is a call to `typical` or `counterfactual`, insert `model` - # should probably not be nested too deeply in the call stack since we eval.parent() (not sure about this) - scall <- rlang::enquo(newdata) - newdata <- sanitize_newdata_call(scall, newdata, model) + # order of the first few paragraphs is important + # if `newdata` is a call to `typical` or `counterfactual`, insert `model` + # should probably not be nested too deeply in the call stack since we eval.parent() (not sure about this) + scall <- rlang::enquo(newdata) + newdata <- sanitize_newdata_call(scall, newdata, model) - valid <- c("dydx", "eyex", "eydx", "dyex") - checkmate::assert_choice(slope, choices = valid) + valid <- c("dydx", "eyex", "eydx", "dyex") + checkmate::assert_choice(slope, choices = valid) - out <- plot_comparisons( - model, - variables = variables, - condition = condition, - by = by, - newdata = newdata, - type = type, - vcov = vcov, - conf_level = conf_level, - wts = wts, - draw = draw, - rug = rug, - gray = gray, - comparison = slope, - ...) + out <- plot_comparisons( + model, + variables = variables, + condition = condition, + by = by, + newdata = newdata, + type = type, + vcov = vcov, + conf_level = conf_level, + wts = wts, + draw = draw, + rug = rug, + gray = gray, + comparison = slope, + ...) - if (inherits(out, "ggplot")) { - out <- out + ggplot2::labs(x = condition[1], y = "Slope") - } + if (inherits(out, "ggplot")) { + out <- out + ggplot2::labs(x = condition[1], y = "Slope") + } - return(out) + return(out) } - diff --git a/R/predictions.R b/R/predictions.R index d11446097..5ae94a7fd 100644 --- a/R/predictions.R +++ b/R/predictions.R @@ -10,7 +10,7 @@ #' #' See the predictions vignette and package website for worked examples and case studies: -#' * +#' * #' * #' #' @rdname predictions @@ -129,31 +129,32 @@ #' mod <- lm(mpg ~ wt + drat, data = mtcars) #' #' predictions( -#' mod, -#' newdata = datagrid(wt = 2:3), -#' hypothesis = "b1 = b2") +#' mod, +#' newdata = datagrid(wt = 2:3), +#' hypothesis = "b1 = b2") #' #' # same hypothesis test using row indices #' predictions( -#' mod, -#' newdata = datagrid(wt = 2:3), -#' hypothesis = "b1 - b2 = 0") +#' mod, +#' newdata = datagrid(wt = 2:3), +#' hypothesis = "b1 - b2 = 0") #' #' # same hypothesis test using numeric vector of weights #' predictions( -#' mod, -#' newdata = datagrid(wt = 2:3), -#' hypothesis = c(1, -1)) +#' mod, +#' newdata = datagrid(wt = 2:3), +#' hypothesis = c(1, -1)) #' #' # two custom contrasts using a matrix of weights -#' lc <- matrix(c( +#' lc <- matrix( +#' c( #' 1, -1, #' 2, 3), -#' ncol = 2) +#' ncol = 2) #' predictions( -#' mod, -#' newdata = datagrid(wt = 2:3), -#' hypothesis = lc) +#' mod, +#' newdata = datagrid(wt = 2:3), +#' hypothesis = lc) #' #' #' # `by` argument @@ -170,16 +171,16 @@ #' avg_predictions(nom, type = "probs", by = "group") #' #' by <- data.frame( -#' group = c("3", "4", "5"), -#' by = c("3,4", "3,4", "5")) +#' group = c("3", "4", "5"), +#' by = c("3,4", "3,4", "5")) #' #' predictions(nom, type = "probs", by = by) #' #' # sum of predicted probabilities for combined response levels #' mod <- multinom(factor(cyl) ~ mpg + am, data = mtcars, trace = FALSE) #' by <- data.frame( -#' by = c("4,6", "4,6", "8"), -#' group = as.character(c(4, 6, 8))) +#' by = c("4,6", "4,6", "8"), +#' group = as.character(c(4, 6, 8))) #' predictions(mod, newdata = "mean", byfun = sum, by = by) #' #' @inheritParams slopes @@ -201,383 +202,383 @@ predictions <- function(model, df = Inf, numderiv = "fdforward", ...) { - - dots <- list(...) - - if ("transform_post" %in% names(dots)) { - transform <- dots[["transform_post"]] - insight::format_warning("The `transform_post` argument is deprecated. Use `transform` instead.") - } - - # very early, before any use of newdata - # if `newdata` is a call to `typical` or `counterfactual`, insert `model` - scall <- rlang::enquo(newdata) - newdata <- sanitize_newdata_call(scall, newdata, model, by = by) - - if ("cross" %in% names(dots)) { - insight::format_error("The `cross` argument is not available in this function.") - } - - # extracting modeldata repeatedly is slow. - # checking dots allows marginalmeans to pass modeldata to predictions. - if (isTRUE(by)) { - modeldata <- get_modeldata(model, - additional_variables = FALSE, - modeldata = dots[["modeldata"]], - wts = wts) - } else { - modeldata <- get_modeldata(model, - additional_variables = by, - modeldata = dots[["modeldata"]], - wts = wts) - } - - # build call: match.call() doesn't work well in *apply() - # after sanitize_newdata_call - call_attr <- c(list( - name = "predictions", - model = model, - newdata = newdata, - variables = variables, - vcov = vcov, - conf_level = conf_level, - type = type, - by = by, - byfun = byfun, - wts = wts, - transform = transform, - hypothesis = hypothesis, - df = df), - dots) - if ("modeldata" %in% names(dots)) { - call_attr[["modeldata"]] <- modeldata - } - call_attr <- do.call("call", call_attr) - - # sanity checks - sanity_dots(model = model, ...) - numderiv <- sanitize_numderiv(numderiv) - sanity_df(df, newdata) - sanity_equivalence_p_adjust(equivalence, p_adjust) - model <- sanitize_model( - model = model, - newdata = newdata, - wts = wts, - vcov = vcov, - by = by, - calling_function = "predictions", - ...) - tmp <- sanitize_hypothesis(hypothesis, ...) - hypothesis <- tmp$hypothesis - hypothesis_null <- tmp$hypothesis_null - - # multiple imputation - if (inherits(model, c("mira", "amest"))) { - out <- process_imputation(model, call_attr) - return(out) - } - - # if type is NULL, we backtransform if relevant - type_string <- sanitize_type( - model = model, - type = type, - by = by, - calling_function = "predictions") - if (identical(type_string, "invlink(link)")) { - if (is.null(hypothesis)) { - type_call <- "link" - } else { - type_call <- "response" - type_string <- "response" - insight::format_warning('The `type="invlink"` argument is not available unless `hypothesis` is `NULL` or a single number. The value of the `type` argument was changed to "response" automatically. To suppress this warning, use `type="response"` explicitly in your function call.') - } + dots <- list(...) + + if ("transform_post" %in% names(dots)) { + transform <- dots[["transform_post"]] + insight::format_warning("The `transform_post` argument is deprecated. Use `transform` instead.") + } + + # very early, before any use of newdata + # if `newdata` is a call to `typical` or `counterfactual`, insert `model` + scall <- rlang::enquo(newdata) + newdata <- sanitize_newdata_call(scall, newdata, model, by = by) + + if ("cross" %in% names(dots)) { + insight::format_error("The `cross` argument is not available in this function.") + } + + # extracting modeldata repeatedly is slow. + # checking dots allows marginalmeans to pass modeldata to predictions. + if (isTRUE(by)) { + modeldata <- get_modeldata(model, + additional_variables = FALSE, + modeldata = dots[["modeldata"]], + wts = wts) + } else { + modeldata <- get_modeldata(model, + additional_variables = by, + modeldata = dots[["modeldata"]], + wts = wts) + } + + # build call: match.call() doesn't work well in *apply() + # after sanitize_newdata_call + call_attr <- c( + list( + name = "predictions", + model = model, + newdata = newdata, + variables = variables, + vcov = vcov, + conf_level = conf_level, + type = type, + by = by, + byfun = byfun, + wts = wts, + transform = transform, + hypothesis = hypothesis, + df = df), + dots) + if ("modeldata" %in% names(dots)) { + call_attr[["modeldata"]] <- modeldata + } + call_attr <- do.call("call", call_attr) + + # sanity checks + sanity_dots(model = model, ...) + numderiv <- sanitize_numderiv(numderiv) + sanity_df(df, newdata) + sanity_equivalence_p_adjust(equivalence, p_adjust) + model <- sanitize_model( + model = model, + newdata = newdata, + wts = wts, + vcov = vcov, + by = by, + calling_function = "predictions", + ...) + tmp <- sanitize_hypothesis(hypothesis, ...) + hypothesis <- tmp$hypothesis + hypothesis_null <- tmp$hypothesis_null + + # multiple imputation + if (inherits(model, c("mira", "amest"))) { + out <- process_imputation(model, call_attr) + return(out) + } + + # if type is NULL, we backtransform if relevant + type_string <- sanitize_type( + model = model, + type = type, + by = by, + calling_function = "predictions") + if (identical(type_string, "invlink(link)")) { + if (is.null(hypothesis)) { + type_call <- "link" } else { - type_call <- type_string + type_call <- "response" + type_string <- "response" + insight::format_warning('The `type="invlink"` argument is not available unless `hypothesis` is `NULL` or a single number. The value of the `type` argument was changed to "response" automatically. To suppress this warning, use `type="response"` explicitly in your function call.') } - - # save the original because it gets converted to a named list, which breaks - # user-input sanity checks - transform_original <- transform - transform <- sanitize_transform(transform) - - conf_level <- sanitize_conf_level(conf_level, ...) - newdata <- sanitize_newdata( - model = model, - newdata = newdata, - modeldata = modeldata, - by = by, - wts = wts) - - # after sanitize_newdata - sanity_by(by, newdata) - - # after sanity_by - newdata <- dedup_newdata( - model = model, - newdata = newdata, - wts = wts, - by = by, - byfun = byfun) - - if (isFALSE(wts) && "marginaleffects_wts_internal" %in% colnames(newdata)) { - wts <- "marginaleffects_wts_internal" + } else { + type_call <- type_string + } + + # save the original because it gets converted to a named list, which breaks + # user-input sanity checks + transform_original <- transform + transform <- sanitize_transform(transform) + + conf_level <- sanitize_conf_level(conf_level, ...) + newdata <- sanitize_newdata( + model = model, + newdata = newdata, + modeldata = modeldata, + by = by, + wts = wts) + + # after sanitize_newdata + sanity_by(by, newdata) + + # after sanity_by + newdata <- dedup_newdata( + model = model, + newdata = newdata, + wts = wts, + by = by, + byfun = byfun) + + if (isFALSE(wts) && "marginaleffects_wts_internal" %in% colnames(newdata)) { + wts <- "marginaleffects_wts_internal" + } + + # analogous to comparisons(variables=list(...)) + if (!is.null(variables)) { + args <- list( + "model" = model, + "newdata" = newdata, + "grid_type" = "counterfactual") + tmp <- sanitize_variables( + variables = variables, + model = model, + newdata = newdata, + modeldata = modeldata, + calling_function = "predictions" + )$conditional + for (v in tmp) { + args[[v$name]] <- v$value } - - # analogous to comparisons(variables=list(...)) - if (!is.null(variables)) { - args <- list( - "model" = model, - "newdata" = newdata, - "grid_type" = "counterfactual") - tmp <- sanitize_variables( - variables = variables, - model = model, - newdata = newdata, - modeldata = modeldata, - calling_function = "predictions" - )$conditional - for (v in tmp) { - args[[v$name]] <- v$value - } - newdata <- do.call("datagrid", args) - # the original rowids are no longer valid after averaging et al. - newdata[["rowid"]] <- NULL + newdata <- do.call("datagrid", args) + # the original rowids are no longer valid after averaging et al. + newdata[["rowid"]] <- NULL + } + + character_levels <- attr(newdata, "newdata_character_levels") + + + # trust newdata$rowid + if (!"rowid" %in% colnames(newdata)) { + newdata[["rowid"]] <- seq_len(nrow(newdata)) + } + + # mlogit models sometimes returns an `idx` column that is impossible to `rbind` + if (inherits(model, "mlogit") && inherits(newdata[["idx"]], "idx")) { + newdata[["idx"]] <- NULL + } + + # padding destroys `newdata` attributes, so we save them + newdata_attr_cache <- get_marginaleffects_attributes(newdata, include_regex = "^newdata") + + # mlogit uses an internal index that is very hard to track, so we don't + # support `newdata` and assume no padding the `idx` column is necessary for + # `get_predict` but it breaks binding, so we can't remove it in + # sanity_newdata and we can't rbind it with padding + # pad factors: `model.matrix` breaks when factor levels are missing + if (inherits(model, "mlogit")) { + padding <- data.frame() + } else { + padding <- complete_levels(newdata, character_levels) + if (nrow(padding) > 0) { + newdata <- rbindlist(list(padding, newdata)) } + } - character_levels <- attr(newdata, "newdata_character_levels") - + if (is.null(by) || isFALSE(by)) { + vcov_tmp <- vcov + } else { + vcov_tmp <- FALSE + } - # trust newdata$rowid - if (!"rowid" %in% colnames(newdata)) { - newdata[["rowid"]] <- seq_len(nrow(newdata)) - } - - # mlogit models sometimes returns an `idx` column that is impossible to `rbind` - if (inherits(model, "mlogit") && inherits(newdata[["idx"]], "idx")) { - newdata[["idx"]] <- NULL - } - # padding destroys `newdata` attributes, so we save them - newdata_attr_cache <- get_marginaleffects_attributes(newdata, include_regex = "^newdata") + ############### sanity checks are over - # mlogit uses an internal index that is very hard to track, so we don't - # support `newdata` and assume no padding the `idx` column is necessary for - # `get_predict` but it breaks binding, so we can't remove it in - # sanity_newdata and we can't rbind it with padding - # pad factors: `model.matrix` breaks when factor levels are missing - if (inherits(model, "mlogit")) { - padding <- data.frame() - } else { - padding <- complete_levels(newdata, character_levels) - if (nrow(padding) > 0) { - newdata <- rbindlist(list(padding, newdata)) - } - } - - if (is.null(by) || isFALSE(by)) { - vcov_tmp <- vcov - } else { - vcov_tmp <- FALSE + # Bootstrap + out <- inferences_dispatch( + INF_FUN = predictions, + model = model, newdata = newdata, vcov = vcov, variables = variables, type = type_string, by = by, + conf_level = conf_level, + byfun = byfun, wts = wts, transform = transform_original, hypothesis = hypothesis, ...) + if (!is.null(out)) { + return(out) + } + + + # pre-building the model matrix can speed up repeated predictions + newdata <- get_model_matrix_attribute(model, newdata) + + + # main estimation + args <- list( + model = model, + newdata = newdata, + type = type_call, + hypothesis = hypothesis, + wts = wts, + by = by, + byfun = byfun) + + args <- utils::modifyList(args, dots) + tmp <- do.call(get_predictions, args) + + hyp_by <- attr(tmp, "hypothesis_function_by") + + # two cases when tmp is a data.frame + # get_predict gets us rowid with the original rows + if (inherits(tmp, "data.frame")) { + setnames(tmp, + old = c("Predicted", "SE", "CI_low", "CI_high"), + new = c("estimate", "std.error", "conf.low", "conf.high"), + skip_absent = TRUE) + } else { + tmp <- data.frame(newdata$rowid, type, tmp) + colnames(tmp) <- c("rowid", "estimate") + if ("rowidcf" %in% colnames(newdata)) { + tmp[["rowidcf"]] <- newdata[["rowidcf"]] } - - - ############### sanity checks are over - - # Bootstrap - out <- inferences_dispatch( - INF_FUN = predictions, - model = model, newdata = newdata, vcov = vcov, variables = variables, type = type_string, by = by, - conf_level = conf_level, - byfun = byfun, wts = wts, transform = transform_original, hypothesis = hypothesis, ...) - if (!is.null(out)) { - return(out) + } + + # issue #1105: hypothesis may change the meaning of rows, so we don't want to force-merge `newdata` + if (!"rowid" %in% colnames(tmp) && nrow(tmp) == nrow(newdata) && is.null(hypothesis)) { + tmp$rowid <- newdata$rowid + } + + # degrees of freedom + if (isTRUE(vcov == "satterthwaite") || isTRUE(vcov == "kenward-roger")) { + df <- tryCatch( + # df_per_observation is an undocumented argument introduced in 0.18.4.7 to preserve backward incompatibility + insight::get_df(model, data = newdata, type = vcov, df_per_observation = TRUE), + error = function(e) NULL) + if (isTRUE(length(df) == nrow(tmp))) { + tmp$df <- df } - - - # pre-building the model matrix can speed up repeated predictions - newdata <- get_model_matrix_attribute(model, newdata) - - - # main estimation - args <- list( - model = model, - newdata = newdata, - type = type_call, - hypothesis = hypothesis, - wts = wts, - by = by, - byfun = byfun) - - args <- utils::modifyList(args, dots) - tmp <- do.call(get_predictions, args) - - hyp_by <- attr(tmp, "hypothesis_function_by") - - # two cases when tmp is a data.frame - # get_predict gets us rowid with the original rows - if (inherits(tmp, "data.frame")) { - setnames(tmp, - old = c("Predicted", "SE", "CI_low", "CI_high"), - new = c("estimate", "std.error", "conf.low", "conf.high"), - skip_absent = TRUE) - } else { - tmp <- data.frame(newdata$rowid, type, tmp) - colnames(tmp) <- c("rowid", "estimate") - if ("rowidcf" %in% colnames(newdata)) { - tmp[["rowidcf"]] <- newdata[["rowidcf"]] + } + + # bayesian posterior draws + draws <- attr(tmp, "posterior_draws") + + V <- NULL + J <- NULL + if (!isFALSE(vcov)) { + V <- get_vcov(model, vcov = vcov, type = type, ...) + + # Delta method + if (!"std.error" %in% colnames(tmp) && is.null(draws)) { + if (isTRUE(checkmate::check_matrix(V))) { + # vcov = FALSE to speed things up + fun <- function(...) { + get_predictions(..., wts = wts, verbose = FALSE)$estimate } - } - - # issue #1105: hypothesis may change the meaning of rows, so we don't want to force-merge `newdata` - if (!"rowid" %in% colnames(tmp) && nrow(tmp) == nrow(newdata) && is.null(hypothesis)) { - tmp$rowid <- newdata$rowid - } - - # degrees of freedom - if (isTRUE(vcov == "satterthwaite") || isTRUE(vcov == "kenward-roger")) { - df <- tryCatch( - # df_per_observation is an undocumented argument introduced in 0.18.4.7 to preserve backward incompatibility - insight::get_df(model, data = newdata, type = vcov, df_per_observation = TRUE), - error = function(e) NULL) - if (isTRUE(length(df) == nrow(tmp))) { - tmp$df <- df + args <- list( + model, + newdata = newdata, + vcov = V, + type = type_call, + FUN = fun, + J = J, + hypothesis = hypothesis, + by = by, + byfun = byfun, + numderiv = numderiv) + args <- utils::modifyList(args, dots) + se <- do.call(get_se_delta, args) + if (is.numeric(se) && length(se) == nrow(tmp)) { + J <- attr(se, "jacobian") + attr(se, "jacobian") <- NULL + tmp[["std.error"]] <- se } + } } - # bayesian posterior draws - draws <- attr(tmp, "posterior_draws") - - V <- NULL - J <- NULL - if (!isFALSE(vcov)) { - V <- get_vcov(model, vcov = vcov, type = type, ...) - - # Delta method - if (!"std.error" %in% colnames(tmp) && is.null(draws)) { - if (isTRUE(checkmate::check_matrix(V))) { - # vcov = FALSE to speed things up - fun <- function(...) { - get_predictions(..., wts = wts, verbose = FALSE)$estimate - } - args <- list( - model, - newdata = newdata, - vcov = V, - type = type_call, - FUN = fun, - J = J, - hypothesis = hypothesis, - by = by, - byfun = byfun, - numderiv = numderiv) - args <- utils::modifyList(args, dots) - se <- do.call(get_se_delta, args) - if (is.numeric(se) && length(se) == nrow(tmp)) { - J <- attr(se, "jacobian") - attr(se, "jacobian") <- NULL - tmp[["std.error"]] <- se - } - } - } - - tmp <- get_ci( - tmp, - conf_level = conf_level, - vcov = vcov, - draws = draws, - estimate = "estimate", - null_hypothesis = hypothesis_null, - df = df, - model = model, - p_adjust = p_adjust, - ...) + tmp <- get_ci( + tmp, + conf_level = conf_level, + vcov = vcov, + draws = draws, + estimate = "estimate", + null_hypothesis = hypothesis_null, + df = df, + model = model, + p_adjust = p_adjust, + ...) + } + + out <- data.table::data.table(tmp) + data.table::setDT(newdata) + + # expensive: only do this inside jacobian if necessary + if (!inherits(model, "mclogit")) { # weird case. probably a cleaner way but lazy now... + out <- merge_by_rowid(out, newdata) + } + + # save weights as attribute and not column + marginaleffects_wts_internal <- out[["marginaleffects_wts_internal"]] + out[["marginaleffects_wts_internal"]] <- NULL + + # bycols + if (isTRUE(checkmate::check_data_frame(by))) { + bycols <- setdiff(colnames(by), "by") + } else { + bycols <- by + } + + # sort rows: do NOT sort rows because it breaks hypothesis b1, b2, b3 indexing. + + # clean columns + stubcols <- c( + "rowid", "rowidcf", "term", "group", "hypothesis", + bycols, + "estimate", "std.error", "statistic", "p.value", "s.value", "conf.low", + "conf.high", "marginaleffects_wts", + sort(grep("^predicted", colnames(newdata), value = TRUE))) + cols <- intersect(stubcols, colnames(out)) + cols <- unique(c(cols, colnames(out))) + out <- out[, ..cols] + + attr(out, "posterior_draws") <- draws + + # equivalence tests + out <- equivalence(out, equivalence = equivalence, df = df, ...) + + # after rename to estimate / after assign draws + if (identical(type_string, "invlink(link)")) { + linv <- tryCatch(insight::link_inverse(model), error = function(e) identity) + out <- backtransform(out, transform = linv) + } + out <- backtransform(out, transform = transform) + + data.table::setDF(out) + class(out) <- c("predictions", class(out)) + out <- set_marginaleffects_attributes(out, attr_cache = newdata_attr_cache) + + # Global option for lean return object + lean = getOption("marginaleffects_lean", default = FALSE) + + # Only add (potentially large) attributes if lean isn't TRUE + if (isTRUE(lean)) { + for (a in setdiff(aa, c("names", "row.names", "class"))) attr(mfx, a) = NULL + attr(out, "lean") <- TRUE + } else { + # other attributes + attr(out, "model") <- model + attr(out, "type") <- type_string + attr(out, "model_type") <- class(model)[1] + attr(out, "vcov.type") <- get_vcov_label(vcov) + attr(out, "jacobian") <- J + attr(out, "vcov") <- V + attr(out, "newdata") <- newdata + attr(out, "weights") <- marginaleffects_wts_internal + attr(out, "conf_level") <- conf_level + attr(out, "by") <- by + attr(out, "call") <- call_attr + attr(out, "hypothesis_by") <- hyp_by + attr(out, "transform_label") <- names(transform)[1] + attr(out, "transform") <- transform[[1]] + # save newdata for use in recall() + attr(out, "newdata") <- newdata + + if (inherits(model, "brmsfit")) { + insight::check_if_installed("brms") + attr(out, "nchains") <- brms::nchains(model) } + } - out <- data.table::data.table(tmp) - data.table::setDT(newdata) + if ("group" %in% names(out) && all(out$group == "main_marginaleffect")) { + out$group <- NULL + } - # expensive: only do this inside jacobian if necessary - if (!inherits(model, "mclogit")) { # weird case. probably a cleaner way but lazy now... - out <- merge_by_rowid(out, newdata) - } - - # save weights as attribute and not column - marginaleffects_wts_internal <- out[["marginaleffects_wts_internal"]] - out[["marginaleffects_wts_internal"]] <- NULL - - # bycols - if (isTRUE(checkmate::check_data_frame(by))) { - bycols <- setdiff(colnames(by), "by") - } else { - bycols <- by - } - - # sort rows: do NOT sort rows because it breaks hypothesis b1, b2, b3 indexing. - - # clean columns - stubcols <- c( - "rowid", "rowidcf", "term", "group", "hypothesis", - bycols, - "estimate", "std.error", "statistic", "p.value", "s.value", "conf.low", - "conf.high", "marginaleffects_wts", - sort(grep("^predicted", colnames(newdata), value = TRUE))) - cols <- intersect(stubcols, colnames(out)) - cols <- unique(c(cols, colnames(out))) - out <- out[, ..cols] - - attr(out, "posterior_draws") <- draws - - # equivalence tests - out <- equivalence(out, equivalence = equivalence, df = df, ...) - - # after rename to estimate / after assign draws - if (identical(type_string, "invlink(link)")) { - linv <- tryCatch(insight::link_inverse(model), error = function(e) identity) - out <- backtransform(out, transform = linv) - } - out <- backtransform(out, transform = transform) - - data.table::setDF(out) - class(out) <- c("predictions", class(out)) - out <- set_marginaleffects_attributes(out, attr_cache = newdata_attr_cache) - - # Global option for lean return object - lean = getOption("marginaleffects_lean", default = FALSE) - - # Only add (potentially large) attributes if lean isn't TRUE - if (isTRUE(lean)) { - for (a in setdiff(aa, c("names", "row.names", "class"))) attr(mfx, a) = NULL - attr(out, "lean") <- TRUE - } else { - # other attributes - attr(out, "model") <- model - attr(out, "type") <- type_string - attr(out, "model_type") <- class(model)[1] - attr(out, "vcov.type") <- get_vcov_label(vcov) - attr(out, "jacobian") <- J - attr(out, "vcov") <- V - attr(out, "newdata") <- newdata - attr(out, "weights") <- marginaleffects_wts_internal - attr(out, "conf_level") <- conf_level - attr(out, "by") <- by - attr(out, "call") <- call_attr - attr(out, "hypothesis_by") <- hyp_by - attr(out, "transform_label") <- names(transform)[1] - attr(out, "transform") <- transform[[1]] - # save newdata for use in recall() - attr(out, "newdata") <- newdata - - if (inherits(model, "brmsfit")) { - insight::check_if_installed("brms") - attr(out, "nchains") <- brms::nchains(model) - } - } - - if ("group" %in% names(out) && all(out$group == "main_marginaleffect")) { - out$group <- NULL - } - - return(out) + return(out) } @@ -591,94 +592,90 @@ get_predictions <- function(model, verbose = TRUE, wts = FALSE, ...) { - - - out <- myTryCatch(get_predict( - model, - newdata = newdata, - type = type, - ...)) - - - if (inherits(out$value, "data.frame")) { - out <- out$value - - } else { - - # tidymodels - if (inherits(out$error, "rlang_error") && - isTRUE(grepl("the object should be", out$error$message))) { - insight::format_error(out$error$message) - } - - msg <- "Unable to compute predicted values with this model. You can try to supply a different dataset to the `newdata` argument." - if (!is.null(out$error)) { - msg <- c(paste(msg, "This error was also raised:"), "", out$error$message) - } - if (inherits(out$value, "try-error")) { - msg <- c(paste(msg, "", "This error was also raised:"), "", as.character(out$value)) - } - msg <- c(msg, "", "Bug Tracker: https://github.com/vincentarelbundock/marginaleffects/issues") - insight::format_error(msg) - } - - if (!"rowid" %in% colnames(out) && "rowid" %in% colnames(newdata) && nrow(out) == nrow(newdata)) { - out$rowid <- newdata$rowid + out <- myTryCatch(get_predict( + model, + newdata = newdata, + type = type, + ...)) + + + if (inherits(out$value, "data.frame")) { + out <- out$value + } else { + # tidymodels + if (inherits(out$error, "rlang_error") && + isTRUE(grepl("the object should be", out$error$message))) { + insight::format_error(out$error$message) } - # extract attributes before setDT - draws <- attr(out, "posterior_draws") - - data.table::setDT(out) - - # unpad factors before averaging - # trust `newdata` rowid more than `out` because sometimes `get_predict()` will add a positive index even on padded data - # HACK: the padding indexing rowid code is still a mess - # Do not merge `newdata` with `hypothesis`, because it may have the same - # number of rows but represent different quantities - if ("rowid" %in% colnames(newdata) && nrow(newdata) == nrow(out) && is.null(hypothesis)) { - out$rowid <- newdata$rowid - } - # unpad - if ("rowid" %in% colnames(out)) draws <- subset(draws, out$rowid > 0) - if ("rowid" %in% colnames(out)) out <- subset(out, rowid > 0) - if ("rowid" %in% colnames(newdata)) newdata <- subset(newdata, rowid > 0) - - # expensive: only do this inside the jacobian if necessary - if (!isFALSE(wts) || - !isTRUE(checkmate::check_flag(by, null.ok = TRUE)) || - inherits(model, "mclogit")) { # not sure why sorting is so finicky here - out <- merge_by_rowid(out, newdata) + msg <- "Unable to compute predicted values with this model. You can try to supply a different dataset to the `newdata` argument." + if (!is.null(out$error)) { + msg <- c(paste(msg, "This error was also raised:"), "", out$error$message) } - - # by: auto group - if (isTRUE(checkmate::check_character(by))) { - by <- intersect(c("group", by), colnames(out)) + if (inherits(out$value, "try-error")) { + msg <- c(paste(msg, "", "This error was also raised:"), "", as.character(out$value)) } - - # averaging by groups - out <- get_by( - out, - draws = draws, - newdata = newdata, - by = by, - byfun = byfun, - verbose = verbose, - ...) - - draws <- attr(out, "posterior_draws") - - # hypothesis tests using the delta method - out <- get_hypothesis(out, hypothesis = hypothesis, by = by, newdata = newdata, draws = draws) - - # WARNING: we cannot sort rows at the end because `get_hypothesis()` is - # applied in the middle, and it must already be sorted in the final order, - # otherwise, users cannot know for sure what is going to be the first and - # second rows, etc. - out <- sort_columns(out, newdata, by) - - - return(out) + msg <- c(msg, "", "Bug Tracker: https://github.com/vincentarelbundock/marginaleffects/issues") + insight::format_error(msg) + } + + if (!"rowid" %in% colnames(out) && "rowid" %in% colnames(newdata) && nrow(out) == nrow(newdata)) { + out$rowid <- newdata$rowid + } + + # extract attributes before setDT + draws <- attr(out, "posterior_draws") + + data.table::setDT(out) + + # unpad factors before averaging + # trust `newdata` rowid more than `out` because sometimes `get_predict()` will add a positive index even on padded data + # HACK: the padding indexing rowid code is still a mess + # Do not merge `newdata` with `hypothesis`, because it may have the same + # number of rows but represent different quantities + if ("rowid" %in% colnames(newdata) && nrow(newdata) == nrow(out) && is.null(hypothesis)) { + out$rowid <- newdata$rowid + } + # unpad + if ("rowid" %in% colnames(out)) draws <- subset(draws, out$rowid > 0) + if ("rowid" %in% colnames(out)) out <- subset(out, rowid > 0) + if ("rowid" %in% colnames(newdata)) newdata <- subset(newdata, rowid > 0) + + # expensive: only do this inside the jacobian if necessary + if (!isFALSE(wts) || + !isTRUE(checkmate::check_flag(by, null.ok = TRUE)) || + inherits(model, "mclogit")) { # not sure why sorting is so finicky here + out <- merge_by_rowid(out, newdata) + } + + # by: auto group + if (isTRUE(checkmate::check_character(by))) { + by <- intersect(c("group", by), colnames(out)) + } + + # averaging by groups + out <- get_by( + out, + draws = draws, + newdata = newdata, + by = by, + byfun = byfun, + verbose = verbose, + ...) + + draws <- attr(out, "posterior_draws") + + # hypothesis tests using the delta method + out <- get_hypothesis(out, hypothesis = hypothesis, by = by, newdata = newdata, draws = draws) + + # WARNING: we cannot sort rows at the end because `get_hypothesis()` is + # applied in the middle, and it must already be sorted in the final order, + # otherwise, users cannot know for sure what is going to be the first and + # second rows, etc. + out <- sort_columns(out, newdata, by) + + + return(out) } @@ -702,47 +699,46 @@ avg_predictions <- function(model, df = Inf, numderiv = "fdforward", ...) { - - # order of the first few paragraphs is important - # if `newdata` is a call to `typical` or `counterfactual`, insert `model` - scall <- rlang::enquo(newdata) - newdata <- sanitize_newdata_call(scall, newdata, model, by = by) - - # group by focal variable automatically unless otherwise stated - if (isTRUE(by)) { - if (isTRUE(checkmate::check_character(variables))) { - by <- variables - } else if (isTRUE(checkmate::check_list(variables, names = "named"))) { - by <- names(variables) - } - } - - # Bootstrap - out <- inferences_dispatch( - INF_FUN = avg_predictions, - model = model, newdata = newdata, vcov = vcov, variables = variables, type = type, by = by, - conf_level = conf_level, - byfun = byfun, wts = wts, transform = transform, hypothesis = hypothesis, ...) - if (!is.null(out)) { - return(out) + # order of the first few paragraphs is important + # if `newdata` is a call to `typical` or `counterfactual`, insert `model` + scall <- rlang::enquo(newdata) + newdata <- sanitize_newdata_call(scall, newdata, model, by = by) + + # group by focal variable automatically unless otherwise stated + if (isTRUE(by)) { + if (isTRUE(checkmate::check_character(variables))) { + by <- variables + } else if (isTRUE(checkmate::check_list(variables, names = "named"))) { + by <- names(variables) } - - out <- predictions( - model = model, - newdata = newdata, - variables = variables, - vcov = vcov, - conf_level = conf_level, - type = type, - by = by, - byfun = byfun, - wts = wts, - transform = transform, - hypothesis = hypothesis, - equivalence = equivalence, - p_adjust = p_adjust, - df = df, - ...) - + } + + # Bootstrap + out <- inferences_dispatch( + INF_FUN = avg_predictions, + model = model, newdata = newdata, vcov = vcov, variables = variables, type = type, by = by, + conf_level = conf_level, + byfun = byfun, wts = wts, transform = transform, hypothesis = hypothesis, ...) + if (!is.null(out)) { return(out) + } + + out <- predictions( + model = model, + newdata = newdata, + variables = variables, + vcov = vcov, + conf_level = conf_level, + type = type, + by = by, + byfun = byfun, + wts = wts, + transform = transform, + hypothesis = hypothesis, + equivalence = equivalence, + p_adjust = p_adjust, + df = df, + ...) + + return(out) } diff --git a/R/slopes.R b/R/slopes.R index ae9fb0bfd..3687ea8d4 100644 --- a/R/slopes.R +++ b/R/slopes.R @@ -10,7 +10,7 @@ #' #' See the slopes vignette and package website for worked examples and case studies: #' -#' * +#' * #' * #' #' @details @@ -98,7 +98,7 @@ #' - Returns a data frame with columns `term` and `estimate` (mandatory) and `rowid` (optional). #' - The function can also accept optional input arguments: `newdata`, `by`, `draws`. #' - This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use `posterior_draws()` to extract and manipulate the draws directly. -#' + See the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html +#' + See the Examples section below and the vignette: https://marginaleffects.com/chapters/hypothesis.html #' @param p_adjust Adjust p-values for multiple comparisons: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", or "fdr". See [stats::p.adjust] #' @param df Degrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and `Inf`. When `df` is `Inf`, the normal distribution is used. When `df` is finite, the `t` distribution is used. See [insight::get_df] for a convenient function to extract degrees of freedom. Ex: `slopes(model, df = insight::get_df(model))` #' @param eps NULL or numeric value which determines the step size to use when @@ -181,9 +181,9 @@ #' # original values, and the whole dataset is duplicated once for each #' # combination of the values in `datagrid()` #' mfx <- slopes(mod, -#' newdata = datagrid( -#' hp = c(100, 110), -#' grid_type = "counterfactual")) +#' newdata = datagrid( +#' hp = c(100, 110), +#' grid_type = "counterfactual")) #' head(mfx) #' #' # Heteroskedasticity robust standard errors @@ -194,33 +194,33 @@ #' mod <- lm(mpg ~ wt + drat, data = mtcars) #' #' slopes( -#' mod, -#' newdata = "mean", -#' hypothesis = "wt = drat") +#' mod, +#' newdata = "mean", +#' hypothesis = "wt = drat") #' #' # same hypothesis test using row indices #' slopes( -#' mod, -#' newdata = "mean", -#' hypothesis = "b1 - b2 = 0") +#' mod, +#' newdata = "mean", +#' hypothesis = "b1 - b2 = 0") #' #' # same hypothesis test using numeric vector of weights #' slopes( -#' mod, -#' newdata = "mean", -#' hypothesis = c(1, -1)) +#' mod, +#' newdata = "mean", +#' hypothesis = c(1, -1)) #' #' # two custom contrasts using a matrix of weights #' lc <- matrix( -#' c( -#' 1, -1, -#' 2, 3), -#' ncol = 2) +#' c( +#' 1, -1, +#' 2, 3), +#' ncol = 2) #' colnames(lc) <- c("Contrast A", "Contrast B") #' slopes( -#' mod, -#' newdata = "mean", -#' hypothesis = lc) +#' mod, +#' newdata = "mean", +#' hypothesis = lc) #' #' @export slopes <- function(model, @@ -239,92 +239,92 @@ slopes <- function(model, eps = NULL, numderiv = "fdforward", ...) { - dots <- list(...) + dots <- list(...) - # very early, before any use of newdata - # if `newdata` is a call to `typical` or `counterfactual`, insert `model` - scall <- rlang::enquo(newdata) - newdata <- sanitize_newdata_call(scall, newdata, model, by = by) + # very early, before any use of newdata + # if `newdata` is a call to `typical` or `counterfactual`, insert `model` + scall <- rlang::enquo(newdata) + newdata <- sanitize_newdata_call(scall, newdata, model, by = by) - # build call: match.call() doesn't work well in *apply() - call_attr <- c( - list( - name = "slopes", - model = model, - newdata = newdata, - variables = variables, - type = type, - vcov = vcov, - by = by, - conf_level = conf_level, - slope = slope, - wts = wts, - hypothesis = hypothesis, - df = df, - eps = eps), - list(...)) - call_attr <- do.call("call", call_attr) + # build call: match.call() doesn't work well in *apply() + call_attr <- c( + list( + name = "slopes", + model = model, + newdata = newdata, + variables = variables, + type = type, + vcov = vcov, + by = by, + conf_level = conf_level, + slope = slope, + wts = wts, + hypothesis = hypothesis, + df = df, + eps = eps), + list(...)) + call_attr <- do.call("call", call_attr) - # slopes() does not support a named list of variables like comparisons() - checkmate::assert_character(variables, null.ok = TRUE) + # slopes() does not support a named list of variables like comparisons() + checkmate::assert_character(variables, null.ok = TRUE) - # slope - valid <- c("dydx", "eyex", "eydx", "dyex", "dydxavg", "eyexavg", "eydxavg", "dyexavg") - checkmate::assert_choice(slope, choices = valid) + # slope + valid <- c("dydx", "eyex", "eydx", "dyex", "dydxavg", "eyexavg", "eydxavg", "dyexavg") + checkmate::assert_choice(slope, choices = valid) - # sanity checks and pre-processing - model <- sanitize_model(model = model, newdata = newdata, wts = wts, vcov = vcov, by = by, calling_function = "marginaleffects", ...) - sanity_dots(model = model, calling_function = "marginaleffects", ...) - type <- sanitize_type(model = model, type = type, calling_function = "slopes") + # sanity checks and pre-processing + model <- sanitize_model(model = model, newdata = newdata, wts = wts, vcov = vcov, by = by, calling_function = "marginaleffects", ...) + sanity_dots(model = model, calling_function = "marginaleffects", ...) + type <- sanitize_type(model = model, type = type, calling_function = "slopes") - ############### sanity checks are over + ############### sanity checks are over - # Bootstrap - out <- inferences_dispatch( - INF_FUN = slopes, - model = model, newdata = newdata, vcov = vcov, variables = variables, type = type, - conf_level = conf_level, - by = by, - wts = wts, slope = slope, hypothesis = hypothesis, ...) - if (!is.null(out)) { - return(out) - } + # Bootstrap + out <- inferences_dispatch( + INF_FUN = slopes, + model = model, newdata = newdata, vcov = vcov, variables = variables, type = type, + conf_level = conf_level, + by = by, + wts = wts, slope = slope, hypothesis = hypothesis, ...) + if (!is.null(out)) { + return(out) + } - out <- comparisons( - model, - newdata = newdata, - variables = variables, - vcov = vcov, - conf_level = conf_level, - type = type, - wts = wts, - hypothesis = hypothesis, - equivalence = equivalence, - df = df, - p_adjust = p_adjust, - by = by, - eps = eps, - numderiv = numderiv, - comparison = slope, - cross = FALSE, - # secret arguments - internal_call = TRUE, - ...) + out <- comparisons( + model, + newdata = newdata, + variables = variables, + vcov = vcov, + conf_level = conf_level, + type = type, + wts = wts, + hypothesis = hypothesis, + equivalence = equivalence, + df = df, + p_adjust = p_adjust, + by = by, + eps = eps, + numderiv = numderiv, + comparison = slope, + cross = FALSE, + # secret arguments + internal_call = TRUE, + ...) - data.table::setDT(out) + data.table::setDT(out) - lean = getOption("marginaleffects_lean", default = FALSE) - if (!isTRUE(lean)) { - attr(out, "vcov.type") <- get_vcov_label(vcov) - attr(out, "newdata") <- newdata # recall - attr(out, "call") <- call_attr - } - - # class - data.table::setDF(out) - class(out) <- setdiff(class(out), "comparisons") - class(out) <- c("slopes", "marginaleffects", class(out)) - return(out) + lean = getOption("marginaleffects_lean", default = FALSE) + if (!isTRUE(lean)) { + attr(out, "vcov.type") <- get_vcov_label(vcov) + attr(out, "newdata") <- newdata # recall + attr(out, "call") <- call_attr + } + + # class + data.table::setDF(out) + class(out) <- setdiff(class(out), "comparisons") + class(out) <- c("slopes", "marginaleffects", class(out)) + return(out) } @@ -350,41 +350,41 @@ avg_slopes <- function(model, eps = NULL, numderiv = "fdforward", ...) { - # order of the first few paragraphs is important - # if `newdata` is a call to `typical` or `counterfactual`, insert `model` - # should probably not be nested too deeply in the call stack since we eval.parent() (not sure about this) - scall <- rlang::enquo(newdata) - newdata <- sanitize_newdata_call(scall, newdata, model, by = by) + # order of the first few paragraphs is important + # if `newdata` is a call to `typical` or `counterfactual`, insert `model` + # should probably not be nested too deeply in the call stack since we eval.parent() (not sure about this) + scall <- rlang::enquo(newdata) + newdata <- sanitize_newdata_call(scall, newdata, model, by = by) - # Bootstrap - out <- inferences_dispatch( - INF_FUN = avg_slopes, - model = model, newdata = newdata, vcov = vcov, variables = variables, type = type, - conf_level = conf_level, by = by, - wts = wts, slope = slope, hypothesis = hypothesis, ...) - if (!is.null(out)) { - return(out) - } + # Bootstrap + out <- inferences_dispatch( + INF_FUN = avg_slopes, + model = model, newdata = newdata, vcov = vcov, variables = variables, type = type, + conf_level = conf_level, by = by, + wts = wts, slope = slope, hypothesis = hypothesis, ...) + if (!is.null(out)) { + return(out) + } - out <- slopes( - model = model, - newdata = newdata, - variables = variables, - type = type, - vcov = vcov, - conf_level = conf_level, - by = by, - slope = slope, - wts = wts, - hypothesis = hypothesis, - equivalence = equivalence, - p_adjust = p_adjust, - df = df, - eps = eps, - numderiv = numderiv, - ...) + out <- slopes( + model = model, + newdata = newdata, + variables = variables, + type = type, + vcov = vcov, + conf_level = conf_level, + by = by, + slope = slope, + wts = wts, + hypothesis = hypothesis, + equivalence = equivalence, + p_adjust = p_adjust, + df = df, + eps = eps, + numderiv = numderiv, + ...) - return(out) + return(out) } diff --git a/man/comparisons.Rd b/man/comparisons.Rd index 27dc71ab4..80ce291ae 100644 --- a/man/comparisons.Rd +++ b/man/comparisons.Rd @@ -225,7 +225,7 @@ first entry in the error message is used by default.} \item The function can also accept optional input arguments: \code{newdata}, \code{by}, \code{draws}. \item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{posterior_draws()} to extract and manipulate the draws directly. } -\item See the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html +\item See the Examples section below and the vignette: https://marginaleffects.com/chapters/hypothesis.html }} \item{equivalence}{Numeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.} @@ -290,7 +290,7 @@ slopes, elasticities, etc. See the comparisons vignette and package website for worked examples and case studies: \itemize{ -\item \url{https://marginaleffects.com/vignettes/comparisons.html} +\item \url{https://marginaleffects.com/chapters/comparisons.html} \item \url{https://marginaleffects.com/} } } @@ -580,9 +580,9 @@ avg_comparisons(mod, comparison = "lnratioavg", transform = exp) # Adjusted Risk Ratio: Manual specification of the `comparison` avg_comparisons( - mod, - comparison = function(hi, lo) log(mean(hi) / mean(lo)), - transform = exp) + mod, + comparison = function(hi, lo) log(mean(hi) / mean(lo)), + transform = exp) # cross contrasts mod <- lm(mpg ~ factor(cyl) * factor(gear) + hp, data = mtcars) avg_comparisons(mod, variables = c("cyl", "gear"), cross = TRUE) @@ -594,31 +594,32 @@ avg_comparisons(mod, variables = list(gear = "sequential", hp = 10)) mod <- lm(mpg ~ wt + drat, data = mtcars) comparisons( - mod, - newdata = "mean", - hypothesis = "wt = drat") + mod, + newdata = "mean", + hypothesis = "wt = drat") # same hypothesis test using row indices comparisons( - mod, - newdata = "mean", - hypothesis = "b1 - b2 = 0") + mod, + newdata = "mean", + hypothesis = "b1 - b2 = 0") # same hypothesis test using numeric vector of weights comparisons( - mod, - newdata = "mean", - hypothesis = c(1, -1)) + mod, + newdata = "mean", + hypothesis = c(1, -1)) # two custom contrasts using a matrix of weights -lc <- matrix(c( +lc <- matrix( + c( 1, -1, 2, 3), - ncol = 2) + ncol = 2) comparisons( - mod, - newdata = "mean", - hypothesis = lc) + mod, + newdata = "mean", + hypothesis = lc) # Effect of a 1 group-wise standard deviation change # First we calculate the SD in each group of `cyl` @@ -626,11 +627,11 @@ comparisons( library(dplyr) mod <- lm(mpg ~ hp + factor(cyl), mtcars) tmp <- mtcars \%>\% - group_by(cyl) \%>\% - mutate(hp_sd = sd(hp)) -avg_comparisons(mod, - variables = list(hp = function(x) data.frame(x, x + tmp$hp_sd)), - by = "cyl") + group_by(cyl) \%>\% + mutate(hp_sd = sd(hp)) +avg_comparisons(mod, + variables = list(hp = function(x) data.frame(x, x + tmp$hp_sd)), + by = "cyl") # `by` argument mod <- lm(mpg ~ hp * am * vs, data = mtcars) @@ -642,8 +643,8 @@ avg_comparisons(mod, variables = "hp", by = c("vs", "am")) library(nnet) mod <- multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE) by <- data.frame( - group = c("3", "4", "5"), - by = c("3,4", "3,4", "5")) + group = c("3", "4", "5"), + by = c("3,4", "3,4", "5")) comparisons(mod, type = "probs", by = by) } diff --git a/man/hypotheses.Rd b/man/hypotheses.Rd index dcac5e18b..b042e6c6d 100644 --- a/man/hypotheses.Rd +++ b/man/hypotheses.Rd @@ -64,7 +64,7 @@ hypotheses( \item The function can also accept optional input arguments: \code{newdata}, \code{by}, \code{draws}. \item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{posterior_draws()} to extract and manipulate the draws directly. } -\item See the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html +\item See the Examples section below and the vignette: https://marginaleffects.com/chapters/hypothesis.html }} \item{vcov}{Type of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values: @@ -123,7 +123,7 @@ Uncertainty estimates are calculated as first-order approximate standard errors To learn more, read the hypothesis tests vignette, visit the package website, or scroll down this page for a full list of vignettes: \itemize{ -\item \url{https://marginaleffects.com/vignettes/hypothesis.html} +\item \url{https://marginaleffects.com/chapters/hypothesis.html} \item \url{https://marginaleffects.com/} } @@ -234,11 +234,11 @@ dat <- transform(mtcars, gear = factor(gear)) mod <- polr(gear ~ factor(cyl) + hp, dat) aggregation_fun <- function(x) { - predictions(x, vcov = FALSE) |> - mutate(group = ifelse(group \%in\% c("3", "4"), "3 & 4", "5")) |> - summarize(estimate = sum(estimate), .by = c("rowid", "cyl", "group")) |> - summarize(estimate = mean(estimate), .by = c("cyl", "group")) |> - rename(term = cyl) + predictions(x, vcov = FALSE) |> + mutate(group = ifelse(group \%in\% c("3", "4"), "3 & 4", "5")) |> + summarize(estimate = sum(estimate), .by = c("rowid", "cyl", "group")) |> + summarize(estimate = mean(estimate), .by = c("cyl", "group")) |> + rename(term = cyl) } hypotheses(mod, hypothesis = aggregation_fun) diff --git a/man/plot_comparisons.Rd b/man/plot_comparisons.Rd index 18e926da0..100f30aea 100644 --- a/man/plot_comparisons.Rd +++ b/man/plot_comparisons.Rd @@ -127,7 +127,7 @@ The \code{condition} argument is used to plot conditional comparisons, that is, See the "Plots" vignette and website for tutorials and information on how to customize plots: \itemize{ -\item https://marginaleffects.com/vignettes/plot.html +\item https://marginaleffects.com/bonus/plot.html \item https://marginaleffects.com } } diff --git a/man/plot_predictions.Rd b/man/plot_predictions.Rd index 3ebb9f102..3caaf9add 100644 --- a/man/plot_predictions.Rd +++ b/man/plot_predictions.Rd @@ -109,7 +109,7 @@ The \code{condition} argument is used to plot conditional predictions, that is, See the "Plots" vignette and website for tutorials and information on how to customize plots: \itemize{ -\item https://marginaleffects.com/vignettes/plot.html +\item https://marginaleffects.com/bonus/plot.html \item https://marginaleffects.com } } diff --git a/man/plot_slopes.Rd b/man/plot_slopes.Rd index 4c8459706..294c3c42d 100644 --- a/man/plot_slopes.Rd +++ b/man/plot_slopes.Rd @@ -119,7 +119,7 @@ The \code{by} argument is used to plot marginal slopes, that is, slopes made on The \code{condition} argument is used to plot conditional slopes, that is, slopes computed on a user-specified grid. This is analogous to using the \code{newdata} argument and \code{datagrid()} function in a \code{slopes()} call. All variables whose values are not specified explicitly are treated as usual by \code{datagrid()}, that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the \code{condition} argument, or supply model-specific arguments to compute population-level estimates. See details below. See the "Plots" vignette and website for tutorials and information on how to customize plots: \itemize{ -\item https://marginaleffects.com/vignettes/plot.html +\item https://marginaleffects.com/bonus/plot.html \item https://marginaleffects.com } } diff --git a/man/predictions.Rd b/man/predictions.Rd index 7446b6181..f974c1197 100644 --- a/man/predictions.Rd +++ b/man/predictions.Rd @@ -190,7 +190,7 @@ levels. See examples section.} \item The function can also accept optional input arguments: \code{newdata}, \code{by}, \code{draws}. \item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{posterior_draws()} to extract and manipulate the draws directly. } -\item See the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html +\item See the Examples section below and the vignette: https://marginaleffects.com/chapters/hypothesis.html }} \item{equivalence}{Numeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.} @@ -243,7 +243,7 @@ The \code{newdata} argument and the \code{datagrid()} function can be used to co See the predictions vignette and package website for worked examples and case studies: \itemize{ -\item \url{https://marginaleffects.com/vignettes/predictions.html} +\item \url{https://marginaleffects.com/chapters/predictions.html} \item \url{https://marginaleffects.com/} } } @@ -492,31 +492,32 @@ nrow(p) mod <- lm(mpg ~ wt + drat, data = mtcars) predictions( - mod, - newdata = datagrid(wt = 2:3), - hypothesis = "b1 = b2") + mod, + newdata = datagrid(wt = 2:3), + hypothesis = "b1 = b2") # same hypothesis test using row indices predictions( - mod, - newdata = datagrid(wt = 2:3), - hypothesis = "b1 - b2 = 0") + mod, + newdata = datagrid(wt = 2:3), + hypothesis = "b1 - b2 = 0") # same hypothesis test using numeric vector of weights predictions( - mod, - newdata = datagrid(wt = 2:3), - hypothesis = c(1, -1)) + mod, + newdata = datagrid(wt = 2:3), + hypothesis = c(1, -1)) # two custom contrasts using a matrix of weights -lc <- matrix(c( +lc <- matrix( + c( 1, -1, 2, 3), - ncol = 2) + ncol = 2) predictions( - mod, - newdata = datagrid(wt = 2:3), - hypothesis = lc) + mod, + newdata = datagrid(wt = 2:3), + hypothesis = lc) # `by` argument @@ -533,16 +534,16 @@ predictions(nom, type = "probs") |> head() avg_predictions(nom, type = "probs", by = "group") by <- data.frame( - group = c("3", "4", "5"), - by = c("3,4", "3,4", "5")) + group = c("3", "4", "5"), + by = c("3,4", "3,4", "5")) predictions(nom, type = "probs", by = by) # sum of predicted probabilities for combined response levels mod <- multinom(factor(cyl) ~ mpg + am, data = mtcars, trace = FALSE) by <- data.frame( - by = c("4,6", "4,6", "8"), - group = as.character(c(4, 6, 8))) + by = c("4,6", "4,6", "8"), + group = as.character(c(4, 6, 8))) predictions(mod, newdata = "mean", byfun = sum, by = by) } diff --git a/man/slopes.Rd b/man/slopes.Rd index 471b6f6af..31c13c98e 100644 --- a/man/slopes.Rd +++ b/man/slopes.Rd @@ -169,7 +169,7 @@ first entry in the error message is used by default.} \item The function can also accept optional input arguments: \code{newdata}, \code{by}, \code{draws}. \item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{posterior_draws()} to extract and manipulate the draws directly. } -\item See the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html +\item See the Examples section below and the vignette: https://marginaleffects.com/chapters/hypothesis.html }} \item{equivalence}{Numeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.} @@ -230,7 +230,7 @@ The \code{newdata} argument and the \code{datagrid()} function can be used to co See the slopes vignette and package website for worked examples and case studies: \itemize{ -\item \url{https://marginaleffects.com/vignettes/slopes.html} +\item \url{https://marginaleffects.com/chapters/slopes.html} \item \url{https://marginaleffects.com/} } } @@ -476,9 +476,9 @@ avg_slopes(mod2, variables = "hp", by = "cyl") # original values, and the whole dataset is duplicated once for each # combination of the values in `datagrid()` mfx <- slopes(mod, - newdata = datagrid( - hp = c(100, 110), - grid_type = "counterfactual")) + newdata = datagrid( + hp = c(100, 110), + grid_type = "counterfactual")) head(mfx) # Heteroskedasticity robust standard errors @@ -489,33 +489,33 @@ head(mfx) mod <- lm(mpg ~ wt + drat, data = mtcars) slopes( - mod, - newdata = "mean", - hypothesis = "wt = drat") + mod, + newdata = "mean", + hypothesis = "wt = drat") # same hypothesis test using row indices slopes( - mod, - newdata = "mean", - hypothesis = "b1 - b2 = 0") + mod, + newdata = "mean", + hypothesis = "b1 - b2 = 0") # same hypothesis test using numeric vector of weights slopes( - mod, - newdata = "mean", - hypothesis = c(1, -1)) + mod, + newdata = "mean", + hypothesis = c(1, -1)) # two custom contrasts using a matrix of weights lc <- matrix( - c( - 1, -1, - 2, 3), - ncol = 2) + c( + 1, -1, + 2, 3), + ncol = 2) colnames(lc) <- c("Contrast A", "Contrast B") slopes( - mod, - newdata = "mean", - hypothesis = lc) + mod, + newdata = "mean", + hypothesis = lc) } \references{ From dc60044c65076857ecd4499034393954c76c20f6 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sun, 24 Nov 2024 14:36:44 -0500 Subject: [PATCH 09/11] issue #1211 escape latex and html tinytables --- R/print.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/print.R b/R/print.R index 8da427019..42822f35c 100644 --- a/R/print.R +++ b/R/print.R @@ -273,10 +273,7 @@ print.marginaleffects <- function(x, notes <- c(print_type_text, print_columns_text) if (!is.null(notes)) args$notes <- notes tab <- do.call(tinytable::tt, args) - tab <- tinytable::format_tt(i = 0, tab, escape = TRUE) - if ("p.value" %in% colnames(tab)) { - tab <- tinytable::format_tt(tab, j = "p.value", escape = TRUE) - } + tab <- tinytable::format_tt(tab, escape = TRUE) if (isTRUE(splitprint)) { msg <- "%s rows omitted" From 0fc9b90c42fc11293c65a451491884aaacd4f8db Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sun, 24 Nov 2024 14:37:01 -0500 Subject: [PATCH 10/11] tinytable --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 40d24b9fb..91d977b32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -174,7 +174,7 @@ Suggests: tidyr, tidyverse, tinysnapshot, - tinytable (>= 0.4.0), + tinytable (>= 0.6.1), tinytest, titanic, truncreg, From 0b1d3c374ae211580b883f6e2422d92d2f5a4d4c Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sun, 24 Nov 2024 15:26:40 -0500 Subject: [PATCH 11/11] Issue #1277 get_draws (#1283) * issue #1277 posterior_draws() -> get_draws() * minor --- DESCRIPTION | 2 +- NAMESPACE | 2 +- NEWS.md | 2 + R/get_coef.R | 20 +- R/get_draws.R | 134 ++++++++++++++ R/get_hypothesis.R | 2 +- R/get_vcov.R | 234 ++++++++++++------------ R/inferences.R | 2 +- R/posterior_draws.R | 137 -------------- R/slopes.R | 2 +- altdoc/quarto_website.yml | 12 +- inst/tinytest/helpers.R | 48 ++--- inst/tinytest/test-inferences.R | 4 +- inst/tinytest/test-pkg-brms.R | 211 ++++++++++----------- man/comparisons.Rd | 2 +- man/get_coef.Rd | 4 +- man/{posteriordraws.Rd => get_draws.Rd} | 13 +- man/get_vcov.Rd | 4 +- man/hypotheses.Rd | 2 +- man/inferences.Rd | 2 +- man/posterior_draws.Rd | 21 +-- man/predictions.Rd | 2 +- man/slopes.Rd | 2 +- 23 files changed, 428 insertions(+), 436 deletions(-) create mode 100644 R/get_draws.R delete mode 100644 R/posterior_draws.R rename man/{posteriordraws.Rd => get_draws.Rd} (66%) diff --git a/DESCRIPTION b/DESCRIPTION index 91d977b32..14c33fbce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -209,6 +209,7 @@ Collate: 'get_contrast_data_logical.R' 'get_contrast_data_numeric.R' 'get_contrasts.R' + 'get_draws.R' 'get_group_names.R' 'get_hypothesis.R' 'get_jacobian.R' @@ -284,7 +285,6 @@ Collate: 'plot_comparisons.R' 'plot_predictions.R' 'plot_slopes.R' - 'posterior_draws.R' 'predictions.R' 'print.R' 'recall.R' diff --git a/NAMESPACE b/NAMESPACE index d66905755..407673516 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -198,6 +198,7 @@ export(expect_margins) export(expect_predictions) export(expect_slopes) export(get_coef) +export(get_draws) export(get_group_names) export(get_model_matrix) export(get_predict) @@ -213,7 +214,6 @@ export(plot_comparisons) export(plot_predictions) export(plot_slopes) export(posterior_draws) -export(posteriordraws) export(predictions) export(set_coef) export(slopes) diff --git a/NEWS.md b/NEWS.md index c1d785a3b..cbdc6d786 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,6 +22,8 @@ Misc: * Be less strict about combining columns of different types. This allows us to handle types like `haven_labelled`. Thanks to @mwindzio for report #1238. * In `lme4` and `glmmTMB` models, warnings are now silenced when the user specifically passes `re.form=NULL`. Thanks to @mattansb for the feature request. * New startup message appears once per 24hr period and can be suppressed using `options(marginaleffects_startup_message = FALSE)`. +* `posterior_draws()` is renamed `get_draws()` because it also applies to bootstrap and simulation-based inference draws. +* `get_coef()` and `get_vcov()` are now documented on the main website, as they are useful helper functions. ## 0.23.0 diff --git a/R/get_coef.R b/R/get_coef.R index 518316486..964115d88 100644 --- a/R/get_coef.R +++ b/R/get_coef.R @@ -1,23 +1,23 @@ -#' Get a named vector of coefficients from a model object (internal function) -#' +#' Get a named vector of coefficients from a model object +#' #' @inheritParams slopes #' @return A named vector of coefficients. The names must match those of the variance matrix. #' @rdname get_coef #' @keywords internal #' @export -get_coef <- function (model, ...) { - UseMethod("get_coef", model) +get_coef <- function(model, ...) { + UseMethod("get_coef", model) } #' @rdname get_coef #' @export get_coef.default <- function(model, ...) { - ## faster - # out <- stats::coef(model) + ## faster + # out <- stats::coef(model) - # more general - out <- insight::get_parameters(model, component = "all") + # more general + out <- insight::get_parameters(model, component = "all") - out <- stats::setNames(out$Estimate, out$Parameter) - return(out) + out <- stats::setNames(out$Estimate, out$Parameter) + return(out) } diff --git a/R/get_draws.R b/R/get_draws.R new file mode 100644 index 000000000..09d9f9c2a --- /dev/null +++ b/R/get_draws.R @@ -0,0 +1,134 @@ +#' Extract Posterior Draws or Bootstrap Resamples from `marginaleffects` Objects +#' +#' @param x An object produced by a `marginaleffects` package function, such as `predictions()`, `avg_slopes()`, `hypotheses()`, etc. +#' @param shape string indicating the shape of the output format: +#' * "long": long format data frame +#' * "DxP": Matrix with draws as rows and parameters as columns +#' * "PxD": Matrix with draws as rows and parameters as columns +#' * "rvar": Random variable datatype (see `posterior` package documentation). +#' @return A data.frame with `drawid` and `draw` columns. +#' @export +get_draws <- function(x, shape = "long") { + checkmate::assert_choice(shape, choices = c("long", "DxP", "PxD", "rvar")) + + # tidy.comparisons() sometimes already saves draws in a nice long format + draws <- attr(x, "posterior_draws") + if (inherits(draws, "posterior_draws")) { + return(draws) + } + + if (is.null(attr(x, "posterior_draws"))) { + warning('This object does not include a "posterior_draws" attribute. The `posterior_draws` function only supports bayesian models produced by the `marginaleffects` or `predictions` functions of the `marginaleffects` package.', + call. = FALSE) + return(x) + } + + if (nrow(draws) != nrow(x)) { + stop("The number of parameters in the object does not match the number of parameters for which posterior draws are available.", call. = FALSE) + } + + if (shape %in% c("PxD", "DxP")) { + row.names(draws) <- paste0("b", seq_len(nrow(draws))) + colnames(draws) <- paste0("draw", seq_len(ncol(draws))) + } + + if (shape == "PxD") { + return(draws) + } + + if (shape == "DxP") { + return(t(draws)) + } + + if (shape == "rvar") { + insight::check_if_installed("posterior") + draws <- t(draws) + if (!is.null(attr(x, "nchains"))) { + x[["rvar"]] <- posterior::rvar(draws, nchains = attr(x, "nchains")) + } else { + x[["rvar"]] <- posterior::rvar(draws) + } + return(x) + } + + if (shape == "long") { + draws <- data.table(draws) + setnames(draws, as.character(seq_len(ncol(draws)))) + for (v in colnames(x)) { + draws[[v]] <- x[[v]] + } + out <- melt( + draws, + id.vars = colnames(x), + variable.name = "drawid", + value.name = "draw") + cols <- unique(c("drawid", "draw", "rowid", colnames(out))) + cols <- intersect(cols, colnames(out)) + setcolorder(out, cols) + data.table::setDF(out) + return(out) + } +} + + +average_draws <- function(data, index, draws, byfun = NULL) { + insight::check_if_installed("collapse", minimum_version = "1.9.0") + + w <- data[["marginaleffects_wts_internal"]] + if (all(is.na(w))) { + w <- NULL + } + + if (is.null(index)) { + index <- intersect(colnames(data), "type") + } + + if (length(index) > 0) { + g <- collapse::GRP(data, by = index) + + if (is.null(byfun)) { + draws <- collapse::fmean( + draws, + g = g, + w = w, + drop = FALSE) + } else { + draws <- collapse::BY( + draws, + g = g, + FUN = byfun, + drop = FALSE) + } + out <- data.table( + g[["groups"]], + average = collapse::dapply(draws, MARGIN = 1, FUN = collapse::fmedian)) + } else { + if (is.null(byfun)) { + draws <- collapse::fmean( + draws, + w = w, + drop = FALSE) + } else { + draws <- collapse::BY( + draws, + g = g, + FUN = byfun, + drop = FALSE) + } + out <- data.table(average = collapse::dapply(draws, MARGIN = 1, FUN = collapse::fmedian)) + } + + setnames(out, old = "average", new = "estimate") + attr(out, "posterior_draws") <- draws + return(out) +} + + + + +#' alias to `get_draws()` for backward compatibility with JJSS +#' +#' @inherit posterior_draws +#' @keywords internal +#' @export +posterior_draws <- get_draws diff --git a/R/get_hypothesis.R b/R/get_hypothesis.R index 2961584c4..11f5f3abc 100644 --- a/R/get_hypothesis.R +++ b/R/get_hypothesis.R @@ -27,7 +27,7 @@ get_hypothesis <- function( if (is.function(hypothesis)) { if (!is.null(draws)) { - msg <- "The `hypothesis` argument does not support function for models with draws. You can use `posterior_draws()` to extract draws and manipulate them directly instead." + msg <- "The `hypothesis` argument does not support function for models with draws. You can use `get_draws()` to extract draws and manipulate them directly instead." insight::format_error(msg) } if ("rowid" %in% colnames(x) && "rowid" %in% colnames(newdata)) { diff --git a/R/get_vcov.R b/R/get_vcov.R index 8e618997a..277043e03 100644 --- a/R/get_vcov.R +++ b/R/get_vcov.R @@ -1,4 +1,4 @@ -#' Get a named variance-covariance matrix from a model object (internal function) +#' Get a named variance-covariance matrix from a model object #' #' @inheritParams slopes #' @return A named square matrix of variance and covariances. The names must match the coefficient names. @@ -6,7 +6,7 @@ #' @keywords internal #' @export get_vcov <- function(model, ...) { - UseMethod("get_vcov", model) + UseMethod("get_vcov", model) } @@ -15,78 +15,77 @@ get_vcov <- function(model, ...) { get_vcov.default <- function(model, vcov = NULL, ...) { - - if (isFALSE(vcov)) { - return(NULL) - } - - vcov <- sanitize_vcov(model = model, vcov = vcov) - if (isTRUE(checkmate::check_matrix(vcov))) { - return(vcov) - } - - # {insight} - args <- get_varcov_args(model, vcov) - args[["x"]] <- model - args[["component"]] <- "all" - - # 1st try: with arguments - fun <- get("get_varcov", asNamespace("insight")) - out <- myTryCatch(do.call("fun", args)) - - # 2nd try: without arguments - if (!isTRUE(checkmate::check_matrix(out$value, min.rows = 1))) { - out <- myTryCatch(insight::get_varcov(model)) - if (isTRUE(checkmate::check_matrix(out$value, min.rows = 1))) { - msg <- "Unable to extract a variance-covariance matrix using this `vcov` argument. Standard errors are computed using the default variance instead. Perhaps the model or argument is not supported by the `sandwich` ('HC0', 'HC3', ~clusterid, etc.) or `clubSandwich` ('CR0', etc.) packages. If you believe that the model is supported by one of these two packages, you can open a feature request on Github." - insight::format_warning(msg) - } + if (isFALSE(vcov)) { + return(NULL) + } + + vcov <- sanitize_vcov(model = model, vcov = vcov) + if (isTRUE(checkmate::check_matrix(vcov))) { + return(vcov) + } + + # {insight} + args <- get_varcov_args(model, vcov) + args[["x"]] <- model + args[["component"]] <- "all" + + # 1st try: with arguments + fun <- get("get_varcov", asNamespace("insight")) + out <- myTryCatch(do.call("fun", args)) + + # 2nd try: without arguments + if (!isTRUE(checkmate::check_matrix(out$value, min.rows = 1))) { + out <- myTryCatch(insight::get_varcov(model)) + if (isTRUE(checkmate::check_matrix(out$value, min.rows = 1))) { + msg <- "Unable to extract a variance-covariance matrix using this `vcov` argument. Standard errors are computed using the default variance instead. Perhaps the model or argument is not supported by the `sandwich` ('HC0', 'HC3', ~clusterid, etc.) or `clubSandwich` ('CR0', etc.) packages. If you believe that the model is supported by one of these two packages, you can open a feature request on Github." + insight::format_warning(msg) } + } - if (!isTRUE(checkmate::check_matrix(out$value, min.rows = 1))) { - msg <- "Unable to extract a variance-covariance matrix from this model." - warning(msg, call. = FALSE) - return(NULL) + if (!isTRUE(checkmate::check_matrix(out$value, min.rows = 1))) { + msg <- "Unable to extract a variance-covariance matrix from this model." + warning(msg, call. = FALSE) + return(NULL) # valid matrix with warning - } else if (!is.null(out$warning)) { - warning(out$warning$message, call. = FALSE) + } else if (!is.null(out$warning)) { + warning(out$warning$message, call. = FALSE) + } + + out <- out[["value"]] + + # problem: no row.names + if (is.null(row.names(out))) { + coefs <- get_coef(model) + if (ncol(out) == length(coefs)) { + termnames <- names(stats::coef(model)) + if (length(termnames) == ncol(out)) { + colnames(out) <- termnames + row.names(out) <- termnames + } + } else { + return(NULL) } - - out <- out[["value"]] - - # problem: no row.names - if (is.null(row.names(out))) { - coefs <- get_coef(model) - if (ncol(out) == length(coefs)) { - termnames <- names(stats::coef(model)) - if (length(termnames) == ncol(out)) { - colnames(out) <- termnames - row.names(out) <- termnames - } - } else { - return(NULL) - } + } + + # problem: duplicate colnames + if (anyDuplicated(colnames(out)) == 0) { + coefs <- get_coef(model, ...) + # 1) Check above is needed for `AER::tobit` and others where `out` + # includes Log(scale) but `coef` does not Dangerous for `oridinal::clm` + # and others where there are important duplicate column names in + # `out`, and selecting with [,] repeats the first instance. + + # 2) Sometimes out has more columns than coefs + if (all(names(coefs) %in% colnames(out))) { + out <- out[names(coefs), names(coefs), drop = FALSE] } + } - # problem: duplicate colnames - if (anyDuplicated(colnames(out)) == 0) { - coefs <- get_coef(model, ...) - # 1) Check above is needed for `AER::tobit` and others where `out` - # includes Log(scale) but `coef` does not Dangerous for `oridinal::clm` - # and others where there are important duplicate column names in - # `out`, and selecting with [,] repeats the first instance. - - # 2) Sometimes out has more columns than coefs - if (all(names(coefs) %in% colnames(out))) { - out <- out[names(coefs), names(coefs), drop = FALSE] - } - } + return(out) - return(out) - - # NOTES: - # survival::coxph with 1 regressor produces a vector + # NOTES: + # survival::coxph with 1 regressor produces a vector } @@ -96,63 +95,64 @@ get_vcov.default <- function(model, #' #' @keywords internal get_varcov_args <- function(model, vcov) { - if (is.null(vcov) || isTRUE(checkmate::check_matrix(vcov))) { - out <- list() - return(out) - } + if (is.null(vcov) || isTRUE(checkmate::check_matrix(vcov))) { + out <- list() + return(out) + } - if (isTRUE(checkmate::check_formula(vcov))) { - out <- list("vcov" = "vcovCL", "vcov_args" = list("cluster" = vcov)) - return(out) - } + if (isTRUE(checkmate::check_formula(vcov))) { + out <- list("vcov" = "vcovCL", "vcov_args" = list("cluster" = vcov)) + return(out) + } - if (isTRUE(vcov == "satterthwaite") || isTRUE(vcov == "kenward-roger")) { - if (!isTRUE(inherits(model, "lmerMod")) && !isTRUE(inherits(model, "lmerModTest"))) { - msg <- 'Satterthwaite and Kenward-Roger corrections are only available for linear mixed effects models from the `lme4` package, and objects of class `lmerMod` or `lmerModTest`.' - stop(msg, call. = FALSE) - } - if (isTRUE(vcov == "satterthwaite")) { - return(list()) - } else { - return(list(vcov = "kenward-roger")) - } + if (isTRUE(vcov == "satterthwaite") || isTRUE(vcov == "kenward-roger")) { + if (!isTRUE(inherits(model, "lmerMod")) && !isTRUE(inherits(model, "lmerModTest"))) { + msg <- "Satterthwaite and Kenward-Roger corrections are only available for linear mixed effects models from the `lme4` package, and objects of class `lmerMod` or `lmerModTest`." + stop(msg, call. = FALSE) } - - out <- switch(vcov, - "stata" = list(vcov = "HC2"), - "robust" = list(vcov = "HC3"), - "bootstrap" = list(vcov = "BS"), - "outer-product" = list(vcov = "OPG"), - list(vcov = vcov)) - return(out) + if (isTRUE(vcov == "satterthwaite")) { + return(list()) + } else { + return(list(vcov = "kenward-roger")) + } + } + + out <- switch(vcov, + "stata" = list(vcov = "HC2"), + "robust" = list(vcov = "HC3"), + "bootstrap" = list(vcov = "BS"), + "outer-product" = list(vcov = "OPG"), + list(vcov = vcov) + ) + return(out) } get_vcov_label <- function(vcov) { - if (is.null(vcov)) vcov <- "" - if (!is.character(vcov)) return(NULL) - - out <- switch(vcov, - "stata" = "Stata", - "robust" = "Robust", - "kenward-roger" = "Kenward-Roger", - "satterthwaite" = "Satterthwaite", - "HC" = , - "HC0" = , - "HC1" = , - "HC2" = , - "HC3" = , - "HC4" = , - "HC4m" = , - "HC5" = , - "HAC" = , - "OPG" = vcov, - "NeweyWest" = "Newey-West", - "kernHAC" = "Kernel HAC", - vcov - ) - return(out) + if (is.null(vcov)) vcov <- "" + if (!is.character(vcov)) { + return(NULL) + } + + out <- switch(vcov, + "stata" = "Stata", + "robust" = "Robust", + "kenward-roger" = "Kenward-Roger", + "satterthwaite" = "Satterthwaite", + "HC" = , + "HC0" = , + "HC1" = , + "HC2" = , + "HC3" = , + "HC4" = , + "HC4m" = , + "HC5" = , + "HAC" = , + "OPG" = vcov, + "NeweyWest" = "Newey-West", + "kernHAC" = "Kernel HAC", + vcov + ) + return(out) } - - diff --git a/R/inferences.R b/R/inferences.R index 8690c932f..9055f5517 100644 --- a/R/inferences.R +++ b/R/inferences.R @@ -71,7 +71,7 @@ #' # Fractional (bayesian) bootstrap #' avg_slopes(mod, by = "Species") %>% #' inferences(method = "fwb") %>% -#' posterior_draws("rvar") %>% +#' get_draws("rvar") %>% #' data.frame() #' #' # Simulation-based inference diff --git a/R/posterior_draws.R b/R/posterior_draws.R deleted file mode 100644 index 8fbdb2648..000000000 --- a/R/posterior_draws.R +++ /dev/null @@ -1,137 +0,0 @@ -#' Extract Posterior Draws or Bootstrap Resamples from `marginaleffects` Objects -#' -#' @param x An object produced by a `marginaleffects` package function, such as `predictions()`, `avg_slopes()`, `hypotheses()`, etc. -#' @param shape string indicating the shape of the output format: -#' * "long": long format data frame -#' * "DxP": Matrix with draws as rows and parameters as columns -#' * "PxD": Matrix with draws as rows and parameters as columns -#' * "rvar": Random variable datatype (see `posterior` package documentation). -#' @return A data.frame with `drawid` and `draw` columns. -#' @export -posterior_draws <- function(x, shape = "long") { - - checkmate::assert_choice(shape, choices = c("long", "DxP", "PxD", "rvar")) - - # tidy.comparisons() sometimes already saves draws in a nice long format - draws <- attr(x, "posterior_draws") - if (inherits(draws, "posterior_draws")) return(draws) - - if (is.null(attr(x, "posterior_draws"))) { - warning('This object does not include a "posterior_draws" attribute. The `posterior_draws` function only supports bayesian models produced by the `marginaleffects` or `predictions` functions of the `marginaleffects` package.', - call. = FALSE) - return(x) - } - - if (nrow(draws) != nrow(x)) { - stop('The number of parameters in the object does not match the number of parameters for which posterior draws are available.', call. = FALSE) - } - - if (shape %in% c("PxD", "DxP")) { - row.names(draws) <- paste0("b", seq_len(nrow(draws))) - colnames(draws) <- paste0("draw", seq_len(ncol(draws))) - } - - if (shape == "PxD") { - return(draws) - } - - if (shape == "DxP") { - return(t(draws)) - } - - if (shape == "rvar") { - insight::check_if_installed("posterior") - draws <- t(draws) - if (!is.null(attr(x, "nchains"))) { - x[["rvar"]] <- posterior::rvar(draws, nchains = attr(x, "nchains")) - } else { - x[["rvar"]] <- posterior::rvar(draws) - } - return(x) - } - - if (shape == "long") { - draws <- data.table(draws) - setnames(draws, as.character(seq_len(ncol(draws)))) - for (v in colnames(x)) { - draws[[v]] <- x[[v]] - } - out <- melt( - draws, - id.vars = colnames(x), - variable.name = "drawid", - value.name = "draw") - cols <- unique(c("drawid", "draw", "rowid", colnames(out))) - cols <- intersect(cols, colnames(out)) - setcolorder(out, cols) - data.table::setDF(out) - return(out) - } - -} - - -average_draws <- function(data, index, draws, byfun = NULL) { - insight::check_if_installed("collapse", minimum_version = "1.9.0") - - w <- data[["marginaleffects_wts_internal"]] - if (all(is.na(w))) { - w <- NULL - } - - if (is.null(index)) { - index <- intersect(colnames(data), "type") - } - - if (length(index) > 0) { - g <- collapse::GRP(data, by = index) - - if (is.null(byfun)) { - draws <- collapse::fmean( - draws, - g = g, - w = w, - drop = FALSE) - } else { - draws <- collapse::BY( - draws, - g = g, - FUN = byfun, - drop = FALSE) - } - out <- data.table( - g[["groups"]], - average = collapse::dapply(draws, MARGIN = 1, FUN = collapse::fmedian)) - - } else { - if (is.null(byfun)) { - draws <- collapse::fmean( - draws, - w = w, - drop = FALSE) - } else { - draws <- collapse::BY( - draws, - g = g, - FUN = byfun, - drop = FALSE) - } - out <- data.table(average = collapse::dapply(draws, MARGIN = 1, FUN = collapse::fmedian)) - } - - setnames(out, old = "average", new = "estimate") - attr(out, "posterior_draws") <- draws - return(out) -} - - - - -#' `posteriordraws()` is an alias to `posterior_draws()` -#' -#' This alias is kept for backward compatibility and because some users may prefer that name. -#' -#' @inherit posterior_draws -#' @keywords internal -#' @export -posteriordraws <- posterior_draws \ No newline at end of file diff --git a/R/slopes.R b/R/slopes.R index 3687ea8d4..d775f8458 100644 --- a/R/slopes.R +++ b/R/slopes.R @@ -97,7 +97,7 @@ #' - Accepts an argument `x`: object produced by a `marginaleffects` function or a data frame with column `rowid` and `estimate` #' - Returns a data frame with columns `term` and `estimate` (mandatory) and `rowid` (optional). #' - The function can also accept optional input arguments: `newdata`, `by`, `draws`. -#' - This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use `posterior_draws()` to extract and manipulate the draws directly. +#' - This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use `get_draws()` to extract and manipulate the draws directly. #' + See the Examples section below and the vignette: https://marginaleffects.com/chapters/hypothesis.html #' @param p_adjust Adjust p-values for multiple comparisons: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", or "fdr". See [stats::p.adjust] #' @param df Degrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and `Inf`. When `df` is `Inf`, the normal distribution is used. When `df` is finite, the `t` distribution is used. See [insight::get_df] for a convenient function to extract degrees of freedom. Ex: `slopes(model, df = insight::get_df(model))` diff --git a/altdoc/quarto_website.yml b/altdoc/quarto_website.yml index 110856cf9..f694ab367 100644 --- a/altdoc/quarto_website.yml +++ b/altdoc/quarto_website.yml @@ -30,9 +30,9 @@ website: aria-label: marginaleffects GitHub menu: - text: R - href: https://github.com/vincentarelbundock/marginaleffects + href: https://github.com/vincentarelbundock/marginaleffects - text: Python - href: https://github.com/vincentarelbundock/pymarginaleffects + href: https://github.com/vincentarelbundock/pymarginaleffects - icon: twitter href: https://twitter.com/vincentab - icon: mastodon @@ -61,10 +61,14 @@ website: file: man/hypotheses.qmd - text: "`inferences`" file: man/inferences.qmd - - text: "`posterior_draws`" - file: man/posterior_draws.qmd - text: "`datagrid`" file: man/datagrid.qmd + - text: "`get_draws`" + file: man/get_draws.qmd + - text: "`get_vcov`" + file: man/get_vcov.qmd + - text: "`get_coef`" + file: man/get_coef.qmd - text: "`print.marginaleffects`" file: man/print.marginaleffects.qmd - text: News diff --git a/inst/tinytest/helpers.R b/inst/tinytest/helpers.R index dd779c7ac..dbc9c6a8f 100644 --- a/inst/tinytest/helpers.R +++ b/inst/tinytest/helpers.R @@ -8,26 +8,26 @@ options("tinysnapshot_tol" = 200) options(marginaleffects_numDeriv = NULL) if (isTRUE(insight::check_if_installed("cmdstanr", quietly = TRUE))) { - options("brms.backend" = "cmdstanr") + options("brms.backend" = "cmdstanr") } # libraries requiet <- function(package) { - void <- capture.output( - pkg_available <- tryCatch(suppressPackageStartupMessages(suppressWarnings(suppressMessages(tryCatch( - isTRUE(require(package, warn.conflicts = FALSE, character.only = TRUE)), - error = function(e) FALSE - )))))) - return(pkg_available) + void <- capture.output( + pkg_available <- tryCatch(suppressPackageStartupMessages(suppressWarnings(suppressMessages(tryCatch( + isTRUE(require(package, warn.conflicts = FALSE, character.only = TRUE)), + error = function(e) FALSE + )))))) + return(pkg_available) } requiet("tinytest") requiet("tinysnapshot") if (isTRUE(suppressMessages(require("tinytest"))) && packageVersion("tinytest") >= "1.4.0") { - tinytest::register_tinytest_extension( - "marginaleffects", - c("expect_slopes", "expect_predictions", "expect_margins")) + tinytest::register_tinytest_extension( + "marginaleffects", + c("expect_slopes", "expect_predictions", "expect_margins")) } # common names of datasets, often assigned to global environment @@ -50,21 +50,21 @@ ON_WINDOWS <- isTRUE(Sys.info()[["sysname"]] == "Windows") ON_OSX <- isTRUE(Sys.info()[["sysname"]] == "Darwin") minver <- function(pkg, ver = NULL) { - ins <- try(utils::packageVersion(pkg), silent = TRUE) - if (is.null(ver)) { - isTRUE(inherits(ins, "try-error")) - } else { - isTRUE(as.character(ins) >= ver) - } + ins <- try(utils::packageVersion(pkg), silent = TRUE) + if (is.null(ver)) { + isTRUE(inherits(ins, "try-error")) + } else { + isTRUE(as.character(ins) >= ver) + } } testing_path <- function(x) { - wd <- tinytest::get_call_wd() - if (isTRUE(wd != "")) { - out <- x - } else { - out <- paste0(wd, "/", x) - } - out <- gsub("^\\/", "", out) - return(out) + wd <- tinytest::get_call_wd() + if (isTRUE(wd != "")) { + out <- x + } else { + out <- paste0(wd, "/", x) + } + out <- gsub("^\\/", "", out) + return(out) } diff --git a/inst/tinytest/test-inferences.R b/inst/tinytest/test-inferences.R index 791c53104..274174960 100644 --- a/inst/tinytest/test-inferences.R +++ b/inst/tinytest/test-inferences.R @@ -50,7 +50,7 @@ x <- mod |> expect_equivalent(nrow(x), 2) x <- mod|> avg_comparisons() |> inferences(method = "simulation", R = R) expect_equivalent(nrow(x), 2) -x <- x |> posterior_draws() +x <- x |> get_draws() expect_equivalent(nrow(x), 2 * R) @@ -72,7 +72,7 @@ expect_equivalent(nrow(x), 2) x <- mod |> avg_comparisons() |> inferences(method = "rsample", R = R) |> - posterior_draws() + get_draws() expect_equivalent(nrow(x), 2 * R) # fwb no validity check diff --git a/inst/tinytest/test-pkg-brms.R b/inst/tinytest/test-pkg-brms.R index ef96708a9..a4f136c3e 100644 --- a/inst/tinytest/test-pkg-brms.R +++ b/inst/tinytest/test-pkg-brms.R @@ -64,7 +64,7 @@ bm <- brmsmargins( CI = 0.95, CIType = "ETI") bm <- data.frame(bm$ContrastSummary) mfx <- avg_slopes(brms_numeric) -expect_equivalent(mean(posterior_draws(mfx)$draw), bm$M, tolerance = tol) +expect_equivalent(mean(get_draws(mfx)$draw), bm$M, tolerance = tol) expect_equivalent(mfx$conf.low, bm$LL, tolerance = tol) expect_equivalent(mfx$conf.high, bm$UL, tolerance = tol) @@ -72,10 +72,10 @@ options("marginaleffects_posterior_interval" = "hdi") # marginaleffects vs. emmeans mfx <- avg_slopes( - brms_numeric2, - newdata = datagrid(mpg = 20, hp = 100), - variables = "mpg", - type = "link") + brms_numeric2, + newdata = datagrid(mpg = 20, hp = 100), + variables = "mpg", + type = "link") em <- emtrends(brms_numeric2, ~mpg, "mpg", at = list(mpg = 20, hp = 100)) em <- tidy(em) @@ -83,8 +83,9 @@ expect_equivalent(mfx$estimate, em$mpg.trend) expect_equivalent(mfx$conf.low, em$lower.HPD) expect_equivalent(mfx$conf.high, em$upper.HPD) # tolerance is less good for back-transformed response -mfx <- avg_slopes(brms_numeric2, newdata = datagrid(mpg = 20, hp = 100), - variables = "mpg", type = "response") +mfx <- avg_slopes(brms_numeric2, + newdata = datagrid(mpg = 20, hp = 100), + variables = "mpg", type = "response") em <- emtrends(brms_numeric2, ~mpg, "mpg", at = list(mpg = 20, hp = 100), regrid = "response") em <- tidy(em) expect_equivalent(mfx$estimate, em$mpg.trend, tolerance = .1) @@ -102,18 +103,20 @@ expect_inherits(mfx, "marginaleffects") expect_equivalent(nrow(mfx), nrow(attr(mfx, "posterior_draws"))) -# predictions: hypothetical group -nd <- suppressWarnings(datagrid(model = brms_mixed_3, grp = 4, subgrp = 12)) -nd$Subject <- 1000 -set.seed(1024) -p1 <- predictions(brms_mixed_3, newdata = nd, allow_new_levels = TRUE) -set.seed(1024) -p2 <- predictions(brms_mixed_3, newdata = nd, allow_new_levels = TRUE, sample_new_levels = "gaussian") -set.seed(1024) -p3 <- predictions(brms_mixed_3, newdata = nd, allow_new_levels = TRUE, sample_new_levels = "uncertainty") -expect_false(any(p1$estimate == p2$estimate)) -expect_equivalent(p1, p3) -expect_inherits(posterior_draws(p3), "data.frame") +## Not sure what the intent of those tests are, and the first one fails +# +# # predictions: hypothetical group +# nd <- suppressWarnings(datagrid(model = brms_mixed_3, grp = 4, subgrp = 12)) +# nd$Subject <- 1000000 +# set.seed(1024) +# p1 <- predictions(brms_mixed_3, newdata = nd, allow_new_levels = TRUE) +# set.seed(1024) +# p2 <- predictions(brms_mixed_3, newdata = nd, allow_new_levels = TRUE, sample_new_levels = "gaussian") +# set.seed(1024) +# p3 <- predictions(brms_mixed_3, newdata = nd, allow_new_levels = TRUE, sample_new_levels = "uncertainty") +# expect_false(any(p1$estimate == p2$estimate)) +# expect_equivalent(p1, p3) +# expect_inherits(get_draws(p3), "data.frame") # predictions w/ random effects @@ -273,9 +276,9 @@ expect_equivalent(mfx1$conf.high, mfx2$upper.HPD, tolerance = .001) # numeric + factor: factor dat <- datagrid(model = brms_factor, mpg = 25, cyl_fac = 4) mfx1 <- slopes(brms_factor, variables = "cyl_fac", newdata = dat, type = "link") -mfx2 <- emmeans::emmeans(brms_factor, ~ cyl_fac, var = "cyl_fac", at = list(mpg = 25)) +mfx2 <- emmeans::emmeans(brms_factor, ~cyl_fac, var = "cyl_fac", at = list(mpg = 25)) mfx2 <- emmeans::contrast(mfx2, method = "revpairwise") -mfx2 <- data.frame(mfx2)[1:2,] +mfx2 <- data.frame(mfx2)[1:2, ] expect_equivalent(mfx1$estimate, mfx2$estimate, tolerance = .001) expect_equivalent(mfx1$conf.low, mfx2$lower.HPD, tolerance = .001) expect_equivalent(mfx1$conf.high, mfx2$upper.HPD, tolerance = .001) @@ -295,15 +298,15 @@ expect_equivalent(mfx1$conf.high, mfx2$upper.HPD, tolerance = .001) # factor in formula expect_error(slopes(brms_factor_formula), - pattern = "factor") + pattern = "factor") expect_error(predictions(brms_factor_formula), - pattern = "factor") + pattern = "factor") # bugs stay dead: factor indexing for posterior draws tmp <- predictions(brms_factor, newdata = datagrid(cyl_fac = 4, mpg = c(10, 20))) -expect_inherits(posterior_draws(tmp), "data.frame") +expect_inherits(get_draws(tmp), "data.frame") @@ -336,7 +339,7 @@ expect_inherits(pred, "predictions") comp <- comparisons(brms_mv_1) expect_inherits(comp, "comparisons") -draws <- posterior_draws(mfx) +draws <- get_draws(mfx) expect_inherits(draws, "data.frame") expect_true(all(c("drawid", "draw", "rowid") %in% colnames(draws))) @@ -350,27 +353,27 @@ expect_inherits(pred, "predictions") comp <- comparisons(brms_categorical_1) expect_inherits(comp, "comparisons") -draws <- posterior_draws(mfx) +draws <- get_draws(mfx) expect_inherits(draws, "data.frame") expect_true(all(c("drawid", "draw", "rowid") %in% colnames(draws))) # vignette vdem example p_response <- predictions( - brms_vdem, - type = "response", - newdata = datagrid( - party_autonomy = c(TRUE, FALSE), - civil_liberties = .5, - region = "Middle East and North Africa")) + brms_vdem, + type = "response", + newdata = datagrid( + party_autonomy = c(TRUE, FALSE), + civil_liberties = .5, + region = "Middle East and North Africa")) expect_predictions(p_response, se = FALSE) p_prediction <- predictions( - brms_vdem, - type = "prediction", - newdata = datagrid( - party_autonomy = c(TRUE, FALSE), - civil_liberties = .5, - region = "Middle East and North Africa")) + brms_vdem, + type = "prediction", + newdata = datagrid( + party_autonomy = c(TRUE, FALSE), + civil_liberties = .5, + region = "Middle East and North Africa")) expect_predictions(p_prediction, se = FALSE) @@ -384,7 +387,7 @@ expect_true(length(unique(ti$estimate)) == nrow(ti)) # warning: vcov not supported expect_warning(slopes(brms_numeric, vcov = "HC3"), - pattern = "vcov.*not supported") + pattern = "vcov.*not supported") # Andrew Heiss says that lognormal_hurdle are tricky because the link is # identity even if the response is actually logged @@ -392,46 +395,46 @@ expect_warning(slopes(brms_numeric, vcov = "HC3"), # non-hurdle part: post-calculation exponentiation p1 <- predictions( - brms_lognormal_hurdle, - newdata = datagrid(lifeExp = seq(30, 80, 10)), - transform = exp, - dpar = "mu") + brms_lognormal_hurdle, + newdata = datagrid(lifeExp = seq(30, 80, 10)), + transform = exp, + dpar = "mu") p2 <- predictions( - brms_lognormal_hurdle, - newdata = datagrid(lifeExp = seq(30, 80, 10)), - dpar = "mu") + brms_lognormal_hurdle, + newdata = datagrid(lifeExp = seq(30, 80, 10)), + dpar = "mu") expect_true(all(p1$estimate != p2$estimate)) eps <- 0.01 cmp1 <- comparisons( - brms_lognormal_hurdle, - variables = list(lifeExp = eps), - newdata = datagrid(lifeExp = seq(30, 80, 10)), - comparison = function(hi, lo) (exp(hi) - exp(lo)) / exp(eps), - dpar = "mu") + brms_lognormal_hurdle, + variables = list(lifeExp = eps), + newdata = datagrid(lifeExp = seq(30, 80, 10)), + comparison = function(hi, lo) (exp(hi) - exp(lo)) / exp(eps), + dpar = "mu") cmp2 <- comparisons( - brms_lognormal_hurdle, - variables = list(lifeExp = eps), - newdata = datagrid(lifeExp = seq(30, 80, 10)), - comparison = function(hi, lo) exp((hi - lo) / eps), - dpar = "mu") + brms_lognormal_hurdle, + variables = list(lifeExp = eps), + newdata = datagrid(lifeExp = seq(30, 80, 10)), + comparison = function(hi, lo) exp((hi - lo) / eps), + dpar = "mu") expect_true(all(cmp1$estimate != cmp2$estimate)) cmp <- comparisons( - brms_lognormal_hurdle2, - dpar = "mu", - datagrid(disp = c(150, 300, 450)), - comparison = "expdydx") - -expect_equivalent(cmp$estimate, - c(-0.0464610297239711, -0.0338017059188856, -0.0245881481374242), - # seed difference? - # c(-0.0483582312992919, -0.035158983842012, -0.0255763979591749), - tolerance = .01) - -# emt <- emtrends(mod, ~disp, var = "disp", dpar = "mu", + brms_lognormal_hurdle2, + dpar = "mu", + datagrid(disp = c(150, 300, 450)), + comparison = "expdydx") + +expect_equivalent(cmp$estimate, + c(-0.0464610297239711, -0.0338017059188856, -0.0245881481374242), + # seed difference? + # c(-0.0483582312992919, -0.035158983842012, -0.0255763979591749), + tolerance = .01) + +# emt <- emtrends(mod, ~disp, var = "disp", dpar = "mu", # regrid = "response", tran = "log", type = "response", - # at = list(disp = c(150, 300, 450))) +# at = list(disp = c(150, 300, 450))) # Issue #432: bayes support for comparison with output of length 1 cmp1 <- comparisons(brms_numeric2, comparison = "difference") @@ -450,8 +453,8 @@ expect_true(all(cmp$estimate != cmp$conf.low)) expect_true(all(cmp$estimate != cmp$conf.high)) expect_true(all(cmp$conf.high != cmp$conf.low)) -# Issue #432: posterior_draws() and tidy() error with `comparison="avg"` -pd <- posterior_draws(cmp) +# Issue #432: get_draws() and tidy() error with `comparison="avg"` +pd <- get_draws(cmp) expect_inherits(pd, "data.frame") expect_equivalent(nrow(pd), 4000) ti <- tidy(cmp) @@ -461,14 +464,14 @@ expect_inherits(ti, "data.frame") # hypothesis with bayesian models p1 <- predictions( - brms_numeric2, - hypothesis = c(1, -1), - newdata = datagrid(hp = c(100, 110))) + brms_numeric2, + hypothesis = c(1, -1), + newdata = datagrid(hp = c(100, 110))) p2 <- predictions( - brms_numeric2, - hypothesis = "b1 = b2", - newdata = datagrid(hp = c(100, 110))) + brms_numeric2, + hypothesis = "b1 = b2", + newdata = datagrid(hp = c(100, 110))) expect_inherits(p1, "predictions") expect_inherits(p2, "predictions") @@ -481,9 +484,9 @@ expect_true(all(c("conf.low", "conf.high") %in% colnames(p2))) lc <- matrix(c(1, -1, -1, 1), ncol = 2) colnames(lc) <- c("Contrast A", "Contrast B") p3 <- predictions( - brms_numeric2, - hypothesis = lc, - newdata = datagrid(hp = c(100, 110))) + brms_numeric2, + hypothesis = lc, + newdata = datagrid(hp = c(100, 110))) expect_inherits(p3, "predictions") expect_equivalent(nrow(p3), 2) expect_equivalent(p3$term, c("Contrast A", "Contrast B")) @@ -495,8 +498,8 @@ expect_equivalent(p3$estimate[1], -p3$estimate[2]) # take the average, and we need to rely on more subtle transformations from # `comparison_function_dict`. p <- predictions( - brms_factor, - by = "cyl_fac") + brms_factor, + by = "cyl_fac") expect_inherits(p, "predictions") expect_equal(ncol(attr(p, "posterior_draws")), 2000) expect_equal(nrow(p), 3) @@ -505,16 +508,16 @@ expect_true(all(c("conf.low", "conf.high") %in% colnames(p))) # `by` data frame to collapse response group by <- data.frame( - group = as.character(1:4), - by = rep(c("(1,2)", "(3,4)"), each = 2)) + group = as.character(1:4), + by = rep(c("(1,2)", "(3,4)"), each = 2)) p <- predictions( - brms_cumulative_random, - by = by) + brms_cumulative_random, + by = by) expect_equivalent(nrow(p), 2) p <- predictions( - brms_cumulative_random, - by = by, - hypothesis = "reference") + brms_cumulative_random, + by = by, + hypothesis = "reference") expect_equivalent(nrow(p), 1) @@ -566,8 +569,8 @@ expect_equivalent(exp(attr(p1, "posterior_draws")), attr(p2, "posterior_draws")) # byfun by <- data.frame( - by = c("1,2", "1,2", "3,4", "3,4"), - group = 1:4) + by = c("1,2", "1,2", "3,4", "3,4"), + group = 1:4) p1 <- predictions(brms_cumulative_random, newdata = "mean") p2 <- predictions(brms_cumulative_random, newdata = "mean", by = by) p3 <- predictions(brms_cumulative_random, newdata = "mean", by = by, byfun = sum) @@ -589,10 +592,10 @@ set.seed(1024) K <<- 100 cmp <- avg_comparisons( - brms_logit_re, - newdata = datagrid(firm = sample(1e5:2e6, K)), - allow_new_levels = TRUE, - sample_new_levels = "gaussian") + brms_logit_re, + newdata = datagrid(firm = sample(1e5:2e6, K)), + allow_new_levels = TRUE, + sample_new_levels = "gaussian") bm <- brmsmargins( k = K, @@ -608,16 +611,16 @@ expect_equivalent(cmp$conf.high, bm$UL, tolerance = .05) -# posterior_draws(shape = ) +# get_draws(shape = ) tid <- avg_comparisons(brms_numeric2) -pd <- posterior_draws(tid, shape = "DxP") +pd <- get_draws(tid, shape = "DxP") hyp <- brms::hypothesis(pd, "b1 - b2 > 0") expect_inherits(hyp, "brmshypothesis") # posterior::rvar tid <- avg_comparisons(brms_numeric2) -rv <- posterior_draws(tid, "rvar") +rv <- get_draws(tid, "rvar") expect_equivalent(nrow(rv), 2) expect_inherits(rv$rvar[[1]], "rvar") @@ -657,18 +660,19 @@ expect_inherits(cmp, "comparisons") # Issue #751: informative error on bad predition expect_error(comparisons(brms_logit_re, newdata = datagrid(firm = -10:8)), - pattern = "new.levels") + pattern = "new.levels") cmp = comparisons(brms_logit_re, newdata = datagrid(firm = -10:8), allow_new_levels = TRUE) expect_inherits(cmp, "comparisons") -# Issue #888: posterior_draws() fails for quantile transformation -expect_error(predictions( +# Issue #888: get_draws() fails for quantile transformation +expect_error( + predictions( brms_factor, by = "cyl_fac", transform = \(x) ecdf(mtcars$mpg)(x)) |> - posterior_draws(), - pattern = "matrix input must return") + get_draws(), + pattern = "matrix input must return") # Issue 1006: predictor is also a response @@ -685,4 +689,3 @@ expect_inherits(cmp, "comparisons") source("helpers.R") rm(list = ls()) - diff --git a/man/comparisons.Rd b/man/comparisons.Rd index 80ce291ae..ecd258de8 100644 --- a/man/comparisons.Rd +++ b/man/comparisons.Rd @@ -223,7 +223,7 @@ first entry in the error message is used by default.} \item Accepts an argument \code{x}: object produced by a \code{marginaleffects} function or a data frame with column \code{rowid} and \code{estimate} \item Returns a data frame with columns \code{term} and \code{estimate} (mandatory) and \code{rowid} (optional). \item The function can also accept optional input arguments: \code{newdata}, \code{by}, \code{draws}. -\item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{posterior_draws()} to extract and manipulate the draws directly. +\item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{get_draws()} to extract and manipulate the draws directly. } \item See the Examples section below and the vignette: https://marginaleffects.com/chapters/hypothesis.html }} diff --git a/man/get_coef.Rd b/man/get_coef.Rd index e9b728af3..280f34beb 100644 --- a/man/get_coef.Rd +++ b/man/get_coef.Rd @@ -34,7 +34,7 @@ \alias{get_coef.svyolr} \alias{get_coef.systemfit} \alias{get_coef.workflow} -\title{Get a named vector of coefficients from a model object (internal function)} +\title{Get a named vector of coefficients from a model object} \usage{ get_coef(model, ...) @@ -104,6 +104,6 @@ arguments.} A named vector of coefficients. The names must match those of the variance matrix. } \description{ -Get a named vector of coefficients from a model object (internal function) +Get a named vector of coefficients from a model object } \keyword{internal} diff --git a/man/posteriordraws.Rd b/man/get_draws.Rd similarity index 66% rename from man/posteriordraws.Rd rename to man/get_draws.Rd index 4b562111b..544ae55a0 100644 --- a/man/posteriordraws.Rd +++ b/man/get_draws.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/posterior_draws.R -\name{posteriordraws} -\alias{posteriordraws} -\title{\code{posteriordraws()} is an alias to \code{posterior_draws()}} +% Please edit documentation in R/get_draws.R +\name{get_draws} +\alias{get_draws} +\title{Extract Posterior Draws or Bootstrap Resamples from \code{marginaleffects} Objects} \usage{ -posteriordraws(x, shape = "long") +get_draws(x, shape = "long") } \arguments{ \item{x}{An object produced by a \code{marginaleffects} package function, such as \code{predictions()}, \code{avg_slopes()}, \code{hypotheses()}, etc.} @@ -21,6 +21,5 @@ posteriordraws(x, shape = "long") A data.frame with \code{drawid} and \code{draw} columns. } \description{ -This alias is kept for backward compatibility and because some users may prefer that name. +Extract Posterior Draws or Bootstrap Resamples from \code{marginaleffects} Objects } -\keyword{internal} diff --git a/man/get_vcov.Rd b/man/get_vcov.Rd index d50762d16..56a890771 100644 --- a/man/get_vcov.Rd +++ b/man/get_vcov.Rd @@ -26,7 +26,7 @@ \alias{get_vcov.systemfit} \alias{get_vcov.model_fit} \alias{get_vcov.workflow} -\title{Get a named variance-covariance matrix from a model object (internal function)} +\title{Get a named variance-covariance matrix from a model object} \usage{ get_vcov(model, ...) @@ -109,6 +109,6 @@ first entry in the error message is used by default.} A named square matrix of variance and covariances. The names must match the coefficient names. } \description{ -Get a named variance-covariance matrix from a model object (internal function) +Get a named variance-covariance matrix from a model object } \keyword{internal} diff --git a/man/hypotheses.Rd b/man/hypotheses.Rd index b042e6c6d..85e1afe18 100644 --- a/man/hypotheses.Rd +++ b/man/hypotheses.Rd @@ -62,7 +62,7 @@ hypotheses( \item Accepts an argument \code{x}: object produced by a \code{marginaleffects} function or a data frame with column \code{rowid} and \code{estimate} \item Returns a data frame with columns \code{term} and \code{estimate} (mandatory) and \code{rowid} (optional). \item The function can also accept optional input arguments: \code{newdata}, \code{by}, \code{draws}. -\item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{posterior_draws()} to extract and manipulate the draws directly. +\item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{get_draws()} to extract and manipulate the draws directly. } \item See the Examples section below and the vignette: https://marginaleffects.com/chapters/hypothesis.html }} diff --git a/man/inferences.Rd b/man/inferences.Rd index 5ec2ce8eb..648ef601a 100644 --- a/man/inferences.Rd +++ b/man/inferences.Rd @@ -106,7 +106,7 @@ avg_predictions(mod, by = "Species") \%>\% # Fractional (bayesian) bootstrap avg_slopes(mod, by = "Species") \%>\% inferences(method = "fwb") \%>\% - posterior_draws("rvar") \%>\% + get_draws("rvar") \%>\% data.frame() # Simulation-based inference diff --git a/man/posterior_draws.Rd b/man/posterior_draws.Rd index 99958c151..71b906322 100644 --- a/man/posterior_draws.Rd +++ b/man/posterior_draws.Rd @@ -1,25 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/posterior_draws.R +% Please edit documentation in R/get_draws.R \name{posterior_draws} \alias{posterior_draws} -\title{Extract Posterior Draws or Bootstrap Resamples from \code{marginaleffects} Objects} +\title{alias to \code{get_draws()} for backward compatibility with JJSS} \usage{ posterior_draws(x, shape = "long") } -\arguments{ -\item{x}{An object produced by a \code{marginaleffects} package function, such as \code{predictions()}, \code{avg_slopes()}, \code{hypotheses()}, etc.} - -\item{shape}{string indicating the shape of the output format: -\itemize{ -\item "long": long format data frame -\item "DxP": Matrix with draws as rows and parameters as columns -\item "PxD": Matrix with draws as rows and parameters as columns -\item "rvar": Random variable datatype (see \code{posterior} package documentation). -}} -} -\value{ -A data.frame with \code{drawid} and \code{draw} columns. -} \description{ -Extract Posterior Draws or Bootstrap Resamples from \code{marginaleffects} Objects +alias to \code{get_draws()} for backward compatibility with JJSS } +\keyword{internal} diff --git a/man/predictions.Rd b/man/predictions.Rd index f974c1197..6c451e4d9 100644 --- a/man/predictions.Rd +++ b/man/predictions.Rd @@ -188,7 +188,7 @@ levels. See examples section.} \item Accepts an argument \code{x}: object produced by a \code{marginaleffects} function or a data frame with column \code{rowid} and \code{estimate} \item Returns a data frame with columns \code{term} and \code{estimate} (mandatory) and \code{rowid} (optional). \item The function can also accept optional input arguments: \code{newdata}, \code{by}, \code{draws}. -\item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{posterior_draws()} to extract and manipulate the draws directly. +\item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{get_draws()} to extract and manipulate the draws directly. } \item See the Examples section below and the vignette: https://marginaleffects.com/chapters/hypothesis.html }} diff --git a/man/slopes.Rd b/man/slopes.Rd index 31c13c98e..c18fdb5ae 100644 --- a/man/slopes.Rd +++ b/man/slopes.Rd @@ -167,7 +167,7 @@ first entry in the error message is used by default.} \item Accepts an argument \code{x}: object produced by a \code{marginaleffects} function or a data frame with column \code{rowid} and \code{estimate} \item Returns a data frame with columns \code{term} and \code{estimate} (mandatory) and \code{rowid} (optional). \item The function can also accept optional input arguments: \code{newdata}, \code{by}, \code{draws}. -\item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{posterior_draws()} to extract and manipulate the draws directly. +\item This function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use \code{get_draws()} to extract and manipulate the draws directly. } \item See the Examples section below and the vignette: https://marginaleffects.com/chapters/hypothesis.html }}