From 0108a087ea2f3eb41249ecbd06faf5cfadc7d703 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sat, 16 Nov 2024 10:24:58 -0500 Subject: [PATCH] broken equivalence tests --- R/get_hypothesis.R | 636 ++++++++++++++++--------------- inst/tinytest/test-by.R | 81 ++-- inst/tinytest/test-equivalence.R | 68 ++-- 3 files changed, 394 insertions(+), 391 deletions(-) diff --git a/R/get_hypothesis.R b/R/get_hypothesis.R index 2961584c4..57d6c2978 100644 --- a/R/get_hypothesis.R +++ b/R/get_hypothesis.R @@ -4,320 +4,320 @@ get_hypothesis <- function( by = NULL, newdata = NULL, draws = NULL) { + if (is.null(hypothesis)) { + return(x) + } - if (is.null(hypothesis)) return(x) - - if (isTRUE(checkmate::check_choice(hypothesis, "meandev"))) { - hypothesis <- function(x) { - out <- x - out$estimate <- out$estimate - mean(out$estimate) - out$hypothesis <- "Mean deviation" - return(out) - } - } else if (isTRUE(checkmate::check_choice(hypothesis, "meanotherdev"))) { - hypothesis <- function(x) { - out <- x - s <- sum(out$estimate) - m_other <- (s - out$estimate) / (nrow(x) - 1) - out$estimate <- out$estimate - m_other - out$hypothesis <- "Mean deviation (other)" - return(out) - } + if (isTRUE(checkmate::check_choice(hypothesis, "meandev"))) { + hypothesis <- function(x) { + out <- x + out$estimate <- out$estimate - mean(out$estimate) + out$hypothesis <- "Mean deviation" + return(out) } + } else if (isTRUE(checkmate::check_choice(hypothesis, "meanotherdev"))) { + hypothesis <- function(x) { + out <- x + s <- sum(out$estimate) + m_other <- (s - out$estimate) / (nrow(x) - 1) + out$estimate <- out$estimate - m_other + out$hypothesis <- "Mean deviation (other)" + return(out) + } + } - if (is.function(hypothesis)) { - if (!is.null(draws)) { - msg <- "The `hypothesis` argument does not support function for models with draws. You can use `posterior_draws()` to extract draws and manipulate them directly instead." - insight::format_error(msg) - } - if ("rowid" %in% colnames(x) && "rowid" %in% colnames(newdata)) { - x <- merge(x, newdata, all.x = TRUE, by = intersect(colnames(x), colnames(newdata))) - } else if (isTRUE(nrow(x) == nrow(newdata))) { - x <- cbind(x, newdata) - } + if (is.function(hypothesis)) { + if (!is.null(draws)) { + msg <- "The `hypothesis` argument does not support function for models with draws. You can use `posterior_draws()` to extract draws and manipulate them directly instead." + insight::format_error(msg) + } + if ("rowid" %in% colnames(x) && "rowid" %in% colnames(newdata)) { + x <- merge(x, newdata, all.x = TRUE, by = intersect(colnames(x), colnames(newdata))) + } else if (isTRUE(nrow(x) == nrow(newdata))) { + x <- cbind(x, newdata) + } - attr(x, "variables_datagrid") <- attr(newdata, "variables_datagrid") - attr(x, "by") <- if (is.character(by)) by else names(by) + attr(x, "variables_datagrid") <- attr(newdata, "variables_datagrid") + attr(x, "by") <- if (is.character(by)) by else names(by) - argnames <- names(formals(hypothesis)) - if (!"x" %in% argnames) insight::format_error("The `hypothesis` function must accept an `x` argument.") - if (!all(argnames %in% c("x", "draws"))) { - msg <- "The allowable arguments for the `hypothesis` function are: `x` and `draws`" - insight::format_error(msg) - } - args <- list(x = x, newdata = newdata, by = by, draws = draws) - args <- args[names(args) %in% argnames] - out <- do.call(hypothesis, args) - at <- attr(out, "hypothesis_function_by") - - # sanity - msg <- "The `hypothesis` argument function must return a data frame with `term` (or `hypothesis`) and `estimate` columns." - if (inherits(out, "data.frame")) { - if (!all(c("term", "estimate") %in% colnames(out)) && !all(c("hypothesis", "estimate") %in% colnames(out))) { - insight::format_error(msg) - } - } else if (isTRUE(checkmate::check_numeric(out))) { - if (isTRUE(checkmate::check_data_frame(x, nrows = length(out))) && "term" %in% colnames(out)) { - out <- data.frame(term = out$term, estimate = out) - } else { - out <- data.frame(term = seq_along(out), estimate = out) - } - } else { - insight::format_error(msg) - } - - attr(out, "hypothesis_function_by") <- at - return(out) + argnames <- names(formals(hypothesis)) + if (!"x" %in% argnames) insight::format_error("The `hypothesis` function must accept an `x` argument.") + if (!all(argnames %in% c("x", "draws"))) { + msg <- "The allowable arguments for the `hypothesis` function are: `x` and `draws`" + insight::format_error(msg) + } + args <- list(x = x, newdata = newdata, by = by, draws = draws) + args <- args[names(args) %in% argnames] + out <- do.call(hypothesis, args) + at <- attr(out, "hypothesis_function_by") + + # sanity + msg <- "The `hypothesis` argument function must return a data frame with `term` (or `hypothesis`) and `estimate` columns." + if (inherits(out, "data.frame")) { + if (!all(c("term", "estimate") %in% colnames(out)) && !all(c("hypothesis", "estimate") %in% colnames(out))) { + insight::format_error(msg) + } + } else if (isTRUE(checkmate::check_numeric(out))) { + if (isTRUE(checkmate::check_data_frame(x, nrows = length(out))) && "term" %in% colnames(out)) { + out <- data.frame(term = out$term, estimate = out) + } else { + out <- data.frame(term = seq_along(out), estimate = out) + } + } else { + insight::format_error(msg) } - lincom <- NULL + attr(out, "hypothesis_function_by") <- at + return(out) + } + + lincom <- NULL - # lincom: numeric vector or matrix - if (isTRUE(checkmate::check_numeric(hypothesis))) { - if (isTRUE(checkmate::check_atomic_vector(hypothesis))) { - checkmate::assert_numeric(hypothesis, len = nrow(x)) - lincom <- as.matrix(hypothesis) - } else if (isTRUE(checkmate::check_matrix(hypothesis))) { - lincom <- hypothesis - } + # lincom: numeric vector or matrix + if (isTRUE(checkmate::check_numeric(hypothesis))) { + if (isTRUE(checkmate::check_atomic_vector(hypothesis))) { + checkmate::assert_numeric(hypothesis, len = nrow(x)) + lincom <- as.matrix(hypothesis) + } else if (isTRUE(checkmate::check_matrix(hypothesis))) { + lincom <- hypothesis } + } - # lincom: string shortcuts - valid <- c("pairwise", "reference", "sequential", "revpairwise", "revreference", "revsequential") - if (isTRUE(hypothesis %in% valid)) { - safe <- isFALSE(getOption("marginaleffects_safe", default = TRUE)) - if (nrow(x) > 25 && !safe) { - msg <- 'The "pairwise", "reference", and "sequential" options of the `hypotheses` argument are not supported for `marginaleffects` commands which generate more than 25 rows of results. Use the `newdata`, `by`, and/or `variables` arguments to compute a smaller set of results on which to conduct hypothesis tests.' - insight::format_error(msg) - } + # lincom: string shortcuts + valid <- c("pairwise", "reference", "sequential", "revpairwise", "revreference", "revsequential") + if (isTRUE(hypothesis %in% valid)) { + safe <- isFALSE(getOption("marginaleffects_safe", default = TRUE)) + if (nrow(x) > 25 && !safe) { + msg <- 'The "pairwise", "reference", and "sequential" options of the `hypotheses` argument are not supported for `marginaleffects` commands which generate more than 25 rows of results. Use the `newdata`, `by`, and/or `variables` arguments to compute a smaller set of results on which to conduct hypothesis tests.' + insight::format_error(msg) } - if (isTRUE(hypothesis == "reference")) { - lincom <- lincom_reference(x, by) - } else if (isTRUE(hypothesis == "revreference")) { - lincom <- lincom_revreference(x, by) - } else if (isTRUE(hypothesis == "sequential")) { - lincom <- lincom_sequential(x, by) - } else if (isTRUE(hypothesis == "revsequential")) { - lincom <- lincom_revsequential(x, by) - } else if (isTRUE(hypothesis == "pairwise")) { - lincom <- lincom_pairwise(x, by) - } else if (isTRUE(hypothesis == "revpairwise")) { - lincom <- lincom_revpairwise(x, by) - } - lincom <- sanitize_lincom(lincom, x) + } + if (isTRUE(hypothesis == "reference")) { + lincom <- lincom_reference(x, by) + } else if (isTRUE(hypothesis == "revreference")) { + lincom <- lincom_revreference(x, by) + } else if (isTRUE(hypothesis == "sequential")) { + lincom <- lincom_sequential(x, by) + } else if (isTRUE(hypothesis == "revsequential")) { + lincom <- lincom_revsequential(x, by) + } else if (isTRUE(hypothesis == "pairwise")) { + lincom <- lincom_pairwise(x, by) + } else if (isTRUE(hypothesis == "revpairwise")) { + lincom <- lincom_revpairwise(x, by) + } + lincom <- sanitize_lincom(lincom, x) - # matrix hypothesis - if (isTRUE(checkmate::check_matrix(lincom))) { - out <- lincom_multiply(x, lincom) - return(out) + # matrix hypothesis + if (isTRUE(checkmate::check_matrix(lincom))) { + out <- lincom_multiply(x, lincom) + return(out) # string hypothesis - } else if (is.character(hypothesis)) { - out_list <- draws_list <- list() - lab <- attr(hypothesis, "label") - tmp <- expand_wildcard(hypothesis, nrow(x), lab) - hypothesis <- tmp[[1]] - labs <- tmp[[2]] - for (i in seq_along(hypothesis)) { - out_list[[i]] <- eval_string_hypothesis(x, hypothesis[i], labs[i]) - draws_list[[i]] <- attr(out_list[[i]], "posterior_draws") - } - out <- do.call(rbind, out_list) - attr(out, "posterior_draws") <- do.call(rbind, draws_list) - attr(out, "label") <- if (!is.null(attr(labs, "names"))) { - attr(labs, "names") - } else { - labs - } - return(out) + } else if (is.character(hypothesis)) { + out_list <- draws_list <- list() + lab <- attr(hypothesis, "label") + tmp <- expand_wildcard(hypothesis, nrow(x), lab) + hypothesis <- tmp[[1]] + labs <- tmp[[2]] + for (i in seq_along(hypothesis)) { + out_list[[i]] <- eval_string_hypothesis(x, hypothesis[i], labs[i]) + draws_list[[i]] <- attr(out_list[[i]], "posterior_draws") } + out <- do.call(rbind, out_list) + attr(out, "posterior_draws") <- do.call(rbind, draws_list) + attr(out, "label") <- if (!is.null(attr(labs, "names"))) { + attr(labs, "names") + } else { + labs + } + return(out) + } - insight::format_error("`hypotheses` is broken. Please report this bug: https://github.com/vincentarelbundock/marginaleffects/issues.") + insight::format_error("`hypotheses` is broken. Please report this bug: https://github.com/vincentarelbundock/marginaleffects/issues.") } get_hypothesis_row_labels <- function(x, by = NULL) { - lab <- grep("^term$|^by$|^group$|^value$|^contrast$|^contrast_", colnames(x), value = TRUE) - lab <- Filter(function(z) length(unique(x[[z]])) > 1, lab) - if (isTRUE(checkmate::check_character(by))) { - lab <- unique(c(lab, by)) - } - if (length(lab) == 0) { - lab <- NULL + lab <- grep("^term$|^by$|^group$|^value$|^contrast$|^contrast_", colnames(x), value = TRUE) + lab <- Filter(function(z) length(unique(x[[z]])) > 1, lab) + if (isTRUE(checkmate::check_character(by))) { + lab <- unique(c(lab, by)) + } + if (length(lab) == 0) { + lab <- NULL + } else { + lab_df <- data.frame(x)[, lab, drop = FALSE] + idx <- vapply(lab_df, FUN = function(x) length(unique(x)) > 1, FUN.VALUE = logical(1)) + if (sum(idx) > 0) { + lab <- apply(lab_df[, idx, drop = FALSE], 1, function(k) paste(unique(k), collapse = ", ")) } else { - lab_df <- data.frame(x)[, lab, drop = FALSE] - idx <- vapply(lab_df, FUN = function(x) length(unique(x)) > 1, FUN.VALUE = logical(1)) - if (sum(idx) > 0) { - lab <- apply(lab_df[, idx, drop = FALSE], 1, paste, collapse = ", ") - } else { - lab <- apply(lab_df, 1, paste, collapse = ", ") - } + lab <- apply(lab_df, 1, function(k) paste(unique(k), collapse = ", ")) } + } - # wrap in parentheses to avoid a-b-c-d != (a-b)-(c-d) - if (any(grepl("-", lab))) { - lab <- sprintf("(%s)", lab) - } + # wrap in parentheses to avoid a-b-c-d != (a-b)-(c-d) + if (any(grepl("-", lab))) { + lab <- sprintf("(%s)", lab) + } - return(lab) + return(lab) } sanitize_lincom <- function(lincom, x) { - if (isTRUE(checkmate::check_matrix(lincom))) { - checkmate::assert_matrix(lincom, nrows = nrow(x)) - if (is.null(colnames(lincom))) { - colnames(lincom) <- rep("custom", ncol(lincom)) - } + if (isTRUE(checkmate::check_matrix(lincom))) { + checkmate::assert_matrix(lincom, nrows = nrow(x)) + if (is.null(colnames(lincom))) { + colnames(lincom) <- rep("custom", ncol(lincom)) } - return(lincom) + } + return(lincom) } lincom_revreference <- function(x, by) { - lincom <- -1 * diag(nrow(x)) - lincom[1, ] <- 1 - lab <- get_hypothesis_row_labels(x, by = by) - if (length(lab) == 0 || anyDuplicated(lab) > 0) { - lab <- sprintf("Row 1 - Row %s", seq_len(ncol(lincom))) - } else { - lab <- sprintf("%s - %s", lab[1], lab) - } - colnames(lincom) <- lab - lincom <- lincom[, 2:ncol(lincom), drop = FALSE] - return(lincom) + lincom <- -1 * diag(nrow(x)) + lincom[1, ] <- 1 + lab <- get_hypothesis_row_labels(x, by = by) + if (length(lab) == 0 || anyDuplicated(lab) > 0) { + lab <- sprintf("Row 1 - Row %s", seq_len(ncol(lincom))) + } else { + lab <- sprintf("%s - %s", lab[1], lab) + } + colnames(lincom) <- lab + lincom <- lincom[, 2:ncol(lincom), drop = FALSE] + return(lincom) } lincom_reference <- function(x, by) { - lincom <- diag(nrow(x)) - lincom[1, ] <- -1 - lab <- get_hypothesis_row_labels(x, by = by) - if (length(lab) == 0 || anyDuplicated(lab) > 0) { - lab <- sprintf("Row %s - Row 1", seq_len(ncol(lincom))) - } else { - lab <- sprintf("%s - %s", lab, lab[1]) - } - colnames(lincom) <- lab - lincom <- lincom[, 2:ncol(lincom), drop = FALSE] - return(lincom) + lincom <- diag(nrow(x)) + lincom[1, ] <- -1 + lab <- get_hypothesis_row_labels(x, by = by) + if (length(lab) == 0 || anyDuplicated(lab) > 0) { + lab <- sprintf("Row %s - Row 1", seq_len(ncol(lincom))) + } else { + lab <- sprintf("%s - %s", lab, lab[1]) + } + colnames(lincom) <- lab + lincom <- lincom[, 2:ncol(lincom), drop = FALSE] + return(lincom) } lincom_revsequential <- function(x, by) { - lincom <- matrix(0, nrow = nrow(x), ncol = nrow(x) - 1) - lab <- get_hypothesis_row_labels(x, by = by) - if (length(lab) == 0 || anyDuplicated(lab) > 0) { - lab <- sprintf("Row %s - Row %s", seq_len(ncol(lincom)), seq_len(ncol(lincom)) + 1) - } else { - lab <- sprintf("%s - %s", lab[seq_len(ncol(lincom))], lab[seq_len(ncol(lincom)) + 1]) - } - for (i in seq_len(ncol(lincom))) { - lincom[i:(i + 1), i] <- c(1, -1) - } - colnames(lincom) <- lab - return(lincom) + lincom <- matrix(0, nrow = nrow(x), ncol = nrow(x) - 1) + lab <- get_hypothesis_row_labels(x, by = by) + if (length(lab) == 0 || anyDuplicated(lab) > 0) { + lab <- sprintf("Row %s - Row %s", seq_len(ncol(lincom)), seq_len(ncol(lincom)) + 1) + } else { + lab <- sprintf("%s - %s", lab[seq_len(ncol(lincom))], lab[seq_len(ncol(lincom)) + 1]) + } + for (i in seq_len(ncol(lincom))) { + lincom[i:(i + 1), i] <- c(1, -1) + } + colnames(lincom) <- lab + return(lincom) } lincom_sequential <- function(x, by) { - lincom <- matrix(0, nrow = nrow(x), ncol = nrow(x) - 1) - lab <- get_hypothesis_row_labels(x, by = by) - if (length(lab) == 0 || anyDuplicated(lab) > 0) { - lab <- sprintf("Row %s - Row %s", seq_len(ncol(lincom)) + 1, seq_len(ncol(lincom))) - } else { - lab <- sprintf("%s - %s", lab[seq_len(ncol(lincom)) + 1], lab[seq_len(ncol(lincom))]) - } - for (i in seq_len(ncol(lincom))) { - lincom[i:(i + 1), i] <- c(-1, 1) - } - colnames(lincom) <- lab - return(lincom) + lincom <- matrix(0, nrow = nrow(x), ncol = nrow(x) - 1) + lab <- get_hypothesis_row_labels(x, by = by) + if (length(lab) == 0 || anyDuplicated(lab) > 0) { + lab <- sprintf("Row %s - Row %s", seq_len(ncol(lincom)) + 1, seq_len(ncol(lincom))) + } else { + lab <- sprintf("%s - %s", lab[seq_len(ncol(lincom)) + 1], lab[seq_len(ncol(lincom))]) + } + for (i in seq_len(ncol(lincom))) { + lincom[i:(i + 1), i] <- c(-1, 1) + } + colnames(lincom) <- lab + return(lincom) } lincom_revpairwise <- function(x, by) { - lab_row <- get_hypothesis_row_labels(x, by = by) - lab_col <- NULL - flag <- length(lab_row) == 0 || anyDuplicated(lab_row) > 0 - mat <- list() - for (i in seq_len(nrow(x))) { - for (j in 2:nrow(x)) { - if (i < j) { - tmp <- matrix(0, nrow = nrow(x), ncol = 1) - tmp[i, ] <- -1 - tmp[j, ] <- 1 - mat <- c(mat, list(tmp)) - if (isTRUE(flag)) { - lab_col <- c(lab_col, sprintf("Row %s - Row %s", j, i)) - } else { - lab_col <- c(lab_col, sprintf("%s - %s", lab_row[j], lab_row[i])) - } - } + lab_row <- get_hypothesis_row_labels(x, by = by) + lab_col <- NULL + flag <- length(lab_row) == 0 || anyDuplicated(lab_row) > 0 + mat <- list() + for (i in seq_len(nrow(x))) { + for (j in 2:nrow(x)) { + if (i < j) { + tmp <- matrix(0, nrow = nrow(x), ncol = 1) + tmp[i, ] <- -1 + tmp[j, ] <- 1 + mat <- c(mat, list(tmp)) + if (isTRUE(flag)) { + lab_col <- c(lab_col, sprintf("Row %s - Row %s", j, i)) + } else { + lab_col <- c(lab_col, sprintf("%s - %s", lab_row[j], lab_row[i])) } + } } - lincom <- do.call("cbind", mat) - colnames(lincom) <- lab_col - return(lincom) + } + lincom <- do.call("cbind", mat) + colnames(lincom) <- lab_col + return(lincom) } lincom_pairwise <- function(x, by) { - lab_row <- get_hypothesis_row_labels(x, by = by) - lab_col <- NULL - flag <- length(lab_row) == 0 || anyDuplicated(lab_row) > 0 - mat <- list() - for (i in seq_len(nrow(x))) { - for (j in 2:nrow(x)) { - if (i < j) { - tmp <- matrix(0, nrow = nrow(x), ncol = 1) - tmp[j, ] <- -1 - tmp[i, ] <- 1 - mat <- c(mat, list(tmp)) - if (isTRUE(flag)) { - lab_col <- c(lab_col, sprintf("Row %s - Row %s", i, j)) - } else { - lab_col <- c(lab_col, sprintf("%s - %s", lab_row[i], lab_row[j])) - } - } + lab_row <- get_hypothesis_row_labels(x, by = by) + lab_col <- NULL + flag <- length(lab_row) == 0 || anyDuplicated(lab_row) > 0 + mat <- list() + for (i in seq_len(nrow(x))) { + for (j in 2:nrow(x)) { + if (i < j) { + tmp <- matrix(0, nrow = nrow(x), ncol = 1) + tmp[j, ] <- -1 + tmp[i, ] <- 1 + mat <- c(mat, list(tmp)) + if (isTRUE(flag)) { + lab_col <- c(lab_col, sprintf("Row %s - Row %s", i, j)) + } else { + lab_col <- c(lab_col, sprintf("%s - %s", lab_row[i], lab_row[j])) } + } } - lincom <- do.call("cbind", mat) - colnames(lincom) <- lab_col - return(lincom) + } + lincom <- do.call("cbind", mat) + colnames(lincom) <- lab_col + return(lincom) } lincom_multiply <- function(x, lincom) { - # bayesian - draws <- attr(x, "posterior_draws") - if (!is.null(draws)) { - draws <- t(as.matrix(lincom)) %*% draws - out <- data.table( - term = colnames(lincom), - tmp = apply(draws, 1, stats::median)) - setnames(out, old = "tmp", new = "estimate") - attr(out, "posterior_draws") <- draws - - # frequentist - } else { - out <- data.table( - term = colnames(lincom), - tmp = as.vector(x[["estimate"]] %*% lincom)) - setnames(out, old = "tmp", new = "estimate") - } + # bayesian + draws <- attr(x, "posterior_draws") + if (!is.null(draws)) { + draws <- t(as.matrix(lincom)) %*% draws + out <- data.table( + term = colnames(lincom), + tmp = apply(draws, 1, stats::median)) + setnames(out, old = "tmp", new = "estimate") + attr(out, "posterior_draws") <- draws - out <- out[out$term != "1 - 1", , drop = FALSE] - return(out) + # frequentist + } else { + out <- data.table( + term = colnames(lincom), + tmp = as.vector(x[["estimate"]] %*% lincom)) + setnames(out, old = "tmp", new = "estimate") + } + + out <- out[out$term != "1 - 1", , drop = FALSE] + return(out) } eval_string_hypothesis <- function(x, hypothesis, lab) { - # row indices: `hypotheses` includes them, but `term` does not - if (isTRUE(grepl("\\bb\\d+\\b", hypothesis)) && !any(grepl("\\bb\\d+\\b", x[["term"]]))) { - - msg <- " -It is essential to check the order of estimates when specifying hypothesis tests using positional indices like b1, b2, etc. The indices of estimates can change depending on the order of rows in the original dataset, user-supplied arguments, model-fitting package, and version of `marginaleffects`. + # row indices: `hypotheses` includes them, but `term` does not + if (isTRUE(grepl("\\bb\\d+\\b", hypothesis)) && !any(grepl("\\bb\\d+\\b", x[["term"]]))) { + msg <- " +It is essential to check the order of estimates when specifying hypothesis tests using positional indices like b1, b2, etc. The indices of estimates can change depending on the order of rows in the original dataset, user-supplied arguments, model-fitting package, and version of `marginaleffects`. It is also good practice to use assertions that ensure the order of estimates is consistent across different runs of the same code. Example: @@ -334,93 +334,95 @@ avg_predictions(mod, by = 'carb', hypothesis = 'b1 - b2 = 0') Disable this warning with: `options(marginaleffects_safe = FALSE)` " - if (isFALSE(getOption("marginaleffects_safe", default = TRUE))) { - warn_once(msg, "hypothesis_positional_indices_are_dangerous") - } + if (isFALSE(getOption("marginaleffects_safe", default = TRUE))) { + warn_once(msg, "hypothesis_positional_indices_are_dangerous") + } - bmax <- regmatches(lab, gregexpr("\\bb\\d+\\b", lab))[[1]] - bmax <- tryCatch(max(as.numeric(gsub("b", "", bmax))), error = function(e) 0) - if (bmax > nrow(x)) { - msg <- "%s cannot be used in `hypothesis` because the call produced just %s estimate(s). Try executing the exact same command without the `hypothesis` argument to see which estimates are available for hypothesis testing." - msg <- sprintf(msg, paste0("b", bmax), nrow(x)) - insight::format_error(msg) - } - for (i in seq_len(nrow(x))) { - tmp <- paste0("marginaleffects__", i) - hypothesis <- gsub(paste0("b", i), tmp, hypothesis) - } - rowlabels <- paste0("marginaleffects__", seq_len(nrow(x))) + bmax <- regmatches(lab, gregexpr("\\bb\\d+\\b", lab))[[1]] + bmax <- tryCatch(max(as.numeric(gsub("b", "", bmax))), error = function(e) 0) + if (bmax > nrow(x)) { + msg <- "%s cannot be used in `hypothesis` because the call produced just %s estimate(s). Try executing the exact same command without the `hypothesis` argument to see which estimates are available for hypothesis testing." + msg <- sprintf(msg, paste0("b", bmax), nrow(x)) + insight::format_error(msg) + } + for (i in seq_len(nrow(x))) { + tmp <- paste0("marginaleffects__", i) + hypothesis <- gsub(paste0("b", i), tmp, hypothesis) + } + rowlabels <- paste0("marginaleffects__", seq_len(nrow(x))) # term names - } else { - if (!"term" %in% colnames(x) || anyDuplicated(x$term) > 0) { - msg <- c( - 'To use term names in a `hypothesis` string, the same function call without `hypothesis` argument must produce a `term` column with unique row identifiers. You can use `b1`, `b2`, etc. indices instead of term names in the `hypotheses` string Ex: "b1 + b2 = 0" Alternatively, you can use the `newdata`, `variables`, or `by` arguments:', - "", - "mod <- lm(mpg ~ am * vs + cyl, data = mtcars)", - 'comparisons(mod, newdata = "mean", hypothesis = "b1 = b2")', - 'comparisons(mod, newdata = "mean", hypothesis = "am = vs")', - 'comparisons(mod, variables = "am", by = "cyl", hypothesis = "pairwise")') - insight::format_error(msg) - } - rowlabels <- x$term + } else { + if (!"term" %in% colnames(x) || anyDuplicated(x$term) > 0) { + msg <- c( + 'To use term names in a `hypothesis` string, the same function call without `hypothesis` argument must produce a `term` column with unique row identifiers. You can use `b1`, `b2`, etc. indices instead of term names in the `hypotheses` string Ex: "b1 + b2 = 0" Alternatively, you can use the `newdata`, `variables`, or `by` arguments:', + "", + "mod <- lm(mpg ~ am * vs + cyl, data = mtcars)", + 'comparisons(mod, newdata = "mean", hypothesis = "b1 = b2")', + 'comparisons(mod, newdata = "mean", hypothesis = "am = vs")', + 'comparisons(mod, variables = "am", by = "cyl", hypothesis = "pairwise")') + insight::format_error(msg) } + rowlabels <- x$term + } - eval_string_function <- function(vec, hypothesis, rowlabels) { - envir <- parent.frame() - void <- sapply( - seq_along(vec), function(i) { - assign(rowlabels[i], vec[i], envir = envir) - }) - out <- eval(parse(text = hypothesis), envir = envir) - return(out) - } + eval_string_function <- function(vec, hypothesis, rowlabels) { + envir <- parent.frame() + void <- sapply( + seq_along(vec), function(i) { + assign(rowlabels[i], vec[i], envir = envir) + }) + out <- eval(parse(text = hypothesis), envir = envir) + return(out) + } - if (!is.null(attr(lab, "names"))) { - lab <- attr(lab, "names") - } else { - lab <- gsub("\\s+", "", lab) - } + if (!is.null(attr(lab, "names"))) { + lab <- attr(lab, "names") + } else { + lab <- gsub("\\s+", "", lab) + } - draws <- attr(x, "posterior_draws") - if (!is.null(draws)) { - insight::check_if_installed("collapse", minimum_version = "1.9.0") - tmp <- apply( - draws, - MARGIN = 2, - FUN = eval_string_function, - hypothesis = hypothesis, - rowlabels = rowlabels) - draws <- matrix(tmp, ncol = ncol(draws)) - out <- data.table( - term = lab, - tmp = collapse::dapply(draws, MARGIN = 1, FUN = collapse::fmedian)) - } else { - out <- eval_string_function( - x[["estimate"]], - hypothesis = hypothesis, - rowlabels = rowlabels) - out <- data.table( - term = lab, - tmp = out) - } + draws <- attr(x, "posterior_draws") + if (!is.null(draws)) { + insight::check_if_installed("collapse", minimum_version = "1.9.0") + tmp <- apply( + draws, + MARGIN = 2, + FUN = eval_string_function, + hypothesis = hypothesis, + rowlabels = rowlabels) + draws <- matrix(tmp, ncol = ncol(draws)) + out <- data.table( + term = lab, + tmp = collapse::dapply(draws, MARGIN = 1, FUN = collapse::fmedian)) + } else { + out <- eval_string_function( + x[["estimate"]], + hypothesis = hypothesis, + rowlabels = rowlabels) + out <- data.table( + term = lab, + tmp = out) + } - setnames(out, old = "tmp", new = "estimate") - attr(out, "posterior_draws") <- draws - return(out) + setnames(out, old = "tmp", new = "estimate") + attr(out, "posterior_draws") <- draws + return(out) } expand_wildcard <- function(hyp, bmax, lab) { # Find all occurrences of b* bstar_indices <- gregexpr("b\\*", hyp)[[1]] - if (bstar_indices[1] == -1) return(list(hyp, lab)) + if (bstar_indices[1] == -1) { + return(list(hyp, lab)) + } bstar_count <- length(bstar_indices) if (bstar_count > 1) { insight::format_error("Error: More than one 'b*' substring found.") } - + # Replace b* with b1, b2, b3, ..., bmax labs <- character(bmax) result <- character(bmax) @@ -428,6 +430,6 @@ expand_wildcard <- function(hyp, bmax, lab) { result[i] <- sub("b\\*", paste0("b", i), hyp) labs[i] <- sub("b\\*", paste0("b", i), lab) } - + return(list(result, labs)) } diff --git a/inst/tinytest/test-by.R b/inst/tinytest/test-by.R index fded001c3..8eebe4898 100644 --- a/inst/tinytest/test-by.R +++ b/inst/tinytest/test-by.R @@ -66,9 +66,10 @@ mod <- lm(mpg ~ factor(cyl) * hp + wt, data = dat) mar <- margins(mod, at = list(cyl = unique(dat$cyl))) mar <- data.frame(summary(mar)) mfx <- slopes( - mod, - by = "cyl", - newdata = datagrid(cyl = c(4, 6, 8), grid_type = "counterfactual")) + mod, + by = "cyl", + newdata = datagrid(cyl = c(4, 6, 8), grid_type = "counterfactual")) +mfx <- mfx[order(mfx$term, mfx$contrast), ] expect_equivalent(mfx$estimate, mar$AME) expect_equivalent(mfx$std.error, mar$SE, tolerance = 1e6) @@ -77,9 +78,9 @@ expect_equivalent(mfx$std.error, mar$SE, tolerance = 1e6) # issue #434 by with character precitors dat <- read.csv("https://vincentarelbundock.github.io/Rdatasets/csv/AER/Affairs.csv") mod <- glm( - affairs ~ children + gender + yearsmarried, - family = poisson, - data = dat) + affairs ~ children + gender + yearsmarried, + family = poisson, + data = dat) p <- predictions(mod, by = "children") expect_equivalent(nrow(p), 2) expect_false(anyNA(p$estimate)) @@ -97,8 +98,8 @@ cmp <- comparisons(mod, type = "probs", by = "group") expect_equivalent(nrow(cmp), 9) by <- data.frame( - group = c("3", "4", "5"), - by = c("(3,4)", "(3,4)", "(5)")) + group = c("3", "4", "5"), + by = c("(3,4)", "(3,4)", "(5)")) p1 <- predictions(mod, type = "probs") p2 <- predictions(mod, type = "probs", by = by) p3 <- predictions(mod, type = "probs", by = by, hypothesis = "sequential") @@ -113,26 +114,26 @@ cmp <- comparisons(mod, type = "probs", by = "am") expect_equivalent(nrow(cmp), 18) cmp <- comparisons( - mod, - variables = "am", - by = by, - type = "probs") + mod, + variables = "am", + by = by, + type = "probs") expect_equivalent(nrow(cmp), 2) cmp <- comparisons( - mod, - variables = "am", - by = by, - hypothesis = "sequential", - type = "probs") + mod, + variables = "am", + by = by, + hypothesis = "sequential", + type = "probs") expect_equivalent(nrow(cmp), 1) # Issue #481: warning on missing by categories mod <- nnet::multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE) by <- data.frame( - by = c("4", "5"), - group = 4:5) + by = c("4", "5"), + group = 4:5) expect_warning(comparisons(mod, variables = "mpg", newdata = "mean", by = by)) expect_warning(predictions(mod, newdata = "mean", by = by)) @@ -159,13 +160,13 @@ expect_equivalent(nrow(pre2), 96) dat <- mtcars mod <- glm(gear ~ cyl + am, family = poisson, data = dat) mfx <- avg_slopes( - mod, - by = c("cyl", "am"), - newdata = datagrid( - cyl = unique, - am = unique, - grid_type = "counterfactual")) |> - dplyr::arrange(term, cyl, am) + mod, + by = c("cyl", "am"), + newdata = datagrid( + cyl = unique, + am = unique, + grid_type = "counterfactual")) |> + dplyr::arrange(term, cyl, am) mar <- margins(mod, at = list(cyl = unique(dat$cyl), am = unique(dat$am))) mar <- summary(mar) # margins doesn't treat the binary am as binary automatically @@ -179,16 +180,16 @@ dat$cyl <- factor(dat$cyl) dat$am <- as.logical(dat$am) mod <- glm(gear ~ cyl + am, family = poisson, data = dat) mfx <- comparisons( - mod, - by = c("cyl", "am"), - newdata = datagrid( - cyl = unique, - am = unique, - grid_type = "counterfactual")) + mod, + by = c("cyl", "am"), + newdata = datagrid( + cyl = unique, + am = unique, + grid_type = "counterfactual")) mfx <- tidy(mfx) -mfx <- mfx[order(mfx$term, mfx$contrast, mfx$cyl, mfx$am),] +mfx <- mfx[order(mfx$term, mfx$contrast, mfx$cyl, mfx$am), ] mar <- margins(mod, at = list(cyl = unique(dat$cyl), am = unique(dat$am))) mar <- summary(mar) expect_equivalent(mfx$estimate, mar$AME, tolerance = tol) @@ -200,9 +201,9 @@ dat <- transform(mtcars, vs = vs, am = as.factor(am), cyl = as.factor(cyl)) mod <- lm(mpg ~ qsec + am + cyl, dat) fun <- \(hi, lo) mean(hi) / mean(lo) cmp1 <- comparisons(mod, variables = "cyl", comparison = fun, by = "am") |> - dplyr::arrange(am, contrast) + dplyr::arrange(am, contrast) cmp2 <- comparisons(mod, variables = "cyl", comparison = "ratioavg", by = "am") |> - dplyr::arrange(am, contrast) + dplyr::arrange(am, contrast) expect_equivalent(cmp1$estimate, cmp2$estimate) expect_equivalent(cmp1$std.error, cmp2$std.error) expect_equal(nrow(cmp1), 4) @@ -218,18 +219,18 @@ cmp2 <- comparisons(mod, variables = "am") %>% dplyr::group_by(cyl) %>% dplyr::summarize(estimate = mean(estimate), .groups = "keep") |> dplyr::ungroup() -cmp3 <- predictions(mod) |> - aggregate(estimate ~ am + cyl, FUN = mean) |> - aggregate(estimate ~ cyl, FUN = diff) +cmp3 <- predictions(mod) |> + aggregate(estimate ~ am + cyl, FUN = mean) |> + aggregate(estimate ~ cyl, FUN = diff) expect_equivalent(cmp1$estimate, cmp2$estimate) expect_equivalent(cmp1$estimate, cmp3$estimate) # Issue #1058 tmp <- mtcars -tmp <- tmp[c('mpg', 'cyl', 'hp')] +tmp <- tmp[c("mpg", "cyl", "hp")] tmp$cyl <- as.factor(tmp$cyl) # 3 levels -tmp$hp <- as.factor(tmp$hp) +tmp$hp <- as.factor(tmp$hp) bygrid <- datagrid(newdata = tmp, by = "cyl", hp = unique) expect_equivalent(nrow(bygrid), 23) diff --git a/inst/tinytest/test-equivalence.R b/inst/tinytest/test-equivalence.R index 9c671604e..cfcd2ca69 100644 --- a/inst/tinytest/test-equivalence.R +++ b/inst/tinytest/test-equivalence.R @@ -11,10 +11,10 @@ null <- 20 em <- emmeans(mod, "gear", df = Inf) e1 <- test(em, delta = delta, null = null, side = "noninferiority", df = Inf) e2 <- predictions( - mod, - newdata = datagrid(gear = unique), - equivalence = c(19, 21)) |> - poorman::arrange(gear) + mod, + newdata = datagrid(gear = unique), + equivalence = c(19, 21)) |> + poorman::arrange(gear) expect_equivalent(e1$z.ratio, e2$statistic.noninf, tolerance = 1e-6) expect_equivalent(e1$p.value, e2$p.value.noninf) @@ -22,10 +22,10 @@ expect_equivalent(e1$p.value, e2$p.value.noninf) # predictions() vs. {emmeans}: sup e1 <- test(em, delta = 1, null = 23, side = "nonsuperiority", df = Inf) e2 <- predictions( - mod, - newdata = datagrid(gear = unique), - equivalence = c(22, 24)) |> - poorman::arrange(gear) + mod, + newdata = datagrid(gear = unique), + equivalence = c(22, 24)) |> + poorman::arrange(gear) expect_equivalent(e1$z.ratio, e2$statistic.nonsup, tol = 1e-6) expect_equivalent(e1$p.value, e2$p.value.nonsup) @@ -33,19 +33,19 @@ expect_equivalent(e1$p.value, e2$p.value.nonsup) # predictions() vs. {emmeans}: equiv e1 <- test(em, delta = 1, null = 22, side = "equivalence", df = Inf) e2 <- predictions( - mod, - newdata = datagrid(gear = unique), - equivalence = c(21, 23)) |> - poorman::arrange(gear) + mod, + newdata = datagrid(gear = unique), + equivalence = c(21, 23)) |> + poorman::arrange(gear) expect_equivalent(e1$p.value, e2$p.value.equiv) # slopes() works; no validity mfx <- slopes( - mod, - variables = "hp", - newdata = "mean", - equivalence = c(-.09, .01)) + mod, + variables = "hp", + newdata = "mean", + equivalence = c(-.09, .01)) expect_inherits(mfx, "slopes") @@ -55,18 +55,18 @@ requiet("equivalence") set.seed(1024) N <- 100 dat <- rbind( - data.frame(y = rnorm(N), x = 0), - data.frame(y = rnorm(N, mean = 0.3), x = 1)) + data.frame(y = rnorm(N), x = 0), + data.frame(y = rnorm(N, mean = 0.3), x = 1)) mod <- lm(y ~ x, data = dat) FUN <- function(x) { - data.frame(term = "t-test", estimate = coef(x)[2]) + data.frame(term = "t-test", estimate = coef(x)[2]) } e1 <- tost(dat$y[dat$x == 0], dat$y[dat$x == 1], epsilon = .05) e2 <- hypotheses( - mod, - hypothesis = FUN, - equivalence = c(-.05, .05), - df = e1$parameter) + mod, + hypothesis = FUN, + equivalence = c(-.05, .05), + df = e1$parameter) expect_true(e1$tost.p.value > .5 && e1$tost.p.value < .9) expect_equivalent(e1$tost.p.value, e2$p.value.equiv) @@ -76,12 +76,12 @@ mod <- glm(vs ~ factor(gear), data = mtcars, family = binomial) em <- emmeans(mod, "gear", df = Inf) e1 <- test(em, delta = .5, null = 1, side = "noninferiority", df = Inf) e2 <- predictions( - mod, - type = "link", - newdata = datagrid(gear = unique), - equivalence = c(.5, 1.5), - numderiv = "richardson") |> - poorman::arrange(gear) + mod, + type = "link", + newdata = datagrid(gear = unique), + equivalence = c(.5, 1.5), + numderiv = "richardson") |> + poorman::arrange(gear) expect_equivalent(e1$emmean, e2$estimate) expect_equivalent(e1$z.ratio, e2$statistic.noninf) expect_equivalent(e1$p.value, e2$p.value.noninf) @@ -97,7 +97,7 @@ expect_inherits(mfx, "hypotheses") expect_inherits(pre, "hypotheses") if (!requiet("tinysnapshot")) { - exit_file("tinysnapshot") + exit_file("tinysnapshot") } cmp <- avg_comparisons(tmp, equivalence = c(-.1, 0)) expect_snapshot_print(cmp, "equivalence-avg_comparisons") @@ -120,10 +120,10 @@ em <- emmeans(rg, "source", at = list(), df = Inf) pa <- pairs(em, df = Inf) mm <- predictions( - mod, - newdata = datagrid(grid_type = "balanced"), - by = "source", - hypothesis = "pairwise") + mod, + newdata = datagrid(grid_type = "balanced"), + by = "source", + hypothesis = "pairwise") e1 <- test(pa, delta = delta, adjust = "none", side = "nonsuperiority", df = Inf) e2 <- hypotheses(mm, equivalence = c(-delta, delta))