Skip to content

Commit

Permalink
Remove caching of across() selections (#5855)
Browse files Browse the repository at this point in the history
- Caching `.fns` is too perilous. The function might produce side
  effects or depend on state.

- Caching `.cols` is no longer as useful with top-level `across()`
  expansion.

We still cache for `c_across()` as it doesn't have any expansion
yet. The caching takes into account the quosure env to improve
correctness. Should be further improved by not caching any selection
that includes env-expressions since these might have side effects and
depend on state.

Closes #5835.
  • Loading branch information
lionel- authored Apr 20, 2021
1 parent 827615c commit 04309fe
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 56 deletions.
61 changes: 31 additions & 30 deletions R/across.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@
#' `{.fn}` to stand for the name of the function being applied. The default
#' (`NULL`) is equivalent to `"{.col}"` for the single function case and
#' `"{.col}_{.fn}"` for the case where a list is used for `.fns`.
#' @param .call Call used by the caching mechanism. This is only useful when `across()`
#' is called from another function, and should mostly just be ignored.
#'
#' @returns
#' `across()` returns a tibble with one column for each column in `.cols` and each function in `.fns`.
Expand Down Expand Up @@ -114,9 +112,13 @@
#'
#' @export
#' @seealso [c_across()] for a function that returns a vector
across <- function(.cols = everything(), .fns = NULL, ..., .names = NULL, .call = sys.call()) {
key <- key_deparse(.call)
setup <- across_setup({{ .cols }}, fns = .fns, names = .names, key = key, .caller_env = caller_env())
across <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) {
setup <- across_setup(
{{ .cols }},
fns = .fns,
names = .names,
.caller_env = caller_env()
)

vars <- setup$vars
if (length(vars) == 0L) {
Expand Down Expand Up @@ -175,17 +177,17 @@ across <- function(.cols = everything(), .fns = NULL, ..., .names = NULL, .call

#' @rdname across
#' @export
if_any <- function(.cols = everything(), .fns = NULL, ..., .names = NULL, .call = sys.call()) {
df <- across({{ .cols }}, .fns = .fns, ..., .names = .names, .call = .call)
if_any <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) {
df <- across({{ .cols }}, .fns, ..., .names = .names)
n <- nrow(df)
df <- vec_cast_common(!!!df, .to = logical())
.Call(dplyr_reduce_lgl_or, df, n)
}

#' @rdname across
#' @export
if_all <- function(.cols = everything(), .fns = NULL, ..., .names = NULL, .call = sys.call()) {
df <- across({{ .cols }}, .fns = .fns, ..., .names = .names, .call = .call)
if_all <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) {
df <- across({{ .cols }}, .fns, ..., .names = .names)
n <- nrow(df)
df <- vec_cast_common(!!!df, .to = logical())
.Call(dplyr_reduce_lgl_and, df, n)
Expand Down Expand Up @@ -214,8 +216,9 @@ if_all <- function(.cols = everything(), .fns = NULL, ..., .names = NULL, .call
#' sd = sd(c_across(w:z))
#' )
c_across <- function(cols = everything()) {
key <- key_deparse(sys.call())
vars <- c_across_setup({{ cols }}, key = key)
cols <- enquo(cols)
key <- key_deparse(cols)
vars <- c_across_setup(!!cols, key = key)

mask <- peek_mask("c_across()")

Expand All @@ -232,25 +235,18 @@ across_glue_mask <- function(.col, .fn, .caller_env) {
glue_mask
}

# TODO: The usage of a cache in `across_setup()` and `c_across_setup()` is a stopgap solution, and
# this idea should not be used anywhere else. This should be replaced by the
# TODO: The usage of a cache in `c_across_setup()` is a stopgap solution, and
# this idea should not be used anywhere else. This should be replaced by either
# expansions of expressions (as we now use for `across()`) or the
# next version of hybrid evaluation, which should offer a way for any function
# to do any required "set up" work (like the `eval_select()` call) a single
# time per top-level call, rather than once per group.
across_setup <- function(cols, fns, names, key, .caller_env) {
mask <- peek_mask("across()")
value <- mask$across_cache_get(key)
if (is.null(value)) {
value <- across_setup_impl({{ cols }},
fns = fns, names = names, .caller_env = .caller_env, mask = mask,
.top_level = FALSE
)
mask$across_cache_add(key, value)
}
value
}

across_setup_impl <- function(cols, fns, names, .caller_env, mask = peek_mask("across()"), .top_level = FALSE) {
across_setup <- function(cols,
fns,
names,
.caller_env,
mask = peek_mask("across()"),
.top_level = FALSE) {
cols <- enquo(cols)

if (.top_level) {
Expand Down Expand Up @@ -346,8 +342,13 @@ c_across_setup <- function(cols, key) {
value
}

key_deparse <- function(key) {
deparse(key, width.cutoff = 500L, backtick = TRUE, nlines = 1L, control = NULL)
# FIXME: Should not cache `cols` when it includes env-expressions
# https://github.com/r-lib/tidyselect/issues/235
key_deparse <- function(cols) {
paste(
deparse(quo_get_expr(cols)),
format(quo_get_env(cols))
)
}

new_dplyr_quosure <- function(quo, ...) {
Expand Down Expand Up @@ -423,7 +424,7 @@ expand_across <- function(quo) {
}
cols <- as_quosure(cols, env)

setup <- across_setup_impl(
setup <- across_setup(
!!cols,
fns = eval_tidy(expr$.fns, mask),
names = eval_tidy(expr$.names, mask),
Expand Down
31 changes: 5 additions & 26 deletions man/across.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 04309fe

Please sign in to comment.