diff --git a/NEWS.md b/NEWS.md index a51eb4fe9..cb198fa7c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # dbplyr (development version) +* Refined the `select()` inlining criteria to keep computed columns used to + `arrange()` subqueries that are eliminated by a subsequent select (@ejneer, + #1437). + * dbplyr now exports some tools to work with the internal `table_path` class which is useful for certain backends that need to work with this data structure (#1300). diff --git a/R/verb-select.R b/R/verb-select.R index c1ee020a2..ef93594f3 100644 --- a/R/verb-select.R +++ b/R/verb-select.R @@ -114,7 +114,7 @@ add_select <- function(lazy_query, vars) { } is_select <- is_lazy_select_query(lazy_query) - select_can_be_inlined <- is_bijective_projection(vars, vars_data) || !is_true(lazy_query$distinct) + select_can_be_inlined <- select_can_be_inlined(lazy_query, vars) if (is_select && select_can_be_inlined) { idx <- vctrs::vec_match(vars, vars_data) @@ -131,9 +131,20 @@ add_select <- function(lazy_query, vars) { ) } -is_bijective_projection <- function(vars, names_prev) { - vars <- unname(vars) - identical(sort(vars), names_prev) +select_can_be_inlined <- function(lazy_query, vars) { + # all computed columns used for ordering (if any) must be present + computed_flag <- purrr::map_lgl(lazy_query$select$expr, is_quosure) + computed_columns <- lazy_query$select$name[computed_flag] + + order_vars <- purrr::map_chr(lazy_query$order_by, as_label) + ordered_present <- all(intersect(computed_columns, order_vars) %in% vars) + + # if the projection is distinct, it must be bijective + is_distinct <- is_true(lazy_query$distinct) + is_bijective_projection <- identical(sort(unname(vars)), op_vars(lazy_query)) + distinct_is_bijective <- !is_distinct || is_bijective_projection + + ordered_present && distinct_is_bijective } rename_groups <- function(lazy_query, vars) { diff --git a/tests/testthat/_snaps/verb-select.md b/tests/testthat/_snaps/verb-select.md index 0140cc721..327a24e67 100644 --- a/tests/testthat/_snaps/verb-select.md +++ b/tests/testthat/_snaps/verb-select.md @@ -181,6 +181,22 @@ Error in `select()`: ! This tidyselect interface doesn't support predicates. +# arranged computed columns are not inlined away + + Code + lf %>% mutate(z = 1) %>% arrange(x, z) %>% select(x) + Condition + Warning: + ORDER BY is ignored in subqueries without LIMIT + i Do you need to move arrange() later in the pipeline or use window_order() instead? + Output + + SELECT `x` + FROM ( + SELECT `df`.*, 1.0 AS `z` + FROM `df` + ) AS `q01` + # multiple selects are collapsed Code diff --git a/tests/testthat/test-verb-select.R b/tests/testthat/test-verb-select.R index 62e567769..bebe74429 100644 --- a/tests/testthat/test-verb-select.R +++ b/tests/testthat/test-verb-select.R @@ -35,6 +35,31 @@ test_that("select after distinct produces subquery", { expect_true(lq$x$distinct) }) +test_that("select after arrange produces subquery, if needed", { + lf <- lazy_frame(x = 1) + + # shouldn't inline + out <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x) + # should inline + out2 <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x, z) + + inner_query <- out$lazy_query$x + expect_s3_class(inner_query, "lazy_select_query") + expect_equal(inner_query$order_by, list(quo(x), quo(z)), ignore_formula_env = TRUE) + expect_equal( + op_vars(inner_query), + c("x", "z") + ) + expect_equal(op_vars(out$lazy_query), "x") + + # order vars in a subquery are dropped + expect_equal( + inner_query[setdiff(names(inner_query), "order_vars")], + out2$lazy_query[setdiff(names(out2$lazy_query), "order_vars")] + ) +}) + + test_that("rename/relocate after distinct is inlined #1141", { lf <- lazy_frame(x = 1, y = 1:2) expect_snapshot({ @@ -284,6 +309,33 @@ test_that("where() isn't suppored", { }) }) +test_that("arranged computed columns are not inlined away", { + lf <- lazy_frame(x = 1) + + # shouldn't inline + out <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x) + # should inline + out2 <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x, z) + + inner_query <- out$lazy_query$x + expect_snapshot({ + lf %>% mutate(z = 1) %>% arrange(x, z) %>% select(x) + }) + expect_s3_class(inner_query, "lazy_select_query") + expect_equal( + inner_query$order_by, + list(quo(x), quo(z)), + ignore_formula_env = TRUE + ) + expect_equal(op_vars(inner_query), c("x", "z")) + expect_equal(op_vars(out$lazy_query), "x") + expect_equal( + # order vars in a subquery are dropped + inner_query[setdiff(names(inner_query), "order_vars")], + out2$lazy_query[setdiff(names(out2$lazy_query), "order_vars")] + ) +}) + # sql_render -------------------------------------------------------------- test_that("multiple selects are collapsed", {