Skip to content

Commit

Permalink
default formatting functions in tbl_svysummary() (#2079)
Browse files Browse the repository at this point in the history
* fix

* Update test-tbl_svysummary.R

* Update tbl_svysummary.R
  • Loading branch information
ddsjoberg authored Nov 22, 2024
1 parent ad4005f commit 214557f
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 72 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: gtsummary
Title: Presentation-Ready Data Summary and Analytic Result Tables
Version: 2.0.3.9006
Version: 2.0.3.9007
Authors@R: c(
person("Daniel D.", "Sjoberg", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-0862-2018")),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# gtsummary (development version)

* Fix for setting default formatting functions in `tbl_svysummary()`. Previously, defaults were assigned similarly to those in `tbl_summary()`, which led to survey-only statistics being assigned sub-optimal defaults. (#2078)

* The `with_gtsummary_theme()` has been updated to no longer print theme names when the applied, nor when the original theme is re-applied. (#2031)

* Updated the `theme_gtsummary_journal("jama")` theme to apply changes to `tbl_svysummary()`. (#1964; @vjcatharine)
Expand Down
126 changes: 65 additions & 61 deletions R/assign_summary_digits.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,62 +48,65 @@ assign_summary_digits <- function(data, statistic, type, digits = NULL) {
# extract the statistics
statistic <- lapply(statistic, function(x) .extract_glue_elements(x) |> unlist())

lapply(
names(statistic),
function(variable) {
# if user passed digits AND they've specified every statistic, use the passed value
# otherwise, we need to calculate the defaults, and later we can update with the pieces the user passed
if (!is.null(digits[[variable]])) {
# if a scalar or vector passed, convert it to a list
if (!is.list(digits[[variable]]) && is_vector(digits[[variable]])) {
digits[[variable]] <- as.list(digits[[variable]])
digits_final <-
lapply(
names(statistic),
function(variable) {
# if user passed digits AND they've specified every statistic, use the passed value
# otherwise, we need to calculate the defaults, and later we can update with the pieces the user passed
if (!is.null(digits[[variable]])) {
# if a scalar or vector passed, convert it to a list
if (!is.list(digits[[variable]]) && is_vector(digits[[variable]])) {
digits[[variable]] <- as.list(digits[[variable]])
}

# if user-passed value is not named, repeat the passed value to the length of 'statistic'
if (!is_named(digits[[variable]])) {
if (!is_function(digits[[variable]])) digits[[variable]] <- rep_named(statistic[[variable]], digits[[variable]])
else digits[[variable]] <- rep_named(statistic[[variable]], digits[variable])
}

# convert integers to a proper function
digits[[variable]] <- .convert_integer_to_fmt_fn(digits[[variable]])

# check value is a function
if (!is_list(digits[[variable]]) || some(digits[[variable]], \(.x) !is_function(.x))) {
cli::cli_abort(
c("Error in {.arg digits} argument for variable {.val {variable}},",
i = "Passed values must be either a {.cls function} or {.cls integer}."),
call = get_cli_abort_call()
)
}

# if the passed value fully specifies the formatting for each 'statistic',
# then return it. Otherwise, the remaining stat will be filled below
if (setequal(statistic[[variable]], names(digits[[variable]]))) {
return(lst_all_fmt_fns |> utils::modifyList(digits[[variable]]))
}
}

# if user-passed value is not named, repeat the passed value to the length of 'statistic'
if (!is_named(digits[[variable]])) {
if (!is_function(digits[[variable]])) digits[[variable]] <- rep_named(statistic[[variable]], digits[[variable]])
else digits[[variable]] <- rep_named(statistic[[variable]], digits[variable])
}

# convert integers to a proper function
digits[[variable]] <- .convert_integer_to_fmt_fn(digits[[variable]])

# check value is a function
if (!is_list(digits[[variable]]) || some(digits[[variable]], \(.x) !is_function(.x))) {
cli::cli_abort(
c("Error in {.arg digits} argument for variable {.val {variable}},",
i = "Passed values must be either a {.cls function} or {.cls integer}."),
call = get_cli_abort_call()
if (type[[variable]] %in% c("categorical", "dichotomous")) {
return(
c(lst_cat_summary_fns, lst_all_fmt_fns) |>
utils::modifyList(digits[[variable]] %||% list())
)
}

# if the passed value fully specifies the formatting for each 'statistic',
# then return it. Otherwise, the remaining stat will be filled below
if (setequal(statistic[[variable]], names(digits[[variable]]))) {
return(lst_all_fmt_fns |> utils::modifyList(digits[[variable]]))
if (type[[variable]] %in% c("continuous", "continuous2")) {
return(
rep_named(
statistic[[variable]],
list(.guess_continuous_summary_digits(data, variable))
) |>
utils::modifyList(lst_all_fmt_fns) |>
utils::modifyList(digits[[variable]] %||% list())
)
}
}

if (type[[variable]] %in% c("categorical", "dichotomous")) {
return(
c(lst_cat_summary_fns, lst_all_fmt_fns) |>
utils::modifyList(digits[[variable]] %||% list())
)
}

if (type[[variable]] %in% c("continuous", "continuous2")) {
return(
rep_named(
statistic[[variable]],
list(.guess_continuous_summary_digits(data, variable))
) |>
utils::modifyList(lst_all_fmt_fns) |>
utils::modifyList(digits[[variable]] %||% list())
)
}
}
) |>
) |>
stats::setNames(names(statistic))

digits_final
}

.convert_integer_to_fmt_fn <- function(x) {
Expand Down Expand Up @@ -152,21 +155,22 @@ assign_summary_digits <- function(data, statistic, type, digits = NULL) {
inherits(data, "survey.design") ~
cardx::ard_continuous(data, variables = all_of(variable), statistic = ~ c("p5", "p95")) |>
dplyr::pull("stat") |>
reduce(\(.x, .y) .y - .x)
reduce(\(.x, .y) .y - .x) %>%
{ifelse(is_empty(.), Inf, .)} # styler: off
)

label_style_number(
digits =
dplyr::case_when(
var_spread < 0.01 ~ 4L,
var_spread >= 0.01 & var_spread < 0.1 ~ 3L,
var_spread >= 0.1 & var_spread < 10 ~ 2L,
var_spread >= 10 & var_spread < 20 ~ 1L,
var_spread >= 20 ~ 0L
)
)
},
error = function(e) 0L
label_style_number(
digits =
dplyr::case_when(
var_spread < 0.01 ~ 4L,
var_spread >= 0.01 & var_spread < 0.1 ~ 3L,
var_spread >= 0.1 & var_spread < 10 ~ 2L,
var_spread >= 10 & var_spread < 20 ~ 1L,
var_spread >= 20 ~ 0L
)
)
},
error = function(e) 0L
)
# styler: on
}
Expand Down
3 changes: 2 additions & 1 deletion R/tbl_svysummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ tbl_svysummary <- function(data,
# fill each element of digits argument
if (!missing(digits)) {
digits <-
scope_table_body(.list2tb(type, "var_type"), as.data.frame(data)[include]) |>
data |>
assign_summary_digits(statistic, type, digits = digits)
}

Expand Down Expand Up @@ -374,3 +374,4 @@ tbl_svysummary <- function(data,

x
}

16 changes: 8 additions & 8 deletions tests/testthat/_snaps/tbl_svysummary.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
as.data.frame(tbl_svysummary(data = svy_mtcars))
Output
**Characteristic** **N = 32**
1 mpg 19.2 (15.2, 22.8)
1 mpg 19 (15, 23)
2 cyl <NA>
3 4 11 (34%)
4 6 7 (22%)
Expand Down Expand Up @@ -78,7 +78,7 @@
as.data.frame(tbl_svysummary(data = svy_mtcars, by = am))
Output
**Characteristic** **0** \nN = 19 **1** \nN = 13
1 mpg 17.3 (14.7, 19.2) 22.8 (21.0, 30.4)
1 mpg 17 (15, 19) 23 (21, 30)
2 cyl <NA> <NA>
3 4 3 (16%) 8 (62%)
4 6 4 (21%) 3 (23%)
Expand Down Expand Up @@ -134,12 +134,12 @@
Code
as.data.frame(tbl)
Output
**Characteristic** **0** \nN = 19 **1** \nN = 13
1 New mpg 17.3 (14.7, 19.2) 22.8 (21.0, 30.4)
2 New cyl <NA> <NA>
3 4 3 (16%) 8 (62%)
4 6 4 (21%) 3 (23%)
5 8 12 (63%) 2 (15%)
**Characteristic** **0** \nN = 19 **1** \nN = 13
1 New mpg 17 (15, 19) 23 (21, 30)
2 New cyl <NA> <NA>
3 4 3 (16%) 8 (62%)
4 6 4 (21%) 3 (23%)
5 8 12 (63%) 2 (15%)

# tbl_svysummary(label) errors properly

Expand Down
14 changes: 13 additions & 1 deletion tests/testthat/test-tbl_svysummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ test_that("tbl_svysummary(statistic)", {
) |>
as.data.frame(col_labels = FALSE) |>
dplyr::pull(stat_0),
"n=61 | N=193 | p=32 | N_obs=200 | N_miss=7 | N_nonmiss=193 | p_miss=3.5 | p_nonmiss=97 | p.std.error=0.0 | deff=Inf | n_unweighted=61 | N_unweighted=193 | p_unweighted=31.6"
"n=61 | N=193 | p=32 | N_obs=200 | N_miss=7 | N_nonmiss=193 | p_miss=3.5 | p_nonmiss=97 | p.std.error=0.034 | deff=Inf | n_unweighted=61 | N_unweighted=193 | p_unweighted=32"
)

# continuous summary when there is no "continuous" stats (just missingness stats)
Expand Down Expand Up @@ -588,3 +588,15 @@ test_that("tbl_svysummary(percent)", {
tbl_svysummary(svy_trial, by = trt, include = grade, percent = letters, statistic = ~"{p}%")
)
})

# Fix for default formatting function issue reported in #2078
test_that("tbl_svysummary() default fmt fn", {
expect_equal(
svy_trial |>
tbl_svysummary(include = age, missing_stat = "{N_miss_unweighted} ({p_miss_unweighted}%)") |>
as.data.frame(col_label = FALSE) |>
dplyr::pull(stat_0) |>
getElement(2L),
"11 (5.5%)"
)
})

0 comments on commit 214557f

Please sign in to comment.