Skip to content

Commit

Permalink
tweaks
Browse files Browse the repository at this point in the history
1. change `remove_na` to `show_na` and set default to conditionally show the missing category if any NA are present
2. use the Wilson confidence interval rather than the normal approximation to avoid CIs that escape the [0, 1] bounds
3. namespace ggplot2 functions
4. add arguments for the column and error bar colors, defaulting to blue hues for each
  • Loading branch information
bwiernik committed Aug 31, 2023
1 parent 439c2a4 commit e04947a
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 39 deletions.
104 changes: 74 additions & 30 deletions R/plot.dw_data_tabulate.R
Original file line number Diff line number Diff line change
@@ -1,29 +1,50 @@
#' Plot tabulated data.
#'
#' @param x Object created by `datawizard::data_tabulate()`.
#' @param value_lab Boolean. Should values and percentages be displayed at the
#' top of each bar.
#' @param remove_na Boolean. Should missing values be dropped?
#' @param na_label The label given to missing values (only when
#' `remove_na = FALSE`).
#' @param error_bar Boolean. Should error bars be displayed?
#' @param label_values Logical. Should values and percentages be displayed at the
#' top of each bar.
#' @param show_na Should missing values be dropped? Can be `"if_any"` (default) to show
#' the missing category only if any missing values are present, `"always"` to
#' always show the missing category, or `"never"` to never show the missing
#' category.
#' @param na_label The label given to missing values when they are shown.
#' @param error_bar Logical. Should error bars be displayed?
#' If `TRUE`, confidence intervals computed using the Wilson method are shown.
#' See Brown et al. (2001) for details.
#' @param ci Confidence Interval (CI) level. Default to `0.95` (⁠95%⁠).
#' @param fill_col Color to use for category columns (default: "#87CEFA").
#' @param color_error_bar Color to use for error bars (default: "#607B8B").
#' @param ... Unused
#'
#' @references
#' Brown, L. D., Cai, T. T., & DasGupta, A. (2001).
#' Interval estimation for a binomial proportion.
#' _Statistical Science, 16_(2), 101–133. \doi{10.1214/ss/1009213286}
#'
#' @rdname plot.dw_data_tabulate
#' @export

plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE,
na_label = "(Missing)", error_bar = TRUE,
plot.dw_data_tabulates <- function(x, label_values = TRUE,
show_na = c("if_any", "always", "never"),
na_label = "(Missing)",
error_bar = TRUE, ci = .95,

Check warning on line 30 in R/plot.dw_data_tabulate.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/plot.dw_data_tabulate.R,line=30,col=59,[numeric_leading_zero_linter] Include the leading zero for fractional numeric constants.
fill_col = "#87CEFA",
color_error_bar = "#607B8B",
...) {
show_na <- match.arg(show_na, choices = c("if_any", "always", "never"))
if (length(x) == 1) {
plot.dw_data_tabulate(
x[[1]], value_lab = value_lab, remove_na = remove_na,
na_label = na_label, error_bar = error_bar
x[[1]], label_values = label_values,
show_na = show_na, na_label = na_label,
error_bar = error_bar, ci = ci,
fill_col = fill_col, color_error_bar = color_error_bar
)
} else {
lapply(x, plot.dw_data_tabulate,
value_lab = value_lab, remove_na = remove_na,
na_label = na_label, error_bar = error_bar
label_values = label_values,
show_na = show_na, na_label = na_label,
error_bar = error_bar, ci = ci,
fill_col = fill_col, color_error_bar = color_error_bar
)
}
}
Expand All @@ -32,12 +53,25 @@ plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE,
#'
#' @export

plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE,
na_label = "(Missing)", error_bar = TRUE,
plot.dw_data_tabulate <- function(x, label_values = TRUE,
show_na = c("if_any", "always", "never"),
na_label = "(Missing)",
error_bar = TRUE, ci = .95,

Check warning on line 59 in R/plot.dw_data_tabulate.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/plot.dw_data_tabulate.R,line=59,col=58,[numeric_leading_zero_linter] Include the leading zero for fractional numeric constants.
fill_col = "#87CEFA",
color_error_bar = "#607B8B",
...) {
show_na <- match.arg(show_na, choices = c("if_any", "always", "never"))
dat <- as.data.frame(x)

if (isTRUE(remove_na)) {
if (show_na == "if_any") {
if (any(is.na(dat$Value))) {

Check warning on line 67 in R/plot.dw_data_tabulate.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/plot.dw_data_tabulate.R,line=67,col=9,[any_is_na_linter] anyNA(x) is better than any(is.na(x)).
show_na <- ifelse(dat[is.na(dat$Value), "N"] > 0, "always", "never")
} else {
show_na <- "never"
}
}

if (show_na == "never") {
dat <- dat[!is.na(dat$Value), ]
dat$output <- dat[[which(startsWith(names(dat), "Valid"))]]
} else {
Expand All @@ -54,40 +88,50 @@ plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE,

if (isTRUE(error_bar)) {
total_n <- sum(dat$N)
rel_frq <- dat$output / 100
ci <- 1.96 * suppressWarnings(sqrt(rel_frq * (1 - rel_frq) / total_n))
dat$upper.ci <- total_n * (rel_frq + ci)
dat$lower.ci <- total_n * (rel_frq - ci)
props <- dat$output / 100
dat <- cbind(dat, CI = ci, .wilson_ci(prop = props, total_n = total_n, ci = ci) * total_n)
dat$label <- paste0(dat$N, " (", round(dat$output, 2), "%)")
} else {
dat$label <- paste0(dat$N, "\n(", round(dat$output, 2), "%)")
}

out <- ggplot(dat, aes(x = .data$Value, y = .data$N)) +
geom_col() +
labs(title = unique(dat$Variable)) +
out <- ggplot2::ggplot(dat) +
ggplot2::aes(x = .data$Value, y = .data$N) +
ggplot2::geom_col(fill = fill_col) +
ggplot2::labs(title = unique(dat$Variable)) +
theme_modern()

if (isTRUE(value_lab)) {
if (isTRUE(label_values)) {
if (isTRUE(error_bar)) {
out <- out +
geom_text(aes(label = .data$label), vjust = -1, hjust = 1.2) +
ylim(c(0, max(dat$N) * 1.5))
ggplot2::geom_text(ggplot2::aes(label = .data$label), vjust = -1, hjust = 1.2) +
ggplot2::coord_cartesian(ylim = c(0, max(dat$CI_high)))
} else {
out <- out +
geom_text(aes(label = .data$label), vjust = -0.5) +
ylim(c(0, max(dat$N) * 1.2))
ggplot2::geom_text(ggplot2::aes(label = .data$label), vjust = -0.5) +
ggplot2::coord_cartesian(ylim = c(0, max(dat$N) * 1.2))
}
}

# add confidence intervals for frequencies
if (isTRUE(error_bar)) {
out <- out +
geom_errorbar(
aes(ymin = .data$lower.ci, ymax = .data$upper.ci),
width = 0.5, color = "darkblue"
ggplot2::geom_linerange(
ggplot2::aes(ymin = .data$CI_low, ymax = .data$CI_high),
color = color_error_bar
)
}

out
}

.wilson_ci <- function(prop, total_n, ci = .95) {

Check warning on line 128 in R/plot.dw_data_tabulate.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/plot.dw_data_tabulate.R,line=128,col=44,[numeric_leading_zero_linter] Include the leading zero for fractional numeric constants.
z <- qnorm((1 - ci) / 2, lower.tail = FALSE)
z2 <- z^2
p1 <- prop + 0.5 * z2 / total_n
p2 <- z * sqrt((prop * (1 - prop) + 0.25 * z2 / total_n) / total_n)
p3 <- 1 + z2 / total_n
CI_low <- (p1 - p2) / p3

Check warning on line 134 in R/plot.dw_data_tabulate.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/plot.dw_data_tabulate.R,line=134,col=3,[object_name_linter] Variable and function name style should match snake_case or symbols.
CI_high <- (p1 + p2) / p3

Check warning on line 135 in R/plot.dw_data_tabulate.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/plot.dw_data_tabulate.R,line=135,col=3,[object_name_linter] Variable and function name style should match snake_case or symbols.
return(data.frame(CI_low = CI_low, CI_high = CI_high))
}
39 changes: 30 additions & 9 deletions man/plot.dw_data_tabulate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e04947a

Please sign in to comment.