Skip to content

Commit

Permalink
Treatment options for zeroes in histograms (#6139)
Browse files Browse the repository at this point in the history
* add `keep.zeroes` option

* add test

* document

* add news bullet
  • Loading branch information
teunbrand authored Oct 30, 2024
1 parent 579e2d5 commit b29b831
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 1 deletion.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@
* The ellipsis argument is now checked in `fortify()`, `get_alt_text()`,
`labs()` and several guides (@teunbrand, #3196).
* `stat_summary_bin()` no longer ignores `width` parameter (@teunbrand, #4647).
* Added `keep.zeroes` argument to `stat_bin()` (@teunbrand, #3449)

# ggplot2 3.5.1

Expand Down
29 changes: 28 additions & 1 deletion R/stat-bin.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
#' or left edges of bins are included in the bin.
#' @param pad If `TRUE`, adds empty bins at either end of x. This ensures
#' frequency polygons touch 0. Defaults to `FALSE`.
#' @param keep.zeroes Treatment of zero count bins. If `"all"` (default), such
#' bins are kept as-is. If `"none"`, all zero count bins are filtered out.
#' If `"inner"` only zero count bins at the flanks are filtered out, but not
#' in the middle.
#' @eval rd_computed_vars(
#' count = "number of points in bin.",
#' density = "density of points in bin, scaled to integrate to 1.",
Expand Down Expand Up @@ -55,6 +59,7 @@ stat_bin <- function(mapping = NULL, data = NULL,
closed = c("right", "left"),
pad = FALSE,
na.rm = FALSE,
keep.zeroes = "all",
orientation = NA,
show.legend = NA,
inherit.aes = TRUE) {
Expand All @@ -77,6 +82,7 @@ stat_bin <- function(mapping = NULL, data = NULL,
pad = pad,
na.rm = na.rm,
orientation = orientation,
keep.zeroes = keep.zeroes,
...
)
)
Expand All @@ -89,6 +95,10 @@ stat_bin <- function(mapping = NULL, data = NULL,
StatBin <- ggproto("StatBin", Stat,
setup_params = function(self, data, params) {
params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE)
params$keep.zeroes <- arg_match0(
params$keep.zeroes %||% "all",
c("all", "none", "inner"), arg_nm = "keep.zeroes"
)

has_x <- !(is.null(data$x) && is.null(params$x))
has_y <- !(is.null(data$y) && is.null(params$y))
Expand Down Expand Up @@ -139,7 +149,7 @@ StatBin <- ggproto("StatBin", Stat,
compute_group = function(data, scales, binwidth = NULL, bins = NULL,
center = NULL, boundary = NULL,
closed = c("right", "left"), pad = FALSE,
breaks = NULL, flipped_aes = FALSE,
breaks = NULL, flipped_aes = FALSE, keep.zeroes = "all",
# The following arguments are not used, but must
# be listed so parameters are computed correctly
origin = NULL, right = NULL, drop = NULL) {
Expand All @@ -163,6 +173,14 @@ StatBin <- ggproto("StatBin", Stat,
boundary = boundary, closed = closed)
}
bins <- bin_vector(data[[x]], bins, weight = data$weight, pad = pad)

keep <- switch(
keep.zeroes,
none = bins$count != 0,
inner = inner_runs(bins$count != 0),
TRUE
)
bins <- vec_slice(bins, keep)
bins$flipped_aes <- flipped_aes
flip_data(bins, flipped_aes)
},
Expand All @@ -174,3 +192,12 @@ StatBin <- ggproto("StatBin", Stat,
dropped_aes = "weight" # after statistical transformation, weights are no longer available
)

inner_runs <- function(x) {
rle <- vec_unrep(x)
nruns <- nrow(rle)
inner <- rep(TRUE, nruns)
i <- unique(c(1, nruns))
inner[i] <- inner[i] & rle$key[i]
rep(inner, rle$times)
}

6 changes: 6 additions & 0 deletions man/geom_histogram.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions tests/testthat/test-stat-bin.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,20 @@ test_that("stat_bin() provides width (#3522)", {
expect_equal(out$xmax - out$xmin, rep(binwidth, 10))
})

test_that("stat_bin(keep.zeroes) options work as intended", {
p <- ggplot(data.frame(x = c(1, 2, 2, 3, 5, 6, 6, 7)), aes(x)) +
scale_x_continuous(limits = c(-1, 9))

ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "all"))
expect_equal(ld$x, -1:9)

ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "inner"))
expect_equal(ld$x, c(1:7))

ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "none"))
expect_equal(ld$x, c(1:3, 5:7))
})

# Underlying binning algorithm --------------------------------------------

test_that("bins() computes fuzz with non-finite breaks", {
Expand Down

0 comments on commit b29b831

Please sign in to comment.