diff --git a/doc/NEWS.Rd b/doc/NEWS.Rd index b5f0383b3df..cdbdbad272a 100644 --- a/doc/NEWS.Rd +++ b/doc/NEWS.Rd @@ -284,6 +284,13 @@ \item \code{bug.report()} now tries to extract an email address from a \samp{BugReports} field, and if there is none, from a \samp{Contacts} field. + + \item The \code{format()} and \code{print()} methods for + \code{object.size()} results get new options \code{standard} and + \code{digits}; notably, \code{standard = "IEC"} and + \code{standard = "SI"} allow more standard abbreviations than the + default ones, e.g., for kilo bytes. From contributions by Henrik + Bengtsson. } } @@ -389,7 +396,7 @@ \item There is support for compiling C++14 code in packages on suitable platforms: see \sQuote{Writing R Extensions} for how to request this. - + \item The order of flags when \samp{LinkingTo} other packages has been changed so their include directories come earlier, before those specified in \code{CPPFLAGS}. This will only have an effect @@ -440,7 +447,7 @@ \item Check customization via environment variables to detect side effects of \code{.Call()} and \code{.External()} calls which alter their arguments is described in §8 of the \sQuote{R Internals} manual. - + \item \command{R CMD check} now checks any \samp{BugReports} field to be non-empty and a suitable single URL. } @@ -595,12 +602,12 @@ } \subsection{INSTALLATION and INCLUDED SOFTWARE}{ - \itemize{ + \itemize{ \item The check for the zlib version is now robust to versions longer than 5 characters, including 1.2.10. } } - + \subsection{UTILITIES}{ \itemize{ \item Environmental variable \env{_R_CHECK_TESTS_NLINES_} controls @@ -646,11 +653,11 @@ \item \code{methods(f)} now also works for \code{f} \code{"("} or \code{"{"}. - + \item (Windows only) \code{dir.create()} did not check the length of the path to create, and so could overflow a buffer and crash \R. (\PR{17206}) - + \item On some systems, very small hexadecimal numbers in hex notation would underflow to zero. 
(\PR{17199}) @@ -658,7 +665,7 @@ \item \code{pmin()} and \code{pmax()} now work again for \code{ordered} factors and 0-length S3 classed objects, thanks to Suharto Anggono's \PR{17195} and \PR{17200}. - + \item \code{bug.report()} did not do any validity checking on a package's \samp{BugReports} field. It now ignores an empty field, removes leading whitespace and only attempts to open @@ -668,7 +675,7 @@ \item \code{findMethod} failed if the active signature had expanded beyond what a particular package used. (Example with packages XR and XRJulia on CRAN). - + } } } diff --git a/src/library/utils/R/object.size.R b/src/library/utils/R/object.size.R index 7b727c8bcac..8e5f6666d19 100644 --- a/src/library/utils/R/object.size.R +++ b/src/library/utils/R/object.size.R @@ -1,7 +1,7 @@ # File src/library/utils/R/object.size.R # Part of the R package, https://www.R-project.org # -# Copyright (C) 1995-2016 The R Core Team +# Copyright (C) 1995-2017 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,40 +19,55 @@ object.size <- function(x) structure(.Call(C_objectSize, x), class = "object_size") -format.object_size <- function(x, units = "b", ...) +format.object_size <- function(x, units = "b", standard = "auto", digits = 1L, ...) 
{ - units <- match.arg(units, c("b", "auto", "Kb", "Mb", "Gb", "Tb", "Pb", - "B", "KB", "MB", "GB", "TB", "PB", - "KiB", "MiB", "GiB", "TiB", - "PiB", "EiB", "ZiB", "YiB")) - if (units == "auto") - units <- - if (x >= 1024^4) "Tb" - else if (x >= 1024^3) "Gb" - else if (x >= 1024^2) "Mb" - else if (x >= 1024 ) "Kb" else "b" - switch(units, - "b" =, "B" = paste(x, "bytes"), - "Kb" =, "KB" = paste(round(x/1024 , 1L), "Kb"), - "Mb" =, "MB" = paste(round(x/1024^2, 1L), "Mb"), - "Gb" =, "GB" = paste(round(x/1024^3, 1L), "Gb"), - "Tb" =, "TB" = paste(round(x/1024^4, 1L), "Tb"), - "Pb" =, "PB" = paste(round(x/1024^5, 1L), "Pb"), - "KiB" = paste(round(x/1024 , 1L), "KiB"), - "MiB" = paste(round(x/1024^2, 1L), "MiB"), - "GiB" = paste(round(x/1024^3, 1L), "GiB"), - "TiB" = paste(round(x/1024^4, 1L), "TiB"), - "PiB" = paste(round(x/1024^5, 1L), "PiB"), - "EiB" = paste(round(x/1024^6, 1L), "EiB"), - "ZiB" = paste(round(x/1024^7, 1L), "ZiB"), - "YiB" = paste(round(x/1024^8, 1L), "YiB") - ) + known_bases <- c(legacy = 1024, IEC = 1024, SI = 1000) + known_units <- list( + SI = c("B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"), + IEC = c("B", "KiB", "MiB", "GiB","TiB","PiB", "EiB", "ZiB", "YiB"), + legacy = c("b", "Kb", "Mb", "Gb", "Tb", "Pb"), + LEGACY = c("B", "KB", "MB", "GB", "TB", "PB") # <- only for "KB" + ) + + units <- match.arg(units, + c("auto", unique(unlist(known_units), use.names = FALSE))) + standard <- match.arg(standard, c("auto", names(known_bases))) + + if (standard == "auto") { ## infer 'standard' from 'units': + standard <- "legacy" # default; may become "SI" + if (units != "auto") { + if (grepl("iB$", units)) + standard <- "IEC" + else if (grepl("b$", units)) + standard <- "legacy" ## keep when "SI" is the default + else if (units == "kB") + ## SPECIAL: Drop when "SI" becomes the default + stop("For SI units, specify 'standard = \"SI\"'") + } + } + base <- known_bases[[standard]] + units_map <- known_units[[standard]] + + if (units == "auto") { + power <- 
if (x <= 0) 0L else min(as.integer(log(x, base = base)), + length(units_map) - 1L) + } else { + power <- match(toupper(units), toupper(units_map)) - 1L + if (is.na(power)) + stop(gettextf("Unit \"%s\" is not part of standard \"%s\"", + sQuote(units), sQuote(standard)), domain = NA) + } + unit <- units_map[power + 1L] + ## SPECIAL: Use suffix 'bytes' instead of 'b' for 'legacy' (or always) ? + if (power == 0 && standard == "legacy") unit <- "bytes" + + paste(round(x / base^power, digits=digits), unit) } print.object_size <- - function(x, quote = FALSE, units = "b", ...) + function(x, quote = FALSE, units = "b", standard = "auto", digits = 1L, ...) { - y <- format.object_size(x, units = units) + y <- format.object_size(x, units=units, standard=standard, digits=digits) if(quote) print.default(y, ...) else cat(y, "\n", sep = "") invisible(x) } diff --git a/src/library/utils/man/object.size.Rd b/src/library/utils/man/object.size.Rd index 3cb56e26cc4..941473cb389 100644 --- a/src/library/utils/man/object.size.Rd +++ b/src/library/utils/man/object.size.Rd @@ -1,6 +1,6 @@ % File src/library/utils/man/object.size.Rd % Part of the R package, https://www.R-project.org -% Copyright 1995-2016 R Core Team +% Copyright 1995-2017 R Core Team % Distributed under GPL 2 or later \name{object.size} @@ -14,20 +14,37 @@ \usage{ object.size(x) -\method{format}{object_size}(x, units = "b", \dots) -\method{print}{object_size}(x, quote = FALSE, units = "b", \dots) +\method{format}{object_size}(x, units = "b", standard = "auto", digits = 1L, \dots) +\method{print}{object_size}(x, quote = FALSE, units = "b", standard = "auto", + digits = 1L, \dots) } \arguments{ \item{x}{an \R object.} \item{quote}{logical, indicating whether or not the result should be printed with surrounding quotes.} - \item{units}{the units to be used in printing the size. 
Allowed - values are - \code{"b"}, \code{"Kb"}, \code{"Mb"}, \code{"Gb"}, \code{"Tb"}, \code{"Pb"}, - \code{"B"}, \code{"KB"}, \code{"MB"}, \code{"GB"}, \code{"TB"}, \code{"PB"}, - \code{"KiB"}, \code{"MiB"}, \code{"GiB"}, \code{"TiB"}, - \code{"PiB"}, \code{"EiB"}, \code{"ZiB"}, \code{"YiB"}, - and \code{"auto"} (see \sQuote{Details}). Can be abbreviated.} + \item{units}{the units to be used in formatting and printing the size. + Allowed values for the different \code{standard}s are + \describe{ + \item{\code{standard = "legacy"}:}{ + \code{"b"}, \code{"Kb"}, \code{"Mb"}, \code{"Gb"}, \code{"Tb"}, \code{"Pb"}, + \code{"B"}, \code{"KB"}, \code{"MB"}, \code{"GB"}, \code{"TB"} and \code{"PB"}.} + \item{\code{standard = "IEC"}:}{ + \code{"B"}, \code{"KiB"}, \code{"MiB"}, \code{"GiB"}, + \code{"TiB"}, \code{"PiB"}, \code{"EiB"}, \code{"ZiB"} and \code{"YiB"}.} + \item{\code{standard = "SI"}:}{ + \code{"B"}, \code{"kB"}, \code{"MB"}, \code{"GB"}, \code{"TB"}, + \code{"PB"}, \code{"EB"}, \code{"ZB"} and \code{"YB"}.} + } + For all standards, \code{units = "auto"} is also allowed. + If \code{standard = "auto"}, any of the "legacy" and \acronym{IEC} + units are allowed. + See \sQuote{Formatting and printing object sizes} for details.} + \item{standard}{the byte-size unit standard to be used. A character + string, possibly abbreviated from \code{"legacy"}, \code{"IEC"}, + \code{"SI"} and \code{"auto"}. See \sQuote{Formatting and printing + object sizes} for details.} + \item{digits}{the number of digits after the decimal point, passed to + \code{\link{round}}.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ @@ -48,32 +65,75 @@ object.size(x) Object sizes are larger on 64-bit builds than 32-bit ones, but will very likely be the same on different platforms with the same word length and pointer size. 
+} + +\section{Formatting and printing object sizes}{ + Object sizes can be formatted using byte-size units from \R's legacy + standard, the \acronym{IEC} standard, or the \acronym{SI} standard. + As illustrated by the tables below, the legacy and \acronym{IEC} standards use + \emph{binary} units (multiples of 1024), whereas the SI standard uses + \emph{decimal} units (multiples of 1000). - \code{units = "auto"} in the \code{format} and \code{print} methods - chooses the largest units in which the result is one or more (before - rounding). Values in kilobytes, megabytes or gigabytes are rounded to - the nearest \code{0.1}. + For methods \code{format} and \code{print}, argument \code{standard} + specifies which standard to use and argument \code{units} specifies + which byte-size unit to use. \code{units = "auto"} chooses the largest + units in which the result is one or more (before rounding). + Byte sizes are rounded to \code{digits} decimal places. + \code{standard = "auto"} chooses the standard based on \code{units}, + if possible; otherwise, the legacy standard is used. - The IEC standard for \emph{binary} byte size units uses notation \code{KiB}, - etc. Note that our uses of \code{Kb}, \code{Mb}, etc, also mean - multiples of \code{1024} (and not of \code{1000}) and hence the numbers - for \code{Kb}, \code{KB}, and \code{KiB} are all the same - contrary - to SI standard but according to widespread tradition. 
+ Summary of \R's legacy and \acronym{IEC} units: + \tabular{lll}{ + \bold{object size} \tab\bold{legacy} \tab\bold{IEC}\cr + 1 \tab 1 bytes \tab 1 B \cr + 1024 \tab 1 Kb \tab 1 KiB \cr + 1024^2 \tab 1 Mb \tab 1 MiB \cr + 1024^3 \tab 1 Gb \tab 1 GiB \cr + 1024^4 \tab 1 Tb \tab 1 TiB \cr + 1024^5 \tab 1 Pb \tab 1 PiB \cr + 1024^6 \tab \tab 1 EiB \cr + 1024^7 \tab \tab 1 ZiB \cr + 1024^8 \tab \tab 1 YiB \cr + } + + Summary of \acronym{SI} units: + \tabular{ll}{ + \bold{object size} \tab \bold{SI} \cr + 1 \tab 1 B \cr + 1000 \tab 1 kB \cr + 1000^2 \tab 1 MB \cr + 1000^3 \tab 1 GB \cr + 1000^4 \tab 1 TB \cr + 1000^5 \tab 1 PB \cr + 1000^6 \tab 1 EB \cr + 1000^7 \tab 1 ZB \cr + 1000^8 \tab 1 YB \cr + } } \value{ An object of class \code{"object_size"} with a length-one double value, an estimate of the memory allocation attributable to the object in bytes. } +\author{R Core; Henrik Bengtsson for the non-legacy \code{standard}s.} \seealso{ \code{\link{Memory-limits}} for the design limitations on object size. } - +\references{ + The Wikipedia page, \url{https://en.wikipedia.org/wiki/Binary_prefix}, + is extensive on the different standards, usages and their history. +} \examples{ object.size(letters) object.size(ls) format(object.size(library), units = "auto") sl <- object.size(rep(letters, 1000)) + +print(sl) ## 209288 bytes +print(sl, units = "auto") ## 204.4 Kb +print(sl, units = "auto", standard = "IEC") ## 204.4 KiB +print(sl, units = "auto", standard = "SI") ## 209.3 kB + (fsl <- sapply(c("Kb", "KB", "KiB"), function(u) format(sl, units = u))) stopifnot(identical( ## assert that all three are the same : @@ -83,6 +143,9 @@ stopifnot(identical( ## assert that all three are the same : ## find the 10 largest objects in the base package z <- sapply(ls("package:base"), function(x) object.size(get(x, envir = baseenv()))) +if(interactive()) { as.matrix(rev(sort(z))[1:10]) +} else # (more constant over time): + names(rev(sort(z))[1:10]) } \keyword{utilities}