Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #151 - Metadata includes verbose option #199

Merged
merged 18 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

## New Features and Bug Fixes

* `xportr_metadata()` can set `verbose` for a whole pipeline, i.e. setting `verbose` in `xportr_metadata()` will populate to all `xportr` functions. (#151)

* All `xportr` functions now have `verbose = NULL` as the default (#151)

## Documentation

* `xportr_write()` now accepts `metadata` argument which can be used to set the dataset label to stay consistent with the other `xportr_*` functions. It is noteworthy that the dataset label set using the `xportr_df_label()` function will be retained during the `xportr_write()`.
* Exporting a new dataset `dataset_spec` that contains the Dataset Specification for ADSL. (#179)
* Added a check for character variable lengths up to 200 bytes in `xpt_validate()`(#91, #189).
Expand Down
8 changes: 7 additions & 1 deletion R/label.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
xportr_label <- function(.df,
metadata = NULL,
domain = NULL,
verbose = getOption("xportr.label_verbose", "none"),
verbose = NULL,
metacore = deprecated()) {
if (!missing(metacore)) {
lifecycle::deprecate_stop(
Expand Down Expand Up @@ -99,6 +99,12 @@ xportr_label <- function(.df,
# Check any variables missed in metadata but present in input data ---
miss_vars <- setdiff(names(.df), metadata[[variable_name]])

# Verbose should use an explicit verbose option first, then the value set in
# metadata, and finally fall back to the option value
verbose <- verbose %||%
attr(.df, "_xportr.df_verbose_") %||%
getOption("xportr.label_verbose", "none")

label_log(miss_vars, verbose)

label <- metadata[[variable_label]]
Expand Down
8 changes: 7 additions & 1 deletion R/length.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
xportr_length <- function(.df,
metadata = NULL,
domain = NULL,
verbose = getOption("xportr.length_verbose", "none"),
verbose = NULL,
metacore = deprecated()) {
if (!missing(metacore)) {
lifecycle::deprecate_stop(
Expand Down Expand Up @@ -106,6 +106,12 @@ xportr_length <- function(.df,
# Check any variables missed in metadata but present in input data ---
miss_vars <- setdiff(names(.df), metadata[[variable_name]])

# Verbose should use an explicit verbose option first, then the value set in
# metadata, and finally fall back to the option value
verbose <- verbose %||%
attr(.df, "_xportr.df_verbose_") %||%
getOption("xportr.length_verbose", "none")

length_log(miss_vars, verbose)

length <- metadata[[variable_length]]
Expand Down
10 changes: 8 additions & 2 deletions R/metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@
#' xportr_type() %>%
#' xportr_order()
#' }
xportr_metadata <- function(.df, metadata = NULL, domain = NULL) {
xportr_metadata <- function(.df,
metadata = NULL,
domain = NULL,
verbose = NULL) {
if (is.null(metadata) && is.null(domain)) {
stop("Must provide either metadata or domain argument")
}
Expand All @@ -51,5 +54,8 @@ xportr_metadata <- function(.df, metadata = NULL, domain = NULL) {

## End of common section

structure(.df, `_xportr.df_metadata_` = metadata)
structure(.df,
Copy link
Collaborator

@averissimo averissimo Jan 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very very optional. I would think that {styler} or {lintr} didn't like this indentation

Suggested change
structure(.df,
structure(
.df,

`_xportr.df_metadata_` = metadata,
`_xportr.df_verbose_` = verbose
Comment on lines +64 to +65
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could use quotes ", instead of backticks

Suggested change
`_xportr.df_metadata_` = metadata,
`_xportr.df_verbose_` = verbose
"_xportr.df_metadata_" = metadata,
"_xportr.df_verbose_" = verbose

)
}
8 changes: 7 additions & 1 deletion R/order.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
xportr_order <- function(.df,
metadata = NULL,
domain = NULL,
verbose = getOption("xportr.order_verbose", "none"),
verbose = NULL,
metacore = deprecated()) {
if (!missing(metacore)) {
lifecycle::deprecate_stop(
Expand Down Expand Up @@ -120,6 +120,12 @@ xportr_order <- function(.df,
# Used in warning message for how many vars have been moved
reorder_vars <- names(df_re_ord)[names(df_re_ord) != names(.df)]

# Verbose should use an explicit verbose option first, then the value set in
# metadata, and finally fall back to the option value
verbose <- verbose %||%
attr(.df, "_xportr.df_verbose_") %||%
getOption("xportr.order_verbose", "none")

# Function is located in messages.R
var_ord_msg(reorder_vars, names(drop_vars), verbose)

Expand Down
8 changes: 7 additions & 1 deletion R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
xportr_type <- function(.df,
metadata = NULL,
domain = NULL,
verbose = getOption("xportr.type_verbose", "none"),
verbose = NULL,
metacore = deprecated()) {
if (!missing(metacore)) {
lifecycle::deprecate_stop(
Expand Down Expand Up @@ -147,6 +147,12 @@ xportr_type <- function(.df,
type.y = if_else(type.y %in% numericTypes, "_numeric", type.y)
)

# Verbose should use an explicit verbose option first, then the value set in
# metadata, and finally fall back to the option value
verbose <- verbose %||%
attr(.df, "_xportr.df_verbose_") %||%
getOption("xportr.type_verbose", "none")

# It is possible that a variable exists in the table that isn't in the metadata
# it will be silently ignored here. This may happen depending on what a user
# passes and the options they choose. The check_core function is the place
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ The `xportr_metadata()` function can reduce duplication by setting the variable

```{r, message=FALSE, eval=FALSE}
adsl %>%
xportr_metadata(var_spec, "ADSL") %>%
xportr_metadata(var_spec, "ADSL", verbose = "warn") %>%
xportr_type() %>%
xportr_length() %>%
xportr_label() %>%
Expand Down
6 changes: 5 additions & 1 deletion man/metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/xportr_label.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/xportr_length.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/xportr_order.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/xportr_type.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

138 changes: 138 additions & 0 deletions tests/testthat/test-metadata.R
bms63 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,96 @@ test_that("xportr_length: Expect error if domain is not a character", {
)
})

test_that("xportr_metadata: Impute character lengths based on class", {
adsl <- minimal_table(30, cols = c("x", "b"))
metadata <- minimal_metadata(
dataset = TRUE, length = TRUE, var_names = colnames(adsl)
) %>%
mutate(length = length - 1)

adsl <- adsl %>%
mutate(
new_date = as.Date(.data$x, origin = "1970-01-01"),
new_char = as.character(.data$b),
new_num = as.numeric(.data$x)
)

adsl %>%
xportr_metadata(metadata, verbose = "none") %>%
xportr_length() %>%
expect_message("Variable lengths missing from metadata") %>%
expect_message("lengths resolved") %>%
expect_attr_width(c(7, 199, 200, 200, 8))
})

test_that("xportr_metadata: Throws message when variables not present in metadata", {
adsl <- minimal_table(30, cols = c("x", "y"))
metadata <- minimal_metadata(dataset = TRUE, length = TRUE, var_names = c("x"))

# Test that message is given which indicates that variable is not present
xportr_metadata(adsl, metadata, verbose = "message") %>%
xportr_length() %>%
expect_message("Variable lengths missing from metadata") %>%
expect_message("lengths resolved") %>%
expect_message(regexp = "Problem with `y`")
})

test_that("xportr_metadata: Variable ordering messaging is correct", {
skip_if_not_installed("haven")
skip_if_not_installed("readxl")

require(haven, quietly = TRUE)
require(readxl, quietly = TRUE)

df <- data.frame(c = 1:5, a = "a", d = 5:1, b = LETTERS[1:5])
df2 <- data.frame(a = "a", z = "z")
df_meta <- data.frame(
dataset = "df",
variable = letters[1:4],
order = 1:4
)

# Metadata versions
xportr_metadata(df, df_meta, verbose = "message") %>%
xportr_order() %>%
expect_message("All variables in specification file are in dataset") %>%
expect_condition("4 reordered in dataset") %>%
expect_message("Variable reordered in `.df`: `a`, `b`, `c`, and `d`")

xportr_metadata(df2, df_meta, domain = "df2", verbose = "message") %>%
xportr_order() %>%
expect_message("2 variables not in spec and moved to end") %>%
expect_message("Variable moved to end in `.df`: `a` and `z`") %>%
expect_message("All variables in dataset are ordered")
})

test_that("xportr_type: Variable types are coerced as expected and can raise messages", {
df <- data.frame(
Subj = as.character(c(123, 456, 789, "", NA, NA_integer_)),
Different = c("a", "b", "c", "", NA, NA_character_),
Val = c("1", "2", "3", "", NA, NA_character_),
Param = c("param1", "param2", "param3", "", NA, NA_character_)
)
meta_example <- data.frame(
dataset = "df",
variable = c("Subj", "Param", "Val", "NotUsed"),
type = c("numeric", "character", "numeric", "character"),
format = NA
)

# Metadata version of the last statement
df %>%
xportr_metadata(meta_example, verbose = "warn") %>%
xportr_type() %>%
expect_warning()

# Metadata version
df %>%
xportr_metadata(meta_example, verbose = "message") %>%
xportr_type() %>%
expect_message("Variable type\\(s\\) in dataframe don't match metadata")
})

# many tests here are more like qualification/domain testing - this section adds
# tests for `xportr_metadata()` basic functionality
# start
Expand Down Expand Up @@ -651,3 +741,51 @@ test_that("xportr_*: Domain is kept in between calls", {
expect_equal(attr(df5, "_xportr.df_arg_"), "adsl")
})
# end

test_that("`xportr_metadata()` results match traditional results", {
if (require(magrittr, quietly = TRUE)) {
test_dir <- tempdir()

trad_path <- file.path(test_dir, "adsltrad.xpt")
metadata_path <- file.path(test_dir, "adslmeta.xpt")

dataset_spec_low <- setNames(dataset_spec, tolower(names(dataset_spec)))
names(dataset_spec_low)[[2]] <- "label"

var_spec_low <- setNames(var_spec, tolower(names(var_spec)))
names(var_spec_low)[[5]] <- "type"

metadata_df <- adsl %>%
xportr_metadata(var_spec_low, "ADSL", verbose = "none") %>%
xportr_type() %>%
xportr_length() %>%
xportr_label() %>%
xportr_order() %>%
xportr_format() %>%
xportr_df_label(dataset_spec_low) %>%
xportr_write(metadata_path)

trad_df <- adsl %>%
xportr_type(var_spec_low, "ADSL", verbose = "none") %>%
xportr_length(var_spec_low, "ADSL", verbose = "none") %>%
xportr_label(var_spec_low, "ADSL", verbose = "none") %>%
xportr_order(var_spec_low, "ADSL", verbose = "none") %>%
xportr_format(var_spec_low, "ADSL") %>%
xportr_df_label(dataset_spec_low, "ADSL") %>%
xportr_write(trad_path)

expect_identical(
metadata_df,
structure(
trad_df,
`_xportr.df_metadata_` = var_spec_low,
`_xportr.df_verbose_` = "none"
)
)

expect_identical(
haven::read_xpt(metadata_path),
haven::read_xpt(trad_path)
)
}
})
Loading