Skip to content

Commit

Permalink
tidy
Browse files Browse the repository at this point in the history
ref #437
  • Loading branch information
wibeasley committed Oct 2, 2022
1 parent 0c0c83e commit 96a96e9
Showing 1 changed file with 12 additions and 47 deletions.
59 changes: 12 additions & 47 deletions playgrounds/eav-playground-2.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
rm(list=ls(all=TRUE)) #Clear the memory of variables from previous run. This is not called by knitr, because it's above the first chunk.
rm(list = ls(all = TRUE)) #Clear the memory of variables from previous run. This is not called by knitr, because it's above the first chunk.

# ---- load-sources ------------------------------------------------------------

# ---- load-packages -----------------------------------------------------------
library("magrittr")
requireNamespace("httr")
requireNamespace("dplyr")
requireNamespace("readr")
requireNamespace("testit")
Expand All @@ -17,26 +16,21 @@ token <- "5007DC786DBE39CE77ED8DD0C68069A6" # PHI-free demo: Checkboxes 1
# token <- "5C1526186C4D04AE0A0630743E69B53C" # PHI-free demo: super-wide #3--35,000 columns
# token <- "56F43A10D01D6578A46393394D76D88F" # PHI-free demo: Repeating Instruments --Sparse

raw_or_label <- "raw"
export_data_access_groups_string <- "false"

# ---- load-data ---------------------------------------------------------------
system.time(
ds_expected <- REDCapR::redcap_read_oneshot(redcap_uri, token)$data
)

system.time({
col_types <- REDCapR::redcap_metadata_coltypes( redcap_uri, token)
ds_metadata <- REDCapR:::redcap_metadata_internal(redcap_uri, token)$d_variable
# ds_variable <- REDCapR::redcap_variables(redcap_uri, token)$data
ds_eav <- REDCapR:::redcap_read_eav_oneshot(redcap_uri, token)$data
ds_eav <- REDCapR:::redcap_read_eav_oneshot( redcap_uri, token)$data
})

# ds_eav$field_name
testit::assert(sort(ds_metadata$field_name) == sort(colnames(ds_expected)))
testthat::expect_setequal( ds_metadata$field_name, colnames(ds_expected))

# ---- tweak-data --------------------------------------------------------------

if (!"event_id" %in% colnames(ds_eav)) {
ds_eav$event_id <- "dummy_1"
}
Expand All @@ -48,62 +42,33 @@ ds_eav_possible <-
tidyr::crossing(field_name = ds_metadata$field_name)
)

# distinct_checkboxes <-
# ds_metadata %>%
# dplyr::filter(.data$field_type == "checkbox") %>%
# dplyr::pull(.data$field_name)
#
# ds_possible_checkbox_rows <-
# tidyr::crossing(
# field_name = distinct_checkboxes,
# record = unique(ds_eav$record),
# field_type = "checkbox",
# event_id = unique(ds_eav$event_id)
# )

# variables_to_keep <-
# ds_metadata %>%
# dplyr::select(.data$field_name) %>%
# dplyr::union(
# ds_variable %>%
# dplyr::select(field_name = .data$export_field_name) %>%
# dplyr::filter(grepl("^\\w+?_complete$", .data$field_name))
# ) %>%
# dplyr::pull(.data$field_name) #%>% rev()

ds_eav_2 <-
ds_eav %>%
dplyr::rename(field_name_base = field_name) %>%
dplyr::left_join(
ds_metadata %>%
dplyr::distinct(.data$field_name_base, .data$field_type), # .data$field_name,
dplyr::distinct(.data$field_name_base, .data$field_type),
by = "field_name_base"
) %>%
dplyr::mutate(
field_name = dplyr::if_else(!is.na(.data$field_type) & (.data$field_type == "checkbox"), paste0(.data$field_name_base , "___", .data$value), .data$field_name_base )
) %>%
dplyr::mutate(
value = dplyr::if_else(!is.na(.data$field_type) & (.data$field_type == "checkbox"), as.character(!is.na(.data$value)), .data$value)
field_name = dplyr::if_else(!is.na(.data$field_type) & (.data$field_type == "checkbox"), paste0(.data$field_name_base , "___", .data$value), .data$field_name_base ),
value = dplyr::if_else(!is.na(.data$field_type) & (.data$field_type == "checkbox"), as.character(!is.na(.data$value)), .data$value),
) %>%
dplyr::right_join(ds_eav_possible, by = c("record", "event_id", "field_name"))



. <- NULL # For the sake of avoiding an R CMD check note.
# . <- NULL # For the sake of avoiding an R CMD check note.
ds <-
ds_eav_2 %>%
dplyr::select(-.data$field_type, -.data$field_name_base) %>%
# dplyr::select(-.data$redcap_repeat_instance) %>% # TODO: need a good fix for repeats
# tidyr::drop_na(event_id) %>% # TODO: need a good fix for repeats
tidyr::pivot_wider(
# id_cols = c(record, event_id),
tidyr::pivot_wider( # Everything else is considered an ID column
names_from = field_name,
values_from = value
) %>%
dplyr::select(!!ds_metadata$field_name)
dplyr::select(!!ds_metadata$field_name) %>%
readr::type_convert(col_types)
# dplyr::select(.data = ., !!intersect(variables_to_keep, colnames(.)))

# ds_2 <-
# ds %>%
# dplyr::mutate_if(is.character, ~type.convert(., as.is = FALSE)) %>%
# dplyr::mutate_if(is.factor , as.character)
testit::assert(sort(colnames(ds)) == sort(colnames(ds_expected)))
testthat::expect_setequal(colnames(ds), colnames(ds_expected))

0 comments on commit 96a96e9

Please sign in to comment.