Skip to content

Commit

Permalink
Some simplifications
Browse files: browse the repository at this point in the history
  • Loading branch information
nealrichardson committed May 8, 2021
1 parent 6e5edc0 commit 795e1f9
Showing 1 changed file with 15 additions and 38 deletions.
53 changes: 15 additions & 38 deletions r/R/dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ arrow_dplyr_query <- function(.data) {
if (inherits(.data, "arrow_dplyr_query")) {
return(.data)
}
if (!inherits(.data, "Dataset")) {
.data <- InMemoryDataset$create(.data)
}

structure(
list(
.data = .data$clone(),
.data = if (inherits(.data, "Dataset")) {
.data$clone()
} else {
InMemoryDataset$create(.data)
},
# selected_columns is a named list:
# * contents are references/expressions pointing to the data
# * names are the names they should be in the end (i.e. this
Expand All @@ -61,20 +63,24 @@ arrow_dplyr_query <- function(.data) {
)
}

# Build a named list of field references: one Expression$field_ref() result per
# entry of `field_names`, with the list named by `field_names` itself.
make_field_refs <- function(field_names) {
  refs <- lapply(field_names, Expression$field_ref)
  set_names(refs, field_names)
}

#' @export
print.arrow_dplyr_query <- function(x, ...) {
schm <- x$.data$schema
cols <- get_field_names(x)
# If cols are expressions, they won't be in the schema and will be "" in cols
fields <- map_chr(cols, function(name) {
fields <- map_chr(x$selected_columns, function(expr) {
name <- expr$field_name
if (nzchar(name)) {
schm$GetFieldByName(name)$ToString()
} else {
# It's "" because this is not a field_ref, it's a more complex expression
"expr"
}
})
# Strip off the field names as they are in the dataset and add the renamed ones
fields <- paste(names(cols), sub("^.*?: ", "", fields), sep = ": ", collapse = "\n")
fields <- paste(names(fields), sub("^.*?: ", "", fields), sep = ": ", collapse = "\n")
cat(class(x$.data)[1], " (query)\n", sep = "")
cat(fields, "\n", sep = "")
cat("\n")
Expand Down Expand Up @@ -104,29 +110,6 @@ print.arrow_dplyr_query <- function(x, ...) {
invisible(x)
}

# Look up the underlying field name of each selected column.
#
# `selected_cols` may be an arrow_dplyr_query (its `selected_columns` element
# is used) or the list of selected columns directly. The result is a character
# vector with one string per column; columns for which no field name can be
# found yield "".
get_field_names <- function(selected_cols) {
  if (inherits(selected_cols, "arrow_dplyr_query")) {
    selected_cols <- selected_cols$selected_columns
  }
  map_chr(selected_cols, function(x) {
    # An `if` chain with no final `else` evaluates to NULL when nothing matches
    field_name <- if (inherits(x, "Expression")) {
      x$field_name
    } else if (inherits(x, "array_expression")) {
      x$args$field_name
    }
    # map_chr() needs a string from every element, so NULL becomes ""
    field_name %||% ""
  })
}

# Turn a vector of field names into a list of field references
# (one Expression$field_ref() result per name), named by those same names.
make_field_refs <- function(field_names) {
  set_names(lapply(field_names, Expression$field_ref), field_names)
}

# The names of a query are the names of its selected columns, i.e. they
# reflect all select/rename operations rather than what is stored in Arrow
#' @export
names.arrow_dplyr_query <- function(x) {
  names(x$selected_columns)
}
Expand Down Expand Up @@ -751,13 +734,7 @@ restore_dplyr_features <- function(df, query) {
# An arrow_dplyr_query holds some attributes that Arrow doesn't know about
# After calling collect(), make sure these features are carried over

grouped <- length(query$group_by_vars) > 0
renamed <- ncol(df) && !identical(names(df), names(query))
if (renamed) {
# In case variables were renamed, apply those names
names(df) <- names(query)
}
if (grouped) {
if (length(query$group_by_vars) > 0) {
# Preserve groupings, if present
if (is.data.frame(df)) {
df <- dplyr::grouped_df(
Expand Down

0 comments on commit 795e1f9

Please sign in to comment.