diff --git a/R/connection.R b/R/connection.R index a6025692..659ae976 100644 --- a/R/connection.R +++ b/R/connection.R @@ -112,120 +112,3 @@ close_connection <- function(conn) { DBI::dbDisconnect(conn) } - - -#' Convenience function for DBI::Id -#' -#' @template db_table_id -#' @template conn -#' @param allow_table_only -#' logical. If `TRUE`, allows for returning an `DBI::Id` with `table` = `myschema.table` if schema `myschema` -#' is not found in `conn`. -#' If `FALSE`, the function will raise an error if the implied schema cannot be found in `conn` -#' @details The given `db_table_id` is parsed to a DBI::Id depending on the type of input: -#' * `character`: db_table_id is parsed to a DBI::Id object using an assumption of "schema.table" syntax -#' with corresponding schema (if found in `conn`) and table values. -#' If no schema is implied, the default schema of `conn` will be used. -#' -#' * `DBI::Id`: if schema is not specified in `Id`, the schema is set to the default schema for `conn` (if given). -#' -#' * `tbl_sql`: the remote name is used to resolve the table identification. -#' -#' @return A DBI::Id object parsed from db_table_id (see details) -#' @examples -#' id("schema.table") -#' @seealso [DBI::Id] which this function wraps. -#' @export -id <- function(db_table_id, conn = NULL, allow_table_only = TRUE) { - UseMethod("id") -} - - -#' @export -id.Id <- function(db_table_id, conn = NULL, allow_table_only = TRUE) { - return(DBI::Id(schema = purrr::pluck(db_table_id, "name", "schema", .default = SCDB::get_schema(conn)), - table = purrr::pluck(db_table_id, "name", "table"))) -} - - -#' @export -id.character <- function(db_table_id, conn = NULL, allow_table_only = TRUE) { - - checkmate::assert(is.null(conn), DBI::dbIsValid(conn), combine = "or") - - if (stringr::str_detect(db_table_id, "\\.")) { - db_name <- stringr::str_split_1(db_table_id, "\\.") - db_schema <- db_name[1] - db_table <- db_name[2] - - # If no matching implied schema is found, return the unmodified db_table_id in the default schema - if (allow_table_only && !is.null(conn) && !schema_exists(conn, db_schema)) { - return(DBI::Id(schema = get_schema(conn), table = db_table_id)) - } - } else { - db_schema <- get_schema(conn) - db_table <- db_table_id - } - - return(DBI::Id(schema = db_schema, table = db_table)) -} - - -#' @export -id.tbl_dbi <- function(db_table_id, conn = NULL, allow_table_only = TRUE) { - - # If table identification is fully qualified extract Id from remote_Table - if (!is.na(purrr::pluck(dbplyr::remote_table(db_table_id), unclass, "schema"))) { - - table_ident <- dbplyr::remote_table(db_table_id) |> - unclass() |> - purrr::discard(is.na) - - table_conn <- dbplyr::remote_con(db_table_id) - - return( - DBI::Id( - catalog = purrr::pluck(table_ident, "catalog"), - schema = purrr::pluck(table_ident, "schema", .default = get_schema(table_conn)), - table = purrr::pluck(table_ident, "table") - ) - ) - - } else { - - # If not attempt to resolve the table from existing tables. - # For SQLite, there should only be one table in main/temp matching the table - # In some cases, tables may have been added to the DB that makes the id ambiguous. - schema <- get_tables(dbplyr::remote_con(db_table_id), show_temporary = TRUE) |> - dplyr::filter(.data$table == dbplyr::remote_name(db_table_id)) |> - dplyr::pull("schema") - - if (length(schema) > 1) { - stop( - "Table identification has been corrupted! 
", - "This table does not contain information about its schema and ", - "multiple tables with this name were found across schemas." - ) - } - - return(DBI::Id(schema = schema, table = dbplyr::remote_name(db_table_id))) - } -} - - -#' @export -as.character.Id <- function(x, ...) { - - info <- x@name |> - purrr::discard(is.na) - - id_representation <- list( - catalog = purrr::pluck(info, "catalog"), - schema = purrr::pluck(info, "schema"), - table = purrr::pluck(info, "table") - ) |> - purrr::discard(is.null) |> - do.call(purrr::partial(paste, sep = "."), args = _) - - return(id_representation) -} diff --git a/R/create_logs_if_missing.R b/R/create_logs_if_missing.R new file mode 100644 index 00000000..9d9960ba --- /dev/null +++ b/R/create_logs_if_missing.R @@ -0,0 +1,34 @@ +#' Create a table with the SCDB log structure if it does not exists +#' @template conn +#' @param log_table A specification of where the logs should exist ("schema.table") +#' @return A tbl_dbi with the generated (or existing) log table +#' @examples +#' conn <- get_connection(drv = RSQLite::SQLite()) +#' log_table_id <- id("test.logs", conn = conn, allow_table_only = TRUE) +#' +#' create_logs_if_missing(log_table_id, conn) +#' +#' close_connection(conn) +#' @export +create_logs_if_missing <- function(log_table, conn) { + + checkmate::assert_class(conn, "DBIConnection") + + if (!table_exists(conn, log_table)) { + log_signature <- data.frame(date = as.POSIXct(NA), + schema = NA_character_, + table = NA_character_, + n_insertions = NA_integer_, + n_deactivations = NA_integer_, + start_time = as.POSIXct(NA), + end_time = as.POSIXct(NA), + duration = NA_character_, + success = NA, + message = NA_character_, + log_file = NA_character_) + + DBI::dbCreateTable(conn, id(log_table, conn), log_signature) + } + + return(dplyr::tbl(conn, id(log_table, conn), check_from = FALSE)) +} diff --git a/R/create_table.R b/R/create_table.R index 7d1eea53..38003615 100644 --- a/R/create_table.R +++ b/R/create_table.R @@ -70,86 +70,3 @@ create_table <- function(.data, conn = NULL, db_table_id, ...) 
diff --git a/R/create_table.R b/R/create_table.R
index 7d1eea53..38003615 100644
--- a/R/create_table.R
+++ b/R/create_table.R
@@ -70,86 +70,3 @@ create_table <- function(.data, conn = NULL, db_table_id, ...) {
   return(invisible(dplyr::tbl(conn, db_table_id, check_from = FALSE)))
 }
-
-
-
-#' @importFrom methods setGeneric
-methods::setGeneric("getTableSignature",
-                    function(.data, conn = NULL) standardGeneric("getTableSignature"),
-                    signature = "conn")
-
-methods::setMethod("getTableSignature", "DBIConnection", function(.data, conn) {
-  # Define the column types to be updated based on backend class
-  col_types <- DBI::dbDataType(conn, .data)
-
-  backend_coltypes <- list(
-    "PqConnection" = c(
-      checksum = "TEXT",
-      from_ts = "TIMESTAMP",
-      until_ts = "TIMESTAMP"
-    ),
-    "SQLiteConnection" = c(
-      checksum = "TEXT",
-      from_ts = "TEXT",
-      until_ts = "TEXT"
-    ),
-    "Microsoft SQL Server" = c(
-      checksum = "varchar(32)",
-      from_ts = "DATETIME2",
-      until_ts = "DATETIME2"
-    )
-  )
-
-  checkmate::assert_choice(class(conn), names(backend_coltypes))
-
-  # Update columns with indices instead of names to avoid conflicts
-  special_cols <- backend_coltypes[[class(conn)]]
-  special_indices <- (1 + length(.data) - length(special_cols)):length(.data)
-
-  return(replace(col_types, special_indices, special_cols))
-})
-
-methods::setMethod("getTableSignature", "NULL", function(.data, conn) {
-  # Emulate product of DBI::dbDataType
-  signature <- dplyr::summarise(.data, dplyr::across(tidyselect::everything(), ~ class(.)[1]))
-
-  stats::setNames(as.character(signature), names(signature))
-
-  return(signature)
-})
-
-
-#' Create a table with the SCDB log structure if it does not exists
-#' @template conn
-#' @param log_table A specification of where the logs should exist ("schema.table")
-#' @return A tbl_dbi with the generated (or existing) log table
-#' @examples
-#' conn <- get_connection(drv = RSQLite::SQLite())
-#' log_table_id <- id("test.logs", conn = conn, allow_table_only = TRUE)
-#'
-#' create_logs_if_missing(log_table_id, conn)
-#'
-#' close_connection(conn)
-#' @export
-create_logs_if_missing <- function(log_table, conn) {
-
-  checkmate::assert_class(conn, "DBIConnection")
-
-  if (!table_exists(conn, log_table)) {
-    log_signature <- data.frame(date = as.POSIXct(NA),
-                                schema = NA_character_,
-                                table = NA_character_,
-                                n_insertions = NA_integer_,
-                                n_deactivations = NA_integer_,
-                                start_time = as.POSIXct(NA),
-                                end_time = as.POSIXct(NA),
-                                duration = NA_character_,
-                                success = NA,
-                                message = NA_character_,
-                                log_file = NA_character_)
-
-    DBI::dbCreateTable(conn, id(log_table, conn), log_signature)
-  }
-
-  return(dplyr::tbl(conn, id(log_table, conn), check_from = FALSE))
-}
diff --git a/R/filter_keys.R b/R/filter_keys.R
new file mode 100644
index 00000000..004babce
--- /dev/null
+++ b/R/filter_keys.R
@@ -0,0 +1,68 @@
+#' Filters .data according to all records in the filter
+#'
+#' @description
+#' If filters = NULL, no filtering is done.
+#' If filters is different from NULL, the .data is filtered by an inner_join using all columns of the filter:
+#' \code{inner_join(.data, filters, by = colnames(filters))}
+#'
+#' by and na_by can overwrite the inner_join columns used in the filtering
+#'
+#' @template .data
+#' @template filters
+#' @param by passed to inner_join if different from NULL
+#' @param na_by passed to inner_join if different from NULL
+#' @template .data_return
+#' @examples
+#' # Filtering with null means no filtering is done
+#' filter <- NULL
+#' identical(filter_keys(mtcars, filter), mtcars) # TRUE
+#'
+#' # Filtering by vs = 0
+#' filter <- data.frame(vs = 0)
+#' identical(filter_keys(mtcars, filter), dplyr::filter(mtcars, vs == 0)) # TRUE
+#'
+#' # Filtering by the specific combinations of vs = 0 and am = 1
+#' filter <- dplyr::distinct(mtcars, vs, am)
+#' filter_keys(mtcars, filter)
+#'
+#' @importFrom rlang .data
+#' @export
filter_keys <- function(.data, filters, by = NULL, na_by = NULL) {
+  if (is.null(filters)) {
+    return(.data)
+  }
+
+  assert_data_like(.data)
+  assert_data_like(filters, null.ok = TRUE)
+  checkmate::assert_subset(c(by, na_by), colnames(filters))
+
+  UseMethod("filter_keys")
+}
+
+#' @export
+filter_keys.tbl_sql <- function(.data, filters, by = NULL, na_by = NULL) {
+
+  if (is.null(by) && is.null(na_by)) {
+    # Determine key types
+    key_types <- filters |>
+      dplyr::ungroup() |>
+      dplyr::summarise(dplyr::across(
+        .cols = tidyselect::everything(),
+        .fns = ~ sum(ifelse(is.na(.), 0, 1), na.rm = TRUE)
+      )) |>
+      tidyr::pivot_longer(tidyselect::everything(), names_to = "column_name", values_to = "is_na")
+
+    by <- key_types |> dplyr::filter(.data$is_na > 0) |> dplyr::pull("column_name")
+    na_by <- key_types |> dplyr::filter(.data$is_na == 0) |> dplyr::pull("column_name")
+
+    if (length(by) == 0) by <- NULL
+    if (length(na_by) == 0) na_by <- NULL
+  }
+  return(dplyr::inner_join(.data, filters, by = by, na_by = na_by))
+}
+
+#' @export
+filter_keys.data.frame <- function(.data, filters, by = NULL, na_by = NULL) {
+  if (is.null(by) && is.null(na_by)) by <- colnames(filters)
+  return(dplyr::inner_join(.data, filters, by = c(by, na_by)))
+}
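A short sketch of the two dispatch paths; the copied tables and the names `remote`/`keys` are illustrative:

conn <- get_connection(drv = RSQLite::SQLite())

# data.frame method: joins on all filter columns when by/na_by are not given
filter_keys(mtcars, dplyr::distinct(mtcars, cyl))  # keeps every row (all cyl values occur)
filter_keys(mtcars, data.frame(cyl = 4))           # only the 4-cylinder rows

# tbl_sql method: by/na_by are inferred from the NA pattern of the filter columns
remote <- dplyr::copy_to(conn, mtcars, name = "mtcars_remote")
keys <- dplyr::copy_to(conn, data.frame(cyl = c(4, 6)), name = "keys")
filter_keys(remote, keys) |> dplyr::collect()

close_connection(conn)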
diff --git a/R/getTableSignature.R b/R/getTableSignature.R
new file mode 100644
index 00000000..f27f6a61
--- /dev/null
+++ b/R/getTableSignature.R
@@ -0,0 +1,44 @@
+#' @importFrom methods setGeneric
+methods::setGeneric("getTableSignature",
+                    function(.data, conn = NULL) standardGeneric("getTableSignature"),
+                    signature = "conn")
+
+methods::setMethod("getTableSignature", "DBIConnection", function(.data, conn) {
+  # Define the column types to be updated based on backend class
+  col_types <- DBI::dbDataType(conn, .data)
+
+  backend_coltypes <- list(
+    "PqConnection" = c(
+      checksum = "TEXT",
+      from_ts = "TIMESTAMP",
+      until_ts = "TIMESTAMP"
+    ),
+    "SQLiteConnection" = c(
+      checksum = "TEXT",
+      from_ts = "TEXT",
+      until_ts = "TEXT"
+    ),
+    "Microsoft SQL Server" = c(
+      checksum = "varchar(32)",
+      from_ts = "DATETIME2",
+      until_ts = "DATETIME2"
+    )
+  )
+
+  checkmate::assert_choice(class(conn), names(backend_coltypes))
+
+  # Update columns with indices instead of names to avoid conflicts
+  special_cols <- backend_coltypes[[class(conn)]]
+  special_indices <- (1 + length(.data) - length(special_cols)):length(.data)
+
+  return(replace(col_types, special_indices, special_cols))
+})
+
+methods::setMethod("getTableSignature", "NULL", function(.data, conn) {
+  # Emulate product of DBI::dbDataType
+  signature <- dplyr::summarise(.data, dplyr::across(tidyselect::everything(), ~ class(.)[1]))
+
+  signature <- stats::setNames(as.character(signature), names(signature))
+
+  return(signature)
+})
diff --git a/R/get_schema.R b/R/get_schema.R
index f238fe41..06ef5c76 100644
--- a/R/get_schema.R
+++ b/R/get_schema.R
@@ -71,64 +71,3 @@ get_schema.SQLiteConnection <- function(.x) {
 get_schema.NULL <- function(.x) {
   return(NULL)
 }
-
-#' Test if a schema exists in given connection
-#' @param schema A character string giving the schema name
-#' @template conn
-#' @return TRUE if the given schema is found on conn
-#' @examples
-#'
-#' conn <- get_connection(drv = RSQLite::SQLite())
-#'
-#' schema_exists(conn, "test")
-#'
-#' close_connection(conn)
-#' @export
-schema_exists <- function(conn, schema) {
-  UseMethod("schema_exists")
-}
-
-#' @export
-schema_exists.SQLiteConnection <- function(conn, schema) { - query <- paste0( - "SELECT schema, name FROM pragma_table_list WHERE schema == '", - schema, - "' AND name IN ('sqlite_schema', 'sqlite_temp_schema')" - ) - result <- DBI::dbGetQuery(conn, query) - - return(nrow(result) == 1) -} - -#' @export -schema_exists.DBIConnection <- function(conn, schema) { - query <- paste0("SELECT schema_name FROM INFORMATION_SCHEMA.SCHEMATA WHERE schema_name = '", schema, "'") - result <- DBI::dbGetQuery(conn, query) - - return(nrow(result) == 1) -} - -#' @export -schema_exists.default <- function(conn, schema) { - - checkmate::assert_character(schema) - - objs <- DBI::dbListObjects(conn) - matches <- sapply(objs$table, \(.x) methods::slot(.x, "name")) |> - (\(.x) names(.x) == "schema" & .x == schema)() - - if (any(matches)) return(TRUE) - - tryCatch({ - DBI::dbCreateTable( - conn, - name = DBI::Id(schema = schema, table = "SCDB_schema_test"), - fields = data.frame(name = character()), - temporary = FALSE - ) - - DBI::dbRemoveTable(conn, DBI::Id(schema = schema, table = "SCDB_schema_test")) - TRUE - }, - error = function(e) FALSE) -} diff --git a/R/get_table.R b/R/get_table.R index 1f6cb4d7..aad95208 100644 --- a/R/get_table.R +++ b/R/get_table.R @@ -70,339 +70,3 @@ get_table <- function(conn, db_table_id = NULL, slice_ts = NA, include_slice_inf return(q) } - - -#' Gets the available tables -#' -#' @template conn -#' @param pattern A regex pattern with which to subset the returned tables -#' @param show_temporary (`logical`)\cr -#' Should temporary tables be listed? -#' -#' @return A data.frame containing table names in the DB -#' @examples -#' conn <- get_connection(drv = RSQLite::SQLite()) -#' -#' dplyr::copy_to(conn, mtcars, name = "my_test_table_1", temporary = FALSE) -#' dplyr::copy_to(conn, mtcars, name = "my_test_table_2") -#' -#' get_tables(conn, pattern = "my_[th]est") -#' get_tables(conn, pattern = "my_[th]est", show_temporary = FALSE) -#' -#' close_connection(conn) -#' @importFrom rlang .data -#' @export -get_tables <- function(conn, pattern = NULL, show_temporary = TRUE) { - - checkmate::assert_character(pattern, null.ok = TRUE) - checkmate::assert_logical(show_temporary) - - UseMethod("get_tables") -} - -#' @importFrom rlang .data -#' @export -get_tables.SQLiteConnection <- function(conn, pattern = NULL, show_temporary = TRUE) { - query <- paste("SELECT schema, name 'table' FROM pragma_table_list", - "WHERE NOT name IN ('sqlite_schema', 'sqlite_temp_schema')", - "AND NOT name LIKE 'sqlite_stat%'") - - tables <- DBI::dbGetQuery(conn, query) - - if (!show_temporary) { - tables <- tables |> - dplyr::filter(.data$schema != "temp") - } - - if (!is.null(pattern)) { - tables <- tables |> - dplyr::mutate(db_table_str = ifelse( - is.na(.data$schema), .data$table, - paste(.data$schema, .data$table, sep = ".") - )) |> - dplyr::filter(grepl(pattern, .data$db_table_str)) |> - dplyr::select(!"db_table_str") - } - - if (!conn@dbname %in% c("", ":memory:") && nrow(tables) == 0) { - warning("No tables found. 
Check user privileges / database configuration") - } - - return(tables) -} - -#' @export -#' @importFrom rlang .data -get_tables.PqConnection <- function(conn, pattern = NULL, show_temporary = TRUE) { - query <- paste("SELECT", - "schemaname AS schema,", - "tablename AS table,", - "is_temporary", - "FROM (", - "SELECT *, 0 AS is_temporary FROM pg_tables", - "WHERE NOT (schemaname LIKE 'pg_%' OR schemaname = 'information_schema')", - "UNION ALL", - "SELECT *, 1 AS is_temporary FROM pg_tables", - "WHERE schemaname LIKE 'pg_temp_%'", - ")") - - tables <- DBI::dbGetQuery(conn, query) - - if (!show_temporary) { - tables <- tables |> - dplyr::filter(!.data$is_temporary) - } - - tables <- tables |> - dplyr::select(!"is_temporary") - - - if (!is.null(pattern)) { - tables <- tables |> - dplyr::mutate(db_table_str = ifelse( - is.na(.data$schema), .data$table, - paste(.data$schema, .data$table, sep = ".") - )) |> - dplyr::filter(grepl(pattern, .data$db_table_str)) |> - dplyr::select(!"db_table_str") - } - - if (nrow(tables) == 0) warning("No tables found. Check user privileges / database configuration") - - return(tables) -} - -#' @importFrom rlang .data -#' @export -`get_tables.Microsoft SQL Server` <- function(conn, pattern = NULL, show_temporary = TRUE) { - query <- paste("SELECT", - "s.name AS [schema],", - "t.name AS [table],", - "t.is_temporary", - "FROM (", - "SELECT *, 0 AS is_temporary FROM sys.tables WHERE NOT name LIKE '#%'", - "UNION ALL", - "SELECT *, 1 AS is_temporary FROM tempdb.sys.tables WHERE name LIKE '#%'", - ") AS t", - "INNER JOIN sys.schemas AS s", - "ON t.schema_id = s.schema_id") - - tables <- DBI::dbGetQuery(conn, query) - - if (!show_temporary) { - tables <- tables |> - dplyr::filter(.data$is_temporary == 0) - } - - # Filter out trailing underscores added by engine - tables <- tables |> - dplyr::mutate(table = stringr::str_remove(.data$table, "_+[0-9a-fA-F]+$")) - - tables <- tables |> - dplyr::select(!"is_temporary") - - if (!is.null(pattern)) { - tables <- tables |> - tidyr::unite("db_table_str", "schema", "table", sep = ".", na.rm = TRUE, remove = FALSE) |> - dplyr::filter(grepl(pattern, .data$db_table_str)) |> - dplyr::select(!"db_table_str") - } - - if (nrow(tables) == 0) warning("No tables found. Check user privileges / database configuration") - - return(tables) -} - -#' @export -get_tables.OdbcConnection <- function(conn, pattern = NULL, show_temporary = TRUE) { - query <- paste("SELECT", - "s.name AS [schema],", - "t.name AS [table]", - "FROM sys.tables t", - "INNER JOIN sys.schemas s", - "ON t.schema_id = s.schema_id") - - tables <- DBI::dbGetQuery(conn, query) |> - dplyr::mutate(schema = dplyr::na_if(.data$schema, "dbo")) - - return(tables) -} - -#' @export -get_tables.DBIConnection <- function(conn, pattern = NULL, show_temporary = TRUE) { - if (isFALSE(show_temporary)) { # nocov start - rlang::warn("show_temporary must be 'FALSE' for unsupported backends!") # nocov end - } - - # Check arguments - checkmate::assert_class(conn, "DBIConnection") - checkmate::assert_character(pattern, null.ok = TRUE) - - # Retrieve all objects in conn - objs <- DBI::dbListObjects(conn) |> - dplyr::select(table) - - # purrr::map fails if .x is empty, avoid by returning early - if (nrow(objs) == 0) { - warning("No tables found. Check user privileges / database configuration") - - return(data.frame(schema = character(), table = character())) - } - - tables <- objs$table |> # For each top-level object (except tables)... 
- purrr::map(\(.x) { - if (names(.x@name) == "table") { - return(data.frame(schema = NA_character_, table = .x@name["table"])) - } - - # ...retrieve all tables - DBI::dbListObjects(conn, .x) |> - dplyr::pull(table) |> - purrr::map(\(.y) data.frame(schema = .x@name, table = .y@name["table"])) |> - purrr::reduce(rbind.data.frame) - }) |> - purrr::reduce(rbind.data.frame) - - # Skip dbplyr temporary tables - tables <- dplyr::filter(tables, !startsWith(.data$table, "dbplyr_")) - - # Subset if pattern is given - if (!is.null(pattern)) { - tables <- subset(tables, grepl(pattern, table)) - } - - # Remove empty schemas - tables <- dplyr::mutate(tables, schema = dplyr::if_else(.data$schema == "", NA, .data$schema)) - - row.names(tables) <- NULL # Reset row names - return(tables) -} - -#' Slices a data object based on time / date -#' -#' @template .data -#' @param slice_ts The time / date to slice by -#' @param from_ts The name of the column in .data specifying valid from time (note: must be unquoted) -#' @param until_ts The name of the column in .data specifying valid until time (note: must be unquoted) -#' @template .data_return -#' @examples -#' conn <- get_connection(drv = RSQLite::SQLite()) -#' -#' m <- mtcars |> -#' dplyr::mutate(from_ts = dplyr::if_else(dplyr::row_number() > 10, -#' as.Date("2020-01-01"), -#' as.Date("2021-01-01")), -#' until_ts = as.Date(NA)) -#' -#' dplyr::copy_to(conn, m, name = "mtcars", temporary = FALSE) -#' -#' q <- dplyr::tbl(conn, id("mtcars", conn)) -#' -#' nrow(slice_time(q, "2020-01-01")) # 10 -#' nrow(slice_time(q, "2021-01-01")) # nrow(mtcars) -#' -#' close_connection(conn) -#' @export -slice_time <- function(.data, slice_ts, from_ts = from_ts, until_ts = until_ts) { - - # Check arguments - assert_data_like(.data) - assert_timestamp_like(slice_ts) - - from_ts <- dplyr::enquo(from_ts) - until_ts <- dplyr::enquo(until_ts) - .data <- .data |> - dplyr::filter(is.na({{until_ts}}) | slice_ts < {{until_ts}}, - {{from_ts}} <= slice_ts) - return(.data) -} - -#' Test if a table exists in database -#' -#' @description -#' This functions attempts to determine the existence of a given table. -#' If a character input is given, matching is done heuristically assuming a "schema.table" notation. -#' If no schema is implied in this case, the default schema is assumed. 
-#' @template conn -#' @template db_table_id -#' @return TRUE if db_table_id can be parsed to a table found in conn -#' @importFrom rlang .data -#' @name table_exists -#' @examples -#' conn <- get_connection(drv = RSQLite::SQLite()) -#' -#' dplyr::copy_to(conn, mtcars, name = "mtcars", temporary = FALSE) -#' dplyr::copy_to(conn, iris, name = "iris") -#' -#' table_exists(conn, "mtcars") # TRUE -#' table_exists(conn, "iris") # FALSE -#' table_exists(conn, "temp.iris") # TRUE -#' -#' close_connection(conn) -#' @export -table_exists <- function(conn, db_table_id) { - checkmate::assert(DBI::dbIsValid(conn)) - assert_id_like(db_table_id) - - # Check arguments - if (inherits(db_table_id, "tbl_dbi")) { - exists <- tryCatch({ - dplyr::collect(utils::head(db_table_id, 0)) - return(TRUE) - }, - error = function(e) { - return(FALSE) - }) - - return(exists) - } - - UseMethod("table_exists", conn) -} - -#' @rdname table_exists -#' @importFrom rlang .data -#' @export -table_exists.DBIConnection <- function(conn, db_table_id) { - tables <- get_tables(conn, show_temporary = TRUE) - - if (inherits(db_table_id, "Id")) { - db_table_id <- id(db_table_id, conn) # Ensure Id is fully qualified (has schema) - - exact_match <- tables |> - dplyr::filter(.data$table == db_table_id@name["table"], .data$schema == db_table_id@name["schema"]) - - if (nrow(exact_match) == 1) { - return(TRUE) - } else { - return(FALSE) - } - - } else if (inherits(db_table_id, "character")) { - - # Check if schema is implied -- use default if not implied - if (!stringr::str_detect(db_table_id, r"{\w*\.\w*}")) { - db_table_id <- paste(get_schema(conn), db_table_id, sep = ".") - } - - # Then heuristically match with tables in conn - matches <- dplyr::union_all( - tables, - dplyr::mutate(dplyr::filter(tables, .data$schema == get_schema(conn)), schema = NA) - ) |> - tidyr::unite("table_str", "schema", "table", sep = ".", na.rm = TRUE, remove = FALSE) |> - dplyr::filter(.data$table_str == !!db_table_id) |> - dplyr::select(!"table_str") - - if (nrow(matches) <= 1) { - return(nrow(matches) == 1) - } else { - rlang::abort( - message = paste0("More than one table matching '", db_table_id, "' was found!"), - matches = matches - ) - } - } else { - rlang::abort("Only character or DBI::Id inputs to table_exists is allowed!") - } -} diff --git a/R/get_tables.R b/R/get_tables.R new file mode 100644 index 00000000..b31237b9 --- /dev/null +++ b/R/get_tables.R @@ -0,0 +1,205 @@ +#' Gets the available tables +#' +#' @template conn +#' @param pattern A regex pattern with which to subset the returned tables +#' @param show_temporary (`logical`)\cr +#' Should temporary tables be listed? 
+#' +#' @return A data.frame containing table names in the DB +#' @examples +#' conn <- get_connection(drv = RSQLite::SQLite()) +#' +#' dplyr::copy_to(conn, mtcars, name = "my_test_table_1", temporary = FALSE) +#' dplyr::copy_to(conn, mtcars, name = "my_test_table_2") +#' +#' get_tables(conn, pattern = "my_[th]est") +#' get_tables(conn, pattern = "my_[th]est", show_temporary = FALSE) +#' +#' close_connection(conn) +#' @importFrom rlang .data +#' @export +get_tables <- function(conn, pattern = NULL, show_temporary = TRUE) { + + checkmate::assert_character(pattern, null.ok = TRUE) + checkmate::assert_logical(show_temporary) + + UseMethod("get_tables") +} + +#' @importFrom rlang .data +#' @export +get_tables.SQLiteConnection <- function(conn, pattern = NULL, show_temporary = TRUE) { + query <- paste("SELECT schema, name 'table' FROM pragma_table_list", + "WHERE NOT name IN ('sqlite_schema', 'sqlite_temp_schema')", + "AND NOT name LIKE 'sqlite_stat%'") + + tables <- DBI::dbGetQuery(conn, query) + + if (!show_temporary) { + tables <- tables |> + dplyr::filter(.data$schema != "temp") + } + + if (!is.null(pattern)) { + tables <- tables |> + dplyr::mutate(db_table_str = ifelse( + is.na(.data$schema), .data$table, + paste(.data$schema, .data$table, sep = ".") + )) |> + dplyr::filter(grepl(pattern, .data$db_table_str)) |> + dplyr::select(!"db_table_str") + } + + if (!conn@dbname %in% c("", ":memory:") && nrow(tables) == 0) { + warning("No tables found. Check user privileges / database configuration") + } + + return(tables) +} + +#' @export +#' @importFrom rlang .data +get_tables.PqConnection <- function(conn, pattern = NULL, show_temporary = TRUE) { + query <- paste("SELECT", + "schemaname AS schema,", + "tablename AS table,", + "is_temporary", + "FROM (", + "SELECT *, 0 AS is_temporary FROM pg_tables", + "WHERE NOT (schemaname LIKE 'pg_%' OR schemaname = 'information_schema')", + "UNION ALL", + "SELECT *, 1 AS is_temporary FROM pg_tables", + "WHERE schemaname LIKE 'pg_temp_%'", + ")") + + tables <- DBI::dbGetQuery(conn, query) + + if (!show_temporary) { + tables <- tables |> + dplyr::filter(!.data$is_temporary) + } + + tables <- tables |> + dplyr::select(!"is_temporary") + + + if (!is.null(pattern)) { + tables <- tables |> + dplyr::mutate(db_table_str = ifelse( + is.na(.data$schema), .data$table, + paste(.data$schema, .data$table, sep = ".") + )) |> + dplyr::filter(grepl(pattern, .data$db_table_str)) |> + dplyr::select(!"db_table_str") + } + + if (nrow(tables) == 0) warning("No tables found. 
Check user privileges / database configuration")
+
+  return(tables)
+}
+
+#' @importFrom rlang .data
+#' @export
+`get_tables.Microsoft SQL Server` <- function(conn, pattern = NULL, show_temporary = TRUE) {
+  query <- paste("SELECT",
+                 "s.name AS [schema],",
+                 "t.name AS [table],",
+                 "t.is_temporary",
+                 "FROM (",
+                 "SELECT *, 0 AS is_temporary FROM sys.tables WHERE NOT name LIKE '#%'",
+                 "UNION ALL",
+                 "SELECT *, 1 AS is_temporary FROM tempdb.sys.tables WHERE name LIKE '#%'",
+                 ") AS t",
+                 "INNER JOIN sys.schemas AS s",
+                 "ON t.schema_id = s.schema_id")
+
+  tables <- DBI::dbGetQuery(conn, query)
+
+  if (!show_temporary) {
+    tables <- tables |>
+      dplyr::filter(.data$is_temporary == 0)
+  }
+
+  # Strip the trailing underscore + hex suffix the engine appends to temporary tables
+  tables <- tables |>
+    dplyr::mutate(table = stringr::str_remove(.data$table, "_+[0-9a-fA-F]+$"))
+
+  tables <- tables |>
+    dplyr::select(!"is_temporary")
+
+  if (!is.null(pattern)) {
+    tables <- tables |>
+      tidyr::unite("db_table_str", "schema", "table", sep = ".", na.rm = TRUE, remove = FALSE) |>
+      dplyr::filter(grepl(pattern, .data$db_table_str)) |>
+      dplyr::select(!"db_table_str")
+  }
+
+  if (nrow(tables) == 0) warning("No tables found. Check user privileges / database configuration")
+
+  return(tables)
+}
+
+#' @export
+get_tables.OdbcConnection <- function(conn, pattern = NULL, show_temporary = TRUE) {
+  query <- paste("SELECT",
+                 "s.name AS [schema],",
+                 "t.name AS [table]",
+                 "FROM sys.tables t",
+                 "INNER JOIN sys.schemas s",
+                 "ON t.schema_id = s.schema_id")
+
+  tables <- DBI::dbGetQuery(conn, query) |>
+    dplyr::mutate(schema = dplyr::na_if(.data$schema, "dbo"))
+
+  return(tables)
+}
+
+#' @export
+get_tables.DBIConnection <- function(conn, pattern = NULL, show_temporary = TRUE) {
+  if (isFALSE(show_temporary)) { # nocov start
+    rlang::warn("show_temporary must be 'TRUE' for unsupported backends!") # nocov end
+  }
+
+  # Check arguments
+  checkmate::assert_class(conn, "DBIConnection")
+  checkmate::assert_character(pattern, null.ok = TRUE)
+
+  # Retrieve all objects in conn
+  objs <- DBI::dbListObjects(conn) |>
+    dplyr::select(table)
+
+  # purrr::map fails if .x is empty, avoid by returning early
+  if (nrow(objs) == 0) {
+    warning("No tables found. Check user privileges / database configuration")
+
+    return(data.frame(schema = character(), table = character()))
+  }
+
+  tables <- objs$table |> # For each top-level object (table or schema)...
+    purrr::map(\(.x) {
+      if (names(.x@name) == "table") {
+        return(data.frame(schema = NA_character_, table = .x@name["table"]))
+      }
+
+      # ...retrieve all tables
+      DBI::dbListObjects(conn, .x) |>
+        dplyr::pull(table) |>
+        purrr::map(\(.y) data.frame(schema = .x@name, table = .y@name["table"])) |>
+        purrr::reduce(rbind.data.frame)
+    }) |>
+    purrr::reduce(rbind.data.frame)
+
+  # Skip dbplyr temporary tables
+  tables <- dplyr::filter(tables, !startsWith(.data$table, "dbplyr_"))
+
+  # Subset if pattern is given
+  if (!is.null(pattern)) {
+    tables <- subset(tables, grepl(pattern, table))
+  }
+
+  # Remove empty schemas
+  tables <- dplyr::mutate(tables, schema = dplyr::if_else(.data$schema == "", NA, .data$schema))
+
+  row.names(tables) <- NULL # Reset row names
+  return(tables)
+}
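A quick sketch of listing and resolving tables on SQLite; the table name is illustrative, and note that `pattern` is matched against the combined "schema.table" string:

conn <- get_connection(drv = RSQLite::SQLite())
dplyr::copy_to(conn, mtcars, name = "my_table", temporary = FALSE)

get_tables(conn, pattern = "my_")  # data.frame: schema = "main", table = "my_table"
id("main.my_table", conn)          # DBI::Id with schema "main" and table "my_table"

close_connection(conn)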
diff --git a/R/id.R b/R/id.R
new file mode 100644
index 00000000..f6ee9aaa
--- /dev/null
+++ b/R/id.R
@@ -0,0 +1,115 @@
+#' Convenience function for DBI::Id
+#'
+#' @template db_table_id
+#' @template conn
+#' @param allow_table_only
+#' logical. If `TRUE`, allows for returning a `DBI::Id` with `table` = `myschema.table` if schema `myschema`
+#' is not found in `conn`.
+#' If `FALSE`, the function will raise an error if the implied schema cannot be found in `conn`.
+#' @details The given `db_table_id` is parsed to a DBI::Id depending on the type of input:
+#' * `character`: db_table_id is parsed to a DBI::Id object using an assumption of "schema.table" syntax
+#'   with corresponding schema (if found in `conn`) and table values.
+#'   If no schema is implied, the default schema of `conn` will be used.
+#'
+#' * `DBI::Id`: if schema is not specified in `Id`, the schema is set to the default schema for `conn` (if given).
+#'
+#' * `tbl_sql`: the remote name is used to resolve the table identification.
+#'
+#' @return A DBI::Id object parsed from db_table_id (see details)
+#' @examples
+#' id("schema.table")
+#' @seealso [DBI::Id] which this function wraps.
+#' @export
+id <- function(db_table_id, conn = NULL, allow_table_only = TRUE) {
+  UseMethod("id")
+}
+
+
+#' @export
+id.Id <- function(db_table_id, conn = NULL, allow_table_only = TRUE) {
+  return(DBI::Id(schema = purrr::pluck(db_table_id, "name", "schema", .default = SCDB::get_schema(conn)),
+                 table = purrr::pluck(db_table_id, "name", "table")))
+}
+
+
+#' @export
+id.character <- function(db_table_id, conn = NULL, allow_table_only = TRUE) {
+
+  checkmate::assert(is.null(conn), DBI::dbIsValid(conn), combine = "or")
+
+  if (stringr::str_detect(db_table_id, "\\.")) {
+    db_name <- stringr::str_split_1(db_table_id, "\\.")
+    db_schema <- db_name[1]
+    db_table <- db_name[2]
+
+    # If no matching implied schema is found, return the unmodified db_table_id in the default schema
+    if (allow_table_only && !is.null(conn) && !schema_exists(conn, db_schema)) {
+      return(DBI::Id(schema = get_schema(conn), table = db_table_id))
+    }
+  } else {
+    db_schema <- get_schema(conn)
+    db_table <- db_table_id
+  }
+
+  return(DBI::Id(schema = db_schema, table = db_table))
+}
+
+
+#' @export
+id.tbl_dbi <- function(db_table_id, conn = NULL, allow_table_only = TRUE) {
+
+  # If the table identification is fully qualified, extract the Id from remote_table
+  if (!is.na(purrr::pluck(dbplyr::remote_table(db_table_id), unclass, "schema"))) {
+
+    table_ident <- dbplyr::remote_table(db_table_id) |>
+      unclass() |>
+      purrr::discard(is.na)
+
+    table_conn <- dbplyr::remote_con(db_table_id)
+
+    return(
+      DBI::Id(
+        catalog = purrr::pluck(table_ident, "catalog"),
+        schema = purrr::pluck(table_ident, "schema", .default = get_schema(table_conn)),
+        table = purrr::pluck(table_ident, "table")
+      )
+    )
+
+  } else {
+
+    # If not, attempt to resolve the table from existing tables.
+    # For SQLite, there should only be one table in main/temp matching the table.
+    # In some cases, tables may have been added to the DB that make the id ambiguous.
+    schema <- get_tables(dbplyr::remote_con(db_table_id), show_temporary = TRUE) |>
+      dplyr::filter(.data$table == dbplyr::remote_name(db_table_id)) |>
+      dplyr::pull("schema")
+
+    if (length(schema) > 1) {
+      stop(
+        "Table identification has been corrupted! ",
+        "This table does not contain information about its schema and ",
+        "multiple tables with this name were found across schemas."
+      )
+    }
+
+    return(DBI::Id(schema = schema, table = dbplyr::remote_name(db_table_id)))
+  }
+}
+
+
+#' @export
+as.character.Id <- function(x, ...) 
{ + + info <- x@name |> + purrr::discard(is.na) + + id_representation <- list( + catalog = purrr::pluck(info, "catalog"), + schema = purrr::pluck(info, "schema"), + table = purrr::pluck(info, "table") + ) |> + purrr::discard(is.null) |> + do.call(purrr::partial(paste, sep = "."), args = _) + + return(id_representation) +} diff --git a/R/db_manipulating_functions.R b/R/interlace_sql.R similarity index 52% rename from R/db_manipulating_functions.R rename to R/interlace_sql.R index 54bfda0c..bd70caa2 100644 --- a/R/db_manipulating_functions.R +++ b/R/interlace_sql.R @@ -1,136 +1,3 @@ -#' Filters .data according to all records in the filter -#' -#' @description -#' If filter = NULL, not filtering is done -#' If filter is different from NULL, the .data is filtered by a inner_join using all columns of the filter: -#' \code{inner_join(.data, filter, by = colnames(filter))} -#' -#' by and na_by can overwrite the inner_join columns used in the filtering -#' -#' @template .data -#' @template filters -#' @param by passed to inner_join if different from NULL -#' @param na_by passed to inner_join if different from NULL -#' @template .data_return -#' @examples -#' # Filtering with null means no filtering is done -#' filter <- NULL -#' identical(filter_keys(mtcars, filter), mtcars) # TRUE -#' -#' # Filtering by vs = 0 -#' filter <- data.frame(vs = 0) -#' identical(filter_keys(mtcars, filter), dplyr::filter(mtcars, vs == 0)) # TRUE -#' -#' # Filtering by the specific combinations of vs = 0 and am = 1 -#' filter <- dplyr::distinct(mtcars, vs, am) -#' filter_keys(mtcars, filter) -#' -#' @importFrom rlang .data -#' @export -filter_keys <- function(.data, filters, by = NULL, na_by = NULL) { - if (is.null(filters)) { - return(.data) - } - - assert_data_like(.data) - assert_data_like(filters, null.ok = TRUE) - checkmate::assert_subset(c(by, na_by), colnames(filters)) - - UseMethod("filter_keys") -} - -#' @export -filter_keys.tbl_sql <- function(.data, filters, by = NULL, na_by = NULL) { - - if (is.null(by) && is.null(na_by)) { - # Determine key types - key_types <- filters |> - dplyr::ungroup() |> - dplyr::summarise(dplyr::across( - .cols = tidyselect::everything(), - .fns = ~ sum(ifelse(is.na(.), 0, 1), na.rm = TRUE) - )) |> - tidyr::pivot_longer(tidyselect::everything(), names_to = "column_name", values_to = "is_na") - - by <- key_types |> dplyr::filter(.data$is_na > 0) |> dplyr::pull("column_name") - na_by <- key_types |> dplyr::filter(.data$is_na == 0) |> dplyr::pull("column_name") - - if (length(by) == 0) by <- NULL - if (length(na_by) == 0) na_by <- NULL - } - return(dplyr::inner_join(.data, filters, by = by, na_by = na_by)) -} - -#' @export -filter_keys.data.frame <- function(.data, filters, by = NULL, na_by = NULL) { - if (is.null(by) && is.null(na_by)) by <- colnames(filters) - return(dplyr::inner_join(.data, filters, by = c(by, na_by))) -} - -#' tidyr::unite for tbl_dbi -#' -#' @inheritParams tidyr::unite -#' @examples -#' library(tidyr, warn.conflicts = FALSE) -#' df <- expand_grid(x = c("a", NA), y = c("b", NA)) -#' df - -#' df %>% unite("z", x:y, remove = FALSE) -#' # To remove missing values: -#* df %>% unite("z", x:y, na.rm = TRUE, remove = FALSE) -#' -#' # Separate is almost the complement of unite -#' df %>% -#' unite("xy", x:y) %>% -#' separate(xy, c("x", "y")) -# (but note `x` and `y` contain now "NA" not NA) -#' @importFrom rlang := -#' @return A tbl_dbi with the specified columns united into a new column named according to "col" -#' @exportS3Method tidyr::unite tbl_dbi -unite.tbl_dbi <- 
function(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE) { # nolint: object_name_linter - - # Modified from - # https://stackoverflow.com/questions/48536983/how-to-concatenate-strings-of-multiple- -- continued below - # columns-from-table-in-sql-server-using-dp - - # Check arguments - checkmate::assert_class(data, "tbl_dbi") - checkmate::assert_character(sep) - checkmate::assert_logical(remove) - checkmate::assert_logical(na.rm) - - # Code below is adapted from tidyr::unite.data.frame - rlang::check_dots_unnamed() - - if (rlang::dots_n(...) == 0) { - from_vars <- colnames(data) - } else { - from_vars <- colnames(dplyr::select(data, ...)) - } - - # We need add some support for how tidyr::unite accepts input of "col" - col <- rlang::as_string(rlang::ensym(col)) - - col_symbols <- purrr::map(from_vars, as.symbol) - - # We need to determine where col should be placed - first_from <- which(colnames(data) %in% from_vars)[1] - - # CONCAT_WS does not exist in SQLite - if (inherits(data, "tbl_SQLiteConnection")) { - out <- data |> - dplyr::mutate({{col}} := NULLIF(paste(!!!col_symbols, sep = sep), ""), .before = !!first_from) - } else { - out <- data |> - dplyr::mutate({{col}} := NULLIF(CONCAT_WS(sep, !!!col_symbols), ""), .before = !!first_from) - } - - if (remove) out <- out |> dplyr::select(!tidyselect::all_of(from_vars)) - - return(out) -} - - #' Combine any number of SQL queries, where each has their own time axis of #' validity (valid_from and valid_until) #' diff --git a/R/schema_exists.R b/R/schema_exists.R new file mode 100644 index 00000000..91d65f01 --- /dev/null +++ b/R/schema_exists.R @@ -0,0 +1,60 @@ +#' Test if a schema exists in given connection +#' @param schema A character string giving the schema name +#' @template conn +#' @return TRUE if the given schema is found on conn +#' @examples +#' +#' conn <- get_connection(drv = RSQLite::SQLite()) +#' +#' schema_exists(conn, "test") +#' +#' close_connection(conn) +#' @export +schema_exists <- function(conn, schema) { + UseMethod("schema_exists") +} + +#' @export +schema_exists.SQLiteConnection <- function(conn, schema) { + query <- paste0( + "SELECT schema, name FROM pragma_table_list WHERE schema == '", + schema, + "' AND name IN ('sqlite_schema', 'sqlite_temp_schema')" + ) + result <- DBI::dbGetQuery(conn, query) + + return(nrow(result) == 1) +} + +#' @export +schema_exists.DBIConnection <- function(conn, schema) { + query <- paste0("SELECT schema_name FROM INFORMATION_SCHEMA.SCHEMATA WHERE schema_name = '", schema, "'") + result <- DBI::dbGetQuery(conn, query) + + return(nrow(result) == 1) +} + +#' @export +schema_exists.default <- function(conn, schema) { + + checkmate::assert_character(schema) + + objs <- DBI::dbListObjects(conn) + matches <- sapply(objs$table, \(.x) methods::slot(.x, "name")) |> + (\(.x) names(.x) == "schema" & .x == schema)() + + if (any(matches)) return(TRUE) + + tryCatch({ + DBI::dbCreateTable( + conn, + name = DBI::Id(schema = schema, table = "SCDB_schema_test"), + fields = data.frame(name = character()), + temporary = FALSE + ) + + DBI::dbRemoveTable(conn, DBI::Id(schema = schema, table = "SCDB_schema_test")) + TRUE + }, + error = function(e) FALSE) +} diff --git a/R/slice_time.R b/R/slice_time.R new file mode 100644 index 00000000..e5f8201d --- /dev/null +++ b/R/slice_time.R @@ -0,0 +1,38 @@ +#' Slices a data object based on time / date +#' +#' @template .data +#' @param slice_ts The time / date to slice by +#' @param from_ts The name of the column in .data specifying valid from time (note: must be 
unquoted)
+#' @param until_ts The name of the column in .data specifying valid until time (note: must be unquoted)
+#' @template .data_return
+#' @examples
+#' conn <- get_connection(drv = RSQLite::SQLite())
+#'
+#' m <- mtcars |>
+#'   dplyr::mutate(from_ts = dplyr::if_else(dplyr::row_number() > 10,
+#'                                          as.Date("2020-01-01"),
+#'                                          as.Date("2021-01-01")),
+#'                 until_ts = as.Date(NA))
+#'
+#' dplyr::copy_to(conn, m, name = "mtcars", temporary = FALSE)
+#'
+#' q <- dplyr::tbl(conn, id("mtcars", conn))
+#'
+#' nrow(slice_time(q, "2020-01-01")) # 10
+#' nrow(slice_time(q, "2021-01-01")) # nrow(mtcars)
+#'
+#' close_connection(conn)
+#' @export
+slice_time <- function(.data, slice_ts, from_ts = from_ts, until_ts = until_ts) {
+
+  # Check arguments
+  assert_data_like(.data)
+  assert_timestamp_like(slice_ts)
+
+  from_ts <- dplyr::enquo(from_ts)
+  until_ts <- dplyr::enquo(until_ts)
+  .data <- .data |>
+    dplyr::filter(is.na({{until_ts}}) | slice_ts < {{until_ts}},
+                  {{from_ts}} <= slice_ts)
+  return(.data)
+}
diff --git a/R/table_exists.R b/R/table_exists.R
new file mode 100644
index 00000000..81435b92
--- /dev/null
+++ b/R/table_exists.R
@@ -0,0 +1,89 @@
+#' Test if a table exists in database
+#'
+#' @description
+#' This function attempts to determine the existence of a given table.
+#' If a character input is given, matching is done heuristically assuming a "schema.table" notation.
+#' If no schema is implied in this case, the default schema is assumed.
+#' @template conn
+#' @template db_table_id
+#' @return TRUE if db_table_id can be parsed to a table found in conn
+#' @importFrom rlang .data
+#' @name table_exists
+#' @examples
+#' conn <- get_connection(drv = RSQLite::SQLite())
+#'
+#' dplyr::copy_to(conn, mtcars, name = "mtcars", temporary = FALSE)
+#' dplyr::copy_to(conn, iris, name = "iris")
+#'
+#' table_exists(conn, "mtcars") # TRUE
+#' table_exists(conn, "iris") # FALSE
+#' table_exists(conn, "temp.iris") # TRUE
+#'
+#' close_connection(conn)
+#' @export
+table_exists <- function(conn, db_table_id) {
+  checkmate::assert(DBI::dbIsValid(conn))
+  assert_id_like(db_table_id)
+
+  # Check arguments
+  if (inherits(db_table_id, "tbl_dbi")) {
+    exists <- tryCatch({
+      dplyr::collect(utils::head(db_table_id, 0))
+      return(TRUE)
+    },
+    error = function(e) {
+      return(FALSE)
+    })
+
+    return(exists)
+  }
+
+  UseMethod("table_exists", conn)
+}
+
+#' @rdname table_exists
+#' @importFrom rlang .data
+#' @export
+table_exists.DBIConnection <- function(conn, db_table_id) {
+  tables <- get_tables(conn, show_temporary = TRUE)
+
+  if (inherits(db_table_id, "Id")) {
+    db_table_id <- id(db_table_id, conn) # Ensure Id is fully qualified (has schema)
+
+    exact_match <- tables |>
+      dplyr::filter(.data$table == db_table_id@name["table"], .data$schema == db_table_id@name["schema"])
+
+    if (nrow(exact_match) == 1) {
+      return(TRUE)
+    } else {
+      return(FALSE)
+    }
+
+  } else if (inherits(db_table_id, "character")) {
+
+    # Check if schema is implied -- use default if not implied
+    if (!stringr::str_detect(db_table_id, r"{\w*\.\w*}")) {
+      db_table_id <- paste(get_schema(conn), db_table_id, sep = ".")
+    }
+
+    # Then heuristically match with tables in conn
+    matches <- dplyr::union_all(
+      tables,
+      dplyr::mutate(dplyr::filter(tables, .data$schema == get_schema(conn)), schema = NA)
+    ) |>
+      tidyr::unite("table_str", "schema", "table", sep = ".", na.rm = TRUE, remove = FALSE) |>
+      dplyr::filter(.data$table_str == !!db_table_id) |>
+      dplyr::select(!"table_str")
+
+    if (nrow(matches) <= 1) {
+      return(nrow(matches) == 1)
+    } else {
+      rlang::abort(
+        message = paste0("More than one table matching '", db_table_id, "' was found!"),
+        matches = matches
+      )
+    }
+  } else {
+    rlang::abort("Only character or DBI::Id inputs to table_exists are allowed!")
+  }
+}
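A small sketch of the character-input heuristic on SQLite, where the default schema is "main":

conn <- get_connection(drv = RSQLite::SQLite())
dplyr::copy_to(conn, mtcars, name = "mtcars", temporary = FALSE)

table_exists(conn, "mtcars")       # TRUE -- no schema implied, so "main" is assumed
table_exists(conn, "main.mtcars")  # TRUE -- exact schema-qualified match
table_exists(conn, "temp.mtcars")  # FALSE -- no such table in the temp schema

close_connection(conn)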
diff --git a/R/unite.tbl_dbi.R b/R/unite.tbl_dbi.R
new file mode 100644
index 00000000..129f8eae
--- /dev/null
+++ b/R/unite.tbl_dbi.R
@@ -0,0 +1,62 @@
+#' tidyr::unite for tbl_dbi
+#'
+#' @inheritParams tidyr::unite
+#' @examples
+#' library(tidyr, warn.conflicts = FALSE)
+#' df <- expand_grid(x = c("a", NA), y = c("b", NA))
+#' df
+#'
+#' df %>% unite("z", x:y, remove = FALSE)
+#' # To remove missing values:
+#' df %>% unite("z", x:y, na.rm = TRUE, remove = FALSE)
+#'
+#' # Separate is almost the complement of unite
+#' df %>%
+#'   unite("xy", x:y) %>%
+#'   separate(xy, c("x", "y"))
+#' # (but note `x` and `y` now contain "NA", not NA)
+#' @importFrom rlang :=
+#' @return A tbl_dbi with the specified columns united into a new column named according to "col"
+#' @exportS3Method tidyr::unite tbl_dbi
+unite.tbl_dbi <- function(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE) { # nolint: object_name_linter
+
+  # Modified from
+  # https://stackoverflow.com/questions/48536983/how-to-concatenate-strings-of-multiple- -- continued below
+  # columns-from-table-in-sql-server-using-dp
+
+  # Check arguments
+  checkmate::assert_class(data, "tbl_dbi")
+  checkmate::assert_character(sep)
+  checkmate::assert_logical(remove)
+  checkmate::assert_logical(na.rm)
+
+  # Code below is adapted from tidyr::unite.data.frame
+  rlang::check_dots_unnamed()
+
+  if (rlang::dots_n(...) == 0) {
+    from_vars <- colnames(data)
+  } else {
+    from_vars <- colnames(dplyr::select(data, ...))
+  }
+
+  # We need to add some support for how tidyr::unite accepts input of "col"
+  col <- rlang::as_string(rlang::ensym(col))
+
+  col_symbols <- purrr::map(from_vars, as.symbol)
+
+  # We need to determine where col should be placed
+  first_from <- which(colnames(data) %in% from_vars)[1]
+
+  # CONCAT_WS does not exist in SQLite
+  if (inherits(data, "tbl_SQLiteConnection")) {
+    out <- data |>
+      dplyr::mutate({{col}} := NULLIF(paste(!!!col_symbols, sep = sep), ""), .before = !!first_from)
+  } else {
+    out <- data |>
+      dplyr::mutate({{col}} := NULLIF(CONCAT_WS(sep, !!!col_symbols), ""), .before = !!first_from)
+  }
+
+  if (remove) out <- out |> dplyr::select(!tidyselect::all_of(from_vars))
+
+  return(out)
+}
diff --git a/man/create_logs_if_missing.Rd b/man/create_logs_if_missing.Rd
index 0ca8479a..e9366e6f 100644
--- a/man/create_logs_if_missing.Rd
+++ b/man/create_logs_if_missing.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/create_table.R
+% Please edit documentation in R/create_logs_if_missing.R
 \name{create_logs_if_missing}
 \alias{create_logs_if_missing}
 \title{Create a table with the SCDB log structure if it does not exists}
diff --git a/man/filter_keys.Rd b/man/filter_keys.Rd
index 02f10c9f..777af2a3 100644
--- a/man/filter_keys.Rd
+++ b/man/filter_keys.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/db_manipulating_functions.R
+% Please edit documentation in R/filter_keys.R
 \name{filter_keys}
 \alias{filter_keys}
 \title{Filters .data according to all records in the filter}
diff --git a/man/get_tables.Rd b/man/get_tables.Rd
index dcb18654..5295a512 100644
--- a/man/get_tables.Rd
+++ b/man/get_tables.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_table.R
+% Please edit 
documentation in R/get_tables.R \name{get_tables} \alias{get_tables} \title{Gets the available tables} diff --git a/man/id.Rd b/man/id.Rd index 36b26bc9..19ec2c48 100644 --- a/man/id.Rd +++ b/man/id.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/connection.R +% Please edit documentation in R/id.R \name{id} \alias{id} \title{Convenience function for DBI::Id} diff --git a/man/interlace_sql.Rd b/man/interlace_sql.Rd index e913e953..5cc4a9c3 100644 --- a/man/interlace_sql.Rd +++ b/man/interlace_sql.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/db_manipulating_functions.R +% Please edit documentation in R/interlace_sql.R \name{interlace_sql} \alias{interlace_sql} \title{Combine any number of SQL queries, where each has their own time axis of diff --git a/man/schema_exists.Rd b/man/schema_exists.Rd index 49d1a8c9..7457162a 100644 --- a/man/schema_exists.Rd +++ b/man/schema_exists.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_schema.R +% Please edit documentation in R/schema_exists.R \name{schema_exists} \alias{schema_exists} \title{Test if a schema exists in given connection} diff --git a/man/slice_time.Rd b/man/slice_time.Rd index 6666253e..9d766618 100644 --- a/man/slice_time.Rd +++ b/man/slice_time.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_table.R +% Please edit documentation in R/slice_time.R \name{slice_time} \alias{slice_time} \title{Slices a data object based on time / date} diff --git a/man/table_exists.Rd b/man/table_exists.Rd index 94c27c1d..2d85dc8e 100644 --- a/man/table_exists.Rd +++ b/man/table_exists.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_table.R +% Please edit documentation in R/table_exists.R \name{table_exists} \alias{table_exists} \alias{table_exists.DBIConnection} diff --git a/man/unite.tbl_dbi.Rd b/man/unite.tbl_dbi.Rd index 87508393..56062438 100644 --- a/man/unite.tbl_dbi.Rd +++ b/man/unite.tbl_dbi.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/db_manipulating_functions.R +% Please edit documentation in R/unite.tbl_dbi.R \name{unite.tbl_dbi} \alias{unite.tbl_dbi} \title{tidyr::unite for tbl_dbi} diff --git a/tests/testthat/test-connection.R b/tests/testthat/test-connection.R index d4bbe7af..661f286c 100644 --- a/tests/testthat/test-connection.R +++ b/tests/testthat/test-connection.R @@ -7,7 +7,7 @@ test_that("get_connection() works", { }) -test_that("get_connection notifies if connection fails", { +test_that("get_connection() notifies if connection fails", { for (i in 1:100) { random_string <- paste(sample(letters, size = 32, replace = TRUE), collapse = "") @@ -19,122 +19,6 @@ test_that("get_connection notifies if connection fails", { }) -test_that("id() works for character input without implied schema", { - for (conn in get_test_conns()) { - - # Without schema, we expect: - - # ... no change of no conn is given - expect_identical(id("test_mtcars"), DBI::Id(table = "test_mtcars")) - - # .. the defaults schema if conn is given - expect_identical(id("test_mtcars", conn), DBI::Id(schema = SCDB::get_schema(conn), table = "test_mtcars")) - - DBI::dbDisconnect(conn) - } -}) - - -test_that("id() works for character input with implied schema", { - # With schema we expect the implied schema.table to be resolved: - - # ... 
when no conn is given, we naively assume schema.table holds true - expect_identical(id("test.mtcars"), DBI::Id(schema = "test", table = "mtcars")) - - - for (conn in get_test_conns()) { - - # ... when conn is given, we check if implied schema exists. - # NOTE: All testing connections should have the schema "test" (except SQLite without attached schemas) - # therefore, in almost all cases, we shold resolve the schema correctly (except the SQLite case above) - if (inherits(conn, "SQLiteConnection") && !schema_exists(conn, "test")) { - expect_identical(id("test.mtcars", conn), DBI::Id(schema = "main", table = "test.mtcars")) - } else { - expect_identical(id("test.mtcars", conn), DBI::Id(schema = "test", table = "mtcars")) - } - - DBI::dbDisconnect(conn) - } -}) - - -test_that("id() works for character input with implied schema when schema does not exist", { - for (conn in get_test_conns()) { - - # Generate schema that does not exist - k <- 0 - while (k < 100) { - invalid_schema_name <- paste(sample(letters, size = 16, replace = TRUE), collapse = "") - k <- k + 1 - if (schema_exists(conn, invalid_schema_name)) next - break - } - - if (k < 100) { - - table_name <- paste(invalid_schema_name, "mtcars", sep = ".") - - # When schema does not exist and allow_table_only is TRUE, the schema should be the default schema - expect_identical(id(table_name, conn = conn, allow_table_only = TRUE), - DBI::Id(schema = get_schema(conn), table = table_name)) - - # When schema does not exist and allow_table_only is FALSE, the schema should be as implied - expect_identical(id(table_name, conn = conn, allow_table_only = FALSE), - DBI::Id(schema = invalid_schema_name, table = "mtcars")) - - } else { - warning("Non-existing schema could not be generated!") - } - - DBI::dbDisconnect(conn) - - # When connection is closed, the existence of the schema cannot be validated and an error should be given - expect_error(id(table_name, conn = conn), r"{DBI::dbIsValid\(conn\): FALSE}") - } -}) - - -test_that("id() works for DBI::Id inputs", { - for (conn in get_test_conns()) { - - # When passing an Id without a schema, id should enrich the Id with the default schema - expect_identical( - id(DBI::Id(table = "mtcars"), conn), - DBI::Id(schema = get_schema(conn), table = "mtcars") - ) - - DBI::dbDisconnect(conn) - } -}) - - -test_that("id() is consistent for tbl_dbi inputs", { - for (conn in get_test_conns()) { - - expectation <- id(dplyr::tbl(conn, id("test.mtcars", conn), check_from = FALSE)) - - expect_identical( - expectation, - id.tbl_dbi(dplyr::tbl(conn, id("test.mtcars", conn), check_from = FALSE)) - ) - - DBI::dbDisconnect(conn) - } -}) - - -test_that("as.character.id works", { - expect_identical(as.character(DBI::Id(table = "table")), "table") - expect_identical(as.character(DBI::Id(schema = "schema", table = "table")), "schema.table") - expect_identical(as.character(DBI::Id(catalog = "catalog", schema = "schema", table = "table")), - "catalog.schema.table") - - expect_identical(as.character(DBI::Id(table = "table", schema = "schema")), "schema.table") - expect_identical(as.character(DBI::Id(table = "table", schema = "schema", catalog = "catalog")), - "catalog.schema.table") -}) - - test_that("close_connection() works", { for (conn in get_test_conns()) { diff --git a/tests/testthat/test-create_logs_if_missing.R b/tests/testthat/test-create_logs_if_missing.R new file mode 100644 index 00000000..6170c8df --- /dev/null +++ b/tests/testthat/test-create_logs_if_missing.R @@ -0,0 +1,60 @@ +test_that("create_logs_if_missing() can 
create logs in default and test schema", {
+  for (conn in get_test_conns()) {
+    for (schema in list(NULL, "test")) {
+
+      # Generate table in schema that does not exist
+      k <- 0
+      while (k < 100) {
+        logs_id <- paste(c(schema, paste(sample(letters, size = 16, replace = TRUE), collapse = "")), collapse = ".")
+        k <- k + 1
+        if (DBI::dbExistsTable(conn, id(logs_id, conn))) next
+        break
+      }
+
+      if (k < 100) {
+
+        # We know table does not exist
+        expect_false(table_exists(conn, logs_id))
+
+        # We create the missing log table
+        expect_no_error(create_logs_if_missing(log_table = logs_id, conn))
+
+        # And check it conforms with the requirements
+        expect_true(table_exists(conn, logs_id))
+        expect_true(nrow(dplyr::tbl(conn, id(logs_id, conn))) == 0)
+
+        log_signature <- data.frame(
+          date = as.POSIXct(NA),
+          schema = NA_character_,
+          table = NA_character_,
+          n_insertions = NA_integer_,
+          n_deactivations = NA_integer_,
+          start_time = as.POSIXct(NA),
+          end_time = as.POSIXct(NA),
+          duration = NA_character_,
+          success = NA,
+          message = NA_character_,
+          log_file = NA_character_
+        ) |>
+          dplyr::copy_to(conn, df = _, "SCDB_tmp", overwrite = TRUE) |>
+          utils::head(0) |>
+          dplyr::collect()
+
+        expect_identical(
+          dplyr::collect(dplyr::tbl(conn, id(logs_id, conn))),
+          log_signature
+        )
+
+        # Attempting to recreate the logs table should not change anything
+        expect_no_error(create_logs_if_missing(log_table = logs_id, conn))
+        expect_true(table_exists(conn, logs_id))
+        expect_true(nrow(dplyr::tbl(conn, id(logs_id, conn))) == 0)
+
+      } else {
+        warning("Non-existing table in default schema could not be generated!")
+      }
+
+    }
+    DBI::dbDisconnect(conn)
+  }
+})
diff --git a/tests/testthat/test-create_table.R b/tests/testthat/test-create_table.R
index 40c29bc0..f33c3ccd 100644
--- a/tests/testthat/test-create_table.R
+++ b/tests/testthat/test-create_table.R
@@ -85,73 +85,3 @@ test_that("create_table() does not overwrite tables", {
     DBI::dbDisconnect(conn)
   }
 })
-
-
-test_that("getTableSignature() generates a signature for NULL connections", {
-  expect_identical(
-    lapply(cars, class),
-    as.list(getTableSignature(cars, conn = NULL))
-  )
-})
-
-
-test_that("create_logs_if_missing() can create logs in default and test schema", {
-  for (conn in get_test_conns()) {
-    for (schema in list(NULL, "test")) {
-
-      # Generate table in schema that does not exist
-      k <- 0
-      while (k < 100) {
-        logs_id <- paste(c(schema, paste(sample(letters, size = 16, replace = TRUE), collapse = "")), collapse = ".")
-        k <- k + 1
-        if (DBI::dbExistsTable(conn, id(logs_id, conn))) next
-        break
-      }
-
-      if (k < 100) {
-
-        # We know table does not exists
-        expect_false(table_exists(conn, logs_id))
-
-        # We create the missing log table
-        expect_no_error(create_logs_if_missing(log_table = logs_id, conn))
-
-        # And check it conforms with the requirements
-        expect_true(table_exists(conn, logs_id))
-        expect_true(nrow(dplyr::tbl(conn, id(logs_id, conn))) == 0)
-
-        log_signature <- data.frame(
-          date = as.POSIXct(NA),
-          schema = NA_character_,
-          table = NA_character_,
-          n_insertions = NA_integer_,
-          n_deactivations = NA_integer_,
-          start_time = as.POSIXct(NA),
-          end_time = as.POSIXct(NA),
-          duration = NA_character_,
-          success = NA,
-          message = NA_character_,
-          log_file = NA_character_
-        ) |>
-          dplyr::copy_to(conn, df = _, "SCDB_tmp", overwrite = TRUE) |>
-          utils::head(0) |>
-          dplyr::collect()
-
-        expect_identical(
-          dplyr::collect(dplyr::tbl(conn, id(logs_id, conn))),
-          log_signature
-        )
-
-        # Attempting to recreate the logs table should not change anything
-        
+        log_signature <- data.frame(
+          date = as.POSIXct(NA),
+          schema = NA_character_,
+          table = NA_character_,
+          n_insertions = NA_integer_,
+          n_deactivations = NA_integer_,
+          start_time = as.POSIXct(NA),
+          end_time = as.POSIXct(NA),
+          duration = NA_character_,
+          success = NA,
+          message = NA_character_,
+          log_file = NA_character_
+        ) |>
+          dplyr::copy_to(conn, df = _, "SCDB_tmp", overwrite = TRUE) |>
+          utils::head(0) |>
+          dplyr::collect()
+
+        expect_identical(
+          dplyr::collect(dplyr::tbl(conn, id(logs_id, conn))),
+          log_signature
+        )
+
+        # Attempting to recreate the logs table should not change anything
+        expect_no_error(create_logs_if_missing(log_table = logs_id, conn))
+        expect_true(table_exists(conn, logs_id))
+        expect_true(nrow(dplyr::tbl(conn, id(logs_id, conn))) == 0)
+
+      } else {
+        warning("Non-existing table in default schema could not be generated!")
+      }
+
+    }
+    DBI::dbDisconnect(conn)
+  }
+})
diff --git a/tests/testthat/test-create_table.R b/tests/testthat/test-create_table.R
index 40c29bc0..f33c3ccd 100644
--- a/tests/testthat/test-create_table.R
+++ b/tests/testthat/test-create_table.R
@@ -85,73 +85,3 @@ test_that("create_table() does not overwrite tables", {
     DBI::dbDisconnect(conn)
   }
 })
-
-
-test_that("getTableSignature() generates a signature for NULL connections", {
-  expect_identical(
-    lapply(cars, class),
-    as.list(getTableSignature(cars, conn = NULL))
-  )
-})
-
-
-test_that("create_logs_if_missing() can create logs in default and test schema", {
-  for (conn in get_test_conns()) {
-    for (schema in list(NULL, "test")) {
-
-      # Generate table in schema that does not exist
-      k <- 0
-      while (k < 100) {
-        logs_id <- paste(c(schema, paste(sample(letters, size = 16, replace = TRUE), collapse = "")), collapse = ".")
-        k <- k + 1
-        if (DBI::dbExistsTable(conn, id(logs_id, conn))) next
-        break
-      }
-
-      if (k < 100) {
-
-        # We know table does not exists
-        expect_false(table_exists(conn, logs_id))
-
-        # We create the missing log table
-        expect_no_error(create_logs_if_missing(log_table = logs_id, conn))
-
-        # And check it conforms with the requirements
-        expect_true(table_exists(conn, logs_id))
-        expect_true(nrow(dplyr::tbl(conn, id(logs_id, conn))) == 0)
-
-        log_signature <- data.frame(
-          date = as.POSIXct(NA),
-          schema = NA_character_,
-          table = NA_character_,
-          n_insertions = NA_integer_,
-          n_deactivations = NA_integer_,
-          start_time = as.POSIXct(NA),
-          end_time = as.POSIXct(NA),
-          duration = NA_character_,
-          success = NA,
-          message = NA_character_,
-          log_file = NA_character_
-        ) |>
-          dplyr::copy_to(conn, df = _, "SCDB_tmp", overwrite = TRUE) |>
-          utils::head(0) |>
-          dplyr::collect()
-
-        expect_identical(
-          dplyr::collect(dplyr::tbl(conn, id(logs_id, conn))),
-          log_signature
-        )
-
-        # Attempting to recreate the logs table should not change anything
-        expect_no_error(create_logs_if_missing(log_table = logs_id, conn))
-        expect_true(table_exists(conn, logs_id))
-        expect_true(nrow(dplyr::tbl(conn, id(logs_id, conn))) == 0)
-
-      } else {
-        warning("Non-existing table in default schema could not be generated!")
-      }
-
-    }
-    DBI::dbDisconnect(conn)
-  }
-})
diff --git a/tests/testthat/test-db_manipulating_functions.R b/tests/testthat/test-db_manipulating_functions.R
deleted file mode 100644
index c86f19c6..00000000
--- a/tests/testthat/test-db_manipulating_functions.R
+++ /dev/null
@@ -1,224 +0,0 @@
-test_that("unite.tbl_dbi() works", {
-  for (conn in get_test_conns()) {
-
-    q <- get_table(conn, "__mtcars") |> utils::head(1)
-    qu_remove <- tidyr::unite(dplyr::select(q, mpg, hp), "new_column", mpg, hp) |> dplyr::compute()
-    qu <- tidyr::unite(dplyr::select(q, mpg, hp), "new_column", mpg, hp, remove = FALSE) |> dplyr::compute()
-    qu_alt <- tidyr::unite(dplyr::select(q, mpg, hp), "new_column", "mpg", "hp", remove = FALSE) |> dplyr::compute()
-
-    expect_s3_class(qu_remove, "tbl_dbi")
-    expect_s3_class(qu, "tbl_dbi")
-    expect_s3_class(qu_alt, "tbl_dbi")
-
-    expect_equal(colnames(qu_remove), "new_column")
-    expect_equal(colnames(qu), c("new_column", "mpg", "hp"))
-    expect_equal(colnames(qu_alt), c("new_column", "mpg", "hp"))
-
-    expect_equal(dplyr::collect(qu), dplyr::collect(qu_alt))
-
-    # tidyr::unite has some quirky (and FUN!!! behavior) that we are forced to match here
-    # specifically, the input "col" is converted to a symbol, so we have to do escape-bullshit
-    # NOTE: the line "dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |> "
-    # is to account for SQLite not having integer data-types. If we do not first convert to character,
-    # there will be differences between the objects that are trivial, so we remove these with this operation
-    # this way, the test should (hopefully) only fail if there are non-trivial differences
-    expect_mapequal(get_table(conn, "__mtcars") |>
-                      tidyr::unite("new_col", mpg, hp) |>
-                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
-                      dplyr::collect(),
-                    get_table(conn, "__mtcars") |>
-                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
-                      dplyr::collect() |>
-                      tidyr::unite("new_col", mpg, hp))
-
-    col <- "new_col"
-    expect_mapequal(get_table(conn, "__mtcars") |>
-                      tidyr::unite(col, mpg, hp) |>
-                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
-                      dplyr::collect(),
-                    get_table(conn, "__mtcars") |>
-                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
-                      dplyr::collect() |>
-                      tidyr::unite(col, mpg, hp))
-
-    expect_mapequal(get_table(conn, "__mtcars") |>
-                      tidyr::unite(!!col, mpg, hp) |>
-                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
-                      dplyr::collect(),
-                    get_table(conn, "__mtcars") |>
-                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
-                      dplyr::collect() |>
-                      tidyr::unite(!!col, mpg, hp))
-
-    # Unite places cols in a particular way, lets be sure we match
-    qq <- dplyr::mutate(q, dplyr::across(tidyselect::everything(), as.character)) # we convert to character since SQLite
-    expect_identical(qq |> tidyr::unite("test_col", vs, am) |> dplyr::collect(),
-                     qq |> dplyr::collect() |> tidyr::unite("test_col", vs, am))
-
-    DBI::dbDisconnect(conn)
-  }
-})
-
-
-test_that("interlace_sql() works", {
-  for (conn in get_test_conns()) {
-
-    t1 <- data.frame(key = c("A", "A", "B"),
-                     obs_1 = c(1, 2, 2),
-                     valid_from = as.Date(c("2021-01-01", "2021-02-01", "2021-01-01")),
-                     valid_until = as.Date(c("2021-02-01", "2021-03-01", NA)))
as.Date(c("2021-02-01", "2021-03-01", NA))) - - - t2 <- data.frame(key = c("A", "B"), - obs_2 = c("a", "b"), - valid_from = as.Date(c("2021-01-01", "2021-01-01")), - valid_until = as.Date(c("2021-04-01", NA))) - - - t_ref <- data.frame(key = c("A", "A", "A", "B"), - obs_1 = c(1, 2, NA, 2), - obs_2 = c("a", "a", "a", "b"), - valid_from = as.Date(c("2021-01-01", "2021-02-01", "2021-03-01", "2021-01-01")), - valid_until = as.Date(c("2021-02-01", "2021-03-01", "2021-04-01", NA))) - - - # Copy t1, t2 and t_ref to conn (and suppress check_from message) - t1 <- suppressMessages( - dplyr::copy_to(conn, t1, name = id("test.SCDB_tmp1", conn), overwrite = TRUE, temporary = FALSE) - ) - - t2 <- suppressMessages( - dplyr::copy_to(conn, t2, name = id("test.SCDB_tmp2", conn), overwrite = TRUE, temporary = FALSE) - ) - - t_ref <- suppressMessages( - dplyr::copy_to(conn, t_ref, name = id("test.SCDB_tmp3", conn), overwrite = TRUE, temporary = FALSE) - ) - - - expect_identical(interlace_sql(list(t1, t2), by = "key") |> dplyr::collect(), - t_ref |> dplyr::collect()) - - expect_mapequal(interlace_sql(list(t1, t2), by = "key") |> dplyr::collect(), - interlace_sql(list(t2, t1), by = "key") |> dplyr::collect()) - - DBI::dbDisconnect(conn) - } -}) - - -test_that("interlace_sql returns early if length(table) == 1", { - expect_identical(mtcars$mpg, interlace_sql(mtcars["mpg"], by = "mpg")) -}) - - -test_that("digest_to_checksum() works", { - for (conn in get_test_conns()) { - - expect_s3_class(mtcars |> digest_to_checksum(), "data.frame") - expect_s3_class(mtcars |> tibble::as_tibble() |> digest_to_checksum(), "tbl_df") - expect_s3_class(get_table(conn, "__mtcars") |> digest_to_checksum(), "tbl_dbi") - - # Check that col argument works - expect_equal(mtcars |> digest_to_checksum(col = "checky") |> dplyr::pull("checky"), - mtcars |> digest_to_checksum() |> dplyr::pull("checksum")) - - - expect_equal(mtcars |> dplyr::mutate(name = rownames(mtcars)) |> digest_to_checksum() |> colnames(), - get_table(conn, "__mtcars") |> digest_to_checksum() |> colnames()) - - - # Check that NA's generate unique checksums - x <- data.frame(col1 = c("A", NA), - col2 = c(NA, "A")) - - # .. locally - checksums <- x |> digest_to_checksum() |> dplyr::pull("checksum") - expect_false(checksums[1] == checksums[2]) - - # .. and on the remote - x <- suppressMessages( - dplyr::copy_to(conn, x, name = id("test.SCDB_tmp1", conn), overwrite = TRUE, temporary = FALSE) - ) - - checksums <- x |> digest_to_checksum() |> dplyr::pull("checksum") - expect_false(checksums[1] == checksums[2]) - - DBI::dbDisconnect(conn) - } -}) - - -test_that("digest_to_checksum() warns works correctly when overwriting", { - for (conn in get_test_conns()) { - - checksum_vector <- mtcars |> - digest_to_checksum() |> - dplyr::pull(checksum) - - expect_warning(checksum_vector2 <- mtcars |> - digest_to_checksum(col = "checksum") |> - digest_to_checksum(col = "checksum", warn = TRUE) |> - dplyr::pull(checksum)) - - expect_identical(checksum_vector, checksum_vector2) - - DBI::dbDisconnect(conn) - } -}) - - -test_that("slice_time() works", { - for (conn in get_test_conns()) { - - # SQLite does not work with dates. 
-    # SQLite does not work with dates. But since we use ISO 8601 for dates, we can compare lexicographically
-    xx <- get_table(conn, "__mtcars") |>
-      dplyr::mutate(checksum = dplyr::row_number(),
-                    from_ts = dplyr::if_else(checksum <= 20, "2022-06-01", "2022-06-15"),
-                    until_ts = NA_character_)
-
-    expect_equal(xx |> slice_time("2022-05-01") |> nrow(), 0)
-    expect_equal(xx |> slice_time("2022-06-01") |> nrow(), 20)
-    expect_equal(xx |> slice_time("2022-06-15") |> nrow(), nrow(mtcars))
-
-    DBI::dbDisconnect(conn)
-  }
-})
-
-test_that("filter_keys() works", {
-  for (conn in get_test_conns()) {
-
-    x <- get_table(conn, "__mtcars")
-
-    expect_equal(x,
-                 x |> filter_keys(NULL))
-
-    filter <- x |> utils::head(10) |> dplyr::select(name)
-    expect_equal(x |>
-                   dplyr::filter(name %in% !!dplyr::pull(filter, name)) |>
-                   dplyr::collect(),
-                 x |>
-                   filter_keys(filter) |>
-                   dplyr::collect())
-
-    filter <- x |> utils::head(10) |> dplyr::select(vs, am) |> dplyr::distinct()
-    expect_equal(x |>
-                   dplyr::inner_join(filter, by = c("vs", "am")) |>
-                   dplyr::collect(),
-                 x |>
-                   filter_keys(filter) |>
-                   dplyr::collect())
-
-    # Filtering with null means no filtering is done
-    m <- mtcars
-    row.names(m) <- NULL
-    filter <- NULL
-    expect_identical(filter_keys(m, filter), m)
-
-    # Filtering by vs = 0
-    filter <- data.frame(vs = 0)
-    expect_mapequal(filter_keys(m, filter), dplyr::filter(m, vs == 0))
-
-    DBI::dbDisconnect(conn)
-  }
-})
diff --git a/tests/testthat/test-digest_to_checksum.R b/tests/testthat/test-digest_to_checksum.R
new file mode 100644
index 00000000..52b6d1ef
--- /dev/null
+++ b/tests/testthat/test-digest_to_checksum.R
@@ -0,0 +1,54 @@
+test_that("digest_to_checksum() works", {
+  for (conn in get_test_conns()) {
+
+    expect_s3_class(mtcars |> digest_to_checksum(), "data.frame")
+    expect_s3_class(mtcars |> tibble::as_tibble() |> digest_to_checksum(), "tbl_df")
+    expect_s3_class(get_table(conn, "__mtcars") |> digest_to_checksum(), "tbl_dbi")
+
+    # Check that col argument works
+    expect_equal(mtcars |> digest_to_checksum(col = "checky") |> dplyr::pull("checky"),
+                 mtcars |> digest_to_checksum() |> dplyr::pull("checksum"))
+
+
+    expect_equal(mtcars |> dplyr::mutate(name = rownames(mtcars)) |> digest_to_checksum() |> colnames(),
+                 get_table(conn, "__mtcars") |> digest_to_checksum() |> colnames())
+
+
+    # Check that NA's generate unique checksums
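+    # (both rows contain a single "A", just in different columns; if NA positions were
+    # ignored when hashing, the two rows would collide)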
+    x <- data.frame(col1 = c("A", NA),
+                    col2 = c(NA, "A"))
+
+    # .. locally
+    checksums <- x |> digest_to_checksum() |> dplyr::pull("checksum")
+    expect_false(checksums[1] == checksums[2])
+
+    # .. and on the remote
+    x <- suppressMessages(
+      dplyr::copy_to(conn, x, name = id("test.SCDB_tmp1", conn), overwrite = TRUE, temporary = FALSE)
+    )
+
+    checksums <- x |> digest_to_checksum() |> dplyr::pull("checksum")
+    expect_false(checksums[1] == checksums[2])
+
+    DBI::dbDisconnect(conn)
+  }
+})
+
+
+test_that("digest_to_checksum() warns correctly when overwriting", {
+  for (conn in get_test_conns()) {
+
+    checksum_vector <- mtcars |>
+      digest_to_checksum() |>
+      dplyr::pull(checksum)
+
+    expect_warning(checksum_vector2 <- mtcars |>
+                     digest_to_checksum(col = "checksum") |>
+                     digest_to_checksum(col = "checksum", warn = TRUE) |>
+                     dplyr::pull(checksum))
+
+    expect_identical(checksum_vector, checksum_vector2)
+
+    DBI::dbDisconnect(conn)
+  }
+})
diff --git a/tests/testthat/test-filter_keys.R b/tests/testthat/test-filter_keys.R
new file mode 100644
index 00000000..1dd07032
--- /dev/null
+++ b/tests/testthat/test-filter_keys.R
@@ -0,0 +1,37 @@
+test_that("filter_keys() works", {
+  for (conn in get_test_conns()) {
+
+    x <- get_table(conn, "__mtcars")
+
+    expect_equal(x,
+                 x |> filter_keys(NULL))
+
+    filter <- x |> utils::head(10) |> dplyr::select(name)
+    expect_equal(x |>
+                   dplyr::filter(name %in% !!dplyr::pull(filter, name)) |>
+                   dplyr::collect(),
+                 x |>
+                   filter_keys(filter) |>
+                   dplyr::collect())
+
+    filter <- x |> utils::head(10) |> dplyr::select(vs, am) |> dplyr::distinct()
+    expect_equal(x |>
+                   dplyr::inner_join(filter, by = c("vs", "am")) |>
+                   dplyr::collect(),
+                 x |>
+                   filter_keys(filter) |>
+                   dplyr::collect())
+
+    # Filtering with NULL means no filtering is done
+    m <- mtcars
+    row.names(m) <- NULL
+    filter <- NULL
+    expect_identical(filter_keys(m, filter), m)
+
+    # Filtering by vs = 0
+    filter <- data.frame(vs = 0)
+    expect_mapequal(filter_keys(m, filter), dplyr::filter(m, vs == 0))
+
+    DBI::dbDisconnect(conn)
+  }
+})
diff --git a/tests/testthat/test-getTableSignature.R b/tests/testthat/test-getTableSignature.R
new file mode 100644
index 00000000..2f55bdf0
--- /dev/null
+++ b/tests/testthat/test-getTableSignature.R
@@ -0,0 +1,6 @@
+test_that("getTableSignature() generates a signature for NULL connections", {
+  expect_identical(
+    lapply(cars, class),
+    as.list(getTableSignature(cars, conn = NULL))
+  )
+})
diff --git a/tests/testthat/test-get_table.R b/tests/testthat/test-get_table.R
index 7768b365..28c72d1c 100644
--- a/tests/testthat/test-get_table.R
+++ b/tests/testthat/test-get_table.R
@@ -56,81 +56,6 @@ test_that("get_tables() works", {
 })
 
 
-test_that("table_exists() works for default schema", {
-  for (conn in get_test_conns()) {
-
-    # Generate table in default schema that does not exist
-    k <- 0
-    while (k < 100) {
-      invalid_table_name <- paste(sample(letters, size = 16, replace = TRUE), collapse = "")
-      k <- k + 1
-      if (DBI::dbExistsTable(conn, id(invalid_table_name, conn))) next
-      break
-    }
-
-    if (k < 100) {
-
-      # Without explicit schema, table_exists assumes default schema
-      expect_true(table_exists(conn, "__mtcars"))
-      expect_false(table_exists(conn, invalid_table_name))
-
-      expect_true(table_exists(conn, DBI::Id(table = "__mtcars")))
-      expect_false(table_exists(conn, DBI::Id(table = invalid_table_name)))
-
-      # Using the default schema should therefore yield the same results
-      expect_true(table_exists(conn, paste(get_schema(conn), "__mtcars", sep = ".")))
-      expect_false(table_exists(conn, paste(get_schema(conn), invalid_table_name, sep = ".")))
-
-      expect_true(table_exists(conn, DBI::Id(schema = get_schema(conn), table = "__mtcars")))
-      expect_false(table_exists(conn, DBI::Id(schema = get_schema(conn), table = invalid_table_name)))
-
-    } else {
-      warning("Non-existing table in default schema could not be generated!")
-    }
-
-    DBI::dbDisconnect(conn)
-  }
-})
-
-
-test_that("table_exists() works for non-default schema", {
-  for (conn in get_test_conns()) {
-
-    # Generate schema that does not exist
-    k <- 0
-    while (k < 100) {
-      invalid_schema_name <- paste(sample(letters, size = 16, replace = TRUE), collapse = "")
-      k <- k + 1
-      if (schema_exists(conn, invalid_schema_name)) next
-      break
-    }
-
-    if (k < 100) {
-
-      # With an implied schema, table_exists should still determine existence correctly
-
-      # Character inputs
-      expect_true(table_exists(conn, "test.mtcars"))
-      expect_false(table_exists(conn, paste(invalid_schema_name, "mtcars", sep = ".")))
-
-
-      # DBI::Id inputs
-      if (schema_exists(conn, "test")) {
-        expect_true(table_exists(conn, DBI::Id(schema = "test", table = "mtcars")))
-      } else {
-        expect_false(table_exists(conn, DBI::Id(schema = "test", table = "mtcars")))
-      }
-      expect_false(table_exists(conn, DBI::Id(schema = invalid_schema_name, table = "mtcars")))
-
-    } else {
-      warning("Non-existing schema could not be generated!")
-    }
-
-    DBI::dbDisconnect(conn)
-  }
-})
-
-
 test_that("get_table returns list of tables if no table is requested", {
   for (conn in get_test_conns()) {
 
@@ -244,29 +169,3 @@ test_that("get_table() works when tables does not exist in non existing schema",
     DBI::dbDisconnect(conn)
   }
 })
-
-
-test_that("table_exists() fails when multiple matches are found", {
-  conns <- get_test_conns()
-  for (conn_id in seq_along(conns)) {
-
-    conn <- conns[[conn_id]]
-
-    # Not all data bases support schemas.
-    # Here we filter out the data bases that do not support schema
-    # NOTE: SQLite does support schema, but we test both with and without attaching schemas
-    if (schema_exists(conn, "test") && schema_exists(conn, "test.one")) {
-
-      DBI::dbExecute(conn, 'CREATE TABLE "test"."one.two"(a TEXT)')
-      DBI::dbExecute(conn, 'CREATE TABLE "test.one"."two"(b TEXT)')
-
-      expect_error(
-        table_exists(conn, "test.one.two"),
-        regex = "More than one table matching 'test.one.two' was found!"
-      )
-
-    }
-
-    DBI::dbDisconnect(conn)
-  }
-})
diff --git a/tests/testthat/test-get_tables.R b/tests/testthat/test-get_tables.R
new file mode 100644
index 00000000..5f03b9dc
--- /dev/null
+++ b/tests/testthat/test-get_tables.R
@@ -0,0 +1,56 @@
+test_that("get_tables() works", {
+  for (conn in get_test_conns()) {
+
+    tables <- get_tables(conn)
+    expect_s3_class(tables, "data.frame")
+
+    db_table_names <- tables |>
+      tidyr::unite("db_table_name", "schema", "table", sep = ".", na.rm = TRUE) |>
+      dplyr::pull(db_table_name)
+
+
+    # Check for the existence of "test.mtcars" and "__mtcars" (added during test setup)
+    # For SQLite connections, we don't always have the "test" schema, so we check for its existence
+    # and use default schema if it does not exist.
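+    # For example, with an attached "test" schema, table_1 resolves to "test.mtcars";
+    # on a plain SQLite connection it would fall back to e.g. "main.test.mtcars".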
+    table_1 <- paste(c(switch(!schema_exists(conn, "test"), get_schema(conn)), "test.mtcars"), collapse = ".")
+    table_2 <- paste(c(get_schema(conn), "__mtcars"), collapse = ".")
+
+    # We should not get tables twice
+    expect_setequal(db_table_names, unique(db_table_names))
+
+    # Our test tables should be present
+    checkmate::expect_subset(c(table_1, table_2), db_table_names)
+
+
+    # Now test with pattern
+    db_table_names <- get_tables(conn, pattern = "__mt") |>
+      tidyr::unite("db_table_name", "schema", "table", sep = ".", na.rm = TRUE) |>
+      dplyr::pull(db_table_name)
+
+    # We should not get tables twice
+    expect_setequal(db_table_names, unique(db_table_names))
+
+    # Our test table that matches the pattern should be present
+    expect_false(table_1 %in% db_table_names)
+    expect_true(table_2 %in% db_table_names)
+
+
+    # Now test with temporary tables
+    tmp <- dplyr::copy_to(conn, mtcars, "__mtcars_2", temporary = TRUE)
+    tmp_id <- id(tmp)
+    tmp_name <- paste(tmp_id@name["schema"], tmp_id@name["table"], sep = ".")
+
+    db_table_names <- get_tables(conn, show_temporary = TRUE) |>
+      tidyr::unite("db_table_name", "schema", "table", sep = ".", na.rm = TRUE) |>
+      dplyr::pull(db_table_name)
+
+
+    # We should not get tables twice
+    expect_setequal(db_table_names, unique(db_table_names))
+
+    # Our test tables should be present
+    checkmate::expect_subset(c(table_1, table_2, tmp_name), db_table_names)
+
+    DBI::dbDisconnect(conn)
+  }
+})
diff --git a/tests/testthat/test-id.R b/tests/testthat/test-id.R
new file mode 100644
index 00000000..ee96768c
--- /dev/null
+++ b/tests/testthat/test-id.R
@@ -0,0 +1,114 @@
+test_that("id() works for character input without implied schema", {
+  for (conn in get_test_conns()) {
+
+    # Without schema, we expect:
+
+    # ... no change if no conn is given
+    expect_identical(id("test_mtcars"), DBI::Id(table = "test_mtcars"))
+
+    # ... the default schema if conn is given
+    expect_identical(id("test_mtcars", conn), DBI::Id(schema = SCDB::get_schema(conn), table = "test_mtcars"))
+
+    DBI::dbDisconnect(conn)
+  }
+})
+
+
+test_that("id() works for character input with implied schema", {
+  # With schema we expect the implied schema.table to be resolved:
+
+  # ... when no conn is given, we naively assume schema.table holds true
+  expect_identical(id("test.mtcars"), DBI::Id(schema = "test", table = "mtcars"))
+
+
+  for (conn in get_test_conns()) {
+
+    # ... when conn is given, we check whether the implied schema exists.
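+    # If it does, "test.mtcars" is split into schema "test" and table "mtcars"; if it
+    # does not (plain SQLite), the allow_table_only fallback keeps the whole string as a
+    # table name in the default schema, as asserted below.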
+    # NOTE: All testing connections should have the schema "test" (except SQLite without attached schemas);
+    # therefore, in almost all cases, we should resolve the schema correctly (except the SQLite case above)
+    if (inherits(conn, "SQLiteConnection") && !schema_exists(conn, "test")) {
+      expect_identical(id("test.mtcars", conn), DBI::Id(schema = "main", table = "test.mtcars"))
+    } else {
+      expect_identical(id("test.mtcars", conn), DBI::Id(schema = "test", table = "mtcars"))
+    }
+
+    DBI::dbDisconnect(conn)
+  }
+})
+
+
+test_that("id() works for character input with implied schema when schema does not exist", {
+  for (conn in get_test_conns()) {
+
+    # Generate schema that does not exist
+    k <- 0
+    while (k < 100) {
+      invalid_schema_name <- paste(sample(letters, size = 16, replace = TRUE), collapse = "")
+      k <- k + 1
+      if (schema_exists(conn, invalid_schema_name)) next
+      break
+    }
+
+    if (k < 100) {
+
+      table_name <- paste(invalid_schema_name, "mtcars", sep = ".")
+
+      # When schema does not exist and allow_table_only is TRUE, the schema should be the default schema
+      expect_identical(id(table_name, conn = conn, allow_table_only = TRUE),
+                       DBI::Id(schema = get_schema(conn), table = table_name))
+
+      # When schema does not exist and allow_table_only is FALSE, the schema should be as implied
+      expect_identical(id(table_name, conn = conn, allow_table_only = FALSE),
+                       DBI::Id(schema = invalid_schema_name, table = "mtcars"))
+
+    } else {
+      warning("Non-existing schema could not be generated!")
+    }
+
+    DBI::dbDisconnect(conn)
+
+    # When the connection is closed, the existence of the schema cannot be validated and an error should be given
+    expect_error(id(table_name, conn = conn), r"{DBI::dbIsValid\(conn\): FALSE}")
+  }
+})
+
+
+test_that("id() works for DBI::Id inputs", {
+  for (conn in get_test_conns()) {
+
+    # When passing an Id without a schema, id should enrich the Id with the default schema
+    expect_identical(
+      id(DBI::Id(table = "mtcars"), conn),
+      DBI::Id(schema = get_schema(conn), table = "mtcars")
+    )
+
+    DBI::dbDisconnect(conn)
+  }
+})
+
+
+test_that("id() is consistent for tbl_dbi inputs", {
+  for (conn in get_test_conns()) {
+
+    expectation <- id(dplyr::tbl(conn, id("test.mtcars", conn), check_from = FALSE))
+
+    expect_identical(
+      expectation,
+      id.tbl_dbi(dplyr::tbl(conn, id("test.mtcars", conn), check_from = FALSE))
+    )
+
+    DBI::dbDisconnect(conn)
+  }
+})
+
+
+test_that("as.character.Id() works", {
+  expect_identical(as.character(DBI::Id(table = "table")), "table")
+  expect_identical(as.character(DBI::Id(schema = "schema", table = "table")), "schema.table")
+  expect_identical(as.character(DBI::Id(catalog = "catalog", schema = "schema", table = "table")),
+                   "catalog.schema.table")
+
+  expect_identical(as.character(DBI::Id(table = "table", schema = "schema")), "schema.table")
+  expect_identical(as.character(DBI::Id(table = "table", schema = "schema", catalog = "catalog")),
+                   "catalog.schema.table")
+})
diff --git a/tests/testthat/test-interlace_sql.R b/tests/testthat/test-interlace_sql.R
new file mode 100644
index 00000000..9101191e
--- /dev/null
+++ b/tests/testthat/test-interlace_sql.R
@@ -0,0 +1,50 @@
+test_that("interlace_sql() works", {
+  for (conn in get_test_conns()) {
+
+    t1 <- data.frame(key = c("A", "A", "B"),
+                     obs_1 = c(1, 2, 2),
+                     valid_from = as.Date(c("2021-01-01", "2021-02-01", "2021-01-01")),
+                     valid_until = as.Date(c("2021-02-01", "2021-03-01", NA)))
+
+
+    t2 <- data.frame(key = c("A", "B"),
+                     obs_2 = c("a", "b"),
+                     valid_from = as.Date(c("2021-01-01", "2021-01-01")),
+                     valid_until = as.Date(c("2021-04-01", NA)))
+
+
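+    # t_ref is the expected interlacing of t1 and t2: the validity periods are split at
+    # every date where either observation changes, and obs_1/obs_2 are carried along
+    # (e.g. key "A" gains a third period, 2021-03-01 to 2021-04-01, where obs_1 is NA)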
+    t_ref <- data.frame(key = c("A", "A", "A", "B"),
+                        obs_1 = c(1, 2, NA, 2),
+                        obs_2 = c("a", "a", "a", "b"),
+                        valid_from = as.Date(c("2021-01-01", "2021-02-01", "2021-03-01", "2021-01-01")),
+                        valid_until = as.Date(c("2021-02-01", "2021-03-01", "2021-04-01", NA)))
+
+
+    # Copy t1, t2 and t_ref to conn (and suppress check_from message)
+    t1 <- suppressMessages(
+      dplyr::copy_to(conn, t1, name = id("test.SCDB_tmp1", conn), overwrite = TRUE, temporary = FALSE)
+    )
+
+    t2 <- suppressMessages(
+      dplyr::copy_to(conn, t2, name = id("test.SCDB_tmp2", conn), overwrite = TRUE, temporary = FALSE)
+    )
+
+    t_ref <- suppressMessages(
+      dplyr::copy_to(conn, t_ref, name = id("test.SCDB_tmp3", conn), overwrite = TRUE, temporary = FALSE)
+    )
+
+
+    expect_identical(interlace_sql(list(t1, t2), by = "key") |> dplyr::collect(),
+                     t_ref |> dplyr::collect())
+
+    expect_mapequal(interlace_sql(list(t1, t2), by = "key") |> dplyr::collect(),
+                    interlace_sql(list(t2, t1), by = "key") |> dplyr::collect())
+
+    DBI::dbDisconnect(conn)
+  }
+})
+
+
+test_that("interlace_sql returns early if length(table) == 1", {
+  expect_identical(mtcars$mpg, interlace_sql(mtcars["mpg"], by = "mpg"))
+})
diff --git a/tests/testthat/test-get_schema.R b/tests/testthat/test-schema_exists.R
similarity index 100%
rename from tests/testthat/test-get_schema.R
rename to tests/testthat/test-schema_exists.R
diff --git a/tests/testthat/test-slice_time.R b/tests/testthat/test-slice_time.R
new file mode 100644
index 00000000..ba242e09
--- /dev/null
+++ b/tests/testthat/test-slice_time.R
@@ -0,0 +1,16 @@
+test_that("slice_time() works", {
+  for (conn in get_test_conns()) {
+
+    # SQLite does not work with dates. But since we use ISO 8601 for dates, we can compare lexicographically
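+    # (ISO 8601 strings sort chronologically, e.g. "2022-05-01" < "2022-06-01" holds
+    # for the strings just as it does for the dates)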
warning("Non-existing table in default schema could not be generated!") + } + + DBI::dbDisconnect(conn) + } +}) + + +test_that("table_exists() works for non-default schema", { + for (conn in get_test_conns()) { + + # Generate schema that does not exist + k <- 0 + while (k < 100) { + invalid_schema_name <- paste(sample(letters, size = 16, replace = TRUE), collapse = "") + k <- k + 1 + if (schema_exists(conn, invalid_schema_name)) next + break + } + + if (k < 100) { + + # With an implied schema, table_exists should still determine existence correctly + + # Character inputs + expect_true(table_exists(conn, "test.mtcars")) + expect_false(table_exists(conn, paste(invalid_schema_name, "mtcars", sep = "."))) + + + # DBI::Id inputs + if (schema_exists(conn, "test")) { + expect_true(table_exists(conn, DBI::Id(schema = "test", table = "mtcars"))) + } else { + expect_false(table_exists(conn, DBI::Id(schema = "test", table = "mtcars"))) + } + expect_false(table_exists(conn, DBI::Id(schema = invalid_schema_name, table = "mtcars"))) + + } else { + warning("Non-existing schema could not be generated!") + } + + DBI::dbDisconnect(conn) + } +}) + + +test_that("table_exists() fails when multiple matches are found", { + for (conn in get_test_conns()) { + + # Not all data bases support schemas. + # Here we filter out the data bases that do not support schema + # NOTE: SQLite does support schema, but we test both with and without attaching schemas + if (schema_exists(conn, "test") && schema_exists(conn, "test.one")) { + + DBI::dbExecute(conn, 'CREATE TABLE "test"."one.two"(a TEXT)') + DBI::dbExecute(conn, 'CREATE TABLE "test.one"."two"(b TEXT)') + + expect_error( + table_exists(conn, "test.one.two"), + regex = "More than one table matching 'test.one.two' was found!" + ) + + } + + DBI::dbDisconnect(conn) + } +}) diff --git a/tests/testthat/test-unite.tbl_dbi.R b/tests/testthat/test-unite.tbl_dbi.R new file mode 100644 index 00000000..20b597df --- /dev/null +++ b/tests/testthat/test-unite.tbl_dbi.R @@ -0,0 +1,60 @@ +test_that("unite.tbl_dbi() works", { + for (conn in get_test_conns()) { + + q <- get_table(conn, "__mtcars") |> utils::head(1) + qu_remove <- tidyr::unite(dplyr::select(q, mpg, hp), "new_column", mpg, hp) |> dplyr::compute() + qu <- tidyr::unite(dplyr::select(q, mpg, hp), "new_column", mpg, hp, remove = FALSE) |> dplyr::compute() + qu_alt <- tidyr::unite(dplyr::select(q, mpg, hp), "new_column", "mpg", "hp", remove = FALSE) |> dplyr::compute() + + expect_s3_class(qu_remove, "tbl_dbi") + expect_s3_class(qu, "tbl_dbi") + expect_s3_class(qu_alt, "tbl_dbi") + + expect_equal(colnames(qu_remove), "new_column") + expect_equal(colnames(qu), c("new_column", "mpg", "hp")) + expect_equal(colnames(qu_alt), c("new_column", "mpg", "hp")) + + expect_equal(dplyr::collect(qu), dplyr::collect(qu_alt)) + + # tidyr::unite has some quirky (and FUN!!! behavior) that we are forced to match here + # specifically, the input "col" is converted to a symbol, so we have to do escape-bullshit + # NOTE: the line "dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |> " + # is to account for SQLite not having integer data-types. 
+    expect_mapequal(get_table(conn, "__mtcars") |>
+                      tidyr::unite("new_col", mpg, hp) |>
+                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
+                      dplyr::collect(),
+                    get_table(conn, "__mtcars") |>
+                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
+                      dplyr::collect() |>
+                      tidyr::unite("new_col", mpg, hp))
+
+    col <- "new_col"
+    expect_mapequal(get_table(conn, "__mtcars") |>
+                      tidyr::unite(col, mpg, hp) |>
+                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
+                      dplyr::collect(),
+                    get_table(conn, "__mtcars") |>
+                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
+                      dplyr::collect() |>
+                      tidyr::unite(col, mpg, hp))
+
+    expect_mapequal(get_table(conn, "__mtcars") |>
+                      tidyr::unite(!!col, mpg, hp) |>
+                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
+                      dplyr::collect(),
+                    get_table(conn, "__mtcars") |>
+                      dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) |>
+                      dplyr::collect() |>
+                      tidyr::unite(!!col, mpg, hp))
+
+    # unite places the columns in a particular way; let's be sure we match it
+    qq <- dplyr::mutate(q, dplyr::across(tidyselect::everything(), as.character)) # as.character since SQLite lacks integer types
+    expect_identical(qq |> tidyr::unite("test_col", vs, am) |> dplyr::collect(),
+                     qq |> dplyr::collect() |> tidyr::unite("test_col", vs, am))
+
+    DBI::dbDisconnect(conn)
+  }
+})