Skip to content

Commit

Permalink
feat: add basic SQL features (#457)
Browse files Browse the repository at this point in the history
Co-authored-by: Etienne Bacher <[email protected]>
  • Loading branch information
eitsupi and etiennebacher authored Nov 2, 2023
1 parent bd90c93 commit 0e730bc
Show file tree
Hide file tree
Showing 23 changed files with 501 additions and 7 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ Collate:
's3_methods.R'
'series__series.R'
'series__trait.R'
'sql.R'
'translation.R'
'vctrs.R'
'zzz.R'
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ S3method("$",RField)
S3method("$",RNullValues)
S3method("$",RPolarsDataType)
S3method("$",RPolarsErr)
S3method("$",RPolarsSQLContext)
S3method("$",RPolarsStringCacheHolder)
S3method("$",RThreadHandle)
S3method("$",Series)
Expand Down Expand Up @@ -72,6 +73,7 @@ S3method("[[",RField)
S3method("[[",RNullValues)
S3method("[[",RPolarsDataType)
S3method("[[",RPolarsErr)
S3method("[[",RPolarsSQLContext)
S3method("[[",RPolarsStringCacheHolder)
S3method("[[",RThreadHandle)
S3method("[[",Series)
Expand All @@ -88,6 +90,7 @@ S3method(.DollarNames,GroupBy)
S3method(.DollarNames,LazyFrame)
S3method(.DollarNames,RField)
S3method(.DollarNames,RPolarsErr)
S3method(.DollarNames,RPolarsSQLContext)
S3method(.DollarNames,RThreadHandle)
S3method(.DollarNames,Series)
S3method(.DollarNames,Then)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
- `$scan_parquet()` gains an argument `hive_partitioning`.
- `$meta$tree_format()` has a better formatted output.

## What's changed

- New class `RPolarsSQLContext` and its methods to perform SQL queries on DataFrame-like objects.
To use this feature, the Rust library must be built with the `full` feature. (#457)

# polars 0.9.0

## BREAKING CHANGES DUE TO RUST-POLARS UPDATE
Expand Down
3 changes: 2 additions & 1 deletion R/after-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ extendr_method_to_pure_functions = function(env, class_name = NULL) {
.pr$RPolarsErr = extendr_method_to_pure_functions(RPolarsErr)
.pr$RThreadHandle = extendr_method_to_pure_functions(RThreadHandle)
.pr$RPolarsStringCacheHolder = extendr_method_to_pure_functions(RPolarsStringCacheHolder)
.pr$RPolarsSQLContext = extendr_method_to_pure_functions(RPolarsSQLContext)



Expand Down Expand Up @@ -267,7 +268,7 @@ DataType = clone_env_one_level_deep(RPolarsDataType)
pl_class_names = sort(
c(
"LazyFrame", "Series", "LazyGroupBy", "DataType", "Expr", "DataFrame",
"When", "Then", "ChainedWhen", "ChainedThen"
"When", "Then", "ChainedWhen", "ChainedThen", "RPolarsSQLContext"
)
) # TODO discover all public class automatically

Expand Down
18 changes: 18 additions & 0 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,24 @@ Series$from_arrow <- function(name, array) .Call(wrap__Series__from_arrow, name,
#' @export
`[[.Series` <- `$.Series`

# NOTE(review): this file looks like generated binding code (extendr wrappers);
# confirm whether hand-written comments survive regeneration.

# Method table for RPolarsSQLContext objects: an environment (with an empty
# parent) holding one thin wrapper per compiled routine.
RPolarsSQLContext <- new.env(parent = emptyenv())

# Constructor wrapper; takes no arguments and does not reference `self`.
RPolarsSQLContext$new <- function() .Call(wrap__RPolarsSQLContext__new)

# The wrappers below reference a free variable `self`. It is supplied by the
# `$` method further down, which re-homes each wrapper into its own call frame.
RPolarsSQLContext$execute <- function(query) .Call(wrap__RPolarsSQLContext__execute, self, query)

RPolarsSQLContext$get_tables <- function() .Call(wrap__RPolarsSQLContext__get_tables, self)

RPolarsSQLContext$register <- function(name, lf) .Call(wrap__RPolarsSQLContext__register, self, name, lf)

RPolarsSQLContext$unregister <- function(name) .Call(wrap__RPolarsSQLContext__unregister, self, name)

# `obj$name` looks up `name` in the method table and sets the function's
# enclosing environment to this call frame, so that `self` inside the wrapper
# resolves to the object on the left-hand side of `$`.
#' @export
`$.RPolarsSQLContext` <- function (self, name) { func <- RPolarsSQLContext[[name]]; environment(func) <- environment(); func }

# `[[` is an alias of `$`, so obj[["name"]] behaves like obj$name.
#' @export
`[[.RPolarsSQLContext` <- `$.RPolarsSQLContext`

RPolarsStringCacheHolder <- new.env(parent = emptyenv())

RPolarsStringCacheHolder$hold <- function() .Call(wrap__RPolarsStringCacheHolder__hold)
Expand Down
167 changes: 167 additions & 0 deletions R/sql.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#' @title Run SQL queries against DataFrame/LazyFrame data.
#' @description Run SQL queries against DataFrame/LazyFrame data.
#' @details Currently, only available when built with the `full` feature.
#' See [polars_info()] for more information.
#' @name SQLContext_class
#' @keywords SQLContext
#' @examplesIf pl$polars_info()$features$sql
#' lf = pl$LazyFrame(a = 1:3, b = c("x", NA, "z"))
#' res = pl$SQLContext(frame = lf)$execute(
#' "SELECT b, a*2 AS two_a FROM frame WHERE b IS NOT NULL"
#' )
#' res$collect()
RPolarsSQLContext


#' @title Auto-complete `$` access on an RPolarsSQLContext
#' @description Completion hook for `$` on RPolarsSQLContext objects; it is
#' called internally by the interactive R session, not by users.
#' @param x RPolarsSQLContext
#' @param pattern Partially typed code stump (a string) to complete.
#' @return char vec
#' @export
#' @noRd
#' @inherit .DollarNames.DataFrame return
#' @keywords internal
.DollarNames.RPolarsSQLContext = function(x, pattern = "") {
  # Candidates come from the RPolarsSQLContext method table; `x` itself is
  # not inspected.
  get_method_usages(
    RPolarsSQLContext,
    pattern = pattern
  )
}


#' Initialise a new SQLContext
#' @name pl_SQLContext
#' @description Create a new SQLContext and register each of the supplied
#' frames under the name of its argument.
#' @param ... Name-value pairs of [LazyFrame][LazyFrame_class] like objects to register.
#' @return RPolarsSQLContext
#' @examplesIf pl$polars_info()$features$sql
#' ctx = pl$SQLContext(mtcars = mtcars)
#' ctx
pl$SQLContext = function(...) {
  self = .pr$RPolarsSQLContext$new()
  frames = list(...)
  frame_names = names(frames)

  # seq_along() is empty when no frames were supplied, so the loop is skipped.
  for (i in seq_along(frames)) {
    unwrap(
      .pr$RPolarsSQLContext$register(self, frame_names[i], frames[[i]]),
      "in $SQLContext()"
    )
  }

  self
}


#' @title Execute a SQL query against the registered data
#' @description Parse the given SQL query and run it against the frames
#' registered in this context.
#' @param query A valid string SQL query.
#' @param eager A logical flag indicating whether to collect the result immediately.
#' If FALSE (default), a [LazyFrame][LazyFrame_class] is returned. If TRUE, a [DataFrame][DataFrame_class] is returned.
#' @return A [LazyFrame][LazyFrame_class] or [DataFrame][DataFrame_class] depending on the value of `eager`.
#' @examplesIf pl$polars_info()$features$sql
#' query = "SELECT * FROM mtcars WHERE cyl = 4"
#' pl$SQLContext(mtcars = mtcars)$execute(query)
#' pl$SQLContext(mtcars = mtcars)$execute(query, eager = TRUE)
SQLContext_execute = function(query, eager = FALSE) {
  lazy_result = unwrap(
    .pr$RPolarsSQLContext$execute(self, query),
    "in $execute()"
  )

  # Only materialise the result when the caller asked for it.
  if (eager) {
    return(lazy_result$collect())
  }

  lazy_result
}


#' @title Register a single frame as a table
#' @description Register one frame in this context under the given table name.
#' @param name A string name to register the frame as.
#' @param frame A [LazyFrame][LazyFrame_class] like object to register.
#' @return Returns the [SQLContext_class] object invisibly.
#' @examplesIf pl$polars_info()$features$sql
#' ctx = pl$SQLContext()
#' ctx$register("mtcars", mtcars)
#'
#' ctx$execute("SELECT * FROM mtcars LIMIT 5")$collect()
SQLContext_register = function(name, frame) {
  unwrap(
    .pr$RPolarsSQLContext$register(self, name, frame),
    "in $register()"
  )

  # Hand the context back invisibly so that calls can be chained.
  invisible(self)
}


#' @title Register multiple frames as tables
#' @description Register several frames at once; each frame is registered
#' under the name of its argument.
#' @param ... Name-value pairs of [LazyFrame][LazyFrame_class] like objects to register.
#' @return Returns the [SQLContext_class] object invisibly.
#' @examplesIf pl$polars_info()$features$sql
#' ctx = pl$SQLContext()
#' r_df = mtcars
#' pl_df = pl$DataFrame(mtcars)
#' pl_lf = pl$LazyFrame(mtcars)
#'
#' ctx$register_many(r_df = r_df, pl_df = pl_df, pl_lf = pl_lf)
#'
#' ctx$execute(
#' "SELECT * FROM r_df
#' UNION ALL
#' SELECT * FROM pl_df
#' UNION ALL
#' SELECT * FROM pl_lf"
#' )$collect()
SQLContext_register_many = function(...) {
  frames = list(...)
  frame_names = names(frames)

  # seq_along() is empty when nothing was passed, so the loop is skipped.
  for (i in seq_along(frames)) {
    unwrap(
      .pr$RPolarsSQLContext$register(self, frame_names[i], frames[[i]]),
      "in $register_many()"
    )
  }

  invisible(self)
}


#' @title Unregister tables by name
#' @description Unregister tables by name. Each name is unregistered in turn;
#' if unregistering one of them fails, an error is raised with the context
#' `"in $unregister()"`.
#' @param names A character vector of table names to unregister.
#' @return Returns the [SQLContext_class] object invisibly.
#' @examplesIf pl$polars_info()$features$sql
#' # Initialise a new SQLContext and register the given tables.
#' ctx = pl$SQLContext(x = mtcars, y = mtcars, z = mtcars)
#' ctx$tables()
#'
#' # Unregister some tables.
#' ctx$unregister(c("x", "y"))
#' ctx$tables()
SQLContext_unregister = function(names) {
  for (index in seq_along(names)) {
    # The context string must name this method: it was previously
    # "in $register()", which attributed failures to the wrong call.
    .pr$RPolarsSQLContext$unregister(self, names[index]) |>
      unwrap("in $unregister()")
  }

  # Hand the context back invisibly so that calls can be chained.
  invisible(self)
}


#' @title List registered tables
#' @description Return the names of the tables currently registered in this
#' context.
#' @return A character vector of the registered table names.
#' @examplesIf pl$polars_info()$features$sql
#' ctx = pl$SQLContext()
#' ctx$tables()
#' ctx$register("df1", mtcars)
#' ctx$tables()
#' ctx$register("df2", mtcars)
#' ctx$tables()
SQLContext_tables = function() {
  unwrap(
    .pr$RPolarsSQLContext$get_tables(self),
    "in $tables()"
  )
}
2 changes: 2 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ replace_private_with_pub_methods(Series, "^Series_")
# RThreadHandle
replace_private_with_pub_methods(RThreadHandle, "^RThreadHandle_")

# SQLContext
replace_private_with_pub_methods(RPolarsSQLContext, "^SQLContext_")



Expand Down
4 changes: 2 additions & 2 deletions docs/make-docs.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ rd2md = function(src) {
for (i in seq_along(chunks)) {
if (any(grepl("<h3>Usage</h3>", chunks[[i]], fixed = TRUE))) {
# order is important
for (cl in c("DataFrame_", "Series_", "Expr_", "LazyFrame_", "LazyGroupBy_", "GroupBy_", "RField_")) {
for (cl in c("DataFrame_", "Series_", "Expr_", "LazyFrame_", "LazyGroupBy_", "GroupBy_", "RField_", "SQLContext_")) {
chunks[[i]] = gsub(cl, paste0("&lt", sub("_$", "", cl), "&gt$"), chunks[[i]])
}
}
Expand Down Expand Up @@ -136,7 +136,7 @@ make_doc_hierarchy = function() {
"pl", "Series", "DataFrame", "LazyFrame", "GroupBy",
"LazyGroupBy", "ExprList", "ExprBin", "ExprCat", "ExprDT",
"ExprMeta", "ExprName", "ExprStr", "ExprStruct",
"Expr", "IO", "RThreadHandle"
"Expr", "IO", "RThreadHandle", "SQLContext"
)
for (cl in classes) {
files = grep(paste0("^", cl, "_"), other, value = TRUE)
Expand Down
22 changes: 22 additions & 0 deletions man/SQLContext_class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/SQLContext_execute.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/SQLContext_register.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions man/SQLContext_register_many.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 0e730bc

Please sign in to comment.