Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement $with_columns_seq() and $select_seq() #1003

Merged
merged 2 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@
- Method `$over()` gains an argument `mapping_strategy` (#984, #988).
- New method `$item()` for `DataFrame` and `Series` (#992).
- New active binding `<Series>$struct$fields` (#1002).
- New methods `$select_seq()` and `$with_columns_seq()` for `DataFrame` and
`LazyFrame` (#1003).

### Bug fixes

Expand Down
53 changes: 53 additions & 0 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,26 @@ DataFrame_select = function(...) {
unwrap("in $select()")
}

#' @inherit DataFrame_select title params return
#'
#' @description
#' Similar to `dplyr::mutate()`, except that columns which are not mentioned
#' are discarded (like `.()` in `data.table`).
#'
#' All expressions are run sequentially instead of in parallel. Use this
#' when the work per expression is cheap. Otherwise, `$select()` should be
#' preferred.
#'
#' @examples
#' pl$DataFrame(iris)$select_seq(
#'   pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#'   (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
DataFrame_select_seq = function(...) {
  # The dots are gathered by unpack_list(), forwarded to the internal
  # `select_seq` binding, and the binding's result is handed to unwrap()
  # with the context string "in $select_seq()".
  unwrap(
    .pr$DataFrame$select_seq(self, unpack_list(..., .context = "in $select_seq()")),
    "in $select_seq()"
  )
}

#' Drop in place
#' @name DataFrame_drop_in_place
#' @description Drop a single column in-place and return the dropped column.
Expand Down Expand Up @@ -821,6 +841,39 @@ DataFrame_with_columns = function(...) {
unwrap("in $with_columns()")
}

#' @inherit DataFrame_with_columns title params return
#'
#' @description
#' Add columns or modify existing ones with expressions. This is
#' the equivalent of `dplyr::mutate()` as it keeps unmentioned columns (unlike
#' `$select()`).
#'
#' All expressions are run sequentially instead of in parallel. Use this
#' when the work per expression is cheap. Otherwise, `$with_columns()` should be
#' preferred.
#'
#' @examples
#' pl$DataFrame(iris)$with_columns_seq(
#'   pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#'   (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
#'
#' # same query, passing the expressions as a list
#' l_expr = list(
#'   pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#'   (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
#' pl$DataFrame(iris)$with_columns_seq(l_expr)
#'
#' pl$DataFrame(iris)$with_columns_seq(
#'   pl$col("Sepal.Length")$abs(), # an unnamed expr keeps the name "Sepal.Length"
#'   SW_add_2 = (pl$col("Sepal.Width") + 2)
#' )
DataFrame_with_columns_seq = function(...) {
  # The dots are gathered by unpack_list(), forwarded to the internal
  # `with_columns_seq` binding, and the binding's result is handed to unwrap()
  # with the context string "in $with_columns_seq()".
  unwrap(
    .pr$DataFrame$with_columns_seq(self, unpack_list(..., .context = "in $with_columns_seq()")),
    "in $with_columns_seq()"
  )
}


#' @inherit LazyFrame_head title details
#' @param n Number of rows to return. If a negative value is passed,
Expand Down
8 changes: 7 additions & 1 deletion R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,12 @@ RPolarsDataFrame$drop_in_place <- function(names) .Call(wrap__RPolarsDataFrame__

RPolarsDataFrame$select <- function(exprs) .Call(wrap__RPolarsDataFrame__select, self, exprs)

RPolarsDataFrame$select_seq <- function(exprs) .Call(wrap__RPolarsDataFrame__select_seq, self, exprs)

RPolarsDataFrame$with_columns <- function(exprs) .Call(wrap__RPolarsDataFrame__with_columns, self, exprs)

RPolarsDataFrame$with_columns_seq <- function(exprs) .Call(wrap__RPolarsDataFrame__with_columns_seq, self, exprs)

RPolarsDataFrame$to_struct <- function(name) .Call(wrap__RPolarsDataFrame__to_struct, self, name)

RPolarsDataFrame$unnest <- function(names) .Call(wrap__RPolarsDataFrame__unnest, self, names)
Expand Down Expand Up @@ -1144,11 +1148,13 @@ RPolarsLazyFrame$slice <- function(offset, length) .Call(wrap__RPolarsLazyFrame_

RPolarsLazyFrame$with_columns <- function(exprs) .Call(wrap__RPolarsLazyFrame__with_columns, self, exprs)

RPolarsLazyFrame$with_columns_seq <- function(exprs) .Call(wrap__RPolarsLazyFrame__with_columns_seq, self, exprs)

RPolarsLazyFrame$unnest <- function(names) .Call(wrap__RPolarsLazyFrame__unnest, self, names)

RPolarsLazyFrame$select <- function(exprs) .Call(wrap__RPolarsLazyFrame__select, self, exprs)

RPolarsLazyFrame$select_str_as_lit <- function(exprs) .Call(wrap__RPolarsLazyFrame__select_str_as_lit, self, exprs)
RPolarsLazyFrame$select_seq <- function(exprs) .Call(wrap__RPolarsLazyFrame__select_seq, self, exprs)

RPolarsLazyFrame$tail <- function(n) .Call(wrap__RPolarsLazyFrame__tail, self, n)

Expand Down
44 changes: 42 additions & 2 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,23 @@ LazyFrame_select = function(...) {
unwrap("in $select()")
}

#' @title Select and modify columns of a LazyFrame
#' @inherit LazyFrame_select title
#' @inherit DataFrame_select_seq description params
#' @return A LazyFrame
#' @examples
#' pl$LazyFrame(iris)$select_seq(
#'   pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#'   (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
LazyFrame_select_seq = function(...) {
  # The dots are gathered by unpack_list(), forwarded to the internal
  # `select_seq` binding, and the binding's result is handed to unwrap()
  # with the context string "in $select_seq()".
  unwrap(
    .pr$LazyFrame$select_seq(self, unpack_list(..., .context = "in $select_seq()")),
    "in $select_seq()"
  )
}

#' Select and modify columns of a LazyFrame
#'
#' @inherit DataFrame_with_columns description params
#' @keywords LazyFrame
#'
#' @return A LazyFrame
#' @examples
#' pl$LazyFrame(iris)$with_columns(
Expand All @@ -298,6 +312,32 @@ LazyFrame_with_columns = function(...) {
unwrap("in $with_columns()")
}

#' @inherit LazyFrame_with_columns title
#' @inherit DataFrame_with_columns_seq description params
#'
#' @return A LazyFrame
#' @examples
#' pl$LazyFrame(iris)$with_columns_seq(
#'   pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#'   (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
#'
#' # same query, passing the expressions as a list
#' l_expr = list(
#'   pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#'   (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
#' pl$LazyFrame(iris)$with_columns_seq(l_expr)
#'
#' pl$LazyFrame(iris)$with_columns_seq(
#'   pl$col("Sepal.Length")$abs(), # an unnamed expr keeps the name "Sepal.Length"
#'   SW_add_2 = (pl$col("Sepal.Width") + 2)
#' )
LazyFrame_with_columns_seq = function(...) {
  # The dots are gathered by unpack_list(), forwarded to the internal
  # `with_columns_seq` binding, and the binding's result is handed to unwrap()
  # with the context string "in $with_columns_seq()".
  unwrap(
    .pr$LazyFrame$with_columns_seq(self, unpack_list(..., .context = "in $with_columns_seq()")),
    "in $with_columns_seq()"
  )
}


#' @inherit DataFrame_with_row_index title description params
#' @return A new LazyFrame with a counter column in front
Expand Down
30 changes: 30 additions & 0 deletions man/DataFrame_select_seq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions man/DataFrame_with_columns_seq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions man/LazyFrame_select_seq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/LazyFrame_with_columns.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions man/LazyFrame_with_columns_seq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 11 additions & 8 deletions src/rust/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::lazy::dsl::*;
use crate::rdataframe::RPolarsDataFrame as RDF;
use crate::rdatatype::{new_ipc_compression, new_parquet_compression, RPolarsDataType};
use crate::robj_to;
use crate::rpolarserr::{polars_to_rpolars_err, RPolarsErr, RResult, WithRctx};
use crate::rpolarserr::{polars_to_rpolars_err, RPolarsErr, RResult};
use crate::utils::{r_result_list, try_f64_into_usize};
use extendr_api::prelude::*;
use pl::{AsOfOptions, Duration, RollingGroupOptions};
Expand Down Expand Up @@ -291,24 +291,27 @@ impl RPolarsLazyFrame {
}

pub fn with_columns(&self, exprs: Robj) -> RResult<Self> {
let exprs =
robj_to!(VecPLExprColNamed, exprs).when("preparing expressions for $with_columns()")?;
let exprs = robj_to!(VecPLExprColNamed, exprs)?;
Ok(RPolarsLazyFrame(self.clone().0.with_columns(exprs)))
}

// Converts the R-side `exprs` via `robj_to!(VecPLExprColNamed, ..)` and passes them to
// `with_columns_seq` on a clone of the wrapped lazy frame; a conversion error is
// propagated with `?`.
pub fn with_columns_seq(&self, exprs: Robj) -> RResult<Self> {
    let named_exprs = robj_to!(VecPLExprColNamed, exprs)?;
    // Operate on a clone so that `self` (borrowed immutably) is left as it was.
    let inner = self.clone().0;
    Ok(RPolarsLazyFrame(inner.with_columns_seq(named_exprs)))
}

// Calls `unnest(names)` on a clone of the wrapped lazy frame and re-wraps the result.
// This body never produces an `Err`; it always returns `Ok`.
pub fn unnest(&self, names: Vec<String>) -> RResult<Self> {
    let inner = self.clone().0;
    let unnested = inner.unnest(names);
    Ok(RPolarsLazyFrame(unnested))
}

pub fn select(&self, exprs: Robj) -> RResult<Self> {
let exprs =
robj_to!(VecPLExprColNamed, exprs).when("preparing expressions for $select()")?;
let exprs = robj_to!(VecPLExprColNamed, exprs)?;
Ok(RPolarsLazyFrame(self.clone().0.select(exprs)))
}

pub fn select_str_as_lit(&self, exprs: Robj) -> RResult<Self> {
let exprs = robj_to!(VecPLExprNamed, exprs).when("preparing columns for DataFrame")?;
Ok(RPolarsLazyFrame(self.clone().0.select(exprs)))
// Converts the R-side `exprs` via `robj_to!(VecPLExprColNamed, ..)` and passes them to
// `select_seq` on a clone of the wrapped lazy frame; a conversion error is
// propagated with `?`.
pub fn select_seq(&self, exprs: Robj) -> RResult<Self> {
    let named_exprs = robj_to!(VecPLExprColNamed, exprs)?;
    // Operate on a clone so that `self` (borrowed immutably) is left as it was.
    let inner = self.clone().0;
    Ok(RPolarsLazyFrame(inner.select_seq(named_exprs)))
}

fn tail(&self, n: Robj) -> Result<RPolarsLazyFrame, String> {
Expand Down
8 changes: 8 additions & 0 deletions src/rust/src/rdataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,10 +308,18 @@ impl RPolarsDataFrame {
self.lazy().select(exprs)?.collect()
}

// Eager `select_seq`: builds the query through `self.lazy()`, applies `select_seq(exprs)`
// (returning early on error), then collects the result.
pub fn select_seq(&self, exprs: Robj) -> RResult<RPolarsDataFrame> {
    let lazy_query = self.lazy().select_seq(exprs)?;
    lazy_query.collect()
}

// Eager `with_columns`: builds the query through `self.lazy()`, applies `with_columns(exprs)`
// (returning early on error), then collects the result.
pub fn with_columns(&self, exprs: Robj) -> RResult<RPolarsDataFrame> {
    let lazy_query = self.lazy().with_columns(exprs)?;
    lazy_query.collect()
}

// Eager `with_columns_seq`: builds the query through `self.lazy()`, applies
// `with_columns_seq(exprs)` (returning early on error), then collects the result.
pub fn with_columns_seq(&self, exprs: Robj) -> RResult<RPolarsDataFrame> {
    let lazy_query = self.lazy().with_columns_seq(exprs)?;
    lazy_query.collect()
}

pub fn to_struct(&self, name: Robj) -> RResult<RPolarsSeries> {
use pl::IntoSeries;
let name = robj_to!(Option, str, name)?.unwrap_or("");
Expand Down
Loading
Loading