From 47ad198945c2878f6dad92905da3a4a3fb846357 Mon Sep 17 00:00:00 2001 From: Zhiyu Fu Date: Tue, 15 Dec 2020 23:28:28 -0600 Subject: [PATCH 1/8] add transform! and select! --- src/DataFramesMeta.jl | 132 ++++++++++++++++++++++++++++++++++++++++++ test/dataframes.jl | 6 ++ 2 files changed, 138 insertions(+) diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index 3740c384..f6f241c5 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -6,6 +6,7 @@ using Reexport # Basics: export @with, @where, @orderby, @transform, @by, @combine, @select, @eachrow, + @transform!, @select!, @byrow, @byrow!, @based_on # deprecated @@ -583,6 +584,59 @@ macro transform(x, args...) esc(transform_helper(x, args...)) end +############################################################################## +## +## transform! & @transform! +## +############################################################################## + + +function transform!_helper(x, args...) + + t = (fun_to_vec(arg) for arg in args) + + quote + $DataFrames.transform!($x, $(t...)) + end +end + +""" + @transform!(d, i...) + +Mutate `d` inplace to add additional columns or keys based on keyword arguments and return it. + +### Arguments + +* `d` : an `AbstractDataFrame`, or `GroupedDataFrame` +* `i...` : keyword arguments defining new columns or keys + +### Returns + +* `::AbstractDataFrame` or `::GroupedDataFrame` + +### Examples + +```jldoctest +julia> using DataFramesMeta + +julia> df = DataFrame(A = 1:3, B = [2, 1, 2]); + +julia> @transform!(df, a = 2 * :A, x = :A .+ :B); + +julia> df +3×4 DataFrame +│ Row │ A │ B │ a │ x │ +│ │ Int64 │ Int64 │ Int64 │ Int64 │ +├─────┼───────┼───────┼───────┼───────┤ +│ 1 │ 1 │ 2 │ 2 │ 3 │ +│ 2 │ 2 │ 1 │ 4 │ 3 │ +│ 3 │ 3 │ 2 │ 6 │ 5 │ +``` +""" +macro transform!(x, args...) + esc(transform!_helper(x, args...)) +end + ############################################################################## ## @@ -868,4 +922,82 @@ macro select(x, args...) esc(select_helper(x, args...)) end + +############################################################################## +## +## @select! - in-place select and transform columns +## +############################################################################## + +function select!_helper(x, args...) + t = (fun_to_vec(arg) for arg in args) + + quote + $DataFrames.select!($x, $(t...)) + end +end + +""" + @select!(d, e...) + +Mutate `d` in-place to retain only columns or transformations specified by `e` and return it. + +### Arguments + +* `d` : an AbstractDataFrame +* `e` : keyword arguments specifying new columns in terms of existing columns + or symbols to specify existing columns + +### Returns + +* `::AbstractDataFrame` + +### Examples + +```jldoctest +julia> using DataFrames, DataFramesMeta + +julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), c = 1:8); + +julia> @select!(df, :c, :a); + +julia> df + +8×2 DataFrame +│ Row │ c │ a │ +│ │ Int64 │ Int64 │ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 1 │ +│ 2 │ 2 │ 2 │ +│ 3 │ 3 │ 3 │ +│ 4 │ 4 │ 4 │ +│ 5 │ 5 │ 1 │ +│ 6 │ 6 │ 2 │ +│ 7 │ 7 │ 3 │ +│ 8 │ 8 │ 4 │ + +julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), c = 1:8); + +julia> @select!(df, :c, x = :b + :c); + +julia> df + +8×2 DataFrame +│ Row │ c │ x │ +│ │ Int64 │ Int64 │ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 3 │ +│ 2 │ 2 │ 3 │ +│ 3 │ 3 │ 5 │ +│ 4 │ 4 │ 5 │ +│ 5 │ 5 │ 7 │ +│ 6 │ 6 │ 7 │ +│ 7 │ 7 │ 9 │ +│ 8 │ 8 │ 9 │ +``` +""" +macro select!(x, args...) + esc(select!_helper(x, args...)) +end + end # module diff --git a/test/dataframes.jl b/test/dataframes.jl index b34893d3..f1ec90b9 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -78,6 +78,9 @@ const ≅ = isequal @test @transform(df, n = 1).n == fill(1, nrow(df)) @test @transform(df, n = :i .* :g).n == [1, 2, 3, 8, 10] + + @transform!(df, n = :i) + @test newdf ≅ df end # Defined outside of `@testset` due to use of `@eval` @@ -194,6 +197,9 @@ end @test @select(df, cols("new" * "_" * "column") = :i).new_column == df.i @test @transform(df, n = :i .* :g).n == [1, 2, 3, 8, 10] + + @select!(df, :i, :g, n = :i .+ :g) + @test df == df2[!, [:i, :g, :n]] end # Defined outside of `@testset` due to use of `@eval` From 3b415ad47f02aefe21993af9f9057a93aab9c2c9 Mon Sep 17 00:00:00 2001 From: Zhiyu Fu Date: Thu, 17 Dec 2020 15:52:54 -0600 Subject: [PATCH 2/8] update docs and readme --- README.md | 2 ++ docs/src/index.md | 42 ++++++++++++++++++++++++++++++++++++++++-- src/DataFramesMeta.jl | 6 +++--- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e5b79a42..7435e1a7 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,9 @@ Metaprogramming tools for DataFrames.jl objects. # Macros * `@transform`, for adding new columns to a data frame +* `@transform!`, for adding new columns to a data frame *in-place* * `@select`, for selecting columns in a data frame +* `@select!`, for selecting columns in a data frame *in-place* * `@combine`, for applying operations on each group of a grouped data frame * `@orderby`, for sorting data frames * `@where`, for keeping rows of a DataFrame matching a given condition diff --git a/docs/src/index.md b/docs/src/index.md index 4833a230..b8fadd36 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,9 +2,9 @@ Metaprogramming tools for DataFrames.jl objects to provide more convenient syntax. -DataFrames.jl has the functions `select`, `transform`, and `combine` +DataFrames.jl has the functions `select(!)`, `transform(!)`, and `combine` for manipulating data frames. DataFramesMeta provides the macros -`@select`, `@transform`, and `@combine` to mirror these functions with +`@select(!)`, `@transform(!)`, and `@combine` to mirror these functions with more convenient syntax. Inspired by [dplyr](https://dplyr.tidyverse.org/) in R and [LINQ](https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/concepts/linq/) in C#. @@ -65,6 +65,25 @@ gd = groupby(df, :x); @select(df, x2 = 2 * :x, :y) @select(gd, x2 = 2 .* :y .* first(:y)) ``` + +## `@select!` + +In-place column selections and transformations, mirroring `select!` in DataFrames.jl. +Only specified columns are kept. Operates on both a `DataFrame` and a `GroupedDataFrame`. +The mutated dataframe is returned. + +When given a `GroupedDataFrame`, performs a transformation by group and then +if necessary repeats the result to have as many rows as the input +data frame. + +```julia +df = DataFrame(x = [1, 1, 2, 2], y = [1, 2, 101, 102]); +gd = groupby(df, :x); +@select!(df, :x, :y) +@select!(df, x = 2 * :x, :y) +@select!(gd, y = 2 .* :y .* first(:y)) +``` + ## `@transform` Add additional columns based on keyword arguments. Operates on both a @@ -82,6 +101,25 @@ gd = groupby(df, :x); @transform(gd, x2 = 2 .* :y .* first(:y)) ``` +## `@transform!` + +Add additional columns based on keyword arguments. Operates on both a +`DataFrame` and a `GroupedDataFrame`. The mutated dataframe is returned. + +When given a `GroupedDataFrame`, performs a transformation by group and then +if necessary repeats the result to have as many rows as the input +data frame. + + + +```julia +df = DataFrame(x = [1, 1, 2, 2], y = [1, 2, 101, 102]); +gd = groupby(df, :x); +@transform!(df, :x, :y) +@transform!(df, x = 2 * :x, :y) +@transform!(gd, y = 2 .* :y .* first(:y)) +``` + ## `@where` Select row subsets. Operates on both a `DataFrame` and a `GroupedDataFrame`. diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index f6f241c5..9fc630be 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -612,7 +612,7 @@ Mutate `d` inplace to add additional columns or keys based on keyword arguments ### Returns -* `::AbstractDataFrame` or `::GroupedDataFrame` +* `::DataFrame` ### Examples @@ -874,7 +874,7 @@ Select and transform columns. ### Arguments -* `d` : an AbstractDataFrame +* `d` : an `AbstractDataFrame` or `GroupedDataFrame` * `e` : keyword arguments specifying new columns in terms of existing columns or symbols to specify existing columns @@ -950,7 +950,7 @@ Mutate `d` in-place to retain only columns or transformations specified by `e` a ### Returns -* `::AbstractDataFrame` +* `::DataFrame` ### Examples From 176f70b9e2d1f303a63049d8ecb0cd461d575c0c Mon Sep 17 00:00:00 2001 From: Zhiyu Fu Date: Thu, 17 Dec 2020 16:52:25 -0600 Subject: [PATCH 3/8] add tests for @select! and @transform! --- src/DataFramesMeta.jl | 10 +-- test/dataframes.jl | 159 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 163 insertions(+), 6 deletions(-) diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index 9fc630be..647dcf48 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -621,9 +621,9 @@ julia> using DataFramesMeta julia> df = DataFrame(A = 1:3, B = [2, 1, 2]); -julia> @transform!(df, a = 2 * :A, x = :A .+ :B); +julia> df2 = @transform!(df, a = 2 * :A, x = :A .+ :B); -julia> df +julia> (df === df2) && df2 3×4 DataFrame │ Row │ A │ B │ a │ x │ │ │ Int64 │ Int64 │ Int64 │ Int64 │ @@ -631,6 +631,8 @@ julia> df │ 1 │ 1 │ 2 │ 2 │ 3 │ │ 2 │ 2 │ 1 │ 4 │ 3 │ │ 3 │ 3 │ 2 │ 6 │ 5 │ + +julia> @transform! ``` """ macro transform!(x, args...) @@ -978,9 +980,9 @@ julia> df julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), c = 1:8); -julia> @select!(df, :c, x = :b + :c); +julia> df2 = @select!(df, :c, x = :b + :c); -julia> df +julia> (df === df2) && df2 8×2 DataFrame │ Row │ c │ x │ diff --git a/test/dataframes.jl b/test/dataframes.jl index f1ec90b9..61b45f49 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -83,6 +83,81 @@ const ≅ = isequal @test newdf ≅ df end + +@testset "@transform!" begin + df = DataFrame( + g = [1, 1, 1, 2, 2], + i = 1:5, + t = ["a", "b", "c", "c", "e"], + y = [:v, :w, :x, :y, :z], + c = [:g, :quote, :body, :transform, missing] + ) + + m = [100, 200, 300, 400, 500] + + gq = :g + iq = :i + tq = :t + yq = :y + cq = :c + + gr = "g" + ir = "i" + tr = "t" + yr = "y" + cr = "c" + + n_str = "new_column" + n_sym = :new_column + n_space = "new column" + + @test @transform!(df, n = :i).n == df.i + @test @transform!(df, n = :i .+ :g).n == df.i .+ df.g + @test @transform!(df, n = :t .* string.(:y)).n == df.t .* string.(df.y) + @test @transform!(df, n = Symbol.(:y, ^(:t))).n == Symbol.(df.y, :t) + @test @transform!(df, n = Symbol.(:y, ^(:body))).n == Symbol.(df.y, :body) + @test @transform!(df, body = :i).body == df.i + @test @transform!(df, transform = :i).transform == df.i + + @test @transform!(df, n = cols(iq)).n == df.i + @test @transform!(df, n = cols(iq) .+ cols(gq)).n == df.i .+ df.g + @test @transform!(df, n = cols(tq) .* string.(cols(yq))).n == df.t .* string.(df.y) + @test @transform!(df, n = Symbol.(cols(yq), ^(:t))).n == Symbol.(df.y, :t) + @test @transform!(df, n = Symbol.(cols(yq), ^(:body))).n == Symbol.(df.y, :body) + @test @transform!(df, body = cols(iq)).body == df.i + @test @transform!(df, transform = cols(iq)).transform == df.i + + @test @transform!(df, n = cols(ir)).n == df.i + @test @transform!(df, n = cols(ir) .+ cols(gr)).n == df.i .+ df.g + @test @transform!(df, n = cols(tr) .* string.(cols(yr))).n == df.t .* string.(df.y) + @test @transform!(df, n = Symbol.(cols(yr), ^(:t))).n == Symbol.(df.y, :t) + @test @transform!(df, n = Symbol.(cols(yr), ^(:body))).n == Symbol.(df.y, :body) + @test @transform!(df, body = cols(ir)).body == df.i + @test @transform!(df, transform = cols(ir)).transform == df.i + @test @transform!(df, n = cols("g") + cols(:i)).n == df.g + df.i + @test @transform!(df, n = cols(1) + cols(2)).n == df.g + df.i + + @test @transform!(df, cols("new_column") = :i).new_column == df.i + @test @transform!(df, cols(n_str) = :i).new_column == df.i + @test @transform!(df, cols(n_sym) = :i).new_column == df.i + @test @transform!(df, cols(n_space) = :i)."new column" == df.i + @test @transform!(df, cols("new" * "_" * "column") = :i).new_column == df.i + + @test @transform!(df, n = 1).n == fill(1, nrow(df)) + @test @transform!(df, n = :i .* :g).n == [1, 2, 3, 8, 10] + + # non-copying + @test @transform!(df, n = :i).g === df.g + @test @transform!(df, n = :i).n === df.i + # mutating + df2 = copy(df) + @transform!(df, n = :i) + df == @transform(df2, n = :i) + + + +end + # Defined outside of `@testset` due to use of `@eval` df = DataFrame( g = [1, 1, 1, 2, 2], @@ -198,8 +273,88 @@ end @test @transform(df, n = :i .* :g).n == [1, 2, 3, 8, 10] - @select!(df, :i, :g, n = :i .+ :g) - @test df == df2[!, [:i, :g, :n]] +end + +@testset "@select!" begin + # Defined outside of `@testset` due to use of `@eval` + df = DataFrame( + g = [1, 1, 1, 2, 2], + i = 1:5, + t = ["a", "b", "c", "c", "e"], + y = [:v, :w, :x, :y, :z], + c = [:g, :quote, :body, :transform, missing] + ) + + m = [100, 200, 300, 400, 500] + + gq = :g + iq = :i + tq = :t + yq = :y + cq = :c + + gr = "g" + ir = "i" + tr = "t" + yr = "y" + cr = "c" + + n_str = "new_column" + n_sym = :new_column + n_space = "new column" + + df2 = copy(df) + df2.n = df2.i .+ df2.g + + @test @select!(copy(df), :i, :g, n = :i .+ :g) == df2[!, [:i, :g, :n]] + @test @select!(copy(df), :i, :g) == df2[!, [:i, :g]] + @test @select!(copy(df), :i) == df2[!, [:i]] + + @test @select!(copy(df), n = :i .+ :g).n == df.i .+ df.g + @test @select!(copy(df), n = :i).n == df2.i + @test @select!(copy(df), n = :t .* string.(:y)).n == df.t .* string.(df.y) + @test @select!(copy(df), n = Symbol.(:y, ^(:t))).n == Symbol.(df.y, :t) + @test @select!(copy(df), n = Symbol.(:y, ^(:body))).n == Symbol.(df.y, :body) + @test @select!(copy(df), body = :i).body == df.i + @test @select!(copy(df), transform = :i).transform == df.i + + @test @select!(copy(df), n = cols(iq)).n == df.i + @test @select!(copy(df), n = cols(iq) .+ cols(gq)).n == df.i .+ df.g + @test @select!(copy(df), n = cols(tq) .* string.(cols(yq))).n == df.t .* string.(df.y) + @test @select!(copy(df), n = Symbol.(cols(yq), ^(:t))).n == Symbol.(df.y, :t) + @test @select!(copy(df), n = Symbol.(cols(yq), ^(:body))).n == Symbol.(df.y, :body) + @test @select!(copy(df), body = cols(iq)).body == df.i + @test @select!(copy(df), transform = cols(iq)).transform == df.i + + @test @select!(copy(df), n = cols(ir)).n == df.i + @test @select!(copy(df), n = cols(ir) .+ cols(gr)).n == df.i .+ df.g + @test @select!(copy(df), n = cols(tr) .* string.(cols(yr))).n == df.t .* string.(df.y) + @test @select!(copy(df), n = Symbol.(cols(yr), ^(:t))).n == Symbol.(df.y, :t) + @test @select!(copy(df), n = Symbol.(cols(yr), ^(:body))).n == Symbol.(df.y, :body) + @test @select!(copy(df), body = cols(ir)).body == df.i + @test @select!(copy(df), transform = cols(ir)).transform == df.i + @test @select!(copy(df), n = cols("g") + cols(:i)).n == df.g + df.i + @test @select!(copy(df), n = cols(1) + cols(2)).n == df.g + df.i + + + @test @select!(copy(df), n = 1).n == fill(1, nrow(df)) + + @test @select!(copy(df), cols("new_column") = :i).new_column == df.i + @test @select!(copy(df), cols(n_str) = :i).new_column == df.i + @test @select!(copy(df), cols(n_sym) = :i).new_column == df.i + @test @select!(copy(df), cols(n_space) = :i)."new column" == df.i + @test @select!(copy(df), cols("new" * "_" * "column") = :i).new_column == df.i + + # non-copying + newcol = [1:5;] + df2 = copy(df) + df2.newcol = newcol + @test @select!(df2, :newcol).newcol === newcol + + # mutating + df2 = @select(df, :i) + @select!(df, :i) + @test df == df2 end # Defined outside of `@testset` due to use of `@eval` From c1eef8b90f96b3851b372021b8bf78c914ade3b2 Mon Sep 17 00:00:00 2001 From: FuZhiyu Date: Fri, 18 Dec 2020 10:21:29 -0600 Subject: [PATCH 4/8] improve documentation Co-authored-by: pdeffebach <23196228+pdeffebach@users.noreply.github.com> --- docs/src/index.md | 6 +++--- src/DataFramesMeta.jl | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index b8fadd36..68a52d84 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,9 +2,9 @@ Metaprogramming tools for DataFrames.jl objects to provide more convenient syntax. -DataFrames.jl has the functions `select(!)`, `transform(!)`, and `combine` +DataFrames.jl has the functions `select`, `transform`, and `combine`, as well as the in-place `select!` and `transform!` for manipulating data frames. DataFramesMeta provides the macros -`@select(!)`, `@transform(!)`, and `@combine` to mirror these functions with +`@select`, `@transform`, `@combine`, `@select!`, and `@transform!` to mirror these functions with more convenient syntax. Inspired by [dplyr](https://dplyr.tidyverse.org/) in R and [LINQ](https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/concepts/linq/) in C#. @@ -487,4 +487,4 @@ x_thread = @pipe df |> ```@contents Pages = ["api/api.md"] Depth = 3 -``` \ No newline at end of file +``` diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index 647dcf48..4a11fdd4 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -603,7 +603,7 @@ end """ @transform!(d, i...) -Mutate `d` inplace to add additional columns or keys based on keyword arguments and return it. +Mutate `d` inplace to add additional columns or keys based on keyword arguments and return it. No copies of existing columns are made, meaning modifications of the returned data frame may affect the input data frame as well. ### Arguments @@ -942,7 +942,7 @@ end """ @select!(d, e...) -Mutate `d` in-place to retain only columns or transformations specified by `e` and return it. +Mutate `d` in-place to retain only columns or transformations specified by `e` and return it. No copies of existing columns are made, meaning modifications of the returned data frame may affect the input data frame as well. ### Arguments From dc896237bef5d2523be4be17fe691921fafb2639 Mon Sep 17 00:00:00 2001 From: Zhiyu Fu Date: Fri, 18 Dec 2020 11:10:05 -0600 Subject: [PATCH 5/8] improve tests --- src/DataFramesMeta.jl | 6 +++--- test/dataframes.jl | 13 +++++-------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index 4a11fdd4..2988a300 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -961,9 +961,9 @@ julia> using DataFrames, DataFramesMeta julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), c = 1:8); -julia> @select!(df, :c, :a); +julia> df2 = @select!(df, :c, :a); -julia> df +julia> (df2.c === df.c) && df2 8×2 DataFrame │ Row │ c │ a │ @@ -982,7 +982,7 @@ julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), julia> df2 = @select!(df, :c, x = :b + :c); -julia> (df === df2) && df2 +julia> (df.c === df2.c) && df2 8×2 DataFrame │ Row │ c │ x │ diff --git a/test/dataframes.jl b/test/dataframes.jl index 61b45f49..f20398a8 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -79,8 +79,6 @@ const ≅ = isequal @test @transform(df, n = :i .* :g).n == [1, 2, 3, 8, 10] - @transform!(df, n = :i) - @test newdf ≅ df end @@ -151,11 +149,10 @@ end @test @transform!(df, n = :i).n === df.i # mutating df2 = copy(df) - @transform!(df, n = :i) - df == @transform(df2, n = :i) - - - + @test @transform!(df, :i) ≅ df2 + @test @transform!(df, :i, :g) ≅ df2 + @transform!(df, n2 = :i) + @test df[:, Not(:n2)] ≅ df2 end # Defined outside of `@testset` due to use of `@eval` @@ -311,7 +308,7 @@ end @test @select!(copy(df), :i) == df2[!, [:i]] @test @select!(copy(df), n = :i .+ :g).n == df.i .+ df.g - @test @select!(copy(df), n = :i).n == df2.i + @test @select!(copy(df), n = :i).n == df.i @test @select!(copy(df), n = :t .* string.(:y)).n == df.t .* string.(df.y) @test @select!(copy(df), n = Symbol.(:y, ^(:t))).n == Symbol.(df.y, :t) @test @select!(copy(df), n = Symbol.(:y, ^(:body))).n == Symbol.(df.y, :body) From 7b292ef0095af8738118dc6fb4bb7838c65aefe2 Mon Sep 17 00:00:00 2001 From: Zhiyu Fu Date: Sat, 19 Dec 2020 17:59:11 -0600 Subject: [PATCH 6/8] combine documentations and slight changes --- docs/src/index.md | 41 ++++++++--------------------------------- src/DataFramesMeta.jl | 29 ++++++++++++++++++----------- test/dataframes.jl | 5 +++-- 3 files changed, 29 insertions(+), 46 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 68a52d84..f4d8e498 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -49,11 +49,14 @@ but exported by DataFramesMeta for convenience. and `Not` when selecting and transforming columns. DataFramesMeta does not currently support this syntax. -## `@select` +## `@select` and '@select!` Column selections and transformations. Only newly created columns are kept. Operates on both a `DataFrame` and a `GroupedDataFrame`. +`@select` copies the specified columns and return a new dataframe, while `@select!` +mutates the original dataframe and return it. + When given a `GroupedDataFrame`, performs a transformation by group and then if necessary repeats the result to have as many rows as the input data frame. @@ -64,31 +67,19 @@ gd = groupby(df, :x); @select(df, :x, :y) @select(df, x2 = 2 * :x, :y) @select(gd, x2 = 2 .* :y .* first(:y)) -``` - -## `@select!` - -In-place column selections and transformations, mirroring `select!` in DataFrames.jl. -Only specified columns are kept. Operates on both a `DataFrame` and a `GroupedDataFrame`. -The mutated dataframe is returned. - -When given a `GroupedDataFrame`, performs a transformation by group and then -if necessary repeats the result to have as many rows as the input -data frame. - -```julia -df = DataFrame(x = [1, 1, 2, 2], y = [1, 2, 101, 102]); -gd = groupby(df, :x); @select!(df, :x, :y) @select!(df, x = 2 * :x, :y) @select!(gd, y = 2 .* :y .* first(:y)) ``` -## `@transform` +## `@transform and @transform!` Add additional columns based on keyword arguments. Operates on both a `DataFrame` and a `GroupedDataFrame`. +`@transform` copies the original dataframe and return it together with newly created +columns, while `@transform!` adds additional columns to the original dataframe and return it. + When given a `GroupedDataFrame`, performs a transformation by group and then if necessary repeats the result to have as many rows as the input data frame. @@ -99,22 +90,6 @@ gd = groupby(df, :x); @transform(df, :x, :y) @transform(df, x2 = 2 * :x, :y) @transform(gd, x2 = 2 .* :y .* first(:y)) -``` - -## `@transform!` - -Add additional columns based on keyword arguments. Operates on both a -`DataFrame` and a `GroupedDataFrame`. The mutated dataframe is returned. - -When given a `GroupedDataFrame`, performs a transformation by group and then -if necessary repeats the result to have as many rows as the input -data frame. - - - -```julia -df = DataFrame(x = [1, 1, 2, 2], y = [1, 2, 101, 102]); -gd = groupby(df, :x); @transform!(df, :x, :y) @transform!(df, x = 2 * :x, :y) @transform!(gd, y = 2 .* :y .* first(:y)) diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index 2988a300..147c2003 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -603,7 +603,8 @@ end """ @transform!(d, i...) -Mutate `d` inplace to add additional columns or keys based on keyword arguments and return it. No copies of existing columns are made, meaning modifications of the returned data frame may affect the input data frame as well. +Mutate `d` inplace to add additional columns or keys based on keyword arguments and return it. +No copies of existing columns are made. ### Arguments @@ -621,9 +622,8 @@ julia> using DataFramesMeta julia> df = DataFrame(A = 1:3, B = [2, 1, 2]); -julia> df2 = @transform!(df, a = 2 * :A, x = :A .+ :B); +julia> df2 = @transform!(df, a = 2 * :A, x = :A .+ :B) -julia> (df === df2) && df2 3×4 DataFrame │ Row │ A │ B │ a │ x │ │ │ Int64 │ Int64 │ Int64 │ Int64 │ @@ -632,7 +632,8 @@ julia> (df === df2) && df2 │ 2 │ 2 │ 1 │ 4 │ 3 │ │ 3 │ 3 │ 2 │ 6 │ 5 │ -julia> @transform! +julia> df === df2 +true ``` """ macro transform!(x, args...) @@ -942,7 +943,7 @@ end """ @select!(d, e...) -Mutate `d` in-place to retain only columns or transformations specified by `e` and return it. No copies of existing columns are made, meaning modifications of the returned data frame may affect the input data frame as well. +Mutate `d` in-place to retain only columns or transformations specified by `e` and return it. No copies of existing columns are made. ### Arguments @@ -961,9 +962,7 @@ julia> using DataFrames, DataFramesMeta julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), c = 1:8); -julia> df2 = @select!(df, :c, :a); - -julia> (df2.c === df.c) && df2 +julia> df2 = @select!(df, :c, :a) 8×2 DataFrame │ Row │ c │ a │ @@ -978,11 +977,15 @@ julia> (df2.c === df.c) && df2 │ 7 │ 7 │ 3 │ │ 8 │ 8 │ 4 │ -julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), c = 1:8); +julia> df === df2 + +true + -julia> df2 = @select!(df, :c, x = :b + :c); -julia> (df.c === df2.c) && df2 +julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), c = 1:8); + +julia> df2 = @select!(df, :c, x = :b + :c) 8×2 DataFrame │ Row │ c │ x │ @@ -996,6 +999,10 @@ julia> (df.c === df2.c) && df2 │ 6 │ 6 │ 7 │ │ 7 │ 7 │ 9 │ │ 8 │ 8 │ 9 │ + +julia> df === df2 + +true ``` """ macro select!(x, args...) diff --git a/test/dataframes.jl b/test/dataframes.jl index f20398a8..9e3439c1 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -149,7 +149,8 @@ end @test @transform!(df, n = :i).n === df.i # mutating df2 = copy(df) - @test @transform!(df, :i) ≅ df2 + @test @transform!(df, :i) === df + @test df ≅ df2 @test @transform!(df, :i, :g) ≅ df2 @transform!(df, n2 = :i) @test df[:, Not(:n2)] ≅ df2 @@ -350,7 +351,7 @@ end # mutating df2 = @select(df, :i) - @select!(df, :i) + @select!(df, :i) === df @test df == df2 end From 804d484d960e69a06eebf1ef7e13f5a11280f9d9 Mon Sep 17 00:00:00 2001 From: FuZhiyu Date: Mon, 28 Dec 2020 11:57:04 -0600 Subject: [PATCH 7/8] Apply suggestions from code review correct typos and formatting issues. Co-authored-by: Milan Bouchet-Valat --- docs/src/index.md | 10 +++++----- src/DataFramesMeta.jl | 4 ---- test/dataframes.jl | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index f4d8e498..7ea2db1a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -54,8 +54,8 @@ but exported by DataFramesMeta for convenience. Column selections and transformations. Only newly created columns are kept. Operates on both a `DataFrame` and a `GroupedDataFrame`. -`@select` copies the specified columns and return a new dataframe, while `@select!` -mutates the original dataframe and return it. +`@select` returns a new data frame with newly allocated columns, while `@select!` +mutates the original data frame and returns it. When given a `GroupedDataFrame`, performs a transformation by group and then if necessary repeats the result to have as many rows as the input @@ -72,13 +72,13 @@ gd = groupby(df, :x); @select!(gd, y = 2 .* :y .* first(:y)) ``` -## `@transform and @transform!` +## `@transform` and `@transform!` Add additional columns based on keyword arguments. Operates on both a `DataFrame` and a `GroupedDataFrame`. -`@transform` copies the original dataframe and return it together with newly created -columns, while `@transform!` adds additional columns to the original dataframe and return it. +`@transform` returns a new data frame with newly allocated columns, while `@transform!` +mutates the original data frame and returns it. When given a `GroupedDataFrame`, performs a transformation by group and then if necessary repeats the result to have as many rows as the input diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index 147c2003..98a94838 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -623,7 +623,6 @@ julia> using DataFramesMeta julia> df = DataFrame(A = 1:3, B = [2, 1, 2]); julia> df2 = @transform!(df, a = 2 * :A, x = :A .+ :B) - 3×4 DataFrame │ Row │ A │ B │ a │ x │ │ │ Int64 │ Int64 │ Int64 │ Int64 │ @@ -978,7 +977,6 @@ julia> df2 = @select!(df, :c, :a) │ 8 │ 8 │ 4 │ julia> df === df2 - true @@ -986,7 +984,6 @@ true julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), c = 1:8); julia> df2 = @select!(df, :c, x = :b + :c) - 8×2 DataFrame │ Row │ c │ x │ │ │ Int64 │ Int64 │ @@ -1001,7 +998,6 @@ julia> df2 = @select!(df, :c, x = :b + :c) │ 8 │ 8 │ 9 │ julia> df === df2 - true ``` """ diff --git a/test/dataframes.jl b/test/dataframes.jl index 9e3439c1..a3e91838 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -351,7 +351,7 @@ end # mutating df2 = @select(df, :i) - @select!(df, :i) === df + @test @select!(df, :i) === df @test df == df2 end From a0b05f6dc3068abdfe48c99b14a2f42ffadbfd1a Mon Sep 17 00:00:00 2001 From: FuZhiyu Date: Mon, 28 Dec 2020 11:58:24 -0600 Subject: [PATCH 8/8] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- docs/src/index.md | 2 +- src/DataFramesMeta.jl | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 7ea2db1a..604a417f 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -49,7 +49,7 @@ but exported by DataFramesMeta for convenience. and `Not` when selecting and transforming columns. DataFramesMeta does not currently support this syntax. -## `@select` and '@select!` +## `@select` and `@select!` Column selections and transformations. Only newly created columns are kept. Operates on both a `DataFrame` and a `GroupedDataFrame`. diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index 98a94838..43413b6b 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -962,7 +962,6 @@ julia> using DataFrames, DataFramesMeta julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), c = 1:8); julia> df2 = @select!(df, :c, :a) - 8×2 DataFrame │ Row │ c │ a │ │ │ Int64 │ Int64 │