From 065e1429c67bbae9f6b766ab2ee6240b606f27a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 9 Apr 2024 11:52:34 +0200 Subject: [PATCH 1/3] do not pass empty vector to Tables.columntable --- src/abstractdataframe/selection.jl | 32 +++++++++++++++++------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index a90f1203d..2f669448e 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -805,21 +805,25 @@ function select_transform!((nc,)::Ref{Any}, df::AbstractDataFrame, newdf::DataFr res = _transformation_helper(df, col_idx, Ref{Any}(fun)) if newname === AsTable || newname isa AbstractVector{Symbol} - if res isa AbstractVector && !isempty(res) - kp1 = keys(res[1]) - prepend = all(x -> x isa Integer, kp1) - if !(prepend || all(x -> x isa Symbol, kp1) || all(x -> x isa AbstractString, kp1)) - throw(ArgumentError("keys of the returned elements must be " * - "`Symbol`s, strings or integers")) - end - if any(x -> !isequal(keys(x), kp1), res) - throw(ArgumentError("keys of the returned elements must be identical")) - end - newres = DataFrame() - for n in kp1 - newres[!, prepend ? Symbol("x", n) : Symbol(n)] = [x[n] for x in res] + if res isa AbstractVector + if isempty(res) + res = NamedTuple() + else + kp1 = keys(res[1]) + prepend = all(x -> x isa Integer, kp1) + if !(prepend || all(x -> x isa Symbol, kp1) || all(x -> x isa AbstractString, kp1)) + throw(ArgumentError("keys of the returned elements must be " * + "`Symbol`s, strings or integers")) + end + if any(x -> !isequal(keys(x), kp1), res) + throw(ArgumentError("keys of the returned elements must be identical")) + end + newres = DataFrame() + for n in kp1 + newres[!, prepend ? Symbol("x", n) : Symbol(n)] = [x[n] for x in res] + end + res = newres end - res = newres elseif !(res isa Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix, Tables.AbstractRow}) res = Tables.columntable(res) From 22c6a685bd600bf1b9413afb92063809fa3e94ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 9 Apr 2024 19:17:16 +0200 Subject: [PATCH 2/3] change implementation to be more strict --- src/abstractdataframe/selection.jl | 39 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index 2f669448e..81dae2da9 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -805,28 +805,29 @@ function select_transform!((nc,)::Ref{Any}, df::AbstractDataFrame, newdf::DataFr res = _transformation_helper(df, col_idx, Ref{Any}(fun)) if newname === AsTable || newname isa AbstractVector{Symbol} - if res isa AbstractVector - if isempty(res) - res = NamedTuple() - else - kp1 = keys(res[1]) - prepend = all(x -> x isa Integer, kp1) - if !(prepend || all(x -> x isa Symbol, kp1) || all(x -> x isa AbstractString, kp1)) - throw(ArgumentError("keys of the returned elements must be " * - "`Symbol`s, strings or integers")) - end - if any(x -> !isequal(keys(x), kp1), res) - throw(ArgumentError("keys of the returned elements must be identical")) - end - newres = DataFrame() - for n in kp1 - newres[!, prepend ? Symbol("x", n) : Symbol(n)] = [x[n] for x in res] - end - res = newres + if res isa AbstractVector && !isempty(res) + kp1 = keys(res[1]) + prepend = all(x -> x isa Integer, kp1) + if !(prepend || all(x -> x isa Symbol, kp1) || all(x -> x isa AbstractString, kp1)) + throw(ArgumentError("keys of the returned elements must be " * + "`Symbol`s, strings or integers")) end + if any(x -> !isequal(keys(x), kp1), res) + throw(ArgumentError("keys of the returned elements must be identical")) + end + newres = DataFrame() + for n in kp1 + newres[!, prepend ? Symbol("x", n) : Symbol(n)] = [x[n] for x in res] + end + res = newres elseif !(res isa Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix, Tables.AbstractRow}) - res = Tables.columntable(res) + if res isa Union{AbstractVector{Any}, AbstractVector{<:AbstractVector}} + @assert isempty(res) + res = DataFrame() + else + res = Tables.columntable(res) + end end end From 8a1e018a0c972b578da059d163bc22c13eda2ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 9 Apr 2024 23:45:10 +0200 Subject: [PATCH 3/3] add tests and news --- NEWS.md | 3 +++ test/select.jl | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/NEWS.md b/NEWS.md index 22d2045f0..1d7fd3050 100644 --- a/NEWS.md +++ b/NEWS.md @@ -42,6 +42,9 @@ * Ensure that `allunique(::AbstractDataFrame, ::Any)` always gets interpreted as test for uniqueness of rows in the first positional argument ([#3434](https://github.com/JuliaData/DataFrames.jl/issues/3434)) +* Make sure that an empty vector of `Any` or of `AbstractVector` is treated as having + no columns when a data frame is being processed with `combine`/`select`/`transform`. + ([#3435](https://github.com/JuliaData/DataFrames.jl/issues/3435)) # DataFrames.jl v1.6.1 Release Notes diff --git a/test/select.jl b/test/select.jl index 67f97df2f..3a8ad3b23 100644 --- a/test/select.jl +++ b/test/select.jl @@ -3024,4 +3024,22 @@ end @test_throws ArgumentError combine(gdf, :x => (x -> x[1] == 2 ? "x" : cr) => AsTable) end +@testset "empty vector" begin + df = DataFrame(a=1:3) + + @test_throws ArgumentError select(df, :a => (x -> Vector{Any}[])) + + for T in (Vector{Any}, Any, NamedTuple{(:x,),Tuple{Int64}}) + v = combine(df, :a => (x -> T[])).a_function + @test isempty(v) + @test eltype(v) === T + end + + @test size(combine(df, :a => (x -> Vector{Any}[]) => AsTable)) == (0, 0) + @test size(combine(df, :a => (x -> Any[]) => AsTable)) == (0, 0) + df2 = combine(df, :a => (x -> NamedTuple{(:x,),Tuple{Int64}}[]) => AsTable) + @test size(df2) == (0, 1) + @test eltype(df2.x) === Int +end + end # module