Skip to content
This repository has been archived by the owner on Dec 11, 2022. It is now read-only.

Use missing instead of nothing in DataFrame() overload #105

Merged
merged 6 commits into from
Jun 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions docs/src/examples/dataframes.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ using Statistics
using DataFrames

# Load environmental data
temperature, precipitation = worldclim([1,12])
temperature, precipitation = SimpleSDMPredictor(WorldClim, BioClim, [1,12])

# Get GBIF occurrences
kingfisher = GBIF.taxon("Megaceryle alcyon", strict=true)
Expand Down Expand Up @@ -59,11 +59,12 @@ rename!(env_df, :x1 => :temperature, :x2 => :precipitation)
first(env_df, 5)
```

Note that the resulting `DataFrame` will include the values set to `nothing` in
the layers. We might want to remove those rows using `filter!`:
Note that the resulting `DataFrame` will include `missing` values for the
elements set to `nothing` in the layers. We might want to remove those rows
using `filter!` or `dropmissing!`:

```@example dataframes
filter!(x -> !isnothing(x.temperature) && !isnothing(x.precipitation), env_df);
dropmissing!(env_df, [:temperature, :precipitation]);
last(env_df, 5)
```

Expand Down
4 changes: 2 additions & 2 deletions src/SimpleSDMLayers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ export clip
function __init__()
@require GBIF="ee291a33-5a6c-5552-a3c8-0f29a1181037" begin
@info "GBIF integration loaded"
include(joinpath(dirname(pathof(SimpleSDMLayers)), "integrations", "GBIF.jl"))
include("integrations/GBIF.jl")
end
@require DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" begin
@info "DataFrames integration loaded"
include(joinpath(dirname(pathof(SimpleSDMLayers)), "integrations", "DataFrames.jl"))
include("integrations/DataFrames.jl")
end

end
Expand Down
14 changes: 10 additions & 4 deletions src/integrations/DataFrames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,24 @@ end
DataFrames.DataFrame(layer::T) where {T <: SimpleSDMLayer}

Returns a DataFrame from a `SimpleSDMLayer` element, with columns for latitudes,
longitudes and grid values.
longitudes and grid values. `nothing` values in the layer grid are replaced by
`missing` values in the DataFrame.
"""
function DataFrames.DataFrame(layer::T; kw...) where {T <: SimpleSDMLayer}
lats = repeat(latitudes(layer), outer = size(layer, 2))
lons = repeat(longitudes(layer), inner = size(layer, 1))
return DataFrames.DataFrame(latitude = lats, longitude = lons, values = vec(layer.grid); kw...)
values = replace(vec(layer.grid), nothing => missing)
df = DataFrames.DataFrame(longitude = lons, latitude = lats, values = values; kw...)
return df
end

"""
DataFrames.DataFrame(layers::Array{SimpleSDMLayer})

Returns a single DataFrame from an `Array` of compatible `SimpleSDMLayer`
elements, with every layer as a column, as well as columns for latitudes and longitudes.
elements, with every layer as a column, as well as columns for latitudes and
longitudes. `nothing` values in the layer grids are replaced by
`missing` values in the DataFrame.
"""
function DataFrames.DataFrame(layers::Array{T}; kw...) where {T <: SimpleSDMLayer}
l1 = layers[1]
Expand All @@ -75,10 +80,11 @@ function DataFrames.DataFrame(layers::Array{T}; kw...) where {T <: SimpleSDMLaye
lats = repeat(latitudes(l1), outer = size(l1, 2))
lons = repeat(longitudes(l1), inner = size(l1, 1))
values = mapreduce(x -> vec(x.grid), hcat, layers)
values = replace(values, nothing => missing)

df = DataFrames.DataFrame(values, :auto; kw...)
DataFrames.insertcols!(df, 1, :latitude => lats)
DataFrames.insertcols!(df, 1, :longitude => lons)
DataFrames.insertcols!(df, 2, :latitude => lats)
return df
end

Expand Down
21 changes: 16 additions & 5 deletions test/dataframes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,33 @@ using Test

temperature = SimpleSDMPredictor(WorldClim, BioClim, 1)

df = DataFrame(latitude = [0.0, 1.0], longitude = [30.0, 31.0], values = [42.0, 15.0])
df = DataFrame(longitude = [30.0, 31.0], latitude = [0.0, 1.0], values = [42.0, 15.0])

@test eltype(temperature[df]) <: Number

temperature_clip = clip(temperature, df)

@test typeof(temperature_clip) == typeof(temperature)

@test typeof(DataFrame(temperature_clip)) == DataFrame
@test eltype(DataFrame(temperature_clip).values) == eltype(temperature_clip.grid)
@test typeof(DataFrame([temperature_clip, temperature_clip])) == DataFrame
@test eltype(DataFrame([temperature_clip, temperature_clip]).x1) == eltype(temperature_clip.grid)
df1 = DataFrame(temperature_clip)
df2 = DataFrame([temperature_clip, temperature_clip])

@test typeof(df1) == DataFrame
@test eltype(df1.values) == Union{Missing, eltype(temperature_clip)}
@test typeof(df2) == DataFrame
@test eltype(df2.x1) == Union{Missing, eltype(temperature_clip)}

@test typeof(SimpleSDMPredictor(df, :values, temperature_clip)) <: SimpleSDMLayer
@test typeof(SimpleSDMPredictor(df, :values, temperature_clip)) <: SimpleSDMPredictor

l1 = SimpleSDMPredictor(df1, :values, temperature_clip)
l2 = SimpleSDMPredictor(df2, :x2, temperature_clip)
for l in (l1, l2)
@test isequal(l.grid, temperature_clip.grid)
@test isequal(longitudes(l), longitudes(temperature_clip))
@test isequal(latitudes(l), latitudes(temperature_clip))
end

mbool = mask(temperature_clip, df, Bool)
@test eltype(mbool) == Bool

Expand Down