diff --git a/.gitignore b/.gitignore index 04015c06..d3723acd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.jl.cov *.jl.*.cov +assets test/assets test/gallery diff --git a/Makefile b/Makefile deleted file mode 100644 index 9f5729b5..00000000 --- a/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -.PHONY: clean - -clean: - @rm -r test/assets/ diff --git a/Project.toml b/Project.toml index 134198cc..c569573e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,10 @@ name = "SimpleSDMLayers" uuid = "2c645270-77db-11e9-22c3-0f302a89c64c" authors = ["Timothée Poisot "] -version = "0.2.2" +version = "0.3.0" [deps] -GDAL = "add2ef01-049f-52c4-9ee2-e494f65e021a" +ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Requires = "ae029012-a4dd-5104-9daa-d747884805df" @@ -12,7 +12,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [compat] -GDAL = "1.0" +ArchGDAL = "0.4" HTTP = "0.8" RecipesBase = "0.7, 0.8, 1.0" Requires = "1.0" diff --git a/data/connectivity.tiff b/data/connectivity.tiff new file mode 100644 index 00000000..ae57cf0c Binary files /dev/null and b/data/connectivity.tiff differ diff --git a/docs/Project.toml b/docs/Project.toml index 117d1dba..479ad53c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -3,3 +3,8 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" GBIF = "ee291a33-5a6c-5552-a3c8-0f29a1181037" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" + +[compat] +GBIF = "0.2" diff --git a/docs/make.jl b/docs/make.jl index d9935029..948b0a8b 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -2,6 +2,7 @@ push!(LOAD_PATH, joinpath("..", "src")) using Documenter, SimpleSDMLayers using GBIF +using Statistics makedocs( sitename = "Simple SDM Layers", @@ -17,7 +18,11 @@ makedocs( ], "Examples" => [ "Temperature data" => "examples/temperature.md", - "GBIF integration" => "examples/gbif.md" + "GBIF integration" => "examples/gbif.md", + "Importing raster data" => "examples/import.md", + "Sliding window analysis" => "examples/slidingwindow.md", + "Landcover data" => "examples/landcover.md", + "Landcover consensus" => "examples/consensus.md" ] ] ) diff --git a/docs/src/examples/consensus.md b/docs/src/examples/consensus.md new file mode 100644 index 00000000..46ea585f --- /dev/null +++ b/docs/src/examples/consensus.md @@ -0,0 +1,83 @@ +# Landcover consensus + +In this example, we will create a consensus map of landcover for Corsica based +on the EarthEnv data, and measure the variation within each pixel using the +variance. The first step is to load the packages we need, and create a bounding +box: + +```@example cons +using SimpleSDMLayers +using Plots + +bbox = (left=8.25, right=10.0, bottom=41.2, top=43.2) +``` + +We will then do two things. First, get the first layer of landcover (see the +help of `landcover` for a list of the layers), and then create a datacube, +organized around dimensions of latitude, longitude, and layer value - we will +only focus on the 11 first variables, since we do not want the information on +open water (layer 12): + +```@example cons +lc = landcover(1; full=false, bbox...) +use = fill(NaN32, size(lc)..., 11) +``` + +At this point, we will simply fill in the first "slice" of our datacube with +values from the layer: + +```@example cons +for (i,e) in enumerate(lc.grid) + coord = (CartesianIndices(size(lc.grid))[i].I..., 1) + if !isnothing(e) + use[coord...] = e + end +end +``` + +The next step is to repeat this process for all other layers, filling the +appropriate data cube slice: + +```@example cons +for layer in 2:11 + lc = landcover(layer; full=false, bbox...) + for (i,e) in enumerate(lc.grid) + coord = (CartesianIndices(size(lc.grid))[i].I..., layer) + if !isnothing(e) + use[coord...] = e + end + end +end +``` + +To perform the actual analysis, we will define a `get_most_common_landuse` function, which will return the index of the layer with the highest score: + +```@example cons +function get_most_common_landuse(f) + f[isnan.(f)] .= 0.0 + sum(f) == 0 && return NaN + return last(findmax(f)) +end + +function shannon(x) + v = filter(!isnan, x) + length(v) == 0 && return NaN + v = v ./ sum(v) + return -sum(v.*log2.(v)) +end +``` + +```@example cons +consensus = mapslices(get_most_common_landuse, use; dims=3)[:,:,1] +entropy = mapslices(shannon, use; dims=3)[:,:,1] + +consensus = SimpleSDMResponse(consensus, lc) +entropy = SimpleSDMResponse(entropy, lc) +``` + +```@example cons +p1 = plot(consensus, c=:Paired_11, frame=:grid) +p2 = plot(entropy, c=:Greys, frame=:grid) + +plot(p1, p2, size=(900, 400)) +``` diff --git a/docs/src/examples/import.md b/docs/src/examples/import.md new file mode 100644 index 00000000..d36a452e --- /dev/null +++ b/docs/src/examples/import.md @@ -0,0 +1,64 @@ +# Importing your own data + +It is possible to import your own rasters into a `SimpleSDMLayer` object. This +requires defining a new type and two "helper" functions, which might seem a +little bit convoluted, but helps *immensely* underneath in case you want to also +*download* rasters from the web with different arguments. In this example, we +will look at a data file produced by the *OmniScape* package, and which +represents landscape connectivity in the Laurentians region of Québec. This +example will also show how we can use the `broadcast` operation to modify the +values of a raster. + +```@example temp +using SimpleSDMLayers +using Plots +using StatsBase +``` + +The file comes with the package itself, so we can read it directly - this is a +geotiff file, where values are floating point numbers representing connectivity. + +```@example temp +file = joinpath(dirname(pathof(SimpleSDMLayers)), "..", "data", "connectivity.tiff") +``` + +To import this file as a `SimpleSDMLayer`, we need to create a type +(`MyConnectivityMap`), and declare a method for `latitudes` and `longitudes` for +this type, where the output is the range of latitudes and longitudes. This might +seem cumbersome, but remember: it can be automated, and if you do not declare a +`latitude` and `longitude` method, it will be assumed that the raster covers the +entire globe. From a end-user perspective, it also removes the need to pass the +bounding box of your layer as an argument, and to focus instead of the region of +interest. + +```@example temp +struct MyConnectivityMap <: SimpleSDMLayers.SimpleSDMSource end +SimpleSDMLayers.latitudes(::Type{MyConnectivityMap}) = (45.34523, 47.38457) +SimpleSDMLayers.longitudes(::Type{MyConnectivityMap}) = (-75.17734,-72.36486) +``` + +Now that this is done, we can read this file as a `SimpleSDMResponse` using the +`raster` function: + +```@example temp +mp = SimpleSDMLayers.raster(SimpleSDMResponse, MyConnectivityMap(), file) +``` + +Because this file has raw values, which are not necessarily great for plotting, +we will transform it to quantiles, using the `StatsBase.ecdf` function. + +```@example temp +qfunc = ecdf(convert(Vector{Float64}, filter(!isnothing, mp.grid))) +``` + +And we can now broadcast this function to the layer: + +```@example temp +qmap = broadcast(qfunc, mp) +``` + +Finally, we are ready for plotting: + +```@example temp +plot(qmap, frame=:grid, c=:YlGnBu) +``` diff --git a/docs/src/examples/landcover.md b/docs/src/examples/landcover.md new file mode 100644 index 00000000..5e15fd99 --- /dev/null +++ b/docs/src/examples/landcover.md @@ -0,0 +1,43 @@ +# Getting landcover data + +In this example, we will look at landcover data, specifically the proportion of +urban/built area in Europe; the entire dataset is very large to fit in memory, +as it has a resolution of about 1 kilometre squared. Therefore, we will take +advantage of the ability to only load the part that matters by passing the +limits of a bounding box. + +```@example urban +using SimpleSDMLayers +urban = landcover(9; left=-11.0, right=31.1, bottom=29.0, top=71.1) +``` + +This dataset is returning data as `UInt8` (as it represents a proportion of the +pixel occupied by the type), but this is not something that can be plotted efficiently. So in the +next step, we will manipulate this object a little bit to have something more +workable. + +Let's start by preparing a new grid, with the same dimensions, but a friendlier +type, and then we can then fill these values using a simple rule of using either +`NaN` or the converted value: + +```@example urban +n_urban_grid = zeros(Float32, size(urban)); +for (i,e) in enumerate(urban.grid) + n_urban_grid[i] = isnothing(e) ? NaN : Float32(e) +end +``` + +We can now overwrite our `urban` object as a layer: + +```@example urban +urban = SimpleSDMPredictor(n_urban_grid, urban) +``` + +Note that the previous instruction uses a shortcut where the bounding box from a +new `SimpleSDMLayer` is drawn from the bounding box for an existing layer. With +this done, we can show the results: + +```@example urban +using Plots +heatmap(urban, c=:terrain) +``` diff --git a/docs/src/examples/slidingwindow.md b/docs/src/examples/slidingwindow.md new file mode 100644 index 00000000..53ac5b2a --- /dev/null +++ b/docs/src/examples/slidingwindow.md @@ -0,0 +1,30 @@ +# Sliding window analysis + +In this example, we will get precipitation data from Québec, and use a sliding +window analysis to smooth them out. The beginning of the code should now be +familiar: + +```@example slide +using SimpleSDMLayers +using Plots +using Statistics + +precipitation = worldclim(12; left=-80.0, right=-56.0, bottom=44.0, top=62.0) +``` + +The sliding window works by taking all pixels *within a given radius* (expressed +in kilometres) around the pixel of interest, and then applying the function +given as the second argument to their values. Empty pixels are removed. In this +case, we will do a summary across a 100 km radius around each pixel: + +```@example slide +averaged = slidingwindow(precipitation, Statistics.mean, 100.0) +``` + +We can finally overlap the two layers -- the result of sliding window is a +little bit smoother than the raw data. + +```@example slide +plot(precipitation, c=:alpine) +contour!(averaged, c=:white, lw=2.0) +``` diff --git a/docs/src/examples/temperature.md b/docs/src/examples/temperature.md index a918f1e1..7a359846 100644 --- a/docs/src/examples/temperature.md +++ b/docs/src/examples/temperature.md @@ -15,7 +15,7 @@ visualize these data: ```@example temp using Plots, StatsPlots -heatmap(temperature, c=:magma, frame=:box) +heatmap(temperature, c=:cividis, frame=:box) xaxis!("Longitude") yaxis!("Latitude") ``` @@ -33,7 +33,7 @@ can also make a quick heatmap to see what the region looks like: ```@example temp temperature_europe = temperature[left=-11.0, right=31.1, bottom=29.0, top=71.1] -heatmap(temperature_europe, c=:magma, aspectratio=1, frame=:box) +heatmap(temperature_europe, c=:cividis, aspectratio=1, frame=:box) ``` The next step will be to coarsen these data, which requires to give the number @@ -60,7 +60,7 @@ temperature_europe_coarse = coarsen(temperature_europe, Statistics.mean, (2, 2)) One again, we can plot these data: ```@example temp -heatmap(temperature_europe_coarse, aspectratio=1, c=:magma, frame=:box) +heatmap(temperature_europe_coarse, aspectratio=1, c=:cividis, frame=:box) ``` Finally, we can compare our different clipping and approximations to the overall diff --git a/docs/src/man/data.md b/docs/src/man/data.md index bda1130a..926a9756 100644 --- a/docs/src/man/data.md +++ b/docs/src/man/data.md @@ -1,8 +1,23 @@ -# Bioclimatic data +# Datasets -The package offers access to bioclimatic datasets - they are downloaded, saved -to the disk, and then read locally. +The package offers access to bioclimatic and other datasets - they are +downloaded, saved to the disk, and then read locally. Please note that some of +them require a lot of memory, so make sure your machine can handle them. + +## Worldclim 2.1 ```@docs worldclim ``` + +## CHELSA V1 + +```@docs +bioclim +``` + +## EarthEnv landcover + +```@docs +landcover +``` diff --git a/docs/src/man/operations.md b/docs/src/man/operations.md index 49b11a52..129018de 100644 --- a/docs/src/man/operations.md +++ b/docs/src/man/operations.md @@ -2,6 +2,7 @@ ```@docs coarsen +slidingwindow latitudes longitudes clip diff --git a/docs/src/man/overloads.md b/docs/src/man/overloads.md index 7d0b1046..6191a693 100644 --- a/docs/src/man/overloads.md +++ b/docs/src/man/overloads.md @@ -22,6 +22,12 @@ Base.maximum Base.minimum ``` +## From `Broadcast` + +```@docs +broadcast +``` + ## From `Statistics` ```@docs diff --git a/src/SimpleSDMLayers.jl b/src/SimpleSDMLayers.jl index 8cfa826c..b2ad6fcb 100644 --- a/src/SimpleSDMLayers.jl +++ b/src/SimpleSDMLayers.jl @@ -1,6 +1,6 @@ module SimpleSDMLayers -using GDAL +using ArchGDAL using HTTP using RecipesBase using ZipFile @@ -16,18 +16,34 @@ include(joinpath("lib", "generated.jl")) include(joinpath("lib", "basics.jl")) export latitudes, longitudes -include(joinpath("lib", "geotiff.jl")) -export geotiff +include(joinpath("datasets", "sources.jl")) +include(joinpath("datasets", "download_layer.jl")) +export EarthEnv, WorldClim, BioClim -include(joinpath("bioclimaticdata", "worldclim.jl")) +include(joinpath("datasets", "geotiff.jl")) +include(joinpath("datasets", "raster.jl")) +include(joinpath("datasets", "worldclim.jl")) +include(joinpath("datasets", "chelsa.jl")) +include(joinpath("datasets", "landcover.jl")) export worldclim +export bioclim +export landcover include(joinpath("operations", "coarsen.jl")) -export coarsen +include(joinpath("operations", "sliding.jl")) +export coarsen, slidingwindow include(joinpath("recipes", "recipes.jl")) -# HACK but this fixes the export of clip when GBIF or others are loaded +# This next bit is about being able to change the path for raster assets +# globally, which avoids duplication this argument across multiple functions. +_layers_assets_path = "assets" +function assets_path() + isdir(SimpleSDMLayers._layers_assets_path) || mkdir(SimpleSDMLayers._layers_assets_path) + return SimpleSDMLayers._layers_assets_path +end + +# Fixes the export of clip when GBIF or others are loaded clip(::T) where {T <: SimpleSDMLayer} = nothing export clip diff --git a/src/bioclimaticdata/worldclim.jl b/src/bioclimaticdata/worldclim.jl deleted file mode 100644 index 0bf09978..00000000 --- a/src/bioclimaticdata/worldclim.jl +++ /dev/null @@ -1,100 +0,0 @@ -""" - worldclim(layers::Vector{T}; resolution::AbstractString="10", path::AbstractString="assets") where {T <: Integer} - -Download and prepare WorldClim 2.0 bioclimatic variables, and returns them as an -array of `SimpleSDMPredictor`s. Layers are called by their number, from 1 to 19. -The list of available layers is given in a table below. - -The two keywords are `resolution`, which must be a string, and either `2.5`, -`5`, or `10`; and `path`, which refers to the path where the function will look -for the zip and geotiff files. - -Internally, this function will download the main zip file for the required -resolution from the WordlClim website, extract it, and parse the required -layers. - -It is recommended to *keep* the content of the `path` folder, as it will -eliminate the need to download and/or extract the tiff files. For example, -calling `wordlclim(1:19)` will download and extract everything, and future calls -will be much faster. - -| Variable | Description | -| ------ | ------ | -| 1 | Annual Mean Temperature | -| 2 | Mean Diurnal Range (Mean of monthly (max temp - min temp)) | -| 3 | Isothermality (BIO2/BIO7) (* 100) | -| 4 | Temperature Seasonality (standard deviation *100) | -| 5 | Max Temperature of Warmest Month | -| 6 | Min Temperature of Coldest Month | -| 7 | Temperature Annual Range (BIO5-BIO6) | -| 8 | Mean Temperature of Wettest Quarter | -| 9 | Mean Temperature of Driest Quarter | -| 10 | Mean Temperature of Warmest Quarter | -| 11 | Mean Temperature of Coldest Quarter | -| 12 | Annual Precipitation | -| 13 | Precipitation of Wettest Month | -| 14 | Precipitation of Driest Month | -| 15 | Precipitation Seasonality (Coefficient of Variation) | -| 16 | Precipitation of Wettest Quarter | -| 17 | Precipitation of Driest Quarter | -| 18 | Precipitation of Warmest Quarter | -| 19 | Precipitation of Coldest Quarter | - -""" -function worldclim(layers::Vector{T}; resolution::AbstractString="10", path::AbstractString="assets") where {T <: Integer} - all(1 .≤ layers .≤ 19) || throw(ArgumentError("The number of the layers must all be between 1 and 19")) - isdir(path) || mkdir(path) - resolution ∈ ["2.5", "5", "10"] || throw(ArgumentError("The resolution argument ($(resolution) must be 2.5, 5, or 10")) - - codes = [lpad(code, 2, "0") for code in layers] - paths = [joinpath(path, "wc2.0_bio_$(resolution)m_$(code).tif") for code in codes] - - #= - Download the files if they are missing. In order of preference, this - function will extract from the zip file if found, and download the zip file - if not. - =# - missing_files = !all(isfile.(paths)) - zip_file = "wc2.0_bio$(resolution)m.zip" - missing_zip_file = !isfile(joinpath(path, zip_file)) - if missing_files - if missing_zip_file - @info "Downloading $(zip_file)" - url = "http://biogeo.ucdavis.edu/data/worldclim/v2.0/tif/base/wc2.0_$(resolution)m_bio.zip" - #download(url, joinpath(path, zip_file)) - r = HTTP.request("GET", url) - open(joinpath(path, zip_file), "w") do f - write(f, String(r.body)) - end - end - zf = ZipFile.Reader(joinpath(path, zip_file)) - for rf in zf.files - if joinpath(path, rf.name) in paths - if !isfile(joinpath(path, rf.name)) - @info "Reading layer $(rf.name) from archive" - write(joinpath(path, rf.name), read(rf)) - end - end - end - close(zf) - end - - data_layers = geotiff.(paths) - - return SimpleSDMPredictor.(data_layers, -180.0, 180.0, -90.0, 90.0) - -end - -""" - worldclim(layer::T; x...) where {T <: Integer} - -Return a single layer from WorldClim 2.0. -""" -worldclim(layer::T; x...) where {T <: Integer} = first(worldclim([layer]; x...)) - -""" - worldclim(layers::UnitRange{T}; x...) where {T <: Integer} - -Return a range of layers from WorldClim 2.0. -""" -worldclim(layers::UnitRange{T}; x...) where {T <: Integer} = worldclim(collect(layers); x...) diff --git a/src/bioclimaticdata/chelsa.jl b/src/datasets/chelsa.jl similarity index 70% rename from src/bioclimaticdata/chelsa.jl rename to src/datasets/chelsa.jl index 18d13dcc..1b67a991 100644 --- a/src/bioclimaticdata/chelsa.jl +++ b/src/datasets/chelsa.jl @@ -6,6 +6,11 @@ from 1 to 19. The list of available layers is given in a table below. The keyword argument is `path`, which refers to the path where the function will look for the geotiff files. +Note that these files are *large* due the fine resolution of the data, and for +this reason this function will return the *integer* version of the layers. Also +note that the bioclim data are only available for the V1 of CHELSA, and are not +from the V2. + It is recommended to *keep* the content of the `path` folder, as it will eliminate the need to download the tiff files (which are quite large). For example, calling `bioclim(1:19)` will download and everything, and future @@ -34,31 +39,9 @@ calls will be much faster. | 19 | Precipitation of Coldest Quarter | """ -function bioclim(layers::Vector{Int64}; path::AbstractString="assets") - @assert all(1 .≤ layers .≤ 19) - isdir(path) || mkdir(path) - codes = [lpad(code, 2, "0") for code in layers] - filenames = ["CHELSA_bio10_$(lpad(code, 2, '0')).tif" for code in codes] - url_root = "https://www.wsl.ch/lud/chelsa/data/bioclim/integer/" - - for f in filenames - p = joinpath(path, f) - if !(isfile(p)) - res = HTTP.request("GET", url_root * f) - open(p, "w") do f - write(f, String(res.body)) - end - end - end - paths = [joinpath(path, filename) for filename in filenames] - data_layers = geotiff.(paths; T=Int64) - return SimpleSDMPredictor.(data_layers, -180.0, 180.0, -90.0, 90.0) - +function bioclim(layer::Integer; left=nothing, right=nothing, bottom=nothing, top=nothing) + return raster(SimpleSDMPredictor, BioClim(), layer=layer, left=left, right=right, bottom=bottom, top=top) end -""" -Return a single layer of bioclim variables from the CHELSA database. -""" -bioclim(layer::Int64; x...) = first(bioclim([layer]; x...)) - -bioclim(layers::UnitRange{Int64}; x...) = bioclim(collect(layers); x...) +bioclim(layers::Vector{T}; args...) where {T <: Integer} = [bioclim(l; args...) for l in layers] +bioclim(layers::UnitRange{T}; args...) where {T <: Integer} = [bioclim(l; args...) for l in layers] diff --git a/src/datasets/download_layer.jl b/src/datasets/download_layer.jl new file mode 100644 index 00000000..ddf74795 --- /dev/null +++ b/src/datasets/download_layer.jl @@ -0,0 +1,69 @@ +function download_layer(w::WorldClim, layer::Integer) + 1 ≤ layer ≤ 19 || throw(ArgumentError("The layer must be between 1 and 19")) + + path = SimpleSDMLayers.assets_path() + + res = Dict(2.5 => "2.5", 5.0 => "5", 10.0 => "10") + + output_file = joinpath(path, "wc2.1_$(res[w.resolution])m_bio_$(layer).tif") + zip_file = joinpath(path, "bioclim_2.1_$(res[w.resolution])m.zip") + + if !isfile(path) + if !isfile(zip_file) + root = "https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/" + stem = "wc2.1_$(res[w.resolution])m_bio.zip" + r = HTTP.request("GET", root * stem) + open(zip_file, "w") do f + write(f, String(r.body)) + end + end + zf = ZipFile.Reader(zip_file) + file_to_read = + first(filter(f -> joinpath(path, f.name) == output_file, zf.files)) + + if !isfile(joinpath(path, file_to_read.name)) + write(joinpath(path, file_to_read.name), read(file_to_read)) + end + close(zf) + end + + return joinpath(path, file_to_read.name) +end + +function download_layer(l::EarthEnv, layer::Integer) + 1 ≤ layer ≤ 12 || throw(ArgumentError("The layer must be between 1 and 12")) + + path = SimpleSDMLayers.assets_path() + + root = "https://data.earthenv.org/consensus_landcover/" + stem = l.full ? "with_DISCover/consensus_full_class_$(layer).tif" : + "without_DISCover/Consensus_reduced_class_$(layer).tif" + filetype = l.full ? "complete" : "partial" + filename = "landcover_$(filetype)_$(layer).tif" + + if !isfile(joinpath(path, filename)) + layerrequest = HTTP.request("GET", root * stem) + open(joinpath(path, filename), "w") do layerfile + write(layerfile, String(layerrequest.body)) + end + end + + return joinpath(path, filename) +end + +function download_layer(::BioClim, layer::Integer) + 1 ≤ layer ≤ 19 || throw(ArgumentError("The layer must be between 1 and 19")) + path = SimpleSDMLayers.assets_path() + layer = lpad(layer, 2, "0") + filename = "CHELSA_bio10_$(layer).tif" + url_root = "ftp://envidatrepo.wsl.ch/uploads/chelsa/chelsa_V1/bioclim/integer/" + + filepath = joinpath(path, filename) + if !(isfile(filepath)) + res = HTTP.request("GET", url_root * filename) + open(filepath, "w") do f + write(f, String(res.body)) + end + end + return filepath +end diff --git a/src/datasets/geotiff.jl b/src/datasets/geotiff.jl new file mode 100644 index 00000000..3fef083e --- /dev/null +++ b/src/datasets/geotiff.jl @@ -0,0 +1,78 @@ +function _find_span(n, m, M, pos) + pos > M && return nothing + pos < m && return nothing + stride = (M - m) / n + centers = (m + 0.5stride):stride:(M-0.5stride) + span_pos = last(findmin(abs.(pos .- centers))) + return (stride, centers[span_pos], span_pos) +end + +""" + geotiff(::Type{LT}, tiff_file; left::T=-180.0, right::T=180.0, bottom::T=-90.0, top::T=90.0) where {LT <: SimpleSDMLayer, T <: Number} + +The geotiff function reads a geotiff file, and returns it as a matrix of the +correct type. The optional arguments `left`, `right`, `bottom`, and `left` are +defining the bounding box to read from the file. This is particularly useful if +you want to get a small subset from large files. + +The first argument is the type of the `SimpleSDMLayer` to be returned. +""" +function geotiff( + ::Type{LT}, + ::Type{ST}, + tiff_file; + left = nothing, + right = nothing, + bottom = nothing, + top = nothing, +) where {LT<:SimpleSDMLayer,ST<:SimpleSDMSource} + + left = isnothing(left) ? minimum(longitudes(ST)) : left + right = isnothing(right) ? maximum(longitudes(ST)) : right + bottom = isnothing(bottom) ? minimum(latitudes(ST)) : bottom + top = isnothing(top) ? maximum(latitudes(ST)) : top + + # We do a bunch of checking that the required bounding box is not out of bounds + # for the range of latitudes and longitudes. + @assert right > left + @assert top > bottom + + # This next block is reading the geotiff file, but also making sure that we + # clip the file correctly to avoid reading more than we need. + ArchGDAL.read(tiff_file) do dataset + + # The data we need is pretty much always going to be stored in the first + # band, so this is what we will get for now. Note that this is not + # reading the data yet, just retrieving the metadata. + band = ArchGDAL.getband(dataset, 1) + + # This next bit of information is crucial, as it will allow us to assign + # a matrix of the correct size, but also to get the right latitudes and + # longitudes. + width = ArchGDAL.width(dataset) + height = ArchGDAL.height(dataset) + + global lon_stride, lat_stride + global left_pos, right_pos + global bottom_pos, top_pos + + lon_stride, left_pos, min_width = _find_span(width, minimum(longitudes(ST)), maximum(longitudes(ST)), left) + _, right_pos, max_width = _find_span(width, minimum(longitudes(ST)), maximum(longitudes(ST)), right) + + lat_stride, top_pos, max_height = _find_span(height, minimum(latitudes(ST)), maximum(latitudes(ST)), top) + _, bottom_pos, min_height = _find_span(height, minimum(latitudes(ST)), maximum(latitudes(ST)), bottom) + + max_height, min_height = height .- (min_height, max_height) .+ 1 + + # We are now ready to initialize a matrix of the correct type. + pixel_type = ArchGDAL.pixeltype(band) + buffer = Matrix{pixel_type}(undef, length(min_width:max_width), length(min_height:max_height)) + ArchGDAL.read!(dataset, buffer, 1, min_height:max_height, min_width:max_width) + end + + buffer = convert(Matrix{Union{Nothing,eltype(buffer)}}, rotl90(buffer)) + buffer[findall(buffer .== minimum(buffer))] .= nothing + + return LT(buffer, left_pos-0.5lon_stride, right_pos+0.5lon_stride, bottom_pos+0.5lat_stride, top_pos-0.5lat_stride) + +end diff --git a/src/datasets/landcover.jl b/src/datasets/landcover.jl new file mode 100644 index 00000000..c0787ddd --- /dev/null +++ b/src/datasets/landcover.jl @@ -0,0 +1,49 @@ + +""" + landcover(layers::Vector{T}; full::Bool=false, path::AbstractString="assets") where {T <: Integer} + +Download and prepare the EarthEnv consensus landcover data, and returns them as +an array of `SimpleSDMPredictor`s. Layers are called by their number, from 1 to +19. The list of available layers is given in a table below. The raw data come +from https://www.earthenv.org/landcover. + +THe `full` keyword indicates whether the *DISCover* information must be +included. Quoting from the reference website: + +> Although DISCover is based on older remote sensing imagery (1992-1993), it +> contains some complementary information which is useful for capturing +> sub-pixel land cover heterogeneity (please see the associated article for +> details). Therefore, it is recommended to use the full version of the +> consensus land cover dataset for most applications. However, the reduced +> version may provide an alternative for applications in regions with large land +> cover change in the past two decades. + +It is recommended to *keep* the content of the `path` folder, as it will +eliminate the need to download and/or extract the tiff files. For example, +calling `landcover(1:12)` will download and extract everything, and future calls +will be much faster. Please keep in mind that the layers can be quite large, so +keeping the models stored is particularly important. + +| Variable | Description | +| -------- | ------------------------------------ | +| 1 | Evergreen/Deciduous Needleleaf Trees | +| 2 | Evergreen Broadleaf Trees | +| 3 | Deciduous Broadleaf Trees | +| 4 | Mixed/Other Trees | +| 5 | Shrubs | +| 6 | Herbaceous Vegetation | +| 7 | Cultivated and Managed Vegetation | +| 8 | Regularly Flooded Vegetation | +| 9 | Urban/Built-up | +| 10 | Snow/Ice | +| 11 | Barren | +| 12 | Open Water | + +These data are released under a CC-BY-NC license to Tuanmu & Jetz. +""" +function landcover(layer::Integer; full::Bool=false, left=nothing, right=nothing, bottom=nothing, top=nothing) + return raster(SimpleSDMPredictor, EarthEnv(full), layer=layer, left=left, right=right, bottom=bottom, top=top) +end + +landcover(layers::Vector{T}; args...) where {T <: Integer} = [landcover(l; args...) for l in layers] +landcover(layers::UnitRange{T}; args...) where {T <: Integer} = [landcover(l; args...) for l in layers] diff --git a/src/datasets/raster.jl b/src/datasets/raster.jl new file mode 100644 index 00000000..040d4be9 --- /dev/null +++ b/src/datasets/raster.jl @@ -0,0 +1,16 @@ +function raster(::Type{IT}, source::ST; layer::Integer=1, left=nothing, right=nothing, bottom=nothing, top=nothing) where {IT <: SimpleSDMLayer, ST <: SimpleSDMSource} + file = download_layer(source, layer) + left = isnothing(left) ? minimum(longitudes(ST)) : left + right = isnothing(right) ? maximum(longitudes(ST)) : right + bottom = isnothing(bottom) ? minimum(latitudes(ST)) : bottom + top = isnothing(top) ? maximum(latitudes(ST)) : top + return geotiff(IT, ST, file; left=left, right=right, bottom=bottom, top=top) +end + +function raster(::Type{IT}, source::ST, file; left=nothing, right=nothing, bottom=nothing, top=nothing) where {IT <: SimpleSDMLayer, ST <: SimpleSDMSource} + left = isnothing(left) ? minimum(longitudes(ST)) : left + right = isnothing(right) ? maximum(longitudes(ST)) : right + bottom = isnothing(bottom) ? minimum(latitudes(ST)) : bottom + top = isnothing(top) ? maximum(latitudes(ST)) : top + return geotiff(IT, ST, file; left=left, right=right, bottom=bottom, top=top) +end diff --git a/src/datasets/sources.jl b/src/datasets/sources.jl new file mode 100644 index 00000000..54ef31f4 --- /dev/null +++ b/src/datasets/sources.jl @@ -0,0 +1,26 @@ +abstract type SimpleSDMSource end + +latitudes(::Type{T}) where {T <: SimpleSDMSource} = (-90.0, 90.0) +longitudes(::Type{T}) where {T <: SimpleSDMSource} = (-180.0, 180.0) + +struct WorldClim <: SimpleSDMSource + resolution::AbstractFloat + function WorldClim(resolution::AbstractFloat) + resolution ∈ [2.5, 5.0, 10.0] || throw(ArgumentError("The resolution argument ($(resolution)) must be 2.5, 5, or 10")) + return new(resolution) + end +end + +WorldClim() = WorldClim(10.0) + +struct BioClim <: SimpleSDMSource end +longitudes(::Type{BioClim}) = (-180.0001388888, 179.9998611111) +latitudes(::Type{BioClim}) = (-90.0001388888, 83.9998611111) + +struct EarthEnv <: SimpleSDMSource + full::Bool +end + +EarthEnv() = EarthEnv(false) +latitudes(::Type{EarthEnv}) = (-56.0, 90.0) +longitudes(::Type{EarthEnv}) = (-180.0, 180.0) diff --git a/src/datasets/worldclim.jl b/src/datasets/worldclim.jl new file mode 100644 index 00000000..acddcddc --- /dev/null +++ b/src/datasets/worldclim.jl @@ -0,0 +1,50 @@ +""" + worldclim(layer::Integer; resolution::Float64=10.0, left=nothing, right=nothing, bottom=nothing, top=nothing) + +Download and prepare a WorldClim 2.1 bioclimatic variable, and returns it as an +`SimpleSDMPredictor`. Layers are called by their number, from 1 to 19. The list +of available layers is given in a table below. + +The keywords are `resolution`, which must be a floating point value, and either +`2.5`, `5.0`, or `10.0`, as well as optionally `left`, `right`, `bottom`, and +`top`, which will allow to only load parts of the data. + +Internally, this function will download the main zip file for the required +resolution from the WordlClim website, extract it, and parse the required +layers. + +It is recommended to *keep* the content of the `path` folder, as it will +eliminate the need to download and/or extract the tiff files. For example, +calling `wordlclim(1:19)` will download and extract everything, and future calls +will be much faster. + +| Variable | Description | +| ------ | ------ | +| 1 | Annual Mean Temperature | +| 2 | Mean Diurnal Range (Mean of monthly (max temp - min temp)) | +| 3 | Isothermality (BIO2/BIO7) (* 100) | +| 4 | Temperature Seasonality (standard deviation *100) | +| 5 | Max Temperature of Warmest Month | +| 6 | Min Temperature of Coldest Month | +| 7 | Temperature Annual Range (BIO5-BIO6) | +| 8 | Mean Temperature of Wettest Quarter | +| 9 | Mean Temperature of Driest Quarter | +| 10 | Mean Temperature of Warmest Quarter | +| 11 | Mean Temperature of Coldest Quarter | +| 12 | Annual Precipitation | +| 13 | Precipitation of Wettest Month | +| 14 | Precipitation of Driest Month | +| 15 | Precipitation Seasonality (Coefficient of Variation) | +| 16 | Precipitation of Wettest Quarter | +| 17 | Precipitation of Driest Quarter | +| 18 | Precipitation of Warmest Quarter | +| 19 | Precipitation of Coldest Quarter | + +Original data: https://www.worldclim.org/data/worldclim21.html +""" +function worldclim(layer::Integer; resolution::Float64=10.0, left=nothing, right=nothing, bottom=nothing, top=nothing) + return raster(SimpleSDMPredictor, WorldClim(resolution), layer=layer, left=left, right=right, bottom=bottom, top=top) +end + +worldclim(layers::Vector{T}; args...) where {T <: Integer} = [worldclim(l; args...) for l in layers] +worldclim(layers::UnitRange{T}; args...) where {T <: Integer} = [worldclim(l; args...) for l in layers] diff --git a/src/lib/generated.jl b/src/lib/generated.jl index f461b024..cbf6fb12 100644 --- a/src/lib/generated.jl +++ b/src/lib/generated.jl @@ -1,23 +1,64 @@ -import Base: maximum, minimum, sum -import Statistics: mean, median, std +ops = Dict( + :Base => [:sum, :maximum, :minimum], + :Statistics => [:mean, :median, :std] + ) -ops = Symbol.(( - "Base.sum", "Base.maximum", "Base.minimum", - "Statistics.mean", "Statistics.median", "Statistics.std" - )) +for op in ops + mod = op.first + if mod != :Base + eval(quote + using $mod + end) + end + for fun in op.second + eval(quote + import $mod: $fun + end) + for ty in (:SimpleSDMResponse, :SimpleSDMPredictor) + eval(quote + """ + $($mod).$($fun)(l::$($ty){T}) where {T <: Number} -for op in ops, ty in (:SimpleSDMResponse, :SimpleSDMPredictor) - eval(quote - """ - $($op)(l::$($ty){T}) where {T <: Number} + Applies `$($fun)` (from `$($mod)`) to an object of type `$($ty)`. This function has been + automatically generated. Note that this function is only applied to the + non-`nothing` elements of the layer, and has no method to work on the `dims` + keyword; the grid itself can be extracted with `convert(Matrix, l)`. + """ + function $mod.$fun(l::$ty{T}) where {T <: Number} + return $mod.$fun(filter(!isnothing, l.grid)) + end + end) + end + end +end + +whole = Dict( + :Base => [:abs, :sqrt, :log, :log2, :log10, :log1p, :exp, :exp2, :exp10, :expm1] +) + +for op in whole + mod = op.first + if mod != :Base + eval(quote + using $mod + end) + end + for fun in op.second + eval(quote + import $mod: $fun + end) + for ty in (:SimpleSDMResponse, :SimpleSDMPredictor) + eval(quote + """ + $($mod).$($fun)(l::$($ty){T}) where {T <: Number} - Applies `$($op)` to an object of type `$($ty)`. This function has been - automatically generated. Note that this function is only applied to the - non-`NaN` elements of the layer, and has no method to work on the `dims` - keyword; the grid itself can be extracted with `convert(Matrix, l)`. - """ - function $op(l::$ty{T}) where {T <: Number} - return $op(filter(!isnan, l.grid)) + Applies `$($fun)` (from `$($mod)`) to every cell wihtin a `$($ty)`, as long as + this cell is not `nothing`. This function has been automatically generated. + """ + function $mod.$fun(l::$ty{T}) where {T <: Number} + return broadcast($mod.$fun, l) + end + end) end - end) + end end diff --git a/src/lib/geotiff.jl b/src/lib/geotiff.jl deleted file mode 100644 index 99958878..00000000 --- a/src/lib/geotiff.jl +++ /dev/null @@ -1,48 +0,0 @@ -""" - geotiff(tiff_file; T::Type=Float64) - -The geotiff function reads a geotiff file, and returns it as a matrix of type -`T`. -""" -function geotiff(tiff_file; T::Type=Float64) - # Register GDAL drivers - GDAL.gdalallregister() - - # Load the dataset - dataset = GDAL.gdalopen(tiff_file, GDAL.GA_ReadOnly) - - # Band - band = GDAL.gdalgetrasterband(dataset, 1) - - # Matrix - xs = GDAL.gdalgetrasterxsize(dataset) - ys = GDAL.gdalgetrasterysize(dataset) - - bandtype = GDAL.gdalgetrasterdatatype(band) - - V = zeros(T, (xs, ys)) - - GDAL.gdalrasterio( - band, - GDAL.GF_Read, - 0, 0, xs, ys, - pointer(V), - xs, ys, - GDAL.gdalgetrasterdatatype(band), - 0, 0 - ) - - K = zeros(Float64, (ys, xs)) - for (i,r) in enumerate(reverse(1:size(V, 2))) - K[i,:] = float(V[:,r]) - end - - this_min = minimum(V) - - for i in eachindex(K) - K[i] = K[i] > this_min ? K[i] : NaN - end - - return K - -end diff --git a/src/lib/overloads.jl b/src/lib/overloads.jl index fda0c049..9575cc52 100644 --- a/src/lib/overloads.jl +++ b/src/lib/overloads.jl @@ -7,6 +7,7 @@ import Base: similar import Base: copy import Base: eltype import Base: convert +import Base.Broadcast: broadcast """ Base.convert(::Type{SimpleSDMResponse}, layer::T) where {T <: SimpleSDMPredictor} @@ -112,7 +113,8 @@ function Base.getindex(layer::T, i::R, j::R) where {T <: SimpleSDMLayer, R <: Un j_max = isempty(j) ? max(j.stop+2, size(layer, 2)) : j.stop i_fix = i_min:i_max j_fix = j_min:j_max - return T( + RT = T <: SimpleSDMResponse ? SimpleSDMResponse : SimpleSDMPredictor + return RT( layer.grid[i_fix,j_fix], minimum(longitudes(layer)[j_fix])-stride(layer)[1], maximum(longitudes(layer)[j_fix])+stride(layer)[1], @@ -180,7 +182,7 @@ end Base.getindex(layer::T, n::NT) where {T <: SimpleSDMLayer, NT <: NamedTuple} Returns a subset of the argument layer, where the new limits are given in -a NamedTuple by `left`, `right`, `top`, and `bottom`, in any order. Up to +a NamedTuple by `left`, `right`, `top`, and `bottom`, in any order. Up to three of these can be omitted, and if so these limits will not be affected. """ function Base.getindex(layer::T, n::NT) where {T <: SimpleSDMLayer, NT <: NamedTuple} @@ -230,15 +232,15 @@ end Base.similar(l::T) where {T <: SimpleSDMLayer} Returns a `SimpleSDMResponse` of the same dimensions as the original layer, with -`NaN` in the same positions. The rest of the values are replaced by the output -of `zero(eltype(layer.grid))`, which implies that there must be a way to get a zero -for the type. If not, the same result can always be achieved through the use of -`copy`, manual update, and `convert`. +`nothing` in the same positions. The rest of the values are replaced by the +output of `zero(eltype(layer.grid))`, which implies that there must be a way to +get a zero for the type. If not, the same result can always be achieved through +the use of `copy`, manual update, and `convert`. """ function Base.similar(layer::T) where {T <: SimpleSDMLayer} emptygrid = similar(layer.grid) for i in eachindex(emptygrid) - emptygrid[i] = isnan(layer.grid[i]) ? NaN : zero(eltype(layer.grid)) + emptygrid[i] = isnothing(layer.grid[i]) ? nothing : zero(eltype(layer.grid[i])) end return SimpleSDMResponse(emptygrid, layer.left, layer.right, layer.bottom, layer.top) end @@ -250,5 +252,25 @@ Returns a new copy of the layer, which has the same type. """ function Base.copy(layer::T) where {T <: SimpleSDMLayer} copygrid = copy(layer.grid) - return T(copygrid, copy(layer.left), copy(layer.right), copy(layer.bottom), copy(layer.top)) + RT = T <: SimpleSDMResponse ? SimpleSDMResponse : SimpleSDMPredictor + return RT(copygrid, copy(layer.left), copy(layer.right), copy(layer.bottom), copy(layer.top)) +end + +""" + Broadcast.broadcast(f, L::LT) where {LT <: SimpleSDMLayer} + +TODO +""" +function Base.Broadcast.broadcast(f, L::LT) where {LT <: SimpleSDMLayer} + newgrid = Array{Any}(nothing, size(L)) + N = SimpleSDMResponse(newgrid, L) + v = filter(!isnothing, L.grid) + fv = f.(v) + N.grid[findall(!isnothing, L.grid)] .= fv + + internal_types = unique(typeof.(N.grid)) + N.grid = convert(Matrix{Union{internal_types...}}, N.grid) + + RT = LT <: SimpleSDMResponse ? SimpleSDMResponse : SimpleSDMPredictor + return RT(N.grid, N) end diff --git a/src/lib/types.jl b/src/lib/types.jl index aedfdb4a..54c67c71 100644 --- a/src/lib/types.jl +++ b/src/lib/types.jl @@ -4,10 +4,8 @@ All types in the package are part of the abstract type `SimpleSDMLayer`. A `left`, `right`, `bottom` and `top` are floating point numbers specifying the bounding box. -It is assumed that the missing values will be represented as `NaN`, so the -"natural" type for the values of `grid` are floating points, but it is possible -to use any other type `T` by having `grid` contain `Union{T,Float64}` (for -example). +It is assumed that the missing values will be represented as `nothing`, so +internally the matrix will have type `Union{T, Nothing}`. """ abstract type SimpleSDMLayer end @@ -19,21 +17,14 @@ be modified by the analysis. Note that if you are in a bind, the values of the way of handling predictors you need to modify would be to use `convert` methods. """ struct SimpleSDMPredictor{T} <: SimpleSDMLayer - grid::Matrix{T} + grid::Matrix{Union{Nothing,T}} left::AbstractFloat right::AbstractFloat bottom::AbstractFloat top::AbstractFloat -end - -""" - SimpleSDMPredictor(grid::Matrix{T}) where {T} - -If only a matrix is given to `SimpleSDMPredictor`, by default we assume that it -covers the entire range of latitudes and longitudes. -""" -function SimpleSDMPredictor(grid::Matrix{T}) where {T} - return SimpleSDMPredictor(grid, -180., 180., -90., 90.) + function SimpleSDMPredictor(grid::Matrix{Union{Nothing,T}}, l::K, r::K, b::K, t::K) where {T, K<:AbstractFloat} + return new{T}(grid, l, r, b, t) + end end """ @@ -41,19 +32,47 @@ A response is a `SimpleSDMLayer` that is mutable, and is the usual type to store analysis outputs. You can transform a response into a predictor using `convert`. """ mutable struct SimpleSDMResponse{T} <: SimpleSDMLayer - grid::Matrix{T} + grid::Matrix{Union{Nothing,T}} left::AbstractFloat right::AbstractFloat bottom::AbstractFloat top::AbstractFloat + function SimpleSDMResponse(grid::Matrix{Union{Nothing,T}}, l::K, r::K, b::K, t::K) where {T, K<:AbstractFloat} + return new{T}(grid, l, r, b, t) + end end -""" - SimpleSDMResponse(grid::Matrix{T}) where {T} +# Begin code generation for the constructors -If only a matrix is given to `SimpleSDMPredictor`, by default we assume that it -covers the entire range of latitudes and longitudes. -""" -function SimpleSDMResponse(grid::Matrix{T}) where {T} - return SimpleSDMResponse(grid, -180., 180., -90., 90.) +simplesdm_types = (:SimpleSDMResponse, :SimpleSDMPredictor) + +for simplesdm_type in simplesdm_types + eval(quote + """ + $($simplesdm_type)(grid::Matrix{Union{Nothing,T}}) where {T} + + Returns a `$($simplesdm_type)` spanning the entire globe. + """ + function $simplesdm_type(grid::Matrix{Union{Nothing,T}}) where {T} + return $simplesdm_type(grid, -180.0, 180.0, -90.0, 90.0) + end + + """ + $($simplesdm_type)(grid::Matrix{Union{Nothing,T}}) where {T} + + Returns a `$($simplesdm_type)` spanning the entire globe by converting to the + correct type, *i.e.* with `Nothing` as an acceptable value. + """ + function $simplesdm_type(grid::Matrix{T}) where {T} + return $simplesdm_type(convert(Matrix{Union{Nothing,T}}, grid), -180.0, 180.0, -90.0, 90.0) + end + + function $simplesdm_type(grid::Matrix{T}, l::K, r::K, b::K, t::K) where {T, K<:AbstractFloat} + return $simplesdm_type(convert(Matrix{Union{Nothing,T}}, grid), l, r, b, t) + end + + function $simplesdm_type(grid::Matrix{T}, L::K) where {T, K<:SimpleSDMLayer} + return $simplesdm_type(convert(Matrix{Union{Nothing,T}}, grid), L.left, L.right, L.bottom, L.top) + end + end) end diff --git a/src/operations/coarsen.jl b/src/operations/coarsen.jl index d9859a68..3172529e 100644 --- a/src/operations/coarsen.jl +++ b/src/operations/coarsen.jl @@ -1,34 +1,37 @@ """ - coarsen(L::LT, f::FT, d::Tuple{IT,IT}; NaNremove::Bool=true) where {LT <: SimpleSDMLayer, FT <: Function, IT <: Integer} + coarsen(L::LT, f::FT, d::Tuple{IT,IT}) where {LT <: SimpleSDMLayer, FT <: Function, IT <: Integer} This function will *aggregate* a layer by merging cells. The function `f` passed as its second argument is expected to take an array as input, and return a -single value of the same type as the values of the layer, or as a floating point -value. Note that the element type of the returned layer will have type `Any`, -which is not really pretty, but should serve well enough (and can be changed by -working on the `grid` field directly if you really need it). +single value of any type (but it is sort of a social contract here that this +will be a number of some sort, and if it is not, that you really know what +you're doing). The size of the cells to aggregate is given by the tuple, so that `(2,2)` will make groups of two cells vertically and two cells horizontally, for a total of -four cells. +four cells. By default, the cells containing `nothing` will be *ignored*, so +that `f` is only applied to non-nothing values. -The `NaNremove` keyword argument is intended to remove `NaN` values before -applying `f`. It defaults to `true`. +In cases where the number of cells to aggregate is not matching with the size of +the grid, and `ArgumentError` will be thrown. Note that there is no expectation +that the two values in `d` will be the same. """ -function coarsen(L::LT, f::FT, d::Tuple{IT,IT}; NaNremove::Bool=true) where {LT <: SimpleSDMLayer, FT <: Function, IT <: Integer} +function coarsen(L::LT, f::FT, d::Tuple{IT,IT}) where {LT <: SimpleSDMLayer, FT <: Function, IT <: Integer} cx, cy = d CX, CY = size(L) + # Check that the arguments are compatible with the size of the grid - mod(CX, cx) == 0 || throw(ArgumentError("The number of cells to aggregate must be compatible with the number of rows")) - mod(CY, cy) == 0 || throw(ArgumentError("The number of cells to aggregate must be compatible with the number of columns")) + mod(CX, cx) == 0 || throw(ArgumentError("The number of cells to aggregate ($(cx)) must be compatible with the number of rows ($(CX))")) + mod(CY, cy) == 0 || throw(ArgumentError("The number of cells to aggregate ($(cy)) must be compatible with the number of columns ($(CY))")) + # Then we create a new grid, full of undefined values of the same type as - # the elements of L + # the elements of L. This is not the best type here, of course, but I'm not + # sure how to get a better idea _before_. This will all be converted to the + # more compact type possible at the end, before returning a layer. nx = convert(Int64, CX/cx) ny = convert(Int64, CY/cy) - # NOTE The union type here is not really pretty, but very much necessary to - # play nicely with NaN values. This needs to be replaced by a better - # solution eventually. - newgrid = Array{Union{eltype(L),Float64},2}(undef, (nx, ny)) + newgrid = Array{Any}(undef, (nx, ny)) + # At this point, we do not need to create the new SimpleSDMLayer object, as # it can be a SimpleSDMPredictor which is not mutable. Instead, it is better # to work directly on the grid. We will iterate directly on the new grid, @@ -37,21 +40,28 @@ function coarsen(L::LT, f::FT, d::Tuple{IT,IT}; NaNremove::Bool=true) where {LT old_i = ((i-1)*cx+1):(i*cx) for j in 1:size(newgrid, 2) old_j = ((j-1)*cy+1):(j*cy) - V = vec(L.grid[old_i, old_j]) - # If there are NaN to remove, then we call filter!. NaN only make - # sense if the type of the elements of V is a floating point, so we - # need to do an additional check to only apply this whenever there - # are floating point elements: - !NaNremove || filter!(x -> typeof(x)<:AbstractFloat ? !isnan(x) : true, V) - if length(V) == 0 - # If nothing is left in V, then the grid gets a NaN - # automatically - newgrid[i,j] = NaN - else - newgrid[i,j] = f(V) - end + + # This operation is super-duper important because it is the only + # thing that makes the function work for some types - why it would + # be the case is beyond me, but it turned out that being explicit + # about converting things to a vector keep Julia happy, and my sense + # of self worth is directly tied to how little error messages I get. + V = convert(Vector, vec(L.grid[old_i, old_j])) + + # We remove the nothing values from the grid + filter!(!isnothing, V) + + # If there is nothing left in V, we return nothing -- if not, we return f(V) + newgrid[i,j] = length(V) == 0 ? nothing : f(V) + end end - # Now that everything is done, we can return an object of the same type as L - return LT(newgrid, L.left, L.right, L.bottom, L.top) + + # Now we change the type + internal_types = unique(typeof.(newgrid)) + newgrid = convert(Matrix{Union{internal_types...}}, newgrid) + + # Now that everything is done, we can return an object of the correct type + NT = LT <: SimpleSDMPredictor ? SimpleSDMPredictor : SimpleSDMResponse + return NT(newgrid, L.left, L.right, L.bottom, L.top) end diff --git a/src/operations/sliding.jl b/src/operations/sliding.jl new file mode 100644 index 00000000..45b9c624 --- /dev/null +++ b/src/operations/sliding.jl @@ -0,0 +1,55 @@ +function haversine(p1, p2; R=6371.0) + lon1, lat1 = p1 + lon2, lat2 = p2 + φ1 = lat1 * π/180.0 + φ2 = lat2 * π/180.0 + Δφ = (lat2-lat1) * π/180.0 + Δλ = (lon2-lon1) * π/180.0 + a = sin(Δφ/2.0)^2.0 + cos(φ1)*cos(φ2) * sin(Δλ)^2.0 + c = 2.0 * atan(sqrt(a), sqrt(1.0-a)) + return R*c +end + +""" + slidingwindow(L::LT, f::FT, d::IT) where {LT <: SimpleSDMLayer, FT <: Function, IT <: Number} + +This function will replace the value at any cell by applying the function `f` to +the array of cells values that are within a distance `d` (in kilometers) from +the focal cell. This is, for example, useful to use an average to smooth out the +layers. The distance is estimated using the haversine distance, assuming that +the radius of the Earth is 6371.0 km. This means that the size of the window +will vary a little bit across latitudes, but this is far better than using a +number of cells, which would have dramatic consequences near the poles. + +It *always* returns a `SimpleSDMResponse`, and the cells containing `nothing` +will also not contain a value in the output. This is *different* from the +behavior of `coarsen`, which tends to expand the area of the layer in which we +have data. + +This function is currently relatively slow. Performance improvements will arrive +at some point. +""" +function slidingwindow(L::LT, f::FT, d::IT) where {LT <: SimpleSDMLayer, FT <: Function, IT <: Number} + newgrid = Array{Any}(nothing, size(L)) + N = SimpleSDMResponse(newgrid, L) + pixels = [] + for lat in latitudes(L) + for lon in longitudes(L) + if !isnothing(L[lon,lat]) + push!(pixels, (lon, lat) => L[lon,lat]) + end + end + end + + for p1 in pixels + ok = filter(p2 -> haversine(p2.first, p1.first) < 100.0, pixels) + val = [p2.second for p2 in ok] + N[p1.first...] = f(val) + end + + internal_types = unique(typeof.(N.grid)) + N.grid = convert(Matrix{Union{internal_types...}}, N.grid) + N = typeof(L) <: SimpleSDMPredictor ? convert(SimpleSDMPredictor, N) : N + + return N +end diff --git a/src/recipes/recipes.jl b/src/recipes/recipes.jl index 558d0084..19da905a 100644 --- a/src/recipes/recipes.jl +++ b/src/recipes/recipes.jl @@ -3,14 +3,15 @@ test 1 """ @recipe function plot(layer::T) where {T <: SimpleSDMLayer} seriestype --> :heatmap - @assert eltype(layer) <: Number if get(plotattributes, :seriestype, :heatmap) in [:heatmap, :contour] aspect_ratio --> 1 xlims --> (minimum(longitudes(layer)),maximum(longitudes(layer))) ylims --> (minimum(latitudes(layer)),maximum(latitudes(layer))) - longitudes(layer), latitudes(layer), layer.grid + lg = copy(layer.grid) + lg[lg.==nothing] .= NaN + longitudes(layer), latitudes(layer), lg elseif get(plotattributes, :seriestype, :histogram) in [:histogram, :density] - filter(!isnan, layer.grid) + filter(!isnothing, layer.grid) end end @@ -20,10 +21,8 @@ test 2 @recipe function plot(l1::FT, l2::ST) where {FT <: SimpleSDMLayer, ST <: SimpleSDMLayer} seriestype --> :scatter if get(plotattributes, :seriestype, :scatter) in [:scatter, :histogram2d] - @assert eltype(l1) <: Number - @assert eltype(l2) <: Number SimpleSDMLayers._layers_are_compatible(l1, l2) - valid_i = filter(i -> !(isnan(l1[i])|isnan(l2[i])), eachindex(l1.grid)) + valid_i = filter(i -> !(isnothing(l1[i])|isnothing(l2[i])), eachindex(l1.grid)) l1.grid[valid_i], l2.grid[valid_i] end end diff --git a/test/Project.toml b/test/Project.toml index d5d9f511..a8a37519 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,3 +2,7 @@ Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" GBIF = "ee291a33-5a6c-5552-a3c8-0f29a1181037" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[compat] +GBIF = "0.2" diff --git a/test/chelsa.jl b/test/chelsa.jl new file mode 100644 index 00000000..e76cdb8d --- /dev/null +++ b/test/chelsa.jl @@ -0,0 +1,8 @@ +module SSLTestCHELSA +using SimpleSDMLayers +using Test + +lc1 = bioclim(1; left=-5.0, right=7.0, bottom=30.0, top=45.0) +@test typeof(lc1) <: SimpleSDMPredictor + +end diff --git a/test/coarsen.jl b/test/coarsen.jl index 06e63e4d..1a042208 100644 --- a/test/coarsen.jl +++ b/test/coarsen.jl @@ -1,6 +1,7 @@ module SSLTestCoarsen using SimpleSDMLayers using Test +using Statistics M = convert(Matrix, reshape(1:36, (6,6))) S = SimpleSDMPredictor(M, 0.0, 1.0, 0.0, 1.0) @@ -12,7 +13,14 @@ max33 = coarsen(S, maximum, (3,3)) @test max33.grid == [15 33; 18 36] # Coarsen should play nicely with the non-float types -M = SimpleSDMResponse(["a" NaN "b" "c"; "d" "e" "f" "g"; "d" "e" NaN "g"; NaN "x" "y" "z"], 0.0, 1.0, 0.0, 1.0) +M = SimpleSDMResponse(["a" nothing "b" "c"; "d" "e" "f" "g"; "d" "e" nothing "g"; nothing "x" "y" "z"], 0.0, 1.0, 0.0, 1.0) @test coarsen(M, x -> reduce(*, x), (2,2)).grid == ["ade" "bfcg"; "dex" "ygz"] +# Should work on a real-world example for a series of functions +temperature = worldclim(1) +for f in [minimum, maximum, mean, median] + t_coarse = coarsen(temperature, f, (10, 10)) + @test typeof(t_coarse) <: SimpleSDMPredictor +end + end diff --git a/test/construction.jl b/test/construction.jl index bc1e4c1a..7b57b982 100644 --- a/test/construction.jl +++ b/test/construction.jl @@ -27,4 +27,12 @@ R = SimpleSDMPredictor(M) @test R.top == 90. @test R.bottom == -90. +# Construction for a matrix of nothing and values +M = Matrix{Union{Nothing,Float64}}(nothing, (3,5)) +R = SimpleSDMResponse(M) +@test R.left == -180. +@test R.right == 180. +@test R.top == 90. +@test R.bottom == -90. + end diff --git a/test/dataread.jl b/test/dataread.jl new file mode 100644 index 00000000..8642e78d --- /dev/null +++ b/test/dataread.jl @@ -0,0 +1,14 @@ +module SSLTestDataRead +using SimpleSDMLayers +using Test + +file = joinpath(dirname(pathof(SimpleSDMLayers)), "..", "data", "connectivity.tiff") +struct MyConnectivityMap <: SimpleSDMLayers.SimpleSDMSource end +SimpleSDMLayers.latitudes(::Type{MyConnectivityMap}) = (-10.0, 10.0) +SimpleSDMLayers.longitudes(::Type{MyConnectivityMap}) = (-20.0, 20.0) +mp = SimpleSDMLayers.raster(SimpleSDMResponse, MyConnectivityMap(), file) + +@test typeof(mp) <: SimpleSDMResponse +@test size(mp) == (1255, 1206) + +end diff --git a/test/gallery/contour.png b/test/gallery/contour.png deleted file mode 100644 index 4ba50b87..00000000 Binary files a/test/gallery/contour.png and /dev/null differ diff --git a/test/gallery/density.png b/test/gallery/density.png deleted file mode 100644 index 2ef6eda6..00000000 Binary files a/test/gallery/density.png and /dev/null differ diff --git a/test/gallery/filled_contour.png b/test/gallery/filled_contour.png deleted file mode 100644 index 6afc4813..00000000 Binary files a/test/gallery/filled_contour.png and /dev/null differ diff --git a/test/gallery/heatmap.png b/test/gallery/heatmap.png deleted file mode 100644 index 6a03a9b7..00000000 Binary files a/test/gallery/heatmap.png and /dev/null differ diff --git a/test/gallery/heatmap_scaledown.png b/test/gallery/heatmap_scaledown.png deleted file mode 100644 index eb6f665b..00000000 Binary files a/test/gallery/heatmap_scaledown.png and /dev/null differ diff --git a/test/gallery/scatter-2d.png b/test/gallery/scatter-2d.png deleted file mode 100644 index 1fb00881..00000000 Binary files a/test/gallery/scatter-2d.png and /dev/null differ diff --git a/test/gallery/scatter.png b/test/gallery/scatter.png deleted file mode 100644 index ee12df5d..00000000 Binary files a/test/gallery/scatter.png and /dev/null differ diff --git a/test/gbif.jl b/test/gbif.jl index cd045656..5ce7182b 100644 --- a/test/gbif.jl +++ b/test/gbif.jl @@ -21,7 +21,7 @@ for oc in o numboc[oc] += 1 end -@test typeof(convert(Matrix, numboc)) == Array{Int64,2} +@test typeof(convert(Matrix, numboc)) == Matrix{Union{Int64,Nothing}} @test typeof(clip(temperature, o)) == typeof(temperature) diff --git a/test/generated.jl b/test/generated.jl new file mode 100644 index 00000000..b0b6aa69 --- /dev/null +++ b/test/generated.jl @@ -0,0 +1,21 @@ +module SSLTestGenerated +using SimpleSDMLayers +using Test + +M = rand(Float64, (5, 10)) +S = SimpleSDMPredictor(M, 0.0, 1.0, 0.0, 1.0) + +@test maximum(S) ≤ 1.0 +@test minimum(S) ≥ 0.0 + +using Statistics + +@test mean(S) ≈ mean(M) +@test std(S) ≈ std(M) +@test median(S) ≈ median(M) + +M = ones(Float64, (5, 10)) +S = SimpleSDMPredictor(M, 0.0, 1.0, 0.0, 1.0) +@test typeof(sqrt(S)) <: SimpleSDMLayer + +end diff --git a/test/landcover.jl b/test/landcover.jl new file mode 100644 index 00000000..9779dac4 --- /dev/null +++ b/test/landcover.jl @@ -0,0 +1,8 @@ +module SSLTestLandCover +using SimpleSDMLayers +using Test + +lc1 = landcover(1; left=-5.0, right=7.0, bottom=30.0, top=45.0) +@test typeof(lc1) <: SimpleSDMPredictor + +end diff --git a/test/plots.jl b/test/plots.jl index 60f0da5c..f4092293 100644 --- a/test/plots.jl +++ b/test/plots.jl @@ -7,22 +7,58 @@ temperature, precipitation = worldclim([1,12]) ispath("gallery") || mkpath("gallery") -plot(temperature, c=:RdYlBu_r, title="Temperature", frame=:box, clim=(-50,50), +chelsa1 = bioclim(1; left=-11.0, right=31.1, bottom=29.0, top=71.1) +n_chelsa1 = zeros(Float32, size(chelsa1)); +for (i,e) in enumerate(chelsa1.grid) + n_chelsa1[i] = isnothing(e) ? NaN : Float32(e) +end +chelsa1 = SimpleSDMPredictor(n_chelsa1, chelsa1) +plot(chelsa1, c=:heat, title="Temperature from CHELSA", frame=:box, + xlabel = "Longitude", + ylabel= "Latitude") +savefig(joinpath("gallery", "range-comparison-chelsa.png")) + +wc1 = worldclim(1; left=-11.0, right=31.1, bottom=29.0, top=71.1) +plot(wc1, c=:heat, title="Temperature from worldclim @ 10", frame=:box, + xlabel = "Longitude", + ylabel= "Latitude") +savefig(joinpath("gallery", "range-comparison-worldclim-10.png")) + +wc1 = worldclim(1; resolution=5.0, left=-11.0, right=31.1, bottom=29.0, top=71.1) +plot(wc1, c=:heat, title="Temperature from worldclim @ 5", frame=:box, + xlabel = "Longitude", + ylabel= "Latitude") +savefig(joinpath("gallery", "range-comparison-worldclim-5.png")) + +lc1 = landcover(1; left=-11.0, right=31.1, bottom=29.0, top=71.1) +n_lc1 = zeros(Float32, size(lc1)); +for (i,e) in enumerate(lc1.grid) + n_lc1[i] = isnothing(e) ? NaN : Float32(e) +end +lc1 = SimpleSDMPredictor(n_lc1, lc1) +plot(lc1, c=:terrain, title="Landcover class 1", frame=:box, + xlabel = "Longitude", + ylabel= "Latitude") +savefig(joinpath("gallery", "range-comparison-landcover.png")) + +plot(temperature, c=:magma, title="Temperature", frame=:box, xlabel = "Longitude", ylabel= "Latitude") savefig(joinpath("gallery", "heatmap.png")) -contour(temperature, c=:RdYlBu_r, title="Temperature", frame=:box, clim=(-50,50), +contour(temperature, c=:viridis, title="Temperature", frame=:box, xlabel = "Longitude", ylabel= "Latitude") savefig(joinpath("gallery", "contour.png")) -contour(temperature, c=:RdYlBu_r, title="Temperature", frame=:box, clim=(-50,50), fill=true, +contour(temperature, c=:cividis, title="Temperature", frame=:box, + fill=true, lw=0.0, xlabel = "Longitude", ylabel= "Latitude") savefig(joinpath("gallery", "filled_contour.png")) -heatmap(coarsen(temperature, minimum, (10,10)), c=:RdYlBu_r, title="Temperature", frame=:box, clim=(-50,50)) +cmap = coarsen(temperature, minimum, (10,10)) +heatmap(cmap, c=:RdYlBu, title="Temperature", frame=:box) xaxis!("Longitude") yaxis!("Latitude") savefig(joinpath("gallery", "heatmap_scaledown.png")) diff --git a/test/runtests.jl b/test/runtests.jl index 7558d017..9c127a8b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,24 +3,28 @@ using Test global anyerrors = false -tests = Dict{String,String}( - "construction" => "construction.jl", - "basics" => "basics.jl", - "overloads" => "overloads.jl", - "worldclim" => "worldclim.jl", - "coarsen" => "coarsen.jl", - "plotting" => "plots.jl", - "GBIF" => "gbif.jl" - ) +tests = [ + "construction" => "construction.jl", + "basics" => "basics.jl", + "overloads" => "overloads.jl", + "generated" => "generated.jl", + "import" => "dataread.jl", + "worldclim" => "worldclim.jl", + "landcover" => "landcover.jl", + "chelsa" => "chelsa.jl", + "coarsen" => "coarsen.jl", + "plotting" => "plots.jl", + "GBIF" => "gbif.jl" +] -for (name,test) in tests +for test in tests try - include(test) - println("\033[1m\033[32m✓\033[0m\t$(name)") + include(test.second) + println("\033[1m\033[32m✓\033[0m\t$(test.first)") catch e global anyerrors = true - println("\033[1m\033[31m×\033[0m\t$(name)") - println("\033[1m\033[38m→\033[0m\ttest/$(test)") + println("\033[1m\033[31m×\033[0m\t$(test.first)") + println("\033[1m\033[38m→\033[0m\ttest/$(test.second)") showerror(stdout, e, backtrace()) println() break diff --git a/test/worldclim.jl b/test/worldclim.jl index 1e230e4b..426ae9f5 100644 --- a/test/worldclim.jl +++ b/test/worldclim.jl @@ -3,9 +3,14 @@ using SimpleSDMLayers using Test wc1and2 = worldclim([1,2]) - @test typeof(first(wc1and2)) <: SimpleSDMPredictor +temp = worldclim(1) +@test size(temp) == (1080, 2160) +@test round(first(stride(temp)); digits=2) ≈ round(last(stride(temp)); digits=2) +@test length(longitudes(temp)) == 2160 +@test length(latitudes(temp)) == 1080 + wc3 = worldclim(3) @test typeof(wc3) <: SimpleSDMPredictor