diff --git a/.github/ISSUE_TEMPLATE/bug.yaml b/.github/ISSUE_TEMPLATE/bug.yaml new file mode 100644 index 00000000..2173318c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug.yaml @@ -0,0 +1,34 @@ +name: Bug Report +description: File a bug report - this is the type of issue to use if you are fairly sure something is going awfully wrong with the package. +title: "[Bug]: " +labels: ["bug", "triage"] +assignees: + - tpoisot +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to fill out this bug report! This is one of the most effective steps in making the project better. + - type: input + id: contact + attributes: + label: Contact Details + description: How can we get in touch with you if we need more info? This is mostly important if the bug can be reproduced only when using data you are not able to share publicly. + placeholder: ex. email@example.com + validations: + required: false + - type: textarea + id: what-happened + attributes: + label: What happened? + description: Also tell us, what did you expect to happen? + placeholder: Tell us what you see! + value: "A bug happened!" + validations: + required: true + - type: textarea + id: logs + attributes: + label: Stacktrace + description: Please copy and paste the stacktrace that give the error message. + render: shell \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index bf663a29..00000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: '' -labels: bug, need-triage -assignees: 'tpoisot', 'gabrieldansereau' ---- - -**Describe the bug** -A clear and concise description of what the bug is. - -**To Reproduce** -A [minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example) that is enough to show what the problem is - -~~~ julia -using SimpleSDMLayers - -# Add your code here -~~~ - -**Stacktrace** - -~~~ julia -# Please paste your stacktrace here -~~~ - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Environment:** - - OS: - - Julia version: - - Other packages used in the example and their versions: - -**Additional context** -Add any other context about the problem here. diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index d3c2dac8..74e0e05a 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -3,7 +3,7 @@ name: CI on: push: branches: - - master + - main pull_request: jobs: @@ -30,6 +30,9 @@ jobs: arch: ${{ matrix.arch }} - uses: julia-actions/julia-buildpkg@latest - uses: julia-actions/julia-runtest@latest + env: + PYTHON: "" + GKSwstype: "100" - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v1 with: diff --git a/.github/workflows/CleanPR.yml b/.github/workflows/CleanPR.yml new file mode 100644 index 00000000..22d90435 --- /dev/null +++ b/.github/workflows/CleanPR.yml @@ -0,0 +1,26 @@ +name: Doc Preview Cleanup + +on: + pull_request: + types: [closed] + +jobs: + doc-preview-cleanup: + runs-on: ubuntu-latest + steps: + - name: Checkout gh-pages branch + uses: actions/checkout@v2 + with: + ref: gh-pages + - name: Delete preview and history + push changes + run: | + if [ -d "previews/PR$PRNUM" ]; then + git config user.name "Documenter.jl" + git config user.email "documenter@juliadocs.github.io" + git rm -rf "previews/PR$PRNUM" + git commit -m "delete preview" + git branch gh-pages-new $(echo "delete history" | git commit-tree HEAD^{tree}) + git push --force origin gh-pages-new:gh-pages + fi + env: + PRNUM: ${{ github.event.number }} \ No newline at end of file diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index 6b44ede0..66298b0a 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -9,7 +9,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - julia-version: [1.5.0] + julia-version: [1.6.3] julia-arch: [x86] os: [ubuntu-latest] steps: diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index b7ab141a..ec924f84 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -4,7 +4,7 @@ on: release: push: branches: - - master + - main tags: '*' pull_request: @@ -15,10 +15,14 @@ jobs: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest with: - version: '1.5' + version: '1.6' - name: Install dependencies run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + env: + PYTHON: "" + GKSwstype: "100" - name: Build and deploy env: + GKSwstype: "100" GITHUB_TOKEN: ${{ secrets.TOKEN }} # For authentication with GitHub Actions token run: julia --project=docs/ docs/make.jl \ No newline at end of file diff --git a/Project.toml b/Project.toml index 3bea40fc..5ff16b37 100644 --- a/Project.toml +++ b/Project.toml @@ -1,20 +1,30 @@ name = "SimpleSDMLayers" uuid = "2c645270-77db-11e9-22c3-0f302a89c64c" authors = ["Timothée Poisot ", "Gabriel Dansereau "] -version = "0.7.0" +version = "0.8.0" [deps] ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" +ColorBlendModes = "60508b50-96e1-4007-9d6c-f475c410f16b" +Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" +Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" +PolygonOps = "647866c9-e3ac-4575-94e7-e3d426903924" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Requires = "ae029012-a4dd-5104-9daa-d747884805df" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [compat] -ArchGDAL = "0.6, 0.7" +ArchGDAL = "0.7" +Distances = "0.10" Downloads = "1.4" -RecipesBase = "0.7, 0.8, 1.0" +GeometryBasics = "0.4" +PolygonOps = "0.1" +RecipesBase = "1.0" Requires = "1.0" -ZipFile = "0.8, 0.9" -julia = "1.5" +StatsBase = "0.33" +ZipFile = "0.9" +julia = "1.6" diff --git a/docs/Project.toml b/docs/Project.toml index 6c07a902..d6862623 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,8 +1,14 @@ [deps] +ColorBlendModes = "60508b50-96e1-4007-9d6c-f475c410f16b" +Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" +EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" GBIF = "ee291a33-5a6c-5552-a3c8-0f29a1181037" +GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" \ No newline at end of file +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" diff --git a/docs/make.jl b/docs/make.jl index 7b1e70e5..b429e838 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -3,6 +3,22 @@ push!(LOAD_PATH, joinpath("..", "src")) using Documenter, SimpleSDMLayers using GBIF using Statistics +using Literate + +# Literate files +for ENDING in ["examples", "sdm"] + expl = joinpath("docs", "src", ENDING) + corefiles = [joinpath(expl, f) for f in readdir(expl)] + filter!(f -> endswith(f, "jl"), corefiles) + vignetteconfig = Dict( + "repo_root_url" => "https://github.com/EcoJulia/SimpleSDMLayers.jl", + "flavor" => Literate.DocumenterFlavor(), + "credit" => false + ) + for corefile in corefiles + Literate.markdown(corefile, expl; config=vignetteconfig) + end +end makedocs( sitename = "Simple SDM Layers", @@ -11,22 +27,28 @@ makedocs( "Home" => "index.md", "Manual" => [ "Types" => "man/types.md", + "Indexing" => "man/indexing.md", + "Clipping" => "man/clip.md", + "Operations on values" => "man/changevalues.md", "Overloads" => "man/overloads.md", "Other operations" => "man/operations.md", - "Data" => "man/data.md", + "Data access" => "man/data.md", + "IO" => "man/io.md" ], - "Examples" => [ - "Temperature data" => "examples/temperature.md", - "DataFrames integration" => "examples/dataframes.md", + "General examples" => [ + "Introduction: elevation data" => "examples/elevation.md", + "Geometry for clipping" => "examples/geometry.md", "Sliding window analysis" => "examples/slidingwindow.md", "Landcover data" => "examples/landcover.md", - "Landcover consensus" => "examples/consensus.md", - "Importing and exporting" => "examples/import.md", + "Bivariate mapping" => "examples/bivariate.md" ], - "Building SDMs" => [ + "SDM case studies" => [ "GBIF integration" => "sdm/gbif.md", - "BIOCLIM from scratch" => "sdm/bioclim.md", - "Future data" => "sdm/future.md" + "Variable selection (VIF)" => "sdm/vif.md", + "Building the BIOCLIM model" => "sdm/bioclim.md", + "Pseudo-absences" => "sdm/pseudoabsences.md", + "Dealing with future data" => "sdm/future.md", + "BRTs and climate change" => "sdm/brt.md" ] ] ) @@ -35,5 +57,6 @@ run(`find . -type f -size +40M -delete`) deploydocs( repo = "github.com/EcoJulia/SimpleSDMLayers.jl.git", - push_preview = true + push_preview = true, + devbranch = "main" ) diff --git a/docs/src/examples/bivariate.jl b/docs/src/examples/bivariate.jl new file mode 100644 index 00000000..347e4bc7 --- /dev/null +++ b/docs/src/examples/bivariate.jl @@ -0,0 +1,75 @@ +# # Bivariate maps + +using SimpleSDMLayers +using StatsPlots +using StatsPlots.PlotMeasures + +# **Justification for this use case:** We can show more than one (specifically, +# two) variables on a single map, using a bivariate color scale. In order to +# illustrate bivariate mapping, we will look at the joint distribution of two +# measures: eveness of land use, and terrain roughness. + +boundaries = (left=-12.0, right=30.0, bottom=36.0, top=72.0) +layer1 = convert( + Float16, + SimpleSDMPredictor(EarthEnv, HabitatHeterogeneity, 2; resolution=5, boundaries...), +) +layer2 = convert( + Float16, SimpleSDMPredictor(EarthEnv, Topography, 7; resolution=5, boundaries...) +) +layer2 = mask(layer1, layer2); + +# Note that bivariate maps usually work best when used with 9 classes in total +# (so 3 for each side). The next decision is to take a bivaraite color palette, +# and the combinations below are [commonly +# used](https://www.joshuastevens.net/cartography/make-a-bivariate-choropleth-map/). +# Note that you can definitely use [diverging +# colors](https://www.personal.psu.edu/cab38/ColorSch/Schemes.html) if you want. + +p0 = colorant"#e8e8e8" +bv_pal_1 = (p0=p0, p1=colorant"#64acbe", p2=colorant"#c85a5a") +bv_pal_2 = (p0=p0, p1=colorant"#73ae80", p2=colorant"#6c83b5") +bv_pal_3 = (p0=p0, p1=colorant"#9972af", p2=colorant"#c8b35a") +bv_pal_4 = (p0=p0, p1=colorant"#be64ac", p2=colorant"#5ac8c8") + +# The bivariate map itself is a call to plot. Internally, this will transform +# the layers into quantiles (determined by the `classes` keyword, defaults to +# 3): + +plot(layer1, layer2; st=:bivariate, bv_pal_3...) + +# Note that you can use the `bivariate` shorthand as well: + +pl1 = bivariate(layer1, layer2; classes=3, frame=:box, bv_pal_4...) +xaxis!(pl1, "Longitude") +yaxis!(pl1, "Latitude") + +# We can repeat essentially the same process for the legend: + +pl2 = bivariatelegend(layer1, layer2; classes=3, bv_pal_4...) +xaxis!(pl2, layernames(EarthEnv, HabitatHeterogeneity, 2)) +yaxis!(pl2, layernames(EarthEnv, Topography, 7)) + +# And now, we can plot the legend next to the map - future releases of the +# package will hopefully offer this in a far more user friendly way. + +plot(pl1, pl2; layout=@layout [a{0.75w} b]) + +# Using the `subplot` and `inset` arguments of Plots.jl, we can have the legend +# within the figure. Note how in this example we expand the limits on the x axis +# to make the legend fit, but also use more classes in the map to have a +# smoother result. + +p1 = bivariate(layer1, layer2; classes=6, bv_pal_2..., frame=:box, xlim=(-24, maximum(longitudes(layer1)))) +xaxis!(p1, "Longitude") +yaxis!(p1, "Latitude") +p2 = bivariatelegend!( + layer1, + layer2; + bv_pal_2..., + inset=(1, bbox(0.04, 0.05, 0.28, 0.28, :top, :left)), + subplot=2, + xlab=layernames(EarthEnv, HabitatHeterogeneity)[2], + ylab=layernames(EarthEnv, Topography)[7], + guidefontsize=7, +) diff --git a/docs/src/examples/consensus.md b/docs/src/examples/consensus.md deleted file mode 100644 index d56e1cc7..00000000 --- a/docs/src/examples/consensus.md +++ /dev/null @@ -1,45 +0,0 @@ -# Landcover consensus - -In this example, we will create a consensus map of landcover for Corsica based -on the EarthEnv data, and measure the variation within each pixel using the -variance. The first step is to load the packages we need, and create a bounding -box: - -```@example cons -using SimpleSDMLayers -using Plots - -bbox = (left=8.25, right=10.0, bottom=41.2, top=43.2) -``` - -First, we will download all values for our layers: - -```@example cons -lc = convert.(Float32, SimpleSDMPredictor(EarthEnv, LandCover, 1:12; full=false, bbox...)) -``` - -To perform the actual analysis, we will define a `shannon` function, which will -return the entropy of the land use categories: - -```@example cons -function shannon(x) - v = filter(n -> n>zero(eltype(x)), x) - length(v) == 0 && return NaN - v = v ./ sum(v) - return -sum(v.*log2.(v)) -end -``` - -We can then apply these functions using the `mosaic` method: - -```@example cons -consensus = mosaic(x -> last(findmax(x)), lc) -entropy = mosaic(shannon, lc) -``` - -```@example cons -p1 = plot(consensus, c=:terrain, frame=:none) -p2 = plot(entropy, c=:bamako, frame=:none) - -plot(p1, p2) -``` diff --git a/docs/src/examples/dataframes.md b/docs/src/examples/dataframes.md deleted file mode 100644 index b0d7d107..00000000 --- a/docs/src/examples/dataframes.md +++ /dev/null @@ -1,149 +0,0 @@ -# Working with DataFrames - -Both `SimpleSDMLayers.jl` and `GBIF.jl` offer an optional integration with the -`DataFrames.jl` package. Therefore, our [previous -example](https://ecojulia.github.io/SimpleSDMLayers.jl/latest/examples/gbif/) -with the kingfisher *Megaceryle alcyon* could also be approached with a -`DataFrame`-centered workflow. - -We will illustrate this using the same data and producing the same figures as in -the previous example. To do so, we will use `GBIF.jl` to produce the occurrence -`DataFrame` we will use throughout this example. However, it is also possible to -use a `DataFrame` of your choosing instead of one generated by `GBIF.jl`, as -long as it holds one occurrence per row, a column with the latitude coordinates, -and a column with longitude coordinates. For the rest, it can hold whatever -information you like. Most of our functions assume by default that the -coordinates are stored in columns named `:latitude` and `:longitude` (the order -doesn't matter), but you can generally specify other names with `latitude = -:lat` in case you don't want to rename them (we will show you how below). - -So let's start by getting our data: - -```@example dataframes -# Load packages -using SimpleSDMLayers -using GBIF -using Plots -using Statistics -# Load DataFrames too -using DataFrames - -# Load environmental data -temperature, precipitation = SimpleSDMPredictor(WorldClim, BioClim, [1,12]) - -# Get GBIF occurrences -kingfisher = GBIF.taxon("Megaceryle alcyon", strict=true) -kf_occurrences = occurrences(kingfisher, - "hasCoordinate" => "true", - "decimalLatitude" => (0.0, 65.0), - "decimalLongitude" => (-180.0, -50.0), - "limit" => 200) -for i in 1:4 - occurrences!(kf_occurrences) -end -@info kf_occurrences - -``` - -Once the data is loaded, we can easily convert the environmental layers to a -`DataFrame` with the corresponding coordinates. We can do this for a single -layer or for multiple layers at the same time: - -```@example dataframes -# Single layer -temperature_df = DataFrame(temperature) -# Multiple layers -env_layers = [temperature, precipitation] -env_df = DataFrame(env_layers) -rename!(env_df, :x1 => :temperature, :x2 => :precipitation) -first(env_df, 5) -``` - -Note that the resulting `DataFrame` will include `missing` values for the -elements set to `nothing` in the layers. We might want to remove those rows -using `filter!` or `dropmissing!`: - -```@example dataframes -dropmissing!(env_df, [:temperature, :precipitation]); -last(env_df, 5) -``` - -`GBIF.jl` allows us to convert a set of occurrences to a `DataFrame` just as -easily: - -```@example dataframes -kf_df = DataFrame(kf_occurrences) -last(kf_df, 5) -``` - -We can then extract the temperature values for all the occurrences. - -```@example dataframes -temperature[kf_df] -``` - -Or we can clip the layers according to the occurrences: - -```@example dataframes -temperature_clip = clip(temperature, kf_df) -precipitation_clip = clip(precipitation, kf_df) -``` - -In case your `DataFrame` has different column names for the coordinates, for -example `:lat` and `:lon`, you can clip it like this: - -```@example dataframes -kf_df_shortnames = rename(kf_df, :latitude => :lat, :longitude => :lon) -clip(temperature, kf_df_shortnames; latitude = :lat, longitude = :lon) -``` - -We can finally plot the layer and occurrence values in a similar way to any -`DataFrame` or `Array`. - -```@example dataframes -histogram2d(temperature_clip, precipitation_clip, c = :viridis) -scatter!(temperature_clip[kf_df], precipitation_clip[kf_df], - lab= "", c = :white, msc = :orange) -``` - -To plot the occurrence values over space, you can use: - -```@example dataframes -contour(temperature_clip, c = :alpine, title = "Temperature", - frame = :box, fill = true) -scatter!(kf_df.longitude, kf_df.latitude, - lab = "", c = :white, msc = :orange, ms = 2) -``` - -We can finally make a layer with the number of observations per cells: - -```@example dataframes -abundance = mask(precipitation_clip, kf_occurrences, Float32) -``` - -A useful trick to visualize sites with occurrences, in contrast with sites -without any occurrence, is to use `replace` or `replace!` to set the values -returned as `0` or `true` by the function `mask()` to `nothing`. This allows us -to first plot a background layer with a uniform colour, covering the whole area -to visualize, then plot the occurrence layer on top using a different colour -scale. - -```@example dataframes -abundance_nozeros = replace(abundance, 0 => nothing) -plot(precipitation_clip, c = :lightgrey) -plot!(abundance_nozeros, c = :viridis, clim = extrema(abundance_nozeros)) -``` - -Once again, the cells are rather small, and there are few observations, so this -is not necessarily going to be very informative. As in our other example, to -get a better sense of the distribution of observations, we can get the average -number of observations in a radius of 100km around each cell (we will do so for -a zoomed-in part of the map to save time): - -```@example dataframes -zoom = abundance[left = -100.0, right = -75.0, top = 43.0, bottom = 20.0] -buffered = slidingwindow(zoom, Statistics.mean, 100.0) -plot(buffered, c = :lapaz, legend = false, frame = :box) -scatter!(kf_df.longitude, kf_df.latitude, - lab = "", c = :white, msc = :orange, ms = 2, alpha = 0.5) -``` \ No newline at end of file diff --git a/docs/src/examples/elevation.jl b/docs/src/examples/elevation.jl new file mode 100644 index 00000000..14318ea6 --- /dev/null +++ b/docs/src/examples/elevation.jl @@ -0,0 +1,64 @@ +# # Basics: elevation data + +# In this example, we will look at elevation data from the worldclim 2 data, +# crop it for Western Europe, and then change the resolution to aggregate the +# data. The first step is to get the worldclim layer for elevation: + +using SimpleSDMLayers +using StatsPlots +import Statistics + +#- + +elevation = convert(Float32, SimpleSDMPredictor(WorldClim, Elevation)) + +# Thanks to the integration with Plots and StatsPlots, we can very rapidly +# visualize these data: + +heatmap(elevation, c=:cividis, frame=:box) +xaxis!("Longitude") +yaxis!("Latitude") + +# Let's also have a look at the density while we're at it: + +density(elevation, frame=:zerolines, c=:grey, fill=(0, :grey, 0.2), leg=false) +xaxis!("Elevation") + +# The next step is to clip the data to the region of interest. This requires a +# the coordinates of the bounding box as two tuples (for longitude and latitude) +# -- we can also make a quick heatmap to see what the region looks like: + +elevation_europe = clip(elevation; left=-11.0, right=31.5, bottom=29.0, top=71.5) + +#- + +heatmap(elevation_europe, c=:cividis, aspectratio=1, frame=:box) + +# The next step will be to coarsen these data, which requires to give the number +# of cells to merge alongside each dimension. This number of cells must be a +# divider of the grid size, which we can view with: + +size(elevation_europe) + +# In an ideal world, we could want to find a number of cells that is the same both +# for latitude and longitude, and one approach is to finagle our way into a +# correct grid by changing the clipping region. + +# In this case, we will use a coarsening scale of `(5,5)`, which gives us a total +# of 25 cells in the aggregated result. Our aggregation function will be `mean` +# (so we report the average elevation across these cells): + +elevation_europe_coarse = coarsen(elevation_europe, Statistics.mean, (5, 5)) + +# Once again, we can plot these data: + +heatmap(elevation_europe_coarse, aspectratio=1, c=:cividis, frame=:box) + +# Finally, we can compare our different clipping and approximations to the overall +# dataset: + +density(elevation, frame=:zerolines, c=:grey, fill=(0, :grey, 0.5), lab="") +density!(elevation_europe, c=:black, lab="Raw data") +density!(elevation_europe_coarse, c=:darkgrey, lab="Average") +xaxis!("Elevation") + diff --git a/docs/src/examples/geometry.jl b/docs/src/examples/geometry.jl new file mode 100644 index 00000000..872e9f46 --- /dev/null +++ b/docs/src/examples/geometry.jl @@ -0,0 +1,51 @@ +# # Working with geometry objects + +# The `SimpleSDMLayers` package uses `Point`s to represent coordinates, which +# allows to easily use `GeometryBasics` objects for masking. In this example, we +# will illustrate how we can get the values around a given point, and within a +# polygon. These functions all rely on `mask` to extract the values. + +using SimpleSDMLayers +using GeometryBasics +using Plots +using JSON + +layer = SimpleSDMPredictor(WorldClim, BioClim; resolution=5.0, left=-89., right=-70., top=27., bottom=15.) + +# Let's have a look at the data, before applying any transformation: + +plot(layer) + +# We will now define a center of 5 degree of radius centered on La Habana + +la_habana = Point(-82.38304, 23.13302) +area = Circle(la_habana, 5.0) + +# We can plot the background of the map, and add the clipped region: + +plot(layer, c=:lightgrey, frame=:box) +plot!(mask(area, layer), c=:turku) +scatter!(la_habana, lab="", c=:white, msw=2.0) + +# This approach is useful if you want to mask according to a polygon. In this +# case, we will keep the values within a polygon representing Cuba. We grab data +# from Natural Earth Resources, that do not have the best resolution in terms of +# borders, but are sufficient for this illustration: + +borders = download("https://raw.githubusercontent.com/AshKyd/geojson-regions/master/countries/50m/CUB.geojson") +cuba_data = JSON.parsefile(borders) +polys = cuba_data["geometry"]["coordinates"] +CUBA = SimpleSDMLayers._format_polygon.(polys) + +# This object is actually a multi-polygon, or an array of polygons. The `mask` +# function can handle this: + +plot(layer, c=:lightgrey, frame=:box) +plot!(mask(CUBA, layer), c=:turku) +scatter!(la_habana, lab="", c=:white, msw=2.0) + +# The delimitation of the area to crop is only as good as the underlying GeoJSON +# polygons, which in this case is missing some coastal areas. As a sidenote, the +# *center* of the grid cell is checked for being in the polygon (not *any* +# coordinate within the grid cell) - for this reason, coarser rasters (*e.g.* at +# 10 minutes resolution) may not respond perfectly well to masking in this way. \ No newline at end of file diff --git a/docs/src/examples/import.md b/docs/src/examples/import.md deleted file mode 100644 index cd39b330..00000000 --- a/docs/src/examples/import.md +++ /dev/null @@ -1,63 +0,0 @@ -# Importing and exporting your own data - -It is possible to import your own rasters into a `SimpleSDMLayer` object and to -export `SimpleSDMLayer` objects to raster files. In this example, we will look at a data file -produced by the *OmniScape* package, and which represents landscape connectivity -in the Laurentians region of Québec. This example will also show how we can use -the `broadcast` operation to modify the values of a raster. - -## Importing data - -```@example temp -using SimpleSDMLayers -using Plots -using StatsBase -``` - -The file comes with the package itself, so we can read it directly - this is a -geotiff file, where values are floating point numbers representing connectivity. - -```@example temp -file = joinpath(dirname(pathof(SimpleSDMLayers)), "..", "data", "connectivity.tiff") -``` - -To import this file as a `SimpleSDMLayer`, we need to call the `geotiff` -function, which assumes a WGS84 projection: - -```@example temp -mp = geotiff(SimpleSDMPredictor, file) -``` - -Because this file has raw values, which are not necessarily great for plotting, -we will transform it to quantiles, using the `rescale` function. - -```@example temp -qmap = rescale!(mp, collect(0.0:0.01:1.0)) -``` - -Finally, we are ready for plotting: - -```@example temp -plot(qmap, frame=:grid, c=:cork, clim=(0,1)) -``` - -## Exporting data - - `geotiff` can also be used in the opposite way to write `SimpleSDMLayer` -objects to tiff files. For instance, we might want to keep our layer with -quantile values for later on: - -```@example temp -geotiff("layer.tif", qmap) -``` - -Note that `geotiff` can also write multiple layers in a single file (as -different bands) as long as they have the same size and the same bounding -coordinates. We can then reimport them by specifying the band number. - -```@example temp -layers = SimpleSDMPredictor(WorldClim, BioClim, 1:2) - -geotiff("stack.tif", layers) -layers = [geotiff(SimpleSDMPredictor, "stack.tif", i) for i in 1:2] -``` \ No newline at end of file diff --git a/docs/src/examples/landcover.jl b/docs/src/examples/landcover.jl new file mode 100644 index 00000000..f30430f4 --- /dev/null +++ b/docs/src/examples/landcover.jl @@ -0,0 +1,83 @@ +# # Landcover data + +# In this example, we will look at landcover data in Corsica + +using SimpleSDMLayers +using Plots + +# To avoid loading the (rather large) dataset of land cover at once, we will +# rely on a bounding box: + +boundaries = (left=8.25, right=10.0, bottom=41.2, top=43.2) + +# The numbers corresponding to the layers can be found in the documentation for +# `SimpleSDMPredictor` - urban area is 9. + +urban = SimpleSDMPredictor(EarthEnv, LandCover, 9; boundaries...) + +# This dataset is returning data as `UInt8` (as it represents a proportion of +# the pixel occupied by the type), but this is not something that can be plotted +# efficiently (because we rely on `NaN` to indicate no data, and `UInt8` has no +# `NaN`). So in the next step, we will manipulate this object a little bit to +# have something more workable. + +urban = convert(Float16, urban) + +# Why `Float16`? It is the smallest floating point type with a `NaN` value +# (`NaN16`). We will replace the values of 0 by `nothing`, to only see the +# pixels with *at least* some urban cover: + +replace!(urban, zero(eltype(urban)) => nothing) + +# With this done, we can plot the results: + +plot(urban; c=:berlin, clim=(0, 100)) + +# Unsuprisingly to anyone who had the chance to visit Corsica, it is not a very +# densely urbanized island. This is a good time to question whether we can look +# at (i) which landcover type dominates within each pixel, and (ii) how +# heterogeneous the land use within each pixel is. + +# First, we will download all values for the landcover layers, including open +# water (12). + +landcover = + convert.( + Float16, SimpleSDMPredictor(EarthEnv, LandCover, 1:12; full=false, boundaries...) + ) + +plot( + plot.(landcover, leg=false, c=:oleron, clim=(0, 100))...; + leg=false, + grid=:none, + frame=:none, +) + +# To perform the actual analysis, we will define a `shannon` function, which +# will return the entropy of the land use categories: + +function shannon(x) + v = filter(n -> n > zero(eltype(x)), x) + length(v) == 0 && return NaN + v = v ./ sum(v) + return -sum(v .* log2.(v)) +end + +entropy = mosaic(shannon, landcover) + +# We can also get the index of the most common layer within the pixel: + +consensus = mosaic(x -> last(findmax(x)), landcover) + +# We may not be *that* interested in fully open water, so let's define a mask to +# remove it: + +openwater = broadcast(!isequal(12), consensus) + +# All that's left to do is to plot after applying this mask, and we now get a +# map of most common land cover type (left), and land cover heterogeneity +# (right) + +p1 = plot(mask(openwater, consensus); c=:terrain, frame=:none) +p2 = plot(mask(openwater, entropy); c=:lapaz, frame=:none) +plot(p1, p2) diff --git a/docs/src/examples/landcover.md b/docs/src/examples/landcover.md deleted file mode 100644 index 8c6b2d6b..00000000 --- a/docs/src/examples/landcover.md +++ /dev/null @@ -1,35 +0,0 @@ -# Getting landcover data - -In this example, we will look at landcover data, specifically the proportion of -urban/built area in Europe; the entire dataset is very large to fit in memory, -as it has a resolution of about 1 kilometre squared. Therefore, we will take -advantage of the ability to only load the part that matters by passing the -limits of a bounding box. - -```@example urban -using SimpleSDMLayers -urban = SimpleSDMPredictor(EarthEnv, LandCover, 9; left=-11.0, right=31.1, bottom=29.0, top=71.1) -``` - -This dataset is returning data as `UInt8` (as it represents a proportion of the -pixel occupied by the type), but this is not something that can be plotted -efficiently. So in the next step, we will manipulate this object a little bit to -have something more workable. - -```@example urban -urban = convert(Float32, urban) -``` - -We will replace the values of 0 by `nothing`, to only see the pixels with some -urban cover: - -```@example urban -replace!(urban, zero(eltype(urban)) => nothing) -``` - -With this done, we can plot the results: - -```@example urban -using Plots -heatmap(urban, c=:berlin) -``` diff --git a/docs/src/examples/slidingwindow.jl b/docs/src/examples/slidingwindow.jl new file mode 100644 index 00000000..8d02cf84 --- /dev/null +++ b/docs/src/examples/slidingwindow.jl @@ -0,0 +1,27 @@ +# # Sliding window analysis + +# In this example, we will get precipitation data from Québec, and use a sliding +# window analysis to smooth them out. The beginning of the code should now be +# familiar: + +using SimpleSDMLayers +using Plots +using Statistics + +#- + +precipitation = SimpleSDMPredictor(WorldClim, BioClim, 12; left=-80.0, right=-56.0, bottom=44.0, top=62.0) + +# The sliding window works by taking all pixels *within a given radius* +# (expressed in kilometres) around the pixel of interest, and then applying the +# function given as the second argument to their values. Empty pixels are +# removed. In this case, we will do a summary across a 100 km radius around each +# pixel: + +averaged = slidingwindow(precipitation, Statistics.mean, 100.0; threaded=false) + +# We can finally overlap the two layers -- the result of sliding window is a +# little bit smoother than the raw data. + +plot(precipitation, c=:alpine) +contour!(averaged, c=:white, lw=2.0) diff --git a/docs/src/examples/slidingwindow.md b/docs/src/examples/slidingwindow.md deleted file mode 100644 index 91289b95..00000000 --- a/docs/src/examples/slidingwindow.md +++ /dev/null @@ -1,30 +0,0 @@ -# Sliding window analysis - -In this example, we will get precipitation data from Québec, and use a sliding -window analysis to smooth them out. The beginning of the code should now be -familiar: - -```@example slide -using SimpleSDMLayers -using Plots -using Statistics - -precipitation = SimpleSDMPredictor(WorldClim, BioClim, 12; left=-80.0, right=-56.0, bottom=44.0, top=62.0) -``` - -The sliding window works by taking all pixels *within a given radius* (expressed -in kilometres) around the pixel of interest, and then applying the function -given as the second argument to their values. Empty pixels are removed. In this -case, we will do a summary across a 100 km radius around each pixel: - -```@example slide -averaged = slidingwindow(precipitation, Statistics.mean, 100.0) -``` - -We can finally overlap the two layers -- the result of sliding window is a -little bit smoother than the raw data. - -```@example slide -plot(precipitation, c=:alpine) -contour!(averaged, c=:white, lw=2.0) -``` diff --git a/docs/src/examples/temperature.md b/docs/src/examples/temperature.md deleted file mode 100644 index 800aa29d..00000000 --- a/docs/src/examples/temperature.md +++ /dev/null @@ -1,75 +0,0 @@ -# Getting temperature data - -In this example, we will look at temperature data from the worldclim 2 data, -crop it for Western Europe, and then change the resolution to aggregate the -data. The first step is to get the worldclim layer for temperature (the codes -for each layers are in the function documentation): - -```@example temp -using SimpleSDMLayers -temperature = SimpleSDMPredictor(WorldClim, BioClim, 1) -``` - -Thanks to the integration with Plots and StatsPlots, we can very rapidly -visualize these data: - -```@example temp -using Plots, StatsPlots -heatmap(temperature, c=:cividis, frame=:box) -xaxis!("Longitude") -yaxis!("Latitude") -``` - -Let's also have a look at the density while we're at it: - -```@example temp -density(temperature, frame=:zerolines, c=:grey, fill=(0, :grey, 0.2), leg=false) -xaxis!("Temperature", (-50,30)) -``` - -The next step is to clip the data to the region of interest. This requires a the -coordinates of the bounding box as two tuples (for longitude and latitude) -- we -can also make a quick heatmap to see what the region looks like: - -```@example temp -temperature_europe = temperature[left=-11.0, right=31.5, bottom=29.0, top=71.5] -heatmap(temperature_europe, c=:cividis, aspectratio=1, frame=:box) -``` - -The next step will be to coarsen these data, which requires to give the number -of cells to merge alongside each dimension. This number of cells must be a -divider of the grid size, which we can view with: - -```@example temp -size(temperature_europe) -``` - -In an ideal world, we could want to find a number of cells that is the same both -for latitude and longitude, and one approach is to finagle our way into a -correct grid by changing the clipping region. - -In this case, we will use a coarsening scale of `(5,5)`, which gives us a total -of 25 cells in the aggregated result. Our aggregation function will be `mean` -(so we report the average temperature across these cells): - -```@example temp -import Statistics -temperature_europe_coarse = coarsen(temperature_europe, Statistics.mean, (5, 5)) -``` - -One again, we can plot these data: - -```@example temp -heatmap(temperature_europe_coarse, aspectratio=1, c=:cividis, frame=:box) -``` - -Finally, we can compare our different clipping and approximations to the overall -dataset: - - -```@example temp -density(temperature, frame=:zerolines, c=:grey, fill=(0, :grey, 0.5), lab="") -density!(temperature_europe, c=:black, lab="Raw data") -density!(temperature_europe_coarse, c=:darkgrey, lab="Average") -xaxis!("Temperature", (-50,30)) -``` diff --git a/docs/src/index.md b/docs/src/index.md index a5f4b180..3439eb24 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -8,8 +8,30 @@ The two core types of the package are `SimpleSDMPredictor` and immutable, but responses are. All types belong to the abstract `SimpleSDMLayer`, and are organised in the same way: a `grid` field storing a matrix of data (of any type!), and the `left`, `right`, `bottom`, and `top` coordinates (as -floating point values). +floating point values). Of course these details are largely irrelevant, since we +have overloaded a large number of methods from `Base`, to make indexing, +converting, and modifying data as easy as possible. -Of course these details are largely irrelevant, since we have overloaded a large -number of methods from `Base`, to make indexing, converting, and modifying data -as easy as possible. +The aim of the package is to deliver (i) a series of methods to manipulate +raster data, and (ii) facilitated access to common datasets used to train +species distribution models. Despite what the name may suggest, this package +does *not* implement SDMs, but is instead intended as a library usable for this +purpose. Nevertheless, the documentation contains a few example of building +models, and integrating this package with the GBIF API. + +The documentations is split in three sections. The **manual** is a fairly +exhaustive documentation of the functions, methods, types, and interfaces, and +it is a good idea to skim it first, and come back for a focused reading when you +have a specific use-case in mind. The **general examples** section is a +collection of mostly disconnected workflows, intended to show how +`SimpleSDMLayers` interacts with other packages. It should give you a better +understanding of you can actually use the package. + +Finally, the **SDM case studies** are a more linear series of vignettes, +covering occurrence data, variable selection, bulding a presence-only model, +generating pseudo-absences, and using a machine learning approach to do range +forecasting under climate change. This last section can be used as a template to +develop new analyses, and will use almost all the features in the package. All +of the SDM vignettes use the same species throughout - *Hypomyces lactifluorum* +is a fungus of moderate commerical importance in North America, whose +distribution is probably going to be affected by climate change. \ No newline at end of file diff --git a/docs/src/man/changevalues.md b/docs/src/man/changevalues.md new file mode 100644 index 00000000..5aa9db22 --- /dev/null +++ b/docs/src/man/changevalues.md @@ -0,0 +1,12 @@ +# Operations on values + +```@docs +rescale! +rescale +replace +replace! +``` + +```@docs +broadcast +``` \ No newline at end of file diff --git a/docs/src/man/clip.md b/docs/src/man/clip.md new file mode 100644 index 00000000..626f60bd --- /dev/null +++ b/docs/src/man/clip.md @@ -0,0 +1,14 @@ +# Clipping and stitching rasters + +```@docs +clip +``` + +```@docs +Base.vcat +Base.hcat +``` + +```@docs +mosaic +``` \ No newline at end of file diff --git a/docs/src/man/data.md b/docs/src/man/data.md index aa062f34..dc5adc02 100644 --- a/docs/src/man/data.md +++ b/docs/src/man/data.md @@ -9,55 +9,61 @@ project. This being said, the prefered solution is to define a `SDMLAYERS_PATH` environment variable pointing to a specific path, where the layers will live. This will ensure that they are re-used between projects. -## General interface - All layers are returned as `SimpleSDMPredictor`, and therefore constructed by calling the `SimpleSDMPredictor` function on a `LayerProvider` and a `LayerDataset`, possibly with a future climate model and scenario. In all cases, the method accepts either a single layer, or an array of layers. -| Data provider | Dataset | Layers | Future models | Future scenarios | -| -------------------------------- | ---------------------- | ------ | ------------- | ------------------------------------ | -| `EarthEnv` | `Landcover` | 12 | | | -| `EarthEnv` | `HabitatHeterogeneity` | 14 | | | -| [`WorldClim`][worldclim-current] | `BioClim` | 19 | `CMIP6` | `SharedSocioeconomicPathway` | -| [`CHELSA`][chelsa-bioclim] | `BioClim` | 12 | `CMIP5` | `RepresentativeConcentrationPathway` | - -[earthenv-landcover]: http://www.earthenv.org/landcover -[earthenv-texture]: http://www.earthenv.org/texture -[worldclim-current]: https://www.worldclim.org/data/worldclim21.html -[chelsa-bioclim]: http://chelsa-climate.org/ +| Data provider | Dataset | Layers | Future models | Future scenarios | +| ------------- | ---------------------- | ------ | ---------------- | ------------------------------------------------------------------ | +| `EarthEnv` | `Landcover` | 12 | | | +| `EarthEnv` | `HabitatHeterogeneity` | 14 | | | +| `WorldClim` | `BioClim` | 19 | `CMIP6` | `SharedSocioeconomicPathway` | +| `WorldClim` | `Elevation` | 1 | `Elevation` | | +| `CHELSA` | `BioClim` | 12 | `CMIP5`, `CMIP6` | `RepresentativeConcentrationPathway`, `SharedSocioeconomicPathway` | + +## Providers and datasets -## Later providers +The `layernames` method (inputs are a provider and a dataset) will return a +tuple with the name of the layers. + +### Data providers ```@docs SimpleSDMLayers.LayerProvider +``` + +```@docs WorldClim CHELSA EarthEnv ``` -## Layer datasets +### Datasets ```@docs SimpleSDMLayers.LayerDataset +``` + +```@docs BioClim LandCover HabitatHeterogeneity +Elevation ``` -## Future climate models +## Future data + +### CMIP5 ```@docs -SharedSocioeconomicPathway -RepresentativeConcentrationPathway CMIP5 -CMIP6 +RepresentativeConcentrationPathway ``` -## File reading and writing +### CMIP6 ```@docs -SimpleSDMLayers.ascii -geotiff -``` \ No newline at end of file +CIMP6 +SharedSocioeconomicPathway +``` diff --git a/docs/src/man/indexing.md b/docs/src/man/indexing.md new file mode 100644 index 00000000..36404814 --- /dev/null +++ b/docs/src/man/indexing.md @@ -0,0 +1,19 @@ +# Indexing interface + +In versions prior to `0.8`, the indexing syntax (`x[y]`) had been a little bit +abused to mean two different things: getting data out of a raster, and getting a +raster out of a raster. Starting from `0.8`, the indexing interface (*i.e.* +anything relying on `getindex` and `setindex!`) is used to act on values. The +resizing of rasters is now handled by the `clip` function. + +## Getting values out of a raster + +```@docs +getindex +``` + +## Writing values in a raster + +```@docs +setindex! +``` \ No newline at end of file diff --git a/docs/src/man/io.md b/docs/src/man/io.md new file mode 100644 index 00000000..dad22ba2 --- /dev/null +++ b/docs/src/man/io.md @@ -0,0 +1,6 @@ +# Reading and writing files + +```@docs +SimpleSDMLayers.ascii +geotiff +``` \ No newline at end of file diff --git a/docs/src/man/operations.md b/docs/src/man/operations.md index cb3aed36..c7649741 100644 --- a/docs/src/man/operations.md +++ b/docs/src/man/operations.md @@ -3,12 +3,5 @@ ```@docs coarsen slidingwindow -latitudes -longitudes -boundingbox -clip mask -rescale! -rescale -mosaic ``` diff --git a/docs/src/man/overloads.md b/docs/src/man/overloads.md index 00980b92..232567a5 100644 --- a/docs/src/man/overloads.md +++ b/docs/src/man/overloads.md @@ -8,18 +8,13 @@ directly, and also allow to set and get values using the geographic coordinates ## From `Base` ```@docs -convert copy collect eltype size stride eachindex -getindex -setindex! similar -replace -replace! Base.sum Base.maximum Base.minimum @@ -36,10 +31,6 @@ isequal ## From `Broadcast` -```@docs -broadcast -``` - ## From `Statistics` ```@docs diff --git a/docs/src/man/types.md b/docs/src/man/types.md index 5e550310..66462165 100644 --- a/docs/src/man/types.md +++ b/docs/src/man/types.md @@ -6,12 +6,21 @@ and a bounding box indicated by the floating point coordinates of its limits. ## Implemented types ```@docs +SimpleSDMLayer SimpleSDMResponse SimpleSDMPredictor ``` -## Abstract type +## Getting the coordinates ```@docs -SimpleSDMLayer +latitudes +longitudes +boundingbox ``` + +## Type conversion + +```@docs +convert +``` \ No newline at end of file diff --git a/docs/src/sdm/bioclim.jl b/docs/src/sdm/bioclim.jl new file mode 100644 index 00000000..75456bf8 --- /dev/null +++ b/docs/src/sdm/bioclim.jl @@ -0,0 +1,192 @@ +# # Building the BIOCLIM model + +# **Justification for this use case:** `SImpleSDMLayers` can be used as a +# platform to build your own species distribution models. In this example, which +# assumes that you have read the vignettes on GBIF integration and variable +# selection through VIF, we will build our own version of the BIOCLIM model, and +# apply it to the distribution of *Hypomyces lactifluorum* in North America. + +using SimpleSDMLayers +using GBIF +using Plots +using GLM +using StatsBase +using Statistics +using GeometryBasics + +# BIOCLIM is a very simple model, which only requires presence information. The +# first step is therefore to get occurrences of *Hypomyces lactifluorum* in +# North America. Because the data in GBIF is only as good as the original data +# source, sometimes searching by `continent` gives fewer results than searching +# by country. + +observations = occurrences( + GBIF.taxon("Hypomyces lactifluorum"; strict=true), + "hasCoordinate" => "true", + "country" => "CA", + "country" => "US", + "limit" => 300, +) + +# We will now page through additional results (300 at a time). + +while length(observations) < size(observations) + occurrences!(observations) +end + +# At this point, we could read the whole predictor variables directly, and then +# clip them. This would be fairly wasteful, as we need a small area. For this +# reason, we will calculate the bounding box first, and then use it to only read +# the section we want. + +left, right = extrema(longitudes(observations)) .+ (-5, 5) +bottom, top = extrema(latitudes(observations)) .+ (-5, 5) +boundaries = (left=left, right=right, bottom=bottom, top=top) + +# With this information in hand, we can start getting our variables. In this +# example, we will take all of the BioClim data from WorldClim, at the 10 arc +# minute resolution, and add the elevation layer. Note that using the bounding +# box coordinates when calling the layers is *much* faster than clipping after +# the fact (assuming that you already have the files downloaded). + +predictors = + convert.( + Float32, SimpleSDMPredictor(WorldClim, BioClim, 1:19; resolution=10.0, boundaries...) + ); + +# We will add the elevation to the stack of variables we use -- we need to +# convert everything to `Float32` layers, because elevation is originally an +# `Int16` one and a number of operations we will make will require floating +# points + +push!( + predictors, + convert( + Float32, SimpleSDMPredictor(WorldClim, Elevation; resolution=10.0, boundaries...) + ), +); + +# It is not a bad idea to plot all of the predictors: + +plot(plot.(predictors, grid=:none, axes=false, frame=:none, leg=false, c=:imola)...) + +# Clearly, some of them show strong autocorrelation; we will therefore re-use +# our VIF code to select a subset that has uncorrelated variables. + +function vif(model) + R² = r2(model) + return 1 / (1-R²) +end + +function stepwisevif( + layers::Vector{T}, selection=collect(1:length(layers)), threshold::Float64=5.0 +) where {T<:SimpleSDMLayer} + x = hcat([layer[keys(layer)] for layer in layers[selection]]...) + X = (x .- mean(x; dims=1)) ./ std(x; dims=1) + vifs = zeros(Float64, length(selection)) + for i in eachindex(selection) + vifs[i] = vif(lm(X[:, setdiff(eachindex(selection), i)], X[:, i])) + end + all(vifs .<= threshold) && return selection + drop = last(findmax(vifs)) + popat!(selection, drop) + @info "Variables remaining: $(selection)" + return stepwisevif(layers, selection, threshold) +end + +# We will apply this function with the default parameters: + +layers_to_keep = stepwisevif(predictors) + +# When this is done, we can plot the layers again to check that they are all +# more or less unique: + +plot( + plot.( + predictors[layers_to_keep], grid=:none, axes=false, frame=:none, leg=false, c=:imola + )..., +) + +# The point of BIOCLIM (the model, not the dataset) is that the score assigned +# to a pixel is maximal if this pixel is the *median* value for a given +# variable. Therefore, we need to measure the cumulative density function for +# every pixel in every variable, and transform it with: + +_pixel_score(x) = 2.0(x > 0.5 ? 1.0 - x : x); + +# The *actual* model generation is fairly straightforward, as we will need to +# get the values of the layers in the cells occupied by an observation. Because +# sampling bias is very real, we will grid the observations by transforming them +# into a boolean layer: + +presences = mask(predictors[1], observations, Bool) +plot(convert(Float32, presences); c=cgrad([:lightgrey, :black]), leg=false) + +# This step is very important so as not to bias the estimation of quantiles, +# which overcounting observations within the same cell would do. We can now +# define the model: + +function SDM(predictor::T1, observations::T2) where {T1<:SimpleSDMLayer,T2<:SimpleSDMLayer} + _tmp = mask(observations, predictor) + qf = ecdf(convert(Vector{Float32}, _tmp[keys(_tmp)])) # We only want the observed values + return (_pixel_score ∘ qf) +end + +# Note that we use the ∘ (`\circ`) operator to chain the quantile estimation and +# the pixel scoring, which requires Julia 1.4. This function returns a *model*, +# *i.e.* a function that we can broadcast to a given layer, which might not be +# the same one we used for the training. + +# The next step in BIOCLIM is to get the *minimum* suitability across all layers +# for every pixel. Because we have a `min` method defined for a pair of layers, +# we can call `minimum` on an array of layers: + +function SDM(predictors::Vector{T}, models) where {T<:SimpleSDMLayer} + @assert length(models) == length(predictors) + return minimum([broadcast(models[i], predictors[i]) for i in 1:length(predictors)]) +end + +# The advantage of this approach is that we can call the `SDM` method for +# prediction on a smaller layer, or a different layer. This can allow us to do +# thing like stitching layers together with `hcat` and `vcat` to use +# multi-threading, or use a different resolution for the prediction than we did +# for the training. + +models = [SDM(predictor, presences) for predictor in predictors]; + +# We now get the prediction: + +prediction = SDM(predictors, models) + +# It's not a bad idea to look at this prediction, to get a sense of where the +# hotspots of presence would be: + +plot(prediction; c=:bamako, frame=:box) +xaxis!("Longitude") +yaxis!("Latitude") + +# Just because we may want to visualize this result in a transformed way, *i.e.* +# by looking at the quantiles of suitability, we can call the `rescale` +# function: + +prediction_quantile = rescale(prediction, collect(0.0:0.005:1.0)) + +# As this map now represents the quantiles of suitability, we may want to remove +# the lower 5%. For this, we need to create a boolean mask, which we can do by +# broadcasting a conditional: + +cutoff = broadcast(x -> x > 0.05, prediction_quantile) + +# The raw prediction, minus the 5% bottom quantiles, can the be plotted: + +plot(prediction; frame=:box, c=:lightgrey) +plot!(mask(cutoff, prediction); c=:bamako) +xaxis!("Longitude") +yaxis!("Latitude") + +# And there it is! A simple way to write the BIOCLIM model by building on the +# integration between SimpleSDMLayers and GBIF. BIOCLIM has a tendency to +# underfit the distribution quite a bit - in fact, the range returned here is +# larger than what other methods (like BRT) would return. The next vignettes in +# this section will be focused on using additional functionalities of +# `SimpleSDMLayers` until we are able to make a better model for this species. \ No newline at end of file diff --git a/docs/src/sdm/bioclim.md b/docs/src/sdm/bioclim.md deleted file mode 100644 index f61ecbe0..00000000 --- a/docs/src/sdm/bioclim.md +++ /dev/null @@ -1,114 +0,0 @@ -# Writing BIOCLIM from scratch - -In this example, we will write the BIOCLIM species distribution model using -`SimpleSDMLayers.jl` and `GBIF.jl`. - -```@example bioclim -using SimpleSDMLayers -using GBIF -using Plots -using StatsBase -using Statistics -``` - -We can get some occurrences for the taxon of interest: - -```@example bioclim -obs = occurrences( - GBIF.taxon("Alces alces", strict=true), - "hasCoordinate" => "true", - "continent" => "EUROPE", - "limit" => 50 -) -while length(obs) < min(2000, size(obs)) - occurrences!(obs) -end -``` - -This query uses a range for the longitude and latitude, so as to make sure that -we get a relatively small region. Before we get the layers, we will figure out -the bounding box for the observations - just to make sure that we will have -something large enough, we will add a 2 degrees padding around it: - -```@example bioclim -left, right = extrema([o.longitude for o in obs]) .+ (-5,5) -bottom, top = extrema([o.latitude for o in obs]) .+ (-5,5) -``` - -With this information in hand, we can start getting our variables. In this -example, we will take all worldclim data, at the default 10 arc minute -resolution: - -```@example bioclim -predictors = SimpleSDMPredictor(WorldClim, BioClim, 1:19; left=left, right=right, bottom=bottom, top=top); -first(predictors) -``` - -The point of BIOCLIM (the model, not the dataset) is that the score assigned to -a pixel is maximal is this pixel is the *median* value for a given variable - -therefore, we need to measure the cumulative density function for every pixel in -every variable. - -```@example bioclim -_pixel_score(x) = 2.0(x > 0.5 ? 1.0-x : x) - -function SDM(layer::T, observations::GBIFRecords) where {T <: SimpleSDMLayer} - qf = ecdf(layer[observations]) # We only want the observed values - return (_pixel_score∘qf) -end -``` - -Note that we use the ∘ (`\circ`) operator to chain the quantile estimation and -the pixel scoring, which requires Julia 1.4. This function returns a *model*, -*i.e.* a function that we can broadcast to a given layer, which might not be the -same one we used for the training. - -The next step in BIOCLIM is to get the *minimum* suitability across all layers -for every pixel. Because we have a `min` method defined for a pair of layers, we -can call `minimum` on an array of layers: - -```@example bioclim -function SDM(predictors::Vector{T}, models) where {T <: SimpleSDMLayer} - @assert length(models) == length(predictors) - return minimum([broadcast(models[i], predictors[i]) for i in 1:length(predictors)]) -end -``` - -The advantage of this approach is that we can call the `SDM` method for -prediction on a smaller layer, or a different layer. This can allow us to do -thing like stitching layers together with `hcat` and `vcat` to use -multi-threading, or use a different resolution for the prediction than we did -for the training. - -```@example bioclim -models = [SDM(predictor, obs) for predictor in predictors] -prediction = SDM(predictors, models) -``` - -Just because we may want to visualize this result in a transformed way, *i.e.* -by looking at the quantiles of suitability, we can call the `rescale!` function: - -```@example bioclim -rescale!(prediction, collect(0.0:0.01:1.0)) -``` - -As this map now represents the quantiles of suitability, we may want to remove -the lower 5%. For this, we need to create a boolean mask, which we can do by -broadcasting a conditional: - -```@example bioclim -cutoff = broadcast(x -> x > 0.05, prediction) -``` - -This map can be plotted as we would normally do: - -```@example bioclim -plot(prediction, frame=:box, c=:lightgrey) # Plot a uniform background -plot!(mask(cutoff, prediction), clim=(0,1), c=:bamako) -scatter!([(o.longitude, o.latitude) for o in obs], ms=4, c=:orange, lab="") -xaxis!("Longitude") -yaxis!("Latitude") -``` - -And there it is! A simple way to write the BIOCLIM model by building on the -integration between SimpleSDMLayers and GBIF. diff --git a/docs/src/sdm/brt.jl b/docs/src/sdm/brt.jl new file mode 100644 index 00000000..0157d316 --- /dev/null +++ b/docs/src/sdm/brt.jl @@ -0,0 +1,268 @@ +# # BRTs for species distribution forecasting + +using SimpleSDMLayers +using EvoTrees +using GBIF +using StatsBase +using StatsPlots + +# **Justification for this use case:** Boosted Regression Trees (BRTs) are a +# powerful way to predict the distribution of species. We will see how we can +# get information in and out of layers to use them, and how to use this model to +# predict a new distribution under a climate change scenario. This use-case +# assumes that you have read the manual pages for GBIF integration, future data, +# and pseudo-absences generation. + +# We will re-use the data from the pseudo-absences example: + +sp = GBIF.taxon("Hypomyces lactifluorum") +observations = occurrences( + sp, "hasCoordinate" => true, "limit" => 300, "country" => "CA", "country" => "US" +) +while length(observations) < size(observations) + occurrences!(observations) +end + +# We will pick the entire BioClim layers at a 10 minutes resolution, and clip +# them to the observations (this adds a 5 degrees buffer). + +layers = [ + clip(layer, observations) for layer in SimpleSDMPredictor(WorldClim, BioClim, 1:19) +]; + +# To remove the sampling effect, we transform the presences to a grid, and +# generate pseudo-absences using the surface range envelope method. + +presences = mask(layers[1], observations, Bool) +absences = rand(SurfaceRangeEnvelope, presences) + +# The next step is to extract coordinates at which the species is present or +# pseudo-absent - we can rely on the `replace` method to empty any `false` +# values: + +xy_presence = keys(replace(presences, false => nothing)); +xy_absence = keys(replace(absences, false => nothing)); +xy = vcat(xy_presence, xy_absence); + +# With the `xy` list of coordinates, we can get a predictor `X`, and a response +# `y`. + +X = hcat([layer[xy] for layer in layers]...); +y = vcat(fill(1.0, length(xy_presence)), fill(0.0, length(xy_absence))); + +# To train the model, we will use a random subset representing 70% of the +# dataset: + +train_size = floor(Int, 0.7 * length(y)); +train_idx = sample(1:length(y), train_size; replace=false); +test_idx = setdiff(1:length(y), train_idx); + +# This gives use the training and testing (or evaluation) sets: + +Xtrain, Xtest = X[train_idx, :], X[test_idx, :]; +Ytrain, Ytest = y[train_idx], y[test_idx]; + +# In order to fit the tree, we need to define a number of parameters. We will +# use a Gaussian maximum likelihood tree (from `EvoTrees`), which will give us a +# measure of the average prediction, but also the standard deviation. This is +# important in order to communicate uncertainty. + +gaussian_tree_parameters = EvoTreeGaussian(; + loss=:gaussian, + metric=:gaussian, + nrounds=100, + nbins=100, + λ=0.0, + γ=0.0, + η=0.1, + max_depth=7, + min_weight=1.0, + rowsample=0.5, + colsample=1.0, +) + +# We can now fit the BRT. This function has an additional `print_every_n` to +# update on the progress every `n` epochs, but we don't really need it here. + +model = fit_evotree(gaussian_tree_parameters, Xtrain, Ytrain; X_eval=Xtest, Y_eval=Ytest) + +# The next step is to gather *all* the values of all the layers in a matrix, in +# order to run the full spatial prediction: + +all_values = hcat([layer[keys(layer)] for layer in layers]...); + +# If the matrix is too big, we could resort to a combination of `clip`, make the +# prediction on each tile, and then use `hcat` and `vcat` to combine them. This +# is not the case here, so we can predict directly: + +pred = EvoTrees.predict(model, all_values); + +# Once the prediction is done, we can copy the values into a layer. + +distribution = similar(layers[1], Float64) +distribution[keys(distribution)] = pred[:, 1] +distribution + +# The BRT is able to calculate a measure of relative gain from the different +# variables: + +top5_var = importance(model, collect(layernames(WorldClim, BioClim)))[1:5] + +# This is an interesting alternative to VIF for variable selection, but also +# shows the very strong impact of the mean temperature in the wettest quarter: + +most_important_layer = findfirst(isequal(top5_var[1].first), collect(layernames(WorldClim, BioClim))) +histogram( + layers[most_important_layer][xy_presence]; fill=(0, :teal, 0.2), lc=:teal, frame=:origin, lab="Present" +) +histogram!( + layers[most_important_layer][xy_absence]; fill=(0, :white, 0.0), frame=:origin, lc=:grey, lab="Absent" +) +xaxis!(layernames(WorldClim, BioClim, most_important_layer)) + +# It is interesting to notice that despite the importance of this predictor, the +# difference between the presence and absence locations are not as clear as we +# may expect! + +# We can similarly extract uncertainty: + +uncertainty = similar(layers[1], Float64) +uncertainty[keys(uncertainty)] = pred[:, 2] +uncertainty + +# And we can now visualize the prediction, which we force to be in `[0,1]`. + +p_dis = plot(rescale(distribution, (0, 1)); c=:bamako, frame=:box) +scatter!(xy_presence; lab="", c=:black, alpha=0.2, msw=0.0, ms=3) + +# We can do the same thing for the uncertainty + +p_unc = plot(uncertainty; c=:tokyo, frame=:box) + +# Of course, this prediction is returing values for the entire range of the +# initial layer, so let's compare the distributions of the prediction score: + +histogram( + distribution[xy_presence]; fill=(0, :teal, 0.2), lc=:teal, frame=:origin, lab="Present" +) +histogram!( + distribution[xy_absence]; fill=(0, :white, 0.0), frame=:origin, lc=:grey, lab="Absent" +) +xaxis!("Prediction score") + +# This looks like a good opportunity to do some thresholding. Note that the +# values are *not* moved back to the unit range, because we'll need the raw +# values for a little surprise later on. We will find the value of the score +# that optimizes Youden's J (Cohen's κ is also a suitable alternative): + +cutoff = LinRange(extrema(distribution)..., 500); +J = similar(cutoff); + +# The loop to measure everything is fairly simple, as we already know the +# correct positions of presences and absences: + +for (i, c) in enumerate(cutoff) + p_presence = distribution[xy_presence] .>= c + p_absence = distribution[xy_absence] .>= c + tp = sum(p_presence) + fp = length(p_presence) - tp + fn = sum(p_absence) + tn = length(p_absence) - fn + J[i] = tp / (tp + fn) + tn / (tn + fp) - 1 +end + +# We can finally replace the `NaN` values by the random estimate, and look at +# the plot: + +J[isnan.(J)] .= 0.5 +plot(cutoff, J; lab="", fill=(0, 0.5, :grey), c=:grey) +xaxis!(extrema(distribution), "Threshold") +yaxis!((0.5, 1), "Informedness") + +# It's fairly noteworthy that the region around 0.5 is relatively smooth, which +# was also clear from the histogram of predicted values. This is a good sign +# that the exact cutoff may vary as a function of, for example, small changes in +# the pseudo-absence locations. This can be an interesting exercise: what +# happens to this curve if there are more pseudo-absences than presences? What +# happens to the cutoff if there are multiple runs of the model on different +# test/train set, or different pseudo-absences? For now, we will assume that the +# correct cutoff τ for a presence is readable directly at the peak of the curve: + +τ = cutoff[last(findmax(J))] + +# We can now map the result using the `distribution` data: + +range_mask = broadcast(v -> v >= τ, distribution) + +# And finally, plot the whole thing: + +plot(distribution; c=:lightgrey, leg=false) +plot!(mask(range_mask, distribution); c=:darkgreen) +scatter!(xy_presence; lab="", c=:orange, alpha=0.5, msw=0.0, ms=2) + +# Because our BRT also returns the uncertainty, we can combine both maps into a +# bivariate one, showing both where we expect the species, but also where we are +# uncertain about the prediction: + +plot(distribution; leg=false, c=:lightgrey, frame=:grid, xlab="Longitude", ylab="Latitude", grid=false) +bivariate!(mask(range_mask, distribution), mask(range_mask, uncertainty)) +p2 = bivariatelegend!( + mask(range_mask, distribution), + mask(range_mask, uncertainty); + inset=(1, bbox(0.04, 0.08, 0.23, 0.23, :center, :left)), + subplot=2, + xlab="Prediction", + ylab="Uncertainty", + guidefontsize=7, +) + +# Now, for the big question - will this range move in the future? To explore +# this, we will get the same variables, but in the future. In order to simplify +# the code, we will limit ourselves to one SSP (585) and one CMIP6 model +# (CanESM5), around 2050: + +future_layers = [ + clip(layer, observations) for + layer in SimpleSDMPredictor(WorldClim, BioClim, CanESM5, SSP585, 1:19; year="2041-2060") +]; + +# We can get all the future values from this data: + +all_future_values = hcat([layer[keys(layer)] for layer in future_layers]...); + +# And make a prediction based on our BRT model. This is, of course, assuming +# that BRTs are good at this type of prediction (they're OK). + +future_pred = EvoTrees.predict(model, all_future_values); + +# As before, we also have a measure of uncertainty. In the interest of keeping +# this vignette small, we will not look at it. + +future_distribution = similar(layers[1], Float64) +future_distribution[keys(future_distribution)] = future_pred[:, 1] +future_distribution + +# The values in `future_distribution` are in the scale of what the BRT returns, +# so we can compare them with the values of `distribution`: + +plot(future_distribution - distribution; clim=(-1.1, 1.1), c=:lisbon) + +# This shows the area of predicted gain and loss of presence. Because we have +# thresholded our current distribution, we can look at the predicted ranges of +# suitability: + +future_range_mask = broadcast(v -> v >= τ, future_distribution) + +# The last step is to get the difference between the future and current masks +# (so +1 is a gain of range, 0 is no change, and -1 is a loss), and to only +# report this for the cells that are both in the current and future data: + +range_change = convert(Float32, future_range_mask) - convert(Float32, range_mask) +both_ranges_mask = maximum([future_range_mask, range_mask]) + +# We can now plot the result, with the brown area being range that becomes +# unfavorable, the green one remaining suitable, and the blue area being newly +# opened range: + +plot(distribution; c=:lightgrey, leg=false) +plot!(mask(both_ranges_mask, range_change); c=:roma) diff --git a/docs/src/sdm/future.jl b/docs/src/sdm/future.jl new file mode 100644 index 00000000..32667a38 --- /dev/null +++ b/docs/src/sdm/future.jl @@ -0,0 +1,78 @@ +# # Future climate data + +using SimpleSDMLayers +using Plots +using Statistics + +# **Justification for this use case:** we will often want to forecast the range +# of species under a variety of climate change scenarios. For this reason, +# `SimpleSDMLayers` offers access to CMIP5 and CMIP6 scenarios or models, to be +# used in these situations. + +# For some data providers and datasets, `SimpleSDMLayers` offers access to future +# climate data. Future climates are usually specified by a model, and a +# scenario. For example, WorldClim 2.1 offers the full suite of BioClim variable +# under four SSPs and a number of CMIP6 models. + +# We can use this to look at, for example, the temperature difference between the +# current and future climate. To illustrate this, we will do a simple example +# where we contrast the "historical" climate (*i.e.* what is assumed to be the +# current data) to the projected data under SSP585 in the 2041-2060 period. + +# We will start by getting the contemporary data: + +boundaries = (left=-169.0, right=-50.0, bottom=24.0, top=71.0) +baseline = SimpleSDMPredictor(WorldClim, BioClim, 1; boundaries...) +contour(baseline; c=:cork, frame=:box, fill=true, clim=(-30, 30), levels=6) + +# To get a future dataset, we need to specify the model - models are +# combinations of a CMIP version, and either a RCP or SSP: + +instances(CMIP6) + +# And we need to check the names of the SSP we want to use: + +instances(SharedSocioeconomicPathway) + +# We can now get our future temperature layer (and plot it), for a valid +# model/scenario combination, and plot it. We will pick an extreme scenario at a +# long (yet frighteningly short) time in the future. In order to facilitate the +# comparison with the previous plot, we will re-use the same color limites, +# where blue is negatuve temperatures. + +future = SimpleSDMPredictor(WorldClim, BioClim, CanESM5, SSP585, 1; year="2061-2080", boundaries...) +contour(future; c=:cork, frame=:box, fill=true, clim=(-30, 30), levels=6) + +# Note that the call to get the future data is almost the same as the historical +# one - the exception is the addition of the model and scenario, and the +# specification of the years. With this layer, we can now measure the difference +# in mean annual temperature, because layers can be substracted. Note that we +# are switching the scale: the difference between the two layers here is always +# positive. + +plot(future - baseline, frame=:box, c=:lajolla) + +# We might actually be interested in averaging multiple models. Because we know +# the variety of models worldclim has (`instances(CMIP6)`), we can do this +# fairly easily. One of the model has no predictions for SSP585 (which we would +# learn in the form of an error message), so we will filter it out directly. + +ensemble = [ + SimpleSDMPredictor( + WorldClim, BioClim, model, SSP585, 1; + year="2061-2080", boundaries... + ) for model in instances(CMIP6) if model != GFDLESM4 +]; + +# We can create a layer of differences from each scenario to the baseline: + +differences = [component - baseline for component in ensemble]; + +# This can be plotted as a grid of differences, using the same colorscale as in +# the previous plot: + +plot(plot.(differences, c=:lajolla, grid=:none, axes=false, frame=:none, leg=false)...) + +# Finally, we can plot the average of the expected conditions under the scenario we considered: + +contour(mosaic(mean, ensemble); c=:cork, frame=:box, fill=true, clim=(-30, 30), levels=6) \ No newline at end of file diff --git a/docs/src/sdm/future.md b/docs/src/sdm/future.md deleted file mode 100644 index f5f58061..00000000 --- a/docs/src/sdm/future.md +++ /dev/null @@ -1,80 +0,0 @@ -# Future climate data - -For some dta providers and datasets, `SimpleSDMLayers` offers access to future -climate data. Future climates are usually specified by a model, and a scenario. -For example, WorldClim 2.1 offers the full suite of BioClim variable under four -SSPs and a number of CMIP6 models. - -We can use this to look at, for example, the temperature difference between the -current and future climate. To illustrate this, we will do a simple example -where we contrast the "historical" climate (*i.e.* what is assumed to be the -current data) to the projected data under SSP585 in the 2041-2060 period. - -```@example future -using SimpleSDMLayers -using Plots -using Statistics -``` - -We will start by getting the contemporary data: - -```@example future -baseline = SimpleSDMPredictor(WorldClim, BioClim, 1; left=60.0, right=95.0, bottom=0.0, top=40.0) -plot(baseline, frame=:box, c=:heat) -``` - -To get a future dataset, we need to specify the model: - -```@example future -instances(CMIP6) -``` - -And we need to check the names of the SSP we want to use: - -```@example future -instances(SharedSocioeconomicPathway) -``` - -We can now get our future temperature layer (and plot it): - -```@example future -future = SimpleSDMPredictor(WorldClim, BioClim, CanESM5, SSP585, 1; year="2041-2060", left=60.0, right=95.0, bottom=0.0, top=40.0) -plot(future, frame=:box, c=:heat) -``` - -Note that the call to get the future data is almost the same as the historical -one - the exception is the addition of the model and scenario, and the -specification of the years. - -With this layer, we can now measure the difference in mean annual temperature: - -```@example future -plot(future - baseline, c=:lapaz, frame=:box) -``` - -We might actually be interested in averaging multiple models. Because we know -the variety of models worldclim has (`instances(CMIP6)`), we can do this fairly -easily. One of the model has no predictions for SSP585 (which we would learn in -the form of an error message), so we will filter it out directly. - -```@example future -ensemble = [ - SimpleSDMPredictor( - WorldClim, BioClim, model, SSP585, 1; - year="2041-2060", left=60.0, right=95.0, bottom=0.0, top=40.0 - ) for model in instances(CMIP6) if model != GFDLESM4 -]; -``` - -We will measure the difference of each layer to the baseline: - -```@example future -differences = [component - baseline for component in ensemble] -plot(plot.(differences, c=:lapaz)..., frame=:box) -``` - -From this, we can look at the average difference (across multiple models): - -```@example future -plot(mosaic(mean, differences), c=:lapaz, frame=:box) -``` \ No newline at end of file diff --git a/docs/src/sdm/gbif.jl b/docs/src/sdm/gbif.jl new file mode 100644 index 00000000..92058f59 --- /dev/null +++ b/docs/src/sdm/gbif.jl @@ -0,0 +1,101 @@ +# # Working with GBIF data + +# **Justification for this use case:** building species distribution models +# requires information of where species are. In this document, we will see how +# the `SimpleSDMLayers` and `GBIF` packages interact, as a first step to get in +# a usable state. Specifically, we will work on the occurences of the +# relationship between temperature and precipitation for a few occurrences of +# the fungus *Hypomyces lactifluorum*, which will be the taxon used for all +# SDM-related vignettes. + +using SimpleSDMLayers +using GBIF +using Plots +using Statistics + +# We will focus on showing where on the temperature/precipitation space the +# occurrences are, so we will download these layers: + +temperature, precipitation = SimpleSDMPredictor(WorldClim, BioClim, [1, 12]) + +# And now, we can follow the GBIF documentation to get some occurrences + +observations = occurrences( + GBIF.taxon("Hypomyces lactifluorum"; strict=true), + "hasCoordinate" => "true", + "country" => "CA", + "country" => "US", + "limit" => 300, +) +while length(observations) < size(observations) + occurrences!(observations) +end + +@info observations + +# We can then extract the temperature for the first occurrence: + +temperature[observations[1]] + +# Of course, it would be unwieldy to do this for every occurrence in our dataset, +# and so we will see a way do it much faster. But first, we do not need the entire +# surface of the planet to perform our analysis, and so we will instead clip the +# layers: + +temperature = clip(temperature, observations) +precipitation = clip(precipitation, observations) + +# This will make the future queries a little faster. By default, the `clip` +# function will ad a 5% margin on every side. To get the values of a layer at +# every occurrence in a `GBIFRecord`, we simply pass the records as a position: + +histogram2d(temperature, precipitation; c=:devon, frame=:zerolines, leg=false, lab="") +scatter!( + temperature[observations], + precipitation[observations]; + lab="", + c=:white, + msc=:orange, + alpha=0.2, +) +xaxis!("Temperature (°C)") +yaxis!("Precipitation (mm)") + +# This will return a record of all data for all geo-localized occurrences +# (*i.e.* neither the latitude nor the longitude is `missing`) in a +# `GBIFRecords` collection, as an array of the `eltype` of the layer. Note that +# the layer values can be `nothing`, in which case you might need to run +# `filter(!isnothing, temperature_clip[kf_occurrences]` for it to work with the +# plotting functions. + +# We can also plot the records over space, using the overloads of the `latitudes` +# and `longitudes` functions: + +contour(temperature; c=:cork, frame=:box, fill=true, clim=(-20, 20), levels=6) +scatter!( + longitudes(observations), latitudes(observations); lab="", c=:white, msc=:orange, ms=2 +) + +# We can finally make a layer with the number of observations per cells: + +presabs = mask(temperature, observations, Float32) +plot(log1p(presabs); c=:tokyo) + +# Because the cells are rather small, and there are few observations, this is not +# necessarily going to be very informative - to get a better sense of the +# distribution of observations, we can get the average number of observations in a +# radius of 100km around each cell (we will do so for a zoomed-in part of the map +# to save time): + +zoom = clip(presabs; left=-80.0, right=-65.0, top=50.0, bottom=40.0) +buffered = slidingwindow(zoom, Statistics.mean, 100.0) +plot(buffered; c=:tokyo, frame=:box) +scatter!( + longitudes(observations), + latitudes(observations); + lab="", + c=:white, + msc=:orange, + ms=2, + alpha=0.5, +) diff --git a/docs/src/sdm/gbif.md b/docs/src/sdm/gbif.md deleted file mode 100644 index 6b39efda..00000000 --- a/docs/src/sdm/gbif.md +++ /dev/null @@ -1,91 +0,0 @@ -# Working with GBIF data - -In this example, we will see how we can make the packages `SimpleSDMLayers` and -[the `GBIF.jl` package](https://ecojulia.github.io/GBIF.jl/dev/) interact. We -will specifically plot the relationship between temperature and precipitation -for a few occurrences of the kingfisher *Megaceryle alcyon*. - -```@example temp -using SimpleSDMLayers -using GBIF -using Plots -using Statistics -temperature, precipitation = SimpleSDMPredictor(WorldClim, BioClim, [1,12]) -``` - -We can get some occurrences for the taxon of interest: - -```@example temp -kingfisher = GBIF.taxon("Megaceryle alcyon", strict=true) -kf_occurrences = occurrences(kingfisher, "hasCoordinate" => "true", "decimalLatitude" => (0.0, 65.0), "decimalLongitude" => (-180.0, -50.0), "limit" => 200) - -for i in 1:4 - occurrences!(kf_occurrences) -end - -@info kf_occurrences -``` - -We can then extract the temperature for the first occurrence: - -```@example temp -temperature[kf_occurrences[1]] -``` - -Of course, it would be unwieldy to do this for every occurrence in our dataset, -and so we will see a way do it much faster. But first, we do not need the entire -surface of the planet to perform our analysis, and so we will instead clip the -layers: - -```@example temp -temperature_clip = clip(temperature, kf_occurrences) -precipitation_clip = clip(precipitation, kf_occurrences) -``` - -This will make the future queries a little faster. By default, the `clip` -function will ad a 5% margin on every side. To get the values of a layer at -every occurrence in a `GBIFRecord`, we simply pass the records as a position: - -```@example temp -histogram2d(temperature_clip, precipitation_clip, c=:viridis) -scatter!(temperature_clip[kf_occurrences], precipitation_clip[kf_occurrences], lab="", c=:white, msc=:orange) -``` - -This will return a record of all data for all geo-localized occurrences (*i.e.* -neither the latitude nor the longitude is `missing`) in a `GBIFRecords` -collection, as an array of the `eltype` of the layer. -Note that the layer values can be `nothing`, in which case you might need to -run `filter(!isnothing, temperature_clip[kf_occurrences]` for it to work with -the plotting functions. - -We can also plot the records over space, using the overloads of the `latitudes` -and `longitudes` functions: - -```@example temp -contour(temperature_clip, c=:alpine, title="Precipitation", frame=:box, fill=true) -scatter!(longitudes(kf_occurrences), latitudes(kf_occurrences), lab="", c=:white, msc=:orange, ms=2) -``` - -These extensions of `SimpleSDMLayers` functions to work with the `GBIF` package -are meant to greatly simplify the expression of more complex pipelines, notably -for actual species distribution modeling. - -We can finally make a layer with the number of observations per cells: - -```@example temp -presabs = mask(precipitation_clip, kf_occurrences, Float32) -``` - -Because the cells are rather small, and there are few observations, this is not -necessarily going to be very informative - to get a better sense of the -distribution of observations, we can get the average number of observations in a -radius of 100km around each cell (we will do so for a zoomed-in part of the map -to save time): - -```@example temp -zoom = presabs[left=-100., right=-75.0, top=43.0, bottom=20.0] -buffered = slidingwindow(zoom, Statistics.mean, 100.0) -plot(buffered, c=:lapaz, legend=false, frame=:box) -scatter!(longitudes(kf_occurrences), latitudes(kf_occurrences), lab="", c=:white, msc=:orange, ms=2, alpha=0.5) -``` - diff --git a/docs/src/sdm/pseudoabsences.jl b/docs/src/sdm/pseudoabsences.jl new file mode 100644 index 00000000..288d8ecf --- /dev/null +++ b/docs/src/sdm/pseudoabsences.jl @@ -0,0 +1,90 @@ +# # Generating pseudo absences + +using SimpleSDMLayers +using Plots +using GBIF +using StatsBase + +# **Justification for this use case:** by contrast to the BIOCLIM model from the +# previous use case, many models require background knowledge about where the +# species is *not*, which is rarely available. For this reason, we often need to +# resort to generating pseudo-absences, by applying various guesses based on +# where we know species are. + +# In this example, we will see how to generate pseudo-absences (according to +# [Barbet-Massin *et +# al.*](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00172.x)) +# using three methods: radius-based, surface range envelope, and random +# selection. To begin with, we will occurrences for the Lobster mushroom in +# Canada and the US. + +sp = GBIF.taxon("Hypomyces lactifluorum") +observations = occurrences(sp, "hasCoordinate" => true, "limit" => 300, "country" => "CA", "country" => "US") +while length(observations) < size(observations) + occurrences!(observations) +end + +# In order to have a layer to start working, we will get the precipitation +# layer: + +layer = clip(SimpleSDMPredictor(WorldClim, BioClim, 12), observations) + +# We can visualize the results of this query: + +plot(layer, c=:devon) +scatter!(longitudes(observations), latitudes(observations), lab="", msw=0.0, ms=1, c=:orange) + +# The first step here is to remove the redundancy in observations: multiple +# observations in the same cell do not really convey a lot of information. For +# this reason, we can create a very sparse layer with only presences: + +presences = mask(layer, observations, Bool) + +# This is enough to start generating pseudo-absences. We will first use the +# `RandomSelection` method, which will pick positions anywhere on the layer +# *except* in places that are already occupied. Because our species has one +# occurrence far away in Alaska this might not be the best method, but this is +# a simple one to grasp. + +rs_pa = rand(RandomSelection, presences) + +# We can plot this layer to see what it looks like: + +plot(convert(Float32, rs_pa), c=:Greys, leg=false) +scatter!(longitudes(observations), latitudes(observations), lab="", msw=0.0, ms=1, c=:orange) + +# This is obviously not ideal, as there are pseudo-absences very far from the +# observations. An alternative is to use the `SurfaceRangeEnvelope` method, +# which is limited to the bounding box of observations. + +sre_pa = rand(SurfaceRangeEnvelope, presences) + +# We can plot this layer to see what it looks like: + +plot(convert(Float32, sre_pa), c=:Greys, leg=false) +scatter!(longitudes(observations), latitudes(observations), lab="", msw=0.0, ms=1, c=:orange) + +# It is a little bit better, but the extreme point means that the surface range +# envelope is very large -- in addition, the species has a distribution with +# large gaps in it, so we are going to experiment with the `WithinRadius` +# method. + +# This method will allow pseudo-absences to be within a set distance (expressed +# in degrees) of any given observation, excluding the grid cells for which we +# already have an observation. The default distance is 1 degree. + +wr_one_pa = rand(WithinRadius, presences) + +# This function can take a little longer to run, as it involves a clipping step +# based on circles around the presences; this will be optimized in the future. + +# We can plot this layer to see what it looks like: + +plot(convert(Float32, wr_one_pa), c=:Greys, leg=false) +scatter!(longitudes(observations), latitudes(observations), lab="", msw=0.0, ms=1, c=:orange) + +# This is a much better distribution of pseudo-absences! Of course, the +# consequences of which pseudo-absence method to pick is key in the accuracy of +# the model. The `WithinRadius` method may not always perform better. In fact, +# in the Boosted Regression Tree exmaple, we will see how `SurfaceRangeEnvelope` +# gives excellent results. \ No newline at end of file diff --git a/docs/src/sdm/vif.jl b/docs/src/sdm/vif.jl new file mode 100644 index 00000000..47f1f7aa --- /dev/null +++ b/docs/src/sdm/vif.jl @@ -0,0 +1,98 @@ +# # Variable selection with the VIF + +using SimpleSDMLayers +using GLM +using Statistics + +# **Justification for this use case:** a lot of predictive variables are +# auto-correlated, and therefore one might argue that maybe, we may eventually +# build better models by removing some of them. This is generally refered to as +# variable selection, and sharing one's opinion on this is the fastest way to +# start a brawl at any gathering of ecologists. + +# We will illustrate variable selection with the Variance Inflation Factor using +# the bioclim data from the region in which *Hypomyces lactifluorum* is found. + +layers = SimpleSDMPredictor( + WorldClim, BioClim, 1:19; left=-169.0, right=-50.0, bottom=24.0, top=71.0 +); + +# We will first gather everything in a matrix: + +x = hcat([layer[keys(layer)] for layer in layers]...); +size(x) + +# Because of the spread of some values, we will center and reduce this matrix to +# give every variable a mean of 0 and unit variance: + +X = (x .- mean(x; dims=1)) ./ std(x; dims=1) +round.(Int, mean(X; dims=1)) + +# The VIF is simply measured as 1/(1-R²) by regressing every variable against +# all others. Let's have an illustration with the first predictor: + +function vif(model) + R² = r2(model) + return 1 / (1 - R²) +end + +vif(lm(X[:, 2:end], X[:, 1])) + +# The generally agreed threshold for a good VIF is 2, or 5, or 10 (so both +# "generally" and "agreed" are overstatements here), and as this one is higher, +# it suggests that we may not need all of these data. It is also entirely +# possible to have an *infinite* VIF, in case two variables are perfectly +# correlated (this happens a fair amount with bioclim, in fact). + +# For this reason, we will go through an iterative process to get rid of +# variables one by one until the largest VIF is no larger than some threshold. +# Specifically, we get rid of the variable with the largest VIF first. Let's try +# this with the full sample: + +vifs = zeros(Float64, length(layers)) +for i in eachindex(layers) + vifs[i] = vif(lm(X[:, setdiff(eachindex(selection), i)], X[:, i])) +end +findmax(vifs) + +# This result suggests that this variable, because it has the highest VIF (and +# one that is above our threshold), should be dropped. Repeating the process is +# a good use case for a recursive function: + +function stepwisevif( + layers::Vector{T}, selection=collect(1:length(layers)), threshold::Float64=5.0 +) where {T<:SimpleSDMLayer} + x = hcat([layer[keys(layer)] for layer in layers[selection]]...) + X = (x .- mean(x; dims=1)) ./ std(x; dims=1) + vifs = zeros(Float64, length(selection)) + for i in eachindex(selection) + vifs[i] = vif(lm(X[:, setdiff(eachindex(selection), i)], X[:, i])) + end + all(vifs .<= threshold) && return selection + drop = last(findmax(vifs)) + popat!(selection, drop) + @info "Variables remaining: $(selection)" + return stepwisevif(layers, selection, threshold) +end + +# This function will operate on a collection of layers, and starting from a +# selection (of indices), iterate until a subset satisfying max(VIF) < threshold +# is found. We can run this function on our entire dataset: + +selected_variables_id = stepwisevif(layers) + +# Which variables are these? + +layernames(WorldClim, BioClim)[selected_variables_id] + +# Finally, we can select the variables this process recommends: + +layers[selected_variables_id] + +# Before we move on -- variable selection, especially stepwise, is not +# necessarilly a *good* practice. Alternatives are model selection, +# dimensionality reduction using *e.g.* PCA, or other methods to remove the +# problematic covariance structure in the data. In Julia, a lot of this can be +# done using the `MultivariateStats` package. This can be an interesting +# exercise: rather than relying on VIF, what would the dimensionsality of the +# results look like with a PCA cutoff at 99% of explained variance? \ No newline at end of file diff --git a/joss/figures/paper_gbif_1.png b/joss/figures/paper_gbif_1.png deleted file mode 100644 index 1d9df4d7..00000000 Binary files a/joss/figures/paper_gbif_1.png and /dev/null differ diff --git a/joss/figures/paper_temp_1.png b/joss/figures/paper_temp_1.png deleted file mode 100644 index e4bd537c..00000000 Binary files a/joss/figures/paper_temp_1.png and /dev/null differ diff --git a/joss/paper.bib b/joss/paper.bib deleted file mode 100644 index cdf0869e..00000000 --- a/joss/paper.bib +++ /dev/null @@ -1,109 +0,0 @@ - -@article{Araujo2019StaDis, - title = {Standards for Distribution Models in Biodiversity Assessments}, - author = {Ara{\'u}jo, Miguel B. and Anderson, Robert P. and Barbosa, A. M{\'a}rcia and Beale, Colin M. and Dormann, Carsten F. and Early, Regan and Garcia, Raquel A. and Guisan, Antoine and Maiorano, Luigi and Naimi, Babak and O'Hara, Robert B. and Zimmermann, Niklaus E. and Rahbek, Carsten}, - year = {2019}, - month = jan, - volume = {5}, - pages = {eaat4858}, - publisher = {{American Association for the Advancement of Science}}, - issn = {2375-2548}, - doi = {10.1126/sciadv.aat4858}, - url = {https://advances.sciencemag.org/content/5/1/eaat4858}, - urldate = {2020-03-14}, - abstract = {Demand for models in biodiversity assessments is rising, but which models are adequate for the task? We propose a set of best-practice standards and detailed guidelines enabling scoring of studies based on species distribution models for use in biodiversity assessments. We reviewed and scored 400 modeling studies over the past 20 years using the proposed standards and guidelines. We detected low model adequacy overall, but with a marked tendency of improvement over time in model building and, to a lesser degree, in biological data and model evaluation. We argue that implementation of agreed-upon standards for models in biodiversity assessments would promote transparency and repeatability, eventually leading to higher quality of the models and the inferences used in assessments. We encourage broad community participation toward the expansion and ongoing development of the proposed standards and guidelines. Biodiversity assessments use a variety of data and models. We propose best-practice standards for studies in these assessments. Biodiversity assessments use a variety of data and models. We propose best-practice standards for studies in these assessments.}, - chapter = {Review}, - copyright = {Copyright \textcopyright{} 2019 The Authors, some rights reserved; exclusive licensee American Association for the Advancement of Science. No claim to original U.S. Government Works. Distributed under a Creative Commons Attribution NonCommercial License 4.0 (CC BY-NC).. This is an open-access article distributed under the terms of the Creative Commons Attribution-NonCommercial license, which permits use, distribution, and reproduction in any medium, so long as the resultant use is not for commercial advantage and provided the original work is properly cited.}, - journal = {Science Advances}, - keywords = {annotated,priority}, - language = {en}, - number = {1} -} - -@article{Bezanson2017JulFre, - title = {Julia: A Fresh Approach to Numerical Computing}, - shorttitle = {Julia}, - author = {Bezanson, Jeff and Edelman, Alan and Karpinski, Stefan and Shah, Viral B.}, - year = {2017}, - month = jan, - volume = {59}, - pages = {65--98}, - issn = {0036-1445, 1095-7200}, - doi = {10.1137/141000671}, - url = {https://epubs.siam.org/doi/10.1137/141000671}, - urldate = {2019-04-29}, - journal = {SIAM Review}, - language = {en}, - number = {1} -} - -@article{Booth2014BioFir, - title = {{{BIOCLIM}}: The First Species Distribution Modelling Package, Its Early Applications and Relevance to Most Current {{MaxEnt}} Studies}, - shorttitle = {Bioclim}, - author = {Booth, Trevor H. and Nix, Henry A. and Busby, John R. and Hutchinson, Michael F.}, - year = {2014}, - volume = {20}, - pages = {1--9}, - issn = {1472-4642}, - doi = {10.1111/ddi.12144}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/ddi.12144}, - urldate = {2019-08-16}, - abstract = {Aim Interest in species distribution models (SDMs) and related niche studies has increased dramatically in recent years, with several books and reviews being prepared since 2000. The earliest SDM studies are dealt with only briefly even in the books. Consequently, many researchers are unaware of when the first SDM software package (bioclim) was developed and how a broad range of applications using the package was explored within the first 8 years following its release. The purpose of this study is to clarify these early developments and initial applications, as well as to highlight bioclim's continuing relevance to current studies. Location Mainly Australia and New Zealand, but also some global applications. Methods We outline the development of the bioclim package, early applications (1984\textendash 1991) and its current relevance. Results bioclim was the first SDM package to be widely used. Early applications explored many of the possible uses of SDMs in conservation biogeography, such as quantifying the environmental niche of species, identifying areas where a species might be invasive, assisting conservation planning and assessing the likely impacts of climate change on species distributions. Main conclusions Understanding this pioneering work is worthwhile as bioclim was for many years one of the leading SDM packages and remains widely used. Climate interpolation methods developed for bioclim were used to create the WorldClim database, the most common source of climate data for SDM studies, and bioclim variables are used in about 76\% of recent published MaxEnt analyses of terrestrial ecosystems. Also, some of the bioclim studies from the late 1980s, such as measuring niche (both realized and fundamental) and assessing possible impacts of climate change, are still highly relevant to key conservation biogeography issues.}, - copyright = {\textcopyright{} 2013 John Wiley \& Sons Ltd}, - journal = {Diversity and Distributions}, - keywords = {annotated,bioclimate envelope,Biogeography,biological conservation,climate change,climate interpolation,ecological modelling,ecological niche,priority,SDM}, - language = {en}, - number = {1} -} - -@misc{Chamberlain2020RgbInt, - title = {{{rgbif}}: Interface to the {{Global Biodiversity Information Facility API}}}, - author = {Chamberlain, Scott and Barve, Vijay and Mcglinn, Dan and Oldoni, Damiano and Desmet, Peter and Geffert, Laurens and Ram, Karthik}, - year = {2020}, - url = {https://CRAN.R-project.org/package=rgbif} -} - -@misc{Hijmans2017DisSpe, - title = {{{dismo}}: Species Distribution Modeling}, - author = {Hijmans, Robert J. and Phillips, Steven and Leathwick, John and Elith, Jane}, - year = {2017}, - url = {https://CRAN.R-project.org/package=dismo}, - keywords = {SDM} -} - -@misc{Hijmans2020RasGeo, - title = {{{raster}}: {{Geographic}} Data Analysis and Modeling}, - author = {Hijmans, Robert J.}, - year = {2020}, - url = {https://CRAN.R-project.org/package=raster} -} - -@article{Lai2019EvaPop, - title = {Evaluating the Popularity of {{R}} in Ecology}, - author = {Lai, Jiangshan and Lortie, Christopher J. and Muenchen, Robert A. and Yang, Jian and Ma, Keping}, - year = {2019}, - volume = {10}, - pages = {e02567}, - issn = {2150-8925}, - doi = {10.1002/ecs2.2567}, - url = {https://esajournals.onlinelibrary.wiley.com/doi/abs/10.1002/ecs2.2567}, - urldate = {2020-10-16}, - abstract = {The programming language R is widely used in many fields. We explored the extent of reported R use in the field of ecology using the Web of Science and text mining. We analyzed the frequencies of R packages reported in more than 60,000 peer-reviewed articles published in 30 ecology journals during a 10-yr period ending in 2017. The number of studies reported using R as their primary tool in data analysis increased linearly from 11.4\% in 2008 to 58.0\% in 2017. The top 10 packages reported were lme4, vegan, nlme, ape, MuMIn, MASS, mgcv, ade4, multcomp, and car. The increasing popularity of R has most likely furthered open science in ecological research because it can improve reproducibility of analyses and captures workflows when scripts and codes are included and shared. These findings may not be entirely unique to R because there are other programming languages used by ecologists, but they do strongly suggest that given the relatively high frequency of reported use of R, it is a significant component of contemporary analytics in the field of ecology.}, - annotation = {\_eprint: https://esajournals.onlinelibrary.wiley.com/doi/pdf/10.1002/ecs2.2567}, - copyright = {\textcopyright{} 2019 The Authors.}, - journal = {Ecosphere}, - keywords = {code,ecology journal,open science,packages,R,reproducibility,statistical programming,Web of Science}, - language = {en}, - number = {1} -} - -@book{RCoreTeam2020RLan, - title = {R: A Language and Environment for Statistical Computing}, - author = {{R Core Team}}, - year = {2020}, - publisher = {{R Foundation for Statistical Computing}}, - address = {{Vienna, Austria}}, - url = {https://www.R-project.org/} -} - - diff --git a/joss/paper.jmd b/joss/paper.jmd deleted file mode 100644 index 0717d991..00000000 --- a/joss/paper.jmd +++ /dev/null @@ -1,269 +0,0 @@ ---- -title: 'SimpleSDMLayers.jl and GBIF.jl: A Framework for Species Distribution Modeling in Julia' -tags: - - Julia - - ecology - - biogeography - - GBIF - - species distribution modeling -authors: - - name: Gabriel Dansereau^[Correspondance to gabriel.dansereau\@umontreal.ca] - orcid: 0000-0002-2212-3584 - affiliation: 1 # (Multiple affiliations must be quoted) - - name: Timothée Poisot - orcid: 0000-0002-0735-5184 - affiliation: 1 -affiliations: - - name: Département de sciences biologiques, Université de Montréal - index: 1 -date: 13 November 2020 -bibliography: [paper.bib] - ---- - -
- -# Summary - -Predicting where species should be found in space is a common question in -Ecology and Biogeography. -Species distribution models (SDMs), for instance, aim to predict where -environmental conditions are suitable for a given species, often on continuous -geographic scales. -Such analyses require the use of geo-referenced data on species distributions -coupled with climate or land cover information, hence a tight integration -between environmental data, species occurrence data, and spatial coordinates. -Thus, it requires an efficient way to access these different data types within -the same software, as well as a flexible framework on which to build various -analysis workflows. -Here we present `SimpleSDMLayers.jl` and `GBIF.jl`, two packages in the _Julia_ -language implementing a simple framework and type-system on which to build SDM -analyses, as well as providing access to popular data sources for species -occurrences and environmental conditions. - -# Statement of need - -Species distribution modeling (SDM) is an increasingly growing field in Ecology -and Biogeography, with many applications in biodiversity assessment, management, -and conservation [@Araujo2019StaDis]. -Most SDM models aim at predicting a species distribution in space based on -environmental data and information on where the species was previously seen. -Hence, SDM studies require a tight and efficient integration between -geo-referenced environmental and species occurrence data. -However, such data are complex to handle and often require different software: -climate and land use data are stored as layers in raster files, then visualized -and manipulated in specialized GIS (geographic information systems) software, -while occurrence data are stored in tables and spreadsheets, then manipulated in -data analysis and statistics-oriented tools or programming languages. -Therefore, there is a need for efficient tools to manipulate bioclimatic data, -specifically oriented towards species distribution modeling. - -In recent years, _R_ [@RCoreTeam2020RLan] has become the most widely used -programming language in Ecology, especially in spatial ecology studies -[@Lai2019EvaPop]. -Hence, many efficient packages and tools for species distribution modeling -have been developed in _R_. -For instance, the package `raster` [@Hijmans2020RasGeo] can be used to -manipulate raster format data (for example climatic or land use data), `dismo` -[@Hijmans2017DisSpe] implements many SDM models and provides access to common -climatic data sources, and `rgbif` [@Chamberlain2020RgbInt] provides access to -the GBIF database, a common source of species occurrence data in SDM studies. -In comparison, few specific SDM resources currently exist for the _Julia_ language -[@Bezanson2017JulFre], although SDM models could benefit from its speed, -efficiency and ease of writing (removing the need to rewrite functions in other -languages for higher performance, as in _R_). -There are currently packages such as -[`GDAL.jl`](https://github.com/JuliaGeo/GDAL.jl) and -[`ArchGDAL.jl`](https://github.com/yeesian/ArchGDAL.jl) to manipulate raster -data; however, these are lower-level implementations than what is typically used -by most ecologists, and they lack support for common layer manipulation. -Generalized Linear Models ([GLM.jl](https://github.com/JuliaStats/GLM.jl)), -Random Forests ([DecisionTrees.jl](https://github.com/bensadeghi/DecisionTree.jl)), -Neural Networks ([Flux.jl](https://github.com/FluxML/Flux.jl)), and other commonly used models have excellent implementations -in _Julia_, although not oriented towards species distribution modeling and -raster format data. - -`SimpleSDMLayers.jl` is a package to facilitate manipulation of geo-referenced -raster data in _Julia_, specifically aimed at species distribution modeling. -It is a higher-level implementation, building on `ArchGDAL.jl`, that is easier -to use and provides support for common SDM operations (see _Feature Overview_ -section below). -The package implements simple type structures to manipulate the input and output -data of SDM models, and is meant to be a flexible framework on which to build -more complex analyses. -While it does not implement SDM models in itself, we believe the package is a -step that will make _Julia_ more popular for species distribution modeling, and -lead to the development of more complete implementations. -`SimpleSDMLayers.jl` also offers built-in access to some of the most common -data sources in SDM studies, such as the WorldClim 2.1 climatic data, which is -the most common source of climate data in SDM studies [@Booth2014BioFir]. -The package is also tightly integrated with `GBIF.jl`, which allows easy access -to the GBIF database, a common data source for species occurrences. -Both `SimpleSDMLayers.jl` and `GBIF.jl` are part of the _EcoJulia_ organization, -whose aim is to integrate a variety of packages for ecological analyses in -_Julia_. - -# Basic structure - -The core structure implemented in `SimpleSDMLayers.jl` is the `SimpleSDMLayer` -type, with two variants, `SimpleSDMPredictor` and `SimpleSDMResponse`, depending -if the layer is meant to be mutable or not. - -A `SimpleSDMLayer` element is made of a `grid` field, which contains the raster -data as a simple `Array` (matrix) of any type, easily manipulable. -It also contains the fields `left`, `right`, `bottom`, and `top`, representing -the bounding coordinates of the layer. - -To illustrate this structure, the following code loads a layer of WorldClim 2.1 -climate data, which also shows how easily this can be done in a single -call. -By default, this will return a layer with the values for the whole world if no -bounding coordinates are specified. - -```julia -using SimpleSDMLayers - -# Get world temperature data -temperature = worldclim(1) -``` - -The raster values can be displayed by calling the `grid` field. - -```julia -# Display data grid -temperature.grid -``` - -`SimpleSDMLayers.jl` then makes it very simple to plot and visualize the layer -as a map using `Plots.jl` (\autoref{fig:temp}). - -```julia; eval = false -using Plots -plot(temperature) -``` - -```julia; echo = false; label = "temp"; fig_cap = "Map of the average annual temperature data from WorldClim 2.1, accessed as a layer through SimpleSDMLayers.jl\\label{fig:temp}" -using Plots -plot(temperature, xguide = "Longitude", yguide = "Latitude", - colorbar_title = "Average Temperature (°C)") -``` - -# Feature overview - -`SimpleSDMLayers.jl` implements the following features: - -- **Overloads for common functions**: The `SimpleSDMLayer` types are implemented - along with overloads for many common functions and operations, such as - subsetting, changing values, copying, and iterating. Therefore, the layers and - the raster values stored in the `grid` field can be manipulated as easily as - any `Array`, without losing their spatial aspect. -- **Statistical operations on layer values**: Common operations can be - performed directly on the layer values without worrying about the underlying - structure (for example, sum, minimum, maximum, mean, median). -- **Statistical operations on multiple layers**: Operations can also be - performed between layers to produce a new layer, just as `Arrays`, as long as - they share the same coordinates. For instance, two layers can be added or - subtracted, and calling `mean()` will produce a new layer with the mean value - per pixel. -- **Spatial operations**: `SimpleSDMLayers.jl` implements spatial operations - such as clipping a layer to given coordinates, coarsening the resolution by - grouping values, and performing sliding window operations given a certain - radius. -- **Datasets supported**: The package provides access to climate data - at different resolutions from WorldClim 2.1 and CHELSA, as well as land cover - data from EarthEnv. Custom raster data can be loaded as well. -- **Plotting recipes**: Default recipes are implemented for the `SimpleSDMLayer` - types, allowing to directly map them, view the grid data as histograms - and density plots, or compare layers as 2-dimensional histograms. -- **Integration with GBIF.jl (and DataFrames.jl)**: `SimpleSDMLayer.jl` is well - integrated with `GBIF.jl`, allowing to clip layers based on the occurrence - data, as well as to map occurrences by displaying them over the layers. - Both packages also offer an integration with `DataFrames.jl` to easily convert - environmental and occurrence data to a table format. - -# Examples - -### Spatial operations - -To illustrate a few of the spatial operations supported by `SimpleSDMLayers.jl`, -the following code reuses the previous `temperature` layer, and shows how it is -possible to : 1) clip the layer to a region of interest (Europe for instance); -2) coarsen the resolution by averaging groups of cells for large scale -analyses; and 3) perform sliding window operations to aggregate values for -each site based on a certain radius. -Each of these operations can be performed in a single command and returns new -layers, which can then be plotted as shown previously. - -```julia; eval = false -using Statistics -# Clip to Europe -temperature_europe = temperature[left = -11.2, right = 30.6, - bottom = 29.1, top = 71.0] -# Coarsen resolution -temperature_coarse = coarsen(temperature_europe, Statistics.mean, (4, 4)) -# Sliding window averaging -temperature_slided = slidingwindow(temperature_europe, Statistics.mean, 100.0) -``` - -### GBIF integration - -The following example shows how the integration between `SimpleSDMLayers.jl` and -`GBIF.jl` allows to easily map the occurrences of any species in GBIF. -The species represented in this example is the Belted Kingfisher -(_Megaceryle alcyon_). - -`GBIF.jl` first allows us to retrieve the latest occurrences from the GBIF -database. Note that the element returned here uses the `GBIFRecords` format, -which contains the metadata associated to each GBIF occurrence. - -```julia -using GBIF -kingfisher = GBIF.taxon("Megaceryle alcyon", strict=true) -kf_occurrences = occurrences(kingfisher) -# Get at least 200 occurrences -while length(kf_occurrences) < 200 - occurrences!(kf_occurrences) - @info "$(length(kf_occurrences)) occurrences" -end -kf_occurrences -``` - -`SimpleSDMLayers.jl` then provides a simple integration between the occurrence -data and the environmental layers. -The layers can be clipped to the spatial extent of the occurrences in a single -call using the `clip()` function. -The occurrences' coordinates can also be extracted with `longitudes()` and -`latitudes()`. -Using these functions, we can easily create a map of the occurrences by -overlaying them on top of the clipped environmental layer (\autoref{fig:gbif}) - -```julia; eval = false -# Clip layer to occurrences -temperature_clip = clip(temperature, kf_occurrences) - -# Plot occurrences -contour(temperature_clip, fill = true) -scatter!(longitudes(kf_occurrences), latitudes(kf_occurrences)) -``` - -```julia; echo = false; label = "gbif"; fig_cap = "Latest Belted Kingfisher occurrences from the GBIF database displayed over the temperature data through the integration between SimpleSDMLayers.jl and GBIF.jl\\label{fig:gbif}" -# Clip layer tooccurrences -temperature_clip = clip(temperature, kf_occurrences) - -# Plot occurrences -contour(temperature_clip, fill = true, colorbar_title = "Average temperature (°C)", - xguide = "Longitude", yguide = "Latitude") -scatter!(longitudes(kf_occurrences), latitudes(kf_occurrences), - label = "Kingfisher occurrences", legend = :bottomleft, - c = :white, msc = :orange) -``` - -# Acknowledgements - -We would like to thank all contributors to the _EcoJulia_ organization for their -help in developing this series of packages for ecological research. -Funding was provided by Fonds de recherche du Québec - Nature et technologies -(FRQNT) and the Computational Biodiversity Science and Services (BIOS²) training -program. - -# References \ No newline at end of file diff --git a/joss/paper.md b/joss/paper.md deleted file mode 100644 index fdfd1b37..00000000 --- a/joss/paper.md +++ /dev/null @@ -1,310 +0,0 @@ ---- -title: 'SimpleSDMLayers.jl and GBIF.jl: A Framework for Species Distribution Modeling in Julia' -tags: - - Julia - - ecology - - biogeography - - GBIF - - species distribution modeling -authors: - - name: Gabriel Dansereau^[Correspondance to gabriel.dansereau\@umontreal.ca] - orcid: 0000-0002-2212-3584 - affiliation: 1 # (Multiple affiliations must be quoted) - - name: Timothée Poisot - orcid: 0000-0002-0735-5184 - affiliation: 1 -affiliations: - - name: Département de sciences biologiques, Université de Montréal - index: 1 -date: 13 November 2020 -bibliography: paper.bib - ---- - - -
- -# Summary - -Predicting where species should be found in space is a common question in -ecology and biogeography. -Species distribution models (SDMs), for instance, aim to predict where -environmental conditions are suitable for a given species, often on continuous -geographic scales. -Such analyses require the use of geo-referenced data on species distributions -coupled with climate or land cover information, hence a tight integration -between environmental data, species occurrence data, and spatial coordinates. -Thus, it requires an efficient way to access these different data types within -the same software, as well as a flexible framework on which to build various -analysis workflows. -Here we present `SimpleSDMLayers.jl` and `GBIF.jl`, two packages in the _Julia_ -language implementing a simple framework and type-system on which to build SDM -analyses, as well as providing access to popular data sources for species -occurrences and environmental conditions. - -# Statement of need - -Species distribution modeling (SDM) is an increasingly growing field in ecology -and biogeography, with many applications in biodiversity assessment, management, -and conservation [@Araujo2019StaDis]. -Most SDM models aim at predicting a species distribution in space based on -environmental data and information on where the species was previously seen. -Hence, SDM studies require a tight and efficient integration between -geo-referenced environmental and species occurrence data. -However, such data are complex to handle and often require different software: -climate and land use data are stored as layers in raster files, then visualized -and manipulated in specialized GIS (geographic information systems) software, -while occurrence data are stored in tables and spreadsheets, then manipulated in -data analysis and statistics-oriented tools or programming languages. -Therefore, there is a need for efficient tools to manipulate bioclimatic data, -specifically oriented towards species distribution modeling. - -In recent years, _R_ [@RCoreTeam2020RLan] has become the most widely used -programming language in ecology, especially in spatial ecology studies -[@Lai2019EvaPop]. -Hence, many efficient packages and tools for species distribution modeling -have been developed in _R_. -For instance, the package `raster` [@Hijmans2020RasGeo] can be used to -manipulate raster format data (for example climatic or land use data), `dismo` -[@Hijmans2017DisSpe] implements many SDM models and provides access to common -climatic data sources, and `rgbif` [@Chamberlain2020RgbInt] provides access to -the GBIF database, a common source of species occurrence data in SDM studies. -In comparison, few specific SDM resources currently exist for the _Julia_ language -[@Bezanson2017JulFre], although SDM models could benefit from its speed, -efficiency and ease of writing (removing the need to rewrite functions in other -languages for higher performance, as in _R_). -There are currently packages such as -[`GDAL.jl`](https://github.com/JuliaGeo/GDAL.jl) and -[`ArchGDAL.jl`](https://github.com/yeesian/ArchGDAL.jl) to manipulate raster -data; however, these are lower-level implementations than what is typically used -by most ecologists, and they lack support for common layer manipulation. -generalized linear models ([GLM.jl](https://github.com/JuliaStats/GLM.jl)), -random forests ([DecisionTrees.jl](https://github.com/bensadeghi/DecisionTree.jl)), -neural networks ([Flux.jl](https://github.com/FluxML/Flux.jl)), and other commonly used models have excellent implementations -in _Julia_, although not oriented towards species distribution modeling and -raster format data. - -`SimpleSDMLayers.jl` is a package to facilitate manipulation of geo-referenced -raster data in _Julia_, specifically aimed at species distribution modeling. -It is a higher-level implementation, building on `ArchGDAL.jl`, that is easier -to use and provides support for common SDM operations (see _Feature Overview_ -section below). -The package implements simple type structures to manipulate the input and output -data of SDM models, and is meant to be a flexible framework on which to build -more complex analyses. -While it does not implement SDM models in itself, we believe the package is a -step that will make _Julia_ more popular for species distribution modeling, and -lead to the development of more complete implementations. -`SimpleSDMLayers.jl` also offers built-in access to some of the most common -data sources in SDM studies, such as the WorldClim 2.1 climatic data, which is -the most common source of climate data in SDM studies [@Booth2014BioFir]. -The package is also tightly integrated with `GBIF.jl`, which allows easy access -to the GBIF database, a common data source for species occurrences. -Both `SimpleSDMLayers.jl` and `GBIF.jl` are part of the _EcoJulia_ organization, -whose aim is to integrate a variety of packages for ecological analyses in -_Julia_. - -# Basic structure - -The core structure implemented in `SimpleSDMLayers.jl` is the `SimpleSDMLayer` -type, with two variants, `SimpleSDMPredictor` and `SimpleSDMResponse`, depending -if the layer is meant to be mutable or not. - -A `SimpleSDMLayer` element is made of a `grid` field, which contains the raster -data as a simple `Array` (matrix) of any type, easily manipulable. -It also contains the fields `left`, `right`, `bottom`, and `top`, representing -the bounding coordinates of the layer. - -To illustrate this structure, the following code loads a layer of WorldClim 2.1 -climate data, which also shows how easily this can be done in a single -call. -By default, this will return a layer with the values for the whole world if no -bounding coordinates are specified. - -```julia -# Load package -using SimpleSDMLayers - -# Get world temperature data -temperature = worldclim(1) -``` - -``` -SimpleSDMLayers.SimpleSDMPredictor{Float32}(Union{Nothing, Float32}[-31.017 -105f0 -31.62153f0 … -32.81253f0 -31.620333f0; -30.391916f0 -31.63478f0 … -3 -2.81005f0 -30.995281f0; … ; nothing nothing … nothing nothing; nothing noth -ing … nothing nothing], -180.0, 180.0, -90.0, 90.0) -``` - - - - - -The raster values can be displayed by calling the `grid` field. - -```julia -# Display data grid -temperature.grid -``` - -``` -1080×2160 Array{Union{Nothing, Float32},2}: - -31.0171 -31.6215 -31.6227 … -32.8129 -32.8125 -31.6203 - -30.3919 -31.6348 -31.6341 -32.8092 -32.8101 -30.9953 - -33.4822 -34.1494 -34.1493 -35.4658 -35.4633 -34.1374 - -33.6104 -34.2875 -34.2865 -35.596 -35.5931 -34.2528 - -33.7199 -34.4041 -34.4014 -35.6932 -35.691 -34.3311 - -33.8224 -34.5184 -34.5162 … -35.8037 -35.7996 -34.4165 - -31.6613 -32.3194 -32.3184 -33.5133 -33.5101 -32.2032 - -31.7635 -32.4307 -33.7036 -34.9522 -33.6282 -32.3038 - -33.7063 -36.0738 -39.2075 -40.6438 -37.3938 -34.3026 - -33.9768 -34.7016 -35.8662 -37.2408 -36.0364 -34.5988 - - nothing nothing nothing nothing nothing nothing - nothing nothing nothing nothing nothing nothing - nothing nothing nothing nothing nothing nothing - nothing nothing nothing nothing nothing nothing - nothing nothing nothing … nothing nothing nothing - nothing nothing nothing nothing nothing nothing - nothing nothing nothing nothing nothing nothing - nothing nothing nothing nothing nothing nothing - nothing nothing nothing nothing nothing nothing -``` - - - - - -`SimpleSDMLayers.jl` then makes it very simple to plot and visualize the layer -as a map using `Plots.jl` (\autoref{fig:temp}). - -```julia -using Plots -plot(temperature) -``` - -![Map of the average annual temperature data from WorldClim 2.1, accessed as a layer through SimpleSDMLayers.jl\label{fig:temp}](figures/paper_temp_1.png) - - - -# Feature overview - -`SimpleSDMLayers.jl` implements the following features: - -- **Overloads for common functions**: The `SimpleSDMLayer` types are implemented - along with overloads for many common functions and operations, such as - subsetting, changing values, copying, and iterating. Therefore, the layers and - the raster values stored in the `grid` field can be manipulated as easily as - any `Array`, without losing their spatial aspect. -- **Statistical operations on layer values**: Common operations can be - performed directly on the layer values without worrying about the underlying - structure (for example, sum, minimum, maximum, mean, median). -- **Statistical operations on multiple layers**: Operations can also be - performed between layers to produce a new layer, just as `Arrays`, as long as - they share the same coordinates. For instance, two layers can be added or - subtracted, and calling `mean()` will produce a new layer with the mean value - per pixel. -- **Spatial operations**: `SimpleSDMLayers.jl` implements spatial operations - such as clipping a layer to given coordinates, coarsening the resolution by - grouping values, and performing sliding window operations given a certain - radius. -- **Datasets supported**: The package provides access to climate data - at different resolutions from WorldClim 2.1 and CHELSA, as well as land cover - data from EarthEnv. Custom raster data can be loaded as well. -- **Plotting recipes**: Default recipes are implemented for the `SimpleSDMLayer` - types, allowing to directly map them, view the grid data as histograms - and density plots, or compare layers as 2-dimensional histograms. -- **Integration with GBIF.jl (and DataFrames.jl)**: `SimpleSDMLayer.jl` is well - integrated with `GBIF.jl`, allowing to clip layers based on the occurrence - data, as well as to map occurrences by displaying them over the layers. - Both packages also offer an integration with `DataFrames.jl` to easily convert - environmental and occurrence data to a table format. - -# Examples - -### Spatial operations - -To illustrate a few of the spatial operations supported by `SimpleSDMLayers.jl`, -the following code reuses the previous `temperature` layer, and shows how it is -possible to : 1) clip the layer to a region of interest (Europe for instance); -2) coarsen the resolution by averaging groups of cells for large scale -analyses; and 3) perform sliding window operations to aggregate values for -each site based on a certain radius. -Each of these operations can be performed in a single command and returns new -layers, which can then be plotted as shown previously. - -```julia -using Statistics -# Clip to Europe -temperature_europe = temperature[left = -11.2, right = 30.6, - bottom = 29.1, top = 71.0] -# Coarsen resolution -temperature_coarse = coarsen(temperature_europe, Statistics.mean, (4, 4)) -# Sliding window averaging -temperature_slided = slidingwindow(temperature_europe, Statistics.mean, 100.0) -``` - - - -### GBIF integration - -The following example shows how the integration between `SimpleSDMLayers.jl` and -`GBIF.jl` allows to easily map the occurrences of any species in GBIF. -The species represented in this example is the belted kingfisher -(_Megaceryle alcyon_). - -`GBIF.jl` first allows us to retrieve the latest occurrences from the GBIF -database. Note that the element returned here uses the `GBIFRecords` format, -which contains the metadata associated to each GBIF occurrence. - -```julia -using GBIF -kingfisher = GBIF.taxon("Megaceryle alcyon", strict=true) -kf_occurrences = occurrences(kingfisher) -# Get at least 200 occurrences -while length(kf_occurrences) < 200 - occurrences!(kf_occurrences) - @info "$(length(kf_occurrences)) occurrences" -end -kf_occurrences -``` - -``` -GBIF records: downloaded 200 out of 100000 -``` - - - - - -`SimpleSDMLayers.jl` then provides a simple integration between the occurrence -data and the environmental layers. -The layers can be clipped to the spatial extent of the occurrences in a single -call using the `clip()` function. -The occurrences' coordinates can also be extracted with `longitudes()` and -`latitudes()`. -Using these functions, we can easily create a map of the occurrences by -overlaying them on top of the clipped environmental layer (\autoref{fig:gbif}). - -```julia -# Clip layer to occurrences -temperature_clip = clip(temperature, kf_occurrences) - -# Plot occurrences -contour(temperature_clip, fill = true) -scatter!(longitudes(kf_occurrences), latitudes(kf_occurrences)) -``` - -![Latest belted kingfisher occurrences from the GBIF database displayed over the temperature data through the integration between SimpleSDMLayers.jl and GBIF.jl\label{fig:gbif}](figures/paper_gbif_1.png) - - - -# Acknowledgements - -We would like to thank all contributors to the _EcoJulia_ organization for their -help in developing this series of packages for ecological research. -Funding was provided by Fonds de recherche du Québec - Nature et technologies -(FRQNT) and the Computational Biodiversity Science and Services (BIOS²) training -program. - -# References diff --git a/src/SimpleSDMLayers.jl b/src/SimpleSDMLayers.jl index 583e9f6f..b880faff 100644 --- a/src/SimpleSDMLayers.jl +++ b/src/SimpleSDMLayers.jl @@ -3,21 +3,39 @@ module SimpleSDMLayers using ArchGDAL using Downloads using RecipesBase +using Colors, ColorBlendModes using ZipFile using Requires +using Distances using Statistics +using GeometryBasics +export Point, Polygon +using PolygonOps +using StatsBase +# Basic types for the package include(joinpath("lib", "types.jl")) export SimpleSDMLayer, SimpleSDMResponse, SimpleSDMPredictor +# Main functions to match coordinates +include(joinpath("lib", "coordinateconversion.jl")) + +# Implements a series of interfaces (AbstractArray, iteration, and indexing) +include(joinpath("interfaces", "common.jl")) +include(joinpath("interfaces", "iteration.jl")) +include(joinpath("interfaces", "indexing.jl")) +include(joinpath("interfaces", "broadcast.jl")) + +# Additional overloads include(joinpath("lib", "overloads.jl")) +# Raster clipping +include(joinpath("lib", "clip.jl")) + include(joinpath("lib", "generated.jl")) include(joinpath("lib", "basics.jl")) -export latitudes, longitudes, boundingbox - -include(joinpath("lib", "iteration.jl")) +export latitudes, longitudes, boundingbox, grid include(joinpath("datasets", "ascii.jl")) include(joinpath("datasets", "geotiff.jl")) @@ -25,7 +43,7 @@ export geotiff include(joinpath("datasets", "types.jl")) export WorldClim, CHELSA, EarthEnv -export BioClim, LandCover, HabitatHeterogeneity +export BioClim, LandCover, HabitatHeterogeneity, Elevation, Topography export CMIP6, SharedSocioeconomicPathway export CMIP5, RepresentativeConcentrationPathway for s in instances(CMIP5) @@ -41,15 +59,26 @@ for s in instances(SharedSocioeconomicPathway) @eval export $(Symbol(s)) end +include(joinpath("datasets", "layernames.jl")) +export layernames + include(joinpath("datasets", "chelsa", "download.jl")) include(joinpath("datasets", "chelsa", "bioclim.jl")) include(joinpath("datasets", "worldclim", "download.jl")) include(joinpath("datasets", "worldclim", "bioclim.jl")) +include(joinpath("datasets", "worldclim", "elevation.jl")) include(joinpath("datasets", "earthenv", "download.jl")) include(joinpath("datasets", "earthenv", "landcover.jl")) include(joinpath("datasets", "earthenv", "habitatheterogeneity.jl")) +include(joinpath("datasets", "earthenv", "topography.jl")) + +include(joinpath("pseudoabsences", "main.jl")) +include(joinpath("pseudoabsences", "radius.jl")) +include(joinpath("pseudoabsences", "randomselection.jl")) +include(joinpath("pseudoabsences", "surfacerangeenvelope.jl")) +export WithinRadius, RandomSelection, SurfaceRangeEnvelope include(joinpath("operations", "coarsen.jl")) include(joinpath("operations", "sliding.jl")) @@ -59,6 +88,7 @@ include(joinpath("operations", "mosaic.jl")) export coarsen, slidingwindow, mask, rescale!, rescale, mosaic include(joinpath("recipes", "recipes.jl")) +export bivariate # This next bit is about being able to change the path for raster assets # globally, which avoids duplication this argument across multiple functions. @@ -66,7 +96,6 @@ _layers_assets_path = get(ENV, "SDMLAYERS_PATH", "assets") isdir(_layers_assets_path) || mkpath(_layers_assets_path) # Fixes the export of clip when GBIF or others are loaded -clip(::T) where {T <: SimpleSDMLayer} = nothing export clip function __init__() diff --git a/src/datasets/chelsa/bioclim.jl b/src/datasets/chelsa/bioclim.jl index b88587a3..a26c9f2b 100644 --- a/src/datasets/chelsa/bioclim.jl +++ b/src/datasets/chelsa/bioclim.jl @@ -44,3 +44,13 @@ function SimpleSDMPredictor(::Type{CHELSA}, ::Type{BioClim}, mod::CMIP5, fut::Re @assert eltype(layers) <: Integer return [SimpleSDMPredictor(CHELSA, BioClim, mod, fut, l; kwargs...) for l in layers] end + +function SimpleSDMPredictor(::Type{CHELSA}, ::Type{BioClim}, mod::CMIP6, fut::SharedSocioeconomicPathway, layers::AbstractArray; kwargs...) + @assert eltype(layers) <: Integer + return [SimpleSDMPredictor(CHELSA, BioClim, mod, fut, l; kwargs...) for l in layers] +end + +function SimpleSDMPredictor(::Type{CHELSA}, ::Type{BioClim}, mod::CMIP6, fut::SharedSocioeconomicPathway, layer::Integer=1; year="2041-2060", kwargs...) + file = _get_raster(CHELSA, BioClim, mod, fut, layer, year) + return geotiff(SimpleSDMPredictor, file; kwargs...) +end diff --git a/src/datasets/chelsa/download.jl b/src/datasets/chelsa/download.jl index 0e863678..9e4c110a 100644 --- a/src/datasets/chelsa/download.jl +++ b/src/datasets/chelsa/download.jl @@ -17,6 +17,7 @@ end function _get_raster(::Type{CHELSA}, ::Type{BioClim}, mod::CMIP5, fut::RepresentativeConcentrationPathway, layer::Integer, year="2041-2060") 1 ≤ layer ≤ 19 || throw(ArgumentError("The layer must be between 1 and 19")) + year in ["2041-2060", "2061-2080"] || throw(ArgumentError("The year must be 2041-2060 or 2061-2080")) path = joinpath(SimpleSDMLayers._layers_assets_path, _rasterpath(CHELSA), _rasterpath(BioClim), _rasterpath(mod), _rasterpath(fut), year) isdir(path) || mkpath(path) @@ -30,4 +31,22 @@ function _get_raster(::Type{CHELSA}, ::Type{BioClim}, mod::CMIP5, fut::Represent end return filepath -end \ No newline at end of file +end + +function _get_raster(::Type{CHELSA}, ::Type{BioClim}, mod::CMIP6, fut::SharedSocioeconomicPathway, layer::Integer, year="2041-2060") + 1 ≤ layer ≤ 19 || throw(ArgumentError("The layer must be between 1 and 19")) + year in ["2011-2040", "2041-2070", "2071-2100"] || throw(ArgumentError("The year must be 2011-2040, 2041-2070, or 2071-2100")) + + path = joinpath(SimpleSDMLayers._layers_assets_path, _rasterpath(CHELSA), _rasterpath(BioClim), _rasterpath(mod), _rasterpath(fut), year) + isdir(path) || mkpath(path) + + root = "https://os.zhdk.cloud.switch.ch/envicloud/chelsa/chelsa_V2/GLOBAL/climatologies/$(year)/$(_rasterpath(mod))/$(_rasterpath(fut))/bio/" + filename = "CHELSA_bio$(layer)_$(year)_$(lowercase(_rasterpath(mod)))_$(_rasterpath(fut))_V.2.1.tif" + + filepath = joinpath(path, filename) + if !(isfile(filepath)) + Downloads.download(root * filename, filepath) + end + + return filepath +end diff --git a/src/datasets/earthenv/download.jl b/src/datasets/earthenv/download.jl index 672acd98..3494ad37 100644 --- a/src/datasets/earthenv/download.jl +++ b/src/datasets/earthenv/download.jl @@ -52,5 +52,28 @@ function _get_raster(::Type{EarthEnv}, ::Type{HabitatHeterogeneity}, layer::Inte Downloads.download(root * stem, joinpath(path, filename)) end + return joinpath(path, filename) +end + +function _get_raster(::Type{EarthEnv}, ::Type{Topography}, layer::Integer, resolution::Integer=50, source::String="GMTED", aggregation::String="mean") + 1 ≤ layer ≤ 16 || throw(ArgumentError("The layer must be between 1 and 16")) + + _src = Dict("GMTED" => "GMTED") + _agr = Dict("mean" => "mn", "median" => "md", "minimum" => "mi", "maximum" => "ma", "std" => "sd ") + _lay = ["elevation", "slope", "aspectcosine", "aspectsine", "eastness", "northness", "roughness", "tpi", "tri", "vrm", "dx" ,"dxx", "dy", "dyy", "pcurv", "tcurv"] + _res = Dict(1 => "1KM", 10 => "10KM", 5 => "5KM", 50 => "50KM", 100 => "100KM") + _sfx = layer == 1 ? _agr[aggregation] : "md" + + path = joinpath(SimpleSDMLayers._layers_assets_path, _rasterpath(EarthEnv), _rasterpath(Topography), _res[resolution]) + isdir(path) || mkpath(path) + + root = "https://data.earthenv.org/topography/" + stem = "$(_lay[layer])_$(_res[resolution])$(_agr[aggregation])_$(_src[source])$(_sfx).tif" + filename = stem + + if !isfile(joinpath(path, filename)) + Downloads.download(root * stem, joinpath(path, filename)) + end + return joinpath(path, filename) end \ No newline at end of file diff --git a/src/datasets/earthenv/topography.jl b/src/datasets/earthenv/topography.jl new file mode 100644 index 00000000..b0d62f7b --- /dev/null +++ b/src/datasets/earthenv/topography.jl @@ -0,0 +1,21 @@ +""" + SimpleSDMPredictor(::Type{EarthEnv}, ::Type{Topography}, layer::Integer=1; resolution::Int64=100, kwargs...) + +Allowed resolutions (in km) are 1, 5, 10, 50 (default), and 100 + +Allowed sources is currently *only* "GMTED" + +Allowed aggregations are "mean" (default), "median", "minimum", "maximum", and "std" +""" +function SimpleSDMPredictor(::Type{EarthEnv}, ::Type{Topography}, layer::Integer=1; resolution::Int64=100, source::String="GMTED", aggregation::String="mean", kwargs...) + @assert resolution in [1, 5, 10, 50, 100] + @assert source in ["GMTED"] + @assert aggregation in ["mean", "median", "minimum", "maximum", "std"] + file = _get_raster(EarthEnv, Topography, layer, resolution, source, aggregation) + return geotiff(SimpleSDMPredictor, file; kwargs...) +end + +function SimpleSDMPredictor(::Type{EarthEnv}, ::Type{Topography}, layers::AbstractArray; kwargs...) + @assert eltype(layers) <: Integer + return [SimpleSDMPredictor(EarthEnv, Topography, l; kwargs...) for l in layers] +end \ No newline at end of file diff --git a/src/datasets/layernames.jl b/src/datasets/layernames.jl new file mode 100644 index 00000000..590d5f50 --- /dev/null +++ b/src/datasets/layernames.jl @@ -0,0 +1,87 @@ +layernames(::Type{WorldClim}, ::Type{Elevation}) = ("Elevation",) + +function layernames(::Type{WorldClim}, ::Type{BioClim}) + return ( + "Annual Mean Temperature", + "Mean Diurnal Range", + "Isothermality", + "Temperature Seasonality", + "Max Temperature of Warmest Month", + "Min Temperature of Coldest Month", + "Temperature Annual Range", + "Mean Temperature of Wettest Quarter", + "Mean Temperature of Driest Quarter", + "Mean Temperature of Warmest Quarter", + "Mean Temperature of Coldest Quarter", + "Annual Precipitation", + "Precipitation of Wettest Month", + "Precipitation of Driest Month", + "Precipitation Seasonality", + "Precipitation of Wettest Quarter", + "Precipitation of Driest Quarter", + "Precipitation of Warmest Quarter", + "Precipitation of Coldest Quarter", + ) +end + +layernames(::Type{CHELSA}, ::Type{BioClim}) = layernames(WorldClim, BioClim) + +function layernames(::Type{EarthEnv}, ::Type{HabitatHeterogeneity}) + return ( + "Coefficient of variation", + "Evenness", + "Range", + "Shannon", + "Simpson", + "Standard deviation", + "Contrast", + "Correlation", + "Dissimilarity", + "Entropy", + "Homogeneity", + "Maximum", + "Uniformity", + "Variance", + ) +end + +function layernames(::Type{EarthEnv}, ::Type{LandCover}) + return ( + "Evergreen/Deciduous Needleleaf Trees", + "Evergreen Broadleaf Trees", + "Deciduous Broadleaf Trees", + "Mixed/Other Trees", + "Shrubs", + "Herbaceous Vegetation", + "Cultivated and Managed Vegetation", + "Regularly Flooded Vegetation", + "Urban/Built-up", + "Snow/Ice", + "Barren", + "Open Water", + ) +end + +function layernames(::Type{EarthEnv}, ::Type{Topography}) + return ( + "Elevation", + "Slope", + "Aspect Cosine", + "Aspect Sine", + "Aspect Eastness", + "Aspect Northness", + "Roughness", + "Topographic Position Index", + "Terrain Ruggedness Index", + "Vector Ruggedness Measure", + "∂(E-W slope)", + "∂²(E-W slope)", + "∂(N-S slope)", + "∂²(N-S slope)", + "Profile curvature", + "Tangential curvature" + ) +end + +layernames(p::Type{<:LayerProvider}, d::Type{<:LayerDataset}, i::Int) = layernames(p, d)[i] +layernames(p::Type{<:LayerProvider}, d::Type{<:LayerDataset}, i::AbstractArray) = layernames(p, d)[i] \ No newline at end of file diff --git a/src/datasets/types.jl b/src/datasets/types.jl index e2901dca..70f179f6 100644 --- a/src/datasets/types.jl +++ b/src/datasets/types.jl @@ -5,43 +5,49 @@ A `LayerProvider` is an abstract type used to dispatch the correct call of `SimpleSDMPredictor` to a specific dataset. A dataset is specified by a `LayerProvider` and a `LayerDataset`, as well as optionally one or multiple -layers, and future climate information. +layers, and future climate information, resolution, or dates. """ abstract type LayerProvider end """ LayerDataset -A `LayerDataset` is a specific set of rasters provided by a `LayerProvider`. +A `LayerDataset` is a specific set of rasters provided by a `LayerProvider`. For +a number of dataset types that are very broad (`LandCover`, +`HabitatHeterogeneity`), the precise mapping of layers is documented in their +`SimpleSDMPredictor` method. """ abstract type LayerDataset end """ WorldClim -TODO WorldClim +[WorldClim](https://worldclim.org/) offers bioclimatic data both historical and +future, under `CMIP6` scenarios. This provider currently offers `BioClim` data, both historical and future under -`CMIP6`. +`CMIP6`, and the `Elevation` raster. """ struct WorldClim <: LayerProvider end """ CHELSA -TODO CHELSA +[CHELSA](https://chelsa-climate.org/) offers high resolution climatologies. This +provider currently offers `BioClim` data, both historical and future under +`CMIP5` *and* `CMIP6`. -This provider currently offers `BioClim` data, both historical and future under -`CMIP5`. +Note that CHELSA offers a subset of all possible CMIP6 combinations, which is +supposed to be the most informative. """ struct CHELSA <: LayerProvider end """ EarthEnv -Data from the earthenv project, all released under a CC-BY-NC license to Tuanmu -& Jetz. This provider currently offers `LandCover` and `HabitatHeterogeneity` -rasters. +Data from the [EarthEnv](https://www.earthenv.org//) project, all released under +a CC-BY-NC license to Tuanmu & Jetz. This provider currently offers `LandCover` +and `HabitatHeterogeneity` rasters. """ struct EarthEnv <: LayerProvider end @@ -83,13 +89,28 @@ Information on land cover, currently only provided by `EarthEnv`. """ struct LandCover <: LayerDataset end +""" + Elevation + +General type for a DEM, currently available through `WorldClim` +""" +struct Elevation <: LayerDataset end + """ HabitatHeterogeneity -Information on habitat heterogeneity, currently only provided by `EarthEnv`. +Information on [habitat heterogeneity](https://www.earthenv.org/texture), +currently only provided by `EarthEnv`. """ struct HabitatHeterogeneity <: LayerDataset end +""" + Topography + +Information on habitat topography, currently provided by `EarthEnv`. +""" +struct Topography <: LayerDataset end + """ SharedSocioeconomicPathway @@ -124,7 +145,6 @@ Enumeration of the models from CMIP5, which can be listed with """ @enum CMIP5 ACCESS10 BNUESM CCSM4 CESM1BGC CESM1CAM5 CMCCCMS CMCCCM CNRMCM5 CSIROMK360 CanESM2 FGOALSG2 FIOESM GFDLCM3 GFDLESM2G GFDLESM2M GISSE2HCC GISSE2H GISSE2RCC GISSE2R HADGEM2AO HADGEM2CC IPSLCM5ALR IPSLCM5AMR MIROCESMCHEM MIROCESM MIROC5 MPIESMLR MPIESMMR MRICGCM3 MRIESM1 NORESM1M BCCCSM11 INMCM4 - # Provider paths _rasterpath(::Type{WorldClim}) = "WorldClim" _rasterpath(::Type{CHELSA}) = "CHELSA" @@ -134,6 +154,8 @@ _rasterpath(::Type{EarthEnv}) = "EarthEnv" _rasterpath(::Type{BioClim}) = "BioClim" _rasterpath(::Type{LandCover}) = "LandCover" _rasterpath(::Type{HabitatHeterogeneity}) = "HabitatHeterogeneity" +_rasterpath(::Type{Elevation}) = "Elevation" +_rasterpath(::Type{Topography}) = "Topography" # Future paths _rasterpath(model::CMIP6) = _rasterpath(Val{model}) diff --git a/src/datasets/worldclim/bioclim.jl b/src/datasets/worldclim/bioclim.jl index 2274e5ee..3117be86 100644 --- a/src/datasets/worldclim/bioclim.jl +++ b/src/datasets/worldclim/bioclim.jl @@ -43,7 +43,7 @@ will be much faster. Original data: https://www.worldclim.org/data/worldclim21.html """ function SimpleSDMPredictor(::Type{WorldClim}, ::Type{BioClim}, layer::Integer=1; resolution::Float64=10.0, kwargs...) - @assert resolution in [2.5, 5.0, 10.0] + @assert resolution in [0.5, 2.5, 5.0, 10.0] file = _get_raster(WorldClim, BioClim, layer, resolution) return geotiff(SimpleSDMPredictor, file; kwargs...) end diff --git a/src/datasets/worldclim/download.jl b/src/datasets/worldclim/download.jl index e8b3afe5..e2ead1ba 100644 --- a/src/datasets/worldclim/download.jl +++ b/src/datasets/worldclim/download.jl @@ -1,18 +1,46 @@ +function _get_raster(::Type{WorldClim}, ::Type{Elevation}, layer::Integer, resolution=10.0) + res = Dict(0.5 => "30s", 2.5 => "2.5m", 5.0 => "5m", 10.0 => "10m") + + path = joinpath(SimpleSDMLayers._layers_assets_path, _rasterpath(WorldClim), _rasterpath(Elevation), res[resolution]) + isdir(path) || mkpath(path) + + output_file = joinpath(path, "wc2.1_$(res[resolution])_elev.tif") + zip_file = joinpath(path, "wc2.1_$(res[resolution])_elev.zip") + + if !isfile(path) + if !isfile(zip_file) + root = "https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/" + stem = "wc2.1_$(res[resolution])_elev.zip" + Downloads.download(root * stem, zip_file) + end + zf = ZipFile.Reader(zip_file) + file_to_read = + first(filter(f -> joinpath(path, f.name) == output_file, zf.files)) + + if !isfile(joinpath(path, file_to_read.name)) + write(joinpath(path, file_to_read.name), read(file_to_read)) + end + close(zf) + end + + return joinpath(path, file_to_read.name) +end + function _get_raster(::Type{WorldClim}, ::Type{BioClim}, layer::Integer, resolution=10.0) 1 ≤ layer ≤ 19 || throw(ArgumentError("The layer must be between 1 and 19")) - res = Dict(2.5 => "2.5", 5.0 => "5", 10.0 => "10") + res = Dict(0.5 => "30s", 2.5 => "2.5m", 5.0 => "5m", 10.0 => "10m") path = joinpath(SimpleSDMLayers._layers_assets_path, _rasterpath(WorldClim), _rasterpath(BioClim), res[resolution]) isdir(path) || mkpath(path) - output_file = joinpath(path, "wc2.1_$(res[resolution])m_bio_$(layer).tif") - zip_file = joinpath(path, "bioclim_2.1_$(res[resolution])m.zip") + output_file = joinpath(path, "wc2.1_$(res[resolution])_bio_$(layer).tif") + zip_file = joinpath(path, "bioclim_2.1_$(res[resolution]).zip") if !isfile(path) if !isfile(zip_file) root = "https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/" - stem = "wc2.1_$(res[resolution])m_bio.zip" + stem = "wc2.1_$(res[resolution])_bio.zip" Downloads.download(root * stem, zip_file) end zf = ZipFile.Reader(zip_file) @@ -29,16 +57,16 @@ function _get_raster(::Type{WorldClim}, ::Type{BioClim}, layer::Integer, resolut end function _get_raster(::Type{WorldClim}, ::Type{BioClim}, mod::CMIP6, fut::SharedSocioeconomicPathway, resolution=10.0, year="2021-2040") - res = Dict(2.5 => "2.5", 5.0 => "5", 10.0 => "10") + res = Dict(2.5 => "2.5m", 5.0 => "5m", 10.0 => "10m") path = joinpath(SimpleSDMLayers._layers_assets_path, _rasterpath(WorldClim), _rasterpath(BioClim), _rasterpath(mod), _rasterpath(fut), year, res[resolution]) isdir(path) || mkpath(path) - zip_file = joinpath(path, "$(res[resolution])m_$(_rasterpath(mod))_$(_rasterpath(fut)).zip") + zip_file = joinpath(path, "$(res[resolution])_$(_rasterpath(mod))_$(_rasterpath(fut)).zip") if !isfile(path) if !isfile(zip_file) root = "https://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/" - stem = "$(res[resolution])m/wc2.1_$(res[resolution])m_bioc_$(_rasterpath(mod))_$(_rasterpath(fut))_$(year).zip" + stem = "$(res[resolution])/wc2.1_$(res[resolution])_bioc_$(_rasterpath(mod))_$(_rasterpath(fut))_$(year).zip" Downloads.download(root * stem, zip_file) end zf = ZipFile.Reader(zip_file) diff --git a/src/datasets/worldclim/elevation.jl b/src/datasets/worldclim/elevation.jl new file mode 100644 index 00000000..08c43b99 --- /dev/null +++ b/src/datasets/worldclim/elevation.jl @@ -0,0 +1,5 @@ +function SimpleSDMPredictor(::Type{WorldClim}, ::Type{Elevation}, layer::Integer=1; resolution::Float64=10.0, kwargs...) + @assert resolution in [0.5, 2.5, 5.0, 10.0] + file = _get_raster(WorldClim, Elevation, layer, resolution) + return geotiff(SimpleSDMPredictor, file; kwargs...) +end \ No newline at end of file diff --git a/src/integrations/GBIF.jl b/src/integrations/GBIF.jl index ec296fa7..5f2dbdf9 100644 --- a/src/integrations/GBIF.jl +++ b/src/integrations/GBIF.jl @@ -32,7 +32,7 @@ Extracts the value of a layer at a given position for a `GBIFRecord`. If the function Base.getindex(layer::T, record::GBIF.GBIFRecord) where {T <: SimpleSDMLayer} ismissing(record.latitude) && return nothing ismissing(record.longitude) && return nothing - return layer[record.longitude, record.latitude] + return layer[Point(record.longitude, record.latitude)] end """ @@ -46,7 +46,7 @@ function Base.setindex!(layer::SimpleSDMResponse{T}, v::T, record::GBIF.GBIFReco ismissing(record.latitude) && return nothing ismissing(record.longitude) && return nothing isnothing(layer[record]) && return nothing - setindex!(layer, v, record.longitude, record.latitude) + setindex!(layer, v, Point(record.longitude, record.latitude)) end """ @@ -74,7 +74,7 @@ function SimpleSDMLayers.clip(layer::T, records::GBIF.GBIFRecords) where {T <: S lon_max = min(layer.right, lon_max+lon_s) lon_min = max(layer.left, lon_min-lon_s) - return layer[left=lon_min, right=lon_max, bottom=lat_min, top=lat_max] + return clip(layer; left=lon_min, right=lon_max, bottom=lat_min, top=lat_max) end """ diff --git a/src/interfaces/broadcast.jl b/src/interfaces/broadcast.jl new file mode 100644 index 00000000..b4e4b18b --- /dev/null +++ b/src/interfaces/broadcast.jl @@ -0,0 +1,18 @@ +Base.broadcastable(layer::SimpleSDMResponse) = layer +Base.BroadcastStyle(::Type{<:SimpleSDMResponse}) = Broadcast.Style{SimpleSDMResponse}() + +Base.broadcastable(layer::SimpleSDMPredictor) = similar(layer) +Base.BroadcastStyle(::Type{<:SimpleSDMPredictor}) = Broadcast.Style{SimpleSDMPredictor}() + +function Base.Broadcast.broadcast(f, L::LT) where {LT <: SimpleSDMLayer} + newgrid = Array{Any}(nothing, size(L)) + N = SimpleSDMResponse(newgrid, L) + v = filter(!isnothing, L.grid) + fv = f.(v) + N.grid[findall(!isnothing, L.grid)] .= fv + + internal_types = unique(typeof.(N.grid)) + + RT = LT <: SimpleSDMResponse ? SimpleSDMResponse : SimpleSDMPredictor + return RT(convert(Matrix{Union{internal_types...}}, N.grid), N) +end \ No newline at end of file diff --git a/src/interfaces/common.jl b/src/interfaces/common.jl new file mode 100644 index 00000000..b22743e9 --- /dev/null +++ b/src/interfaces/common.jl @@ -0,0 +1,28 @@ +""" + Base.size(layer::T) where {T <: SimpleSDMLayer} + +Returns the size of the grid. +""" +Base.size(layer::T) where {T <: SimpleSDMLayer} = size(layer.grid) + + +""" + Base.size(layer::T, i...) where {T <: SimpleSDMLayer} + +Returns the size of the grid alongside a dimension. +""" +Base.size(layer::T, i...) where {T <: SimpleSDMLayer} = size(layer.grid, i...) + +Base.length(layer::T) where {T <: SimpleSDMLayer} = count(!isnothing, layer.grid) + +""" + Base.keys(layer::T) where {T <: SimpleSDMLayer} + +Returns an array of `Point` where every entry in the array is a non-`nothing` +grid coordinate. +""" +function Base.keys(layer::T) where {T <: SimpleSDMLayer} + _lon = longitudes(layer) + _lat = latitudes(layer) + return [Point(_lon[p[2]], _lat[p[1]]) for p in findall(!isnothing, layer.grid)] +end \ No newline at end of file diff --git a/src/interfaces/indexing.jl b/src/interfaces/indexing.jl new file mode 100644 index 00000000..f2f4a459 --- /dev/null +++ b/src/interfaces/indexing.jl @@ -0,0 +1,114 @@ +Base.IndexStyle(::Type{<:SimpleSDMLayer}) = IndexCartesian() + +""" + Base.CartesianIndices(layer::T) where {T <: SimpleSDMLayer} + +This function is used to access the positions in a layer as cartesian indices. +Internally, although it may be more convenient to access positions by their +coordinates, the value of the raster are extracted using their cartesian +coordinates. +""" +function Base.CartesianIndices(layer::T) where {T <: SimpleSDMLayer} + return CartesianIndices(layer.grid) +end + +""" + Base.getindex(layer::T, i::CartesianIndex{2}) where {T <: SimpleSDMLayer} + +Returns the value stored at a given cartesian index. +""" +function Base.getindex(layer::T, i::CartesianIndex{2}) where {T <: SimpleSDMLayer} + return layer.grid[i] +end + +""" + Base.getindex(layer::T, i::Integer) where {T <: SimpleSDMLayer} + +Returns the value stored at a linear index (a good candidate for deprecation as +we have a better iteration interface now). +""" +function Base.getindex(layer::T, i::Integer) where {T <: SimpleSDMLayer} + return layer.grid[CartesianIndices(layer)[i]] +end + +""" + Base.getindex(layer::T, i::Integer, j::Integer) where {T <: SimpleSDMLayer} + +Standard abstract array accession, where the dimensions follow the dimensions of +the underlying grid. +""" +function Base.getindex(layer::T, i::Integer, j::Integer) where {T <: SimpleSDMLayer} + return layer.grid[CartesianIndex(i,j)] +end + +""" + Base.getindex(layer::T, i::Array{CartesianIndex{2}}) where {T <: SimpleSDMLayer} + +Returns an array of values based on an array of cartesian indices - this can be +a vector or a matrix, and the elements can be in any order. This will *not* +return a raster. +""" +function Base.getindex(layer::T, i::Array{CartesianIndex{2}}) where {T <: SimpleSDMLayer} + return layer.grid[i] +end + +""" + Base.getindex(layer::T, longitude::AbstractFloat, latitude::AbstractFloat) where {T <: SimpleSDMLayer} + +Returns a value by longitude and latitude. +""" +function Base.getindex(layer::T, longitude::AbstractFloat, latitude::AbstractFloat) where {T <: SimpleSDMLayer} + return layer[Point(longitude,latitude)] +end + +""" + Base.getindex(layer::T, c::Point) where {T <: SimpleSDMLayer} + +Access a value through a `Point` (from `GeometryBasics`), which has the +longitude first and the latitude last -- this follows the GeoJSON convention. +""" +function Base.getindex(layer::T, c::Point) where {T <: SimpleSDMLayer} + return layer[_point_to_cartesian(layer, c)] +end + +""" + Base.getindex(layer::T, c::Array{<:Point}) where {T <: SimpleSDMLayer} + +Access a value through an array of `Point` (from `GeometryBasics`), which has +the longitude first and the latitude last. The array can be in any order, so +this method will not return a raster. +""" +function Base.getindex(layer::T, c::Array{<:Point}) where {T <: SimpleSDMLayer} + return layer[[_point_to_cartesian(layer, i) for i in c]] +end + +""" + Base.getindex(::T, ::Nothing) where {T <: SimpleSDMLayer} + +If the user requests a point that is out of bounds, its cartesian coordinate +will be matched to `nothing`, and then we return `nothing`. +""" +function Base.getindex(::T, ::Nothing) where {T <: SimpleSDMLayer} + return nothing +end + +function Base.setindex!(layer::T, v, i::CartesianIndex{2}) where {T <: SimpleSDMResponse} + return setindex!(layer.grid, v, i) +end + +function Base.setindex!(layer::T, v, i::Array{CartesianIndex{2}}) where {T <: SimpleSDMResponse} + return setindex!(layer.grid, v, i) +end + +function Base.setindex!(layer::T, v, point::Point) where {T <: SimpleSDMResponse} + return setindex!(layer, v, _point_to_cartesian(layer, point)) +end + +function Base.setindex!(layer::T, v, points::Array{<:Point}) where {T <: SimpleSDMResponse} + i = broadcast(point -> _point_to_cartesian(layer, point), points) + return setindex!(layer, v, i) +end + +function Base.setindex!(layer::T, v, longitude::Float64, latitude::Float64) where {T <: SimpleSDMResponse} + return setindex!(layer, v, Point(longitude, latitude)) +end \ No newline at end of file diff --git a/src/interfaces/iteration.jl b/src/interfaces/iteration.jl new file mode 100644 index 00000000..23a032ed --- /dev/null +++ b/src/interfaces/iteration.jl @@ -0,0 +1,20 @@ +function Base.iterate(layer::T) where {T <: SimpleSDMLayer} + position = findfirst(!isnothing, layer.grid) + isnothing(position) && return nothing + value = layer.grid[position] + coordinates = Point(longitudes(layer)[last(position.I)], latitudes(layer)[first(position.I)]) + return (coordinates => value, position) +end + +function Base.iterate(layer::T, state) where {T <: SimpleSDMLayer} + newstate = LinearIndices(layer.grid)[state]+1 + newstate > prod(size(layer.grid)) && return nothing + position = findnext(!isnothing, layer.grid, CartesianIndices(layer.grid)[newstate]) + isnothing(position) && return nothing + value = layer.grid[position] + coordinates = Point(longitudes(layer)[last(position.I)], latitudes(layer)[first(position.I)]) + return (coordinates => value, position) +end + +#Base.IteratorSize(::T) where {T <: SimpleSDMLayer} = Base.HasShape{2}() +Base.IteratorEltype(::T) where {T <: SimpleSDMLayer} = Base.EltypeUnknown() diff --git a/src/lib/basics.jl b/src/lib/basics.jl index 803a3965..a303aee3 100644 --- a/src/lib/basics.jl +++ b/src/lib/basics.jl @@ -39,3 +39,7 @@ end function _layers_are_compatible(layers::Array{T}) where {T <: SimpleSDMLayer} all(layer -> _layers_are_compatible(layer, layers[1]), layers) end + +function grid(layer::T) where {T <: SimpleSDMLayer} + return copy(layer.grid) +end \ No newline at end of file diff --git a/src/lib/clip.jl b/src/lib/clip.jl new file mode 100644 index 00000000..0d3088b0 --- /dev/null +++ b/src/lib/clip.jl @@ -0,0 +1,58 @@ +""" + clip(layer::T, p1::Point, p2::Point) where {T <: SimpleSDMLayer} + +Return a raster by defining a bounding box through two points. The order of the +points (in terms of bottom/top/left/right) is not really important, as the +correct coordinates will be extracted. +""" +function clip(layer::T, p1::Point, p2::Point) where {T <: SimpleSDMLayer} + latextr = extrema([p1[2], p2[2]]) + lonextr = extrema([p1[1], p2[1]]) + pmin = _point_to_cartesian(layer, Point(minimum(lonextr), minimum(latextr)); side=:bottomleft) + pmax = _point_to_cartesian(layer, Point(maximum(lonextr), maximum(latextr)); side=:topright) + R = T <: SimpleSDMResponse ? SimpleSDMResponse : SimpleSDMPredictor + return R( + layer.grid[pmin:pmax], + longitudes(layer)[last(pmin.I)]-stride(layer, 1), + longitudes(layer)[last(pmax.I)]+stride(layer, 1), + latitudes(layer)[first(pmin.I)]-stride(layer, 2), + latitudes(layer)[first(pmax.I)]+stride(layer, 2) + ) +end + +""" + clip(layer::T; left=nothing, right=nothing, top=nothing, bottom=nothing) where {T <: SimpleSDMLayer} + +Clips a raster by giving the (optional) limites `left`, `right`, `bottom`, and `top`. +""" +function clip(layer::T; left=nothing, right=nothing, top=nothing, bottom=nothing) where {T <: SimpleSDMLayer} + p1 = Point( + isnothing(left) ? layer.left : left, + isnothing(bottom) ? layer.bottom : bottom + ) + p2 = Point( + isnothing(right) ? layer.right : right, + isnothing(top) ? layer.top : top + ) + return clip(layer, p1, p2) +end + +""" + clip(origin::T1, destination::T2) where {T1 <: SimpleSDMLayer, T2 <: SimpleSDMLayer} + +Clips a layer by another layer, *i.e.* subsets the first layer so that it has +the dimensions of the second layer. This operation applies a very small +displacement to the limits (`5eps()`) to ensure that if the coordinate to cut at +falls exactly on a cell boundary, the correct cell will be used in the return +layer. +""" +function clip(origin::T1, destination::T2) where {T1 <: SimpleSDMLayer, T2 <: SimpleSDMLayer} + _d = 5eps() + err = false + destination.right > origin.right && (err = true) + destination.left < origin.left && (err = true) + destination.bottom < origin.bottom && (err = true) + destination.top > origin.top && (err = true) + err && throw(ArgumentError("The two layers are not compatible")) + return clip(origin, Point(destination.left+_d, destination.bottom+_d), Point(destination.right-_d, destination.top-_d)) +end diff --git a/src/lib/coordinateconversion.jl b/src/lib/coordinateconversion.jl new file mode 100644 index 00000000..6dcec4c9 --- /dev/null +++ b/src/lib/coordinateconversion.jl @@ -0,0 +1,44 @@ +function _point_to_cartesian(layer::T, c::Point; side=:center) where {T <: SimpleSDMLayer} + lonside, latside = true, true + if side == :bottomleft + lonside, latside = false, false + end + if side == :topright + lonside, latside = true, true + end + lon = SimpleSDMLayers._match_longitude(layer, c[1]; lower=lonside) + lat = SimpleSDMLayers._match_latitude(layer, c[2]; lower=latside) + isnothing(lon) && return nothing + isnothing(lat) && return nothing + return CartesianIndex(lat, lon) +end + +function _match_latitude(layer::T, lat::K; lower::Bool=true) where {T <: SimpleSDMLayer, K <: AbstractFloat} + layer.bottom <= lat <= layer.top || return nothing + lat == layer.bottom && return 1 + lat == layer.top && return size(layer, 1) + relative = (lat - layer.bottom)/(layer.top - layer.bottom) + fractional = relative * size(layer, 1)+1 + if lat in layer.bottom:2stride(layer,2):layer.top + f = lower ? floor : ceil + d = lower ? 1 : 0 + return min(f(Int64, fractional-d), size(layer, 1)) + else + return min(floor(Int, fractional), size(layer, 1)) + end +end + +function _match_longitude(layer::T, lon::K; lower::Bool=true) where {T <: SimpleSDMLayer, K <: AbstractFloat} + layer.left <= lon <= layer.right || return nothing + lon == layer.left && return 1 + lon == layer.right && return size(layer, 2) + relative = (lon - layer.left)/(layer.right - layer.left) + fractional = relative * size(layer, 2)+1 + if lon in layer.left:2stride(layer,1):layer.right + f = lower ? floor : ceil + d = lower ? 1 : 0 + return min(f(Int64, fractional-d), size(layer, 2)) + else + return min(floor(Int, fractional), size(layer, 2)) + end +end \ No newline at end of file diff --git a/src/lib/generated.jl b/src/lib/generated.jl index 3f1fdcec..113f2569 100644 --- a/src/lib/generated.jl +++ b/src/lib/generated.jl @@ -1,4 +1,4 @@ -## Single layer overloads +# Single layer overloads ops = Dict( :Base => [:sum, :maximum, :minimum, :extrema], diff --git a/src/lib/iteration.jl b/src/lib/iteration.jl deleted file mode 100644 index c889ae82..00000000 --- a/src/lib/iteration.jl +++ /dev/null @@ -1,23 +0,0 @@ -Base.IteratorSize(::Type{T}) where {T <: SimpleSDMLayer} = Base.HasLength() -Base.IteratorEltype(::Type{T}) where {T <: SimpleSDMLayer} = Base.HasEltype() - -function Base.getindex(layer::T, i::CartesianIndex{2}) where {T <: SimpleSDMLayer} - return layer.grid[i] -end - -function Base.iterate(layer::T) where {T <: SimpleSDMLayer} - length(layer) == 0 && return nothing - idx = findfirst(!isnothing, layer.grid) - return (layer[idx], LinearIndices(layer.grid)[idx]) -end - -function Base.iterate(layer::T, state) where {T <: SimpleSDMLayer} - state == prod(size(layer)) && return nothing - idx = findnext(!isnothing, layer.grid, CartesianIndices(layer.grid)[state+1]) - isnothing(idx) && return nothing - return (layer[idx], LinearIndices(layer.grid)[idx]) -end - -function Base.length(layer::T) where {T <: SimpleSDMLayer} - return count(!isnothing, layer.grid) -end \ No newline at end of file diff --git a/src/lib/overloads.jl b/src/lib/overloads.jl index ab6d72f9..40ab29dd 100644 --- a/src/lib/overloads.jl +++ b/src/lib/overloads.jl @@ -1,8 +1,5 @@ -import Base: size import Base: stride import Base: eachindex -import Base: getindex -import Base: setindex! import Base: replace import Base: replace! import Base: similar @@ -10,7 +7,6 @@ import Base: copy import Base: eltype import Base: convert import Base: collect -import Base.Broadcast: broadcast import Base: hcat import Base: vcat import Base: show @@ -28,8 +24,8 @@ function Base.show(io::IO, ::MIME"text/plain", layer::T) where {T <: SimpleSDMLa itype = eltype(layer) otype = T <: SimpleSDMPredictor ? "predictor" : "response" print(io, """SDM $(otype) → $(size(layer,1))×$(size(layer,2)) grid with $(length(layer)) $(itype)-valued cells - \x20\x20Latitudes\t$(Tuple(latitudes(layer)[[1, end]])) - \x20\x20Longitudes\t$(Tuple(longitudes(layer)[[1, end]]))""") + \x20\x20Latitudes\t$(layer.bottom) ⇢ $(layer.top) + \x20\x20Longitudes\t$(layer.left) ⇢ $(layer.right)""") end function Base.show(io::IO, layer::T) where {T <: SimpleSDMLayer} @@ -88,23 +84,6 @@ omitted. Base.eltype(::SimpleSDMResponse{T}) where {T} = T Base.eltype(::SimpleSDMPredictor{T}) where {T} = T - -""" - Base.size(layer::T) where {T <: SimpleSDMLayer} - -Returns the size of the grid. -""" -Base.size(layer::T) where {T <: SimpleSDMLayer} = size(layer.grid) - - -""" - Base.size(layer::T, i...) where {T <: SimpleSDMLayer} - -Returns the size of the grid alongside a dimension. -""" -Base.size(layer::T, i...) where {T <: SimpleSDMLayer} = size(layer.grid, i...) - - """ Base.stride(layer::T; dims::Union{Nothing,Integer}=nothing) where {T <: SimpleSDMLayer} @@ -128,184 +107,6 @@ Returns the index of the grid. """ Base.eachindex(layer::T) where {T <: SimpleSDMLayer} = eachindex(layer.grid) - -""" -Extracts a value from a layer by its grid position. -""" -Base.getindex(layer::T, i::Int64) where {T <: SimpleSDMLayer} = layer.grid[i] - - -""" - Base.getindex(layer::T, i::R, j::R) where {T <: SimpleSDMLayer, R <: UnitRange} - -Extracts a series of positions in a layer, and returns a layer corresponding to -the result. This is essentially a way to rapidly crop a layer to a given subset -of its extent. The `i` and `j` arguments are `UnitRange`s (of `Integer`). - -The layer returned by this function will have the same type as the layer passed -as its argument, but this can be changed using `convert`. Note that this function -performs additional checks to ensure that the range is not empty, and to also -ensure that it does not overflows from the size of the layer. -""" -function Base.getindex(layer::T, i::R, j::R) where {T <: SimpleSDMLayer, R <: UnitRange} - i_min = isempty(i) ? max(i.start-1, 1) : i.start - i_max = isempty(i) ? max(i.stop+2, size(layer, 1)) : i.stop - j_min = isempty(j) ? max(j.start-1, 1) : j.start - j_max = isempty(j) ? max(j.stop+2, size(layer, 2)) : j.stop - i_fix = i_min:i_max - j_fix = j_min:j_max - RT = T <: SimpleSDMResponse ? SimpleSDMResponse : SimpleSDMPredictor - return RT( - layer.grid[i_fix,j_fix], - minimum(longitudes(layer)[j_fix])-stride(layer,1), - maximum(longitudes(layer)[j_fix])+stride(layer,1), - minimum(latitudes(layer)[i_fix])-stride(layer,2), - maximum(latitudes(layer)[i_fix])+stride(layer,2) - ) -end - -""" -Given a layer and a latitude, returns `nothing` if the latitude is outside the -range, or the grid index containing this latitude if it is within range -""" -function _match_latitude(layer::T, lat::K; side=:none) where {T <: SimpleSDMLayer, K <: AbstractFloat} - side in [:none, :bottom, :top] || throw(ArgumentError("side must be one of :none (default), :bottom, :top")) - - lat > layer.top && return nothing - lat < layer.bottom && return nothing - - ldiff = abs.(lat .- latitudes(layer)) - lapprox = isapprox.(ldiff, stride(layer, 2)) - if side == :none || !any(lapprox) - l = last(findmin(ldiff)) - elseif side == :bottom - l = findlast(lapprox) - elseif side == :top - l = findfirst(lapprox) - end - - return l -end - - -""" -Given a layer and a longitude, returns `nothing` if the longitude is outside the -range, or the grid index containing this longitude if it is within range -""" -function _match_longitude(layer::T, lon::K; side::Symbol=:none) where {T <: SimpleSDMLayer, K <: AbstractFloat} - side in [:none, :left, :right] || throw(ArgumentError("side must be one of :none (default), :left, :right")) - - lon > layer.right && return nothing - lon < layer.left && return nothing - - ldiff = abs.(lon .- longitudes(layer)) - lapprox = isapprox.(ldiff, stride(layer, 1)) - if side == :none || !any(lapprox) - l = last(findmin(ldiff)) - elseif side == :left - l = findlast(lapprox) - elseif side == :right - l = findfirst(lapprox) - end - - return l -end - -""" - Base.getindex(layer::T, longitude::K, latitude::K) where {T <: SimpleSDMLayer, K <: AbstractFloat} - -Extracts the value of a layer at a given latitude and longitude. If values -outside the range are requested, will return `nothing`. -""" -function Base.getindex(layer::T, longitude::K, latitude::K) where {T <: SimpleSDMLayer, K <: AbstractFloat} - i = _match_longitude(layer, longitude) - j = _match_latitude(layer, latitude) - isnothing(i) && return nothing - isnothing(j) && return nothing - return layer.grid[j, i] -end - -""" - Base.getindex(layer::T; left=nothing, right=nothing, top=nothing, bottom=nothing) where {T <: SimpleSDMLayer, K <: Union{Nothing,AbstractFloat}} - -Returns a subset of the argument layer, where the new limits are given by -`left`, `right`, `top`, and `bottom`. Up to three of these can be omitted, and -if so these limits will not be affected. -""" -function Base.getindex(layer::T; left=nothing, right=nothing, top=nothing, bottom=nothing) where {T <: SimpleSDMLayer} - for limit in [left, right, top, bottom] - if !isnothing(limit) - @assert typeof(limit) <: AbstractFloat - end - end - imax = _match_longitude(layer, isnothing(right) ? layer.right : right; side=:right) - imin = _match_longitude(layer, isnothing(left) ? layer.left : left; side=:left) - jmax = _match_latitude(layer, isnothing(top) ? layer.top : top; side=:top) - jmin = _match_latitude(layer, isnothing(bottom) ? layer.bottom : bottom; side=:bottom) - any(isnothing.([imin, imax, jmin, jmax])) && throw(ArgumentError("Unable to extract, coordinates outside of range")) - # Note that this is LATITUDE first - return layer[jmin:jmax, imin:imax] -end - -""" - Base.getindex(layer::T, n::NT) where {T <: SimpleSDMLayer, NT <: NamedTuple} - -Returns a subset of the argument layer, where the new limits are given in -a NamedTuple by `left`, `right`, `top`, and `bottom`, in any order. Up to -three of these can be omitted, and if so these limits will not be affected. -""" -function Base.getindex(layer::T, n::NT) where {T <: SimpleSDMLayer, NT <: NamedTuple} - l = isdefined(n, :left) ? n.left : nothing - r = isdefined(n, :right) ? n.right : nothing - t = isdefined(n, :top) ? n.top : nothing - b = isdefined(n, :bottom) ? n.bottom : nothing - Base.getindex(layer; left=l, right=r, top=t, bottom=b) -end - -""" - Base.getindex(layer1::T1, layer2::T2) where {T1 <: SimpleSDMLayer, T2 <: SimpleSDMLayer} - -Extract a layer based on a second layer. Note that the two layers must be -*compatible*, which is to say they must have the same stride and the bounding -coordinates of layer2 must be contained in layer1. -""" -function Base.getindex(layer1::T1, layer2::T2) where {T1 <: SimpleSDMLayer, T2 <: SimpleSDMLayer} - iscompat = all( - [ - layer2.left >= layer1.left, - layer2.right <= layer1.right, - layer2.bottom >= layer1.bottom, - layer2.top <= layer1.top, - ] - ) - iscompat || throw(ArgumentError("layer2 has bounding coordinates that are not contained in layer1")) - stride(layer1) == stride(layer2) || throw(ArgumentError("The layers have different strides")) - return layer1[left=layer2.left, right=layer2.right, bottom=layer2.bottom, top=layer2.top] -end - -""" - Base.setindex!(layer::SimpleSDMResponse{T}, v::T, i...) where {T} - -Changes the value of a cell, or a range of cells, as indicated by their grid -positions. -""" -function Base.setindex!(layer::SimpleSDMResponse{T}, v::T, i...) where {T} - typeof(v) <: eltype(layer.grid) || throw(ArgumentError("Impossible to set a value to a non-matching type")) - layer.grid[i...] = v -end - -""" - Base.setindex!(layer::T, v, lon::Float64, lat::Float64) where {T <: SimpleSDMResponse} - -Changes the values of the cell including the point at the requested latitude and -longitude. -""" -function Base.setindex!(layer::SimpleSDMResponse{T}, v::T, lon::Float64, lat::Float64) where {T} - i = _match_longitude(layer, lon) - j = _match_latitude(layer, lat) - layer[j,i] = v -end - """ Base.similar(layer::T, ::Type{TC}) where {TC <: Any, T <: SimpleSDMLayer} @@ -346,24 +147,6 @@ function Base.copy(layer::T) where {T <: SimpleSDMLayer} return RT(copygrid, copy(layer.left), copy(layer.right), copy(layer.bottom), copy(layer.top)) end -""" - Broadcast.broadcast(f, L::LT) where {LT <: SimpleSDMLayer} - -TODO -""" -function Base.Broadcast.broadcast(f, L::LT) where {LT <: SimpleSDMLayer} - newgrid = Array{Any}(nothing, size(L)) - N = SimpleSDMResponse(newgrid, L) - v = filter(!isnothing, L.grid) - fv = f.(v) - N.grid[findall(!isnothing, L.grid)] .= fv - - internal_types = unique(typeof.(N.grid)) - - RT = LT <: SimpleSDMResponse ? SimpleSDMResponse : SimpleSDMPredictor - return RT(convert(Matrix{Union{internal_types...}}, N.grid), N) -end - """ Base.collect(l::T) where {T <: SimpleSDMLayer} @@ -496,3 +279,16 @@ function Base.isequal(layer1::SimpleSDMLayer, layer2::SimpleSDMLayer) ] ) end + +Base.:*(n::Number, layer::T) where {T <: SimpleSDMLayer} = broadcast(x -> n*x, layer) +Base.:*(layer::T, n::Number) where {T <: SimpleSDMLayer} = broadcast(x -> x*n, layer) +Base.:/(n::Number, layer::T) where {T <: SimpleSDMLayer} = broadcast(x -> n/x, layer) +Base.:/(layer::T, n::Number) where {T <: SimpleSDMLayer} = broadcast(x -> x/n, layer) +Base.:-(n::Number, layer::T) where {T <: SimpleSDMLayer} = broadcast(x -> n-x, layer) +Base.:-(layer::T, n::Number) where {T <: SimpleSDMLayer} = broadcast(x -> x-n, layer) +Base.:+(n::Number, layer::T) where {T <: SimpleSDMLayer} = broadcast(x -> n+x, layer) +Base.:+(layer::T, n::Number) where {T <: SimpleSDMLayer} = broadcast(x -> x+n, layer) +Base.://(n::Number, layer::T) where {T <: SimpleSDMLayer} = broadcast(x -> n//x, layer) +Base.://(layer::T, n::Number) where {T <: SimpleSDMLayer} = broadcast(x -> x//n, layer) +Base.:%(n::Number, layer::T) where {T <: SimpleSDMLayer} = broadcast(x -> n%x, layer) +Base.:%(layer::T, n::Number) where {T <: SimpleSDMLayer} = broadcast(x -> x%n, layer) \ No newline at end of file diff --git a/src/operations/mask.jl b/src/operations/mask.jl index 1ac686bf..d76d57ef 100644 --- a/src/operations/mask.jl +++ b/src/operations/mask.jl @@ -27,3 +27,44 @@ function mask(l1::T1, l2::T2) where {T1 <: SimpleSDMLayer, T2 <: SimpleSDMLayer} mask!(l1, l3) return l3 end + + +_format_polygon(xy) = Polygon([Point(c[1], c[2]) for c in xy[1]]) + +function _points_from_polygon(poly) + pl = Point[] + for i in 1:length(poly.exterior) + push!(pl, poly.exterior[i][1]) + end + push!(pl, pl[1]) + return pl +end + +function _mask_from_polygon(polygon::Polygon, layer::T) where {T <: SimpleSDMLayer} + msk = similar(layer, Bool) + pts = _points_from_polygon(polygon) + keep = filter(p -> inpolygon(Array(p), pts)!=0, keys(layer)) + if length(keep) > 0 + msk[keep] = fill(true, length(keep)) + end + return msk +end + +function mask(multipolygon::P, layer::T) where {T <: SimpleSDMLayer, P <: Vector{<:GeometryBasics.Polygon}} + msks = [_mask_from_polygon(polygon, layer) for polygon in multipolygon] + return mask(maximum(msks), layer) +end + +function mask(polygon::Polygon, layer::T) where {T <: SimpleSDMLayer} + msk = _mask_from_polygon(polygon, layer) + return mask(msk, layer) +end + +function mask(circle::Circle, layer::T) where {T <: SimpleSDMLayer} + msk = similar(layer, Bool) + keep = filter(k -> k in circle, keys(layer)) + if length(keep) > 0 + msk[keep] = fill(true, length(keep)) + end + return mask(msk, layer) +end \ No newline at end of file diff --git a/src/operations/sliding.jl b/src/operations/sliding.jl index 441d3acb..4deb414f 100644 --- a/src/operations/sliding.jl +++ b/src/operations/sliding.jl @@ -1,15 +1,3 @@ -function haversine(lon1, lat1, lon2, lat2; R=6371.0) - φ1 = lat1 * π/180.0 - φ2 = lat2 * π/180.0 - Δφ = (lat2-lat1) * π/180.0 - Δλ = (lon2-lon1) * π/180.0 - a = sin(Δφ/2.0)^2.0 + cos(φ1)*cos(φ2) * sin(Δλ)^2.0 - c = 2.0 * atan(sqrt(a), sqrt(1.0-a)) - return R*c -end - -haversine(p1, p2; R=6371.0) = haversine(p1..., p2...; R=R) - """ slidingwindow(L::LT, f::FT, d::IT) where {LT <: SimpleSDMLayer, FT <: Function, IT <: Number} @@ -36,25 +24,38 @@ function slidingwindow(layer::LT, f::FT, d::IT; threaded::Bool=Threads.nthreads( # New layer using typed similar N = similar(layer, return_type) - # Store latitudes and longitudes - _lat, _lon = latitudes(layer), longitudes(layer) - # Vector of all positions with a value - filled_positions = CartesianIndices(layer.grid)[findall(!isnothing, layer.grid)] + filled_positions = keys(layer) # We then filter in the occupied positions if threaded Threads.@threads for pos in filled_positions - neighbors = filter(p -> haversine((_lon[Tuple(pos)[2]], _lat[Tuple(pos)[1]]), (_lon[Tuple(p)[2]], _lat[Tuple(p)[1]])) < d, filled_positions) - N.grid[pos] = f(layer.grid[neighbors]) + N[pos] = f(_sliding_values(layer, pos, d)) end else for pos in filled_positions - neighbors = filter(p -> haversine((_lon[Tuple(pos)[2]], _lat[Tuple(pos)[1]]), (_lon[Tuple(p)[2]], _lat[Tuple(p)[1]])) < d, filled_positions) - N.grid[pos] = f(layer.grid[neighbors]) + N[pos] = f(_sliding_values(layer, pos, d)) end end # And we return the object return N end + +function _sliding_values(layer, pt, d; R=6371.0) + # Bounding box (approx.) for the sliding window of length d at the given point + max_lat = min(layer.top, pt[2]+(180.0*d)/(π*R)) + min_lat = max(layer.bottom, pt[2]-(180.0*d)/(π*R)) + max_lon = min(layer.right, pt[1]+(360.0*d)/(π*R)) + min_lon = max(layer.left, pt[1]-(360.0*d)/(π*R)) + + # Extracted layer for the sliding window + _tmp = clip(layer; left=min_lon, right=max_lon, top=max_lat, bottom=min_lat) + + # Filter the correct positions + filled_positions = keys(_tmp) + neighbors = filter(p -> Distances.haversine((pt[1], pt[2]), (p[1], p[2]), R) < d, filled_positions) + + # Return + return _tmp[neighbors] +end \ No newline at end of file diff --git a/src/pseudoabsences/main.jl b/src/pseudoabsences/main.jl new file mode 100644 index 00000000..e22a4978 --- /dev/null +++ b/src/pseudoabsences/main.jl @@ -0,0 +1,10 @@ +abstract type PseudoAbsenceGenerator end + +struct WithinRadius <: PseudoAbsenceGenerator +end + +struct SurfaceRangeEnvelope <: PseudoAbsenceGenerator +end + +struct RandomSelection <: PseudoAbsenceGenerator +end diff --git a/src/pseudoabsences/radius.jl b/src/pseudoabsences/radius.jl new file mode 100644 index 00000000..42536609 --- /dev/null +++ b/src/pseudoabsences/radius.jl @@ -0,0 +1,20 @@ +function Base.rand(::Type{WithinRadius}, presences::T; distance::Number=1.0) where {T <: SimpleSDMLayer} + @assert SimpleSDMLayers._inner_type(presences) <: Bool + n_occ = sum(presences) + iszero(n_occ) && throw(ArgumentError("The presences layer is empty")) + _msk = mask(presences, presences) + _radius_msk = similar(presences, Bool) + for k in keys(_msk) + _msk_keys = keys(mask(Circle(k, distance), presences)) + _radius_msk[_msk_keys] = fill(true, length(_msk_keys)) + end + _radius_msk = mask(_radius_msk, _radius_msk) + acceptable_cells = setdiff(keys(_radius_msk), keys(_msk)) + if length(acceptable_cells) < n_occ + @warn "There are fewer acceptable pseudo-absences than occurrences" + end + pa_places = sample(acceptable_cells, min(length(acceptable_cells), n_occ), replace=false) + pa = similar(presences, Bool) + pa[pa_places] = fill(true, length(pa_places)) + return pa +end \ No newline at end of file diff --git a/src/pseudoabsences/randomselection.jl b/src/pseudoabsences/randomselection.jl new file mode 100644 index 00000000..b0ade9e3 --- /dev/null +++ b/src/pseudoabsences/randomselection.jl @@ -0,0 +1,14 @@ +function Base.rand(::Type{RandomSelection}, presences::T) where {T <: SimpleSDMLayer} + @assert SimpleSDMLayers._inner_type(presences) <: Bool + n_occ = sum(presences) + iszero(n_occ) && throw(ArgumentError("The presences layer is empty")) + _msk = mask(presences, presences) + acceptable_cells = setdiff(keys(presences), keys(_msk)) + if length(acceptable_cells) < n_occ + @warn "There are fewer acceptable pseudo-absences than occurrences" + end + pa_places = sample(acceptable_cells, min(length(acceptable_cells), n_occ), replace=false) + pa = similar(presences, Bool) + pa[pa_places] = fill(true, length(pa_places)) + return pa +end diff --git a/src/pseudoabsences/surfacerangeenvelope.jl b/src/pseudoabsences/surfacerangeenvelope.jl new file mode 100644 index 00000000..6495d00e --- /dev/null +++ b/src/pseudoabsences/surfacerangeenvelope.jl @@ -0,0 +1,18 @@ +function Base.rand(::Type{SurfaceRangeEnvelope}, presences::T) where {T <: SimpleSDMLayer} + @assert SimpleSDMLayers._inner_type(presences) <: Bool + n_occ = sum(presences) + iszero(n_occ) && throw(ArgumentError("The presences layer is empty")) + _msk = mask(presences, presences) + acceptable_cells = setdiff(keys(presences), keys(_msk)) + rlat = extrema([p[2] for p in keys(_msk)]) + rlon = extrema([p[1] for p in keys(_msk)]) + filter!(p -> rlat[1] <= p[2] <= rlat[2], acceptable_cells) + filter!(p -> rlon[1] <= p[1] <= rlon[2], acceptable_cells) + if length(acceptable_cells) < n_occ + @warn "There are fewer acceptable pseudo-absences than occurrences" + end + pa_places = sample(acceptable_cells, min(length(acceptable_cells), n_occ), replace=false) + pa = similar(presences, Bool) + pa[pa_places] = fill(true, length(pa_places)) + return pa +end \ No newline at end of file diff --git a/src/recipes/recipes.jl b/src/recipes/recipes.jl index 931910c0..848380e9 100644 --- a/src/recipes/recipes.jl +++ b/src/recipes/recipes.jl @@ -1,3 +1,7 @@ +@shorthands bivariate +@shorthands bivariatelegend + + """ test 1 """ @@ -20,7 +24,7 @@ end """ test 2 """ -@recipe function plot(l1::FT, l2::ST) where {FT <: SimpleSDMLayer, ST <: SimpleSDMLayer} +@recipe function plot(l1::FT, l2::ST; classes::Int=3, p0=colorant"#e8e8e8", p1=colorant"#64acbe", p2=colorant"#c85a5a") where {FT <: SimpleSDMLayer, ST <: SimpleSDMLayer} eltype(l1) <: Number || throw(ArgumentError("Plotting is only supported for layers with number values ($(eltype(l1)))")) eltype(l2) <: Number || throw(ArgumentError("Plotting is only supported for layers with number values ($(eltype(l2)))")) seriestype --> :scatter @@ -28,5 +32,70 @@ test 2 SimpleSDMLayers._layers_are_compatible(l1, l2) valid_i = findall(.!isnothing.(l1.grid) .& .!isnothing.(l2.grid)) l1.grid[valid_i], l2.grid[valid_i] + elseif get(plotattributes, :seriestype, :bivariate) in [:bivariate] + SimpleSDMLayers._layers_are_compatible(l1, l2) + c1 = LinRange(p0, p1, classes) + c2 = LinRange(p0, p2, classes) + breakpoints = LinRange(0.0, 1.0, classes+1) + q1 = rescale(l1, collect(LinRange(0.0, 1.0, 10classes))) + q2 = rescale(l2, collect(LinRange(0.0, 1.0, 10classes))) + classified = similar(l1, Int) + cols = typeof(p0)[] + for i in 1:classes + if isequal(classes)(i) + fi = (v) -> breakpoints[i] < v <= breakpoints[i+1] + else + fi = (v) -> breakpoints[i] <= v < breakpoints[i+1] + end + m1 = broadcast(fi, q1) + for j in 1:classes + if isequal(classes)(j) + fj = (v) -> breakpoints[j] < v <= breakpoints[j+1] + else + fj = (v) -> breakpoints[j] <= v < breakpoints[j+1] + end + m2 = broadcast(fj, q2) + push!(cols, ColorBlendModes.BlendMultiply(c1[i], c2[j])) + m = reduce(*, [m1, m2]) + replace!(m, false => nothing) + if length(m) > 0 + classified[keys(m)] = fill(length(cols), length(m)) + end + end + end + replace!(classified, 0 => 1) + @series begin + seriescolor := vec(cols) + seriestype := :heatmap + subplot := 1 + legend --> false + clims --> (1, classes^2) + convert(Float16, classified) + end + elseif get(plotattributes, :seriestype, :bivariatelegend) in [:bivariatelegend] + c1 = LinRange(p0, p1, classes) + c2 = LinRange(p0, p2, classes) + grid --> :none + ticks --> :none + legend --> false + frametype --> :none + xlims --> (1-0.5, classes+0.5) + ylims --> (1-0.5, classes+0.5) + aspect_ratio --> 1 + cols = Vector{typeof(p0)}(undef, classes^2) + class = 1 + m = zeros(Float64, classes, classes) + for i in 1:classes + for j in 1:classes + m[j,i] = class + cols[class] = ColorBlendModes.BlendMultiply(c1[i], c2[j]) + class += 1 + end + end + @series begin + seriescolor := cols + seriestype := :heatmap + m + end end end diff --git a/test/Project.toml b/test/Project.toml index 3efaa6bd..26508182 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,4 @@ [deps] -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" GBIF = "ee291a33-5a6c-5552-a3c8-0f29a1181037" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -7,5 +6,4 @@ StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] -DataFrames = "0.22, 1.0" GBIF = "0.2, 0.3" diff --git a/test/construction.jl b/test/core/construction.jl similarity index 100% rename from test/construction.jl rename to test/core/construction.jl diff --git a/test/matching.jl b/test/core/coordconvert.jl similarity index 100% rename from test/matching.jl rename to test/core/coordconvert.jl diff --git a/test/iteration.jl b/test/core/iteration.jl similarity index 59% rename from test/iteration.jl rename to test/core/iteration.jl index ec3db356..8c4320c7 100644 --- a/test/iteration.jl +++ b/test/core/iteration.jl @@ -5,7 +5,7 @@ using Test S = SimpleSDMPredictor([1 2 3; 4 5 6; 7 nothing 9], 0.0, 1.0, 0.0, 1.0) val = sort([v for v in S]) -@test all(val .== vec([1 2 3 4 5 6 7 9])) -@test all(val .== sort(collect(S))) +@test all(sort(map(last, val)) .== vec([1 2 3 4 5 6 7 9])) +@test all(sort(map(last, val)) .== sort(collect(S))) end \ No newline at end of file diff --git a/test/basics.jl b/test/core/latlon.jl similarity index 95% rename from test/basics.jl rename to test/core/latlon.jl index 501a29bc..9234fc40 100644 --- a/test/basics.jl +++ b/test/core/latlon.jl @@ -20,4 +20,7 @@ bbox = boundingbox(S) @test bbox.bottom == S.bottom @test bbox.top == S.top +A = grid(S) +@test M == A + end diff --git a/test/core/setindex.jl b/test/core/setindex.jl new file mode 100644 index 00000000..bccc3936 --- /dev/null +++ b/test/core/setindex.jl @@ -0,0 +1,41 @@ +module SSLTestSetIndex +using SimpleSDMLayers +using Test + +S = SimpleSDMResponse(zeros(10, 10), 0.0, 1.0, 0.0, 1.0) + +# Change a single value +changepoint = Point(0.2, 0.5) +S[changepoint] = 1.0 +@test S[changepoint] == 1.0 + +# Change multiple by multiple values +changepoints = [Point(rand(2)...) for i in 1:8] +changevalues = rand(length(changepoints)) +S[changepoints] = changevalues +# NOTE we test that the values in the layer are part of the pool of possible +# values, because it is entirely possible that two points will fall within the +# same cell +@test all(.!isnothing.(indexin(S[changepoints], changevalues))) + +# Change multiple by multiple values as a matrix +changepoints = reshape([Point(rand(2)...) for i in 1:8], (2, 4)) +changevalues = reshape(rand(length(changepoints)), (2,4)) +S[changepoints] = changevalues +@test all(.!isnothing.(indexin(S[changepoints], changevalues))) + +#= + +# Change multiple values by a single value +changepoints = [Point(rand(2)...) for i in 1:8] +S[changepoints] .= 5.0 +@test all(S[changepoints] .== 5.0) + +# Change multiple values by a single value as a matrix +changepoints = reshape([Point(rand(2)...) for i in 1:8], (2, 4)) +S[changepoints] .= 5.0 +@test all(S[changepoints] .== 5.0) + +=# + +end \ No newline at end of file diff --git a/test/core/tiling.jl b/test/core/tiling.jl new file mode 100644 index 00000000..37407f59 --- /dev/null +++ b/test/core/tiling.jl @@ -0,0 +1,34 @@ +module SSLTestTiling +using SimpleSDMLayers +using Test + +# hcat / vcat +l1 = SimpleSDMPredictor(WorldClim, BioClim, 1; left=0.0, right=10.0, bottom=0.0, top=10.0) +l2 = SimpleSDMPredictor(WorldClim, BioClim, 1; left=0.0, right=10.0, bottom=10.0, top=20.0) +l3 = SimpleSDMPredictor(WorldClim, BioClim, 1; left=10.0, right=20.0, bottom=0.0, top=10.0) +l4 = SimpleSDMPredictor(WorldClim, BioClim, 1; left=10.0, right=20.0, bottom=10.0, top=20.0) +l5 = SimpleSDMPredictor(WorldClim, BioClim, 1; left=0.0, right=20.0, bottom=0.0, top=20.0) + +ml1 = hcat(l1, l3) +vl1 = vcat(l1, l2) +ml2 = hcat(l2, l4) +vl2 = vcat(l3, l4) + +vml = vcat(ml1, ml2) +mvl = hcat(vl1, vl2) + +@test all(vml.grid == mvl.grid) + +for l in (vml, mvl) + @test all(l.grid == l5.grid) + @test size(l) == size(l5) + @test stride(l) == stride(l5) + @test longitudes(l) == longitudes(l5) + @test latitudes(l) == latitudes(l5) + @test l.left == l5.left + @test l.right == l5.right + @test l.bottom == l5.bottom + @test l.top == l5.top +end + +end \ No newline at end of file diff --git a/test/ascii.jl b/test/data/ascii.jl similarity index 100% rename from test/ascii.jl rename to test/data/ascii.jl diff --git a/test/chelsa.jl b/test/data/chelsa.jl similarity index 100% rename from test/chelsa.jl rename to test/data/chelsa.jl diff --git a/test/dataread.jl b/test/data/dataread.jl similarity index 100% rename from test/dataread.jl rename to test/data/dataread.jl diff --git a/test/earthenv.jl b/test/data/earthenv.jl similarity index 100% rename from test/earthenv.jl rename to test/data/earthenv.jl diff --git a/test/worldclim.jl b/test/data/worldclim.jl similarity index 100% rename from test/worldclim.jl rename to test/data/worldclim.jl diff --git a/test/dataframes.jl b/test/extensions/dataframes.jl similarity index 100% rename from test/dataframes.jl rename to test/extensions/dataframes.jl diff --git a/test/gbif.jl b/test/extensions/gbif.jl similarity index 100% rename from test/gbif.jl rename to test/extensions/gbif.jl diff --git a/test/plots.jl b/test/extensions/plots.jl similarity index 100% rename from test/plots.jl rename to test/extensions/plots.jl diff --git a/test/operations/clip.jl b/test/operations/clip.jl new file mode 100644 index 00000000..44c8e8e0 --- /dev/null +++ b/test/operations/clip.jl @@ -0,0 +1,23 @@ +module SSLTestClip +using SimpleSDMLayers +using Test + +M = rand(Bool, (10, 10)) +S = SimpleSDMPredictor(M, 0.0, 1.0, 0.0, 1.0) + +cl1 = clip(S; left=0.2, right=0.6, bottom=0.5, top=1.0) +@test typeof(cl1) == typeof(S) +@test cl1.top ≈ 1.0 +@test cl1.bottom ≈ 0.5 +@test cl1.right ≈ 0.6 +@test cl1.left ≈ 0.2 +@test clip(S; left=0.19).left <= 0.2 + +cl2 = clip(S; left=0.2, bottom=0.5) +@test typeof(cl2) == typeof(S) +@test cl2.top ≈ 1.0 +@test cl2.bottom ≈ 0.5 +@test cl2.right ≈ 1.0 +@test cl2.left ≈ 0.2 + +end \ No newline at end of file diff --git a/test/coarsen.jl b/test/operations/coarsen.jl similarity index 100% rename from test/coarsen.jl rename to test/operations/coarsen.jl diff --git a/test/mosaic.jl b/test/operations/mosaic.jl similarity index 100% rename from test/mosaic.jl rename to test/operations/mosaic.jl diff --git a/test/rescale.jl b/test/operations/rescale.jl similarity index 100% rename from test/rescale.jl rename to test/operations/rescale.jl diff --git a/test/subsetting.jl b/test/operations/subsetting.jl similarity index 97% rename from test/subsetting.jl rename to test/operations/subsetting.jl index 7f16bb85..fec178cb 100644 --- a/test/subsetting.jl +++ b/test/operations/subsetting.jl @@ -6,7 +6,7 @@ temp = SimpleSDMPredictor(WorldClim, BioClim, 1) @test size(temp) == (1080, 2160) coords = (left = -145.0, right = -50.0, bottom = 20.0, top = 75.0) -l1 = temp[coords] +l1 = clip(temp; coords...) l2 = SimpleSDMPredictor(WorldClim, BioClim, 1; coords...) tempfile = tempname() geotiff(tempfile, l2) diff --git a/test/overloads.jl b/test/overloads.jl index b727fc7c..633654c3 100644 --- a/test/overloads.jl +++ b/test/overloads.jl @@ -13,30 +13,17 @@ for i in eachindex(S) end @test typeof(S[0.2, 0.6]) == eltype(M) +@test S[0.2, 0.6] == S[Point(0.2, 0.6)] @test isnothing(S[1.2, 0.3]) @test isnothing(S[1.2, 1.3]) @test isnothing(S[0.2, 1.3]) -@test typeof(S[1:2, 5:7]) == typeof(S) - -@test typeof(S[left=0.2, right=0.6, bottom=0.5, top=1.0]) == typeof(S) -@test S[left=0.2, right=0.6, bottom=0.5, top=1.0].top ≈ 1.0 -@test S[left=0.2, right=0.6, bottom=0.5, top=1.0].bottom ≈ 0.4 -@test S[left=0.2, right=0.6, bottom=0.5, top=1.0].right ≈ 0.6 -@test S[left=0.19, right=0.6, bottom=0.5, top=1.0].left <= 0.2 - -@test typeof(S[left=0.2, bottom=0.5]) == typeof(S) -@test S[left=0.2, bottom=0.5].top ≈ 1.0 -@test S[left=0.2, bottom=0.5].bottom ≈ 0.4 -@test S[left=0.2, bottom=0.5].right ≈ 1.0 -@test S[left=0.2, bottom=0.5].left ≈ 0.2 - C = (left=0.2, bottom=0.5) -@test typeof(S[C]) == typeof(S) +@test typeof(clip(S; C...)) == typeof(S) Y = SimpleSDMResponse(zeros(Float64, (5,5)), 0.0, 1.0, 0.0, 1.0) Y[0.1,0.1] = 0.2 -@test Y[0.1,0.1] == 0.2 +@test Y[Point(0.1,0.1)] == 0.2 Z = convert(SimpleSDMPredictor, Y) Y[0.1,0.1] = 4.0 @@ -47,37 +34,8 @@ Z = convert(SimpleSDMPredictor, Y) V = collect(Z) @test typeof(V) == Vector{Float64} -# hcat / vcat -l1 = SimpleSDMPredictor(WorldClim, BioClim, 1; left=0.0, right=10.0, bottom=0.0, top=10.0) -l2 = SimpleSDMPredictor(WorldClim, BioClim, 1; left=0.0, right=10.0, bottom=10.0, top=20.0) -l3 = SimpleSDMPredictor(WorldClim, BioClim, 1; left=10.0, right=20.0, bottom=0.0, top=10.0) -l4 = SimpleSDMPredictor(WorldClim, BioClim, 1; left=10.0, right=20.0, bottom=10.0, top=20.0) -l5 = SimpleSDMPredictor(WorldClim, BioClim, 1; left=0.0, right=20.0, bottom=0.0, top=20.0) - -ml1 = hcat(l1, l3) -vl1 = vcat(l1, l2) -ml2 = hcat(l2, l4) -vl2 = vcat(l3, l4) - -vml = vcat(ml1, ml2) -mvl = hcat(vl1, vl2) - -@test all(vml.grid == mvl.grid) - -for l in (vml, mvl) - @test all(l.grid == l5.grid) - @test size(l) == size(l5) - @test stride(l) == stride(l5) - @test longitudes(l) == longitudes(l5) - @test latitudes(l) == latitudes(l5) - @test l.left == l5.left - @test l.right == l5.right - @test l.bottom == l5.bottom - @test l.top == l5.top -end - # typed similar -c2 = similar(l1, Bool) +c2 = similar(Y, Bool) @test eltype(c2) == Bool @test eltype(convert(Int64, c2)) == Int64 @@ -137,11 +95,11 @@ l1, l2 = SimpleSDMPredictor(WorldClim, BioClim, 1:2; left = 0.0, right = 10.0, b l3 = SimpleSDMPredictor(WorldClim, BioClim, 1; left = 5.0, right = 10.0, bottom = 5.0, top = 10.0) @test stride(l1) == stride(l3) -l4 = l1[l2] +l4 = clip(l1, l2) @test l4 == l1 -l5 = l1[l3] +l5 = clip(l1, l3) @test l5 == l3 -@test_throws ArgumentError l3[l1] +@test_throws ArgumentError clip(l3, l1) end diff --git a/test/runtests.jl b/test/runtests.jl index 0de6391b..7a263d92 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,24 +4,26 @@ using Test global anyerrors = false tests = [ - "construction" => "construction.jl", - "basics" => "basics.jl", - "matching" => "matching.jl", + "construction" => "core/construction.jl", + "lat./lon." => "core/latlon.jl", + "lat./lon. conversion" => "core/coordconvert.jl", + "iteration" => "core/iteration.jl", + "tiling" => "core/tiling.jl", + "setindex" => "core/setindex.jl", "overloads" => "overloads.jl", - "ascii" => "ascii.jl", - "iteration" => "iteration.jl", - "rescale" => "rescale.jl", - "mosaic" => "mosaic.jl", + "clipping" => "operations/clip.jl", + "rescale" => "operations/rescale.jl", + "mosaic" => "operations/mosaic.jl", + "coarsen" => "operations/coarsen.jl", + "subsetting" => "operations/subsetting.jl", "generated" => "generated.jl", - "import" => "dataread.jl", - "worldclim" => "worldclim.jl", - "earthenv" => "earthenv.jl", - "chelsa" => "chelsa.jl", - "subsetting" => "subsetting.jl", - "coarsen" => "coarsen.jl", - "plotting" => "plots.jl", - "GBIF" => "gbif.jl", - "DataFrames" => "dataframes.jl" + "ascii" => "data/ascii.jl", + "import" => "data/dataread.jl", + "worldclim" => "data/worldclim.jl", + "earthenv" => "data/earthenv.jl", + "chelsa" => "data/chelsa.jl", + "plotting" => "extensions/plots.jl", + "GBIF" => "extensions/gbif.jl", ] for test in tests