Skip to content

Commit

Permalink
Merge pull request #820 from sadielbartholomew/student-recipes-3
Browse files Browse the repository at this point in the history
Add new recipe (18) by summer student: correlation calculation
  • Loading branch information
sadielbartholomew authored Nov 8, 2024
2 parents 9836d52 + 186b4ab commit b5c40da
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 4 deletions.
140 changes: 140 additions & 0 deletions docs/source/recipes/plot_18_recipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
"""
Calculating the Pearson correlation coefficient between datasets
================================================================
In this recipe, we will take two datasets, one for an independent variable
(in this example elevation) and one for a dependent variable (snow
cover over a particuar day), regrid them to the same resolution then
calculate the correlation coefficient, to get a measure of the relationship
between them.
"""

# %%
# 1. Import cf-python, cf-plot and other required packages:
import cfplot as cfp
import cf

import matplotlib.pyplot as plt
import scipy.stats.mstats as mstats

# %%
# 2. Read the data in and unpack the Fields from FieldLists using indexing.
# In our example We are investigating the influence of the land height on
# the snow cover extent, so snow cover is the dependent variable. The snow
# cover data is the
# 'Snow Cover Extent 2017-present (raster 500 m), Europe, daily – version 1'
# sourced from the Copernicus Land Monitoring Service which is described at:
# https://land.copernicus.eu/en/products/snow/snow-cover-extent-europe-v1-0-500m
# and the elevation data is the 'NOAA NGDC GLOBE topo: elevation data' dataset
# which can be sourced from the IRI Data Library, or details found, at:
# http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NGDC/.GLOBE/.topo/index.html.
orog = cf.read("~/recipes/1km_elevation.nc")[0]
snow = cf.read("~/recipes/snowcover")[0]

# %%
# 3. Choose the day of pre-aggregated snow cover to investigate. We will
# take the first datetime element corresponding to the first day from the
# datasets, 1st January 2024, but by changing the indexing you can explore
# other days by changing the index. We also get the string corresponding to
# the date, to reference later:
snow_day = snow[0]
snow_day_dt = snow_day.coordinate("time")[0].data
snow_day_daystring = f"{snow_day_dt.datetime_as_string[0].split(' ')[0]}"

# %%
# 4. Choose the region to consider to compare the relationship across,
# which must be defined across both datasets, though not necessarily on the
# same grid since we regrid to the same grid next and subspace to the same
# area for both datasets ready for comparison in the next steps. By changing
# the latitude and longitude points in the tuple below, you can change the
# area that is used:
region_in_mid_uk = ((-3.0, -1.0), (52.0, 55.0))
sub_orog = orog.subspace(
longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1])
)
sub_snow = snow_day.subspace(
longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1])
)

# %%
# 5. Ensure data quality, since the standard name here corresponds to a
# unitless fraction, but the values are in the tens, so we need to
# normalise these to all lie between 0 and 1 and change the units
# appropriately:
sub_snow = ((sub_snow - sub_snow.minimum()) / (sub_snow.range()))
sub_snow.override_units("1", inplace=True)

# %%
# 6. Regrid the data so that they lie on the same grid and therefore each
# array structure has values with corresponding geospatial points that
# can be statistically compared. Here the elevation field is regridded to the
# snow field since the snow is higher-resolution, but the other way round is
# possible by switching the field order:
regridded_orog = sub_orog.regrids(sub_snow, method="linear")

# %%
# 7. Squeeze the snow data to remove the size 1 axes so we have arrays of
# the same dimensions for each of the two fields to compare:
sub_snow = sub_snow.squeeze()

# %%
# 8. Finally, perform the statistical calculation by using the SciPy method
# to find the Pearson correlation coefficient for the two arrays now they are
# in comparable form. Note we need to use 'scipy.stats.mstats' and not
# 'scipy.stats' for the 'pearsonr' method, to account for masked
# data in the array(s) properly:
coefficient = mstats.pearsonr(regridded_orog.array, sub_snow.array)
print(f"The Pearson correlation coefficient is: {coefficient}")

# %%
# 9. Make a final plot showing the two arrays side-by-side and quoting the
# determined Pearson correlation coefficient to illustrate the relatoinship
# and its strength visually. We use 'gpos' to position the plots in two
# columns and apply some specific axes ticks and labels for clarity.
cfp.gopen(
rows=1, columns=2, top=0.85,
file="snow_and_orog_on_same_grid.png",
user_position=True,
)

# Joint configuration of the plots, including adding an overall title
plt.suptitle(
(
"Snow cover compared to elevation for the same area of the UK "
f"aggregated across\n day {snow_day_daystring} with correlation "
"coefficient (on the same grid) of "
f"{coefficient.statistic:.4g} (4 s.f.)"
),
fontsize=17,
)
cfp.mapset(resolution="10m")
cfp.setvars(ocean_color="white", lake_color="white")
label_info = {
"xticklabels": ("3W", "2W", "1W"),
"yticklabels": ("52N", "53N", "54N", "55N"),
"xticks": (-3, -2, -1),
"yticks": (52, 53, 54, 55),
}

# Plot the two contour plots as columns
cfp.gpos(1)
cfp.cscale("wiki_2_0_reduced")
cfp.con(
regridded_orog,
lines=False,
title="Elevation (from 1km-resolution orography)",
colorbar_drawedges=False,
**label_info,
)
cfp.gpos(2)
# Don't add extentions on the colourbar since it can only be 0 to 1 inclusive
cfp.levs(min=0, max=1, step=0.1, extend="neither")
cfp.cscale("precip_11lev", ncols=11, reverse=1)
cfp.con(sub_snow, lines=False,
title="Snow cover extent (from satellite imagery)",
colorbar_drawedges=False,
**label_info
)

cfp.gclose()
10 changes: 6 additions & 4 deletions docs/source/recipes/recipe_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ plot_06_recipe.html#sphx-glr-recipes-plot-06-recipe-py
plot_07_recipe.html#sphx-glr-recipes-plot-07-recipe-py
<div class="sphx-glr-thumbcontainer aggregate lineplot subspace" tooltip="Aggregate, Lineplot, Subspace">
plot_08_recipe.html#sphx-glr-recipes-plot-08-recipe-py
<div class="sphx-glr-thumbcontainer collapse contourmap" tooltip="Collapse, Contourmap, Subplot">
<div class="sphx-glr-thumbcontainer collapse contourmap stats subplot" tooltip="Collapse, Contourmap, Statistical Operations, Subplot">
plot_09_recipe.html#sphx-glr-recipes-plot-09-recipe-py
<div class="sphx-glr-thumbcontainer histogram" tooltip="Histogram">
plot_10_recipe.html#sphx-glr-recipes-plot-10-recipe-py
Expand All @@ -23,12 +23,14 @@ plot_11_recipe.html#sphx-glr-recipes-plot-11-recipe-py
plot_12_recipe.html#sphx-glr-recipes-plot-12-recipe-py
<div class="sphx-glr-thumbcontainer contourmap mask" tooltip=" Contourmap, Mask">
plot_13_recipe.html#sphx-glr-recipes-plot-13-recipe-py
<div class="sphx-glr-thumbcontainer subspace collapse contourmap lineplot" tooltip="Subspace, Collapse, Contourmap, Lineplot">
<div class="sphx-glr-thumbcontainer subspace collapse contourmap lineplot stats" tooltip="Subspace, Collapse, Contourmap, Lineplot, Statistical Operations">
plot_14_recipe.html#sphx-glr-recipes-plot-14-recipe-py
<div class="sphx-glr-thumbcontainer subspace collapse contourmap" tooltip="Subspace, Collapse, Contourmap">
<div class="sphx-glr-thumbcontainer subspace collapse contourmap stats" tooltip="Subspace, Collapse, Contourmap, Statistical Operations">
plot_15_recipe.html#sphx-glr-recipes-plot-15-recipe-py
<div class="sphx-glr-thumbcontainer histogram subspace" tooltip="Histogram, Subspace, Subplot">
<div class="sphx-glr-thumbcontainer histogram subspace subplot" tooltip="Histogram, Subspace, Subplot">
plot_16_recipe.html#sphx-glr-recipes-plot-16-recipe-py
<div class="sphx-glr-thumbcontainer histogram subspace" tooltip="Histogram, Subspace">
plot_17_recipe.html#sphx-glr-recipes-plot-17-recipe-py
<div class="sphx-glr-thumbcontainer contourmap subspace subplot" tooltip="Contourmap, Subspace, Subplot">
plot_18_recipe.html#sphx-glr-recipes-plot-18-recipe-py
<div class="sphx-glr-thumbcontainer regrid stats" tooltip="Regrid, Statistical Operations">

0 comments on commit b5c40da

Please sign in to comment.