From 011978bd423f85c98f56735f65dba70885f08eae Mon Sep 17 00:00:00 2001 From: Natalia Hunt <> Date: Tue, 8 Oct 2024 16:51:41 +0100 Subject: [PATCH 01/11] Add Natalia's recipe for calculating snow-elevation correlation --- docs/source/recipes/plot_18_recipe.py | 172 ++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 docs/source/recipes/plot_18_recipe.py diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py new file mode 100644 index 0000000000..e83a6eddb1 --- /dev/null +++ b/docs/source/recipes/plot_18_recipe.py @@ -0,0 +1,172 @@ +""" +Plot to compare two linked datasets : Snow against Elevation +============================================ +In this recipe, we will plot a dependant variable (in this example snow cover) +against an independent variable (elevation). This will be on a contour plot +separately and together, then finding the coefficient of correlation and making an +elevation line plot. +""" + +# %% +# 1. Import cf-python, cf-plot and other required packages: + +import cfplot as cf +import cf +import sys +import scipy.stats.mstats as mstats +import matplotlib.pyplot as plt + +#%% +# 2. Read the data in: +# We are investigating the influence of the land height on the snow cover, +#so snow cover is the dependent variable. You can use different variable +# names for easier understanding. +# We are selecting the first field in the data with [0] + +orog = cf.read(f"~/cfplot_data/1km_elevation.nc")[0] +snow = cf.read(f"~/cfplot_data/snowcover")[0] + +snow_day = snow[0] # first day, Jan 1st of this year (2024) + +#%% +# 3. Choose the regions: +# Ensuring both datasets cover it, and also that its not too large as then +# regridding could cause the program to crash on small computers (dont +# regrid the whole map), as well as being mainly land. 
+# Tool for finding rectangular area for lat and lon points +# https://www.findlatitudeandlongitude.com/l/Yorkshire%2C+England/5009414/ + +region_in_mid_uk = [(-3.0, -1.0), (52.0, 55.0)] # lon then lat +use_orog = orog.subspace( + longitude=cf.wi(*region_in_mid_uk[0]), + latitude=cf.wi(*region_in_mid_uk[1]) +) + +#%% +# 4. Subspace snow to same bounding box as orography data (orog) + +use_snow = snow_day.subspace( + longitude=cf.wi(*region_in_mid_uk[0]), + latitude=cf.wi(*region_in_mid_uk[1]) +) + +#%% +# 5. Normalise the data +# This normalises to between 0 and 1 so multiply by 100 to get a percentage + +use_snow_normal = ((use_snow - use_snow.minimum())/ (use_snow.range()))*100 +print(use_snow_normal.data.stats()) + +#%% +# 6. Reassign the units as they are removed by cf-python after calculation +# Only do this if you are certain the units are convertible to % +use_snow_normal.override_units("percentage", inplace=True) + +#%% +# 7. Plot of Snowcover contour +# First it outputs the newly formatted data, you can change the file names +# the plots will save as. +# colour_scale.txt is a colour scale specifically made and saved to show +# the data but you could make your own or use existing ones from +# https://ncas-cms.github.io/cf-plot/build/colour_scales.html +# You will also need to adjust the labels and axis for your region +print("SNOW IS", use_snow_normal) +cfp.gopen(file="snow-1.png") +cfp.cscale("~/cfplot_data/colour_scale.txt") +cfp.mapset(resolution="10m") +cfp.con(use_snow_normal, + lines=False, + title = "Snow Cover Contour Plot", + xticklabels = ("3W", "2W", "1W"), + yticklabels =("52N", "53N", "54N", "55N"), + xticks = (-3, -2, -1), + yticks= (52, 53, 54, 55)) +cfp.gclose() +print("OROG IS", use_orog) + +#%% +# 8. 
Plot of 1km Resolution Orography Contour +# Here the ocean doesnt get coloured out because when it is regridded it gets +# masked by the lack of data for the dependant value (snow) over the seas + +cfp.gopen(file="orog-1.png") +cfp.cscale("wiki_2_0_reduced") +cfp.mapset(resolution="10m") +cfp.con(use_orog, + lines=False, + title = "1km resolution Orography Contour Plot", + xticklabels = ("3W", "2W", "1W"), + yticklabels =("52N", "53N", "54N", "55N"), + xticks = (-3, -2, -1), + yticks= (52, 53, 54, 55)) +cfp.gclose() + +#%% +# 9. Plot of Vertical Orography Lineplot of 1km Resolution Elevation + +lonz = use_orog.construct("longitude").data[0] +elevation_orog = use_orog.subspace(longitude = lonz) +xticks_elevation_orog = [52, 52.5, 53, 53.5, 54, 54.5, 55] + +cfp.gopen(figsize=(12, 6), file ="orography_vertical.png") + +cfp.lineplot( + x=elevation_orog.coord('latitude').array, + y=elevation_orog.array.squeeze(), + color="black", + title="Orography Profile over part of UK", + ylabel="Elevation (m)", + xlabel="Latitude", + xticks=xticks_elevation_orog, +) + +cfp.plotvars.plot.fill_between( + elevation_orog.coord('latitude').array, + 0, + elevation_orog.array.squeeze(), + color='black', + alpha=0.7 +) +cfp.gclose() + +#%% +# 10. Regrid the data to have a comparable array. +# Here the orography file is regridded to the snow file +# as snow is higher resolution (if this doesnt work switch them). +# The regridded data is saved as a file, when rerun this could be +# commented out and instead read in the regridded file +# as regridding takes quite awhile + +reg = use_orog.regrids(use_snow_normal, method="linear") +print("REGRIDDED IS", reg, type(reg)) +cf.write(reg, "regridded_data.nc") +print("DONE WRITING OUT REGRID FILE") +# This plots the regridded file data +cfp.gopen(file="regridded-1.png") +cfp.con(reg, lines=False) +cfp.gclose() + +reg = cf.read("~/cfplot_data/regridded_data.nc")[0] + +#%% +# 11. 
Compare the elevation and snow cover +# The "reg" field is the elevation data on the same grid as the snow data. +# compare this to the snow data itself "use_snow" + +reg_data = reg.data +snow_data = use_snow_normal.data +print("(REGRIDDED) OROG DATA IS", reg_data, reg_data.shape) +print("SNOW DATA IS", snow_data, snow_data.shape) + +#%% +# 12. Squeeze snow data to remove the size 1 axes + +snow_data = snow_data.squeeze() + +#%% +# 13. Final statistical calculations + +sys.setrecursionlimit(1500) +coeff = mstats.pearsonr(reg_data.array, snow_data.array) +print(coeff) + From 57fac282e6073c9575bb42f6e8221ee0a2254940 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 8 Oct 2024 16:56:15 +0100 Subject: [PATCH 02/11] Update NH original recipe 18 w/ minor fixes for cleanE2E run --- docs/source/recipes/plot_18_recipe.py | 51 ++++++--------------------- 1 file changed, 11 insertions(+), 40 deletions(-) diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py index e83a6eddb1..7494259a92 100644 --- a/docs/source/recipes/plot_18_recipe.py +++ b/docs/source/recipes/plot_18_recipe.py @@ -10,7 +10,7 @@ # %% # 1. Import cf-python, cf-plot and other required packages: -import cfplot as cf +import cfplot as cfp import cf import sys import scipy.stats.mstats as mstats @@ -22,9 +22,9 @@ #so snow cover is the dependent variable. You can use different variable # names for easier understanding. # We are selecting the first field in the data with [0] - -orog = cf.read(f"~/cfplot_data/1km_elevation.nc")[0] -snow = cf.read(f"~/cfplot_data/snowcover")[0] +PATH="~/summerstudents/final-recipes/new-required-datasets" +orog = cf.read(f"{PATH}/1km_elevation.nc")[0] +snow = cf.read(f"{PATH}/snowcover")[0] snow_day = snow[0] # first day, Jan 1st of this year (2024) @@ -35,7 +35,6 @@ # regrid the whole map), as well as being mainly land. 
# Tool for finding rectangular area for lat and lon points # https://www.findlatitudeandlongitude.com/l/Yorkshire%2C+England/5009414/ - region_in_mid_uk = [(-3.0, -1.0), (52.0, 55.0)] # lon then lat use_orog = orog.subspace( longitude=cf.wi(*region_in_mid_uk[0]), @@ -44,7 +43,6 @@ #%% # 4. Subspace snow to same bounding box as orography data (orog) - use_snow = snow_day.subspace( longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1]) @@ -53,9 +51,7 @@ #%% # 5. Normalise the data # This normalises to between 0 and 1 so multiply by 100 to get a percentage - use_snow_normal = ((use_snow - use_snow.minimum())/ (use_snow.range()))*100 -print(use_snow_normal.data.stats()) #%% # 6. Reassign the units as they are removed by cf-python after calculation @@ -70,9 +66,8 @@ # the data but you could make your own or use existing ones from # https://ncas-cms.github.io/cf-plot/build/colour_scales.html # You will also need to adjust the labels and axis for your region -print("SNOW IS", use_snow_normal) cfp.gopen(file="snow-1.png") -cfp.cscale("~/cfplot_data/colour_scale.txt") +###cfp.cscale("~/cfplot_data/colour_scale.txt") cfp.mapset(resolution="10m") cfp.con(use_snow_normal, lines=False, @@ -82,13 +77,11 @@ xticks = (-3, -2, -1), yticks= (52, 53, 54, 55)) cfp.gclose() -print("OROG IS", use_orog) #%% # 8. Plot of 1km Resolution Orography Contour # Here the ocean doesnt get coloured out because when it is regridded it gets # masked by the lack of data for the dependant value (snow) over the seas - cfp.gopen(file="orog-1.png") cfp.cscale("wiki_2_0_reduced") cfp.mapset(resolution="10m") @@ -103,7 +96,6 @@ #%% # 9. Plot of Vertical Orography Lineplot of 1km Resolution Elevation - lonz = use_orog.construct("longitude").data[0] elevation_orog = use_orog.subspace(longitude = lonz) xticks_elevation_orog = [52, 52.5, 53, 53.5, 54, 54.5, 55] @@ -133,40 +125,19 @@ # 10. Regrid the data to have a comparable array. 
# Here the orography file is regridded to the snow file # as snow is higher resolution (if this doesnt work switch them). -# The regridded data is saved as a file, when rerun this could be -# commented out and instead read in the regridded file -# as regridding takes quite awhile - reg = use_orog.regrids(use_snow_normal, method="linear") -print("REGRIDDED IS", reg, type(reg)) -cf.write(reg, "regridded_data.nc") -print("DONE WRITING OUT REGRID FILE") + + # This plots the regridded file data cfp.gopen(file="regridded-1.png") cfp.con(reg, lines=False) cfp.gclose() -reg = cf.read("~/cfplot_data/regridded_data.nc")[0] - -#%% -# 11. Compare the elevation and snow cover -# The "reg" field is the elevation data on the same grid as the snow data. -# compare this to the snow data itself "use_snow" - -reg_data = reg.data -snow_data = use_snow_normal.data -print("(REGRIDDED) OROG DATA IS", reg_data, reg_data.shape) -print("SNOW DATA IS", snow_data, snow_data.shape) - #%% -# 12. Squeeze snow data to remove the size 1 axes - +# 11. Squeeze snow data to remove the size 1 axes snow_data = snow_data.squeeze() #%% -# 13. Final statistical calculations - -sys.setrecursionlimit(1500) -coeff = mstats.pearsonr(reg_data.array, snow_data.array) -print(coeff) - +# 12. Final statistical calculations +coefficient = mstats.pearsonr(reg_data.array, snow_data.array) +print(f"The Pearson correlation coefficient is {coefficient}") From 727cb451d60529660be69788c8b231a7c604b7ef Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Tue, 8 Oct 2024 17:35:11 +0100 Subject: [PATCH 03/11] Update NH original recipe 18 to produce one plot only w/ result --- docs/source/recipes/plot_18_recipe.py | 122 +++++++++----------------- 1 file changed, 40 insertions(+), 82 deletions(-) diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py index 7494259a92..bb2fca5fd7 100644 --- a/docs/source/recipes/plot_18_recipe.py +++ b/docs/source/recipes/plot_18_recipe.py @@ -12,6 +12,7 @@ import cfplot as cfp import cf + import sys import scipy.stats.mstats as mstats import matplotlib.pyplot as plt @@ -25,11 +26,11 @@ PATH="~/summerstudents/final-recipes/new-required-datasets" orog = cf.read(f"{PATH}/1km_elevation.nc")[0] snow = cf.read(f"{PATH}/snowcover")[0] - +# Could use any of the seven days by indexing differently snow_day = snow[0] # first day, Jan 1st of this year (2024) #%% -# 3. Choose the regions: +# 3. Choose the regions and subspace to same area for both datasets: # Ensuring both datasets cover it, and also that its not too large as then # regridding could cause the program to crash on small computers (dont # regrid the whole map), as well as being mainly land. @@ -40,104 +41,61 @@ longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1]) ) - -#%% -# 4. Subspace snow to same bounding box as orography data (orog) use_snow = snow_day.subspace( longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1]) ) #%% -# 5. Normalise the data -# This normalises to between 0 and 1 so multiply by 100 to get a percentage +# 4. Normalise the data and change the units appropriately. 
+# Reassign the units as they are removed by cf-python after calculation +# Only do this if you are certain the units are convertible to % +# This normalises to between 0 and 1 so multiply by 100 to get a percentage: use_snow_normal = ((use_snow - use_snow.minimum())/ (use_snow.range()))*100 +# TODO SB not CF Compliant way to handle normalisation, check +use_snow_normal.override_units("percentage", inplace=True) #%% -# 6. Reassign the units as they are removed by cf-python after calculation -# Only do this if you are certain the units are convertible to % -use_snow_normal.override_units("percentage", inplace=True) +# 5. Regrid the data to have a comparable array. +# Here the orography file is regridded to the snow file +# as snow is higher resolution (if this doesnt work switch them). +reg = use_orog.regrids(use_snow_normal, method="linear") + +#%% +# 6. Squeeze snow data to remove the size 1 axes +use_snow_normal = use_snow_normal.squeeze() + +#%% +# 7. Final statistical calculations +coefficient = mstats.pearsonr(reg.array, use_snow_normal.array) +print(f"The Pearson correlation coefficient is {coefficient}") + #%% -# 7. Plot of Snowcover contour +# 8. Plots including title stating the coefficient. # First it outputs the newly formatted data, you can change the file names # the plots will save as. 
# colour_scale.txt is a colour scale specifically made and saved to show # the data but you could make your own or use existing ones from # https://ncas-cms.github.io/cf-plot/build/colour_scales.html # You will also need to adjust the labels and axis for your region -cfp.gopen(file="snow-1.png") +cfp.gopen(rows=1, columns=2, file="snow_and_orog_on_same_grid.png") ###cfp.cscale("~/cfplot_data/colour_scale.txt") +# Joint config cfp.mapset(resolution="10m") -cfp.con(use_snow_normal, - lines=False, - title = "Snow Cover Contour Plot", - xticklabels = ("3W", "2W", "1W"), - yticklabels =("52N", "53N", "54N", "55N"), - xticks = (-3, -2, -1), - yticks= (52, 53, 54, 55)) -cfp.gclose() - -#%% -# 8. Plot of 1km Resolution Orography Contour -# Here the ocean doesnt get coloured out because when it is regridded it gets -# masked by the lack of data for the dependant value (snow) over the seas -cfp.gopen(file="orog-1.png") +label_info = { + "xticklabels": ("3W", "2W", "1W"), + "yticklabels": ("52N", "53N", "54N", "55N"), + "xticks": (-3, -2, -1), + "yticks": (52, 53, 54, 55), +} + +cfp.gpos(1) +cfp.con(use_snow_normal, lines=False, title = "Snow Cover Plot", + **label_info) +# This plots the regridded file data. The destination grid is the snow grid. +cfp.gpos(2) cfp.cscale("wiki_2_0_reduced") -cfp.mapset(resolution="10m") -cfp.con(use_orog, - lines=False, - title = "1km resolution Orography Contour Plot", - xticklabels = ("3W", "2W", "1W"), - yticklabels =("52N", "53N", "54N", "55N"), - xticks = (-3, -2, -1), - yticks= (52, 53, 54, 55)) -cfp.gclose() - -#%% -# 9. 
Plot of Vertical Orography Lineplot of 1km Resolution Elevation -lonz = use_orog.construct("longitude").data[0] -elevation_orog = use_orog.subspace(longitude = lonz) -xticks_elevation_orog = [52, 52.5, 53, 53.5, 54, 54.5, 55] - -cfp.gopen(figsize=(12, 6), file ="orography_vertical.png") - -cfp.lineplot( - x=elevation_orog.coord('latitude').array, - y=elevation_orog.array.squeeze(), - color="black", - title="Orography Profile over part of UK", - ylabel="Elevation (m)", - xlabel="Latitude", - xticks=xticks_elevation_orog, -) - -cfp.plotvars.plot.fill_between( - elevation_orog.coord('latitude').array, - 0, - elevation_orog.array.squeeze(), - color='black', - alpha=0.7 -) -cfp.gclose() - -#%% -# 10. Regrid the data to have a comparable array. -# Here the orography file is regridded to the snow file -# as snow is higher resolution (if this doesnt work switch them). -reg = use_orog.regrids(use_snow_normal, method="linear") - - -# This plots the regridded file data -cfp.gopen(file="regridded-1.png") -cfp.con(reg, lines=False) +cfp.con(reg, lines=False, title="Elevation (1km resolution Orography) Plot", + **label_info) cfp.gclose() - -#%% -# 11. Squeeze snow data to remove the size 1 axes -snow_data = snow_data.squeeze() - -#%% -# 12. Final statistical calculations -coefficient = mstats.pearsonr(reg_data.array, snow_data.array) -print(f"The Pearson correlation coefficient is {coefficient}") From 4490550dc5e29c7082e8782d2ce9b5d48bc7b9d3 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Wed, 9 Oct 2024 00:51:43 +0100 Subject: [PATCH 04/11] Update NH original recipe 18 to finalise single plot --- docs/source/recipes/plot_18_recipe.py | 104 ++++++++++++++++---------- 1 file changed, 64 insertions(+), 40 deletions(-) diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py index bb2fca5fd7..24c9c19721 100644 --- a/docs/source/recipes/plot_18_recipe.py +++ b/docs/source/recipes/plot_18_recipe.py @@ -1,101 +1,125 @@ """ -Plot to compare two linked datasets : Snow against Elevation -============================================ -In this recipe, we will plot a dependant variable (in this example snow cover) -against an independent variable (elevation). This will be on a contour plot -separately and together, then finding the coefficient of correlation and making an -elevation line plot. +Plot to compare two linked datasets: Snow against Elevation +=========================================================== + +In this recipe, we will plot a dependant variable (in this example snow +cover) against an independent variable (elevation). This will be on a +contour plot separately and together, then finding the coefficient of +correlation and making an elevation line plot. """ # %% # 1. Import cf-python, cf-plot and other required packages: - import cfplot as cfp import cf -import sys -import scipy.stats.mstats as mstats import matplotlib.pyplot as plt +import scipy.stats.mstats as mstats -#%% +# %% # 2. Read the data in: -# We are investigating the influence of the land height on the snow cover, -#so snow cover is the dependent variable. You can use different variable +# We are investigating the influence of the land height on the snow cover, +# so snow cover is the dependent variable. You can use different variable # names for easier understanding. 
# We are selecting the first field in the data with [0] -PATH="~/summerstudents/final-recipes/new-required-datasets" +PATH = "~/summerstudents/final-recipes/new-required-datasets" orog = cf.read(f"{PATH}/1km_elevation.nc")[0] snow = cf.read(f"{PATH}/snowcover")[0] # Could use any of the seven days by indexing differently snow_day = snow[0] # first day, Jan 1st of this year (2024) +snow_day.dump() +orog.dump() +print("OROG DATA IS", orog.data) -#%% +# %% # 3. Choose the regions and subspace to same area for both datasets: -# Ensuring both datasets cover it, and also that its not too large as then -# regridding could cause the program to crash on small computers (dont +# Ensuring both datasets cover it, and also that its not too large as then +# regridding could cause the program to crash on small computers (dont # regrid the whole map), as well as being mainly land. # Tool for finding rectangular area for lat and lon points # https://www.findlatitudeandlongitude.com/l/Yorkshire%2C+England/5009414/ region_in_mid_uk = [(-3.0, -1.0), (52.0, 55.0)] # lon then lat use_orog = orog.subspace( - longitude=cf.wi(*region_in_mid_uk[0]), - latitude=cf.wi(*region_in_mid_uk[1]) + longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1]) ) use_snow = snow_day.subspace( - longitude=cf.wi(*region_in_mid_uk[0]), - latitude=cf.wi(*region_in_mid_uk[1]) + longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1]) ) -#%% +# %% # 4. Normalise the data and change the units appropriately. 
# Reassign the units as they are removed by cf-python after calculation # Only do this if you are certain the units are convertible to % # This normalises to between 0 and 1 so multiply by 100 to get a percentage: -use_snow_normal = ((use_snow - use_snow.minimum())/ (use_snow.range()))*100 +use_snow_normal = ((use_snow - use_snow.minimum()) / (use_snow.range())) * 100 # TODO SB not CF Compliant way to handle normalisation, check use_snow_normal.override_units("percentage", inplace=True) -#%% +# %% # 5. Regrid the data to have a comparable array. -# Here the orography file is regridded to the snow file +# Here the orography file is regridded to the snow file # as snow is higher resolution (if this doesnt work switch them). reg = use_orog.regrids(use_snow_normal, method="linear") -#%% +# %% # 6. Squeeze snow data to remove the size 1 axes use_snow_normal = use_snow_normal.squeeze() -#%% +# %% # 7. Final statistical calculations coefficient = mstats.pearsonr(reg.array, use_snow_normal.array) -print(f"The Pearson correlation coefficient is {coefficient}") +print(f"The Pearson correlation coefficient is: {coefficient}") -#%% +# %% # 8. Plots including title stating the coefficient. # First it outputs the newly formatted data, you can change the file names # the plots will save as. 
-# colour_scale.txt is a colour scale specifically made and saved to show +# colour_scale.txt is a colour scale specifically made and saved to show # the data but you could make your own or use existing ones from # https://ncas-cms.github.io/cf-plot/build/colour_scales.html # You will also need to adjust the labels and axis for your region -cfp.gopen(rows=1, columns=2, file="snow_and_orog_on_same_grid.png") -###cfp.cscale("~/cfplot_data/colour_scale.txt") -# Joint config +cfp.gopen( + rows=1, columns=2, top=0.85, wspace=0.001, + file="snow_and_orog_on_same_grid.png", + user_position=True, +) +# TODO SB cfp.cscale("~/cfplot_data/colour_scale.txt") +# Joint config including adding an overall title +plt.suptitle( + ( + "Snow cover in relation to elevation over the same area of the UK " + "at midnight on\n2024-01-01: the correlation coefficient between " + f"the two datasets is {coefficient.statistic:.4g} (4 s.f.)" + ), + fontsize=18, +) cfp.mapset(resolution="10m") +cfp.setvars(ocean_color="white", lake_color="white") label_info = { - "xticklabels": ("3W", "2W", "1W"), - "yticklabels": ("52N", "53N", "54N", "55N"), + "xticklabels": ("3W", "2W", "1W"), + "yticklabels": ("52N", "53N", "54N", "55N"), "xticks": (-3, -2, -1), "yticks": (52, 53, 54, 55), } + cfp.gpos(1) -cfp.con(use_snow_normal, lines=False, title = "Snow Cover Plot", - **label_info) -# This plots the regridded file data. The destination grid is the snow grid. 
-cfp.gpos(2) cfp.cscale("wiki_2_0_reduced") -cfp.con(reg, lines=False, title="Elevation (1km resolution Orography) Plot", - **label_info) +cfp.con( + reg, + lines=False, + title="Elevation (from 1km-resolution orography)", + colorbar_drawedges=False, + **label_info, +) +cfp.gpos(2) +cfp.cscale("precip4_11lev", ncols=22, reverse=1) +cfp.con(use_snow_normal, lines=False, + title="Snow cover extent (from satellite imagery)", + colorbar_drawedges=False, + yaxis=False, # since is same as one on first column plot + **label_info +) + cfp.gclose() From 10354b7e474facb748f4f49e10ede507e54cc6d9 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 9 Oct 2024 01:16:56 +0100 Subject: [PATCH 05/11] Update NH original recipe 18 to improve variable names --- docs/source/recipes/plot_18_recipe.py | 39 ++++++++++++--------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py index 24c9c19721..eff2d252a1 100644 --- a/docs/source/recipes/plot_18_recipe.py +++ b/docs/source/recipes/plot_18_recipe.py @@ -27,9 +27,6 @@ snow = cf.read(f"{PATH}/snowcover")[0] # Could use any of the seven days by indexing differently snow_day = snow[0] # first day, Jan 1st of this year (2024) -snow_day.dump() -orog.dump() -print("OROG DATA IS", orog.data) # %% # 3. 
Choose the regions and subspace to same area for both datasets: @@ -39,10 +36,10 @@ # Tool for finding rectangular area for lat and lon points # https://www.findlatitudeandlongitude.com/l/Yorkshire%2C+England/5009414/ region_in_mid_uk = [(-3.0, -1.0), (52.0, 55.0)] # lon then lat -use_orog = orog.subspace( +sub_orog = orog.subspace( longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1]) ) -use_snow = snow_day.subspace( +sub_snow = snow_day.subspace( longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1]) ) @@ -51,23 +48,22 @@ # Reassign the units as they are removed by cf-python after calculation # Only do this if you are certain the units are convertible to % # This normalises to between 0 and 1 so multiply by 100 to get a percentage: -use_snow_normal = ((use_snow - use_snow.minimum()) / (use_snow.range())) * 100 -# TODO SB not CF Compliant way to handle normalisation, check -use_snow_normal.override_units("percentage", inplace=True) +sub_snow = ((sub_snow - sub_snow.minimum()) / (sub_snow.range())) * 100 +sub_snow.override_units("1", inplace=True) # %% # 5. Regrid the data to have a comparable array. # Here the orography file is regridded to the snow file # as snow is higher resolution (if this doesnt work switch them). -reg = use_orog.regrids(use_snow_normal, method="linear") +regridded_orog = sub_orog.regrids(sub_snow, method="linear") # %% # 6. Squeeze snow data to remove the size 1 axes -use_snow_normal = use_snow_normal.squeeze() +sub_snow = sub_snow.squeeze() # %% # 7. 
Final statistical calculations -coefficient = mstats.pearsonr(reg.array, use_snow_normal.array) +coefficient = mstats.pearsonr(regridded_orog.array, sub_snow.array) print(f"The Pearson correlation coefficient is: {coefficient}") @@ -78,21 +74,21 @@ # colour_scale.txt is a colour scale specifically made and saved to show # the data but you could make your own or use existing ones from # https://ncas-cms.github.io/cf-plot/build/colour_scales.html -# You will also need to adjust the labels and axis for your region +# You will also need to adjust the labels and axis for your regridded_orogion cfp.gopen( - rows=1, columns=2, top=0.85, wspace=0.001, + rows=1, columns=2, top=0.85, file="snow_and_orog_on_same_grid.png", user_position=True, ) -# TODO SB cfp.cscale("~/cfplot_data/colour_scale.txt") + # Joint config including adding an overall title plt.suptitle( ( - "Snow cover in relation to elevation over the same area of the UK " - "at midnight on\n2024-01-01: the correlation coefficient between " - f"the two datasets is {coefficient.statistic:.4g} (4 s.f.)" + "Snow cover compared to elevation for the same area of the UK " + "at midnight\n2024-01-01, with correlation coefficient " + f"(on the same grid) of {coefficient.statistic:.4g} (4 s.f.)" ), - fontsize=18, + fontsize=17, ) cfp.mapset(resolution="10m") cfp.setvars(ocean_color="white", lake_color="white") @@ -103,11 +99,11 @@ "yticks": (52, 53, 54, 55), } - +# PLot the two contour plots as columns cfp.gpos(1) cfp.cscale("wiki_2_0_reduced") cfp.con( - reg, + regridded_orog, lines=False, title="Elevation (from 1km-resolution orography)", colorbar_drawedges=False, @@ -115,10 +111,9 @@ ) cfp.gpos(2) cfp.cscale("precip4_11lev", ncols=22, reverse=1) -cfp.con(use_snow_normal, lines=False, +cfp.con(sub_snow, lines=False, title="Snow cover extent (from satellite imagery)", colorbar_drawedges=False, - yaxis=False, # since is same as one on first column plot **label_info ) From 558c02f42a54b0f7eb2ec8d2e10464903a8fd66d Mon Sep 17 
00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 9 Oct 2024 01:37:04 +0100 Subject: [PATCH 06/11] Generalise NH recipe 18 to process any day from week's dataset --- docs/source/recipes/plot_18_recipe.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py index eff2d252a1..7695d884d9 100644 --- a/docs/source/recipes/plot_18_recipe.py +++ b/docs/source/recipes/plot_18_recipe.py @@ -27,6 +27,9 @@ snow = cf.read(f"{PATH}/snowcover")[0] # Could use any of the seven days by indexing differently snow_day = snow[0] # first day, Jan 1st of this year (2024) +# Find day corresponding to aggregated dataset +snow_day_dt = snow_day.coordinate("time")[0].data +snow_day_daystring = f"{snow_day_dt.datetime_as_string[0].split(" ")[0]}" # %% # 3. Choose the regions and subspace to same area for both datasets: @@ -64,6 +67,8 @@ # %% # 7. Final statistical calculations coefficient = mstats.pearsonr(regridded_orog.array, sub_snow.array) +# Need to use scipy.stats.mstats, not just scipy.stats, to account for masked +# data in the array(s) properly. print(f"The Pearson correlation coefficient is: {coefficient}") @@ -85,8 +90,9 @@ plt.suptitle( ( "Snow cover compared to elevation for the same area of the UK " - "at midnight\n2024-01-01, with correlation coefficient " - f"(on the same grid) of {coefficient.statistic:.4g} (4 s.f.)" + f"aggregated across\n day {snow_day_daystring} with correlation " + "coefficient (on the same grid) of " + f"{coefficient.statistic:.4g} (4 s.f.)" ), fontsize=17, ) From 36824213297e3ef3464098443b50e1a94027efc9 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Wed, 9 Oct 2024 11:54:40 +0100 Subject: [PATCH 07/11] Update NH recipe 18 to tidy & add detail to comments --- docs/source/recipes/plot_18_recipe.py | 101 ++++++++++++++------------ 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py index 7695d884d9..ae9e6b434c 100644 --- a/docs/source/recipes/plot_18_recipe.py +++ b/docs/source/recipes/plot_18_recipe.py @@ -1,11 +1,13 @@ """ -Plot to compare two linked datasets: Snow against Elevation -=========================================================== +Calculating the Pearson correlation coefficient between datasets +================================================================ + +In this recipe, we will take two datasets, one for an independent variable +(in this example elevation) and one for a dependant variable (snow +cover over a particuar day), regrid them to the same resolution then +calculate the correlation coefficient, to get a measure of the relationship +between them. -In this recipe, we will plot a dependant variable (in this example snow -cover) against an independent variable (elevation). This will be on a -contour plot separately and together, then finding the coefficient of -correlation and making an elevation line plot. """ # %% @@ -17,28 +19,34 @@ import scipy.stats.mstats as mstats # %% -# 2. Read the data in: -# We are investigating the influence of the land height on the snow cover, -# so snow cover is the dependent variable. You can use different variable -# names for easier understanding. -# We are selecting the first field in the data with [0] +# 2. Read the data in and unpack the Fields from FieldLists using indexing. +# In our example We are investigating the influence of the land height on +# the snow cover extent, so snow cover is the dependent variable. 
The data +# was sourced from +# TODO SOURCES: PATH = "~/summerstudents/final-recipes/new-required-datasets" orog = cf.read(f"{PATH}/1km_elevation.nc")[0] snow = cf.read(f"{PATH}/snowcover")[0] -# Could use any of the seven days by indexing differently -snow_day = snow[0] # first day, Jan 1st of this year (2024) -# Find day corresponding to aggregated dataset + + +# %% +# 3. Choose the day of pre-aggregated snow cover to investigate. We will +# take the first datetime element corresponding to the first day from the +# datasets, 1st January 2024, but by changing the indexing you can explore +# other days by changing the index. We also get the string corresponding to +# the date, to reference later: +snow_day = snow[0] snow_day_dt = snow_day.coordinate("time")[0].data -snow_day_daystring = f"{snow_day_dt.datetime_as_string[0].split(" ")[0]}" +snow_day_daystring = f"{snow_day_dt.datetime_as_string[0].split(' ')[0]}" # %% -# 3. Choose the regions and subspace to same area for both datasets: -# Ensuring both datasets cover it, and also that its not too large as then -# regridding could cause the program to crash on small computers (dont -# regrid the whole map), as well as being mainly land. -# Tool for finding rectangular area for lat and lon points -# https://www.findlatitudeandlongitude.com/l/Yorkshire%2C+England/5009414/ -region_in_mid_uk = [(-3.0, -1.0), (52.0, 55.0)] # lon then lat +# 4. Choose the region to consider to compare the relationship across, +# which must be defined across both datasets, though not necessarily on the +# same grid since we regrid to the same grid next and subspace to the same +# area for both datasets ready for comparison in the next steps. By changing +# the latitude and longitude points in the tuple below, you can change the +# area that is used: +region_in_mid_uk = ((-3.0, -1.0), (52.0, 55.0)) sub_orog = orog.subspace( longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1]) ) @@ -47,46 +55,47 @@ ) # %% -# 4. 
Normalise the data and change the units appropriately. -# Reassign the units as they are removed by cf-python after calculation -# Only do this if you are certain the units are convertible to % -# This normalises to between 0 and 1 so multiply by 100 to get a percentage: -sub_snow = ((sub_snow - sub_snow.minimum()) / (sub_snow.range())) * 100 +# 5. Ensure data quality, since the standard name here corresponds to a +# unitless fraction, but the values are in the tens, so we need to +# normalise these to all lie between 0 and 1 and change the units +# appropriately: +sub_snow = ((sub_snow - sub_snow.minimum()) / (sub_snow.range())) sub_snow.override_units("1", inplace=True) # %% -# 5. Regrid the data to have a comparable array. -# Here the orography file is regridded to the snow file -# as snow is higher resolution (if this doesnt work switch them). +# 6. Regrid the data so that they lie on the same grid and therefore each +# array structure has values with corresponding geospatial points that +# can be statistically compared. Here the elevation field is regridded to the +# snow field since the snow is higher-resolution, but the other way round is +# possible by switching the field order: regridded_orog = sub_orog.regrids(sub_snow, method="linear") # %% -# 6. Squeeze snow data to remove the size 1 axes +# 7. Squeeze the snow data to remove the size 1 axes so we have arrays of +# the same dimensions for each of the two fields to compare: sub_snow = sub_snow.squeeze() # %% -# 7. Final statistical calculations +# 8. Finally, perform the statistical calculation by using the SciPy method +# to find the Pearson correlation coefficient for the two arrays now they are +# in comparable form. 
Note we need to use 'scipy.stats.mstats' and not +# 'scipy.stats' for the 'pearsonr' method, to account for masked +# data in the array(s) properly: coefficient = mstats.pearsonr(regridded_orog.array, sub_snow.array) -# Need to use scipy.stats.mstats, not just scipy.stats, to account for masked -# data in the array(s) properly. print(f"The Pearson correlation coefficient is: {coefficient}") - # %% -# 8. Plots including title stating the coefficient. -# First it outputs the newly formatted data, you can change the file names -# the plots will save as. -# colour_scale.txt is a colour scale specifically made and saved to show -# the data but you could make your own or use existing ones from -# https://ncas-cms.github.io/cf-plot/build/colour_scales.html -# You will also need to adjust the labels and axis for your regridded_orogion +# 9. Make a final plot showing the two arrays side-by-side and quoting the +# determined Pearson correlation coefficient to illustrate the relationship +# and its strength visually. We use 'gpos' to position the plots in two +# columns and apply some specific axes ticks and labels for clarity. 
cfp.gopen( rows=1, columns=2, top=0.85, file="snow_and_orog_on_same_grid.png", user_position=True, ) -# Joint config including adding an overall title +# Joint configuration of the plots, including adding an overall title plt.suptitle( ( "Snow cover compared to elevation for the same area of the UK " @@ -105,7 +114,7 @@ "yticks": (52, 53, 54, 55), } -# PLot the two contour plots as columns +# Plot the two contour plots as columns cfp.gpos(1) cfp.cscale("wiki_2_0_reduced") cfp.con( @@ -116,7 +125,9 @@ **label_info, ) cfp.gpos(2) -cfp.cscale("precip4_11lev", ncols=22, reverse=1) +# Don't add extensions on the colourbar since it can only be 0 to 1 inclusive +cfp.levs(min=0, max=1, step=0.1, extend="neither") +cfp.cscale("precip_11lev", ncols=11, reverse=1) cfp.con(sub_snow, lines=False, title="Snow cover extent (from satellite imagery)", colorbar_drawedges=False, From 73a301810d474d16a38834f893b0114b5ccf080f Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 9 Oct 2024 12:03:54 +0100 Subject: [PATCH 08/11] Add full details of data sources to NH recipe 18 --- docs/source/recipes/plot_18_recipe.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py index ae9e6b434c..2c494cac27 100644 --- a/docs/source/recipes/plot_18_recipe.py +++ b/docs/source/recipes/plot_18_recipe.py @@ -21,12 +21,18 @@ # %% # 2. Read the data in and unpack the Fields from FieldLists using indexing. # In our example We are investigating the influence of the land height on -# the snow cover extent, so snow cover is the dependent variable. 
The snow +# cover data is the +# 'Snow Cover Extent 2017-present (raster 500 m), Europe, daily – version 1' +# sourced from the Copernicus Land Monitoring Service which is described at: +# https://land.copernicus.eu/en/products/snow/snow-cover-extent-europe-v1-0-500m +# and the elevation data is the 'NOAA NGDC GLOBE topo: elevation data' dataset +# which can be sourced from the IRI Data Library, or details found, at: +# http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NGDC/.GLOBE/.topo/index.html. PATH = "~/summerstudents/final-recipes/new-required-datasets" orog = cf.read(f"{PATH}/1km_elevation.nc")[0] snow = cf.read(f"{PATH}/snowcover")[0] +orog.dump() # %% From 76daca3fa06061e891fefd61253bf5da1c121fd2 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 9 Oct 2024 12:07:07 +0100 Subject: [PATCH 09/11] Set recipes path to standard in NH recipe 18 --- docs/source/recipes/plot_18_recipe.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py index 2c494cac27..d219bdfe19 100644 --- a/docs/source/recipes/plot_18_recipe.py +++ b/docs/source/recipes/plot_18_recipe.py @@ -3,7 +3,7 @@ ================================================================ In this recipe, we will take two datasets, one for an independent variable -(in this example elevation) and one for a dependant variable (snow +(in this example elevation) and one for a dependent variable (snow cover over a particuar day), regrid them to the same resolution then calculate the correlation coefficient, to get a measure of the relationship between them. @@ -29,11 +29,8 @@ # and the elevation data is the 'NOAA NGDC GLOBE topo: elevation data' dataset # which can be sourced from the IRI Data Library, or details found, at: # http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NGDC/.GLOBE/.topo/index.html. 
-PATH = "~/summerstudents/final-recipes/new-required-datasets" -orog = cf.read(f"{PATH}/1km_elevation.nc")[0] -snow = cf.read(f"{PATH}/snowcover")[0] -orog.dump() - +orog = cf.read("~/recipes/1km_elevation.nc")[0] +snow = cf.read("~/recipes/snowcover")[0] # %% # 3. Choose the day of pre-aggregated snow cover to investigate. We will From e14b1b18da2f746e28b3785a9d40946226dd4ab6 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 9 Oct 2024 12:11:14 +0100 Subject: [PATCH 10/11] Add NH recipe 18 to recipes listing with filter keywords --- docs/source/recipes/recipe_list.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/recipes/recipe_list.txt b/docs/source/recipes/recipe_list.txt index f39297f9cb..42d0b2cd10 100644 --- a/docs/source/recipes/recipe_list.txt +++ b/docs/source/recipes/recipe_list.txt @@ -27,4 +27,6 @@ plot_13_recipe.html#sphx-glr-recipes-plot-13-recipe-py plot_14_recipe.html#sphx-glr-recipes-plot-14-recipe-py
plot_15_recipe.html#sphx-glr-recipes-plot-15-recipe-py -
\ No newline at end of file +
+plot_18_recipe.html#sphx-glr-recipes-plot-18-recipe-py +
From d841efcf4b5598b4deae052a40f05ffe6b7912ad Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 9 Oct 2024 12:14:39 +0100 Subject: [PATCH 11/11] Add new keyword 'stats' for filtering to appropriate recipes --- docs/source/recipes/recipe_list.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/recipes/recipe_list.txt b/docs/source/recipes/recipe_list.txt index 42d0b2cd10..c571bc4e62 100644 --- a/docs/source/recipes/recipe_list.txt +++ b/docs/source/recipes/recipe_list.txt @@ -13,7 +13,7 @@ plot_06_recipe.html#sphx-glr-recipes-plot-06-recipe-py plot_07_recipe.html#sphx-glr-recipes-plot-07-recipe-py
plot_08_recipe.html#sphx-glr-recipes-plot-08-recipe-py -
+
plot_09_recipe.html#sphx-glr-recipes-plot-09-recipe-py
plot_10_recipe.html#sphx-glr-recipes-plot-10-recipe-py @@ -23,9 +23,9 @@ plot_11_recipe.html#sphx-glr-recipes-plot-11-recipe-py plot_12_recipe.html#sphx-glr-recipes-plot-12-recipe-py
plot_13_recipe.html#sphx-glr-recipes-plot-13-recipe-py -
+
plot_14_recipe.html#sphx-glr-recipes-plot-14-recipe-py -
+
plot_15_recipe.html#sphx-glr-recipes-plot-15-recipe-py
plot_18_recipe.html#sphx-glr-recipes-plot-18-recipe-py