diff --git a/auxiliary_tools/cdat_regression_testing/668-tc_analysis/668-tc_analysis_run_script.py b/auxiliary_tools/cdat_regression_testing/668-tc_analysis/668-tc_analysis_run_script.py
new file mode 100644
index 000000000..e7358cd70
--- /dev/null
+++ b/auxiliary_tools/cdat_regression_testing/668-tc_analysis/668-tc_analysis_run_script.py
@@ -0,0 +1,51 @@
+"""
+The template run script used for generating results on your development branch.
+
+Steps:
+1. Activate your conda dev env for your branch.
+2. Run `make install` to install the latest version of your branch code into the env.
+3. Copy this script into `auxiliary_tools/cdat_regression_testing/<ISSUE>-<SET_NAME>`.
+4. Update the `SET_DIR` string variable.
+5. Update the `SET_NAME` string variable.
+   - Options include: "lat_lon", "zonal_mean_xy", "zonal_mean_2d",
+     "zonal_mean_2d_stratosphere", "polar", "cosp_histogram",
+     "meridional_mean_2d", "annual_cycle_zonal_mean", "enso_diags", "qbo",
+     "area_mean_time_series", "diurnal_cycle", "streamflow", "arm_diags",
+     "tc_analysis", "aerosol_aeronet", "aerosol_budget", "mp_partition"
+6. Run this script as a Python module.
+   - `auxiliary_tools` is not included in `setup.py`, so `-m` is required
+     to run the script as a Python module.
+   - Command: python -m auxiliary_tools.cdat_regression_testing.<ISSUE>-<SET_NAME>.<SCRIPT_NAME>
+   - Example: python -m auxiliary_tools.cdat_regression_testing.660_cosp_histogram.run_script
+7. Run `chmod -R o=rx <RESULTS_DIR>` to allow public access to viewer outputs on the NERSC webserver.
+   - Example: `chmod -R o=rx /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/654-zonal_mean_xy`
+   - https://portal.nersc.gov/project/e3sm/cdat-migration-fy24/
+8. Make a copy of the CDAT regression testing notebook in the same directory
+   as this script and follow the instructions there to start testing.
+9. Update `CFG_PATH` to a custom cfg file to debug specific variables.
+   - It is useful to create a custom cfg based on the default diags to debug
+     specific variables that are running into problems.
+   - For example, copy `zonal_mean_xy_model_vs_model.cfg` into the same directory
+     as the copy of this script, then trim it down to the variables of interest.
+     Afterwards, update `CFG_PATH` to the path of that .cfg file.
+   - Tip: Use VS Code to step through the code with the Python debugger.
+"""
+from auxiliary_tools.cdat_regression_testing.base_run_script import run_set

+# TODO: Update SET_NAME to the single set you are refactoring.
+# Example: "lat_lon"
+SET_NAME = "tc_analysis"
+# TODO: Update SET_DIR to "<ISSUE>-<SET_NAME>". This string gets appended
+# to the base results_dir, "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/".
+# Example: "671-lat-lon"
+SET_DIR = "688-tc_analysis"
+
+# TODO: Update CFG_PATH if using a custom cfg file for debugging.
+# Example: "auxiliary_tools/cdat_regression_testing/654_zonal_mean_xy.cfg"
+CFG_PATH: str | None = None
+
+# TODO: Update MULTIPROCESSING based on whether to run in parallel or
+# serial. For debugging purposes, set to False to run serially.
+MULTIPROCESSING = True
+
+run_set(SET_NAME, SET_DIR, CFG_PATH, MULTIPROCESSING)
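For a debugging pass against a trimmed-down cfg, the same variables can simply be repointed before calling run_set. A minimal sketch reusing only the names defined in the script above; the .cfg filename is hypothetical:

# Hypothetical debugging setup: the cfg path below is illustrative only.
SET_NAME = "tc_analysis"
SET_DIR = "688-tc_analysis"
CFG_PATH: str | None = (
    "auxiliary_tools/cdat_regression_testing/668-tc_analysis/tc_analysis_debug.cfg"
)
MULTIPROCESSING = False  # run serially so a debugger can step through the driver

run_set(SET_NAME, SET_DIR, CFG_PATH, MULTIPROCESSING)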
diff --git a/auxiliary_tools/run_tc_analysis.py b/auxiliary_tools/run_tc_analysis.py
new file mode 100644
index 000000000..680c413a1
--- /dev/null
+++ b/auxiliary_tools/run_tc_analysis.py
@@ -0,0 +1,31 @@
+import os
+from e3sm_diags.parameter.core_parameter import CoreParameter
+from e3sm_diags.parameter.tc_analysis_parameter import TCAnalysisParameter
+from e3sm_diags.run import runner
+
+param = CoreParameter()
+
+#param.test_data_path = '/Users/zhang40/Documents/ACME_simulations/E3SM_v1_H1_6hourly_TC/test'
+param.test_data_path = '/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags/20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis/1_2/1_2'
+param.test_name = '20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis'
+param.test_data_path = '/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags/extendedOutput.v3.LR.historical_0101/tc-analysis'
+param.test_name = 'extendedOutput.v3.LR.historical_0101'
+param.short_test_name = 'v3.LR.historical_threshold_1.0'
+param.reference_data_path = '/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags/20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis/0_3/0_3'
+param.ref_name = '20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis'
+param.short_ref_name = 'threshold_0.3'
+param.run_type = 'model_vs_model'
+prefix = '/global/cfs/cdirs/e3sm/www/zhang40/tests'
+#param.multiprocessing = True
+#param.num_workers = 4
+param.results_dir = os.path.join(prefix, 'tc_analysis_model_model')
+#param.test_timeseries_input = True
+#param.ref_timeseries_input = True
+param.test_start_yr = '2000'
+param.test_end_yr = '2014'
+param.ref_start_yr = '0051'
+param.ref_end_yr = '0060'
+
+runner.sets_to_run = ['tc_analysis']
+runner.run_diags([param])
+
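`TCAnalysisParameter` is imported above but never instantiated. If set-specific options ever need to be overridden, the usual e3sm_diags pattern (a sketch only, not part of this script) is to pass the set's parameter object to the runner alongside the `CoreParameter`:

# Sketch: combine a TCAnalysisParameter carrying set-specific overrides with the
# CoreParameter defined above before launching the run.
tc_param = TCAnalysisParameter()

runner.sets_to_run = ['tc_analysis']
runner.run_diags([param, tc_param])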
diff --git a/e3sm_diags/driver/tc_analysis_driver.py b/e3sm_diags/driver/tc_analysis_driver.py
index de82b2c1b..ed1bd3a87 100644
--- a/e3sm_diags/driver/tc_analysis_driver.py
+++ b/e3sm_diags/driver/tc_analysis_driver.py
@@ -5,12 +5,12 @@
 from datetime import datetime, timedelta
 from typing import TYPE_CHECKING, Any, Dict, List, Tuple
 
-import cdms2
 import numpy as np
+import xarray as xr
 from netCDF4 import Dataset as netcdffile
 
 import e3sm_diags
-from e3sm_diags.plot.cartopy import tc_analysis_plot
+from e3sm_diags.plot import tc_analysis_plot
 
 if TYPE_CHECKING:
     from numpy.ma.core import MaskedArray
@@ -74,23 +74,23 @@ def run_diag(parameter: TCAnalysisParameter) -> TCAnalysisParameter:
         test_data_path,
         "cyclones_hist_{}_{}_{}.nc".format(test_name, test_start_yr, test_end_yr),
     )
-    test_cyclones_hist = cdms2.open(test_cyclones_file)(
-        "density", lat=(-60, 60, "ccb"), squeeze=1
-    )
+    test_cyclones_hist = xr.open_dataset(test_cyclones_file).sel(lat=slice(-60, 60))[
+        "density"
+    ]
     test_aew_file = os.path.join(
         test_data_path,
         "aew_hist_{}_{}_{}.nc".format(test_name, test_start_yr, test_end_yr),
     )
-    test_aew_hist = cdms2.open(test_aew_file)(
-        "density", lat=(0, 35, "ccb"), lon=(-180, 0, "ccb"), squeeze=1
-    )
+    test_aew_hist = xr.open_dataset(test_aew_file).sel(
+        lat=slice(0, 35), lon=slice(180, 360)
+    )["density"]
 
     test_data = collections.OrderedDict()
     ref_data = collections.OrderedDict()
 
     test_data["metrics"] = generate_tc_metrics_from_te_stitch_file(test_te_file)
-    test_data["cyclone_density"] = test_cyclones_hist
-    test_data["aew_density"] = test_aew_hist
+    test_data["cyclone_density"] = test_cyclones_hist  # type: ignore
+    test_data["aew_density"] = test_aew_hist  # type: ignore
     test_num_years = int(test_end_yr) - int(test_start_yr) + 1
     test_data["aew_num_years"] = test_num_years  # type: ignore
     test_data["cyclone_num_years"] = test_num_years  # type: ignore
@@ -110,15 +110,22 @@ def run_diag(parameter: TCAnalysisParameter) -> TCAnalysisParameter:
             reference_data_path,
             "cyclones_hist_{}_{}_{}.nc".format(ref_name, ref_start_yr, ref_end_yr),
         )
-        ref_cyclones_hist = cdms2.open(ref_cyclones_file)("density", squeeze=1)
+
+        ref_cyclones_hist = xr.open_dataset(ref_cyclones_file).sel(lat=slice(-60, 60))[
+            "density"
+        ]
+
         ref_aew_file = os.path.join(
             reference_data_path,
             "aew_hist_{}_{}_{}.nc".format(ref_name, ref_start_yr, ref_end_yr),
         )
-        ref_aew_hist = cdms2.open(ref_aew_file)("density", squeeze=1)
+        # Note: the refactor adds the lat/lon subsetting that was missing in the original implementation.
+        ref_aew_hist = xr.open_dataset(ref_aew_file).sel(
+            lat=slice(0, 35), lon=slice(180, 360)
+        )["density"]
         ref_data["metrics"] = generate_tc_metrics_from_te_stitch_file(ref_te_file)
-        ref_data["cyclone_density"] = ref_cyclones_hist
-        ref_data["aew_density"] = ref_aew_hist
+        ref_data["cyclone_density"] = ref_cyclones_hist  # type: ignore
+        ref_data["aew_density"] = ref_aew_hist  # type: ignore
         ref_num_years = int(ref_end_yr) - int(ref_start_yr) + 1
         ref_data["aew_num_years"] = ref_num_years  # type: ignore
         ref_data["cyclone_num_years"] = ref_num_years  # type: ignore
@@ -130,16 +137,18 @@ def run_diag(parameter: TCAnalysisParameter) -> TCAnalysisParameter:
         ref_cyclones_file = os.path.join(
             reference_data_path, "cyclones_hist_IBTrACS_1979_2018.nc"
        )
-        ref_cyclones_hist = cdms2.open(ref_cyclones_file)(
-            "density", lat=(-60, 60, "ccb"), squeeze=1
-        )
+        ref_cyclones_hist = xr.open_dataset(ref_cyclones_file).sel(lat=slice(-60, 60))[
+            "density"
+        ]
+
         ref_aew_file = os.path.join(reference_data_path, "aew_hist_ERA5_2010_2014.nc")
-        ref_aew_hist = cdms2.open(ref_aew_file)(
-            "density", lat=(0, 35, "ccb"), lon=(180, 360, "ccb"), squeeze=1
-        )
-        ref_data["cyclone_density"] = ref_cyclones_hist
+        ref_aew_hist = xr.open_dataset(ref_aew_file).sel(
+            lat=slice(0, 35), lon=slice(180, 360)
+        )["density"]
+        ref_data["cyclone_density"] = ref_cyclones_hist  # type: ignore
         ref_data["cyclone_num_years"] = 40  # type: ignore
-        ref_data["aew_density"] = ref_aew_hist
+        ref_data["aew_density"] = ref_aew_hist  # type: ignore
+        # Question: should aew_num_years be 5 to match ERA5 2010-2014?
         ref_data["aew_num_years"] = 1  # type: ignore
         parameter.ref_name = "Observation"
         parameter.ref_title = "Observation"
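The subsetting changes above are the crux of the CDAT-to-xarray migration: cdms2's call-style selection with "ccb" bounds handling becomes label-based `.sel` slicing, and the AEW window moves from the -180 to 0 convention to the equivalent 180 to 360 band on the files' 0-360 longitude axis. A minimal sketch of the equivalence, assuming ascending lat/lon coordinates (slicing selects by coordinate values, not cell bounds); the file names are placeholders:

import xarray as xr

# cdms2: cdms2.open(path)("density", lat=(-60, 60, "ccb"), squeeze=1)
# xarray equivalent used in run_diag above, assuming "lat" is ascending:
cyclones = xr.open_dataset("cyclones_hist_example.nc")  # placeholder file name
cyclone_density = cyclones["density"].sel(lat=slice(-60, 60)).squeeze()

# cdms2: ("density", lat=(0, 35, "ccb"), lon=(-180, 0, "ccb"), squeeze=1)
# xarray equivalent, selecting the same western-hemisphere band on a 0-360 axis:
aew = xr.open_dataset("aew_hist_example.nc")  # placeholder file name
aew_density = aew["density"].sel(lat=slice(0, 35), lon=slice(180, 360))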
@@ -173,7 +182,7 @@ def generate_tc_metrics_from_te_stitch_file(te_stitch_file: str) -> Dict[str, Any]:
 
     # Use E3SM land-sea mask
     mask_path = os.path.join(e3sm_diags.INSTALL_PATH, "acme_ne30_ocean_land_mask.nc")
-    ocnfrac = cdms2.open(mask_path)("OCNFRAC", squeeze=1)
+    ocnfrac = xr.open_dataset(mask_path)["OCNFRAC"].squeeze(dim="time", drop=True)
 
     # From model data, this dict stores a tuple for each basin.
     # (mean ace, tc_intensity_dist, seasonal_cycle, # storms, # of storms over the ocean)
@@ -278,7 +287,7 @@ def _get_vars_from_te_stitch(
 def _derive_metrics_per_basin(
     num_storms: int,
     vars: Dict[str, Any],
-    ocnfrac: cdms2.dataset.DatasetVariable,
+    ocnfrac: xr.DataArray,
     basin_info: BasinInfo,
 ) -> Dict[str, Any]:
     """Derives metrics for each basin using TE stitch variables and other information.
@@ -287,8 +296,8 @@ def _derive_metrics_per_basin(
     :type num_storms: int
     :param vars: TE stitch variables
     :type vars: Dict[str, Any]
-    :param ocnfrac: Ocnfrac CDMS2 dataset variable
-    :type ocnfrac: cdms2.dataset.DatasetVariable
+    :param ocnfrac: OCNFRAC xarray.DataArray variable
+    :type ocnfrac: xarray.DataArray
     :param basin_info: Basin information
     :type basin_info: BasinInfo
     :return: A dictionary containing mod variables
@@ -327,9 +336,9 @@ def _derive_metrics_per_basin(
         yer = yearmc[:, k][~np.isnan(latmc[:, k])]
 
         # Get the nearest location on land-sea mask to the first point of a TC Track
-        p = np.abs(ocnfrac.getLatitude()[:] - lat[0])
+        p = np.abs(ocnfrac.lat.values - lat[0])
         loc_y = int(np.argmin(p))
-        p = np.abs(ocnfrac.getLongitude()[:] - lon[0])
+        p = np.abs(ocnfrac.lon.values - lon[0])
         loc_x = int(np.argmin(p))
 
         ocn_frac_0 = ocnfrac[loc_y, loc_x]
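The nearest-grid-cell lookup in the last hunk ports the cdms2 getLatitude()/getLongitude() arithmetic directly to NumPy. For reference, an equivalent and more idiomatic xarray formulation (a sketch, not what the driver currently does, and it assumes the track longitudes use the same convention as the mask's lon axis) would be:

# Sketch of an xarray alternative to the manual argmin lookup above: select the
# mask cell nearest to the first point of the TC track.
ocn_frac_0 = float(ocnfrac.sel(lat=lat[0], lon=lon[0], method="nearest"))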
diff --git a/e3sm_diags/plot/tc_analysis_plot.py b/e3sm_diags/plot/tc_analysis_plot.py
new file mode 100644
index 000000000..39337a490
--- /dev/null
+++ b/e3sm_diags/plot/tc_analysis_plot.py
@@ -0,0 +1,347 @@
+import os
+
+import cartopy.crs as ccrs
+import cartopy.feature as cfeature
+import matplotlib
+import numpy as np
+import xcdat as xc
+from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter
+
+from e3sm_diags.driver.utils.general import get_output_dir
+from e3sm_diags.logger import custom_logger
+from e3sm_diags.plot.utils import MAIN_TITLE_FONTSIZE
+
+matplotlib.use("agg")
+import matplotlib.pyplot as plt  # isort:skip  # noqa: E402
+
+logger = custom_logger(__name__)
+
+
+# Position and sizes of subplot axes in page coordinates (0 to 1)
+PANEL_CFG = [
+    (0.1691, 0.55, 0.6465, 0.2758),
+    (0.1691, 0.27, 0.6465, 0.2758),
+]
+
+
+# Each key maps to a dict with the ax extent, x ticks, y ticks, title, clevs, reference label, and time resolution ratio (3-hourly data must be divided by 2 to compare against 6-hourly data).
+# TODO: Make this flexible enough to apply to 3-hourly model output when comparing track density.
+PLOT_INFO = {
+    "aew": {
+        "ax_extent": [182, 359, 0, 35],
+        "x_ticks": [240, 300],
+        "y_ticks": [0, 15, 30],
+        "title": "African Easterly Wave Density",
+        "clevs": np.arange(0, 15.1, 1),
+        "reference": "ERA5 (2000-2014)",
+        "time_resolution_ratio": 1,
+    },
+    "cyclone": {
+        "ax_extent": [0, 359, -60, 60],
+        "x_ticks": [0, 60, 120, 180, 240, 300, 359.99],
+        "y_ticks": [-60, -30, 0, 30, 60],
+        "title": "TC Tracks Density",
+        "clevs": np.arange(0, 0.3, 0.05),
+        "reference": "IBTrACS (1979-2018)",
+        "time_resolution_ratio": 2,
+    },
+}
+
+
+def plot(test, ref, parameter, basin_dict):
+    test_metrics = test["metrics"]
+    ref_metrics = ref["metrics"]
+
+    test_num_year = test_metrics["num_years"]
+    ref_num_year = ref_metrics["num_years"]
+
+    if parameter.short_test_name:
+        test_name = parameter.short_test_name
+    else:
+        test_name = parameter.test_name
+    ref_name = parameter.ref_name
+
+    # TC intensity for each basin
+    fig, axes = plt.subplots(2, 3, figsize=(12, 7), sharex=True, sharey=True)
+    fig.subplots_adjust(hspace=0.4, wspace=0.15)
+    axes = axes.ravel()
+
+    ace_ref = []
+    ace_test = []
+    num_ref = []
+    num_test = []
+    for ifig, (basin, basin_info) in enumerate(basin_dict.items()):
+        ace_ref.append(ref_metrics[basin][0])
+        ace_test.append(test_metrics[basin][0])
+        num_ref.append(ref_metrics[basin][3])
+        num_test.append(test_metrics[basin][3])
+        ax = axes[ifig]
+        ax.plot(
+            np.arange(1, 7),
+            test_metrics[basin][1] / test_num_year,
+            "--k",
+            linewidth=2,
+            label="Test",
+        )
+        ax.plot(
+            np.arange(1, 7),
+            ref_metrics[basin][1] / ref_num_year,
+            "k",
+            linewidth=2,
+            label="Ref",
+        )
+        ax.legend()
+        ax.set_title(basin_info[0])
+        ax.set_ylabel("TCs per year")
+        plt.xticks([1, 2, 3, 4, 5, 6], ["Cat0", "Cat1", "Cat2", "Cat3", "Cat4", "Cat5"])
+        ax.xaxis.set_tick_params(labelbottom=True)
+
+    plt.suptitle(
+        "Test: {}".format(test_name) + "\n" + "Ref: {}".format(ref_name),
+        ha="left",
+        x=0.1,
+        y=0.99,
+    )
+
+    output_file_name = "tc-intensity"
+    for f in parameter.output_format:
+        f = f.lower().split(".")[-1]
+        fnm = os.path.join(
+            get_output_dir(parameter.current_set, parameter),
+            output_file_name + "." + f,
+        )
+        plt.savefig(fnm)
+        logger.info(f"Plot saved in: {fnm}")
+    plt.close()
+
+    # TC frequency for each basin
+    fig = plt.figure(figsize=(12, 7))
+    ax = fig.add_subplot(111)
+
+    N = 6
+    ind = np.arange(N)  # the x locations for the groups
+    width = 0.27
+
+    ref_vals = num_ref / np.sum(num_ref)
+    rects2 = ax.bar(ind - width / 2, ref_vals, width, color="black")
+    test_vals = num_test / np.sum(num_test)
+    rects1 = ax.bar(ind + width / 2, test_vals, width, color="darkgrey")
+    logger.info("total number based on 6 basins")
+
+    ax.set_xticks(ind)
+    ax.set_xticklabels(
+        (
+            "North Atlantic",
+            "Northwest Pacific",
+            "Eastern Pacific",
+            "North Indian",
+            "South Indian",
+            "South Pacific",
+        )
+    )
+
+    ax.legend(
+        (rects2[0], rects1[0]),
+        (
+            "{}: (~{}) storms per year".format(ref_name, int(np.sum(num_ref))),
+            "{}: (~{}) storms per year".format(test_name, int(np.sum(num_test))),
+        ),
+    )
+    ax.set_ylabel("Fraction")
+    ax.set_title("Relative frequency of TCs for each ocean basin")
+
+    output_file_name = "tc-frequency"
+    for f in parameter.output_format:
+        f = f.lower().split(".")[-1]
+        fnm = os.path.join(
+            get_output_dir(parameter.current_set, parameter),
+            output_file_name + "." + f,
+        )
+        plt.savefig(fnm)
+        logger.info(f"Plot saved in: {fnm}")
+    plt.close()
+
+    fig1 = plt.figure(figsize=(12, 6))
+    ax = fig1.add_subplot(111)
+
+    N = 6
+    ind = np.arange(N)  # the x locations for the groups
+    width = 0.27
+
+    ref_vals = ace_ref / np.sum(ace_ref)
+    rects2 = ax.bar(ind - width / 2, ref_vals, width, color="black")
+    test_vals = ace_test / np.sum(ace_test)
+    rects1 = ax.bar(ind + width / 2, test_vals, width, color="darkgrey")
+
+    ax.set_xticks(ind)
+    ax.set_xticklabels(
+        (
+            "North Atlantic",
+            "Northwest Pacific",
+            "Eastern Pacific",
+            "North Indian",
+            "South Indian",
+            "South Pacific",
+        )
+    )
+
+    ax.legend((rects2[0], rects1[0]), (ref_name, test_name))
+    ax.set_ylabel("Fraction")
+    ax.set_title(
+        "Distribution of accumulated cyclone energy (ACE) among various ocean basins"
+    )
+    output_file_name = "ace-distribution"
+    for f in parameter.output_format:
+        f = f.lower().split(".")[-1]
+        fnm = os.path.join(
+            get_output_dir(parameter.current_set, parameter),
+            output_file_name + "." + f,
+        )
+        plt.savefig(fnm)
+        logger.info(f"Plot saved in: {fnm}")
+    plt.close()
+
+    fig, axes = plt.subplots(2, 3, figsize=(12, 6), sharex=True, sharey=True)
+    fig.subplots_adjust(hspace=0.4, wspace=0.15)
+    axes = axes.ravel()
+
+    for ifig, (basin, basin_info) in enumerate(basin_dict.items()):
+        ax = axes[ifig]
+        ax.plot(np.arange(1, 13), ref_metrics[basin][2], "k", linewidth=2, label="Ref")
+        ax.plot(
+            np.arange(1, 13),
+            test_metrics[basin][2],
+            "--k",
+            linewidth=2,
+            label="Test",
+        )
+        ax.legend()
+        ax.set_title(basin_info[0])
+        ax.set_ylabel("Fraction")
+        plt.xticks(
+            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+            ["J", "F", "M", "A", "M", "J", "J", "A", "S", "O", "N", "D"],
+        )
+        ax.xaxis.set_tick_params(labelbottom=True)
+
+    plt.suptitle(
+        "Test: {}".format(test_name) + "\n" + "Ref: {}".format(ref_name),
+        ha="left",
+        x=0.1,
+        y=0.99,
+    )
+
+    output_file_name = "tc-frequency-annual-cycle"
+    for f in parameter.output_format:
+        f = f.lower().split(".")[-1]
+        fnm = os.path.join(
+            get_output_dir(parameter.current_set, parameter),
+            output_file_name + "." + f,
+        )
+        plt.savefig(fnm)
+        logger.info(f"Plot saved in: {fnm}")
+    plt.close()
+
+    ##########################################################
+    # Plot AEW density maps
+    plot_map(test, ref, "aew", parameter)
+
+    # Plot TC track density maps
+    plot_map(test, ref, "cyclone", parameter)
+
+
+def plot_map(test_data, ref_data, region, parameter):
+    """Create the figure and projection for the density maps."""
+
+    test = test_data["{}_density".format(region)]
+    test_num_years = test_data["{}_num_years".format(region)]
+
+    ref = ref_data["{}_density".format(region)]
+    ref_num_years = ref_data["{}_num_years".format(region)]
+
+    fig = plt.figure(figsize=(8.5, 8.5), dpi=parameter.dpi)
+    proj = ccrs.PlateCarree(central_longitude=180)
+
+    # First panel
+    plot_panel(
+        0,
+        fig,
+        proj,
+        test,
+        test_num_years,
+        region,
+        parameter.test_title,
+    )
+
+    # Second panel
+    plot_panel(
+        1,
+        fig,
+        proj,
+        ref,
+        ref_num_years,
+        region,
+        parameter.ref_title,
+    )
+
+    # Figure title
+    fig.suptitle(PLOT_INFO[region]["title"], x=0.5, y=0.9, fontsize=14)
+    output_file_name = "{}-density-map".format(region)
+
+    for f in parameter.output_format:
+        f = f.lower().split(".")[-1]
+        fnm = os.path.join(
+            get_output_dir(parameter.current_set, parameter),
+            output_file_name + "." + f,
+        )
+        plt.savefig(fnm)
+        logger.info(f"Plot saved in: {fnm}")
+    plt.close()
+
+
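For reference, plot() and plot_map() consume the ordered dictionaries assembled by run_diag in the driver diff above. A sketch of their expected shape (the key names come from the driver; the array sizes and year counts below are placeholders, and the real DataArrays carry lat/lon coordinates read from the histogram files):

import collections

import numpy as np
import xarray as xr

test_data = collections.OrderedDict()
test_data["metrics"] = {"num_years": 15}  # plus one metrics tuple per basin
test_data["cyclone_density"] = xr.DataArray(np.zeros((121, 360)), dims=["lat", "lon"])
test_data["cyclone_num_years"] = 15
test_data["aew_density"] = xr.DataArray(np.zeros((36, 180)), dims=["lat", "lon"])
test_data["aew_num_years"] = 15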
+def plot_panel(n, fig, proj, var, var_num_years, region, title):
+    ax = fig.add_axes(PANEL_CFG[n], projection=proj)
+    ax.set_extent(PLOT_INFO[region]["ax_extent"], ccrs.PlateCarree())
+
+    clevs = PLOT_INFO[region]["clevs"]
+
+    lat = xc.get_dim_coords(var, axis="Y")
+    lon = xc.get_dim_coords(var, axis="X")
+
+    var = var.squeeze()
+    p1 = ax.contourf(
+        lon,
+        lat,
+        var / var_num_years / PLOT_INFO[region]["time_resolution_ratio"],
+        transform=ccrs.PlateCarree(),
+        levels=clevs,
+        extend="both",
+        cmap="jet",
+    )
+    ax.coastlines(lw=0.3)
+    ax.add_feature(cfeature.LAND, zorder=100, edgecolor="k")
+
+    if title != "Observation":
+        ax.set_title("{}".format(title), fontdict={"fontsize": MAIN_TITLE_FONTSIZE})
+    else:
+        ax.set_title(
+            "{}".format(PLOT_INFO[region]["reference"]),
+            fontdict={"fontsize": MAIN_TITLE_FONTSIZE},
+        )
+    ax.set_xticks(PLOT_INFO[region]["x_ticks"], crs=ccrs.PlateCarree())
+    ax.set_yticks(PLOT_INFO[region]["y_ticks"], crs=ccrs.PlateCarree())
+    lon_formatter = LongitudeFormatter(zero_direction_label=True, number_format=".0f")
+    lat_formatter = LatitudeFormatter()
+    ax.xaxis.set_major_formatter(lon_formatter)
+    ax.yaxis.set_major_formatter(lat_formatter)
+    ax.tick_params(labelsize=8.0, direction="out", width=1)
+    ax.xaxis.set_ticks_position("bottom")
+    ax.yaxis.set_ticks_position("left")
+
+    cbax = fig.add_axes(
+        (PANEL_CFG[n][0] + 0.6635, PANEL_CFG[n][1] + 0.0215, 0.0326, 0.1792)
+    )
+    cbar = fig.colorbar(p1, cax=cbax)
+
+    cbar.ax.tick_params(labelsize=9.0, length=0)
+    return
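plot_panel fetches the coordinates through xcdat's get_dim_coords (by CF axis rather than by coordinate name), exactly as shown above. A small self-contained illustration of that call on a toy DataArray (the grid below is made up for illustration):

import numpy as np
import xarray as xr
import xcdat as xc

# Toy density grid with CF "axis" attributes so xcdat can identify the
# dimension coordinates; equivalent here to accessing da.lat / da.lon directly.
da = xr.DataArray(
    np.zeros((3, 4)),
    dims=["lat", "lon"],
    coords={
        "lat": ("lat", [-10.0, 0.0, 10.0], {"axis": "Y", "units": "degrees_north"}),
        "lon": ("lon", [0.0, 90.0, 180.0, 270.0], {"axis": "X", "units": "degrees_east"}),
    },
    name="density",
)

lat = xc.get_dim_coords(da, axis="Y")  # returns the "lat" coordinate DataArray
lon = xc.get_dim_coords(da, axis="X")  # returns the "lon" coordinate DataArray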