CDAT Migration Phase 2: Refactor tc_analysis set (#829)

* start tc_analysis_refactor * update driver * update plotting * Clean up plotter - Remove unused variables - Make `plot_info` a constant called `PLOT_INFO`, which is now a dict of dicts - Reorder functions for top-down readability * Remove unused notebook --------- Co-authored-by: tomvothecoder <[email protected]>
E3SM-Project · Aug 21, 2024 · e0e3bab · e0e3bab
1 parent eed8a29
commit e0e3bab
Show file tree

Hide file tree

Showing 4 changed files with 466 additions and 28 deletions.
diff --git a/auxiliary_tools/cdat_regression_testing/668-tc_analysis/668-tc_analysis_run_script.py b/auxiliary_tools/cdat_regression_testing/668-tc_analysis/668-tc_analysis_run_script.py
@@ -0,0 +1,51 @@
+"""
+The template run script used for generating results on your development branch.
+
+Steps:
+1. Activate your conda dev env for your branch
+2. `make install` to install the latest version of your branch code into the env
+3. Copy this script into `auxiliary_tools/cdat_regression_testing/<ISSUE>-<SET_NAME>`
+4. Update `SET_DIR` string variable
+5. Update `SET_NAME` string variable.
+   - Options include: "lat_lon", "zonal_mean_xy", "zonal_mean_2d",
+     "zonal_mean_2d_stratosphere", "polar", "cosp_histogram",
+     "meridional_mean_2d", "annual_cycle_zonal_mean", "enso_diags", "qbo",
+     "area_mean_time_series", "diurnal_cycle", "streamflow", "arm_diags",
+     "tc_analysis", "aerosol_aeronet", "aerosol_budget", "mp_partition",
+6. Run this script as a Python module
+   - `auxiliary_tools` is not included in `setup.py`, so `-m` is required
+     to run the script as a Python module
+   - Command: python -m auxiliary_tools.cdat_regression_testing.<ISSUE>-<SET_NAME>.<SCRIPT-NAME>
+   - Example: python -m auxiliary_tools.cdat_regression_testing.660_cosp_histogram.run_script
+7. Run `chmod -R o=rx <SET_DIR>` to allow public access to viewer outputs on the NERSC webserver
+  - Example: `chmod -R o=rx /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/654-zonal_mean_xy`
+  - https://portal.nersc.gov/project/e3sm/cdat-migration-fy24/
+8. Make a copy of the CDAT regression testing notebook in the same directory
+   as this script and follow the instructions there to start testing.
+9. <OPTIONAL> Update `CFG_PATH` to a custom cfg file to debug specific variables.
+   - It is useful to create a custom cfg based on the default diags to debug
+     specific variables that are running into problems.
+   - For example, copy `zonal_mean_xy_model_vs_model.cfg` into the same directory
+     as the copy of this script, then modify it to specific variables. Afterwards
+     update `CFG_PATH` to the path of that .cfg file.
+   - Tip: Use VS Code to step through the code with the Python debugger.
+"""
+from auxiliary_tools.cdat_regression_testing.base_run_script import run_set
+
+# TODO: Update SETS_TO_RUN to the single set you are refactoring.
+# Example: "lat_lon"
+SET_NAME = "tc_analysis"
+# TODO: Update SET_DIR to <ISSUE-SET_NAME>. This string gets appended
+# to the base results_dir, "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/".
+# Example: "671-lat-lon"
+SET_DIR = "688-tc_analysis"
+
+# TODO: <OPTIONAL> UPDATE CFG_PATH if using a custom cfg file for debugging.
+# Example: "auxiliary_tools/cdat_regression_testing/654_zonal_mean_xy.cfg"
+CFG_PATH: str | None = None
+
+# TODO: <OPTIONAL> Update MULTIPROCESSING based on whether to run in parallel or
+# serial. For debugging purposes, set to False to run serially.
+MULTIPROCESSING = True
+
+run_set(SET_NAME, SET_DIR, CFG_PATH, MULTIPROCESSING)
diff --git a/auxiliary_tools/run_tc_analysis.py b/auxiliary_tools/run_tc_analysis.py
@@ -0,0 +1,31 @@
+import os
+from e3sm_diags.parameter.core_parameter import CoreParameter
+from e3sm_diags.parameter.tc_analysis_parameter import TCAnalysisParameter
+from e3sm_diags.run import runner
+
+param = CoreParameter()
+
+#param.test_data_path = '/Users/zhang40/Documents/ACME_simulations/E3SM_v1_H1_6hourly_TC/test'
+param.test_data_path = '/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags/20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis/1_2/1_2'
+param.test_name = '20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis' 
+param.test_data_path = '/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags/extendedOutput.v3.LR.historical_0101/tc-analysis'
+param.test_name = 'extendedOutput.v3.LR.historical_0101' 
+param.short_test_name = 'v3.LR.historical_threshold_1.0'
+param.reference_data_path = '/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags/20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis/0_3/0_3'
+param.ref_name = '20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis'
+param.short_ref_name = 'threshold_0.3'
+param.run_type = 'model_vs_model'
+prefix = '/global/cfs/cdirs/e3sm/www/zhang40/tests'
+#param.multiprocessing = True
+#param.num_workers = 4
+param.results_dir = os.path.join(prefix, 'tc_analysis_model_model')
+#param.test_timeseries_input = True
+#param.ref_timeseries_input = True
+param.test_start_yr = '2000'
+param.test_end_yr = '2014'
+param.ref_start_yr = '0051'
+param.ref_end_yr = '0060'
+
+runner.sets_to_run = ['tc_analysis']
+runner.run_diags([param])
+
diff --git a/e3sm_diags/driver/tc_analysis_driver.py b/e3sm_diags/driver/tc_analysis_driver.py
@@ -5,12 +5,12 @@
 from datetime import datetime, timedelta
 from typing import TYPE_CHECKING, Any, Dict, List, Tuple
 
-import cdms2
 import numpy as np
+import xarray as xr
 from netCDF4 import Dataset as netcdffile
 
 import e3sm_diags
-from e3sm_diags.plot.cartopy import tc_analysis_plot
+from e3sm_diags.plot import tc_analysis_plot
 
 if TYPE_CHECKING:
     from numpy.ma.core import MaskedArray
@@ -74,23 +74,23 @@ def run_diag(parameter: TCAnalysisParameter) -> TCAnalysisParameter:
         test_data_path,
         "cyclones_hist_{}_{}_{}.nc".format(test_name, test_start_yr, test_end_yr),
     )
-    test_cyclones_hist = cdms2.open(test_cyclones_file)(
-        "density", lat=(-60, 60, "ccb"), squeeze=1
-    )
+    test_cyclones_hist = xr.open_dataset(test_cyclones_file).sel(lat=slice(-60, 60))[
+        "density"
+    ]
     test_aew_file = os.path.join(
         test_data_path,
         "aew_hist_{}_{}_{}.nc".format(test_name, test_start_yr, test_end_yr),
     )
-    test_aew_hist = cdms2.open(test_aew_file)(
-        "density", lat=(0, 35, "ccb"), lon=(-180, 0, "ccb"), squeeze=1
-    )
+    test_aew_hist = xr.open_dataset(test_aew_file).sel(
+        lat=slice(0, 35), lon=slice(180, 360)
+    )["density"]
 
     test_data = collections.OrderedDict()
     ref_data = collections.OrderedDict()
 
     test_data["metrics"] = generate_tc_metrics_from_te_stitch_file(test_te_file)
-    test_data["cyclone_density"] = test_cyclones_hist
-    test_data["aew_density"] = test_aew_hist
+    test_data["cyclone_density"] = test_cyclones_hist  # type: ignore
+    test_data["aew_density"] = test_aew_hist  # type: ignore
     test_num_years = int(test_end_yr) - int(test_start_yr) + 1
     test_data["aew_num_years"] = test_num_years  # type: ignore
     test_data["cyclone_num_years"] = test_num_years  # type: ignore
@@ -110,15 +110,22 @@ def run_diag(parameter: TCAnalysisParameter) -> TCAnalysisParameter:
             reference_data_path,
             "cyclones_hist_{}_{}_{}.nc".format(ref_name, ref_start_yr, ref_end_yr),
         )
-        ref_cyclones_hist = cdms2.open(ref_cyclones_file)("density", squeeze=1)
+
+        ref_cyclones_hist = xr.open_dataset(ref_cyclones_file).sel(lat=slice(-60, 60))[
+            "density"
+        ]
+
         ref_aew_file = os.path.join(
             reference_data_path,
             "aew_hist_{}_{}_{}.nc".format(ref_name, ref_start_yr, ref_end_yr),
         )
-        ref_aew_hist = cdms2.open(ref_aew_file)("density", squeeze=1)
+        # Note the refactor included subset that was missed in original implementation
+        ref_aew_hist = xr.open_dataset(ref_aew_file).sel(
+            lat=slice(0, 35), lon=slice(180, 360)
+        )["density"]
         ref_data["metrics"] = generate_tc_metrics_from_te_stitch_file(ref_te_file)
-        ref_data["cyclone_density"] = ref_cyclones_hist
-        ref_data["aew_density"] = ref_aew_hist
+        ref_data["cyclone_density"] = ref_cyclones_hist  # type: ignore
+        ref_data["aew_density"] = ref_aew_hist  # type: ignore
         ref_num_years = int(ref_end_yr) - int(ref_start_yr) + 1
         ref_data["aew_num_years"] = ref_num_years  # type: ignore
         ref_data["cyclone_num_years"] = ref_num_years  # type: ignore
@@ -130,16 +137,18 @@ def run_diag(parameter: TCAnalysisParameter) -> TCAnalysisParameter:
         ref_cyclones_file = os.path.join(
             reference_data_path, "cyclones_hist_IBTrACS_1979_2018.nc"
         )
-        ref_cyclones_hist = cdms2.open(ref_cyclones_file)(
-            "density", lat=(-60, 60, "ccb"), squeeze=1
-        )
+        ref_cyclones_hist = xr.open_dataset(ref_cyclones_file).sel(lat=slice(-60, 60))[
+            "density"
+        ]
+
         ref_aew_file = os.path.join(reference_data_path, "aew_hist_ERA5_2010_2014.nc")
-        ref_aew_hist = cdms2.open(ref_aew_file)(
-            "density", lat=(0, 35, "ccb"), lon=(180, 360, "ccb"), squeeze=1
-        )
-        ref_data["cyclone_density"] = ref_cyclones_hist
+        ref_aew_hist = xr.open_dataset(ref_aew_file).sel(
+            lat=slice(0, 35), lon=slice(180, 360)
+        )["density"]
+        ref_data["cyclone_density"] = ref_cyclones_hist  # type: ignore
         ref_data["cyclone_num_years"] = 40  # type: ignore
-        ref_data["aew_density"] = ref_aew_hist
+        ref_data["aew_density"] = ref_aew_hist  # type: ignore
+        # Question, should the num_years = 5?
         ref_data["aew_num_years"] = 1  # type: ignore
         parameter.ref_name = "Observation"
         parameter.ref_title = "Observation"
@@ -173,7 +182,7 @@ def generate_tc_metrics_from_te_stitch_file(te_stitch_file: str) -> Dict[str, An
 
     # Use E3SM land-sea mask
     mask_path = os.path.join(e3sm_diags.INSTALL_PATH, "acme_ne30_ocean_land_mask.nc")
-    ocnfrac = cdms2.open(mask_path)("OCNFRAC", squeeze=1)
+    ocnfrac = xr.open_dataset(mask_path)["OCNFRAC"].squeeze(dim="time", drop=True)
 
     # From model data, this dict stores a tuple for each basin.
     # (mean ace, tc_intensity_dist, seasonal_cycle, # storms, # of storms over the ocean)
@@ -278,7 +287,7 @@ def _get_vars_from_te_stitch(
 def _derive_metrics_per_basin(
     num_storms: int,
     vars: Dict[str, Any],
-    ocnfrac: cdms2.dataset.DatasetVariable,
+    ocnfrac: xr.DataArray,
     basin_info: BasinInfo,
 ) -> Dict[str, Any]:
     """Derives metrics for each basin using TE stitch variables and other information.
@@ -287,8 +296,8 @@ def _derive_metrics_per_basin(
     :type num_storms: int
     :param vars: TE stitch variables
     :type vars: Dict[str, Any]
-    :param ocnfrac: Ocnfrac CDMS2 dataset variable
-    :type ocnfrac: cdms2.dataset.DatasetVariable
+    :param ocnfrac: Ocnfrac xarray dataarray variable
+    :type ocnfrac: xarray.DataArray
     :param basin_info: Basin information
     :type basin_info: BasinInfo
     :return: A dictionary containing mod variables
@@ -327,9 +336,9 @@ def _derive_metrics_per_basin(
         yer = yearmc[:, k][~np.isnan(latmc[:, k])]
 
         # Get the nearest location on land-sea mask to the first point of a TC Track
-        p = np.abs(ocnfrac.getLatitude()[:] - lat[0])
+        p = np.abs(ocnfrac.lat.values - lat[0])
         loc_y = int(np.argmin(p))
-        p = np.abs(ocnfrac.getLongitude()[:] - lon[0])
+        p = np.abs(ocnfrac.lon.values - lon[0])
         loc_x = int(np.argmin(p))
         ocn_frac_0 = ocnfrac[loc_y, loc_x]