Skip to content

Commit

Permalink
CDAT Migration Phase 2: Refactor tc_analysis set (#829)
Browse files Browse the repository at this point in the history
* start tc_analysis_refactor

* update driver

* update plotting

* Clean up plotter
- Remove unused variables
- Make `plot_info` a constant called `PLOT_INFO`, which is now a dict of dicts
- Reorder functions for top-down readability

* Remove unused notebook

---------

Co-authored-by: tomvothecoder <[email protected]>
  • Loading branch information
chengzhuzhang and tomvothecoder committed Aug 21, 2024
1 parent eed8a29 commit e0e3bab
Show file tree
Hide file tree
Showing 4 changed files with 466 additions and 28 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
The template run script used for generating results on your development branch.
Steps:
1. Activate your conda dev env for your branch
2. `make install` to install the latest version of your branch code into the env
3. Copy this script into `auxiliary_tools/cdat_regression_testing/<ISSUE>-<SET_NAME>`
4. Update `SET_DIR` string variable
5. Update `SET_NAME` string variable.
- Options include: "lat_lon", "zonal_mean_xy", "zonal_mean_2d",
"zonal_mean_2d_stratosphere", "polar", "cosp_histogram",
"meridional_mean_2d", "annual_cycle_zonal_mean", "enso_diags", "qbo",
"area_mean_time_series", "diurnal_cycle", "streamflow", "arm_diags",
"tc_analysis", "aerosol_aeronet", "aerosol_budget", "mp_partition",
6. Run this script as a Python module
- `auxiliary_tools` is not included in `setup.py`, so `-m` is required
to run the script as a Python module
- Command: python -m auxiliary_tools.cdat_regression_testing.<ISSUE>-<SET_NAME>.<SCRIPT-NAME>
- Example: python -m auxiliary_tools.cdat_regression_testing.660_cosp_histogram.run_script
7. Run `chmod -R o=rx <SET_DIR>` to allow public access to viewer outputs on the NERSC webserver
- Example: `chmod -R o=rx /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/654-zonal_mean_xy`
- https://portal.nersc.gov/project/e3sm/cdat-migration-fy24/
8. Make a copy of the CDAT regression testing notebook in the same directory
as this script and follow the instructions there to start testing.
9. <OPTIONAL> Update `CFG_PATH` to a custom cfg file to debug specific variables.
- It is useful to create a custom cfg based on the default diags to debug
specific variables that are running into problems.
- For example, copy `zonal_mean_xy_model_vs_model.cfg` into the same directory
as the copy of this script, then modify it to specific variables. Afterwards
update `CFG_PATH` to the path of that .cfg file.
- Tip: Use VS Code to step through the code with the Python debugger.
"""
from auxiliary_tools.cdat_regression_testing.base_run_script import run_set

# TODO: Update SETS_TO_RUN to the single set you are refactoring.
# Example: "lat_lon"
SET_NAME = "tc_analysis"
# TODO: Update SET_DIR to <ISSUE-SET_NAME>. This string gets appended
# to the base results_dir, "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/".
# Example: "671-lat-lon"
SET_DIR = "688-tc_analysis"

# TODO: <OPTIONAL> UPDATE CFG_PATH if using a custom cfg file for debugging.
# Example: "auxiliary_tools/cdat_regression_testing/654_zonal_mean_xy.cfg"
CFG_PATH: str | None = None

# TODO: <OPTIONAL> Update MULTIPROCESSING based on whether to run in parallel or
# serial. For debugging purposes, set to False to run serially.
MULTIPROCESSING = True

run_set(SET_NAME, SET_DIR, CFG_PATH, MULTIPROCESSING)
31 changes: 31 additions & 0 deletions auxiliary_tools/run_tc_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import os
from e3sm_diags.parameter.core_parameter import CoreParameter
from e3sm_diags.parameter.tc_analysis_parameter import TCAnalysisParameter
from e3sm_diags.run import runner

param = CoreParameter()

#param.test_data_path = '/Users/zhang40/Documents/ACME_simulations/E3SM_v1_H1_6hourly_TC/test'
param.test_data_path = '/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags/20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis/1_2/1_2'
param.test_name = '20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis'
param.test_data_path = '/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags/extendedOutput.v3.LR.historical_0101/tc-analysis'
param.test_name = 'extendedOutput.v3.LR.historical_0101'
param.short_test_name = 'v3.LR.historical_threshold_1.0'
param.reference_data_path = '/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags/20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis/0_3/0_3'
param.ref_name = '20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis'
param.short_ref_name = 'threshold_0.3'
param.run_type = 'model_vs_model'
prefix = '/global/cfs/cdirs/e3sm/www/zhang40/tests'
#param.multiprocessing = True
#param.num_workers = 4
param.results_dir = os.path.join(prefix, 'tc_analysis_model_model')
#param.test_timeseries_input = True
#param.ref_timeseries_input = True
param.test_start_yr = '2000'
param.test_end_yr = '2014'
param.ref_start_yr = '0051'
param.ref_end_yr = '0060'

runner.sets_to_run = ['tc_analysis']
runner.run_diags([param])

65 changes: 37 additions & 28 deletions e3sm_diags/driver/tc_analysis_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Any, Dict, List, Tuple

import cdms2
import numpy as np
import xarray as xr
from netCDF4 import Dataset as netcdffile

import e3sm_diags
from e3sm_diags.plot.cartopy import tc_analysis_plot
from e3sm_diags.plot import tc_analysis_plot

if TYPE_CHECKING:
from numpy.ma.core import MaskedArray
Expand Down Expand Up @@ -74,23 +74,23 @@ def run_diag(parameter: TCAnalysisParameter) -> TCAnalysisParameter:
test_data_path,
"cyclones_hist_{}_{}_{}.nc".format(test_name, test_start_yr, test_end_yr),
)
test_cyclones_hist = cdms2.open(test_cyclones_file)(
"density", lat=(-60, 60, "ccb"), squeeze=1
)
test_cyclones_hist = xr.open_dataset(test_cyclones_file).sel(lat=slice(-60, 60))[
"density"
]
test_aew_file = os.path.join(
test_data_path,
"aew_hist_{}_{}_{}.nc".format(test_name, test_start_yr, test_end_yr),
)
test_aew_hist = cdms2.open(test_aew_file)(
"density", lat=(0, 35, "ccb"), lon=(-180, 0, "ccb"), squeeze=1
)
test_aew_hist = xr.open_dataset(test_aew_file).sel(
lat=slice(0, 35), lon=slice(180, 360)
)["density"]

test_data = collections.OrderedDict()
ref_data = collections.OrderedDict()

test_data["metrics"] = generate_tc_metrics_from_te_stitch_file(test_te_file)
test_data["cyclone_density"] = test_cyclones_hist
test_data["aew_density"] = test_aew_hist
test_data["cyclone_density"] = test_cyclones_hist # type: ignore
test_data["aew_density"] = test_aew_hist # type: ignore
test_num_years = int(test_end_yr) - int(test_start_yr) + 1
test_data["aew_num_years"] = test_num_years # type: ignore
test_data["cyclone_num_years"] = test_num_years # type: ignore
Expand All @@ -110,15 +110,22 @@ def run_diag(parameter: TCAnalysisParameter) -> TCAnalysisParameter:
reference_data_path,
"cyclones_hist_{}_{}_{}.nc".format(ref_name, ref_start_yr, ref_end_yr),
)
ref_cyclones_hist = cdms2.open(ref_cyclones_file)("density", squeeze=1)

ref_cyclones_hist = xr.open_dataset(ref_cyclones_file).sel(lat=slice(-60, 60))[
"density"
]

ref_aew_file = os.path.join(
reference_data_path,
"aew_hist_{}_{}_{}.nc".format(ref_name, ref_start_yr, ref_end_yr),
)
ref_aew_hist = cdms2.open(ref_aew_file)("density", squeeze=1)
# Note the refactor included subset that was missed in original implementation
ref_aew_hist = xr.open_dataset(ref_aew_file).sel(
lat=slice(0, 35), lon=slice(180, 360)
)["density"]
ref_data["metrics"] = generate_tc_metrics_from_te_stitch_file(ref_te_file)
ref_data["cyclone_density"] = ref_cyclones_hist
ref_data["aew_density"] = ref_aew_hist
ref_data["cyclone_density"] = ref_cyclones_hist # type: ignore
ref_data["aew_density"] = ref_aew_hist # type: ignore
ref_num_years = int(ref_end_yr) - int(ref_start_yr) + 1
ref_data["aew_num_years"] = ref_num_years # type: ignore
ref_data["cyclone_num_years"] = ref_num_years # type: ignore
Expand All @@ -130,16 +137,18 @@ def run_diag(parameter: TCAnalysisParameter) -> TCAnalysisParameter:
ref_cyclones_file = os.path.join(
reference_data_path, "cyclones_hist_IBTrACS_1979_2018.nc"
)
ref_cyclones_hist = cdms2.open(ref_cyclones_file)(
"density", lat=(-60, 60, "ccb"), squeeze=1
)
ref_cyclones_hist = xr.open_dataset(ref_cyclones_file).sel(lat=slice(-60, 60))[
"density"
]

ref_aew_file = os.path.join(reference_data_path, "aew_hist_ERA5_2010_2014.nc")
ref_aew_hist = cdms2.open(ref_aew_file)(
"density", lat=(0, 35, "ccb"), lon=(180, 360, "ccb"), squeeze=1
)
ref_data["cyclone_density"] = ref_cyclones_hist
ref_aew_hist = xr.open_dataset(ref_aew_file).sel(
lat=slice(0, 35), lon=slice(180, 360)
)["density"]
ref_data["cyclone_density"] = ref_cyclones_hist # type: ignore
ref_data["cyclone_num_years"] = 40 # type: ignore
ref_data["aew_density"] = ref_aew_hist
ref_data["aew_density"] = ref_aew_hist # type: ignore
# Question, should the num_years = 5?
ref_data["aew_num_years"] = 1 # type: ignore
parameter.ref_name = "Observation"
parameter.ref_title = "Observation"
Expand Down Expand Up @@ -173,7 +182,7 @@ def generate_tc_metrics_from_te_stitch_file(te_stitch_file: str) -> Dict[str, An

# Use E3SM land-sea mask
mask_path = os.path.join(e3sm_diags.INSTALL_PATH, "acme_ne30_ocean_land_mask.nc")
ocnfrac = cdms2.open(mask_path)("OCNFRAC", squeeze=1)
ocnfrac = xr.open_dataset(mask_path)["OCNFRAC"].squeeze(dim="time", drop=True)

# From model data, this dict stores a tuple for each basin.
# (mean ace, tc_intensity_dist, seasonal_cycle, # storms, # of storms over the ocean)
Expand Down Expand Up @@ -278,7 +287,7 @@ def _get_vars_from_te_stitch(
def _derive_metrics_per_basin(
num_storms: int,
vars: Dict[str, Any],
ocnfrac: cdms2.dataset.DatasetVariable,
ocnfrac: xr.DataArray,
basin_info: BasinInfo,
) -> Dict[str, Any]:
"""Derives metrics for each basin using TE stitch variables and other information.
Expand All @@ -287,8 +296,8 @@ def _derive_metrics_per_basin(
:type num_storms: int
:param vars: TE stitch variables
:type vars: Dict[str, Any]
:param ocnfrac: Ocnfrac CDMS2 dataset variable
:type ocnfrac: cdms2.dataset.DatasetVariable
:param ocnfrac: Ocnfrac xarray dataarray variable
:type ocnfrac: xarray.DataArray
:param basin_info: Basin information
:type basin_info: BasinInfo
:return: A dictionary containing mod variables
Expand Down Expand Up @@ -327,9 +336,9 @@ def _derive_metrics_per_basin(
yer = yearmc[:, k][~np.isnan(latmc[:, k])]

# Get the nearest location on land-sea mask to the first point of a TC Track
p = np.abs(ocnfrac.getLatitude()[:] - lat[0])
p = np.abs(ocnfrac.lat.values - lat[0])
loc_y = int(np.argmin(p))
p = np.abs(ocnfrac.getLongitude()[:] - lon[0])
p = np.abs(ocnfrac.lon.values - lon[0])
loc_x = int(np.argmin(p))
ocn_frac_0 = ocnfrac[loc_y, loc_x]

Expand Down
Loading

0 comments on commit e0e3bab

Please sign in to comment.