AdfData class for streamlined I/O in plotting scripts #269

Merged
merged 52 commits into from
Jul 11, 2024
Changes from 6 commits

Commits (52)
b28b6ac
sketching out a path for AdfData; adf_dataset.py and zonal_Mean_B.py
brianpm Oct 27, 2023
796a8c0
working on making it run
brianpm Oct 27, 2023
18706b3
fix getting reference case path
brianpm Oct 27, 2023
efdfba5
allow reference case to have a label
brianpm Oct 27, 2023
6456fdc
adfdata applied to global maps script
brianpm Dec 1, 2023
aababb2
adfdata applied to global maps script -- rename function
brianpm Dec 1, 2023
988011b
adfdata applied to global maps script -- debugging
brianpm Dec 1, 2023
91bce2a
adfdata applied to global maps script -- debugging
brianpm Dec 1, 2023
bf02a1f
adfdata applied to global maps script -- debugging
brianpm Dec 1, 2023
71578ef
debugging
brianpm Dec 1, 2023
06d3ea1
debugging
brianpm Dec 1, 2023
11f1477
debugging
brianpm Dec 1, 2023
c7286b7
debugging
brianpm Dec 1, 2023
97e1cbd
debugging
brianpm Dec 1, 2023
9068ec9
debugging -- fix load_da; actually return the DataArray
brianpm Dec 1, 2023
ceecec9
debugging
brianpm Dec 1, 2023
b48d01a
debugging -- fix load_reference_da; actually return the DataArray
brianpm Dec 1, 2023
dd015ef
debugging
brianpm Dec 1, 2023
84150a4
debugging
brianpm Dec 1, 2023
42614fb
debugging
brianpm Dec 1, 2023
b7765ab
debugging
brianpm Dec 1, 2023
857a441
debugging
brianpm Dec 1, 2023
577c9e7
debugging
brianpm Dec 1, 2023
87aebf2
debugging
brianpm Dec 1, 2023
c751d02
debugging
brianpm Dec 1, 2023
f9f296d
completed updates for AdfData and implement in zonal mean and global …
brianpm Dec 1, 2023
9ec949b
remove extra debugging print statements
brianpm Dec 1, 2023
3adbfe0
refactor plot_file_op for easier logic
brianpm Dec 1, 2023
51394a6
seasonal averaging moved to inside season loop
brianpm Dec 1, 2023
cf2128c
seasonal averaging moved to inside season loop
brianpm Dec 1, 2023
3be03db
correct zonal mean error message
brianpm Dec 1, 2023
536d2f2
Merge branch 'NCAR:main' into adf_case_dataclass
brianpm Apr 12, 2024
6b9f671
updated adf_dataset.py
brianpm Apr 19, 2024
7d2c946
Merge branch 'NCAR:main' into adf_case_dataclass
brianpm Apr 19, 2024
5d46906
starting to address PR comments. First round done on adf_dataset.py
brianpm Jun 12, 2024
ce50839
merged my conflicted adf_dataset.py
brianpm Jun 12, 2024
142d4e5
addressing PR comments. Instantiate AdfData from AdfDiag
brianpm Jun 12, 2024
935b6f4
Merge branch 'NCAR:main' into adf_case_dataclass
brianpm Jun 12, 2024
cae606f
testing AdfData. Fixes for history file naming. Fix to force loading …
brianpm Jun 12, 2024
0183799
removed commented lines
brianpm Jun 12, 2024
26a059c
addressing Jesse's comments on PR
brianpm Jun 18, 2024
7d6cdc9
Replace and with versions using new class
brianpm Jun 21, 2024
df14b1b
try to merge amwg_table from ADF/main
brianpm Jun 21, 2024
b966637
resolve upstream conflicts
brianpm Jun 21, 2024
39da27e
add back useful changes to adf_info
brianpm Jun 21, 2024
2d5a7dc
Merge branch 'main' into adf_case_dataclass
justin-richling Jun 21, 2024
de213fc
Merge branch 'main' into adf_case_dataclass
justin-richling Jun 26, 2024
231aaaa
bug fixes.
brianpm Jun 27, 2024
6e20dae
correct arguments for load_reference_regrid_da
brianpm Jun 27, 2024
496d5c7
Merge branch 'main' into adf_case_dataclass
justin-richling Jul 11, 2024
e36585f
trying to fix linting errors
brianpm Jul 11, 2024
febddf5
added load method for timeseries files
brianpm Jul 11, 2024
lib/adf_dataset.py (26 additions, 27 deletions)
@@ -50,8 +50,7 @@ def __init__(self, adfobj):
self.model_rgrid_loc = adfobj.get_basic_info("cam_regrid_loc", required=True)

# variables (and info for unit transform)
self.var_list = adfobj.diag_var_list
self.res = adfobj.variable_defaults
# use self.adf.diag_var_list and self.adf.variable_defaults

# case names and nicknames
self.case_names = adfobj.get_cam_info("cam_case_name", required=True)
@@ -60,27 +59,26 @@ def __init__(self, adfobj):
self.ref_nickname = self.base_nickname

# define reference data
self.reference_is_obs = adfobj.get_basic_info("compare_obs")
self.set_reference() # specify "ref_labels" -> called "data_list" in zonal_mean (name of data source)

def set_reference(self):
"""Set attributes for reference (aka baseline) data location, names, and variables."""
if self.reference_is_obs:
if self.adf.compare_obs:
self.ref_var_loc = {v: self.adf.var_obs_dict[v]['obs_file'] for v in self.adf.var_obs_dict}
self.ref_labels = {v: self.adf.var_obs_dict[v]['obs_name'] for v in self.adf.var_obs_dict}
self.ref_var_nam = {v: self.adf.var_obs_dict[v]['obs_var'] for v in self.adf.var_obs_dict}
if not self.adf.var_obs_dict:
print("\t WARNING: reference is observations, but no observations found to plot against.")
warnings.warn("\t WARNING: reference is observations, but no observations found to plot against.")
else:
self.ref_var_loc = {}
self.ref_var_nam = {}
self.ref_labels = {}
# when using a reference simulation, allow a "special" attribute with the case name:
self.ref_case_label = self.adf.get_baseline_info("cam_case_name", required=True)
for v in self.var_list:
for v in self.adf.diag_var_list:
f = self.get_reference_climo_file(v)
if f is None:
print(f"\t WARNING: ADFData found no reference climo file for {v}")
warnings.warn(f"\t WARNING: ADFData found no reference climo file for {v}")
continue
else:
self.ref_var_loc[v] = f
@@ -89,28 +87,28 @@ def set_reference(self):

def get_reference_climo_file(self, var):
"""Return a list of files to be used as reference (aka baseline) for variable var."""
if self.reference_is_obs:
if self.adf.compare_obs:
fils = self.ref_var_loc.get(var, None)
return [fils] if fils is not None else None
self.ref_loc = self.adf.get_baseline_info("cam_climo_loc")
ref_loc = self.adf.get_baseline_info("cam_climo_loc")
# NOTE: originally had this looking for *_baseline.nc
fils = sorted(Path(self.ref_loc).glob(f"{self.ref_case_label}_{var}_climo.nc"))
fils = sorted(Path(ref_loc).glob(f"{self.ref_case_label}_{var}_climo.nc"))
if fils:
return fils
return None

def load_reference_dataset(self, var):
fils = self.get_reference_climo_file(var)
if not fils:
print(f"ERROR: Did not find any reference files for variable: {var}. Will try to skip.")
warnings.warn(f"ERROR: Did not find any reference files for variable: {var}. Will try to skip.")
return None
return self.load_dataset(fils)

def load_reference_da(self, variablename):
da = self.load_reference_dataset(variablename)[self.ref_var_nam[variablename]]
if variablename in self.res:
vres = self.res[variablename]
if self.reference_is_obs:
if variablename in self.adf.variable_defaults:
vres = self.adf.variable_defaults[variablename]
if self.adf.compare_obs:
scale_factor = vres.get("obs_scale_factor",1)
add_offset = vres.get("obs_add_offset", 0)
else:
@@ -131,7 +129,7 @@ def load_climo_file(self, case, variablename):
"""Return Dataset for climo of variablename"""
fils = self.get_climo_file(case, variablename)
if not fils:
print(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")
warnings.warning(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")
Collaborator:
I think (?) this should be warn not warning:

Suggested change:
- warnings.warning(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")
+ warnings.warn(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")

Collaborator (author):
Yup. Fixed it.

return None
return self.load_dataset(fils)
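For reference, warnings.warn is the correct standard-library call; warnings.warning does not exist and raises AttributeError at runtime. A minimal sketch of the pattern this PR settles on, assuming the message-only formatter defined in scripts/plotting/global_latlon_map_B.py:

import warnings

# Print only the message, not the source file/line context
# (assumed to match my_formatwarning in the plotting scripts).
def my_formatwarning(msg, *args, **kwargs):
    return str(msg) + '\n'

warnings.formatwarning = my_formatwarning

# Correct call; warnings.warning(...) would raise AttributeError.
warnings.warn("ERROR: Did not find climo file for variable: TS. Will try to skip.")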

@@ -140,19 +138,19 @@ def get_climo_file(self, case, variablename):
"""Retrieve the climo file path(s) for variablename for a specific case."""
a = self.adf.get_cam_info("cam_climo_loc", required=True) # list of paths (could be multiple cases)
caseindex = (self.case_names).index(case) # the entry for specified case
# print(f"Checking if case name is in the climo loc entry: {case in a[caseindex]}")
model_cl_loc = Path(a[caseindex])
return sorted(model_cl_loc.glob(f"{case}_{variablename}_climo.nc"))

def get_timeseries_file(self, case, field):
ts_locs = self.adf.get_cam_info("cam_ts_loc", required=True)
ts_loc = Path(ts_locs[case])
ts_locs = self.adf.get_cam_info("cam_ts_loc", required=True) # list of paths (could be multiple cases)
caseindex = (self.case_names).index(case)
ts_loc = Path(ts_locs[caseindex])
ts_filenames = f'{case}.*.{field}.*nc'
ts_files = sorted(ts_loc.glob(ts_filenames))
return ts_files

def get_ref_timeseries_file(self, field):
if self.reference_is_obs:
if self.adf.compare_obs:
return None
else:
ts_loc = Path(self.adf.get_baseline_info("cam_ts_loc", required=True))
@@ -169,19 +167,17 @@ def get_regrid_file(self, case, field):
def load_regrid_dataset(self, case, field):
fils = self.get_regrid_file(case, field)
if not fils:
print(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
return None
return self.load_dataset(fils)

def load_regrid_da(self, case, field):
fils = self.get_regrid_file(case, field)
if not fils:
print(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
return None
return self.load_da(fils, field)

def get_file_list():
pass

def load_dataset(self, fils):
if (len(fils) == 0):
@@ -191,20 +187,23 @@ def load_dataset(self, fils):
ds = xr.open_mfdataset(fils, combine='by_coords')
else:
sfil = str(fils[0])
assert Path(sfil).is_file(), f"Needs to be a file: {sfil}"
if not Path(sfil).is_file():
warnings.warn(f"Expecting to find file: {sfil}")
return None
ds = xr.open_dataset(sfil)
if ds is None:
warnings.warn(f"invalid data on load_dataset")
return ds


def load_da(self, fils, variablename):
ds = self.load_dataset(fils)
if ds is None:
print(f"ERROR: Load failed for {variablename}")
warnings.warn(f"ERROR: Load failed for {variablename}")
return None
da = (ds[variablename]).squeeze()
if variablename in self.res:
vres = self.res[variablename]
if variablename in self.adf.variable_defaults:
vres = self.adf.variable_defaults[variablename]
da = da * vres.get("scale_factor",1) + vres.get("add_offset", 0)
da.attrs['units'] = vres.get("new_unit", da.attrs.get('units', 'none'))
return da
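The scale_factor / add_offset / new_unit handling above is the entire unit-conversion mechanism, so a worked sketch makes it concrete. The vres values below are made up for illustration (converting a precipitation rate from m/s to mm/day); real entries come from the ADF variable defaults file:

import numpy as np
import xarray as xr

# Hypothetical variable_defaults entry for a precipitation rate:
vres = {"scale_factor": 86400.0 * 1000.0, "new_unit": "mm/day"}

da = xr.DataArray(np.array([2.5e-8]), attrs={"units": "m/s"})  # assumed input
da = da * vres.get("scale_factor", 1) + vres.get("add_offset", 0)
da.attrs["units"] = vres.get("new_unit", da.attrs.get("units", "none"))
print(da.values, da.attrs["units"])  # [2.16] mm/day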
lib/adf_diag.py (12 additions, 6 deletions)
@@ -92,9 +92,9 @@

# +++++++++++++++++++++++++++++

# Finally, import needed ADF module:
# Finally, import needed ADF modules:
from adf_web import AdfWeb

from adf_dataset import AdfData

#################
# Helper functions
@@ -182,6 +182,9 @@ def __init__(self, config_file, debug=False):
# Add plotting script names:
self.__plotting_scripts = self.read_config_var("plotting_scripts")

# Provide convenience functions for data handling:
self.data = AdfData(self)

# Create property needed to return "plotting_scripts" variable to user:
@property
def plotting_scripts(self):
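Attaching self.data here is what lets every plotting script reach the shared I/O helpers through the adfobj it is handed, instead of constructing its own AdfData. A minimal sketch of the intended call pattern (the function name is hypothetical; the methods are the ones defined in adf_dataset.py above):

def my_plot_script(adfobj):
    """Hypothetical plotting script using the AdfData helpers on adfobj.data."""
    for var in adfobj.diag_var_list:
        # Reference data (obs or baseline), with unit transforms applied:
        odata = adfobj.data.load_reference_da(var)
        if odata is None:
            continue
        for case in adfobj.data.case_names:
            # Regridded model climatology for this case:
            mdata = adfobj.data.load_regrid_da(case, var)
            if mdata is None:
                continue
            # ... compute seasonal means of mdata vs. odata and plot ...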
@@ -538,11 +541,13 @@ def call_ncrcat(cmd):
# Aerosol Calcs
#--------------
#Always make sure PMID is made if aerosols are desired in config file
# Since there's no requirement for `aerosol_zonal_list` to be included, allow it to be absent:
azl = res.get("aerosol_zonal_list", [])
if "PMID" not in diag_var_list:
if any(item in res["aerosol_zonal_list"] for item in diag_var_list):
if any(item in azl for item in diag_var_list):
diag_var_list += ["PMID"]
if "T" not in diag_var_list:
if any(item in res["aerosol_zonal_list"] for item in diag_var_list):
if any(item in azl for item in diag_var_list):
diag_var_list += ["T"]
#End aerosol calcs
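The point of the res.get call is that a dict lookup with a default cannot raise KeyError, so configurations without an aerosol_zonal_list key pass through untouched. A small self-contained sketch (values hypothetical):

res = {}                              # hypothetical config: key absent
diag_var_list = ["SO4", "T"]          # hypothetical variable list
azl = res.get("aerosol_zonal_list", [])   # [] instead of a KeyError
if "PMID" not in diag_var_list and any(item in azl for item in diag_var_list):
    diag_var_list += ["PMID"]
print(diag_var_list)                  # unchanged: ['SO4', 'T']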

@@ -1056,7 +1061,7 @@ def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite
print(ermsg)
else:
#Open a new dataset with all the constituent files/variables
ds = xr.open_mfdataset(constit_files)
ds = xr.open_mfdataset(constit_files).compute()

# create new file name for derived variable
derived_file = constit_files[0].replace(constit_list[0], var)
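The appended .compute() forces the dask-backed dataset into memory at open time. A hedged sketch of the motivation, assuming the point is to finish reading the constituent files before the derived file is written (file names hypothetical; PRECT = PRECC + PRECL is a standard CAM derivation):

import xarray as xr

constit_files = ["case.PRECC.nc", "case.PRECL.nc"]  # hypothetical inputs
# Without .compute(), the sum below stays lazy and the constituent
# files would only be read back when the result is written out.
ds = xr.open_mfdataset(constit_files).compute()
prect = (ds["PRECC"] + ds["PRECL"]).rename("PRECT")
prect.to_netcdf("case.PRECT.nc")  # hypothetical output path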
@@ -1088,7 +1093,8 @@ def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite
#These will be multiplied by rho (density of dry air)
ds_pmid_done = False
ds_t_done = False
if var in res["aerosol_zonal_list"]:
azl = res.get("aerosol_zonal_list", []) # User-defined defaults might not include aerosol zonal list
if var in azl:

#Only calculate once for all aerosol vars
if not ds_pmid_done:
lib/adf_info.py (21 additions, 3 deletions)
@@ -673,20 +673,38 @@ def get_climo_yrs_from_ts(self, input_ts_loc, case_name):

#Search for first variable in var_list to get a time series file to read
#NOTE: it is assumed all the variables have the same dates!
ts_files = sorted(input_location.glob(f"{case_name}*.{var_list[0]}.*nc"))

#Read hist_str (component.hist_num) from the yaml file, or set to default
hist_str = self.get_basic_info('hist_str')
#If hist_str is not present, then default to 'cam.h0':
if not hist_str:
hist_str = 'cam.h0'
#End if

ts_files = sorted(input_location.glob(f"{case_name}.{hist_str}.{var_list[0]}.*nc"))

#Read in file(s)
if len(ts_files) == 1:
cam_ts_data = xr.open_dataset(ts_files[0], decode_times=True)
else:
cam_ts_data = xr.open_mfdataset(ts_files, decode_times=True, combine='by_coords')
try:
cam_ts_data = xr.open_mfdataset(ts_files, decode_times=True, combine='by_coords')
except:
print(" ----------- ERROR ------------")
print(ts_files)
Collaborator:
I think this is OK for now, but do you remember what the specific error was that prompted this try/except? In general we want to check for a specific type of exception; otherwise it could catch errors that we actually want to be exposed (for example, if the computer itself has a system/hardware error).

Collaborator (author):
Sorry, this is a remnant of my testing; I'm taking it out. I was hitting a problem where multiple time series files had been generated for a case (and put in the same directory). The search then identified multiple files that couldn't be concatenated because they had overlapping times. Maybe that needs to be revisited sometime, but I don't think this check needs to be included here.


#Average time dimension over time bounds, if bounds exist:
if 'time_bnds' in cam_ts_data:
timeBoundsName = 'time_bnds'
elif 'time_bounds' in cam_ts_data:
timeBoundsName = 'time_bounds'
else:
timeBoundsName = None
if timeBoundsName:
time = cam_ts_data['time']
#NOTE: force `load` here b/c if dask & time is cftime,
#throws a NotImplementedError:
time = xr.DataArray(cam_ts_data['time_bnds'].load().mean(dim='nbnd').values, dims=time.dims, attrs=time.attrs)
time = xr.DataArray(cam_ts_data[timeBoundsName].load().mean(dim='nbnd').values, dims=time.dims, attrs=time.attrs)
cam_ts_data['time'] = time
cam_ts_data.assign_coords(time=time)
cam_ts_data = xr.decode_cf(cam_ts_data)
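Combining this hunk with the review thread above: a more defensive version would catch a specific exception from the concatenation and average whichever bounds variable exists. A minimal sketch; treating overlapping times as a ValueError is an assumption about what xarray raises from combine='by_coords':

import xarray as xr

def open_ts(ts_files):
    """Sketch: open time-series files and move time to the bounds midpoint."""
    try:
        ds = xr.open_mfdataset(ts_files, decode_times=True, combine='by_coords')
    except ValueError as err:  # assumed error type for overlapping times
        print(f"Could not concatenate {len(ts_files)} time series files: {err}")
        raise
    bnds = next((n for n in ('time_bnds', 'time_bounds') if n in ds), None)
    if bnds:
        time = ds['time']
        # load() first: mean() on a dask-backed cftime array raises
        # NotImplementedError, as noted in the diff above.
        mid = ds[bnds].load().mean(dim='nbnd')
        ds['time'] = xr.DataArray(mid.values, dims=time.dims, attrs=time.attrs)
        ds = xr.decode_cf(ds)
    return ds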
lib/plotting_functions.py (0 additions, 2 deletions)
@@ -2317,9 +2317,7 @@ def square_contour_difference(fld1, fld2, **kwargs):
mnorm = mpl.colors.Normalize(mn, mx)

coord1, coord2 = fld1.coords # ASSUMES xarray WITH coords AND 2-dimensions
# print(f"{coord1}, {coord2}")
xx, yy = np.meshgrid(fld1[coord2], fld1[coord1])
# print(f"shape of meshgrid: {xx.shape}")

img1 = ax1.contourf(xx, yy, fld1.transpose())
if (coord1 == 'month') and (fld1.shape[0] ==12):
scripts/averaging/create_climo_files.py (0 additions, 1 deletion)
@@ -214,7 +214,6 @@ def process_variable(ts_files, syr, eyr, output_file):
cam_ts_data.assign_coords(time=time)
cam_ts_data = xr.decode_cf(cam_ts_data)
#Extract data subset using provided year bounds:
#cam_ts_data = cam_ts_data.sel(time=slice(syr, eyr))
tslice = get_time_slice_by_year(cam_ts_data.time, int(syr), int(eyr))
cam_ts_data = cam_ts_data.isel(time=tslice)
#Group time series values by month, and average those months together:
scripts/plotting/global_latlon_map_B.py (12 additions, 13 deletions)
@@ -18,7 +18,6 @@
import warnings # use to warn user about missing files.

import plotting_functions as pf
from adf_dataset import AdfData

#Format warning messages:
def my_formatwarning(msg, *args, **kwargs):
@@ -83,7 +82,7 @@ def global_latlon_map_B(adfobj):
#
# Use ADF api to get all necessary information
#
data = AdfData(adfobj)
# data = AdfData(adfobj) NO LONGER NEEDED
Collaborator:
Assuming this script works as expected I would just delete this line.

Collaborator (author):
Done.

var_list = adfobj.diag_var_list
#Special ADF variable which contains the output paths for
#all generated plots and tables for each case:
@@ -128,7 +127,7 @@ def global_latlon_map_B(adfobj):

# probably want to do this one variable at a time:
for var in var_list:
if var not in data.ref_var_nam:
if var not in adfobj.data.ref_var_nam:
dmsg = f"No reference data found for variable `{var}`, zonal mean plotting skipped."
adfobj.debug_log(dmsg)
continue
Expand Down Expand Up @@ -156,7 +155,7 @@ def global_latlon_map_B(adfobj):
vres['central_longitude'] = pf.get_central_longitude(adfobj)

# load reference data (observational or baseline)
odata = data.load_reference_da(var)
odata = adfobj.data.load_reference_da(var)
if odata is None:
continue
has_dims = pf.lat_lon_validate_dims(odata) # T iff dims are (lat,lon) -- can't plot unless we have both
@@ -165,10 +164,10 @@ def global_latlon_map_B(adfobj):
continue

#Loop over model cases:
for case_idx, case_name in enumerate(data.case_names):
for case_idx, case_name in enumerate(adfobj.data.case_names):

#Set case nickname:
case_nickname = data.test_nicknames[case_idx]
case_nickname = adfobj.data.test_nicknames[case_idx]

#Set output plot location:
plot_loc = Path(plot_locations[case_idx])
@@ -179,7 +178,7 @@ def global_latlon_map_B(adfobj):
plot_loc.mkdir(parents=True)

#Load re-gridded model files:
mdata = data.load_regrid_da(case_name, var)
mdata = adfobj.data.load_regrid_da(case_name, var)

#Skip this variable/case if the regridded climo file doesn't exist:
if mdata is None:
@@ -239,11 +238,11 @@ def global_latlon_map_B(adfobj):
# difference: each entry should be (lat, lon)
dseasons[s] = mseasons[s] - oseasons[s]

pf.plot_map_and_save(plot_name, case_nickname, data.ref_nickname,
pf.plot_map_and_save(plot_name, case_nickname, adfobj.data.ref_nickname,
[syear_cases[case_idx],eyear_cases[case_idx]],
[syear_baseline,eyear_baseline],
mseasons[s], oseasons[s], dseasons[s],
obs=data.reference_is_obs, **vres)
obs=adfobj.compare_obs, **vres)

#Add plot to website (if enabled):
adfobj.add_website_data(plot_name, var, case_name, category=web_category,
@@ -283,7 +282,7 @@ def global_latlon_map_B(adfobj):
[syear_cases[case_idx],eyear_cases[case_idx]],
[syear_baseline,eyear_baseline],
mseasons[s].sel(lev=pres), oseasons[s].sel(lev=pres), dseasons[s].sel(lev=pres),
obs=data.reference_is_obs, **vres)
obs=adfobj.compare_obs, **vres)

#Add plot to website (if enabled):
adfobj.add_website_data(plot_name, f"{var}_{pres}hpa", case_name, category=web_category,
@@ -343,14 +342,14 @@ def plot_file_op(adfobj, plot_name, var, case_name, season, web_category, redo_p
if plot_name.is_file():
if redo_plot:
plot_name.unlink()
return 1
return True
else:
#Add already-existing plot to website (if enabled):
adfobj.add_website_data(plot_name, var, case_name, category=web_category,
season=season, plot_type=plot_type)
return None # None tells caller that file exists and not to overwrite
return False # False tells caller that file exists and not to overwrite
else:
return 1
return True
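With True/False returns, the call site reads as a plain boolean test. A sketch of the intended caller (the argument list past redo_plot is an assumption, since the hunk header truncates the signature):

# Hypothetical call site inside the season loop:
if plot_file_op(adfobj, plot_name, var, case_name, s,
                web_category, redo_plot, "LatLon"):
    # True: make (or remake) the plot, e.g. via pf.plot_map_and_save(...)
    pass
# False: the existing plot was kept and already added to the website.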

##############
#END OF SCRIPT