AdfData class for streamlined I/O in plotting scripts #269

Merged
merged 52 commits into from
Jul 11, 2024
Changes from 6 commits

Commits (52)
b28b6ac
sketching out a path for AdfData; adf_dataset.py and zonal_Mean_B.py
brianpm Oct 27, 2023
796a8c0
working on making it run
brianpm Oct 27, 2023
18706b3
fix getting reference case path
brianpm Oct 27, 2023
efdfba5
allow reference case to have a label
brianpm Oct 27, 2023
6456fdc
adfdata applied to global maps script
brianpm Dec 1, 2023
aababb2
adfdata applied to global maps script -- rename function
brianpm Dec 1, 2023
988011b
adfdata applied to global maps script -- debugging
brianpm Dec 1, 2023
91bce2a
adfdata applied to global maps script -- debugging
brianpm Dec 1, 2023
bf02a1f
adfdata applied to global maps script -- debugging
brianpm Dec 1, 2023
71578ef
debugging
brianpm Dec 1, 2023
06d3ea1
debugging
brianpm Dec 1, 2023
11f1477
debugging
brianpm Dec 1, 2023
c7286b7
debugging
brianpm Dec 1, 2023
97e1cbd
debugging
brianpm Dec 1, 2023
9068ec9
debugging -- fix load_da; actually return the DataArray
brianpm Dec 1, 2023
ceecec9
debugging
brianpm Dec 1, 2023
b48d01a
debugging -- fix load_reference_da; actually return the DataArray
brianpm Dec 1, 2023
dd015ef
debugging
brianpm Dec 1, 2023
84150a4
debugging
brianpm Dec 1, 2023
42614fb
debugging
brianpm Dec 1, 2023
b7765ab
debugging
brianpm Dec 1, 2023
857a441
debugging
brianpm Dec 1, 2023
577c9e7
debugging
brianpm Dec 1, 2023
87aebf2
debugging
brianpm Dec 1, 2023
c751d02
debugging
brianpm Dec 1, 2023
f9f296d
completed updates for AdfData and implement in zonal mean and global …
brianpm Dec 1, 2023
9ec949b
remove extra debugging print statements
brianpm Dec 1, 2023
3adbfe0
refactor plot_file_op for easier logic
brianpm Dec 1, 2023
51394a6
seasonal averaging moved to inside season loop
brianpm Dec 1, 2023
cf2128c
seasonal averaging moved to inside season loop
brianpm Dec 1, 2023
3be03db
correct zonal mean error message
brianpm Dec 1, 2023
536d2f2
Merge branch 'NCAR:main' into adf_case_dataclass
brianpm Apr 12, 2024
6b9f671
updated adf_dataset.py
brianpm Apr 19, 2024
7d2c946
Merge branch 'NCAR:main' into adf_case_dataclass
brianpm Apr 19, 2024
5d46906
starting to address PR comments. First round done on adf_dataset.py
brianpm Jun 12, 2024
ce50839
merged my conflicted adf_dataset.py
brianpm Jun 12, 2024
142d4e5
addressing PR comments. Instantiate AdfData from AdfDiag
brianpm Jun 12, 2024
935b6f4
Merge branch 'NCAR:main' into adf_case_dataclass
brianpm Jun 12, 2024
cae606f
testing AdfData. Fixes for history file naming. Fix to force loading …
brianpm Jun 12, 2024
0183799
removed commented lines
brianpm Jun 12, 2024
26a059c
addressing Jesse's comments on PR
brianpm Jun 18, 2024
7d6cdc9
Replace and with versions using new class
brianpm Jun 21, 2024
df14b1b
try to merge amwg_table from ADF/main
brianpm Jun 21, 2024
b966637
resolve upstream conflicts
brianpm Jun 21, 2024
39da27e
add back useful changes to adf_info
brianpm Jun 21, 2024
2d5a7dc
Merge branch 'main' into adf_case_dataclass
justin-richling Jun 21, 2024
de213fc
Merge branch 'main' into adf_case_dataclass
justin-richling Jun 26, 2024
231aaaa
bug fixes.
brianpm Jun 27, 2024
6e20dae
correct arguments for load_reference_regrid_da
brianpm Jun 27, 2024
496d5c7
Merge branch 'main' into adf_case_dataclass
justin-richling Jul 11, 2024
e36585f
trying to fix linting errors
brianpm Jul 11, 2024
febddf5
added load method for timeseries files
brianpm Jul 11, 2024
lib/adf_dataset.py (26 additions, 27 deletions)
@@ -50,8 +50,7 @@ def __init__(self, adfobj):
self.model_rgrid_loc = adfobj.get_basic_info("cam_regrid_loc", required=True)

# variables (and info for unit transform)
self.var_list = adfobj.diag_var_list
self.res = adfobj.variable_defaults
# use self.adf.diag_var_list and self.adf.variable_defaults

# case names and nicknames
self.case_names = adfobj.get_cam_info("cam_case_name", required=True)
@@ -60,27 +59,26 @@ def __init__(self, adfobj):
self.ref_nickname = self.base_nickname

# define reference data
self.reference_is_obs = adfobj.get_basic_info("compare_obs")
self.set_reference() # specify "ref_labels" -> called "data_list" in zonal_mean (name of data source)

def set_reference(self):
"""Set attributes for reference (aka baseline) data location, names, and variables."""
if self.reference_is_obs:
if self.adf.compare_obs:
self.ref_var_loc = {v: self.adf.var_obs_dict[v]['obs_file'] for v in self.adf.var_obs_dict}
self.ref_labels = {v: self.adf.var_obs_dict[v]['obs_name'] for v in self.adf.var_obs_dict}
self.ref_var_nam = {v: self.adf.var_obs_dict[v]['obs_var'] for v in self.adf.var_obs_dict}
if not self.adf.var_obs_dict:
print("\t WARNING: reference is observations, but no observations found to plot against.")
warnings.warn("\t WARNING: reference is observations, but no observations found to plot against.")
else:
self.ref_var_loc = {}
self.ref_var_nam = {}
self.ref_labels = {}
# when using a reference simulation, allow a "special" attribute with the case name:
self.ref_case_label = self.adf.get_baseline_info("cam_case_name", required=True)
for v in self.var_list:
for v in self.adf.diag_var_list:
f = self.get_reference_climo_file(v)
if f is None:
print(f"\t WARNING: ADFData found no reference climo file for {v}")
warnings.warn(f"\t WARNING: ADFData found no reference climo file for {v}")
continue
else:
self.ref_var_loc[v] = f
@@ -89,28 +87,28 @@ def set_reference(self):

def get_reference_climo_file(self, var):
"""Return a list of files to be used as reference (aka baseline) for variable var."""
if self.reference_is_obs:
if self.adf.compare_obs:
fils = self.ref_var_loc.get(var, None)
return [fils] if fils is not None else None
self.ref_loc = self.adf.get_baseline_info("cam_climo_loc")
ref_loc = self.adf.get_baseline_info("cam_climo_loc")
# NOTE: originally had this looking for *_baseline.nc
fils = sorted(Path(self.ref_loc).glob(f"{self.ref_case_label}_{var}_climo.nc"))
fils = sorted(Path(ref_loc).glob(f"{self.ref_case_label}_{var}_climo.nc"))
if fils:
return fils
return None

def load_reference_dataset(self, var):
fils = self.get_reference_climo_file(var)
if not fils:
print(f"ERROR: Did not find any reference files for variable: {var}. Will try to skip.")
warnings.warn(f"ERROR: Did not find any reference files for variable: {var}. Will try to skip.")
return None
return self.load_dataset(fils)

def load_reference_da(self, variablename):
da = self.load_reference_dataset(variablename)[self.ref_var_nam[variablename]]
if variablename in self.res:
vres = self.res[variablename]
if self.reference_is_obs:
if variablename in self.adf.variable_defaults:
vres = self.adf.variable_defaults[variablename]
if self.adf.compare_obs:
scale_factor = vres.get("obs_scale_factor",1)
add_offset = vres.get("obs_add_offset", 0)
else:
@@ -131,7 +129,7 @@ def load_climo_file(self, case, variablename):
"""Return Dataset for climo of variablename"""
fils = self.get_climo_file(case, variablename)
if not fils:
print(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")
warnings.warning(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")
Collaborator:
I think (?) this should be warn not warning:

Suggested change:
- warnings.warning(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")
+ warnings.warn(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")

Collaborator (author):
Yup. Fixed it.

return None
return self.load_dataset(fils)
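For reference, warnings.warn is the correct standard-library call; warnings.warning does not exist and raises AttributeError at runtime. A minimal sketch of the pattern this PR settles on, assuming the message-only formatter defined in scripts/plotting/global_latlon_map_B.py:

import warnings

# Print only the message, not the source file/line context
# (assumed to match my_formatwarning in the plotting scripts).
def my_formatwarning(msg, *args, **kwargs):
    return str(msg) + '\n'

warnings.formatwarning = my_formatwarning

# Correct call; warnings.warning(...) would raise AttributeError.
warnings.warn("ERROR: Did not find climo file for variable: TS. Will try to skip.")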

@@ -140,19 +138,19 @@ def get_climo_file(self, case, variablename):
"""Retrieve the climo file path(s) for variablename for a specific case."""
a = self.adf.get_cam_info("cam_climo_loc", required=True) # list of paths (could be multiple cases)
caseindex = (self.case_names).index(case) # the entry for specified case
# print(f"Checking if case name is in the climo loc entry: {case in a[caseindex]}")
model_cl_loc = Path(a[caseindex])
return sorted(model_cl_loc.glob(f"{case}_{variablename}_climo.nc"))

def get_timeseries_file(self, case, field):
ts_locs = self.adf.get_cam_info("cam_ts_loc", required=True)
ts_loc = Path(ts_locs[case])
ts_locs = self.adf.get_cam_info("cam_ts_loc", required=True) # list of paths (could be multiple cases)
caseindex = (self.case_names).index(case)
ts_loc = Path(ts_locs[caseindex])
ts_filenames = f'{case}.*.{field}.*nc'
ts_files = sorted(ts_loc.glob(ts_filenames))
return ts_files

def get_ref_timeseries_file(self, field):
if self.reference_is_obs:
if self.adf.compare_obs:
return None
else:
ts_loc = Path(self.adf.get_baseline_info("cam_ts_loc", required=True))
@@ -169,19 +167,17 @@ def get_regrid_file(self, case, field):
def load_regrid_dataset(self, case, field):
fils = self.get_regrid_file(case, field)
if not fils:
print(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
return None
return self.load_dataset(fils)

def load_regrid_da(self, case, field):
fils = self.get_regrid_file(case, field)
if not fils:
print(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
return None
return self.load_da(fils, field)

def get_file_list():
pass

def load_dataset(self, fils):
if (len(fils) == 0):
@@ -191,20 +187,23 @@ def load_dataset(self, fils):
ds = xr.open_mfdataset(fils, combine='by_coords')
else:
sfil = str(fils[0])
assert Path(sfil).is_file(), f"Needs to be a file: {sfil}"
if not Path(sfil).is_file():
warnings.warn(f"Expecting to find file: {sfil}")
return None
ds = xr.open_dataset(sfil)
if ds is None:
warnings.warn(f"invalid data on load_dataset")
return ds


def load_da(self, fils, variablename):
ds = self.load_dataset(fils)
if ds is None:
print(f"ERROR: Load failed for {variablename}")
warnings.warn(f"ERROR: Load failed for {variablename}")
return None
da = (ds[variablename]).squeeze()
if variablename in self.res:
vres = self.res[variablename]
if variablename in self.adf.variable_defaults:
vres = self.adf.variable_defaults[variablename]
da = da * vres.get("scale_factor",1) + vres.get("add_offset", 0)
da.attrs['units'] = vres.get("new_unit", da.attrs.get('units', 'none'))
return da
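The scale_factor / add_offset / new_unit handling above is the entire unit-conversion mechanism, so a worked sketch makes it concrete. The vres values below are made up for illustration (converting a precipitation rate from m/s to mm/day); real entries come from the ADF variable defaults file:

import numpy as np
import xarray as xr

# Hypothetical variable_defaults entry for a precipitation rate:
vres = {"scale_factor": 86400.0 * 1000.0, "new_unit": "mm/day"}

da = xr.DataArray(np.array([2.5e-8]), attrs={"units": "m/s"})  # assumed input
da = da * vres.get("scale_factor", 1) + vres.get("add_offset", 0)
da.attrs["units"] = vres.get("new_unit", da.attrs.get("units", "none"))
print(da.values, da.attrs["units"])  # [2.16] mm/day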
lib/adf_diag.py (12 additions, 6 deletions)
@@ -92,9 +92,9 @@

# +++++++++++++++++++++++++++++

# Finally, import needed ADF module:
# Finally, import needed ADF modules:
from adf_web import AdfWeb

from adf_dataset import AdfData

#################
# Helper functions
@@ -182,6 +182,9 @@ def __init__(self, config_file, debug=False):
# Add plotting script names:
self.__plotting_scripts = self.read_config_var("plotting_scripts")

# Provide convenience functions for data handling:
self.data = AdfData(self)

# Create property needed to return "plotting_scripts" variable to user:
@property
def plotting_scripts(self):
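Attaching self.data here is what lets every plotting script reach the shared I/O helpers through the adfobj it is handed, instead of constructing its own AdfData. A minimal sketch of the intended call pattern (the function name is hypothetical; the methods are the ones defined in adf_dataset.py above):

def my_plot_script(adfobj):
    """Hypothetical plotting script using the AdfData helpers on adfobj.data."""
    for var in adfobj.diag_var_list:
        # Reference data (obs or baseline), with unit transforms applied:
        odata = adfobj.data.load_reference_da(var)
        if odata is None:
            continue
        for case in adfobj.data.case_names:
            # Regridded model climatology for this case:
            mdata = adfobj.data.load_regrid_da(case, var)
            if mdata is None:
                continue
            # ... compute seasonal means of mdata vs. odata and plot ...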
@@ -538,11 +541,13 @@ def call_ncrcat(cmd):
# Aerosol Calcs
#--------------
#Always make sure PMID is made if aerosols are desired in config file
# Since there's no requirement for `aerosol_zonal_list` to be included, allow it to be absent:
azl = res.get("aerosol_zonal_list", [])
if "PMID" not in diag_var_list:
if any(item in res["aerosol_zonal_list"] for item in diag_var_list):
if any(item in azl for item in diag_var_list):
diag_var_list += ["PMID"]
if "T" not in diag_var_list:
if any(item in res["aerosol_zonal_list"] for item in diag_var_list):
if any(item in azl for item in diag_var_list):
diag_var_list += ["T"]
#End aerosol calcs
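The point of the res.get call is that a dict lookup with a default cannot raise KeyError, so configurations without an aerosol_zonal_list key pass through untouched. A small self-contained sketch (values hypothetical):

res = {}                              # hypothetical config: key absent
diag_var_list = ["SO4", "T"]          # hypothetical variable list
azl = res.get("aerosol_zonal_list", [])   # [] instead of a KeyError
if "PMID" not in diag_var_list and any(item in azl for item in diag_var_list):
    diag_var_list += ["PMID"]
print(diag_var_list)                  # unchanged: ['SO4', 'T']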

@@ -1056,7 +1061,7 @@ def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite
print(ermsg)
else:
#Open a new dataset with all the constituent files/variables
ds = xr.open_mfdataset(constit_files)
ds = xr.open_mfdataset(constit_files).compute()

# create new file name for derived variable
derived_file = constit_files[0].replace(constit_list[0], var)
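The appended .compute() forces the dask-backed dataset into memory at open time. A hedged sketch of the motivation, assuming the point is to finish reading the constituent files before the derived file is written (file names hypothetical; PRECT = PRECC + PRECL is a standard CAM derivation):

import xarray as xr

constit_files = ["case.PRECC.nc", "case.PRECL.nc"]  # hypothetical inputs
# Without .compute(), the sum below stays lazy and the constituent
# files would only be read back when the result is written out.
ds = xr.open_mfdataset(constit_files).compute()
prect = (ds["PRECC"] + ds["PRECL"]).rename("PRECT")
prect.to_netcdf("case.PRECT.nc")  # hypothetical output path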
@@ -1088,7 +1093,8 @@ def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite
#These will be multiplied by rho (density of dry air)
ds_pmid_done = False
ds_t_done = False
if var in res["aerosol_zonal_list"]:
azl = res.get("aerosol_zonal_list", []) # User-defined defaults might not include aerosol zonal list
if var in azl:

#Only calculate once for all aerosol vars
if not ds_pmid_done:
lib/adf_info.py (21 additions, 3 deletions)
@@ -673,20 +673,38 @@ def get_climo_yrs_from_ts(self, input_ts_loc, case_name):

#Search for first variable in var_list to get a time series file to read
#NOTE: it is assumed all the variables have the same dates!
ts_files = sorted(input_location.glob(f"{case_name}*.{var_list[0]}.*nc"))

#Read hist_str (component.hist_num) from the yaml file, or set to default
hist_str = self.get_basic_info('hist_str')
#If hist_str is not present, then default to 'cam.h0':
if not hist_str:
hist_str = 'cam.h0'
#End if

ts_files = sorted(input_location.glob(f"{case_name}.{hist_str}.{var_list[0]}.*nc"))

#Read in file(s)
if len(ts_files) == 1:
cam_ts_data = xr.open_dataset(ts_files[0], decode_times=True)
else:
cam_ts_data = xr.open_mfdataset(ts_files, decode_times=True, combine='by_coords')
try:
cam_ts_data = xr.open_mfdataset(ts_files, decode_times=True, combine='by_coords')
except:
print(" ----------- ERROR ------------")
print(ts_files)
Collaborator:
I think this is OK for now, but do you remember what the specific error was that prompted this try/except? In general we want to check for a specific type of exception; otherwise it could catch errors that we actually want to be exposed (for example, if the computer itself has a system/hardware error).

Collaborator (author):
Sorry, this is a remnant of my testing; I'm taking it out. I was hitting a problem where multiple time series files had been generated for a case (and put in the same directory). The search then identified multiple files that couldn't be concatenated because they had overlapping times. Maybe that needs to be revisited sometime, but I don't think this check needs to be included here.


#Average time dimension over time bounds, if bounds exist:
if 'time_bnds' in cam_ts_data:
timeBoundsName = 'time_bnds'
elif 'time_bounds' in cam_ts_data:
timeBoundsName = 'time_bounds'
else:
timeBoundsName = None
if timeBoundsName:
time = cam_ts_data['time']
#NOTE: force `load` here b/c if dask & time is cftime,
#throws a NotImplementedError:
time = xr.DataArray(cam_ts_data['time_bnds'].load().mean(dim='nbnd').values, dims=time.dims, attrs=time.attrs)
time = xr.DataArray(cam_ts_data[timeBoundsName].load().mean(dim='nbnd').values, dims=time.dims, attrs=time.attrs)
cam_ts_data['time'] = time
cam_ts_data.assign_coords(time=time)
cam_ts_data = xr.decode_cf(cam_ts_data)
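Combining this hunk with the review thread above: a more defensive version would catch a specific exception from the concatenation and average whichever bounds variable exists. A minimal sketch; treating overlapping times as a ValueError is an assumption about what xarray raises from combine='by_coords':

import xarray as xr

def open_ts(ts_files):
    """Sketch: open time-series files and move time to the bounds midpoint."""
    try:
        ds = xr.open_mfdataset(ts_files, decode_times=True, combine='by_coords')
    except ValueError as err:  # assumed error type for overlapping times
        print(f"Could not concatenate {len(ts_files)} time series files: {err}")
        raise
    bnds = next((n for n in ('time_bnds', 'time_bounds') if n in ds), None)
    if bnds:
        time = ds['time']
        # load() first: mean() on a dask-backed cftime array raises
        # NotImplementedError, as noted in the diff above.
        mid = ds[bnds].load().mean(dim='nbnd')
        ds['time'] = xr.DataArray(mid.values, dims=time.dims, attrs=time.attrs)
        ds = xr.decode_cf(ds)
    return ds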
lib/plotting_functions.py (0 additions, 2 deletions)
@@ -2317,9 +2317,7 @@ def square_contour_difference(fld1, fld2, **kwargs):
mnorm = mpl.colors.Normalize(mn, mx)

coord1, coord2 = fld1.coords # ASSUMES xarray WITH coords AND 2-dimensions
# print(f"{coord1}, {coord2}")
xx, yy = np.meshgrid(fld1[coord2], fld1[coord1])
# print(f"shape of meshgrid: {xx.shape}")

img1 = ax1.contourf(xx, yy, fld1.transpose())
if (coord1 == 'month') and (fld1.shape[0] ==12):
scripts/averaging/create_climo_files.py (0 additions, 1 deletion)
@@ -214,7 +214,6 @@ def process_variable(ts_files, syr, eyr, output_file):
cam_ts_data.assign_coords(time=time)
cam_ts_data = xr.decode_cf(cam_ts_data)
#Extract data subset using provided year bounds:
#cam_ts_data = cam_ts_data.sel(time=slice(syr, eyr))
tslice = get_time_slice_by_year(cam_ts_data.time, int(syr), int(eyr))
cam_ts_data = cam_ts_data.isel(time=tslice)
#Group time series values by month, and average those months together:
scripts/plotting/global_latlon_map_B.py (12 additions, 13 deletions)
@@ -18,7 +18,6 @@
import warnings # use to warn user about missing files.

import plotting_functions as pf
from adf_dataset import AdfData

#Format warning messages:
def my_formatwarning(msg, *args, **kwargs):
@@ -83,7 +82,7 @@ def global_latlon_map_B(adfobj):
#
# Use ADF api to get all necessary information
#
data = AdfData(adfobj)
# data = AdfData(adfobj) NO LONGER NEEDED
Collaborator:
Assuming this script works as expected I would just delete this line.

Collaborator (author):
Done.

var_list = adfobj.diag_var_list
#Special ADF variable which contains the output paths for
#all generated plots and tables for each case:
@@ -128,7 +127,7 @@ def global_latlon_map_B(adfobj):

# probably want to do this one variable at a time:
for var in var_list:
if var not in data.ref_var_nam:
if var not in adfobj.data.ref_var_nam:
dmsg = f"No reference data found for variable `{var}`, zonal mean plotting skipped."
adfobj.debug_log(dmsg)
continue
Expand Down Expand Up @@ -156,7 +155,7 @@ def global_latlon_map_B(adfobj):
vres['central_longitude'] = pf.get_central_longitude(adfobj)

# load reference data (observational or baseline)
odata = data.load_reference_da(var)
odata = adfobj.data.load_reference_da(var)
if odata is None:
continue
has_dims = pf.lat_lon_validate_dims(odata) # T iff dims are (lat,lon) -- can't plot unless we have both
@@ -165,10 +164,10 @@ def global_latlon_map_B(adfobj):
continue

#Loop over model cases:
for case_idx, case_name in enumerate(data.case_names):
for case_idx, case_name in enumerate(adfobj.data.case_names):

#Set case nickname:
case_nickname = data.test_nicknames[case_idx]
case_nickname = adfobj.data.test_nicknames[case_idx]

#Set output plot location:
plot_loc = Path(plot_locations[case_idx])
@@ -179,7 +178,7 @@ def global_latlon_map_B(adfobj):
plot_loc.mkdir(parents=True)

#Load re-gridded model files:
mdata = data.load_regrid_da(case_name, var)
mdata = adfobj.data.load_regrid_da(case_name, var)

#Skip this variable/case if the regridded climo file doesn't exist:
if mdata is None:
@@ -239,11 +238,11 @@ def global_latlon_map_B(adfobj):
# difference: each entry should be (lat, lon)
dseasons[s] = mseasons[s] - oseasons[s]

pf.plot_map_and_save(plot_name, case_nickname, data.ref_nickname,
pf.plot_map_and_save(plot_name, case_nickname, adfobj.data.ref_nickname,
[syear_cases[case_idx],eyear_cases[case_idx]],
[syear_baseline,eyear_baseline],
mseasons[s], oseasons[s], dseasons[s],
obs=data.reference_is_obs, **vres)
obs=adfobj.compare_obs, **vres)

#Add plot to website (if enabled):
adfobj.add_website_data(plot_name, var, case_name, category=web_category,
@@ -283,7 +282,7 @@ def global_latlon_map_B(adfobj):
[syear_cases[case_idx],eyear_cases[case_idx]],
[syear_baseline,eyear_baseline],
mseasons[s].sel(lev=pres), oseasons[s].sel(lev=pres), dseasons[s].sel(lev=pres),
obs=data.reference_is_obs, **vres)
obs=adfobj.compare_obs, **vres)

#Add plot to website (if enabled):
adfobj.add_website_data(plot_name, f"{var}_{pres}hpa", case_name, category=web_category,
@@ -343,14 +342,14 @@ def plot_file_op(adfobj, plot_name, var, case_name, season, web_category, redo_p
if plot_name.is_file():
if redo_plot:
plot_name.unlink()
return 1
return True
else:
#Add already-existing plot to website (if enabled):
adfobj.add_website_data(plot_name, var, case_name, category=web_category,
season=season, plot_type=plot_type)
return None # None tells caller that file exists and not to overwrite
return False # False tells caller that file exists and not to overwrite
else:
return 1
return True
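With True/False returns, the call site reads as a plain boolean test. A sketch of the intended caller (the argument list past redo_plot is an assumption, since the hunk header truncates the signature):

# Hypothetical call site inside the season loop:
if plot_file_op(adfobj, plot_name, var, case_name, s,
                web_category, redo_plot, "LatLon"):
    # True: make (or remake) the plot, e.g. via pf.plot_map_and_save(...)
    pass
# False: the existing plot was kept and already added to the website.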

##############
#END OF SCRIPT