From ee7e6f693634f377bf770f9506cbe783727a11c6 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Mon, 29 Jan 2024 15:54:24 -0700 Subject: [PATCH 01/40] Do not exit 'make all' if pylint fails. Resolves #2316. --- python/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/Makefile b/python/Makefile index 271e977046..440e2e0de8 100644 --- a/python/Makefile +++ b/python/Makefile @@ -19,7 +19,7 @@ ifneq ($(verbose), not-set) endif PYLINT=pylint -PYLINT_ARGS=-j 4 --rcfile=ctsm/.pylintrc +PYLINT_ARGS=-j 4 --rcfile=ctsm/.pylintrc --fail-under=0 PYLINT_SRC = \ ctsm # NOTE: These don't pass pylint checking and should be added when we put into effort to get them to pass From 25f7d216e74a096a2c95a989293c0e6d42e0c41b Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Mon, 29 Jan 2024 16:14:07 -0700 Subject: [PATCH 02/40] Fix pylint for ctsm_pylib_dependent_utils.py (except missing-module-docstring). --- python/ctsm/ctsm_pylib_dependent_utils.py | 63 +++++++++++++---------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/python/ctsm/ctsm_pylib_dependent_utils.py b/python/ctsm/ctsm_pylib_dependent_utils.py index 13ccf7a969..4f149c53a9 100644 --- a/python/ctsm/ctsm_pylib_dependent_utils.py +++ b/python/ctsm/ctsm_pylib_dependent_utils.py @@ -1,49 +1,56 @@ -from ctsm.utils import abort import numpy as np +from ctsm.utils import abort -def import_coord_1d(ds, coordName): +def import_coord_1d(data_set, coord_name): """Import 1-d coordinate variable Args: - ds (xarray Dataset): Dataset whose coordinate you want to import. - coordName (str): Name of coordinate to import + data_set (xarray Dataset): Dataset whose coordinate you want to import. + coord_name (str): Name of coordinate to import Returns: xarray DataArray: DataArray corresponding to the requested coordinate. """ - da = ds[coordName] - if len(da.dims) != 1: - abort(f"Expected 1 dimension for {coordName}; found {len(da.dims)}: {da.dims}") - return da, len(da) + data_array = data_set[coord_name] + if len(data_array.dims) != 1: + abort(f"Expected 1 dimension for {coord_name}; " + + f"found {len(data_array.dims)}: {data_array.dims}") + return data_array, len(data_array) -def import_coord_2d(ds, coordName, varName): - """Import 2-d latitude or longitude variable from a CESM history file (e.g., name LATIXY or LONGXY) and return it as a 1-d DataArray that can be used as a coordinate for writing CESM input files +def import_coord_2d(data_set, coord_name, var_name): + """ + Import 2-d latitude or longitude variable from a CESM history file (e.g., name LATIXY + or LONGXY and return it as a 1-d DataArray that can be used as a coordinate for writing + CESM input files Args: - ds (xarray Dataset): Dataset whose coordinate you want to import. - coordName (str): Name of coordinate to import - varName (str): Name of variable with dimension coordName + data_set (xarray Dataset): Dataset whose coordinate you want to import. + coord_name (str): Name of coordinate to import + var_name (str): Name of variable with dimension coord_name Returns: xarray DataArray: 1-d variable that can be used as a coordinate for writing CESM input files int: Length of that variable """ - da = ds[varName] - thisDim = [x for x in da.dims if coordName in x] - if len(thisDim) != 1: - abort(f"Expected 1 dimension name containing {coordName}; found {len(thisDim)}: {thisDim}") - thisDim = thisDim[0] - otherDim = [x for x in da.dims if coordName not in x] - if len(otherDim) != 1: + data_array = data_set[var_name] + this_dim = [x for x in data_array.dims if coord_name in x] + if len(this_dim) != 1: + abort(f"Expected 1 dimension name containing {coord_name}; " + + f"found {len(this_dim)}: {this_dim}") + this_dim = this_dim[0] + other_dim = [x for x in data_array.dims if coord_name not in x] + if len(other_dim) != 1: abort( - f"Expected 1 dimension name not containing {coordName}; found {len(otherDim)}: {otherDim}" + f"Expected 1 dimension name not containing {coord_name}; " + + f"found {len(other_dim)}: {other_dim}" ) - otherDim = otherDim[0] - da = da.astype(np.float32) - da = da.isel({otherDim: [0]}).squeeze().rename({thisDim: coordName}).rename(coordName) - da = da.assign_coords({coordName: da.values}) - da.attrs["long_name"] = "coordinate " + da.attrs["long_name"] - da.attrs["units"] = da.attrs["units"].replace(" ", "_") - return da, len(da) + other_dim = other_dim[0] + data_array = data_array.astype(np.float32) + data_array = data_array.isel({other_dim: [0]}).squeeze() + data_array = data_array.rename({this_dim: coord_name}).rename(coord_name) + data_array = data_array.assign_coords({coord_name: data_array.values}) + data_array.attrs["long_name"] = "coordinate " + data_array.attrs["long_name"] + data_array.attrs["units"] = data_array.attrs["units"].replace(" ", "_") + return data_array, len(data_array) From 714033708b6bfd0617dcc4b11e6ef40a6da9c45a Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Mon, 29 Jan 2024 21:31:46 -0700 Subject: [PATCH 03/40] Fix pylint for process_ggcmi_sdates.py. --- .../crop_calendars/process_ggcmi_shdates.py | 391 +++++++++++------- 1 file changed, 253 insertions(+), 138 deletions(-) diff --git a/python/ctsm/crop_calendars/process_ggcmi_shdates.py b/python/ctsm/crop_calendars/process_ggcmi_shdates.py index 835f91cb22..cada2b421b 100644 --- a/python/ctsm/crop_calendars/process_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/process_ggcmi_shdates.py @@ -1,16 +1,21 @@ -import numpy as np -import xarray as xr -import os -import datetime as dt -import cftime +""" +Convert GGCMI crop calendar files for use in CTSM +""" + import sys import argparse import logging +import os +import datetime as dt +import numpy as np +import xarray as xr +import cftime # -- add python/ctsm to path (needed if we want to run process_ggcmi_shdates stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) +# pylint: disable=wrong-import-position from ctsm import ctsm_logging import ctsm.crop_calendars.cropcal_utils as utils import ctsm.crop_calendars.regrid_ggcmi_shdates as regrid @@ -18,19 +23,28 @@ logger = logging.getLogger(__name__) -def get_cft(y): - return cftime.DatetimeNoLeap(y, 1, 1, 0, 0, 0, 0, has_year_zero=True) +def get_cft(year): + """ + Given a year, return the cftime.DatetimeNoLeap of Jan. 1 at 00:00. + """ + return cftime.DatetimeNoLeap(year, 1, 1, 0, 0, 0, 0, has_year_zero=True) -def get_dayssince_jan1y1(y1, y): - cft_y1 = get_cft(y1) - cft_y = get_cft(y) +def get_dayssince_jan1y1(year1, year): + """ + Get the number of days since Jan. 1 of year1 + """ + cft_y1 = get_cft(year1) + cft_y = get_cft(year) time_delta = cft_y - cft_y1 time_delta_secs = time_delta.total_seconds() return time_delta_secs / (60 * 60 * 24) def main(): + """ + main() function for calling process_ggcmi_shdates.py from command line. + """ ctsm_logging.setup_logging_pre_config() args = process_ggcmi_shdates_args() process_ggcmi_shdates( @@ -40,7 +54,6 @@ def main(): args.file_specifier, args.first_year, args.last_year, - args.verbose, args.ggcmi_author, args.regrid_resolution, args.regrid_template_file, @@ -50,8 +63,14 @@ def main(): def process_ggcmi_shdates_args(): + """ + Set up and parse input arguments for working with GGCMI crop calendar files + """ parser = argparse.ArgumentParser( - description="Converts raw sowing and harvest date files provided by GGCMI into a format that CLM can read, optionally at a target resolution." + description=( + "Converts raw sowing and harvest date files provided by GGCMI into " + + "a format that CLM can read, optionally at a target resolution." + ) ) # Required @@ -72,7 +91,10 @@ def process_ggcmi_shdates_args(): parser.add_argument( "-a", "--author", - help="String to be saved in author_thisfile attribute of output files. E.g., 'Author Name (authorname@ucar.edu)'", + help=( + "String to be saved in author_thisfile attribute of output files. " + + "E.g., 'Author Name (authorname@ucar.edu)'" + ), type=str, required=True, ) @@ -80,21 +102,30 @@ def process_ggcmi_shdates_args(): # Optional parser.add_argument( "--file-specifier", - help="String following CROP_IRR_ in input filenames. E.g., mai_ir_FILESPECIFIER.nc4. Will also be saved to output filenames.", + help=( + "String following CROP_IRR_ in input filenames. E.g., mai_ir_FILESPECIFIER.nc4. " + + "Will also be saved to output filenames." + ), type=str, default="ggcmi_crop_calendar_phase3_v1.01", ) parser.add_argument( "-y1", "--first-year", - help="First year in output files. Must be present in template file, unless it's the same as the last year.", + help=( + "First year in output files. Must be present in template file, " + + "unless it's the same as the last year." + ), type=int, default=2000, ) parser.add_argument( "-yN", "--last-year", - help="Last year in output files. Must be present in template file, unless it's the same as the first year.", + help=( + "Last year in output files. Must be present in template file, " + + "unless it's the same as the first year." + ), type=int, default=2000, ) @@ -117,53 +148,19 @@ def process_ggcmi_shdates_args(): return args -def process_ggcmi_shdates( - input_directory, - output_directory, - author, - file_specifier, - first_year, - last_year, - verbose, - ggcmi_author, - regrid_resolution, - regrid_template_file, - regrid_extension, - crop_list, -): - - input_directory = os.path.realpath(input_directory) - output_directory = os.path.realpath(output_directory) - - ############################################################ - ### Regrid original GGCMI files to target CLM resolution ### - ############################################################ - - regridded_ggcmi_files_dir = os.path.join( - output_directory, f"regridded_ggcmi_files-{regrid_resolution}" - ) +def setup_crop_dict(): + """ + Associate CLM crop names with (1) their integer counterpart and (2) their GGCMI counterpart. - regrid.regrid_ggcmi_shdates( - regrid_resolution, - regrid_template_file, - input_directory, - regridded_ggcmi_files_dir, - regrid_extension, - crop_list, - ) + Some notes: + - As "CLMname: {clm_num, thiscrop_ggcmi}" + - CLM names and numbers taken from commit 3dcbc7499a57904750a994672fc36b4221b9def5 + - Using one global GGCMI value for both temperate and tropical versions of corn and soybean. + - There is no GGCMI equivalent of CLM's winter barley and rye. Using winter wheat instead. + - Using GGCMI "pea" for CLM pulses, as suggested by GGCMI phase 3 protocol. + - Only using GGCMI "ri1" for rice; ignoring "ri2". + """ - ########################### - ### Define dictionaries ### - ########################### - - # First, we associate CLM crop names with (1) their integer counterpart and (2) their GGCMI counterpart. - # Some notes: - # - As "CLMname: {clm_num, thiscrop_ggcmi}" - # - CLM names and numbers taken from commit `3dcbc7499a57904750a994672fc36b4221b9def5` - # - Using one global GGCMI value for both temperate and tropical versions of corn and soybean. - # - There is no GGCMI equivalent of CLM's winter barley and rye. Using winter wheat instead. - # - Using GGCMI `pea` for CLM pulses, as suggested by GGCMI phase 3 protocol. - # - Only using GGCMI `ri1` for rice; ignoring `ri2`. def set_crop_dict(thisnum, thisname): return {"clm_num": thisnum, "thiscrop_ggcmi": thisname} @@ -234,8 +231,16 @@ def set_crop_dict(thisnum, thisname): "c3_irrigated": set_crop_dict(16, None), } - # Next, we associate CLM variable names with their GGCMI counterparts. We also save a placeholder for output file paths associated with each variable. - # As CLMname: {GGCMIname, output_file} + return crop_dict + + +def setup_var_dict(): + """ + Associate CLM variable names with their GGCMI counterparts. + - We also save a placeholder for output file paths associated with each variable. + - As CLMname: {GGCMIname, output_file} + """ + def set_var_dict(name_ggcmi, outfile): return {"name_ggcmi": name_ggcmi, "outfile": outfile} @@ -243,23 +248,178 @@ def set_var_dict(name_ggcmi, outfile): "sdate": set_var_dict("planting_day", ""), "hdate": set_var_dict("maturity_day", ""), } + return variable_dict + + +def set_var_attrs(thisvar_da, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue): + """ + Set output variable attributes + """ + + longname = thisvar_da.attrs["long_name"] + longname = longname.replace("rainfed", thiscrop_clm).replace("irrigated", thiscrop_clm) + thisvar_da.attrs["long_name"] = longname + + if thiscrop_ggcmi is None: + thisvar_da.attrs["crop_name_clm"] = "none" + thisvar_da.attrs["crop_name_ggcmi"] = "none" + else: + thisvar_da.attrs["crop_name_clm"] = thiscrop_clm + thisvar_da.attrs["crop_name_ggcmi"] = thiscrop_ggcmi + + thisvar_da.attrs["short_name_ggcmi"] = varname_ggcmi + thisvar_da.attrs["units"] = "day of year" + thisvar_da.encoding["_FillValue"] = new_fillvalue + + # scale_factor and add_offset are required by I/O library for short data + # From https://www.unidata.ucar.edu/software/netcdf/workshops/2010/bestpractices/Packing.html: + # unpacked_value = packed_value * scale_factor + add_offset + thisvar_da.attrs["scale_factor"] = np.int16(1) + thisvar_da.attrs["add_offset"] = np.int16(0) + return thisvar_da + + +def fill_convert_int(thisvar_ds, thiscrop_ggcmi, varname_ggcmi, new_fillvalue): + """ + Ensure fill value and real data are correct format + """ + dummyvalue = -1 + thisvar_ds.variables[varname_ggcmi].encoding["_FillValue"] = new_fillvalue + if thiscrop_ggcmi is None: + thisvar_ds.variables[varname_ggcmi].values.fill(dummyvalue) + else: + thisvar_ds.variables[varname_ggcmi].values[ + np.isnan(thisvar_ds.variables[varname_ggcmi].values) + ] = new_fillvalue + thisvar_ds.variables[varname_ggcmi].values = thisvar_ds.variables[ + varname_ggcmi + ].values.astype("int16") + + return thisvar_ds + + +def add_time_dim(thisvar_ds, template_ds, varname_ggcmi, varname_clm): + """ + Add time dimension (https://stackoverflow.com/a/62862440) + - Repeats original map for every timestep + - Probably not necessary to use this method, since I only end up extracting thisvar_ds.values + anyway---I could probably use some numpy method instead. + """ + + thisvar_ds = thisvar_ds.expand_dims(time=template_ds.time) + thisvar_da_tmp = thisvar_ds[varname_ggcmi] + thisvar_da = xr.DataArray( + data=thisvar_da_tmp.values.astype("int16"), + attrs=thisvar_da_tmp.attrs, + coords=thisvar_da_tmp.coords, + name=varname_clm, + ) + + return thisvar_da + + +def create_output_files( + regrid_resolution, + variable_dict, + output_directory, + file_specifier, + first_year, + last_year, + template_ds, +): + """ + Create output files, one for each variable + """ + datetime_string = dt.datetime.now().strftime("%year%m%d_%H%M%S") + nninterp_suffix = "nninterp-" + regrid_resolution + for var in variable_dict: + basename = ( + f"{var}s_{file_specifier}_{nninterp_suffix}." + + f"{first_year}-{last_year}.{datetime_string}.nc" + ) + outfile = os.path.join(output_directory, basename) + variable_dict[var]["outfile"] = outfile + template_ds.to_netcdf( + path=variable_dict[var]["outfile"], + format="NETCDF3_CLASSIC", + ) + + return nninterp_suffix + + +def strip_dataset(cropcal_ds, varname_ggcmi): + """ + Remove all variables except one from Dataset + """ + droplist = [] + for i in list(cropcal_ds.keys()): + if i != varname_ggcmi: + droplist.append(i) + thisvar_ds = cropcal_ds.drop(droplist) + return thisvar_ds + + +def process_ggcmi_shdates( + input_directory, + output_directory, + author, + file_specifier, + first_year, + last_year, + ggcmi_author, + regrid_resolution, + regrid_template_file, + regrid_extension, + crop_list, +): + """ + Convert GGCMI crop calendar files for use in CTSM + """ + + input_directory = os.path.realpath(input_directory) + output_directory = os.path.realpath(output_directory) + + ############################################################ + ### Regrid original GGCMI files to target CLM resolution ### + ############################################################ + + regridded_ggcmi_files_dir = os.path.join( + output_directory, f"regridded_ggcmi_files-{regrid_resolution}" + ) + + regrid.regrid_ggcmi_shdates( + regrid_resolution, + regrid_template_file, + input_directory, + regridded_ggcmi_files_dir, + regrid_extension, + crop_list, + ) + + # Set up dictionaries used in remapping crops and variables between GGCMI and CLM + crop_dict = setup_crop_dict() + variable_dict = setup_var_dict() ################################ ### Instantiate output files ### ################################ # Global attributes for output files + comment = ( + "Day of year is 1-indexed (i.e., Jan. 1 = 1). " + + "Filled using cdo -remapnn,$original -setmisstonn" + ) out_attrs = { "title": "GGCMI crop calendar for Phase 3, v1.01", "author_thisfile": author, "author_original": ggcmi_author, - "comment": "Day of year is 1-indexed (i.e., Jan. 1 = 1). Filled using cdo -remapnn,$original -setmisstonn", + "comment": comment, "created": dt.datetime.now().replace(microsecond=0).astimezone().isoformat(), } # Create template dataset time_array = np.array( - [get_dayssince_jan1y1(first_year, y) for y in np.arange(first_year, last_year + 1)] + [get_dayssince_jan1y1(first_year, year) for year in np.arange(first_year, last_year + 1)] ) time_coord = xr.IndexVariable( "time", @@ -273,18 +433,15 @@ def set_var_dict(name_ggcmi, outfile): template_ds = xr.Dataset(coords={"time": time_coord}, attrs=out_attrs) # Create output files - datetime_string = dt.datetime.now().strftime("%Y%m%d_%H%M%S") - nninterp_suffix = "nninterp-" + regrid_resolution - for v in variable_dict: - outfile = os.path.join( - output_directory, - f"{v}s_{file_specifier}_{nninterp_suffix}.{first_year}-{last_year}.{datetime_string}.nc", - ) - variable_dict[v]["outfile"] = outfile - template_ds.to_netcdf( - path=variable_dict[v]["outfile"], - format="NETCDF3_CLASSIC", - ) + nninterp_suffix = create_output_files( + regrid_resolution, + variable_dict, + output_directory, + file_specifier, + first_year, + last_year, + template_ds, + ) ######################### ### Process all crops ### @@ -293,7 +450,7 @@ def set_var_dict(name_ggcmi, outfile): for thiscrop_clm in crop_dict: # Which crop are we on? - c = list(crop_dict.keys()).index(thiscrop_clm) + 1 + crop_int = list(crop_dict.keys()).index(thiscrop_clm) + 1 # Get information about this crop this_dict = crop_dict[thiscrop_clm] @@ -306,18 +463,24 @@ def set_var_dict(name_ggcmi, outfile): # If no corresponding GGCMI crop, skip opening dataset. # Will use previous cropcal_ds as a template. - if thiscrop_ggcmi == None: - if c == 1: + if thiscrop_ggcmi is None: + if crop_int == 1: raise ValueError(f"First crop ({thiscrop_clm}) must have a GGCMI type") logger.info( - "Filling %s with dummy data (%d of %d)..." % (str(thiscrop_clm), c, len(crop_dict)) + "Filling %s with dummy data (%d of %d)...", + str(thiscrop_clm), + crop_int, + len(crop_dict), ) # Otherwise, import crop calendar file else: logger.info( - "Importing %s -> %s (%d of %d)..." - % (str(thiscrop_ggcmi), str(thiscrop_clm), c, len(crop_dict)) + "Importing %s -> %s (%d of %d)...", + str(thiscrop_ggcmi), + str(thiscrop_clm), + crop_int, + len(crop_dict), ) file_ggcmi = os.path.join( @@ -326,7 +489,7 @@ def set_var_dict(name_ggcmi, outfile): ) if not os.path.exists(file_ggcmi): logger.warning( - f"Skipping {thiscrop_ggcmi} because input file not found: {file_ggcmi}" + "Skipping %s because input file not found: %s", thiscrop_ggcmi, file_ggcmi ) continue cropcal_ds = xr.open_dataset(file_ggcmi) @@ -338,7 +501,7 @@ def set_var_dict(name_ggcmi, outfile): for thisvar_clm in variable_dict: # Get GGCMI netCDF info varname_ggcmi = variable_dict[thisvar_clm]["name_ggcmi"] - logger.info(" Processing %s..." % varname_ggcmi) + logger.info(" Processing %s...", varname_ggcmi) # Get CLM netCDF info varname_clm = thisvar_clm + "1_" + str(thiscrop_int) @@ -347,69 +510,21 @@ def set_var_dict(name_ggcmi, outfile): raise Exception("Output file not found: " + file_clm) # Strip dataset to just this variable - droplist = [] - for i in list(cropcal_ds.keys()): - if i != varname_ggcmi: - droplist.append(i) - thisvar_ds = cropcal_ds.drop(droplist) - thisvar_ds = thisvar_ds.load() + strip_dataset(cropcal_ds, varname_ggcmi) # Convert to integer new_fillvalue = -1 - dummyvalue = -1 - thisvar_ds.variables[varname_ggcmi].encoding["_FillValue"] = new_fillvalue - if thiscrop_ggcmi == None: - thisvar_ds.variables[varname_ggcmi].values.fill(dummyvalue) - else: - thisvar_ds.variables[varname_ggcmi].values[ - np.isnan(thisvar_ds.variables[varname_ggcmi].values) - ] = new_fillvalue - thisvar_ds.variables[varname_ggcmi].values = thisvar_ds.variables[ - varname_ggcmi - ].values.astype("int16") + thisvar_ds = fill_convert_int(thisvar_ds, thiscrop_ggcmi, varname_ggcmi, new_fillvalue) # Add time dimension (https://stackoverflow.com/a/62862440) - # (Repeats original map for every timestep) - # Probably not necessary to use this method, since I only end up extracting thisvar_ds.values anyway---I could probably use some numpy method instead. - thisvar_ds = thisvar_ds.expand_dims(time=template_ds.time) - thisvar_da_tmp = thisvar_ds[varname_ggcmi] - thisvar_da = xr.DataArray( - data=thisvar_da_tmp.values.astype("int16"), - attrs=thisvar_da_tmp.attrs, - coords=thisvar_da_tmp.coords, - name=varname_clm, - ) - - # Edit/add variable attributes etc. - longname = thisvar_da.attrs["long_name"] - longname = longname.replace("rainfed", thiscrop_clm).replace("irrigated", thiscrop_clm) - - def set_var_attrs( - thisvar_da, longname, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue - ): - thisvar_da.attrs["long_name"] = longname - if thiscrop_ggcmi == None: - thisvar_da.attrs["crop_name_clm"] = "none" - thisvar_da.attrs["crop_name_ggcmi"] = "none" - else: - thisvar_da.attrs["crop_name_clm"] = thiscrop_clm - thisvar_da.attrs["crop_name_ggcmi"] = thiscrop_ggcmi - thisvar_da.attrs["short_name_ggcmi"] = varname_ggcmi - thisvar_da.attrs["units"] = "day of year" - thisvar_da.encoding["_FillValue"] = new_fillvalue - # scale_factor and add_offset are required by I/O library for short data - # From https://www.unidata.ucar.edu/software/netcdf/workshops/2010/bestpractices/Packing.html: - # unpacked_value = packed_value * scale_factor + add_offset - thisvar_da.attrs["scale_factor"] = np.int16(1) - thisvar_da.attrs["add_offset"] = np.int16(0) - return thisvar_da + thisvar_da = add_time_dim(thisvar_ds, template_ds, varname_ggcmi, varname_clm) thisvar_da = set_var_attrs( - thisvar_da, longname, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue + thisvar_da, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue ) # Save - logger.info(" Saving %s..." % varname_ggcmi) + logger.info(" Saving %s...", varname_ggcmi) thisvar_da.to_netcdf(file_clm, mode="a", format="NETCDF3_CLASSIC") cropcal_ds.close() From 8347afc5dc85aae8738d466bd2c18d7f2d3651a4 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 30 Jan 2024 15:16:11 -0700 Subject: [PATCH 04/40] Satisfy pylint for generate_gdds_functions.py. --- .../crop_calendars/generate_gdds_functions.py | 724 ++++++++++-------- 1 file changed, 417 insertions(+), 307 deletions(-) diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index cb05f1920d..74e8fd57f4 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -1,55 +1,77 @@ -import numpy as np -import xarray as xr +""" +Functions to support generate_gdds.py +""" +# pylint: disable=too-many-lines,too-many-statements import warnings import os +import sys import glob import datetime as dt from importlib import util as importlib_util +import numpy as np +import xarray as xr # Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script in the RUN phase seems to require the python/ directory to be manually added to path. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. _CTSM_PYTHON = os.path.join( os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" ) -import sys - sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_utils as utils -import ctsm.crop_calendars.cropcal_module as cc +import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position +import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position -can_plot = True +CAN_PLOT = True try: + # pylint: disable=wildcard-import,unused-wildcard-import + # pylint: disable=import-error from ctsm.crop_calendars.cropcal_figs_module import * from matplotlib.transforms import Bbox warnings.filterwarnings( "ignore", - message="__len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry.", + message=( + "__len__ for multi-part geometries is deprecated and will be removed in Shapely " + + "2.0. Check the length of the `geoms` property instead to get the number of " + + "parts of a multi-part geometry." + ), ) warnings.filterwarnings( "ignore", - message="Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.", + message=( + "Iteration over multi-part geometries is deprecated and will be removed in Shapely " + + "2.0. Use the `geoms` property to access the constituent parts of a multi-part " + + "geometry." + ), ) print("Will (attempt to) produce harvest requirement map figure files.") -except: +except ModuleNotFoundError: print("Will NOT produce harvest requirement map figure files.") - can_plot = False + CAN_PLOT = False -# Functions to simultaneously print to console and to log file def log(logger, string): + """ + Simultaneously print INFO messages to console and to log file + """ print(string) logger.info(string) def error(logger, string): + """ + Simultaneously print ERROR messages to console and to log file + """ logger.error(string) raise RuntimeError(string) def check_sdates(dates_ds, sdates_rx, logger, verbose=False): + """ + Checking that input and output sdates match + """ log(logger, " Checking that input and output sdates match...") sdates_grid = utils.grid_one_variable(dates_ds, "SDATES") @@ -58,28 +80,28 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False): any_found = False vegtypes_skipped = [] vegtypes_included = [] - for i, vt_str in enumerate(dates_ds.vegtype_str.values): + for i, vegtype_str in enumerate(dates_ds.vegtype_str.values): # Input - vt = dates_ds.ivt.values[i] - thisVar = f"gs1_{vt}" - if thisVar not in sdates_rx: - vegtypes_skipped = vegtypes_skipped + [vt_str] + vegtype_int = dates_ds.ivt.values[i] + this_var = f"gs1_{vegtype_int}" + if this_var not in sdates_rx: + vegtypes_skipped = vegtypes_skipped + [vegtype_str] # log(logger, f" {vt_str} ({vt}) SKIPPED...") continue - vegtypes_included = vegtypes_included + [vt_str] + vegtypes_included = vegtypes_included + [vegtype_str] any_found = True if verbose: - log(logger, f" {vt_str} ({vt})...") - in_map = sdates_rx[thisVar].squeeze(drop=True) + log(logger, f" {vegtype_str} ({vegtype_int})...") + in_map = sdates_rx[this_var].squeeze(drop=True) # Output - out_map = sdates_grid.sel(ivt_str=vt_str).squeeze(drop=True) + out_map = sdates_grid.sel(ivt_str=vegtype_str).squeeze(drop=True) # Check for differences diff_map = out_map - in_map diff_map_notnan = diff_map.values[np.invert(np.isnan(diff_map.values))] if np.any(diff_map_notnan): - log(logger, f"Difference(s) found in {vt_str}") + log(logger, f"Difference(s) found in {vegtype_str}") here = np.where(diff_map_notnan) log(logger, "in:") in_map_notnan = in_map.values[np.invert(np.isnan(diff_map.values))] @@ -91,7 +113,7 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False): log(logger, diff_map_notnan[here][0:4]) all_ok = False - if not (any_found): + if not any_found: error(logger, "No matching variables found in sdates_rx!") # Sanity checks for included vegetation types @@ -102,7 +124,8 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False): elif vegtypes_skipped_weird: log( logger, - f"\nWarning: Some crop types had output rainfed patches but no irrigated patches: {vegtypes_skipped_weird}", + "\nWarning: Some crop types had output rainfed patches but no irrigated patches: " + + f"{vegtypes_skipped_weird}", ) if all_ok: @@ -111,34 +134,42 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False): error(logger, " ❌ Input and output sdates differ.") -def import_rx_dates(s_or_h, date_inFile, incl_patches1d_itype_veg, mxsowings, logger): - if isinstance(date_inFile, xr.Dataset): - return date_inFile - elif not isinstance(date_inFile, str): +def import_rx_dates(s_or_h, date_infile, incl_patches1d_itype_veg, mxsowings, logger): + """ + Import prescribed sowing or harvest dates + """ + if isinstance(date_infile, xr.Dataset): + return date_infile + if not isinstance(date_infile, str): error( logger, - f"Importing {s_or_h}dates_rx: Expected date_inFile to be str or DataArray, not {type(date_inFile)}", + f"Importing {s_or_h}dates_rx: Expected date_infile to be str or DataArray," + + f"not {type(date_infile)}", ) # Which vegetation types were simulated? - itype_veg_toImport = np.unique(incl_patches1d_itype_veg) + itype_veg_to_import = np.unique(incl_patches1d_itype_veg) - date_varList = [] - for i in itype_veg_toImport: - for g in np.arange(mxsowings): - thisVar = f"{s_or_h}date{g+1}_{i}" - date_varList = date_varList + [thisVar] + date_var_list = [] + for i in itype_veg_to_import: + for n_sowing in np.arange(mxsowings): + this_var = f"{s_or_h}date{n_sowing+1}_{i}" + date_var_list = date_var_list + [this_var] - ds = utils.import_ds(date_inFile, myVars=date_varList) + this_ds = utils.import_ds(date_infile, myVars=date_var_list) - for v in ds: - ds = ds.rename({v: v.replace(f"{s_or_h}date", "gs")}) + for var in this_ds: + this_ds = this_ds.rename({var: var.replace(f"{s_or_h}date", "gs")}) - return ds + return this_ds -def thisCrop_map_to_patches(lon_points, lat_points, map_ds, vegtype_int): - # xarray pointwise indexing; see https://xarray.pydata.org/en/stable/user-guide/indexing.html#more-advanced-indexing +def this_crop_map_to_patches(lon_points, lat_points, map_ds, vegtype_int): + """ + Given a map, get a vector of patches + """ + # xarray pointwise indexing; + # see https://xarray.pydata.org/en/stable/user-guide/indexing.html#more-advanced-indexing return ( map_ds[f"gs1_{vegtype_int}"] .sel(lon=xr.DataArray(lon_points, dims="patch"), lat=xr.DataArray(lat_points, dims="patch")) @@ -146,8 +177,10 @@ def thisCrop_map_to_patches(lon_points, lat_points, map_ds, vegtype_int): ) -# Get and grid mean GDDs in GGCMI growing season def yp_list_to_ds(yp_list, daily_ds, incl_vegtypes_str, dates_rx, longname_prefix, logger): + """ + Get and grid mean GDDs in GGCMI growing season + """ # Get means warnings.filterwarnings( "ignore", message="Mean of empty slice" @@ -160,44 +193,45 @@ def yp_list_to_ds(yp_list, daily_ds, incl_vegtypes_str, dates_rx, longname_prefi # Grid ds_out = xr.Dataset() - for c, ra in enumerate(p_list): - if isinstance(ra, type(None)): + for this_crop_int, data in enumerate(p_list): + if isinstance(data, type(None)): continue - thisCrop_str = incl_vegtypes_str[c] - log(logger, f" {thisCrop_str}...") - newVar = f"gdd1_{utils.ivt_str2int(thisCrop_str)}" - ds = daily_ds.isel( - patch=np.where(daily_ds.patches1d_itype_veg_str.values == thisCrop_str)[0] + this_crop_str = incl_vegtypes_str[this_crop_int] + log(logger, f" {this_crop_str}...") + new_var = f"gdd1_{utils.ivt_str2int(this_crop_str)}" + this_ds = daily_ds.isel( + patch=np.where(daily_ds.patches1d_itype_veg_str.values == this_crop_str)[0] ) - template_da = ds.patches1d_itype_veg_str - da = xr.DataArray( - data=ra, + template_da = this_ds.patches1d_itype_veg_str + this_da = xr.DataArray( + data=data, coords=template_da.coords, - attrs={"units": "GDD", "long_name": f"{longname_prefix}{thisCrop_str}"}, + attrs={"units": "GDD", "long_name": f"{longname_prefix}{this_crop_str}"}, ) # Grid this crop - ds["tmp"] = da - da_gridded = utils.grid_one_variable(ds, "tmp", vegtype=thisCrop_str).squeeze(drop=True) + this_ds["tmp"] = this_da + da_gridded = utils.grid_one_variable(this_ds, "tmp", vegtype=this_crop_str) + da_gridded = da_gridded.squeeze(drop=True) # Add singleton time dimension and save to output Dataset da_gridded = da_gridded.expand_dims(time=dates_rx.time) - ds_out[newVar] = da_gridded + ds_out[new_var] = da_gridded return ds_out def import_and_process_1yr( - y1, - yN, - y, - thisYear, + year_1, + year_n, + year_index, + this_year, sdates_rx, hdates_rx, gddaccum_yp_list, gddharv_yp_list, - skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + skip_patches_for_isel_nan_last_year, + last_year_active_patch_indices_list, incorrectly_daily, indir, incl_vegtypes_str_in, @@ -207,8 +241,11 @@ def import_and_process_1yr( skip_crops, logger, ): + """ + Import one year of CLM output data for GDD generation + """ save_figs = True - log(logger, f"netCDF year {thisYear}...") + log(logger, f"netCDF year {this_year}...") log(logger, dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) # Without dask, this can take a LONG time at resolutions finer than 2-deg @@ -237,7 +274,7 @@ def import_and_process_1yr( h1_filelist, myVars=["SDATES", "HDATES"], myVegtypes=crops_to_read, - timeSlice=slice(f"{thisYear}-01-01", f"{thisYear}-12-31"), + timeSlice=slice(f"{this_year}-01-01", f"{this_year}-12-31"), chunks=chunks, ) @@ -261,8 +298,8 @@ def import_and_process_1yr( np.sum(~np.isnan(dates_ds.HDATES.values), axis=dates_ds.HDATES.dims.index("mxharvests")) == 0 ) - N_unmatched_nans = np.sum(sdates_all_nan != hdates_all_nan) - if N_unmatched_nans > 0: + n_unmatched_nans = np.sum(sdates_all_nan != hdates_all_nan) + if n_unmatched_nans > 0: error(logger, "Output SDATE and HDATE NaN masks do not match.") if np.sum(~np.isnan(dates_ds.SDATES.values)) == 0: error(logger, "All SDATES are NaN!") @@ -270,15 +307,15 @@ def import_and_process_1yr( # Just work with non-NaN patches for now skip_patches_for_isel_nan = np.where(sdates_all_nan)[0] incl_patches_for_isel_nan = np.where(~sdates_all_nan)[0] - different_nan_mask = y > 0 and not np.array_equal( - skip_patches_for_isel_nan_lastyear, skip_patches_for_isel_nan + different_nan_mask = year_index > 0 and not np.array_equal( + skip_patches_for_isel_nan_last_year, skip_patches_for_isel_nan ) if different_nan_mask: log(logger, " Different NaN mask than last year") incl_thisyr_but_nan_lastyr = [ dates_ds.patch.values[p] for p in incl_patches_for_isel_nan - if p in skip_patches_for_isel_nan_lastyear + if p in skip_patches_for_isel_nan_last_year ] else: incl_thisyr_but_nan_lastyr = [] @@ -286,14 +323,15 @@ def import_and_process_1yr( if skipping_patches_for_isel_nan: log( logger, - f" Ignoring {len(skip_patches_for_isel_nan)} patches with all-NaN sowing and harvest dates.", + f" Ignoring {len(skip_patches_for_isel_nan)} patches with all-NaN sowing and " + + "harvest dates.", ) dates_incl_ds = dates_ds.isel(patch=incl_patches_for_isel_nan) else: dates_incl_ds = dates_ds incl_patches1d_itype_veg = dates_incl_ds.patches1d_itype_veg - if y == 0: + if year_index == 0: incl_vegtypes_str = [c for c in dates_incl_ds.vegtype_str.values if c not in skip_crops] else: incl_vegtypes_str = incl_vegtypes_str_in @@ -304,13 +342,15 @@ def import_and_process_1yr( if incl_vegtypes_str != list(dates_incl_ds.vegtype_str.values): error( logger, - f"Included veg types differ. Previously {incl_vegtypes_str}, now {dates_incl_ds.vegtype_str.values}", + f"Included veg types differ. Previously {incl_vegtypes_str}, " + + f"now {dates_incl_ds.vegtype_str.values}", ) if np.sum(~np.isnan(dates_incl_ds.SDATES.values)) == 0: error(logger, "All SDATES are NaN after ignoring those patches!") - # Some patches can have -1 sowing date?? Hopefully just an artifact of me incorrectly saving SDATES/HDATES daily. + # Some patches can have -1 sowing date?? Hopefully just an artifact of me incorrectly saving + # SDATES/HDATES daily. mxsowings = dates_ds.dims["mxsowings"] mxsowings_dim = dates_ds.SDATES.dims.index("mxsowings") skip_patches_for_isel_sdatelt1 = np.where(dates_incl_ds.SDATES.values < 1)[1] @@ -322,7 +362,8 @@ def import_and_process_1yr( if incorrectly_daily and list(unique_hdates) == [364]: log( logger, - f" ❗ {len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1, but this might have just been because of incorrectly daily outputs. Setting them to 365.", + f" ❗ {len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1, but this" + + "might have just been because of incorrectly daily outputs. Setting them to 365.", ) new_sdates_ar = dates_incl_ds.SDATES.values if mxsowings_dim != 0: @@ -336,13 +377,16 @@ def import_and_process_1yr( else: error( logger, - f"{len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1. Unique affected hdates: {unique_hdates}", + f"{len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1. " + + f"Unique affected hdates: {unique_hdates}", ) - # Some patches can have -1 harvest date?? Hopefully just an artifact of me incorrectly saving SDATES/HDATES daily. Can also happen if patch wasn't active last year + # Some patches can have -1 harvest date?? Hopefully just an artifact of me incorrectly saving + # SDATES/HDATES daily. Can also happen if patch wasn't active last year mxharvests = dates_ds.dims["mxharvests"] mxharvests_dim = dates_ds.HDATES.dims.index("mxharvests") - # If a patch was inactive last year but was either (a) harvested the last time it was active or (b) was never active, it will have -1 as its harvest date this year. Such instances are okay. + # If a patch was inactive last year but was either (a) harvested the last time it was active or + # (b) was never active, it will have -1 as its harvest date this year. Such instances are okay. hdates_thisyr = dates_incl_ds.HDATES.isel(mxharvests=0) skip_patches_for_isel_hdatelt1 = np.where(hdates_thisyr.values < 1)[0] skipping_patches_for_isel_hdatelt1 = len(skip_patches_for_isel_hdatelt1) > 0 @@ -352,7 +396,6 @@ def import_and_process_1yr( patch=incl_thisyr_but_nan_lastyr ) if np.any(hdates_thisyr_where_nan_lastyr < 1): - # patches_to_fix = hdates_thisyr_where_nan_lastyr.isel(patch=np.where(hdates_thisyr_where_nan_lastyr < 1)[0]).patch.values new_hdates = dates_incl_ds.HDATES.values if mxharvests_dim != 0: error(logger, "Code this up") @@ -360,7 +403,10 @@ def import_and_process_1yr( here = [patch_list.index(x) for x in incl_thisyr_but_nan_lastyr] log( logger, - f" ❗ {len(here)} patches have harvest date -1 because they weren't active last year (and were either never active or were harvested when last active). Ignoring, but you should have done a run with patches always active if they are ever active in the real LU timeseries.", + f" ❗ {len(here)} patches have harvest date -1 because they weren't active last" + + "year (and were either never active or were harvested when last active). " + + "Ignoring, but you should have done a run with patches always active if they are " + + "ever active in the real LU timeseries.", ) new_hdates[0, here] = sdates_thisyr_where_nan_lastyr.values - 1 dates_incl_ds["HDATES"] = xr.DataArray( @@ -382,7 +428,9 @@ def import_and_process_1yr( if incorrectly_daily and list(unique_sdates) == [1]: log( logger, - f" ❗ {len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1??? Seems like this might have just been because of incorrectly daily outputs; setting them to 365.", + f" ❗ {len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1??? Seems like " + + "this might have just been because of incorrectly daily outputs; setting them to " + + "365.", ) new_hdates_ar = dates_incl_ds.HDATES.values if mxharvests_dim != 0: @@ -396,18 +444,21 @@ def import_and_process_1yr( else: error( logger, - f"{len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1. Possible causes:\n * Not using constant crop areas (e.g., flanduse_timeseries from make_lu_for_gddgen.py)\n * Not skipping the first 2 years of output\nUnique affected sdates: {unique_sdates}", + f"{len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1. Possible causes:\n" + + "* Not using constant crop areas (e.g., flanduse_timeseries from " + + "make_lu_for_gddgen.py)\n * Not skipping the first 2 years of output\n" + + f"Unique affected sdates: {unique_sdates}", ) # Make sure there was only one harvest per year - N_extra_harv = np.sum( + n_extra_harv = np.sum( np.nanmax( dates_incl_ds.HDATES.isel(mxharvests=slice(1, mxharvests)).values, axis=mxharvests_dim ) >= 1 ) - if N_extra_harv > 0: - error(logger, f"{N_extra_harv} patches have >1 harvest.") + if n_extra_harv > 0: + error(logger, f"{n_extra_harv} patches have >1 harvest.") # Make sure harvest happened the day before sowing sdates_clm = dates_incl_ds.SDATES.values.squeeze() @@ -432,13 +483,13 @@ def import_and_process_1yr( if mxmats and (imported_sdates or imported_hdates): print(" Limiting growing season length...") hdates_rx = hdates_rx_orig.copy() - for v in hdates_rx_orig: - if v == "time_bounds": + for var in hdates_rx_orig: + if var == "time_bounds": continue # Get max growing season length vegtype_int = int( - v.split("_")[1] + var.split("_")[1] ) # netCDF variable name v should be something like gs1_17 vegtype_str = utils.ivt_int2str(vegtype_int) if vegtype_str == "soybean": @@ -452,41 +503,45 @@ def import_and_process_1yr( continue # Get "prescribed" growing season length - gs_len_rx_da = get_gs_len_da(hdates_rx_orig[v] - sdates_rx[v]) + gs_len_rx_da = get_gs_len_da(hdates_rx_orig[var] - sdates_rx[var]) not_ok = gs_len_rx_da.values > mxmat if not np.any(not_ok): print(f" Not limiting {vegtype_str}: No rx season > {mxmat} days") continue - hdates_limited = hdates_rx_orig[v].copy().values - hdates_limited[np.where(not_ok)] = sdates_rx[v].values[np.where(not_ok)] + mxmat + hdates_limited = hdates_rx_orig[var].copy().values + hdates_limited[np.where(not_ok)] = sdates_rx[var].values[np.where(not_ok)] + mxmat hdates_limited[np.where(hdates_limited > 365)] -= 365 if np.any(hdates_limited < 1): raise RuntimeError("Limited hdates < 1") - elif np.any(hdates_limited > 365): + if np.any(hdates_limited > 365): raise RuntimeError("Limited hdates > 365") - hdates_rx[v] = xr.DataArray( - data=hdates_limited, coords=hdates_rx_orig[v].coords, attrs=hdates_rx_orig[v].attrs + hdates_rx[var] = xr.DataArray( + data=hdates_limited, + coords=hdates_rx_orig[var].coords, + attrs=hdates_rx_orig[var].attrs, ) print( - f" Limited {vegtype_str} growing season length to {mxmat}. Longest was {int(np.max(gs_len_rx_da.values))}, now {int(np.max(get_gs_len_da(hdates_rx[v] - sdates_rx[v]).values))}." + f" Limited {vegtype_str} growing season length to {mxmat}. Longest was " + + f"{int(np.max(gs_len_rx_da.values))}, now " + + f"{int(np.max(get_gs_len_da(hdates_rx[var] - sdates_rx[var]).values))}." ) else: hdates_rx = hdates_rx_orig - log(logger, f" Importing accumulated GDDs...") + log(logger, " Importing accumulated GDDs...") clm_gdd_var = "GDDACCUM" - myVars = [clm_gdd_var, "GDDHARV"] - pattern = os.path.join(indir, f"*h2.{thisYear-1}-01-01*.nc") + my_vars = [clm_gdd_var, "GDDHARV"] + pattern = os.path.join(indir, f"*h2.{this_year-1}-01-01*.nc") h2_files = glob.glob(pattern) if not h2_files: - pattern = os.path.join(indir, f"*h2.{thisYear-1}-01-01*.nc.base") + pattern = os.path.join(indir, f"*h2.{this_year-1}-01-01*.nc.base") h2_files = glob.glob(pattern) if not h2_files: - error(logger, f"No files found matching pattern '*h2.{thisYear-1}-01-01*.nc(.base)'") + error(logger, f"No files found matching pattern '*h2.{this_year-1}-01-01*.nc(.base)'") h2_ds = utils.import_ds( h2_files, - myVars=myVars, + myVars=my_vars, myVegtypes=crops_to_read, chunks=chunks, ) @@ -503,181 +558,209 @@ def import_and_process_1yr( error(logger, f"All {clm_gdd_var} values are zero!") # Get standard datetime axis for outputs - Nyears = yN - y1 + 1 + n_years = year_n - year_1 + 1 if len(gddaccum_yp_list) == 0: - lastYear_active_patch_indices_list = [None for vegtype_str in incl_vegtypes_str] + last_year_active_patch_indices_list = [None for vegtype_str in incl_vegtypes_str] gddaccum_yp_list = [None for vegtype_str in incl_vegtypes_str] if save_figs: gddharv_yp_list = [None for vegtype_str in incl_vegtypes_str] incl_vegtype_indices = [] - for v, vegtype_str in enumerate(incl_vegtypes_str): + for var, vegtype_str in enumerate(incl_vegtypes_str): if vegtype_str in skip_crops: log(logger, f" SKIPPING {vegtype_str}") continue vegtype_int = utils.vegtype_str2int(vegtype_str)[0] - thisCrop_full_patchlist = list(utils.xr_flexsel(h2_ds, vegtype=vegtype_str).patch.values) + this_crop_full_patchlist = list(utils.xr_flexsel(h2_ds, vegtype=vegtype_str).patch.values) # Get time series for each patch of this type - thisCrop_ds = utils.xr_flexsel(h2_incl_ds, vegtype=vegtype_str) - thisCrop_gddaccum_da = thisCrop_ds[clm_gdd_var] + this_crop_ds = utils.xr_flexsel(h2_incl_ds, vegtype=vegtype_str) + this_crop_gddaccum_da = this_crop_ds[clm_gdd_var] if save_figs: - thisCrop_gddharv_da = thisCrop_ds["GDDHARV"] - if not thisCrop_gddaccum_da.size: + this_crop_gddharv_da = this_crop_ds["GDDHARV"] + if not this_crop_gddaccum_da.size: continue log(logger, f" {vegtype_str}...") - incl_vegtype_indices = incl_vegtype_indices + [v] + incl_vegtype_indices = incl_vegtype_indices + [var] # Get prescribed harvest dates for these patches - lon_points = thisCrop_ds.patches1d_lon.values - lat_points = thisCrop_ds.patches1d_lat.values - thisCrop_hdates_rx = thisCrop_map_to_patches(lon_points, lat_points, hdates_rx, vegtype_int) + lon_points = this_crop_ds.patches1d_lon.values + lat_points = this_crop_ds.patches1d_lat.values + this_crop_hdates_rx = this_crop_map_to_patches( + lon_points, lat_points, hdates_rx, vegtype_int + ) - if isinstance(gddaccum_yp_list[v], type(None)): - gddaccum_yp_list[v] = np.full((Nyears + 1, len(thisCrop_full_patchlist)), np.nan) + if isinstance(gddaccum_yp_list[var], type(None)): + gddaccum_yp_list[var] = np.full((n_years + 1, len(this_crop_full_patchlist)), np.nan) if save_figs: - gddharv_yp_list[v] = np.full((Nyears + 1, len(thisCrop_full_patchlist)), np.nan) + gddharv_yp_list[var] = np.full((n_years + 1, len(this_crop_full_patchlist)), np.nan) # Get the accumulated GDDs at each prescribed harvest date - gddaccum_atharv_p = np.full(thisCrop_hdates_rx.shape, np.nan) + gddaccum_atharv_p = np.full(this_crop_hdates_rx.shape, np.nan) if save_figs: - gddharv_atharv_p = np.full(thisCrop_hdates_rx.shape, np.nan) - unique_rx_hdates = np.unique(thisCrop_hdates_rx.values) + gddharv_atharv_p = np.full(this_crop_hdates_rx.shape, np.nan) + unique_rx_hdates = np.unique(this_crop_hdates_rx.values) # Build an indexing tuple patches = [] i_patches = [] i_times = [] - for i, hdate in enumerate(unique_rx_hdates): - here = np.where(thisCrop_hdates_rx.values == hdate)[0] - patches += list(thisCrop_gddaccum_da.patch.values[here]) + for hdate in unique_rx_hdates: + here = np.where(this_crop_hdates_rx.values == hdate)[0] + patches += list(this_crop_gddaccum_da.patch.values[here]) i_patches += list(here) i_times += list(np.full((len(here),), int(hdate - 1))) # Sort back to correct order if not np.all( - thisCrop_gddaccum_da.patch.values[:-1] <= thisCrop_gddaccum_da.patch.values[1:] + this_crop_gddaccum_da.patch.values[:-1] <= this_crop_gddaccum_da.patch.values[1:] ): error(logger, "This code depends on DataArray patch list being sorted.") sortorder = np.argsort(patches) i_patches = list(np.array(i_patches)[np.array(sortorder)]) i_times = list(np.array(i_times)[np.array(sortorder)]) # Select using the indexing tuple - gddaccum_atharv_p = thisCrop_gddaccum_da.values[(i_times, i_patches)] + gddaccum_atharv_p = this_crop_gddaccum_da.values[(i_times, i_patches)] if save_figs: - gddharv_atharv_p = thisCrop_gddharv_da.values[(i_times, i_patches)] + gddharv_atharv_p = this_crop_gddharv_da.values[(i_times, i_patches)] if np.any(np.isnan(gddaccum_atharv_p)): log( logger, - f" ❗ {np.sum(np.isnan(gddaccum_atharv_p))}/{len(gddaccum_atharv_p)} NaN after extracting GDDs accumulated at harvest", + f" ❗ {np.sum(np.isnan(gddaccum_atharv_p))}/{len(gddaccum_atharv_p)} " + + "NaN after extracting GDDs accumulated at harvest", ) if save_figs and np.any(np.isnan(gddharv_atharv_p)): log( logger, - f" ❗ {np.sum(np.isnan(gddharv_atharv_p))}/{len(gddharv_atharv_p)} NaN after extracting GDDHARV", + f" ❗ {np.sum(np.isnan(gddharv_atharv_p))}/{len(gddharv_atharv_p)} " + + "NaN after extracting GDDHARV", ) # Assign these to growing seasons based on whether gs crossed new year - thisYear_active_patch_indices = [ - thisCrop_full_patchlist.index(x) for x in thisCrop_ds.patch.values + this_year_active_patch_indices = [ + this_crop_full_patchlist.index(x) for x in this_crop_ds.patch.values ] - thisCrop_sdates_rx = thisCrop_map_to_patches(lon_points, lat_points, sdates_rx, vegtype_int) - where_gs_thisyr = np.where(thisCrop_sdates_rx < thisCrop_hdates_rx)[0] - tmp_gddaccum = np.full(thisCrop_sdates_rx.shape, np.nan) + this_crop_sdates_rx = this_crop_map_to_patches( + lon_points, lat_points, sdates_rx, vegtype_int + ) + where_gs_thisyr = np.where(this_crop_sdates_rx < this_crop_hdates_rx)[0] + tmp_gddaccum = np.full(this_crop_sdates_rx.shape, np.nan) tmp_gddaccum[where_gs_thisyr] = gddaccum_atharv_p[where_gs_thisyr] if save_figs: tmp_gddharv = np.full(tmp_gddaccum.shape, np.nan) tmp_gddharv[where_gs_thisyr] = gddharv_atharv_p[where_gs_thisyr] - if y > 0: - lastYear_active_patch_indices = lastYear_active_patch_indices_list[v] - where_gs_lastyr = np.where(thisCrop_sdates_rx > thisCrop_hdates_rx)[0] - active_thisYear_where_gs_lastyr_indices = [ - thisYear_active_patch_indices[x] for x in where_gs_lastyr + if year_index > 0: + last_year_active_patch_indices = last_year_active_patch_indices_list[var] + where_gs_lastyr = np.where(this_crop_sdates_rx > this_crop_hdates_rx)[0] + active_this_year_where_gs_lastyr_indices = [ + this_year_active_patch_indices[x] for x in where_gs_lastyr ] - if not np.array_equal(lastYear_active_patch_indices, thisYear_active_patch_indices): + if not np.array_equal(last_year_active_patch_indices, this_year_active_patch_indices): if incorrectly_daily: log( logger, - " ❗ This year's active patch indices differ from last year's. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ This year's active patch indices differ from last year's. " + + "Allowing because this might just be an artifact of incorrectly daily " + + "outputs, BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "This year's active patch indices differ from last year's.") # Make sure we're not about to overwrite any existing values. if np.any( - ~np.isnan(gddaccum_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + ~np.isnan( + gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected non-NaN for last season's GDD accumulation. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected non-NaN for last season's GDD accumulation. " + + "Allowing because this might just be an artifact of incorrectly daily " + + "outputs, BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected non-NaN for last season's GDD accumulation") if save_figs and np.any( - ~np.isnan(gddharv_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + ~np.isnan( + gddharv_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected non-NaN for last season's GDDHARV. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected non-NaN for last season's GDDHARV. Allowing " + + "because this might just be an artifact of incorrectly daily outputs, " + + "BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected non-NaN for last season's GDDHARV") # Fill. - gddaccum_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices] = gddaccum_atharv_p[ - where_gs_lastyr - ] + gddaccum_yp_list[var][ + year_index - 1, active_this_year_where_gs_lastyr_indices + ] = gddaccum_atharv_p[where_gs_lastyr] if save_figs: - gddharv_yp_list[v][ - y - 1, active_thisYear_where_gs_lastyr_indices + gddharv_yp_list[var][ + year_index - 1, active_this_year_where_gs_lastyr_indices ] = gddharv_atharv_p[where_gs_lastyr] # Last year's season should be filled out now; make sure. if np.any( - np.isnan(gddaccum_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + np.isnan( + gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected NaN for last season's GDD accumulation. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected NaN for last season's GDD accumulation. Allowing " + + "because this might just be an artifact of incorrectly daily outputs, " + + "BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected NaN for last season's GDD accumulation.") if save_figs and np.any( - np.isnan(gddharv_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + np.isnan( + gddharv_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected NaN for last season's GDDHARV. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected NaN for last season's GDDHARV. Allowing because " + + "this might just be an artifact of incorrectly daily outputs, BUT " + + "RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected NaN for last season's GDDHARV.") - gddaccum_yp_list[v][y, thisYear_active_patch_indices] = tmp_gddaccum + gddaccum_yp_list[var][year_index, this_year_active_patch_indices] = tmp_gddaccum if save_figs: - gddharv_yp_list[v][y, thisYear_active_patch_indices] = tmp_gddharv + gddharv_yp_list[var][year_index, this_year_active_patch_indices] = tmp_gddharv - # Make sure that NaN masks are the same for this year's sdates and 'filled-out' GDDs from last year - if y > 0: + # Make sure that NaN masks are the same for this year's sdates and 'filled-out' GDDs from + # last year + if year_index > 0: nanmask_output_sdates = np.isnan( dates_ds.SDATES.isel( mxsowings=0, patch=np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] ).values ) - nanmask_output_gdds_lastyr = np.isnan(gddaccum_yp_list[v][y - 1, :]) + nanmask_output_gdds_lastyr = np.isnan(gddaccum_yp_list[var][year_index - 1, :]) if not np.array_equal(nanmask_output_gdds_lastyr, nanmask_output_sdates): if incorrectly_daily: log( logger, - " ❗ NaN masks differ between this year's sdates and 'filled-out' GDDs from last year. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ NaN masks differ between this year's sdates and 'filled-out' " + + "GDDs from last year. Allowing because this might just be an artifact of " + + "incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", ) else: error( logger, - "NaN masks differ between this year's sdates and 'filled-out' GDDs from last year", + "NaN masks differ between this year's sdates and 'filled-out' GDDs from " + + "last year", ) - lastYear_active_patch_indices_list[v] = thisYear_active_patch_indices + last_year_active_patch_indices_list[var] = this_year_active_patch_indices - skip_patches_for_isel_nan_lastyear = skip_patches_for_isel_nan + skip_patches_for_isel_nan_last_year = skip_patches_for_isel_nan # Could save space by only saving variables needed for gridding log(logger, " Saving h2_ds...") @@ -689,8 +772,8 @@ def import_and_process_1yr( hdates_rx, gddaccum_yp_list, gddharv_yp_list, - skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + skip_patches_for_isel_nan_last_year, + last_year_active_patch_indices_list, incorrectly_daily, incl_vegtypes_str, incl_patches1d_itype_veg, @@ -698,35 +781,37 @@ def import_and_process_1yr( ) -def get_multicrop_maps(ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units): +def get_multicrop_maps(this_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units): + # pylint: disable=missing-function-docstring # Get GDDs for these crops - da_eachCFT = xr.concat((ds[x] for i, x in enumerate(theseVars)), dim="cft") - if "time" in ds.dims: - da_eachCFT = da_eachCFT.isel(time=0, drop=True) - da_eachCFT = da_eachCFT.where(da_eachCFT != dummy_fill) - da_eachCFT.attrs["units"] = gdd_units + da_each_cft = xr.concat((this_ds[x] for i, x in enumerate(these_vars)), dim="cft") + if "time" in this_ds.dims: + da_each_cft = da_each_cft.isel(time=0, drop=True) + da_each_cft = da_each_cft.where(da_each_cft != dummy_fill) + da_each_cft.attrs["units"] = gdd_units # What are the maximum differences seen between different crop types? - if len(theseVars) > 1: - maxDiff = np.nanmax(da_eachCFT.max(dim="cft") - da_eachCFT.min(dim="cft")) - if maxDiff > 0: - print(f" Max difference among crop types: {np.round(maxDiff)}") + if len(these_vars) > 1: + max_diff = np.nanmax(da_each_cft.max(dim="cft") - da_each_cft.min(dim="cft")) + if max_diff > 0: + print(f" Max difference among crop types: {np.round(max_diff)}") if crop_fracs_yx is None: - return da_eachCFT.isel(cft=0, drop=True) + return da_each_cft.isel(cft=0, drop=True) # Warn if GDD is NaN anywhere that there is area - da_eachCFT["cft"] = crop_fracs_yx["cft"] - gddNaN_areaPos = np.isnan(da_eachCFT) & (crop_fracs_yx > 0) - if np.any(gddNaN_areaPos): - total_bad_croparea = np.nansum(crop_fracs_yx.where(gddNaN_areaPos).values) + da_each_cft["cft"] = crop_fracs_yx["cft"] + gdd_nan_area_pos = np.isnan(da_each_cft) & (crop_fracs_yx > 0) + if np.any(gdd_nan_area_pos): + total_bad_croparea = np.nansum(crop_fracs_yx.where(gdd_nan_area_pos).values) total_croparea = np.nansum(crop_fracs_yx.values) print( - f" GDD reqt NaN but area positive ({np.round(total_bad_croparea/total_croparea*100, 1)}% of this crop's area)" + " GDD reqt NaN but area positive " + + f"({np.round(total_bad_croparea/total_croparea*100, 1)}% of this crop's area)" ) # Get areas and weights, masking cell-crops with NaN GDDs - crop_fracs_yx = crop_fracs_yx.where(~np.isnan(da_eachCFT)) + crop_fracs_yx = crop_fracs_yx.where(~np.isnan(da_each_cft)) crop_area_yx = crop_fracs_yx.sum(dim="cft") weights_yx = crop_fracs_yx / crop_area_yx weights_sum_gt0 = weights_yx.sum(dim="cft").where(weights_yx > 0) @@ -734,45 +819,48 @@ def get_multicrop_maps(ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units): assert np.isclose(np.nanmax(weights_sum_gt0.values), 1.0) # Mask GDDs and weights where there is no area - da_eachCFT = da_eachCFT.where(crop_fracs_yx > 0) - if len(theseVars) == 1: - return da_eachCFT.isel(cft=0, drop=True) + da_each_cft = da_each_cft.where(crop_fracs_yx > 0) + if len(these_vars) == 1: + return da_each_cft.isel(cft=0, drop=True) weights_yx = weights_yx.where(crop_fracs_yx > 0) weights_sum = weights_yx.sum(dim="cft").where(crop_area_yx > 0) assert np.isclose(np.nanmin(weights_sum.values), 1.0) assert np.isclose(np.nanmax(weights_sum.values), 1.0) # Ensure grid match between GDDs and weights - if not np.array_equal(da_eachCFT["lon"].values, weights_yx["lon"].values): + if not np.array_equal(da_each_cft["lon"].values, weights_yx["lon"].values): raise RuntimeError("lon mismatch") - if not np.array_equal(da_eachCFT["lat"].values, weights_yx["lat"].values): + if not np.array_equal(da_each_cft["lat"].values, weights_yx["lat"].values): raise RuntimeError("lat mismatch") # Get area-weighted mean GDD requirements for all crops - da = (da_eachCFT * weights_yx).sum(dim="cft") - da.attrs["units"] = gdd_units - da = da.where(crop_area_yx > 0) + this_da = (da_each_cft * weights_yx).sum(dim="cft") + this_da.attrs["units"] = gdd_units + this_da = this_da.where(crop_area_yx > 0) # Ensure that weighted mean is between each cell's min and max - whereBad = (da < da_eachCFT.min(dim="cft")) | (da > da_eachCFT.max(dim="cft")) - if np.any(whereBad): - where_belowMin = da.where(da < da_eachCFT.min(dim="cft")) - worst_belowMin = np.min((da_eachCFT.min(dim="cft") - where_belowMin).values) - where_aboveMax = da.where(da > da_eachCFT.max(dim="cft")) - worst_aboveMax = np.max((where_aboveMax - da_eachCFT.max(dim="cft")).values) - worst = max(worst_belowMin, worst_aboveMax) + where_bad = (this_da < da_each_cft.min(dim="cft")) | (this_da > da_each_cft.max(dim="cft")) + if np.any(where_bad): + where_below_min = this_da.where(this_da < da_each_cft.min(dim="cft")) + worst_below_min = np.min((da_each_cft.min(dim="cft") - where_below_min).values) + where_above_max = this_da.where(this_da > da_each_cft.max(dim="cft")) + worst_above_max = np.max((where_above_max - da_each_cft.max(dim="cft")).values) + worst = max(worst_below_min, worst_above_max) tol = 1e-12 if worst > 1e-12: raise RuntimeError( f"Some value is outside expected range by {worst} (exceeds tolerance {tol})" ) - return da + return this_da -if can_plot: +if CAN_PLOT: def get_bounds_ncolors(gdd_spacing, diff_map_yx): + """ + Get information about color bar + """ vmax = np.floor(np.nanmax(diff_map_yx.values) / gdd_spacing) * gdd_spacing vmin = -vmax epsilon = np.nextafter(0, 1) @@ -781,11 +869,11 @@ def get_bounds_ncolors(gdd_spacing, diff_map_yx): bounds.remove(0) bounds[bounds.index(-gdd_spacing)] /= 2 bounds[bounds.index(gdd_spacing)] /= 2 - Ncolors = len(bounds) + 1 - return vmax, bounds, Ncolors + n_colors = len(bounds) + 1 + return vmax, bounds, n_colors - def make_map( - ax, + def make_gengdd_map( + this_axis, this_map, this_title, vmax, @@ -798,11 +886,14 @@ def make_map( cbar_ticks=None, vmin=None, ): + """ + Make maps + """ if bounds: if not cmap: raise RuntimeError("Calling make_map() with bounds requires cmap to be specified") norm = mcolors.BoundaryNorm(bounds, cmap.N, extend=extend) - im1 = ax.pcolormesh( + im1 = this_axis.pcolormesh( this_map.lon.values, this_map.lat.values, this_map, @@ -817,11 +908,11 @@ def make_map( if vmin is not None: raise RuntimeError("Do not specify vmin in this call of make_map()") vmin = -vmax - Ncolors = vmax / gdd_spacing - if Ncolors % 2 == 0: - Ncolors += 1 + n_colors = vmax / gdd_spacing + if n_colors % 2 == 0: + n_colors += 1 if not cmap: - cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], Ncolors) + cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], n_colors) if np.any(this_map.values > vmax) and np.any(this_map.values < vmin): extend = "both" @@ -838,15 +929,15 @@ def make_map( else: vmin = np.floor(vmin / 500) * 500 vmax = np.floor(vmax / 500) * 500 - Ncolors = int(vmax / 500) + n_colors = int(vmax / 500) if not cmap: - cmap = cm.get_cmap(cropcal_colors["seq_other"], Ncolors + 1) + cmap = cm.get_cmap(cropcal_colors["seq_other"], n_colors + 1) extend = "max" extend_color = cmap.colors[-1] - cmap = mcolors.ListedColormap(cmap.colors[:Ncolors]) + cmap = mcolors.ListedColormap(cmap.colors[:n_colors]) cmap.set_over(extend_color) - im1 = ax.pcolormesh( + im1 = this_axis.pcolormesh( this_map.lon.values, this_map.lat.values, this_map, @@ -856,9 +947,9 @@ def make_map( cmap=cmap, ) - ax.set_extent([-180, 180, -63, 90], crs=ccrs.PlateCarree()) - ax.coastlines(linewidth=0.3) - ax.set_title(this_title, fontsize=fontsize_titles, fontweight="bold", y=0.96) + this_axis.set_extent([-180, 180, -63, 90], crs=ccrs.PlateCarree()) + this_axis.coastlines(linewidth=0.3) + this_axis.set_title(this_title, fontsize=fontsize_titles, fontweight="bold", y=0.96) cbar = plt.colorbar( im1, orientation="horizontal", @@ -876,24 +967,30 @@ def make_map( ticks = np.arange(-60, 91, bin_width) ticklabels = [str(x) for x in ticks] - for i, x in enumerate(ticks): - if x % 2: + for i, tick in enumerate(ticks): + if tick % 2: ticklabels[i] = "" plt.yticks(np.arange(-60, 91, 15), labels=ticklabels, fontsize=fontsize_ticklabels) plt.axis("off") - def get_non_nans(in_da, fillValue): - in_da = in_da.where(in_da != fillValue) + def get_non_nans(in_da, fill_value): + """ + Get non-NaN, non-fill values of a DataArray + """ + in_da = in_da.where(in_da != fill_value) return in_da.values[~np.isnan(in_da.values)] - def set_boxplot_props(bp, color, linewidth): + def set_boxplot_props(bpl, color, linewidth): + """ + Set boxplot properties + """ linewidth = 1.5 - plt.setp(bp["boxes"], color=color, linewidth=linewidth) - plt.setp(bp["whiskers"], color=color, linewidth=linewidth) - plt.setp(bp["caps"], color=color, linewidth=linewidth) - plt.setp(bp["medians"], color=color, linewidth=linewidth) + plt.setp(bpl["boxes"], color=color, linewidth=linewidth) + plt.setp(bpl["whiskers"], color=color, linewidth=linewidth) + plt.setp(bpl["caps"], color=color, linewidth=linewidth) + plt.setp(bpl["medians"], color=color, linewidth=linewidth) plt.setp( - bp["fliers"], + bpl["fliers"], markeredgecolor=color, markersize=6, linewidth=linewidth, @@ -901,16 +998,19 @@ def set_boxplot_props(bp, color, linewidth): ) def make_plot(data, offset, linewidth): + """ + Make boxplot + """ offset = 0.4 * offset bpl = plt.boxplot( data, positions=np.array(range(len(data))) * 2.0 + offset, widths=0.6, - boxprops=dict(linewidth=linewidth), - whiskerprops=dict(linewidth=linewidth), - capprops=dict(linewidth=linewidth), - medianprops=dict(linewidth=linewidth), - flierprops=dict(markeredgewidth=0.5), + boxprops={"linewidth": linewidth}, + whiskerprops={"linewidth": linewidth}, + capprops={"linewidth": linewidth}, + medianprops={"linewidth": linewidth}, + flierprops={"markeredgewidth": 0.5}, ) return bpl @@ -921,26 +1021,31 @@ def make_figures( run1_name, run2_name, logger, - thisDir=None, + this_dir=None, gdd_maps_ds=None, gddharv_maps_ds=None, outdir_figs=None, linewidth=1.5, ): + """ + Make map-and-boxplot figures + """ if not gdd_maps_ds: - if not thisDir: + if not this_dir: error( logger, - "If not providing gdd_maps_ds, you must provide thisDir (location of gdd_maps.nc)", + "If not providing gdd_maps_ds, you must provide thisDir (location of " + + "gdd_maps.nc)", ) - gdd_maps_ds = xr.open_dataset(thisDir + "gdd_maps.nc") + gdd_maps_ds = xr.open_dataset(this_dir + "gdd_maps.nc") if not gddharv_maps_ds: - if not thisDir: + if not this_dir: error( logger, - "If not providing gddharv_maps_ds, you must provide thisDir (location of gddharv_maps.nc)", + "If not providing gddharv_maps_ds, you must provide thisDir (location of " + + "gddharv_maps.nc)", ) - gddharv_maps_ds = xr.open_dataset(thisDir + "gdd_maps.nc") + gddharv_maps_ds = xr.open_dataset(this_dir + "gdd_maps.nc") # Get info incl_vegtypes_str = gdd_maps_ds.attrs["incl_vegtypes_str"] @@ -952,19 +1057,19 @@ def make_figures( if not outdir_figs: outdir_figs = gdd_maps_ds.attrs["outdir_figs"] try: - y1 = gdd_maps_ds.attrs["y1"] - yN = gdd_maps_ds.attrs["yN"] + year_1 = gdd_maps_ds.attrs["y1"] + year_n = gdd_maps_ds.attrs["yN"] # Backwards compatibility with a bug (fixed 2023-01-03) - except: - y1 = gdd_maps_ds.attrs["first_season"] - yN = gdd_maps_ds.attrs["last_season"] + except KeyError: + year_1 = gdd_maps_ds.attrs["first_season"] + year_n = gdd_maps_ds.attrs["last_season"] # Import LU data, if doing so if land_use_file: - y1_lu = y1 if first_land_use_year == None else first_land_use_year - yN_lu = yN if last_land_use_year == None else last_land_use_year - lu_ds = cc.open_lu_ds(land_use_file, y1_lu, yN_lu, gdd_maps_ds, ungrid=False) - lu_years_text = f" (masked by {y1_lu}-{yN_lu} area)" - lu_years_file = f"_mask{y1_lu}-{yN_lu}" + year_1_lu = year_1 if first_land_use_year is None else first_land_use_year + year_n_lu = year_n if last_land_use_year is None else last_land_use_year + lu_ds = cc.open_lu_ds(land_use_file, year_1_lu, year_n_lu, gdd_maps_ds, ungrid=False) + lu_years_text = f" (masked by {year_1_lu}-{year_n_lu} area)" + lu_years_file = f"_mask{year_1_lu}-{year_n_lu}" else: lu_ds = None lu_years_text = "" @@ -980,11 +1085,11 @@ def make_figures( fontsize_axislabels = 12 fontsize_ticklabels = 12 - Nbins = len(lat_bin_edges) - 1 + n_bins = len(lat_bin_edges) - 1 bin_names = ["All"] - for b in np.arange(Nbins): - lower = lat_bin_edges[b] - upper = lat_bin_edges[b + 1] + for this_bin in np.arange(n_bins): + lower = lat_bin_edges[this_bin] + upper = lat_bin_edges[this_bin + 1] bin_names.append(f"{lower}–{upper}") color_old = cropcal_colors_cases(run1_name) @@ -996,13 +1101,13 @@ def make_figures( gdd_units = "GDD (°C • day)" # Maps - ny = 3 - nx = 1 + nplot_y = 3 + nplot_x = 1 log(logger, "Making before/after maps...") vegtype_list = incl_vegtypes_str if land_use_file: vegtype_list += ["Corn", "Cotton", "Rice", "Soybean", "Sugarcane", "Wheat"] - for v, vegtype_str in enumerate(vegtype_list): + for vegtype_str in vegtype_list: print(f"{vegtype_str}...") # Get component types @@ -1025,12 +1130,12 @@ def make_figures( else: crop_fracs_yx = None - theseVars = [f"gdd1_{x}" for x in vegtypes_int] + these_vars = [f"gdd1_{x}" for x in vegtypes_int] gddharv_map_yx = get_multicrop_maps( - gddharv_maps_ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units + gddharv_maps_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units ) gdd_map_yx = get_multicrop_maps( - gdd_maps_ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units + gdd_maps_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units ) # Get figure title @@ -1048,25 +1153,25 @@ def make_figures( # Set up figure and first subplot if layout == "3x1": fig = plt.figure(figsize=(7.5, 14)) - ax = fig.add_subplot(ny, nx, 1, projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(nplot_y, nplot_x, 1, projection=ccrs.PlateCarree()) elif layout == "2x2": fig = plt.figure(figsize=(12, 6)) spec = fig.add_gridspec(nrows=2, ncols=2, width_ratios=[0.4, 0.6]) - ax = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) elif layout == "3x2": fig = plt.figure(figsize=(14, 9)) spec = fig.add_gridspec(nrows=3, ncols=2, width_ratios=[0.5, 0.5], wspace=0.2) - ax = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) else: error(logger, f"layout {layout} not recognized") - thisMin = int(np.round(np.nanmin(gddharv_map_yx))) - thisMax = int(np.round(np.nanmax(gddharv_map_yx))) - thisTitle = f"{run1_name} (range {thisMin}–{thisMax})" - make_map( - ax, + this_min = int(np.round(np.nanmin(gddharv_map_yx))) + this_max = int(np.round(np.nanmax(gddharv_map_yx))) + this_title = f"{run1_name} (range {this_min}–{this_max})" + make_gengdd_map( + this_axis, gddharv_map_yx, - thisTitle, + this_title, vmax, bin_width, fontsize_ticklabels, @@ -1075,18 +1180,18 @@ def make_figures( ) if layout == "3x1": - ax = fig.add_subplot(ny, nx, 2, projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(nplot_y, nplot_x, 2, projection=ccrs.PlateCarree()) elif layout in ["2x2", "3x2"]: - ax = fig.add_subplot(spec[1, 0], projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(spec[1, 0], projection=ccrs.PlateCarree()) else: error(logger, f"layout {layout} not recognized") - thisMin = int(np.round(np.nanmin(gdd_map_yx))) - thisMax = int(np.round(np.nanmax(gdd_map_yx))) - thisTitle = f"{run2_name} (range {thisMin}–{thisMax})" - make_map( - ax, + this_min = int(np.round(np.nanmin(gdd_map_yx))) + this_max = int(np.round(np.nanmax(gdd_map_yx))) + this_title = f"{run2_name} (range {this_min}–{this_max})" + make_gengdd_map( + this_axis, gdd_map_yx, - thisTitle, + this_title, vmax, bin_width, fontsize_ticklabels, @@ -1096,22 +1201,22 @@ def make_figures( # Difference if layout == "3x2": - ax = fig.add_subplot(spec[2, 0], projection=ccrs.PlateCarree()) - thisMin = int(np.round(np.nanmin(gdd_map_yx))) - thisMax = int(np.round(np.nanmax(gdd_map_yx))) - thisTitle = f"{run2_name} minus {run1_name}" + this_axis = fig.add_subplot(spec[2, 0], projection=ccrs.PlateCarree()) + this_min = int(np.round(np.nanmin(gdd_map_yx))) + this_max = int(np.round(np.nanmax(gdd_map_yx))) + this_title = f"{run2_name} minus {run1_name}" diff_map_yx = gdd_map_yx - gddharv_map_yx diff_map_yx.attrs["units"] = gdd_units gdd_spacing = 500 - vmax, bounds, Ncolors = get_bounds_ncolors(gdd_spacing, diff_map_yx) - if Ncolors < 9: + vmax, bounds, n_colors = get_bounds_ncolors(gdd_spacing, diff_map_yx) + if n_colors < 9: gdd_spacing = 250 - vmax, bounds, Ncolors = get_bounds_ncolors(gdd_spacing, diff_map_yx) + vmax, bounds, n_colors = get_bounds_ncolors(gdd_spacing, diff_map_yx) - cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], Ncolors) + cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], n_colors) cbar_ticks = [] - include_0bin_ticks = Ncolors <= 13 + include_0bin_ticks = n_colors <= 13 if vmax <= 3000: tick_spacing = gdd_spacing * 2 elif vmax <= 5000: @@ -1119,17 +1224,19 @@ def make_figures( else: tick_spacing = 2000 previous = -np.inf - for x in bounds: - if (not include_0bin_ticks) and (x > 0) and (previous < 0): + for bound in bounds: + if (not include_0bin_ticks) and (previous < 0 < bound): cbar_ticks.append(0) - if x % tick_spacing == 0 or (include_0bin_ticks and abs(x) == gdd_spacing / 2): - cbar_ticks.append(x) - previous = x - - make_map( - ax, + if bound % tick_spacing == 0 or ( + include_0bin_ticks and abs(bound) == gdd_spacing / 2 + ): + cbar_ticks.append(bound) + previous = bound + + make_gengdd_map( + this_axis, diff_map_yx, - thisTitle, + this_title, vmax, bin_width, fontsize_ticklabels, @@ -1148,25 +1255,25 @@ def make_figures( lat_abs = np.abs(gdd_map_yx.lat.values) gdd_bybin_old = [gddharv_vector] gdd_bybin_new = [gdd_vector] - for b in np.arange(Nbins): - lower = lat_bin_edges[b] - upper = lat_bin_edges[b + 1] + for this_bin in np.arange(n_bins): + lower = lat_bin_edges[this_bin] + upper = lat_bin_edges[this_bin + 1] lat_inds = np.where((lat_abs >= lower) & (lat_abs < upper))[0] - gdd_vector_thisBin = get_non_nans(gdd_map_yx[lat_inds, :], dummy_fill) - gddharv_vector_thisBin = get_non_nans(gddharv_map_yx[lat_inds, :], dummy_fill) - gdd_bybin_old.append(gddharv_vector_thisBin) - gdd_bybin_new.append(gdd_vector_thisBin) + this_bin_gdd_vector = get_non_nans(gdd_map_yx[lat_inds, :], dummy_fill) + this_bin_gddharv_vector = get_non_nans(gddharv_map_yx[lat_inds, :], dummy_fill) + gdd_bybin_old.append(this_bin_gddharv_vector) + gdd_bybin_new.append(this_bin_gdd_vector) if layout == "3x1": - ax = fig.add_subplot(ny, nx, 3) + this_axis = fig.add_subplot(nplot_y, nplot_x, 3) elif layout in ["2x2", "3x2"]: - ax = fig.add_subplot(spec[:, 1]) + this_axis = fig.add_subplot(spec[:, 1]) else: error(logger, f"layout {layout} not recognized") # Shift bottom of plot up to make room for legend - ax_pos = ax.get_position() - ax.set_position(Bbox.from_extents(ax_pos.x0, 0.19, ax_pos.x1, ax_pos.y1)) + ax_pos = this_axis.get_position() + this_axis.set_position(Bbox.from_extents(ax_pos.x0, 0.19, ax_pos.x1, ax_pos.y1)) # Define legend position legend_bbox_to_anchor = (0, -0.15, 1, 0.2) @@ -1188,13 +1295,13 @@ def make_figures( plt.xticks(range(0, len(bin_names) * 2, 2), bin_names, fontsize=fontsize_ticklabels) plt.yticks(fontsize=fontsize_ticklabels) - ax.spines["right"].set_visible(False) - ax.spines["top"].set_visible(False) + this_axis.spines["right"].set_visible(False) + this_axis.spines["top"].set_visible(False) plt.xlabel("Latitude zone (absolute value)", fontsize=fontsize_axislabels) plt.ylabel(gdd_units, fontsize=fontsize_axislabels) - ax.yaxis.set_label_coords(-0.11, 0.5) - plt.title(f"Zonal changes", fontsize=fontsize_titles, fontweight="bold") + this_axis.yaxis.set_label_coords(-0.11, 0.5) + plt.title("Zonal changes", fontsize=fontsize_titles, fontweight="bold") plt.suptitle( f"Maturity requirements: {vegtype_str_title}" + lu_years_text, @@ -1205,10 +1312,13 @@ def make_figures( if vegtype_str in incl_vegtypes_str: outfile = os.path.join( - outdir_figs, f"{theseVars[0]}_{vegtype_str}_gs{y1}-{yN}{lu_years_file}.png" + outdir_figs, + f"{these_vars[0]}_{vegtype_str}_gs{year_1}-{year_n}{lu_years_file}.png", ) else: - outfile = os.path.join(outdir_figs, f"{vegtype_str}_gs{y1}-{yN}{lu_years_file}.png") + outfile = os.path.join( + outdir_figs, f"{vegtype_str}_gs{year_1}-{year_n}{lu_years_file}.png" + ) plt.savefig(outfile, dpi=300, transparent=False, facecolor="white", bbox_inches="tight") plt.close() From 9c9b23f1fe07e7461fcc23e243e1dd0eacd46e90 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 30 Jan 2024 15:32:39 -0700 Subject: [PATCH 05/40] Satisfy pylint for generate_gdds.py. --- python/ctsm/crop_calendars/generate_gdds.py | 149 ++++++++++---------- 1 file changed, 76 insertions(+), 73 deletions(-) diff --git a/python/ctsm/crop_calendars/generate_gdds.py b/python/ctsm/crop_calendars/generate_gdds.py index 16e3e130da..1af3744b28 100644 --- a/python/ctsm/crop_calendars/generate_gdds.py +++ b/python/ctsm/crop_calendars/generate_gdds.py @@ -1,32 +1,29 @@ -paramfile_dir = "/glade/campaign/cesm/cesmdata/cseg/inputdata/lnd/clm2/paramdata" - -# Import other shared functions +""" +Generate maturity requirements (GDD) from outputs of a GDD-generating run +""" import os -import inspect import sys +import pickle +import datetime as dt +import argparse +import logging +import numpy as np +import xarray as xr # Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script in the RUN phase seems to require the python/ directory to be manually added to path. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. _CTSM_PYTHON = os.path.join( os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" ) sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_module as cc -import ctsm.crop_calendars.generate_gdds_functions as gddfn - -# Import everything else -import os -import sys -import numpy as np -import xarray as xr -import pickle -import datetime as dt -import argparse -import logging +import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position +import ctsm.crop_calendars.generate_gdds_functions as gddfn # pylint: disable=wrong-import-position -# Info re: PFT parameter set -my_clm_ver = 51 -my_clm_subver = "c211112" +# Global constants +PARAMFILE_DIR = "/glade/campaign/cesm/cesmdata/cseg/inputdata/lnd/clm2/paramdata" +MY_CLM_VER = 51 +MY_CLM_SUBVER = "c211112" def main( @@ -47,6 +44,7 @@ def main( skip_crops=None, logger=None, ): + # pylint: disable=missing-function-docstring,too-many-statements # Directories to save output files and figures if not output_dir: if only_make_figs: @@ -73,11 +71,14 @@ def main( # Disable plotting if any plotting module is unavailable if save_figs: try: + # pylint: disable=import-outside-toplevel,unused-import,import-error import cartopy import matplotlib - except: + except ModuleNotFoundError as exc: if only_make_figs: - raise RuntimeError("only_make_figs True but not all plotting modules are available") + raise RuntimeError( + "only_make_figs True but not all plotting modules are available" + ) from exc gddfn.log(logger, "Not all plotting modules are available; disabling save_figs") save_figs = False @@ -95,19 +96,21 @@ def main( ########################## if not only_make_figs: - # Keep 1 extra year to avoid incomplete final growing season for crops harvested after Dec. 31. - y1_import_str = f"{first_season+1}-01-01" - yN_import_str = f"{last_season+2}-01-01" + # Keep 1 extra year to avoid incomplete final growing season for crops + # harvested after Dec. 31. + yr_1_import_str = f"{first_season+1}-01-01" + yr_n_import_str = f"{last_season+2}-01-01" gddfn.log( logger, - f"Importing netCDF time steps {y1_import_str} through {yN_import_str} (years are +1 because of CTSM output naming)", + f"Importing netCDF time steps {yr_1_import_str} through {yr_n_import_str} " + + "(years are +1 because of CTSM output naming)", ) pickle_file = os.path.join(output_dir, f"{first_season}-{last_season}.pickle") h2_ds_file = os.path.join(output_dir, f"{first_season}-{last_season}.h2_ds.nc") if os.path.exists(pickle_file): - with open(pickle_file, "rb") as f: + with open(pickle_file, "rb") as file: ( first_season, last_season, @@ -115,14 +118,14 @@ def main( gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, save_figs, incl_vegtypes_str, incl_patches1d_itype_veg, mxsowings, skip_crops, - ) = pickle.load(f) + ) = pickle.load(file) print(f"Will resume import at {pickle_year+1}") h2_ds = None else: @@ -132,17 +135,17 @@ def main( gddaccum_yp_list = [] gddharv_yp_list = [] incl_vegtypes_str = None - lastYear_active_patch_indices_list = None + lastyear_active_patch_indices_list = None sdates_rx = sdates_file hdates_rx = hdates_file if not unlimited_season_length: - mxmats = cc.import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver) + mxmats = cc.import_max_gs_length(PARAMFILE_DIR, MY_CLM_VER, MY_CLM_SUBVER) else: mxmats = None - for y, thisYear in enumerate(np.arange(first_season + 1, last_season + 3)): - if thisYear <= pickle_year: + for yr_index, this_yr in enumerate(np.arange(first_season + 1, last_season + 3)): + if this_yr <= pickle_year: continue ( @@ -152,7 +155,7 @@ def main( gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, incl_vegtypes_str, incl_patches1d_itype_veg, @@ -160,14 +163,14 @@ def main( ) = gddfn.import_and_process_1yr( first_season, last_season, - y, - thisYear, + yr_index, + this_yr, sdates_rx, hdates_rx, gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, input_dir, incl_vegtypes_str, @@ -179,16 +182,16 @@ def main( ) gddfn.log(logger, f" Saving pickle file ({pickle_file})...") - with open(pickle_file, "wb") as f: + with open(pickle_file, "wb") as file: pickle.dump( [ first_season, last_season, - thisYear, + this_yr, gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, save_figs, incl_vegtypes_str, @@ -196,7 +199,7 @@ def main( mxsowings, skip_crops, ], - f, + file, protocol=-1, ) @@ -248,35 +251,35 @@ def main( ] dummy_vars = [] dummy_longnames = [] - for v, thisVar in enumerate(all_vars): - if thisVar not in gdd_maps_ds: - dummy_vars.append(thisVar) - dummy_longnames.append(all_longnames[v]) + for var_index, this_var in enumerate(all_vars): + if this_var not in gdd_maps_ds: + dummy_vars.append(this_var) + dummy_longnames.append(all_longnames[var_index]) - def make_dummy(thisCrop_gridded, addend): - dummy_gridded = thisCrop_gridded + def make_dummy(this_crop_gridded, addend): + dummy_gridded = this_crop_gridded dummy_gridded.values = dummy_gridded.values * 0 + addend return dummy_gridded - for v in gdd_maps_ds: - thisCrop_gridded = gdd_maps_ds[v].copy() + for var_index in gdd_maps_ds: + this_crop_gridded = gdd_maps_ds[var_index].copy() break - dummy_gridded = make_dummy(thisCrop_gridded, -1) + dummy_gridded = make_dummy(this_crop_gridded, -1) - for v, thisVar in enumerate(dummy_vars): - if thisVar in gdd_maps_ds: + for var_index, this_var in enumerate(dummy_vars): + if this_var in gdd_maps_ds: gddfn.error( - logger, f"{thisVar} is already in gdd_maps_ds. Why overwrite it with dummy?" + logger, f"{this_var} is already in gdd_maps_ds. Why overwrite it with dummy?" ) - dummy_gridded.name = thisVar - dummy_gridded.attrs["long_name"] = dummy_longnames[v] - gdd_maps_ds[thisVar] = dummy_gridded + dummy_gridded.name = this_var + dummy_gridded.attrs["long_name"] = dummy_longnames[var_index] + gdd_maps_ds[this_var] = dummy_gridded # Add lon/lat attributes - def add_lonlat_attrs(ds): - ds.lon.attrs = {"long_name": "coordinate_longitude", "units": "degrees_east"} - ds.lat.attrs = {"long_name": "coordinate_latitude", "units": "degrees_north"} - return ds + def add_lonlat_attrs(this_ds): + this_ds.lon.attrs = {"long_name": "coordinate_longitude", "units": "degrees_east"} + this_ds.lat.attrs = {"long_name": "coordinate_latitude", "units": "degrees_north"} + return this_ds gdd_maps_ds = add_lonlat_attrs(gdd_maps_ds) gddharv_maps_ds = add_lonlat_attrs(gddharv_maps_ds) @@ -297,14 +300,17 @@ def add_lonlat_attrs(ds): def save_gdds(sdates_file, hdates_file, outfile, gdd_maps_ds, sdates_rx): # Set up output file from template (i.e., prescribed sowing dates). template_ds = xr.open_dataset(sdates_file, decode_times=True) - for v in template_ds: - if "sdate" in v: - template_ds = template_ds.drop(v) + for var in template_ds: + if "sdate" in var: + template_ds = template_ds.drop(var) template_ds.to_netcdf(path=outfile, format="NETCDF3_CLASSIC") template_ds.close() # Add global attributes - comment = f"Derived from CLM run plus crop calendar input files {os.path.basename(sdates_file) and {os.path.basename(hdates_file)}}." + comment = ( + "Derived from CLM run plus crop calendar input files " + + f"{os.path.basename(sdates_file) and {os.path.basename(hdates_file)}}." + ) gdd_maps_ds.attrs = { "author": "Sam Rabin (sam.rabin@gmail.com)", "comment": comment, @@ -384,7 +390,11 @@ def add_attrs_to_map_ds( parser.add_argument( "-i", "--input-dir", - help="Directory where run outputs can be found (and where outputs will go). If --only-make-figs, this is the directory with the preprocessed files (e.g., *.pickle file).", + help=( + "Directory where run outputs can be found (and where outputs will go). If " + + "--only-make-figs, this is the directory with the preprocessed files (e.g., *.pickle " + + "file)." + ), required=True, ) parser.add_argument( @@ -464,7 +474,6 @@ def add_attrs_to_map_ds( args = parser.parse_args(sys.argv[1:]) for k, v in sorted(vars(args).items()): print(f"{k}: {v}") - save_figs = not args.dont_save_figs # Call main() main( @@ -474,7 +483,7 @@ def add_attrs_to_map_ds( sdates_file=args.sdates_file, hdates_file=args.hdates_file, output_dir=args.output_dir, - save_figs=save_figs, + save_figs=not args.dont_save_figs, only_make_figs=args.only_make_figs, run1_name=args.run1_name, run2_name=args.run2_name, @@ -484,9 +493,3 @@ def add_attrs_to_map_ds( unlimited_season_length=args.unlimited_season_length, skip_crops=args.skip_crops, ) - -# main(input_dir="/Users/Shared/CESM_runs/tests_10x15_20230329_gddgen/202303301820", -# sdates_file="/Users/Shared/CESM_work/crop_dates_mostrice/sdates_ggcmi_crop_calendar_phase3_v1.01_nninterp-f10_f10_mg37.2000-2000.20230330_165301.nc", -# hdates_file="/Users/Shared/CESM_work/crop_dates_mostrice/hdates_ggcmi_crop_calendar_phase3_v1.01_nninterp-f10_f10_mg37.2000-2000.20230330_165301.nc", -# first_season=1997, last_season=2003, -# save_figs=False) From 73da27ab293cd07f61a460d6fa8119980db77334 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 30 Jan 2024 21:53:37 -0700 Subject: [PATCH 06/40] Remove unused function from cropcal_utils.py. --- python/ctsm/crop_calendars/cropcal_utils.py | 34 --------------------- 1 file changed, 34 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index ba6c0b6e41..4d77d2ef66 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -15,40 +15,6 @@ import numpy as np import xarray as xr -# from xr_ds_ex import xr_ds_ex - - -# generate annual means, weighted by days / month -def weighted_annual_mean(array, time_in="time", time_out="time"): - if isinstance(array[time_in].values[0], cftime.datetime): - month_length = array[time_in].dt.days_in_month - - # After https://docs.xarray.dev/en/v0.5.1/examples/monthly-means.html - group = f"{time_in}.year" - weights = month_length.groupby(group) / month_length.groupby(group).sum() - np.testing.assert_allclose(weights.groupby(group).sum().values, 1) - array = (array * weights).groupby(group).sum(dim=time_in, skipna=True) - if time_out != "year": - array = array.rename({"year": time_out}) - - else: - mon_day = xr.DataArray( - np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]), dims=["month"] - ) - mon_wgt = mon_day / mon_day.sum() - array = ( - array.rolling({time_in: 12}, center=False) # rolling - .construct("month") # construct the array - .isel( - {time_in: slice(11, None, 12)} - ) # slice so that the first element is [1..12], second is [13..24] - .dot(mon_wgt, dims=["month"]) - ) - if time_in != time_out: - array = array.rename({time_in: time_out}) - - return array - # List of PFTs used in CLM def define_pftlist(): From ddd5e51d59db58259b2fa669cf583dcba64deae3 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 30 Jan 2024 21:53:52 -0700 Subject: [PATCH 07/40] Rename a variable in generate_gdds.py. --- python/ctsm/crop_calendars/generate_gdds.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ctsm/crop_calendars/generate_gdds.py b/python/ctsm/crop_calendars/generate_gdds.py index 1af3744b28..156ebfb20e 100644 --- a/python/ctsm/crop_calendars/generate_gdds.py +++ b/python/ctsm/crop_calendars/generate_gdds.py @@ -261,8 +261,8 @@ def make_dummy(this_crop_gridded, addend): dummy_gridded.values = dummy_gridded.values * 0 + addend return dummy_gridded - for var_index in gdd_maps_ds: - this_crop_gridded = gdd_maps_ds[var_index].copy() + for var in gdd_maps_ds: + this_crop_gridded = gdd_maps_ds[var].copy() break dummy_gridded = make_dummy(this_crop_gridded, -1) From 3b4ae701f12f12eb07ac39fe03e07c70db4a78de Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 30 Jan 2024 21:55:56 -0700 Subject: [PATCH 08/40] Satisfy pylint for cropcal_module.py. --- .../ctsm/crop_calendars/check_rxboth_run.py | 8 +- python/ctsm/crop_calendars/cropcal_module.py | 972 ++++++++++-------- 2 files changed, 557 insertions(+), 423 deletions(-) diff --git a/python/ctsm/crop_calendars/check_rxboth_run.py b/python/ctsm/crop_calendars/check_rxboth_run.py index 6dae071937..30c280120d 100644 --- a/python/ctsm/crop_calendars/check_rxboth_run.py +++ b/python/ctsm/crop_calendars/check_rxboth_run.py @@ -60,16 +60,16 @@ def main(argv): # These should be constant in a Prescribed Calendars (rxboth) run, as long as the inputs were # static. case = { - "constantVars": ["SDATES", "GDDHARV"], + "const_vars": ["SDATES", "GDDHARV"], "rx_sdates_file": args.rx_sdates_file, "rx_gdds_file": args.rx_gdds_file, } case["ds"] = cc.import_output( annual_outfiles, - myVars=myVars, - y1=args.first_usable_year, - yN=args.last_usable_year, + my_vars=myVars, + year_1=args.first_usable_year, + year_N=args.last_usable_year, ) cc.check_constant_vars(case["ds"], case, ignore_nan=True, verbose=True, throw_error=True) diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index 76c295974d..4fa3cdf5aa 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -1,21 +1,27 @@ -import numpy as np -import xarray as xr +""" +Helper functions for various crop calendar stuff +""" +# pylint: disable=too-many-lines + import warnings import sys import os import glob +import numpy as np +import xarray as xr # Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script in the RUN phase seems to require the python/ directory to be manually added to path. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. _CTSM_PYTHON = os.path.join( os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" ) sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_utils as utils +import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position try: import pandas as pd -except: +except ModuleNotFoundError: pass @@ -38,9 +44,15 @@ }, } +# Minimum harvest threshold allowed in PlantCrop() +# Was 50 before cropcal runs 2023-01-28 +DEFAULT_GDD_MIN = 1.0 -# After importing a file, restrict it to years of interest. -def check_and_trim_years(y1, yN, ds_in): + +def check_and_trim_years(year_1, year_n, ds_in): + """ + After importing a file, restrict it to years of interest. + """ ### In annual outputs, file with name Y is actually results from year Y-1. ### Note that time values refer to when it was SAVED. So 1981-01-01 is for year 1980. @@ -49,65 +61,80 @@ def get_year_from_cftime(cftime_date): return cftime_date.year - 1 # Check that all desired years are included - if get_year_from_cftime(ds_in.time.values[0]) > y1: + if get_year_from_cftime(ds_in.time.values[0]) > year_1: raise RuntimeError( - f"Requested y1 is {y1} but first year in outputs is {get_year_from_cftime(ds_in.time.values[0])}" + f"Requested year_1 is {year_1} but first year in outputs is " + + f"{get_year_from_cftime(ds_in.time.values[0])}" ) - elif get_year_from_cftime(ds_in.time.values[-1]) < y1: + if get_year_from_cftime(ds_in.time.values[-1]) < year_1: raise RuntimeError( - f"Requested yN is {yN} but last year in outputs is {get_year_from_cftime(ds_in.time.values[-1])}" + f"Requested year_n is {year_n} but last year in outputs is " + + f"{get_year_from_cftime(ds_in.time.values[-1])}" ) # Remove years outside range of interest ### Include an extra year at the end to finish out final seasons. - ds_in = utils.safer_timeslice(ds_in, slice(f"{y1+1}-01-01", f"{yN+2}-01-01")) + ds_in = utils.safer_timeslice(ds_in, slice(f"{year_1+1}-01-01", f"{year_n+2}-01-01")) # Make sure you have the expected number of timesteps (including extra year) - Nyears_expected = yN - y1 + 2 - if ds_in.dims["time"] != Nyears_expected: + n_years_expected = year_n - year_1 + 2 + if ds_in.dims["time"] != n_years_expected: raise RuntimeError( - f"Expected {Nyears_expected} timesteps in output but got {ds_in.dims['time']}" + f"Expected {n_years_expected} timesteps in output but got {ds_in.dims['time']}" ) return ds_in -def open_lu_ds(filename, y1, yN, existing_ds, ungrid=True): +def open_lu_ds(filename, year_1, year_n, existing_ds, ungrid=True): + """ + Open land-use dataset + """ # Open and trim to years of interest - dsg = xr.open_dataset(filename).sel(time=slice(y1, yN)) + this_ds_gridded = xr.open_dataset(filename).sel(time=slice(year_1, year_n)) # Assign actual lon/lat coordinates - dsg = dsg.assign_coords( + this_ds_gridded = this_ds_gridded.assign_coords( lon=("lsmlon", existing_ds.lon.values), lat=("lsmlat", existing_ds.lat.values) ) - dsg = dsg.swap_dims({"lsmlon": "lon", "lsmlat": "lat"}) - - if "AREA" in dsg: - dsg["AREA_CFT"] = dsg.AREA * 1e6 * dsg.LANDFRAC_PFT * dsg.PCT_CROP / 100 * dsg.PCT_CFT / 100 - dsg["AREA_CFT"].attrs = {"units": "m2"} - dsg["AREA_CFT"].load() + this_ds_gridded = this_ds_gridded.swap_dims({"lsmlon": "lon", "lsmlat": "lat"}) + + if "AREA" in this_ds_gridded: + this_ds_gridded["AREA_CFT"] = ( + this_ds_gridded.AREA + * 1e6 + * this_ds_gridded.LANDFRAC_PFT + * this_ds_gridded.PCT_CROP + / 100 + * this_ds_gridded.PCT_CFT + / 100 + ) + this_ds_gridded["AREA_CFT"].attrs = {"units": "m2"} + this_ds_gridded["AREA_CFT"].load() else: print("Warning: AREA missing from Dataset, so AREA_CFT will not be created") if not ungrid: - return dsg + return this_ds_gridded # Un-grid query_ilons = [int(x) - 1 for x in existing_ds["patches1d_ixy"].values] query_ilats = [int(x) - 1 for x in existing_ds["patches1d_jxy"].values] - query_ivts = [list(dsg.cft.values).index(x) for x in existing_ds["patches1d_itype_veg"].values] + query_ivts = [ + list(this_ds_gridded.cft.values).index(x) for x in existing_ds["patches1d_itype_veg"].values + ] - ds = xr.Dataset(attrs=dsg.attrs) - for v in ["AREA", "LANDFRAC_PFT", "PCT_CFT", "PCT_CROP", "AREA_CFT"]: - if v not in dsg: + this_ds = xr.Dataset(attrs=this_ds_gridded.attrs) + for var in ["AREA", "LANDFRAC_PFT", "PCT_CFT", "PCT_CROP", "AREA_CFT"]: + if var not in this_ds_gridded: continue - if "time" in dsg[v].dims: + if "time" in this_ds_gridded[var].dims: new_coords = existing_ds["GRAINC_TO_FOOD_ANN"].coords else: new_coords = existing_ds["patches1d_lon"].coords - if "cft" in dsg[v].dims: - ds[v] = ( - dsg[v] + if "cft" in this_ds_gridded[var].dims: + this_ds[var] = ( + this_ds_gridded[var] .isel( lon=xr.DataArray(query_ilons, dims="patch"), lat=xr.DataArray(query_ilats, dims="patch"), @@ -117,8 +144,8 @@ def open_lu_ds(filename, y1, yN, existing_ds, ungrid=True): .assign_coords(new_coords) ) else: - ds[v] = ( - dsg[v] + this_ds[var] = ( + this_ds_gridded[var] .isel( lon=xr.DataArray(query_ilons, dims="patch"), lat=xr.DataArray(query_ilats, dims="patch"), @@ -126,67 +153,73 @@ def open_lu_ds(filename, y1, yN, existing_ds, ungrid=True): ) .assign_coords(new_coords) ) - for v in existing_ds: - if "patches1d_" in v or "grid1d_" in v: - ds[v] = existing_ds[v] - ds["lon"] = dsg["lon"] - ds["lat"] = dsg["lat"] + for var in existing_ds: + if "patches1d_" in var or "grid1d_" in var: + this_ds[var] = existing_ds[var] + this_ds["lon"] = this_ds_gridded["lon"] + this_ds["lat"] = this_ds_gridded["lat"] # Which crops are irrigated? - is_irrigated = np.full_like(ds["patches1d_itype_veg"], False) - for vegtype_str in np.unique(ds["patches1d_itype_veg_str"].values): + is_irrigated = np.full_like(this_ds["patches1d_itype_veg"], False) + for vegtype_str in np.unique(this_ds["patches1d_itype_veg_str"].values): if "irrigated" not in vegtype_str: continue vegtype_int = utils.ivt_str2int(vegtype_str) - is_this_vegtype = np.where(ds["patches1d_itype_veg"].values == vegtype_int)[0] + is_this_vegtype = np.where(this_ds["patches1d_itype_veg"].values == vegtype_int)[0] is_irrigated[is_this_vegtype] = True - ["irrigated" in x for x in ds["patches1d_itype_veg_str"].values] - ds["IRRIGATED"] = xr.DataArray( + this_ds["IRRIGATED"] = xr.DataArray( data=is_irrigated, - coords=ds["patches1d_itype_veg_str"].coords, + coords=this_ds["patches1d_itype_veg_str"].coords, attrs={"long_name": "Is patch irrigated?"}, ) # How much area is irrigated? - ds["IRRIGATED_AREA_CFT"] = ds["IRRIGATED"] * ds["AREA_CFT"] - ds["IRRIGATED_AREA_CFT"].attrs = { + this_ds["IRRIGATED_AREA_CFT"] = this_ds["IRRIGATED"] * this_ds["AREA_CFT"] + this_ds["IRRIGATED_AREA_CFT"].attrs = { "long name": "CFT area (irrigated types only)", "units": "m^2", } - ds["IRRIGATED_AREA_GRID"] = ( - ds["IRRIGATED_AREA_CFT"] - .groupby(ds["patches1d_gi"]) + this_ds["IRRIGATED_AREA_GRID"] = ( + this_ds["IRRIGATED_AREA_CFT"] + .groupby(this_ds["patches1d_gi"]) .sum() .rename({"patches1d_gi": "gridcell"}) ) - ds["IRRIGATED_AREA_GRID"].attrs = {"long name": "Irrigated area in gridcell", "units": "m^2"} + this_ds["IRRIGATED_AREA_GRID"].attrs = { + "long name": "Irrigated area in gridcell", + "units": "m^2", + } - return ds + return this_ds def check_constant_vars( - this_ds, case, ignore_nan, constantGSs=None, verbose=True, throw_error=True + this_ds, case, ignore_nan, const_growing_seasons=None, verbose=True, throw_error=True ): + """ + For variables that should stay constant, make sure they are + """ if isinstance(case, str): - constantVars = [case] + const_vars = [case] elif isinstance(case, list): - constantVars = case + const_vars = case elif isinstance(case, dict): - constantVars = case["constantVars"] + const_vars = case["const_vars"] else: raise TypeError(f"case must be str or dict, not {type(case)}") - if not constantVars: + if not const_vars: return None - if constantGSs: - gs0 = this_ds.gs.values[0] - gsN = this_ds.gs.values[-1] - if constantGSs.start > gs0 or constantGSs.stop < gsN: + if const_growing_seasons: + gs_0 = this_ds.gs.values[0] + gs_n = this_ds.gs.values[-1] + if const_growing_seasons.start > gs_0 or const_growing_seasons.stop < gs_n: print( - f"❗ Only checking constantVars over {constantGSs.start}-{constantGSs.stop} (run includes {gs0}-{gsN})" + f"❗ Only checking const_vars over {const_growing_seasons.start}-" + + f"{const_growing_seasons.stop} (run includes {gs_0}-{gs_n})" ) - this_ds = this_ds.sel(gs=constantGSs) + this_ds = this_ds.sel(gs=const_growing_seasons) any_bad = False any_bad_before_checking_rx = False @@ -194,155 +227,168 @@ def check_constant_vars( emojus = "❌" else: emojus = "❗" - if not isinstance(constantVars, list): - constantVars = [constantVars] + if not isinstance(const_vars, list): + const_vars = [const_vars] - for v in constantVars: - ok = True + for var in const_vars: + everything_ok = True - if "gs" in this_ds[v].dims: + if "gs" in this_ds[var].dims: time_coord = "gs" - elif "time" in this_ds[v].dims: + elif "time" in this_ds[var].dims: time_coord = "time" else: - raise RuntimeError(f"Which of these is the time coordinate? {this_ds[v].dims}") - i_time_coord = this_ds[v].dims.index(time_coord) + raise RuntimeError(f"Which of these is the time coordinate? {this_ds[var].dims}") + i_time_coord = this_ds[var].dims.index(time_coord) - this_da = this_ds[v] + this_da = this_ds[var] ra_sp = np.moveaxis(this_da.copy().values, i_time_coord, 0) incl_patches = [] bad_patches = np.array([]) - strList = [] + str_list = [] # Read prescription file, if needed rx_ds = None if isinstance(case, dict): - if v == "GDDHARV" and "rx_gdds_file" in case: + if var == "GDDHARV" and "rx_gdds_file" in case: rx_ds = import_rx_dates( "gdd", case["rx_gdds_file"], this_ds, set_neg1_to_nan=False ).squeeze() - for t1 in np.arange(this_ds.dims[time_coord] - 1): - condn = ~np.isnan(ra_sp[t1, ...]) - if t1 > 0: - condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:t1, ...]), axis=0)) - thesePatches = np.where(condn)[0] - if thesePatches.size == 0: + for time_1 in np.arange(this_ds.dims[time_coord] - 1): + condn = ~np.isnan(ra_sp[time_1, ...]) + if time_1 > 0: + condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:time_1, ...]), axis=0)) + these_patches = np.where(condn)[0] + if these_patches.size == 0: continue - thesePatches = list(np.where(condn)[0]) - incl_patches += thesePatches + these_patches = list(np.where(condn)[0]) + incl_patches += these_patches # print(f't1 {t1}: {thesePatches}') - t1_yr = this_ds[time_coord].values[t1] - t1_vals = np.squeeze(this_da.isel({time_coord: t1, "patch": thesePatches}).values) + t1_yr = this_ds[time_coord].values[time_1] + t1_vals = np.squeeze(this_da.isel({time_coord: time_1, "patch": these_patches}).values) - for t in np.arange(t1 + 1, this_ds.dims[time_coord]): - t_yr = this_ds[time_coord].values[t] - t_vals = np.squeeze(this_da.isel({time_coord: t, "patch": thesePatches}).values) + for timestep in np.arange(time_1 + 1, this_ds.dims[time_coord]): + t_yr = this_ds[time_coord].values[timestep] + t_vals = np.squeeze( + this_da.isel({time_coord: timestep, "patch": these_patches}).values + ) ok_p = t1_vals == t_vals - # If allowed, ignore where either t or t1 is NaN. Should only be used for runs where land use varies over time. + # If allowed, ignore where either t or t1 is NaN. Should only be used for runs where + # land use varies over time. if ignore_nan: ok_p = np.squeeze(np.bitwise_or(ok_p, np.isnan(t1_vals + t_vals))) if not np.all(ok_p): any_bad_before_checking_rx = True - bad_patches_thisT = list(np.where(np.bitwise_not(ok_p))[0]) + bad_patches_this_time = list(np.where(np.bitwise_not(ok_p))[0]) bad_patches = np.concatenate( - (bad_patches, np.array(thesePatches)[bad_patches_thisT]) + (bad_patches, np.array(these_patches)[bad_patches_this_time]) ) if rx_ds: found_in_rx = np.array([False for x in bad_patches]) - varyPatches = list(np.array(thesePatches)[bad_patches_thisT]) - varyLons = this_ds.patches1d_lon.values[bad_patches_thisT] - varyLats = this_ds.patches1d_lat.values[bad_patches_thisT] - varyCrops = this_ds.patches1d_itype_veg_str.values[bad_patches_thisT] - varyCrops_int = this_ds.patches1d_itype_veg.values[bad_patches_thisT] - - any_bad_anyCrop = False - for c in np.unique(varyCrops_int): - rx_var = f"gs1_{c}" - varyLons_thisCrop = varyLons[np.where(varyCrops_int == c)] - varyLats_thisCrop = varyLats[np.where(varyCrops_int == c)] - theseRxVals = np.diag( - rx_ds[rx_var].sel(lon=varyLons_thisCrop, lat=varyLats_thisCrop).values + vary_patches = list(np.array(these_patches)[bad_patches_this_time]) + vary_lons = this_ds.patches1d_lon.values[bad_patches_this_time] + vary_lats = this_ds.patches1d_lat.values[bad_patches_this_time] + vary_crops = this_ds.patches1d_itype_veg_str.values[bad_patches_this_time] + vary_crops_int = this_ds.patches1d_itype_veg.values[bad_patches_this_time] + + any_bad_any_crop = False + for crop_int in np.unique(vary_crops_int): + rx_var = f"gs1_{crop_int}" + vary_lons_this_crop = vary_lons[np.where(vary_crops_int == crop_int)] + vary_lats_this_crop = vary_lats[np.where(vary_crops_int == crop_int)] + these_rx_vals = np.diag( + rx_ds[rx_var] + .sel(lon=vary_lons_this_crop, lat=vary_lats_this_crop) + .values ) - if len(theseRxVals) != len(varyLats_thisCrop): + if len(these_rx_vals) != len(vary_lats_this_crop): raise RuntimeError( - f"Expected {len(varyLats_thisCrop)} rx values; got {len(theseRxVals)}" + f"Expected {len(vary_lats_this_crop)} rx values; got " + + f"{len(these_rx_vals)}" ) - if not np.any(theseRxVals != -1): + if not np.any(these_rx_vals != -1): continue - any_bad_anyCrop = True + any_bad_any_crop = True break - if not any_bad_anyCrop: + if not any_bad_any_crop: continue - # This bit is pretty inefficient, but I'm not going to optimize it until I actually need to use it. - for i, p in enumerate(bad_patches_thisT): - thisPatch = varyPatches[i] - thisLon = varyLons[i] - thisLat = varyLats[i] - thisCrop = varyCrops[i] - thisCrop_int = varyCrops_int[i] + # This bit is pretty inefficient, but I'm not going to optimize it until I + # actually need to use it. + for i, patch in enumerate(bad_patches_this_time): + this_patch = vary_patches[i] + this_lon = vary_lons[i] + this_lat = vary_lats[i] + this_crop = vary_crops[i] + this_crop_int = vary_crops_int[i] # If prescribed input had missing value (-1), it's fine for it to vary. if rx_ds: - rx_var = f"gs1_{thisCrop_int}" - if thisLon in rx_ds.lon.values and thisLat in rx_ds.lat.values: - rx = rx_ds[rx_var].sel(lon=thisLon, lat=thisLat).values - Nunique = len(np.unique(rx)) - if Nunique == 1: + rx_var = f"gs1_{this_crop_int}" + if this_lon in rx_ds.lon.values and this_lat in rx_ds.lat.values: + rx_vals = rx_ds[rx_var].sel(lon=this_lon, lat=this_lat).values + n_unique = len(np.unique(rx_vals)) + if n_unique == 1: found_in_rx[i] = True - if rx == -1: + if rx_vals == -1: continue - elif Nunique > 1: + elif n_unique > 1: raise RuntimeError( - f"How does lon {thisLon} lat {thisLat} {thisCrop} have time-varying {v}?" + f"How does lon {this_lon} lat {this_lat} {this_crop} have " + + f"time-varying {var}?" ) else: raise RuntimeError( - "lon {thisLon} lat {thisLat} {thisCrop} not in rx dataset?" + f"lon {this_lon} lat {this_lat} {this_crop} not in rx dataset?" ) # Print info (or save to print later) any_bad = True if verbose: - thisStr = f" Patch {thisPatch} (lon {thisLon} lat {thisLat}) {thisCrop} ({thisCrop_int})" + this_str = ( + f" Patch {this_patch} (lon {this_lon} lat {this_lat}) " + + f"{this_crop} ({this_crop_int})" + ) if rx_ds and not found_in_rx[i]: - thisStr = thisStr.replace("(lon", "* (lon") - if not np.isnan(t1_vals[p]): - t1_val_print = int(t1_vals[p]) + this_str = this_str.replace("(lon", "* (lon") + if not np.isnan(t1_vals[patch]): + t1_val_print = int(t1_vals[patch]) else: t1_val_print = "NaN" - if not np.isnan(t_vals[p]): - t_val_print = int(t_vals[p]) + if not np.isnan(t_vals[patch]): + t_val_print = int(t_vals[patch]) else: t_val_print = "NaN" - if v == "SDATES": - strList.append( - f"{thisStr}: Sowing {t1_yr} jday {t1_val_print}, {t_yr} jday {t_val_print}" + if var == "SDATES": + str_list.append( + f"{this_str}: Sowing {t1_yr} jday {t1_val_print}, {t_yr} " + + f"jday {t_val_print}" ) else: - strList.append( - f"{thisStr}: {t1_yr} {v} {t1_val_print}, {t_yr} {v} {t_val_print}" + str_list.append( + f"{this_str}: {t1_yr} {var} {t1_val_print}, {t_yr} {var} " + + f"{t_val_print}" ) else: - if ok: - print(f"{emojus} CLM output {v} unexpectedly vary over time:") - ok = False - print(f"{v} timestep {t} does not match timestep {t1}") + if everything_ok: + print(f"{emojus} CLM output {var} unexpectedly vary over time:") + everything_ok = False + print(f"{var} timestep {timestep} does not match timestep {time_1}") break if verbose and any_bad: - print(f"{emojus} CLM output {v} unexpectedly vary over time:") - strList.sort() + print(f"{emojus} CLM output {var} unexpectedly vary over time:") + str_list.sort() if rx_ds and np.any(~found_in_rx): - strList = [ + str_list = [ "*: Not found in prescribed input file (maybe minor lon/lat mismatch)" - ] + strList + ] + str_list elif not rx_ds: - strList = ["(No rx file checked)"] + strList - print("\n".join(strList)) + str_list = ["(No rx file checked)"] + str_list + print("\n".join(str_list)) # Make sure every patch was checked once (or is all-NaN except possibly final season) incl_patches = np.sort(incl_patches) @@ -365,21 +411,23 @@ def check_constant_vars( if not np.array_equal(incl_patches, np.unique(incl_patches)): raise RuntimeError("Patch(es) checked but also all-NaN??") if not np.array_equal(incl_patches, np.arange(this_ds.dims["patch"])): - for p in np.arange(this_ds.dims["patch"]): - if p not in incl_patches: + for patch in np.arange(this_ds.dims["patch"]): + if patch not in incl_patches: break raise RuntimeError( - f"Not all patches checked! E.g., {p}: {this_da.isel(patch=p).values}" + f"Not all patches checked! E.g., {patch}: {this_da.isel(patch=patch).values}" ) if not any_bad: if any_bad_before_checking_rx: print( - f"✅ CLM output {v} do not vary through {this_ds.dims[time_coord]} growing seasons of output (except for patch(es) with missing rx)." + f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " + + "seasons of output (except for patch(es) with missing rx)." ) else: print( - f"✅ CLM output {v} do not vary through {this_ds.dims[time_coord]} growing seasons of output." + f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " + + "seasons of output." ) if any_bad and throw_error: @@ -392,6 +440,9 @@ def check_constant_vars( def check_rx_obeyed( vegtype_list, rx_ds, dates_ds, which_ds, output_var, gdd_min=None, verbose=False ): + """ + Check that prescribed crop calendars were obeyed + """ all_ok = 2 diff_str_list = [] gdd_tolerance = 1 @@ -403,25 +454,26 @@ def check_rx_obeyed( ) pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) print( - f"{which_ds} harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harv at maturity)" + f"{which_ds} harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harv at " + + "maturity)" ) for vegtype_str in vegtype_list: - thisVeg_patches = np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] - if thisVeg_patches.size == 0: + thisveg_patches = np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] + if thisveg_patches.size == 0: continue - ds_thisVeg = dates_ds.isel(patch=thisVeg_patches) - patch_inds_lon_thisVeg = ds_thisVeg.patches1d_ixy.values.astype(int) - 1 - patch_inds_lat_thisVeg = ds_thisVeg.patches1d_jxy.values.astype(int) - 1 - patch_lons_thisVeg = ds_thisVeg.patches1d_lon - patch_lats_thisVeg = ds_thisVeg.patches1d_lat + ds_thisveg = dates_ds.isel(patch=thisveg_patches) + patch_inds_lon_thisveg = ds_thisveg.patches1d_ixy.values.astype(int) - 1 + patch_inds_lat_thisveg = ds_thisveg.patches1d_jxy.values.astype(int) - 1 + patch_lons_thisveg = ds_thisveg.patches1d_lon + patch_lats_thisveg = ds_thisveg.patches1d_lat vegtype_int = utils.vegtype_str2int(vegtype_str)[0] rx_da = rx_ds[f"gs1_{vegtype_int}"] - rx_array = rx_da.values[patch_inds_lat_thisVeg, patch_inds_lon_thisVeg] + rx_array = rx_da.values[patch_inds_lat_thisveg, patch_inds_lon_thisveg] rx_array = np.expand_dims(rx_array, axis=1) - sim_array = ds_thisVeg[output_var].values - sim_array_dims = ds_thisVeg[output_var].dims + sim_array = ds_thisveg[output_var].values + sim_array_dims = ds_thisveg[output_var].dims # Ignore patches without prescribed value with np.errstate(invalid="ignore"): @@ -430,10 +482,11 @@ def check_rx_obeyed( # Account for... if "GDDHARV" in output_var: # ...GDD harvest threshold minimum set in PlantCrop() - if gdd_min == None: - gdd_min = default_gdd_min() + if gdd_min is None: + gdd_min = DEFAULT_GDD_MIN print( - f"gdd_min not provided when doing check_rx_obeyed() for {output_var}; using default {gdd_min}" + f"gdd_min not provided when doing check_rx_obeyed() for {output_var}; using " + + f"default {gdd_min}" ) with np.errstate(invalid="ignore"): rx_array[(rx_array >= 0) & (rx_array < gdd_min)] = gdd_min @@ -443,11 +496,13 @@ def check_rx_obeyed( # 1: Harvesting at maturity # 2: Harvesting at max season length (mxmat) # 3: Crop was incorrectly planted in last time step of Dec. 31 - # 4: Today was supposed to be the planting day, but the previous crop still hasn't been harvested. + # 4: Today was supposed to be the planting day, but the previous crop still hasn't been + # harvested. # 5: Harvest the day before the next sowing date this year. # 6: Same as #5. - # 7: Harvest the day before the next sowing date (today is Dec. 31 and the sowing date is Jan. 1) - harvest_reason_da = ds_thisVeg["HARVEST_REASON"] + # 7: Harvest the day before the next sowing date (today is Dec. 31 and the sowing date + # is Jan. 1) + harvest_reason_da = ds_thisveg["HARVEST_REASON"] unique_harvest_reasons = np.unique( harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] ) @@ -456,43 +511,51 @@ def check_rx_obeyed( if np.any(sim_array != rx_array): diff_array = sim_array - rx_array - # Allow negative GDDHARV values when harvest occurred because sowing was scheduled for the next day + # Allow negative GDDHARV values when harvest occurred because sowing was scheduled for + # the next day if output_var == "GDDHARV_PERHARV": diff_array = np.ma.masked_array( diff_array, - mask=(diff_array < 0) & (ds_thisVeg["HARVEST_REASON_PERHARV"].values == 5), + mask=(diff_array < 0) & (ds_thisveg["HARVEST_REASON_PERHARV"].values == 5), ) elif output_var == "GDDHARV": with np.errstate(invalid="ignore"): diff_lt_0 = diff_array < 0 - harv_reason_5 = ds_thisVeg["HARVEST_REASON"].values == 5 + harv_reason_5 = ds_thisveg["HARVEST_REASON"].values == 5 diff_array = np.ma.masked_array(diff_array, mask=diff_lt_0 & harv_reason_5) with np.errstate(invalid="ignore"): abs_gt_0 = abs(diff_array) > 0 if np.any(np.abs(diff_array[abs_gt_0]) > 0): - min_diff, minLon, minLat, minGS, minRx = get_extreme_info( + min_diff, min_lon, min_lat, min_gs, min_rx = get_extreme_info( diff_array, rx_array, np.nanmin, sim_array_dims, dates_ds.gs, - patch_lons_thisVeg, - patch_lats_thisVeg, + patch_lons_thisveg, + patch_lats_thisveg, ) - max_diff, maxLon, maxLat, maxGS, maxRx = get_extreme_info( + max_diff, max_lon, max_lat, max_gs, max_rx = get_extreme_info( diff_array, rx_array, np.nanmax, sim_array_dims, dates_ds.gs, - patch_lons_thisVeg, - patch_lats_thisVeg, + patch_lons_thisveg, + patch_lats_thisveg, ) - diffs_eg_txt = f"{vegtype_str} ({vegtype_int}): diffs range {min_diff} (lon {minLon}, lat {minLat}, gs {minGS}, rx ~{minRx}) to {max_diff} (lon {maxLon}, lat {maxLat}, gs {maxGS}, rx ~{maxRx})" + diffs_eg_txt = ( + f"{vegtype_str} ({vegtype_int}): diffs range {min_diff} (lon {min_lon}, lat " + + f"{min_lat}, gs {min_gs}, rx ~{min_rx}) to {max_diff} (lon {max_lon}, lat " + + f"{max_lat}, gs {max_gs}, rx ~{max_rx})" + ) if "GDDHARV" in output_var: - diffs_eg_txt += f"; harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harvested at maturity)" + diffs_eg_txt += ( + f"; harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}" + + "% harvested at maturity)" + ) if "GDDHARV" in output_var and np.nanmax(abs(diff_array)) <= gdd_tolerance: if all_ok > 0: all_ok = 1 @@ -501,7 +564,8 @@ def check_rx_obeyed( all_ok = 0 if verbose: print( - f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., {diffs_eg_txt}" + f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., " + + f"{diffs_eg_txt}" ) else: break @@ -512,56 +576,67 @@ def check_rx_obeyed( # print(f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable:") # for x in diff_str_list: print(x) print( - f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable (diffs <= {gdd_tolerance})" + f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable (diffs <= " + + f"{gdd_tolerance})" ) elif not verbose: print(f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., {diffs_eg_txt}") -# Make sure that, e.g., GDDACCUM_PERHARV is always <= HUI_PERHARV -def check_v0_le_v1(this_ds, vars, msg_txt=" ", both_nan_ok=False, throw_error=False): - v0 = vars[0] - v1 = vars[1] - gdd_lt_hui = this_ds[v0] <= this_ds[v1] +def check_v0_le_v1(this_ds, var_list, msg_txt=" ", both_nan_ok=False, throw_error=False): + """ + Make sure that, e.g., GDDACCUM_PERHARV is always <= HUI_PERHARV + """ + var0 = var_list[0] + var1 = var_list[1] + gdd_lt_hui = this_ds[var0] <= this_ds[var1] if both_nan_ok: - gdd_lt_hui = gdd_lt_hui | (np.isnan(this_ds[v0]) & np.isnan(this_ds[v1])) + gdd_lt_hui = gdd_lt_hui | (np.isnan(this_ds[var0]) & np.isnan(this_ds[var1])) if np.all(gdd_lt_hui): - print(f"✅{msg_txt}{v0} always <= {v1}") + print(f"✅{msg_txt}{var0} always <= {var1}") else: - msg = f"❌{msg_txt}{v0} *not* always <= {v1}" + msg = f"❌{msg_txt}{var0} *not* always <= {var1}" gdd_lt_hui_vals = gdd_lt_hui.values - p = np.where(~gdd_lt_hui_vals)[0][0] + patch_index = np.where(~gdd_lt_hui_vals)[0][0] msg = ( msg - + f"\ne.g., patch {p}: {this_ds.patches1d_itype_veg_str.values[p]}, lon {this_ds.patches1d_lon.values[p]} lat {this_ds.patches1d_lat.values[p]}:" + + f"\ne.g., patch {patch_index}: {this_ds.patches1d_itype_veg_str.values[patch_index]}," + + f" lon {this_ds.patches1d_lon.values[patch_index]} lat " + + f"{this_ds.patches1d_lat.values[patch_index]}:" ) - msg = msg + f"\n{this_ds[v0].values[p,:]}" - msg = msg + f"\n{this_ds[v1].values[p,:]}" + msg = msg + f"\n{this_ds[var0].values[patch_index,:]}" + msg = msg + f"\n{this_ds[var1].values[patch_index,:]}" if throw_error: print(msg) else: raise RuntimeError(msg) -# Convert time*mxharvests axes to growingseason axis -def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): +def convert_axis_time2gs(this_ds, verbose=False, my_vars=None, incl_orig=False): + """ + Convert time*mxharvests axes to growingseason axis + """ # How many non-NaN patch-seasons do we expect to have once we're done organizing things? - Npatch = this_ds.dims["patch"] - # Because some patches will be planted in the last year but not complete, we have to ignore any finalyear-planted seasons that do complete. - Ngs = this_ds.dims["time"] - 1 - expected_valid = Npatch * Ngs + n_patch = this_ds.dims["patch"] + # Because some patches will be planted in the last year but not complete, we have to ignore any + # finalyear-planted seasons that do complete. + n_gs = this_ds.dims["time"] - 1 + expected_valid = n_patch * n_gs mxharvests = this_ds.dims["mxharvests"] if verbose: print( - f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} patch-seasons" + f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} " + + "patch-seasons" ) - # Set all non-positive date values to NaN. These are seasons that were never harvested (or never started): "non-seasons." + # Set all non-positive date values to NaN. These are seasons that were never harvested + # (or never started): "non-seasons." if this_ds.HDATES.dims != ("time", "mxharvests", "patch"): raise RuntimeError( - f"This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not {this_ds.HDATES.dims}" + "This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not " + + f"{this_ds.HDATES.dims}" ) hdates_ymp = this_ds.HDATES.copy().where(this_ds.HDATES > 0).values hdates_pym = np.transpose(hdates_ymp.copy(), (2, 0, 1)) @@ -578,9 +653,10 @@ def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): # Find seasons that were planted while the patch was inactive with np.errstate(invalid="ignore"): sown_inactive_py = inactive_py[:, :-1] & (hdates_pym[:, 1:, 0] < sdates_pym[:, 1:, 0]) - sown_inactive_py = np.concatenate((np.full((Npatch, 1), False), sown_inactive_py), axis=1) + sown_inactive_py = np.concatenate((np.full((n_patch, 1), False), sown_inactive_py), axis=1) - # "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was inactive" + # "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was + # inactive" with np.errstate(invalid="ignore"): first_season_before_first_year_p = hdates_pym[:, 0, 0] < sdates_pym[:, 0, 0] first_season_before_first_year_py = np.full(hdates_pym.shape[:-1], fill_value=False) @@ -589,7 +665,7 @@ def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): sown_prerun_or_inactive_pym = np.concatenate( ( np.expand_dims(sown_prerun_or_inactive_py, axis=2), - np.full((Npatch, Ngs + 1, mxharvests - 1), False), + np.full((n_patch, n_gs + 1, mxharvests - 1), False), ), axis=2, ) @@ -598,12 +674,17 @@ def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): sdates_pym[where_sown_prerun_or_inactive_pym] = np.nan if verbose: print( - f'After "Ignore harvests from before this output began: discrepancy of {np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons' + "After 'Ignore harvests from before this output began: discrepancy of " + + f"{np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons'" ) - # We need to keep some non-seasons---it's possible that "the yearY growing season" never happened (sowing conditions weren't met), but we still need something there so that we can make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs. + # We need to keep some non-seasons---it's possible that "the yearY growing season" never + # happened (sowing conditions weren't met), but we still need something there so that we can + # make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to + # -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs. - # "In years with no sowing, pretend the first no-harvest is meaningful, unless that was intentionally ignored above." + # "In years with no sowing, pretend the first no-harvest is meaningful, unless that was + # intentionally ignored above." sdates_orig_ymp = this_ds.SDATES.copy().values sdates_orig_pym = np.transpose(sdates_orig_ymp.copy(), (2, 0, 1)) hdates_pym2 = hdates_pym.copy() @@ -615,43 +696,45 @@ def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): where_nosow_py_1st = np.where(nosow_py_1st) hdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf sdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf - for h in np.arange(mxharvests - 1): - if h == 0: + for harvest_index in np.arange(mxharvests - 1): + if harvest_index == 0: continue - elif h == 1: + elif harvest_index == 1: print("Warning: Untested with mxharvests > 2") where_nosow_py = np.where( nosow_py - & ~np.any(np.isnan(hdates_pym[:, :, 0:h]), axis=2) - & np.isnan(hdates_pym[:, :, h]) + & ~np.any(np.isnan(hdates_pym[:, :, 0:harvest_index]), axis=2) + & np.isnan(hdates_pym[:, :, harvest_index]) ) - hdates_pym2[where_nosow_py[0], where_nosow_py[1], h + 1] = -np.inf - sdates_pym2[where_nosow_py[0], where_nosow_py[1], h + 1] = -np.inf + hdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf + sdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf - # "In years with sowing that are followed by inactive years, check whether the last sowing was harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] no-harvest is meaningful." + # "In years with sowing that are followed by inactive years, check whether the last sowing was + # harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] + # no-harvest is meaningful." sdates_orig_masked_pym = sdates_orig_pym.copy() with np.errstate(invalid="ignore"): sdates_le_0 = sdates_orig_masked_pym <= 0 sdates_orig_masked_pym[np.where(sdates_le_0)] = np.nan with warnings.catch_warnings(): warnings.filterwarnings(action="ignore", message="All-NaN slice encountered") - last_sdate_firstNgs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2) - last_hdate_firstNgs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2) + last_sdate_first_n_gs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2) + last_hdate_first_n_gs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2) with np.errstate(invalid="ignore"): - hdate_lt_sdate = last_hdate_firstNgs_py < last_sdate_firstNgs_py - last_sowing_not_harvested_sameyear_firstNgs_py = hdate_lt_sdate | np.isnan( - last_hdate_firstNgs_py + hdate_lt_sdate = last_hdate_first_n_gs_py < last_sdate_first_n_gs_py + last_sowing_not_harvested_sameyear_first_n_gs_py = hdate_lt_sdate | np.isnan( + last_hdate_first_n_gs_py ) - inactive_lastNgs_py = inactive_py[:, 1:] - last_sowing_never_harvested_firstNgs_py = ( - last_sowing_not_harvested_sameyear_firstNgs_py & inactive_lastNgs_py + inactive_last_n_gs_py = inactive_py[:, 1:] + last_sowing_never_harvested_first_n_gs_py = ( + last_sowing_not_harvested_sameyear_first_n_gs_py & inactive_last_n_gs_py ) last_sowing_never_harvested_py = np.concatenate( - (last_sowing_never_harvested_firstNgs_py, np.full((Npatch, 1), False)), axis=1 + (last_sowing_never_harvested_first_n_gs_py, np.full((n_patch, 1), False)), axis=1 ) last_sowing_never_harvested_pym = np.concatenate( ( - np.full((Npatch, Ngs + 1, mxharvests - 1), False), + np.full((n_patch, n_gs + 1, mxharvests - 1), False), np.expand_dims(last_sowing_never_harvested_py, axis=2), ), axis=2, @@ -663,33 +746,36 @@ def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): sdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf # Convert to growingseason axis - def pym_to_pg(pym, quiet=False): - pg = np.reshape(pym, (pym.shape[0], -1)) - ok_pg = pg[~np.isnan(pg)] + def pym_to_pg(pym_array, quiet=False): + pg_array = np.reshape(pym_array, (pym_array.shape[0], -1)) + ok_pg = pg_array[~np.isnan(pg_array)] if not quiet: print( - f"{ok_pg.size} included; unique N seasons = {np.unique(np.sum(~np.isnan(pg), axis=1))}" + f"{ok_pg.size} included; unique N seasons = " + + f"{np.unique(np.sum(~np.isnan(pg_array), axis=1))}" ) - return pg + return pg_array hdates_pg = pym_to_pg(hdates_pym3.copy(), quiet=~verbose) sdates_pg = pym_to_pg(sdates_pym3.copy(), quiet=True) if verbose: print( - f'After "In years with no sowing, pretend the first no-harvest is meaningful: discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons' + "After 'In years with no sowing, pretend the first no-harvest is meaningful: " + + f"discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons" ) - # "Ignore any harvests that were planted in the final year, because some cells will have incomplete growing seasons for the final year." + # "Ignore any harvests that were planted in the final year, because some cells will have + # incomplete growing seasons for the final year." with np.errstate(invalid="ignore"): hdates_ge_sdates = hdates_pg[:, -mxharvests:] >= sdates_pg[:, -mxharvests:] lastyear_complete_season = hdates_ge_sdates | np.isinf(hdates_pg[:, -mxharvests:]) - def ignore_lastyear_complete_season(pg, excl, mxharvests): - tmp_L = pg[:, :-mxharvests] - tmp_R = pg[:, -mxharvests:] - tmp_R[np.where(excl)] = np.nan - pg = np.concatenate((tmp_L, tmp_R), axis=1) - return pg + def ignore_lastyear_complete_season(pg_array, excl, mxharvests): + tmp_l = pg_array[:, :-mxharvests] + tmp_r = pg_array[:, -mxharvests:] + tmp_r[np.where(excl)] = np.nan + pg_array = np.concatenate((tmp_l, tmp_r), axis=1) + return pg_array hdates_pg2 = ignore_lastyear_complete_season( hdates_pg.copy(), lastyear_complete_season, mxharvests @@ -699,41 +785,45 @@ def ignore_lastyear_complete_season(pg, excl, mxharvests): ) is_valid = ~np.isnan(hdates_pg2) is_fake = np.isneginf(hdates_pg2) - is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], Ngs)) + is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], n_gs)) discrepancy = np.sum(is_valid) - expected_valid - unique_Nseasons = np.unique(np.sum(is_valid, axis=1)) + unique_n_seasons = np.unique(np.sum(is_valid, axis=1)) if verbose: print( - f'After "Ignore any harvests that were planted in the final year, because other cells will have incomplete growing seasons for the final year": discrepancy of {discrepancy} patch-seasons' + "After 'Ignore any harvests that were planted in the final year, because other cells " + + "will have incomplete growing seasons for the final year': discrepancy of " + + f"{discrepancy} patch-seasons" ) if "pandas" in sys.modules: - bc = np.bincount(np.sum(is_valid, axis=1)) - bc = bc[bc > 0] - df = pd.DataFrame({"Ngs": unique_Nseasons, "Count": bc}) - print(df) + bincount = np.bincount(np.sum(is_valid, axis=1)) + bincount = bincount[bincount > 0] + dataframe = pd.DataFrame({"Ngs": unique_n_seasons, "Count": bincount}) + print(dataframe) else: - print(f"unique N seasons = {unique_Nseasons}") + print(f"unique N seasons = {unique_n_seasons}") print(" ") # Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out if discrepancy == 0: this_ds_gs = set_up_ds_with_gs_axis(this_ds) - for v in this_ds.data_vars: - if this_ds[v].dims != ("time", "mxharvests", "patch") or (myVars and v not in myVars): + for var in this_ds.data_vars: + if this_ds[var].dims != ("time", "mxharvests", "patch") or ( + my_vars and var not in my_vars + ): continue # Set invalid values to NaN - da_yhp = this_ds[v].copy() + da_yhp = this_ds[var].copy() da_yhp = da_yhp.where(~np.isneginf(da_yhp)) # Remove the nans and reshape to patches*growingseasons da_pyh = da_yhp.transpose("patch", "time", "mxharvests") ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1)) - ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], Ngs)) + ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], n_gs)) # Change -infs to nans ar_valid_pg[is_fake] = np.nan # Save as DataArray to new Dataset, stripping _PERHARV from variable name - newname = v.replace("_PERHARV", "") + newname = var.replace("_PERHARV", "") if newname in this_ds_gs: raise RuntimeError(f"{newname} already in dataset!") da_pg = xr.DataArray( @@ -743,14 +833,16 @@ def ignore_lastyear_complete_season(pg, excl, mxharvests): attrs=da_yhp.attrs, ) this_ds_gs[newname] = da_pg - this_ds_gs[newname].attrs["units"] = this_ds[v].attrs["units"] + this_ds_gs[newname].attrs["units"] = this_ds[var].attrs["units"] else: # Print details about example bad patch(es) - if min(unique_Nseasons) < Ngs: - print(f"Too few seasons (min {min(unique_Nseasons)} < {Ngs})") - p = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_Nseasons))[0][0] - print_onepatch_wrongNgs( - p, + if min(unique_n_seasons) < n_gs: + print(f"Too few seasons (min {min(unique_n_seasons)} < {n_gs})") + patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_n_seasons))[ + 0 + ][0] + print_onepatch_wrong_n_gs( + patch_index, this_ds, sdates_ymp, hdates_ymp, @@ -765,11 +857,13 @@ def ignore_lastyear_complete_season(pg, excl, mxharvests): sdates_pg2, hdates_pg2, ) - if max(unique_Nseasons) > Ngs: - print(f"Too many seasons (max {max(unique_Nseasons)} > {Ngs})") - p = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_Nseasons))[0][0] - print_onepatch_wrongNgs( - p, + if max(unique_n_seasons) > n_gs: + print(f"Too many seasons (max {max(unique_n_seasons)} > {n_gs})") + patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_n_seasons))[ + 0 + ][0] + print_onepatch_wrong_n_gs( + patch_index, this_ds, sdates_ymp, hdates_ymp, @@ -785,35 +879,31 @@ def ignore_lastyear_complete_season(pg, excl, mxharvests): hdates_pg2, ) raise RuntimeError( - f"Can't convert time*mxharvests axes to growingseason axis: discrepancy of {discrepancy} patch-seasons" + "Can't convert time*mxharvests axes to growingseason axis: discrepancy of " + + f"{discrepancy} patch-seasons" ) # Preserve units - for v1 in this_ds_gs: - v0 = v1 - if v0 not in this_ds: - v0 += "_PERHARV" - if v0 not in this_ds: + for var_1 in this_ds_gs: + var_0 = var_1 + if var_0 not in this_ds: + var_0 += "_PERHARV" + if var_0 not in this_ds: continue - if "units" in this_ds[v0].attrs: - this_ds_gs[v1].attrs["units"] = this_ds[v0].attrs["units"] + if "units" in this_ds[var_0].attrs: + this_ds_gs[var_1].attrs["units"] = this_ds[var_0].attrs["units"] if incl_orig: return this_ds_gs, this_ds - else: - return this_ds_gs - - -# Minimum harvest threshold allowed in PlantCrop() -# Was 50 before cropcal runs 2023-01-28 -def default_gdd_min(): - return 1.0 + return this_ds_gs -# Get information about extreme gridcells (for debugging) -def get_extreme_info(diff_array, rx_array, mxn, dims, gs, patches1d_lon, patches1d_lat): - if mxn == np.min: - diff_array = np.ma.masked_array(diff_array, mask=(np.abs(diff_array) == 0)) +def get_extreme_info(diff_array, rx_array, mxn, dims, gs_da, patches1d_lon, patches1d_lat): + """ + Get information about extreme gridcells (for debugging) + """ + if mxn == np.min: # pylint: disable=comparison-with-callable + diff_array = np.ma.masked_array(diff_array, mask=np.abs(diff_array) == 0) themxn = mxn(diff_array) # Find the first patch-gs that has the mxn value @@ -821,20 +911,22 @@ def get_extreme_info(diff_array, rx_array, mxn, dims, gs, patches1d_lon, patches first_indices = [x[0] for x in matching_indices] # Get the lon, lat, and growing season of that patch-gs - p = first_indices[dims.index("patch")] - thisLon = patches1d_lon.values[p] - thisLat = patches1d_lat.values[p] - s = first_indices[dims.index("gs")] - thisGS = gs.values[s] + patch_index = first_indices[dims.index("patch")] + this_lon = patches1d_lon.values[patch_index] + this_lat = patches1d_lat.values[patch_index] + season_index = first_indices[dims.index("gs")] + this_gs = gs_da.values[season_index] # Get the prescribed value for this patch-gs - thisRx = rx_array[p][0] + this_rx = rx_array[patch_index][0] - return round(themxn, 3), round(thisLon, 3), round(thisLat, 3), thisGS, round(thisRx) + return round(themxn, 3), round(this_lon, 3), round(this_lat, 3), this_gs, round(this_rx) -# Get growing season lengths from a DataArray of hdate-sdate def get_gs_len_da(this_da): + """ + Get growing season lengths from a DataArray of hdate-sdate + """ tmp = this_da.values with np.errstate(invalid="ignore"): tmp_lt_0 = tmp < 0 @@ -845,13 +937,16 @@ def get_gs_len_da(this_da): def get_pct_harv_at_mature(harvest_reason_da): - Nharv_at_mature = len(np.where(harvest_reason_da.values == 1)[0]) + """ + Get percentage of harvests that happened at maturity + """ + n_harv_at_mature = len(np.where(harvest_reason_da.values == 1)[0]) with np.errstate(invalid="ignore"): harv_reason_gt_0 = harvest_reason_da.values > 0 - Nharv = len(np.where(harv_reason_gt_0)[0]) - if Nharv == 0: + n_harv = len(np.where(harv_reason_gt_0)[0]) + if n_harv == 0: return np.nan - pct_harv_at_mature = Nharv_at_mature / Nharv * 100 + pct_harv_at_mature = n_harv_at_mature / n_harv * 100 pct_harv_at_mature = np.format_float_positional( pct_harv_at_mature, precision=2, unique=False, fractional=False, trim="k" ) # Round to 2 significant digits @@ -859,6 +954,9 @@ def get_pct_harv_at_mature(harvest_reason_da): def import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver): + """ + Import maximum growing season length + """ # Get parameter file pattern = os.path.join(paramfile_dir, f"*{my_clm_ver}_params.{my_clm_subver}.nc") paramfile = glob.glob(pattern) @@ -886,8 +984,12 @@ def import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver): return mxmat_dict -# E.g. import_rx_dates("sdate", sdates_rx_file, dates_ds0_orig) -def import_rx_dates(var_prefix, date_inFile, dates_ds, set_neg1_to_nan=True): +def import_rx_dates(var_prefix, date_infile, dates_ds, set_neg1_to_nan=True): + """ + Import prescribed sowing/harvest dates + + E.g. import_rx_dates("sdate", sdates_rx_file, dates_ds0_orig) + """ # Get run info: # Max number of growing seasons per year if "mxsowings" in dates_ds: @@ -896,53 +998,61 @@ def import_rx_dates(var_prefix, date_inFile, dates_ds, set_neg1_to_nan=True): mxsowings = 1 # Which vegetation types were simulated? - itype_veg_toImport = np.unique(dates_ds.patches1d_itype_veg) + itype_veg_to_import = np.unique(dates_ds.patches1d_itype_veg) - date_varList = [] - for i in itype_veg_toImport: - for g in np.arange(mxsowings): - thisVar = f"{var_prefix}{g+1}_{i}" - date_varList = date_varList + [thisVar] + date_varlist = [] + for i in itype_veg_to_import: + for j in np.arange(mxsowings): + this_var = f"{var_prefix}{j+1}_{i}" + date_varlist = date_varlist + [this_var] - ds = utils.import_ds(date_inFile, myVars=date_varList) + this_ds = utils.import_ds(date_infile, myVars=date_varlist) did_warn = False - for v in ds: - v_new = v.replace(var_prefix, "gs") - ds = ds.rename({v: v_new}) + for var in this_ds: + v_new = var.replace(var_prefix, "gs") + this_ds = this_ds.rename({var: v_new}) # Set -1 prescribed GDD values to NaN. Only warn the first time. - if set_neg1_to_nan and var_prefix == "gdd" and v_new != v and np.any(ds[v_new].values < 0): - if np.any((ds[v_new].values < 0) & (ds[v_new].values != -1)): - raise RuntimeError(f"Unexpected negative value in {v}") + if ( + set_neg1_to_nan + and var_prefix == "gdd" + and v_new != var + and np.any(this_ds[v_new].values < 0) + ): + if np.any((this_ds[v_new].values < 0) & (this_ds[v_new].values != -1)): + raise RuntimeError(f"Unexpected negative value in {var}") if not did_warn: - print(f"Setting -1 rx GDD values to NaN") + print("Setting -1 rx GDD values to NaN") did_warn = True - ds[v_new] = ds[v_new].where(ds[v_new] != -1) + this_ds[v_new] = this_ds[v_new].where(this_ds[v_new] != -1) - return ds + return this_ds def import_output( filename, - myVars, - y1=None, - yN=None, - myVegtypes=utils.define_mgdcrop_list(), + my_vars, + year_1=None, + year_n=None, + my_vegtypes=utils.define_mgdcrop_list(), sdates_rx_ds=None, gdds_rx_ds=None, verbose=False, ): + """ + Import CLM output + """ # Import - this_ds = utils.import_ds(filename, myVars=myVars, myVegtypes=myVegtypes) + this_ds = utils.import_ds(filename, myVars=my_vars, myVegtypes=my_vegtypes) # Trim to years of interest (do not include extra year needed for finishing last growing season) - if y1 and yN: - this_ds = check_and_trim_years(y1, yN, this_ds) + if year_1 and year_n: + this_ds = check_and_trim_years(year_1, year_n, this_ds) else: # Assume including all growing seasons except last complete one are "of interest" - y1 = this_ds.time.values[0].year - yN = this_ds.time.values[-1].year - 2 - this_ds = check_and_trim_years(y1, yN, this_ds) + year_1 = this_ds.time.values[0].year + year_n = this_ds.time.values[-1].year - 2 + this_ds = check_and_trim_years(year_1, year_n, this_ds) # What vegetation types are included? vegtype_list = [ @@ -954,20 +1064,24 @@ def import_output( all_nan = np.full(this_ds[date_vars[0]].shape, True) all_nonpos = np.full(this_ds[date_vars[0]].shape, True) all_pos = np.full(this_ds[date_vars[0]].shape, True) - for v in date_vars: - all_nan = all_nan & np.isnan(this_ds[v].values) + for var in date_vars: + all_nan = all_nan & np.isnan(this_ds[var].values) with np.errstate(invalid="ignore"): - all_nonpos = all_nonpos & (this_ds[v].values <= 0) - all_pos = all_pos & (this_ds[v].values > 0) + all_nonpos = all_nonpos & (this_ds[var].values <= 0) + all_pos = all_pos & (this_ds[var].values > 0) if np.any(np.bitwise_not(all_nan | all_nonpos | all_pos)): raise RuntimeError("Inconsistent missing/present values on mxharvests axis") - # When doing transient runs, it's somehow possible for crops in newly-active patches to be *already alive*. They even have a sowing date (idop)! This will of course not show up in SDATES, but it does show up in SDATES_PERHARV. - # I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going to invalidate those "seasons." + # When doing transient runs, it's somehow possible for crops in newly-active patches to be + # *already alive*. They even have a sowing date (idop)! This will of course not show up in + # SDATES, but it does show up in SDATES_PERHARV. + # I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going + # to invalidate those "seasons." # # In all but the last calendar year, which patches had no sowing? no_sowing_yp = np.all(np.isnan(this_ds.SDATES.values[:-1, :, :]), axis=1) - # In all but the first calendar year, which harvests' jdays are < their sowings' jdays? (Indicates sowing the previous calendar year.) + # In all but the first calendar year, which harvests' jdays are < their sowings' jdays? + # (Indicates sowing the previous calendar year.) with np.errstate(invalid="ignore"): hsdate1_gt_hdate1_yp = ( this_ds.SDATES_PERHARV.values[1:, 0, :] > this_ds.HDATES.values[1:, 0, :] @@ -976,7 +1090,8 @@ def import_output( falsely_alive_yp = no_sowing_yp & hsdate1_gt_hdate1_yp if np.any(falsely_alive_yp): print( - f"Warning: {np.sum(falsely_alive_yp)} patch-seasons being ignored: Seemingly sown the year before harvest, but no sowings occurred that year." + f"Warning: {np.sum(falsely_alive_yp)} patch-seasons being ignored: Seemingly sown the " + + "year before harvest, but no sowings occurred that year." ) falsely_alive_yp = np.concatenate( (np.full((1, this_ds.dims["patch"]), False), falsely_alive_yp), axis=0 @@ -984,52 +1099,57 @@ def import_output( falsely_alive_y1p = np.expand_dims(falsely_alive_yp, axis=1) dummy_false_y1p = np.expand_dims(np.full_like(falsely_alive_yp, False), axis=1) falsely_alive_yhp = np.concatenate((falsely_alive_y1p, dummy_false_y1p), axis=1) - for v in this_ds.data_vars: - if this_ds[v].dims != ("time", "mxharvests", "patch"): + for var in this_ds.data_vars: + if this_ds[var].dims != ("time", "mxharvests", "patch"): continue - this_ds[v] = this_ds[v].where(~falsely_alive_yhp) + this_ds[var] = this_ds[var].where(~falsely_alive_yhp) - def check_no_negative(this_ds_in, varList_no_negative, which_file, verbose=False): - tiny_negOK = 1e-12 + def check_no_negative(this_ds_in, varlist_no_negative, which_file, verbose=False): + tiny_neg_ok = 1e-12 this_ds = this_ds_in.copy() - for v in this_ds: - if not any(x in v for x in varList_no_negative): + for var in this_ds: + if not any(x in var for x in varlist_no_negative): continue - the_min = np.nanmin(this_ds[v].values) + the_min = np.nanmin(this_ds[var].values) if the_min < 0: - if np.abs(the_min) <= tiny_negOK: + if np.abs(the_min) <= tiny_neg_ok: if verbose: print( - f"Tiny negative value(s) in {v} (abs <= {tiny_negOK}) being set to 0 ({which_file})" + f"Tiny negative value(s) in {var} (abs <= {tiny_neg_ok}) being set to 0" + + f" ({which_file})" ) else: print( - f"WARNING: Unexpected negative value(s) in {v}; minimum {the_min} ({which_file})" + f"WARNING: Unexpected negative value(s) in {var}; minimum {the_min} " + + f"({which_file})" ) - values = this_ds[v].copy().values + values = this_ds[var].copy().values with np.errstate(invalid="ignore"): - do_setto_0 = (values < 0) & (values >= -tiny_negOK) + do_setto_0 = (values < 0) & (values >= -tiny_neg_ok) values[np.where(do_setto_0)] = 0 - this_ds[v] = xr.DataArray( - values, coords=this_ds[v].coords, dims=this_ds[v].dims, attrs=this_ds[v].attrs + this_ds[var] = xr.DataArray( + values, + coords=this_ds[var].coords, + dims=this_ds[var].dims, + attrs=this_ds[var].attrs, ) elif verbose: - print(f"No negative value(s) in {v}; min {the_min} ({which_file})") + print(f"No negative value(s) in {var}; min {the_min} ({which_file})") return this_ds - def check_no_zeros(this_ds, varList_no_zero, which_file): - for v in this_ds: - if not any(x in v for x in varList_no_zero): + def check_no_zeros(this_ds, varlist_no_zero, which_file): + for var in this_ds: + if not any(x in var for x in varlist_no_zero): continue - if np.any(this_ds[v].values == 0): - print(f"WARNING: Unexpected zero(s) in {v} ({which_file})") + if np.any(this_ds[var].values == 0): + print(f"WARNING: Unexpected zero(s) in {var} ({which_file})") elif verbose: - print(f"No zero value(s) in {v} ({which_file})") + print(f"No zero value(s) in {var} ({which_file})") # Check for no zero values where there shouldn't be - varList_no_zero = ["DATE", "YEAR"] - check_no_zeros(this_ds, varList_no_zero, "original file") + varlist_no_zero = ["DATE", "YEAR"] + check_no_zeros(this_ds, varlist_no_zero, "original file") # Convert time*mxharvests axes to growingseason axis this_ds_gs = convert_axis_time2gs(this_ds, verbose=verbose, incl_orig=False) @@ -1046,21 +1166,21 @@ def check_no_zeros(this_ds, varList_no_zero, which_file): # Get HUI accumulation as fraction of required this_ds_gs["HUIFRAC"] = this_ds_gs["HUI"] / this_ds_gs["GDDHARV"] this_ds_gs["HUIFRAC_PERHARV"] = this_ds["HUI_PERHARV"] / this_ds["GDDHARV_PERHARV"] - for v in ["HUIFRAC", "HUIFRAC_PERHARV"]: - this_ds_gs[v].attrs["units"] = "Fraction of required" + for var in ["HUIFRAC", "HUIFRAC_PERHARV"]: + this_ds_gs[var].attrs["units"] = "Fraction of required" # Avoid tiny negative values - varList_no_negative = ["GRAIN", "REASON", "GDD", "HUI", "YEAR", "DATE", "GSLEN"] - this_ds_gs = check_no_negative(this_ds_gs, varList_no_negative, "new file", verbose=verbose) + varlist_no_negative = ["GRAIN", "REASON", "GDD", "HUI", "YEAR", "DATE", "GSLEN"] + this_ds_gs = check_no_negative(this_ds_gs, varlist_no_negative, "new file", verbose=verbose) # Check for no zero values where there shouldn't be - varList_no_zero = ["REASON", "DATE"] - check_no_zeros(this_ds_gs, varList_no_zero, "new file") + varlist_no_zero = ["REASON", "DATE"] + check_no_zeros(this_ds_gs, varlist_no_zero, "new file") # Check that e.g., GDDACCUM <= HUI - for vars in [["GDDACCUM", "HUI"], ["SYEARS", "HYEARS"]]: - if all(v in this_ds_gs for v in vars): - check_v0_le_v1(this_ds_gs, vars, both_nan_ok=True, throw_error=True) + for var_list in [["GDDACCUM", "HUI"], ["SYEARS", "HYEARS"]]: + if all(v in this_ds_gs for v in var_list): + check_v0_le_v1(this_ds_gs, var_list, both_nan_ok=True, throw_error=True) # Check that prescribed calendars were obeyed if sdates_rx_ds: @@ -1071,9 +1191,8 @@ def check_no_zeros(this_ds, varList_no_zero, which_file): gdds_rx_ds, this_ds, "this_ds", - "SDATES", "GDDHARV", - gdd_min=default_gdd_min(), + gdd_min=DEFAULT_GDD_MIN, ) # Convert time axis to integer year, saving original as 'cftime' @@ -1092,9 +1211,8 @@ def check_no_zeros(this_ds, varList_no_zero, which_file): return this_ds_gs -# Print information about a patch (for debugging) -def print_onepatch_wrongNgs( - p, +def print_onepatch_wrong_n_gs( + patch_index, this_ds_orig, sdates_ymp, hdates_ymp, @@ -1109,21 +1227,21 @@ def print_onepatch_wrongNgs( sdates_pg2, hdates_pg2, ): - try: - import pandas as pd - except: - print("Couldn't import pandas, so not displaying example bad patch ORIGINAL.") + """ + Print information about a patch (for debugging) + """ print( - f"patch {p}: {this_ds_orig.patches1d_itype_veg_str.values[p]}, lon" - f" {this_ds_orig.patches1d_lon.values[p]} lat {this_ds_orig.patches1d_lat.values[p]}" + f"patch {patch_index}: {this_ds_orig.patches1d_itype_veg_str.values[patch_index]}, lon " + f"{this_ds_orig.patches1d_lon.values[patch_index]} lat " + f"{this_ds_orig.patches1d_lat.values[patch_index]}" ) print("Original SDATES (per sowing):") - print(this_ds_orig.SDATES.values[:, :, p]) + print(this_ds_orig.SDATES.values[:, :, patch_index]) print("Original HDATES (per harvest):") - print(this_ds_orig.HDATES.values[:, :, p]) + print(this_ds_orig.HDATES.values[:, :, patch_index]) if "pandas" in sys.modules: @@ -1132,29 +1250,36 @@ def print_pandas_ymp(msg, cols, arrs_tuple): mxharvests = arrs_tuple[0].shape[1] arrs_list2 = [] cols2 = [] - for h in np.arange(mxharvests): - for i, a in enumerate(arrs_tuple): - arrs_list2.append(a[:, h]) - cols2.append(cols[i] + str(h)) + for harvest_index in np.arange(mxharvests): + for i, array in enumerate(arrs_tuple): + arrs_list2.append(array[:, harvest_index]) + cols2.append(cols[i] + str(harvest_index)) arrs_tuple2 = tuple(arrs_list2) - df = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) - df.columns = cols2 - print(df) + dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) + dataframe.columns = cols2 + print(dataframe) print_pandas_ymp( "Original", ["sdate", "hdate"], - (this_ds_orig.SDATES_PERHARV.values[:, :, p], this_ds_orig.HDATES.values[:, :, p]), + ( + this_ds_orig.SDATES_PERHARV.values[:, :, patch_index], + this_ds_orig.HDATES.values[:, :, patch_index], + ), ) - print_pandas_ymp("Masked", ["sdate", "hdate"], (sdates_ymp[:, :, p], hdates_ymp[:, :, p])) + print_pandas_ymp( + "Masked", + ["sdate", "hdate"], + (sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index]), + ) print_pandas_ymp( 'After "Ignore harvests from before this output began"', ["sdate", "hdate"], ( - np.transpose(sdates_pym, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], ), ) @@ -1162,8 +1287,8 @@ def print_pandas_ymp(msg, cols, arrs_tuple): 'After "In years with no sowing, pretend the first no-harvest is meaningful"', ["sdate", "hdate"], ( - np.transpose(sdates_pym2, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym2, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], ), ) @@ -1175,23 +1300,25 @@ def print_pandas_ymp(msg, cols, arrs_tuple): ), ["sdate", "hdate"], ( - np.transpose(sdates_pym3, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym3, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], ), ) def print_pandas_pg(msg, cols, arrs_tuple): print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") arrs_list = list(arrs_tuple) - for i, a in enumerate(arrs_tuple): - arrs_list[i] = np.reshape(a, (-1)) + for i, array in enumerate(arrs_tuple): + arrs_list[i] = np.reshape(array, (-1)) arrs_tuple2 = tuple(arrs_list) - df = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) - df.columns = cols - print(df) + dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) + dataframe.columns = cols + print(dataframe) print_pandas_pg( - "Same, but converted to gs axis", ["sdate", "hdate"], (sdates_pg[p, :], hdates_pg[p, :]) + "Same, but converted to gs axis", + ["sdate", "hdate"], + (sdates_pg[patch_index, :], hdates_pg[patch_index, :]), ) print_pandas_pg( @@ -1200,35 +1327,36 @@ def print_pandas_pg(msg, cols, arrs_tuple): ' will have incomplete growing seasons for the final year"' ), ["sdate", "hdate"], - (sdates_pg2[p, :], hdates_pg2[p, :]), + (sdates_pg2[patch_index, :], hdates_pg2[patch_index, :]), ) else: + print("Couldn't import pandas, so not displaying example bad patch ORIGINAL.") - def print_nopandas(a1, a2, msg): + def print_nopandas(array_1, array_2, msg): print(msg) - if a1.ndim == 1: + if array_1.ndim == 1: # I don't know why these aren't side-by-side! - print(np.stack((a1, a2), axis=1)) + print(np.stack((array_1, array_2), axis=1)) else: - print(np.concatenate((a1, a2), axis=1)) + print(np.concatenate((array_1, array_2), axis=1)) - print_nopandas(sdates_ymp[:, :, p], hdates_ymp[:, :, p], "Masked:") + print_nopandas(sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index], "Masked:") print_nopandas( - np.transpose(sdates_pym, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], 'After "Ignore harvests from before this output began"', ) print_nopandas( - np.transpose(sdates_pym2, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym2, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], 'After "In years with no sowing, pretend the first no-harvest is meaningful"', ) print_nopandas( - np.transpose(sdates_pym3, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym3, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], ( 'After "In years with sowing that are followed by inactive years, check whether the' " last sowing was harvested before the patch was deactivated. If not, pretend the" @@ -1236,11 +1364,13 @@ def print_nopandas(a1, a2, msg): ), ) - print_nopandas(sdates_pg[p, :], hdates_pg[p, :], "Same, but converted to gs axis") + print_nopandas( + sdates_pg[patch_index, :], hdates_pg[patch_index, :], "Same, but converted to gs axis" + ) print_nopandas( - sdates_pg2[p, :], - hdates_pg2[p, :], + sdates_pg2[patch_index, :], + hdates_pg2[patch_index, :], ( 'After "Ignore any harvests that were planted in the final year, because some cells' ' will have incomplete growing seasons for the final year"' @@ -1250,14 +1380,18 @@ def print_nopandas(a1, a2, msg): print("\n\n") -# Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out. -# Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or mxharvests. def set_up_ds_with_gs_axis(ds_in): + """ + Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out. + + Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or + mxharvests. + """ # Get the data variables to include in the new dataset - data_vars = dict() - for v in ds_in.data_vars: - if not any([x in ["mxsowings", "mxharvests"] for x in ds_in[v].dims]): - data_vars[v] = ds_in[v] + data_vars = {} + for var in ds_in.data_vars: + if not any(x in ["mxsowings", "mxharvests"] for x in ds_in[var].dims): + data_vars[var] = ds_in[var] # Set up the new dataset gs_years = [t.year - 1 for t in ds_in.time.values[:-1]] coords = ds_in.coords From 4380ff7f4f49bb465b188471350342d2a87ccaf7 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 30 Jan 2024 21:59:11 -0700 Subject: [PATCH 09/40] Satisfy pylint for check_rxboth_run.py. --- .../ctsm/crop_calendars/check_rxboth_run.py | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/python/ctsm/crop_calendars/check_rxboth_run.py b/python/ctsm/crop_calendars/check_rxboth_run.py index 30c280120d..a41ef8d8f7 100644 --- a/python/ctsm/crop_calendars/check_rxboth_run.py +++ b/python/ctsm/crop_calendars/check_rxboth_run.py @@ -1,12 +1,18 @@ -# %% Setup - +""" +Check the results of a run with prescribed sowing dates and maturity requirements +""" +import sys +import argparse +import glob +import os import numpy as np -import sys, argparse -import cropcal_module as cc -import glob, os +import cropcal_module as cc # pylint: disable=import-error def main(argv): + """ + Main method: Check the results of a run with prescribed sowing dates and maturity requirements + """ # Set arguments parser = argparse.ArgumentParser(description="ADD DESCRIPTION HERE") parser.add_argument( @@ -40,7 +46,7 @@ def main(argv): args = parser.parse_args(argv) # Note that _PERHARV will be stripped off upon import - myVars = [ + my_vars = [ "GRAINC_TO_FOOD_PERHARV", "GRAINC_TO_FOOD_ANN", "SDATES", @@ -67,7 +73,7 @@ def main(argv): case["ds"] = cc.import_output( annual_outfiles, - my_vars=myVars, + my_vars=my_vars, year_1=args.first_usable_year, year_N=args.last_usable_year, ) @@ -84,20 +90,27 @@ def main(argv): # Equalize lons/lats lonlat_tol = 1e-4 - for v in ["rx_sdates_ds", "rx_gdds_ds"]: - if v in case: - for l in ["lon", "lat"]: - max_diff_orig = np.max(np.abs(case[v][l].values - case["ds"][l].values)) + for ds_name in ["rx_sdates_ds", "rx_gdds_ds"]: + if ds_name in case: + for coord_name in ["lon", "lat"]: + max_diff_orig = np.max( + np.abs(case[ds_name][coord_name].values - case["ds"][coord_name].values) + ) if max_diff_orig > lonlat_tol: raise RuntimeError( - f"{v} {l} values differ too much ({max_diff_orig} > {lonlat_tol})" + f"{ds_name} {coord_name} values differ too much ({max_diff_orig} > " + + f"{lonlat_tol})" + ) + if max_diff_orig > 0: + case[ds_name] = case[ds_name].assign_coords( + {coord_name: case["ds"][coord_name].values} + ) + max_diff = np.max( + np.abs(case[ds_name][coord_name].values - case["ds"][coord_name].values) ) - elif max_diff_orig > 0: - case[v] = case[v].assign_coords({l: case["ds"][l].values}) - max_diff = np.max(np.abs(case[v][l].values - case["ds"][l].values)) - print(f"{v} {l} max_diff {max_diff_orig} → {max_diff}") + print(f"{ds_name} {coord_name} max_diff {max_diff_orig} → {max_diff}") else: - print(f"{v} {l} max_diff {max_diff_orig}") + print(f"{ds_name} {coord_name} max_diff {max_diff_orig}") # Check if case["rx_sdates_file"]: From 8397f3c0e41e91ef6bbdb6b4064d6921b332dbad Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 31 Jan 2024 08:14:05 -0700 Subject: [PATCH 10/40] Fix call of import_output() in check_rxboth_run.py. --- python/ctsm/crop_calendars/check_rxboth_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ctsm/crop_calendars/check_rxboth_run.py b/python/ctsm/crop_calendars/check_rxboth_run.py index a41ef8d8f7..c2cf37aa12 100644 --- a/python/ctsm/crop_calendars/check_rxboth_run.py +++ b/python/ctsm/crop_calendars/check_rxboth_run.py @@ -75,7 +75,7 @@ def main(argv): annual_outfiles, my_vars=my_vars, year_1=args.first_usable_year, - year_N=args.last_usable_year, + year_n=args.last_usable_year, ) cc.check_constant_vars(case["ds"], case, ignore_nan=True, verbose=True, throw_error=True) From 4be7930708cee0311eedae9722deb6b8f61cdb24 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 31 Jan 2024 15:46:34 -0700 Subject: [PATCH 11/40] Satisfy pylint for test_sys_regrid_ggcmi_shdates.py. --- python/ctsm/test/test_sys_regrid_ggcmi_shdates.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py b/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py index 7521ef09a5..6c2e230481 100755 --- a/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py +++ b/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py @@ -5,7 +5,6 @@ """ import os -import re import unittest import tempfile @@ -18,8 +17,7 @@ # -- add python/ctsm to path (needed if we want to run test stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) - - +# pylint: disable=wrong-import-position from ctsm.path_utils import path_to_ctsm_root from ctsm import unit_testing from ctsm.crop_calendars.regrid_ggcmi_shdates import regrid_ggcmi_shdates @@ -78,6 +76,9 @@ def tearDown(self): shutil.rmtree(self._tempdir, ignore_errors=True) def test_regrid_ggcmi_shdates(self): + """ + Tests regrid_ggcmi_shdates + """ # Call script sys.argv = self._function_call_list From 2a533efd0be208034620df310abe08ad4741cc26 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 31 Jan 2024 15:47:55 -0700 Subject: [PATCH 12/40] Satisfy pylint for test_unit_modify_singlept_site_neon.py. --- python/ctsm/test/test_unit_modify_singlept_site_neon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ctsm/test/test_unit_modify_singlept_site_neon.py b/python/ctsm/test/test_unit_modify_singlept_site_neon.py index ecd96357b3..3a9d7d424c 100755 --- a/python/ctsm/test/test_unit_modify_singlept_site_neon.py +++ b/python/ctsm/test/test_unit_modify_singlept_site_neon.py @@ -17,7 +17,7 @@ # -- add python/ctsm to path (needed if we want to run the test stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) - +# pylint: disable=wrong-import-position from ctsm.path_utils import path_to_ctsm_root # pylint: disable=wrong-import-position From 3cbe7197d1efced49c6f3360aaaad15d44d2e0ca Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 31 Jan 2024 15:50:57 -0700 Subject: [PATCH 13/40] Satisfy pylint for test_unit_run_sys_tests.py. --- python/ctsm/test/test_unit_run_sys_tests.py | 23 +++++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/python/ctsm/test/test_unit_run_sys_tests.py b/python/ctsm/test/test_unit_run_sys_tests.py index 65ec1df5a5..98a9d54674 100755 --- a/python/ctsm/test/test_unit_run_sys_tests.py +++ b/python/ctsm/test/test_unit_run_sys_tests.py @@ -271,7 +271,7 @@ def test_withDryRun_nothingDone(self): def test_getTestmodList_suite(self): """Ensure that _get_testmod_list() works correctly with suite-style input""" - input = [ + testmod_list_input = [ "clm/default", "clm/default", "clm/crop", @@ -283,12 +283,12 @@ def test_getTestmodList_suite(self): "clm-crop", "clm-cropMonthlyOutput", ] - output = _get_testmod_list(input, unique=False) + output = _get_testmod_list(testmod_list_input, unique=False) self.assertEqual(output, target) def test_getTestmodList_suite_unique(self): """Ensure that _get_testmod_list() works correctly with unique=True""" - input = [ + testmod_list_input = [ "clm/default", "clm/default", "clm/crop", @@ -300,24 +300,29 @@ def test_getTestmodList_suite_unique(self): "clm-cropMonthlyOutput", ] - output = _get_testmod_list(input, unique=True) + output = _get_testmod_list(testmod_list_input, unique=True) self.assertEqual(output, target) def test_getTestmodList_testname(self): """Ensure that _get_testmod_list() works correctly with full test name(s) specified""" - input = [ + testmod_list_input = [ "ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-crop", "ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-default", ] target = ["clm-crop", "clm-default"] - output = _get_testmod_list(input) + output = _get_testmod_list(testmod_list_input) self.assertEqual(output, target) def test_getTestmodList_twomods(self): - """Ensure that _get_testmod_list() works correctly with full test name(s) specified and two mods in one test""" - input = ["ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-default--clm-crop"] + """ + Ensure that _get_testmod_list() works correctly with full test name(s) specified and two + mods in one test + """ + testmod_list_input = [ + "ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-default--clm-crop" + ] target = ["clm-default", "clm-crop"] - output = _get_testmod_list(input) + output = _get_testmod_list(testmod_list_input) self.assertEqual(output, target) From dbbe8c5ac2967c5e94378549837813dde59e9fd7 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 31 Jan 2024 15:58:38 -0700 Subject: [PATCH 14/40] Satisfy pylint for test_unit_utils_import_coord.py. --- .../ctsm/test/test_unit_utils_import_coord.py | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/python/ctsm/test/test_unit_utils_import_coord.py b/python/ctsm/test/test_unit_utils_import_coord.py index b7ec8f90ec..c5607356fd 100755 --- a/python/ctsm/test/test_unit_utils_import_coord.py +++ b/python/ctsm/test/test_unit_utils_import_coord.py @@ -16,7 +16,7 @@ # -- add python/ctsm to path (needed if we want to run test stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) - +# pylint: disable=wrong-import-position from ctsm import unit_testing from ctsm.path_utils import path_to_ctsm_root from ctsm.ctsm_pylib_dependent_utils import import_coord_1d, import_coord_2d @@ -33,7 +33,9 @@ # Allow all the instance attributes that we need # pylint: disable=too-many-instance-attributes class TestUtilsImportCoord(unittest.TestCase): - # Tests the importcoord* subroutines from utils.py + """ + Tests the importcoord* subroutines from utils.py + """ def setUp(self): """Setup for trying out the methods""" @@ -56,6 +58,9 @@ def tearDown(self): shutil.rmtree(self._tempdir, ignore_errors=True) def test_importcoord1d(self): + """ + Tests importing a 1-d lat/lon variable + """ ds = xr.open_dataset(self._1d_lonlat_file) lat, Nlat = import_coord_1d(ds, "lat") np.testing.assert_equal(Nlat, 360) @@ -63,6 +68,9 @@ def test_importcoord1d(self): np.testing.assert_array_equal(lat.values[-4:], [-88.25, -88.75, -89.25, -89.75]) def test_importcoord1d_attrs(self): + """ + Tests attributes of an imported 1-d lat/lon variable + """ ds = xr.open_dataset(self._1d_lonlat_file) lat, _ = import_coord_1d(ds, "lat") # Unlike import_coord_2d, import_coord_1d doesn't rename the long name. @@ -73,20 +81,29 @@ def test_importcoord1d_attrs(self): self.assertDictEqual(lat.attrs, expected_attributes) def test_importcoord1d_too_many_dims(self): + """ + Tests that 1d-importing function errors when given a 2d variable to import + """ ds = xr.open_dataset(self._2d_lonlat_file) - with self.assertRaisesRegex( + with self.assertRaises( SystemExit, - "Expected 1 dimension for LATIXY; found 2: \('lsmlat', 'lsmlon'\)", + msg="Expected 1 dimension for LATIXY; found 2: ('lsmlat', 'lsmlon')", ): import_coord_1d(ds, "LATIXY") def test_importcoord2d(self): + """ + Tests importing a 2-d lat/lon variable + """ ds = xr.open_dataset(self._2d_lonlat_file) lat, _ = import_coord_2d(ds, "lat", "LATIXY") expected_values = np.array([-13.9, -11.7, -9.5, -7.3, -5.1]).astype(np.float32) np.testing.assert_array_equal(lat.values, expected_values) def test_importcoord2d_attrs(self): + """ + Tests attributes of an imported 2-d lat/lon variable + """ ds = xr.open_dataset(self._2d_lonlat_file) lat, _ = import_coord_2d(ds, "lat", "LATIXY") expected_attributes = { @@ -96,25 +113,34 @@ def test_importcoord2d_attrs(self): self.assertDictEqual(lat.attrs, expected_attributes) def test_importcoord2d_rename_dim(self): + """ + Tests renaming of an imported 2-d lat/lon variable + """ ds = xr.open_dataset(self._2d_lonlat_file) lat, _ = import_coord_2d(ds, "lat", "LATIXY") self.assertTupleEqual(lat.dims, ("lat",)) def test_importcoord2d_no_dim_contains_coordName(self): + """ + Tests that 2d-importing function errors when given a nonexistent dim name + """ ds = xr.open_dataset(self._2d_lonlat_file) ds = ds.rename({"lsmlat": "abc"}) - with self.assertRaisesRegex( + with self.assertRaises( SystemExit, - "ERROR: Expected 1 dimension name containing lat; found 0: \[\]", + msg="ERROR: Expected 1 dimension name containing lat; found 0: []", ): import_coord_2d(ds, "lat", "LATIXY") def test_importcoord2d_1_dim_containing(self): + """ + Tests that 2d-importing function errors when given an ambiguous dim name + """ ds = xr.open_dataset(self._2d_lonlat_file) ds = ds.rename({"lsmlon": "lsmlat2"}) - with self.assertRaisesRegex( + with self.assertRaises( SystemExit, - "Expected 1 dimension name containing lat; found 2: \['lsmlat', 'lsmlat2'\]", + msg="Expected 1 dimension name containing lat; found 2: ['lsmlat', 'lsmlat2']", ): import_coord_2d(ds, "lat", "LATIXY") From f87abd05ad40b1793a40e0da4f4648249a9bdb34 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 1 Feb 2024 09:10:44 -0700 Subject: [PATCH 15/40] Satisfy pylint for modify_singlept_site_neon.py. Includes adding a timeout of 60 seconds for requests.get(). --- .../modify_singlept_site_neon.py | 232 ++++++++++-------- 1 file changed, 133 insertions(+), 99 deletions(-) diff --git a/python/ctsm/site_and_regional/modify_singlept_site_neon.py b/python/ctsm/site_and_regional/modify_singlept_site_neon.py index ae1318e2f8..e69a8ab834 100755 --- a/python/ctsm/site_and_regional/modify_singlept_site_neon.py +++ b/python/ctsm/site_and_regional/modify_singlept_site_neon.py @@ -54,6 +54,9 @@ myname = getuser() +# Seconds to wait before requests.get() times out +TIMEOUT = 60 + # -- valid neon sites valid_neon_sites = glob.glob( @@ -176,7 +179,7 @@ def get_neon(neon_dir, site_name): + site_name + "_surfaceData.csv" ) - response = requests.get(url) + response = requests.get(url, timeout=TIMEOUT) with open(neon_file, "wb") as a_file: a_file.write(response.content) @@ -430,7 +433,7 @@ def download_file(url, fname): file name to save the downloaded file. """ try: - response = requests.get(url) + response = requests.get(url, timeout=TIMEOUT) with open(fname, "wb") as a_file: a_file.write(response.content) @@ -443,7 +446,7 @@ def download_file(url, fname): except Exception as err: print("The server could not fulfill the request.") print("Something went wrong in downloading", fname) - print("Error code:", err.code) + raise err def fill_interpolate(f_2, var, method): @@ -472,6 +475,129 @@ def fill_interpolate(f_2, var, method): print("=====================================") +def print_neon_data_soil_structure(obs_bot, soil_bot, bin_index): + """ + Print info about NEON data soil structure + """ + print("================================") + print(" Neon data soil structure: ") + print("================================") + + print("------------", "ground", "------------") + for i, this_obs_bot in enumerate(obs_bot): + print("layer", i) + print("-------------", "{0:.2f}".format(this_obs_bot), "-------------") + + print("================================") + print("Surface data soil structure: ") + print("================================") + + print("------------", "ground", "------------") + for this_bin in range(len(bin_index)): + print("layer", this_bin) + print("-------------", "{0:.2f}".format(soil_bot[this_bin]), "-------------") + + +def print_soil_quality( + inorganic, bin_index, soil_lev, layer_depth, carbon_tot, estimated_oc, bulk_den, f_2 +): + """ + Prints information about soil quality + """ + print("~~~~~~~~~~~~~~~~~~~~~~~~") + print("inorganic:") + print("~~~~~~~~~~~~~~~~~~~~~~~~") + print(inorganic) + print("~~~~~~~~~~~~~~~~~~~~~~~~") + + print("bin_index : ", bin_index[soil_lev]) + print("layer_depth : ", layer_depth) + print("carbon_tot : ", carbon_tot) + print("estimated_oc : ", estimated_oc) + print("bulk_den : ", bulk_den) + print("organic :", f_2["ORGANIC"][soil_lev].values) + print("--------------------------") + + +def update_agri_site_info(site_name, f_2): + """ + Updates agricultural sites + """ + ag_sites = ["KONA", "STER"] + if site_name not in ag_sites: + return f_2 + + print("Updating PCT_NATVEG") + print("Original : ", f_2.PCT_NATVEG.values) + f_2.PCT_NATVEG.values = [[0.0]] + print("Updated : ", f_2.PCT_NATVEG.values) + + print("Updating PCT_CROP") + print("Original : ", f_2.PCT_CROP.values) + f_2.PCT_CROP.values = [[100.0]] + print("Updated : ", f_2.PCT_CROP.values) + + print("Updating PCT_NAT_PFT") + print(f_2.PCT_NAT_PFT.values[0]) + print(f_2.PCT_NAT_PFT[0].values) + + return f_2 + + +def update_fields_with_neon(f_1, d_f, bin_index): + """ + update fields with neon + """ + f_2 = f_1 + soil_levels = f_2["PCT_CLAY"].size + for soil_lev in range(soil_levels): + print("--------------------------") + print("soil_lev:", soil_lev) + print(d_f["clayTotal"][bin_index[soil_lev]]) + f_2["PCT_CLAY"][soil_lev] = d_f["clayTotal"][bin_index[soil_lev]] + f_2["PCT_SAND"][soil_lev] = d_f["sandTotal"][bin_index[soil_lev]] + + bulk_den = d_f["bulkDensExclCoarseFrag"][bin_index[soil_lev]] + carbon_tot = d_f["carbonTot"][bin_index[soil_lev]] + estimated_oc = d_f["estimatedOC"][bin_index[soil_lev]] + + # -- estimated_oc in neon data is rounded to the nearest integer. + # -- Check to make sure the rounded oc is not higher than carbon_tot. + # -- Use carbon_tot if estimated_oc is bigger than carbon_tot. + + estimated_oc = min(estimated_oc, carbon_tot) + + layer_depth = ( + d_f["biogeoBottomDepth"][bin_index[soil_lev]] + - d_f["biogeoTopDepth"][bin_index[soil_lev]] + ) + + # f_2["ORGANIC"][soil_lev] = estimated_oc * bulk_den / 0.58 + + # -- after adding caco3 by NEON: + # -- if caco3 exists: + # -- inorganic = caco3/100.0869*12.0107 + # -- organic = carbon_tot - inorganic + # -- else: + # -- organic = estimated_oc * bulk_den /0.58 + + caco3 = d_f["caco3Conc"][bin_index[soil_lev]] + inorganic = caco3 / 100.0869 * 12.0107 + print("inorganic:", inorganic) + + if not np.isnan(inorganic): + actual_oc = carbon_tot - inorganic + else: + actual_oc = estimated_oc + + f_2["ORGANIC"][soil_lev] = actual_oc * bulk_den / 0.58 + + print_soil_quality( + inorganic, bin_index, soil_lev, layer_depth, carbon_tot, estimated_oc, bulk_den, f_2 + ) + return f_2 + + def main(): """modify_singlept_site_neon main function""" args = get_parser().parse_args() @@ -532,88 +658,10 @@ def main(): bins = d_f["biogeoTopDepth"] / 100 bin_index = np.digitize(soil_mid, bins) - 1 - """ - print ("================================") - print (" Neon data soil structure: ") - print ("================================") - - print ("------------","ground","------------") - for i in range(len(obs_bot)): - print ("layer",i) - print ("-------------", - "{0:.2f}".format(obs_bot[i]), - "-------------") - - print ("================================") - print ("Surface data soil structure: ") - print ("================================") - - print ("------------","ground","------------") - for b in range(len(bin_index)): - print ("layer",b) - print ("-------------", - "{0:.2f}".format(soil_bot[b]), - "-------------") - """ + print_neon_data_soil_structure(obs_bot, soil_bot, bin_index) # -- update fields with neon - f_2 = f_1 - soil_levels = f_2["PCT_CLAY"].size - for soil_lev in range(soil_levels): - print("--------------------------") - print("soil_lev:", soil_lev) - print(d_f["clayTotal"][bin_index[soil_lev]]) - f_2["PCT_CLAY"][soil_lev] = d_f["clayTotal"][bin_index[soil_lev]] - f_2["PCT_SAND"][soil_lev] = d_f["sandTotal"][bin_index[soil_lev]] - - bulk_den = d_f["bulkDensExclCoarseFrag"][bin_index[soil_lev]] - carbon_tot = d_f["carbonTot"][bin_index[soil_lev]] - estimated_oc = d_f["estimatedOC"][bin_index[soil_lev]] - - # -- estimated_oc in neon data is rounded to the nearest integer. - # -- Check to make sure the rounded oc is not higher than carbon_tot. - # -- Use carbon_tot if estimated_oc is bigger than carbon_tot. - - estimated_oc = min(estimated_oc, carbon_tot) - - layer_depth = ( - d_f["biogeoBottomDepth"][bin_index[soil_lev]] - - d_f["biogeoTopDepth"][bin_index[soil_lev]] - ) - - # f_2["ORGANIC"][soil_lev] = estimated_oc * bulk_den / 0.58 - - # -- after adding caco3 by NEON: - # -- if caco3 exists: - # -- inorganic = caco3/100.0869*12.0107 - # -- organic = carbon_tot - inorganic - # -- else: - # -- organic = estimated_oc * bulk_den /0.58 - - caco3 = d_f["caco3Conc"][bin_index[soil_lev]] - inorganic = caco3 / 100.0869 * 12.0107 - print("inorganic:", inorganic) - - if not np.isnan(inorganic): - actual_oc = carbon_tot - inorganic - else: - actual_oc = estimated_oc - - f_2["ORGANIC"][soil_lev] = actual_oc * bulk_den / 0.58 - - print("~~~~~~~~~~~~~~~~~~~~~~~~") - print("inorganic:") - print("~~~~~~~~~~~~~~~~~~~~~~~~") - print(inorganic) - print("~~~~~~~~~~~~~~~~~~~~~~~~") - - print("bin_index : ", bin_index[soil_lev]) - print("layer_depth : ", layer_depth) - print("carbon_tot : ", carbon_tot) - print("estimated_oc : ", estimated_oc) - print("bulk_den : ", bulk_den) - print("organic :", f_2["ORGANIC"][soil_lev].values) - print("--------------------------") + f_2 = update_fields_with_neon(f_1, d_f, bin_index) # -- Interpolate missing values method = "linear" @@ -633,22 +681,8 @@ def main(): sort_print_soil_layers(obs_bot, soil_bot) - # -- updates for ag sites : KONA and STER - ag_sites = ["KONA", "STER"] - if site_name in ag_sites: - print("Updating PCT_NATVEG") - print("Original : ", f_2.PCT_NATVEG.values) - f_2.PCT_NATVEG.values = [[0.0]] - print("Updated : ", f_2.PCT_NATVEG.values) - - print("Updating PCT_CROP") - print("Original : ", f_2.PCT_CROP.values) - f_2.PCT_CROP.values = [[100.0]] - print("Updated : ", f_2.PCT_CROP.values) - - print("Updating PCT_NAT_PFT") - print(f_2.PCT_NAT_PFT.values[0]) - print(f_2.PCT_NAT_PFT[0].values) + # -- updates for ag sites + update_agri_site_info(site_name, f_2) out_dir = args.out_dir From e4aa2bdac842b0aa105308cbb8068dd55bbf1ede Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 11:47:58 -0700 Subject: [PATCH 16/40] python/Makefile: Do not fail even if pylint isn't clean. --- python/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/Makefile b/python/Makefile index 271e977046..440e2e0de8 100644 --- a/python/Makefile +++ b/python/Makefile @@ -19,7 +19,7 @@ ifneq ($(verbose), not-set) endif PYLINT=pylint -PYLINT_ARGS=-j 4 --rcfile=ctsm/.pylintrc +PYLINT_ARGS=-j 4 --rcfile=ctsm/.pylintrc --fail-under=0 PYLINT_SRC = \ ctsm # NOTE: These don't pass pylint checking and should be added when we put into effort to get them to pass From 8e3b3ba02ad75ab50ba751267627d9136c96464e Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 11:48:31 -0700 Subject: [PATCH 17/40] python/Makefile: Call black before pylint in 'make all'. --- python/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/Makefile b/python/Makefile index 440e2e0de8..b43e1c5e53 100644 --- a/python/Makefile +++ b/python/Makefile @@ -27,7 +27,7 @@ PYLINT_SRC = \ # ../cime_config/buildlib \ # ../cime_config/buildnml -all: test lint black +all: test black lint @echo @echo @echo "Successfully ran all standard tests" From 18e03eac9ebf2da8700dbaa111bc79bcebf2d925 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 13:22:01 -0700 Subject: [PATCH 18/40] Satisfy pylint for ctsm_pylib_dependent_utils.py. --- python/ctsm/ctsm_pylib_dependent_utils.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/python/ctsm/ctsm_pylib_dependent_utils.py b/python/ctsm/ctsm_pylib_dependent_utils.py index 4f149c53a9..59ca15155b 100644 --- a/python/ctsm/ctsm_pylib_dependent_utils.py +++ b/python/ctsm/ctsm_pylib_dependent_utils.py @@ -1,3 +1,7 @@ +""" +Utilities that are dependent on non-standard modules (i.e., require ctsm_pylib). +""" + import numpy as np from ctsm.utils import abort @@ -14,8 +18,10 @@ def import_coord_1d(data_set, coord_name): """ data_array = data_set[coord_name] if len(data_array.dims) != 1: - abort(f"Expected 1 dimension for {coord_name}; " - + f"found {len(data_array.dims)}: {data_array.dims}") + abort( + f"Expected 1 dimension for {coord_name}; " + + f"found {len(data_array.dims)}: {data_array.dims}" + ) return data_array, len(data_array) @@ -37,8 +43,10 @@ def import_coord_2d(data_set, coord_name, var_name): data_array = data_set[var_name] this_dim = [x for x in data_array.dims if coord_name in x] if len(this_dim) != 1: - abort(f"Expected 1 dimension name containing {coord_name}; " - + f"found {len(this_dim)}: {this_dim}") + abort( + f"Expected 1 dimension name containing {coord_name}; " + + f"found {len(this_dim)}: {this_dim}" + ) this_dim = this_dim[0] other_dim = [x for x in data_array.dims if coord_name not in x] if len(other_dim) != 1: From c55dac6b89cd40091426d6205e83ad7e0d59099a Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 15:56:31 -0700 Subject: [PATCH 19/40] Resolve pylint warnings about cropcal_module.py. Moves some functions from there into 3 new modules: * ctsm/crop_calendars/check_constant_vars.py * ctsm/crop_calendars/check_rx_obeyed.py * ctsm/crop_calendars/convert_axis_time2gs.py --- .../crop_calendars/check_constant_vars.py | 386 ++++++ python/ctsm/crop_calendars/check_rx_obeyed.py | 227 ++++ .../ctsm/crop_calendars/check_rxboth_run.py | 20 +- .../crop_calendars/convert_axis_time2gs.py | 631 ++++++++++ python/ctsm/crop_calendars/cropcal_module.py | 1069 ++--------------- 5 files changed, 1351 insertions(+), 982 deletions(-) create mode 100644 python/ctsm/crop_calendars/check_constant_vars.py create mode 100644 python/ctsm/crop_calendars/check_rx_obeyed.py create mode 100644 python/ctsm/crop_calendars/convert_axis_time2gs.py diff --git a/python/ctsm/crop_calendars/check_constant_vars.py b/python/ctsm/crop_calendars/check_constant_vars.py new file mode 100644 index 0000000000..92e1819803 --- /dev/null +++ b/python/ctsm/crop_calendars/check_constant_vars.py @@ -0,0 +1,386 @@ +""" +For variables that should stay constant, make sure they are +""" + +import sys +import os +import numpy as np + +# Import the CTSM Python utilities. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. +_CTSM_PYTHON = os.path.join( + os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" +) +sys.path.insert(1, _CTSM_PYTHON) +from ctsm.crop_calendars.cropcal_module import ( # pylint: disable=wrong-import-position + import_rx_dates, +) + + +def check_one_constant_var_setup(this_ds, case, var): + """ + Various setup steps for check_one_constant_var() + """ + if "gs" in this_ds[var].dims: + time_coord = "gs" + elif "time" in this_ds[var].dims: + time_coord = "time" + else: + raise RuntimeError(f"Which of these is the time coordinate? {this_ds[var].dims}") + i_time_coord = this_ds[var].dims.index(time_coord) + + this_da = this_ds[var] + ra_sp = np.moveaxis(this_da.copy().values, i_time_coord, 0) + incl_patches = [] + bad_patches = np.array([]) + str_list = [] + + # Read prescription file, if needed + rx_ds = None + if isinstance(case, dict): + if var == "GDDHARV" and "rx_gdds_file" in case: + rx_ds = import_rx_dates( + "gdd", case["rx_gdds_file"], this_ds, set_neg1_to_nan=False + ).squeeze() + + return time_coord, this_da, ra_sp, incl_patches, str_list, rx_ds, bad_patches + + +def loop_through_bad_patches( + verbose, + emojus, + var, + everything_ok, + str_list, + rx_ds, + time_1, + t1_yr, + t1_vals, + timestep, + t_yr, + t_vals, + bad_patches_this_time, + found_in_rx, + vary_patches, + vary_lons, + vary_lats, + vary_crops, + vary_crops_int, +): + """ + Loop through and check any patches that were "bad" according to check_constant_vars(). + + This is pretty inefficient, but it works. + """ + patch = None # In case bad_patches_this_time is empty + for i, patch in enumerate(bad_patches_this_time): + this_patch = vary_patches[i] + this_lon = vary_lons[i] + this_lat = vary_lats[i] + this_crop = vary_crops[i] + this_crop_int = vary_crops_int[i] + + # If prescribed input had missing value (-1), it's fine for it to vary. + if rx_ds: + rx_var = f"gs1_{this_crop_int}" + if this_lon in rx_ds.lon.values and this_lat in rx_ds.lat.values: + rx_vals = rx_ds[rx_var].sel(lon=this_lon, lat=this_lat).values + n_unique = len(np.unique(rx_vals)) + if n_unique == 1: + found_in_rx[i] = True + if rx_vals == -1: + continue + elif n_unique > 1: + raise RuntimeError( + f"How does lon {this_lon} lat {this_lat} {this_crop} have " + + f"time-varying {var}?" + ) + else: + raise RuntimeError(f"lon {this_lon} lat {this_lat} {this_crop} not in rx dataset?") + + # Print info (or save to print later) + any_bad = True + if verbose: + this_str = ( + f" Patch {this_patch} (lon {this_lon} lat {this_lat}) " + + f"{this_crop} ({this_crop_int})" + ) + if rx_ds and not found_in_rx[i]: + this_str = this_str.replace("(lon", "* (lon") + if not np.isnan(t1_vals[patch]): + t1_val_print = int(t1_vals[patch]) + else: + t1_val_print = "NaN" + if not np.isnan(t_vals[patch]): + t_val_print = int(t_vals[patch]) + else: + t_val_print = "NaN" + if var == "SDATES": + str_list.append( + f"{this_str}: Sowing {t1_yr} jday {t1_val_print}, {t_yr} " + + f"jday {t_val_print}" + ) + else: + str_list.append( + f"{this_str}: {t1_yr} {var} {t1_val_print}, {t_yr} {var} " + f"{t_val_print}" + ) + else: + if everything_ok: + print(f"{emojus} CLM output {var} unexpectedly vary over time:") + everything_ok = False + print(f"{var} timestep {timestep} does not match timestep {time_1}") + break + return any_bad, patch + + +def ensure_all_patches_checked(this_ds, this_da, ra_sp, incl_patches): + """ + In check_one_constant_var(), make sure every patch was checked once (or is all-NaN except + possibly final season) + """ + incl_patches = np.sort(incl_patches) + if not np.array_equal(incl_patches, np.unique(incl_patches)): + raise RuntimeError("Patch(es) checked more than once!") + incl_patches = list(incl_patches) + incl_patches += list( + np.where( + np.all( + np.isnan( + ra_sp[ + :-1, + ] + ), + axis=0, + ) + )[0] + ) + incl_patches = np.sort(incl_patches) + if not np.array_equal(incl_patches, np.unique(incl_patches)): + raise RuntimeError("Patch(es) checked but also all-NaN??") + if not np.array_equal(incl_patches, np.arange(this_ds.dims["patch"])): + for patch in np.arange(this_ds.dims["patch"]): + if patch not in incl_patches: + raise RuntimeError( + f"Not all patches checked! E.g., {patch}: {this_da.isel(patch=patch).values}" + ) + + +def check_one_constant_var_loop_through_timesteps( + this_ds, + ignore_nan, + verbose, + emojus, + var, + everything_ok, + time_coord, + this_da, + str_list, + rx_ds, + time_1, + these_patches, + t1_yr, + t1_vals, +): + """ + In check_one_constant_var(), loop through timesteps + """ + for timestep in np.arange(time_1 + 1, this_ds.dims[time_coord]): + t_yr = this_ds[time_coord].values[timestep] + t_vals = np.squeeze(this_da.isel({time_coord: timestep, "patch": these_patches}).values) + ok_p = t1_vals == t_vals + + # If allowed, ignore where either t or t1 is NaN. Should only be used for runs where + # land use varies over time. + if ignore_nan: + ok_p = np.squeeze(np.bitwise_or(ok_p, np.isnan(t1_vals + t_vals))) + + if not np.all(ok_p): + any_bad_before_checking_rx = True + bad_patches_this_time = list(np.where(np.bitwise_not(ok_p))[0]) + bad_patches = np.concatenate( + (bad_patches, np.array(these_patches)[bad_patches_this_time]) + ) + if rx_ds: + found_in_rx = np.array([False for x in bad_patches]) + vary_patches = list(np.array(these_patches)[bad_patches_this_time]) + vary_lons = this_ds.patches1d_lon.values[bad_patches_this_time] + vary_lats = this_ds.patches1d_lat.values[bad_patches_this_time] + vary_crops = this_ds.patches1d_itype_veg_str.values[bad_patches_this_time] + vary_crops_int = this_ds.patches1d_itype_veg.values[bad_patches_this_time] + + any_bad_any_crop = False + for crop_int in np.unique(vary_crops_int): + rx_var = f"gs1_{crop_int}" + vary_lons_this_crop = vary_lons[np.where(vary_crops_int == crop_int)] + vary_lats_this_crop = vary_lats[np.where(vary_crops_int == crop_int)] + these_rx_vals = np.diag( + rx_ds[rx_var].sel(lon=vary_lons_this_crop, lat=vary_lats_this_crop).values + ) + if len(these_rx_vals) != len(vary_lats_this_crop): + raise RuntimeError( + f"Expected {len(vary_lats_this_crop)} rx values; got " + + f"{len(these_rx_vals)}" + ) + if not np.any(these_rx_vals != -1): + continue + any_bad_any_crop = True + break + if not any_bad_any_crop: + continue + + # Loop through and check any patches that were "bad" + any_bad = loop_through_bad_patches( + verbose, + emojus, + var, + everything_ok, + str_list, + rx_ds, + time_1, + t1_yr, + t1_vals, + timestep, + t_yr, + t_vals, + bad_patches_this_time, + found_in_rx, + vary_patches, + vary_lons, + vary_lats, + vary_crops, + vary_crops_int, + ) + + return any_bad_before_checking_rx, bad_patches, found_in_rx, any_bad + + +def check_one_constant_var( + this_ds, case, ignore_nan, verbose, emojus, var, any_bad_before_checking_rx +): + """ + Ensure that a variable that should be constant actually is + """ + everything_ok = True + + ( + time_coord, + this_da, + ra_sp, + incl_patches, + str_list, + rx_ds, + bad_patches, + ) = check_one_constant_var_setup(this_ds, case, var) + + for time_1 in np.arange(this_ds.dims[time_coord] - 1): + condn = ~np.isnan(ra_sp[time_1, ...]) + if time_1 > 0: + condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:time_1, ...]), axis=0)) + these_patches = np.where(condn)[0] + if these_patches.size == 0: + continue + these_patches = list(np.where(condn)[0]) + incl_patches += these_patches + + t1_yr = this_ds[time_coord].values[time_1] + t1_vals = np.squeeze(this_da.isel({time_coord: time_1, "patch": these_patches}).values) + + ( + any_bad_before_checking_rx, + bad_patches, + found_in_rx, + any_bad, + ) = check_one_constant_var_loop_through_timesteps( + this_ds, + ignore_nan, + verbose, + emojus, + var, + everything_ok, + time_coord, + this_da, + str_list, + rx_ds, + time_1, + these_patches, + t1_yr, + t1_vals, + ) + + if verbose and any_bad: + print(f"{emojus} CLM output {var} unexpectedly vary over time:") + str_list.sort() + if rx_ds and np.any(~found_in_rx): + str_list = [ + "*: Not found in prescribed input file (maybe minor lon/lat mismatch)" + ] + str_list + elif not rx_ds: + str_list = ["(No rx file checked)"] + str_list + print("\n".join(str_list)) + + # Make sure every patch was checked once (or is all-NaN except possibly final season) + ensure_all_patches_checked(this_ds, this_da, ra_sp, incl_patches) + + if not any_bad: + if any_bad_before_checking_rx: + print( + f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " + + "seasons of output (except for patch(es) with missing rx)." + ) + else: + print( + f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " + + "seasons of output." + ) + + return any_bad, any_bad_before_checking_rx, bad_patches + + +def check_constant_vars( + this_ds, case, ignore_nan, const_growing_seasons=None, verbose=True, throw_error=True +): + """ + For variables that should stay constant, make sure they are + """ + if isinstance(case, str): + const_vars = [case] + elif isinstance(case, list): + const_vars = case + elif isinstance(case, dict): + const_vars = case["const_vars"] + else: + raise TypeError(f"case must be str or dict, not {type(case)}") + + if not const_vars: + return None + + if const_growing_seasons: + gs_0 = this_ds.gs.values[0] + gs_n = this_ds.gs.values[-1] + if const_growing_seasons.start > gs_0 or const_growing_seasons.stop < gs_n: + print( + f"❗ Only checking const_vars over {const_growing_seasons.start}-" + + f"{const_growing_seasons.stop} (run includes {gs_0}-{gs_n})" + ) + this_ds = this_ds.sel(gs=const_growing_seasons) + + any_bad = False + any_bad_before_checking_rx = False + if throw_error: + emojus = "❌" + else: + emojus = "❗" + if not isinstance(const_vars, list): + const_vars = [const_vars] + + for var in const_vars: + any_bad, any_bad_before_checking_rx, bad_patches = check_one_constant_var( + this_ds, case, ignore_nan, verbose, emojus, var, any_bad_before_checking_rx + ) + + if any_bad and throw_error: + raise RuntimeError("Stopping due to failed check_constant_vars().") + + bad_patches = np.unique(bad_patches) + return [int(p) for p in bad_patches] diff --git a/python/ctsm/crop_calendars/check_rx_obeyed.py b/python/ctsm/crop_calendars/check_rx_obeyed.py new file mode 100644 index 0000000000..c1ad5cfecc --- /dev/null +++ b/python/ctsm/crop_calendars/check_rx_obeyed.py @@ -0,0 +1,227 @@ +""" +Check that prescribed crop calendars were obeyed +""" + +import sys +import os +import numpy as np + +# Import the CTSM Python utilities. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. +_CTSM_PYTHON = os.path.join( + os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" +) +sys.path.insert(1, _CTSM_PYTHON) +import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position +from ctsm.crop_calendars.cropcal_module import ( # pylint: disable=wrong-import-position + DEFAULT_GDD_MIN, +) + + +def get_pct_harv_at_mature(harvest_reason_da): + """ + Get percentage of harvests that happened at maturity + """ + n_harv_at_mature = len(np.where(harvest_reason_da.values == 1)[0]) + with np.errstate(invalid="ignore"): + harv_reason_gt_0 = harvest_reason_da.values > 0 + n_harv = len(np.where(harv_reason_gt_0)[0]) + if n_harv == 0: + return np.nan + pct_harv_at_mature = n_harv_at_mature / n_harv * 100 + pct_harv_at_mature = np.format_float_positional( + pct_harv_at_mature, precision=2, unique=False, fractional=False, trim="k" + ) # Round to 2 significant digits + return pct_harv_at_mature + + +def check_rx_obeyed_handle_gdharv(output_var, gdd_min, ds_thisveg, rx_array): + """ + In check_rx_obeyed(), account for the GDD harvest threshold minimum set in PlantCrop() + """ + if gdd_min is None: + gdd_min = DEFAULT_GDD_MIN + print( + f"gdd_min not provided when doing check_rx_obeyed() for {output_var}; using " + + f"default {gdd_min}" + ) + with np.errstate(invalid="ignore"): + rx_array[(rx_array >= 0) & (rx_array < gdd_min)] = gdd_min + + # ...harvest reason + # 0: Should never happen in any simulation + # 1: Harvesting at maturity + # 2: Harvesting at max season length (mxmat) + # 3: Crop was incorrectly planted in last time step of Dec. 31 + # 4: Today was supposed to be the planting day, but the previous crop still hasn't been + # harvested. + # 5: Harvest the day before the next sowing date this year. + # 6: Same as #5. + # 7: Harvest the day before the next sowing date (today is Dec. 31 and the sowing date + # is Jan. 1) + harvest_reason_da = ds_thisveg["HARVEST_REASON"] + unique_harvest_reasons = np.unique( + harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] + ) + pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) + return gdd_min, unique_harvest_reasons, pct_harv_at_mature + + +def check_rx_obeyed_setup(dates_ds, which_ds, output_var, verbose): + """ + Various setup steps for check_rx_obeyed() + """ + all_ok = 2 + diff_str_list = [] + gdd_tolerance = 1 + + if "GDDHARV" in output_var and verbose: + harvest_reason_da = dates_ds["HARVEST_REASON"] + unique_harvest_reasons = np.unique( + harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] + ) + pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) + print( + f"{which_ds} harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harv at " + + "maturity)" + ) + + return all_ok, diff_str_list, gdd_tolerance + + +def get_extreme_info(diff_array, rx_array, mxn, dims, gs_da, patches1d_lon, patches1d_lat): + """ + Get information about extreme gridcells (for debugging) + """ + if mxn == np.min: # pylint: disable=comparison-with-callable + diff_array = np.ma.masked_array(diff_array, mask=np.abs(diff_array) == 0) + themxn = mxn(diff_array) + + # Find the first patch-gs that has the mxn value + matching_indices = np.where(diff_array == themxn) + first_indices = [x[0] for x in matching_indices] + + # Get the lon, lat, and growing season of that patch-gs + patch_index = first_indices[dims.index("patch")] + this_lon = patches1d_lon.values[patch_index] + this_lat = patches1d_lat.values[patch_index] + season_index = first_indices[dims.index("gs")] + this_gs = gs_da.values[season_index] + + # Get the prescribed value for this patch-gs + this_rx = rx_array[patch_index][0] + + return round(themxn, 3), round(this_lon, 3), round(this_lat, 3), this_gs, round(this_rx) + + +def check_rx_obeyed( + vegtype_list, rx_ds, dates_ds, which_ds, output_var, gdd_min=None, verbose=False +): + """ + Check that prescribed crop calendars were obeyed + """ + all_ok, diff_str_list, gdd_tolerance = check_rx_obeyed_setup( + dates_ds, which_ds, output_var, verbose + ) + + for vegtype_str in vegtype_list: + thisveg_patches = np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] + if thisveg_patches.size == 0: + continue + ds_thisveg = dates_ds.isel(patch=thisveg_patches) + + vegtype_int = utils.vegtype_str2int(vegtype_str)[0] + rx_da = rx_ds[f"gs1_{vegtype_int}"] + rx_array = rx_da.values[ + ds_thisveg.patches1d_jxy.values.astype(int) - 1, + ds_thisveg.patches1d_ixy.values.astype(int) - 1, + ] + rx_array = np.expand_dims(rx_array, axis=1) + sim_array = ds_thisveg[output_var].values + sim_array_dims = ds_thisveg[output_var].dims + + # Ignore patches without prescribed value + with np.errstate(invalid="ignore"): + rx_array[np.where(rx_array < 0)] = np.nan + + # Account for... + if "GDDHARV" in output_var: + # ...GDD harvest threshold minimum set in PlantCrop() + gdd_min, unique_harvest_reasons, pct_harv_at_mature = check_rx_obeyed_handle_gdharv( + output_var, gdd_min, ds_thisveg, rx_array + ) + + if np.any(sim_array != rx_array): + diff_array = sim_array - rx_array + + # Allow negative GDDHARV values when harvest occurred because sowing was scheduled for + # the next day + if output_var == "GDDHARV_PERHARV": + diff_array = np.ma.masked_array( + diff_array, + mask=(diff_array < 0) & (ds_thisveg["HARVEST_REASON_PERHARV"].values == 5), + ) + elif output_var == "GDDHARV": + with np.errstate(invalid="ignore"): + diff_lt_0 = diff_array < 0 + harv_reason_5 = ds_thisveg["HARVEST_REASON"].values == 5 + diff_array = np.ma.masked_array(diff_array, mask=diff_lt_0 & harv_reason_5) + + with np.errstate(invalid="ignore"): + abs_gt_0 = abs(diff_array) > 0 + if np.any(np.abs(diff_array[abs_gt_0]) > 0): + min_diff, min_lon, min_lat, min_gs, min_rx = get_extreme_info( + diff_array, + rx_array, + np.nanmin, + sim_array_dims, + dates_ds.gs, + ds_thisveg.patches1d_lon, + ds_thisveg.patches1d_lat, + ) + max_diff, max_lon, max_lat, max_gs, max_rx = get_extreme_info( + diff_array, + rx_array, + np.nanmax, + sim_array_dims, + dates_ds.gs, + ds_thisveg.patches1d_lon, + ds_thisveg.patches1d_lat, + ) + + diffs_eg_txt = ( + f"{vegtype_str} ({vegtype_int}): diffs range {min_diff} (lon {min_lon}, lat " + + f"{min_lat}, gs {min_gs}, rx ~{min_rx}) to {max_diff} (lon {max_lon}, lat " + + f"{max_lat}, gs {max_gs}, rx ~{max_rx})" + ) + if "GDDHARV" in output_var: + diffs_eg_txt += ( + f"; harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}" + + "% harvested at maturity)" + ) + if "GDDHARV" in output_var and np.nanmax(abs(diff_array)) <= gdd_tolerance: + if all_ok > 0: + all_ok = 1 + diff_str_list.append(f" {diffs_eg_txt}") + else: + all_ok = 0 + if verbose: + print( + f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., " + + f"{diffs_eg_txt}" + ) + else: + break + + if all_ok == 2: + print(f"✅ {which_ds}: Prescribed {output_var} always obeyed") + elif all_ok == 1: + # print(f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable:") + # for x in diff_str_list: print(x) + print( + f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable (diffs <= " + + f"{gdd_tolerance})" + ) + elif not verbose: + print(f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., {diffs_eg_txt}") diff --git a/python/ctsm/crop_calendars/check_rxboth_run.py b/python/ctsm/crop_calendars/check_rxboth_run.py index c2cf37aa12..126ef98bbc 100644 --- a/python/ctsm/crop_calendars/check_rxboth_run.py +++ b/python/ctsm/crop_calendars/check_rxboth_run.py @@ -8,6 +8,20 @@ import numpy as np import cropcal_module as cc # pylint: disable=import-error +# Import the CTSM Python utilities. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. +_CTSM_PYTHON = os.path.join( + os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" +) +sys.path.insert(1, _CTSM_PYTHON) +from ctsm.crop_calendars.check_rx_obeyed import ( # pylint: disable=wrong-import-position + check_rx_obeyed, +) +from ctsm.crop_calendars.check_constant_vars import ( # pylint: disable=wrong-import-position + check_constant_vars, +) + def main(argv): """ @@ -77,7 +91,7 @@ def main(argv): year_1=args.first_usable_year, year_n=args.last_usable_year, ) - cc.check_constant_vars(case["ds"], case, ignore_nan=True, verbose=True, throw_error=True) + check_constant_vars(case["ds"], case, ignore_nan=True, verbose=True, throw_error=True) # Import GGCMI sowing and harvest dates, and check sims casename = "Prescribed Calendars" @@ -114,7 +128,7 @@ def main(argv): # Check if case["rx_sdates_file"]: - cc.check_rx_obeyed( + check_rx_obeyed( case["ds"].vegtype_str.values, case["rx_sdates_ds"].isel(time=0), case["ds"], @@ -122,7 +136,7 @@ def main(argv): "SDATES", ) if case["rx_gdds_file"]: - cc.check_rx_obeyed( + check_rx_obeyed( case["ds"].vegtype_str.values, case["rx_gdds_ds"].isel(time=0), case["ds"], diff --git a/python/ctsm/crop_calendars/convert_axis_time2gs.py b/python/ctsm/crop_calendars/convert_axis_time2gs.py new file mode 100644 index 0000000000..f311d39e05 --- /dev/null +++ b/python/ctsm/crop_calendars/convert_axis_time2gs.py @@ -0,0 +1,631 @@ +""" +Convert time*mxharvests axes to growingseason axis +""" +import warnings +import sys +import os +import numpy as np +import xarray as xr + +# Import the CTSM Python utilities. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. +_CTSM_PYTHON = os.path.join( + os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" +) +sys.path.insert(1, _CTSM_PYTHON) + +try: + import pandas as pd +except ModuleNotFoundError: + pass + + +def pym_to_pg(pym_array, quiet=False): + """ + In convert_axis_time2gs(), convert year x month array to growingseason axis + """ + pg_array = np.reshape(pym_array, (pym_array.shape[0], -1)) + ok_pg = pg_array[~np.isnan(pg_array)] + if not quiet: + print( + f"{ok_pg.size} included; unique N seasons = " + + f"{np.unique(np.sum(~np.isnan(pg_array), axis=1))}" + ) + return pg_array + + +def ignore_lastyear_complete_season(pg_array, excl, mxharvests): + """ + Helper function for convert_axis_time2gs() + """ + tmp_l = pg_array[:, :-mxharvests] + tmp_r = pg_array[:, -mxharvests:] + tmp_r[np.where(excl)] = np.nan + pg_array = np.concatenate((tmp_l, tmp_r), axis=1) + return pg_array + + +def convert_axis_time2gs_setup(this_ds, verbose): + """ + Various setup steps for convert_axis_time2gs_setup() + """ + # How many non-NaN patch-seasons do we expect to have once we're done organizing things? + n_patch = this_ds.dims["patch"] + # Because some patches will be planted in the last year but not complete, we have to ignore any + # finalyear-planted seasons that do complete. + n_gs = this_ds.dims["time"] - 1 + expected_valid = n_patch * n_gs + + mxharvests = this_ds.dims["mxharvests"] + + if verbose: + print( + f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} " + + "patch-seasons" + ) + + # Set all non-positive date values to NaN. These are seasons that were never harvested + # (or never started): "non-seasons." + if this_ds.HDATES.dims != ("time", "mxharvests", "patch"): + raise RuntimeError( + "This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not " + + f"{this_ds.HDATES.dims}" + ) + hdates_ymp = this_ds.HDATES.copy().where(this_ds.HDATES > 0).values + hdates_pym = np.transpose(hdates_ymp.copy(), (2, 0, 1)) + sdates_ymp = this_ds.SDATES_PERHARV.copy().where(this_ds.SDATES_PERHARV > 0).values + sdates_pym = np.transpose(sdates_ymp.copy(), (2, 0, 1)) + with np.errstate(invalid="ignore"): + hdates_pym[hdates_pym <= 0] = np.nan + return n_patch, n_gs, expected_valid, mxharvests, hdates_ymp, hdates_pym, sdates_ymp, sdates_pym + + +def set_up_ds_with_gs_axis(ds_in): + """ + Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out. + + Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or + mxharvests. + """ + # Get the data variables to include in the new dataset + data_vars = {} + for var in ds_in.data_vars: + if not any(x in ["mxsowings", "mxharvests"] for x in ds_in[var].dims): + data_vars[var] = ds_in[var] + # Set up the new dataset + gs_years = [t.year - 1 for t in ds_in.time.values[:-1]] + coords = ds_in.coords + coords["gs"] = gs_years + ds_out = xr.Dataset(data_vars=data_vars, coords=coords, attrs=ds_in.attrs) + return ds_out + + +def print_onepatch_wrong_n_gs( + patch_index, + this_ds_orig, + sdates_ymp, + hdates_ymp, + sdates_pym, + hdates_pym, + sdates_pym2, + hdates_pym2, + sdates_pym3, + hdates_pym3, + sdates_pg, + hdates_pg, + sdates_pg2, + hdates_pg2, +): + """ + Print information about a patch (for debugging) + """ + + print( + f"patch {patch_index}: {this_ds_orig.patches1d_itype_veg_str.values[patch_index]}, lon " + f"{this_ds_orig.patches1d_lon.values[patch_index]} lat " + f"{this_ds_orig.patches1d_lat.values[patch_index]}" + ) + + print("Original SDATES (per sowing):") + print(this_ds_orig.SDATES.values[:, :, patch_index]) + + print("Original HDATES (per harvest):") + print(this_ds_orig.HDATES.values[:, :, patch_index]) + + if "pandas" in sys.modules: + + def print_pandas_ymp(msg, cols, arrs_tuple): + print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") + mxharvests = arrs_tuple[0].shape[1] + arrs_list2 = [] + cols2 = [] + for harvest_index in np.arange(mxharvests): + for i, array in enumerate(arrs_tuple): + arrs_list2.append(array[:, harvest_index]) + cols2.append(cols[i] + str(harvest_index)) + arrs_tuple2 = tuple(arrs_list2) + dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) + dataframe.columns = cols2 + print(dataframe) + + print_pandas_ymp( + "Original", + ["sdate", "hdate"], + ( + this_ds_orig.SDATES_PERHARV.values[:, :, patch_index], + this_ds_orig.HDATES.values[:, :, patch_index], + ), + ) + + print_pandas_ymp( + "Masked", + ["sdate", "hdate"], + (sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index]), + ) + + print_pandas_ymp( + 'After "Ignore harvests from before this output began"', + ["sdate", "hdate"], + ( + np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], + ), + ) + + print_pandas_ymp( + 'After "In years with no sowing, pretend the first no-harvest is meaningful"', + ["sdate", "hdate"], + ( + np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], + ), + ) + + print_pandas_ymp( + ( + 'After "In years with sowing that are followed by inactive years, check whether the' + " last sowing was harvested before the patch was deactivated. If not, pretend the" + ' LAST no-harvest is meaningful."' + ), + ["sdate", "hdate"], + ( + np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], + ), + ) + + def print_pandas_pg(msg, cols, arrs_tuple): + print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") + arrs_list = list(arrs_tuple) + for i, array in enumerate(arrs_tuple): + arrs_list[i] = np.reshape(array, (-1)) + arrs_tuple2 = tuple(arrs_list) + dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) + dataframe.columns = cols + print(dataframe) + + print_pandas_pg( + "Same, but converted to gs axis", + ["sdate", "hdate"], + (sdates_pg[patch_index, :], hdates_pg[patch_index, :]), + ) + + print_pandas_pg( + ( + 'After "Ignore any harvests that were planted in the final year, because some cells' + ' will have incomplete growing seasons for the final year"' + ), + ["sdate", "hdate"], + (sdates_pg2[patch_index, :], hdates_pg2[patch_index, :]), + ) + else: + print("Couldn't import pandas, so not displaying example bad patch ORIGINAL.") + + def print_nopandas(array_1, array_2, msg): + print(msg) + if array_1.ndim == 1: + # I don't know why these aren't side-by-side! + print(np.stack((array_1, array_2), axis=1)) + else: + print(np.concatenate((array_1, array_2), axis=1)) + + print_nopandas(sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index], "Masked:") + + print_nopandas( + np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], + 'After "Ignore harvests from before this output began"', + ) + + print_nopandas( + np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], + 'After "In years with no sowing, pretend the first no-harvest is meaningful"', + ) + + print_nopandas( + np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], + ( + 'After "In years with sowing that are followed by inactive years, check whether the' + " last sowing was harvested before the patch was deactivated. If not, pretend the" + ' LAST [easier to implement!] no-harvest is meaningful."' + ), + ) + + print_nopandas( + sdates_pg[patch_index, :], hdates_pg[patch_index, :], "Same, but converted to gs axis" + ) + + print_nopandas( + sdates_pg2[patch_index, :], + hdates_pg2[patch_index, :], + ( + 'After "Ignore any harvests that were planted in the final year, because some cells' + ' will have incomplete growing seasons for the final year"' + ), + ) + + print("\n\n") + + +def handle_years_with_no_sowing(this_ds, mxharvests, hdates_pym, sdates_pym): + """ + In years with no sowing, pretend the first no-harvest is meaningful, unless that was + intentionally ignored earlier in convert_axis_time2gs(). + """ + sdates_orig_ymp = this_ds.SDATES.copy().values + sdates_orig_pym = np.transpose(sdates_orig_ymp.copy(), (2, 0, 1)) + hdates_pym2 = hdates_pym.copy() + sdates_pym2 = sdates_pym.copy() + with np.errstate(invalid="ignore"): + sdates_gt_0 = sdates_orig_pym > 0 + nosow_py = np.all(~sdates_gt_0, axis=2) + nosow_py_1st = nosow_py & np.isnan(hdates_pym[:, :, 0]) + where_nosow_py_1st = np.where(nosow_py_1st) + hdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf + sdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf + for harvest_index in np.arange(mxharvests - 1): + if harvest_index == 0: + continue + if harvest_index == 1: + print("Warning: Untested with mxharvests > 2") + where_nosow_py = np.where( + nosow_py + & ~np.any(np.isnan(hdates_pym[:, :, 0:harvest_index]), axis=2) + & np.isnan(hdates_pym[:, :, harvest_index]) + ) + hdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf + sdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf + return sdates_orig_pym, hdates_pym2, sdates_pym2 + + +def handle_years_with_sowing_then_inactive( + verbose, + n_patch, + n_gs, + expected_valid, + mxharvests, + inactive_py, + sdates_orig_pym, + hdates_pym2, + sdates_pym2, +): + """ + In years with sowing that are followed by inactive years, check whether the last sowing was + harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] + no-harvest is meaningful. + """ + sdates_orig_masked_pym = sdates_orig_pym.copy() + with np.errstate(invalid="ignore"): + sdates_le_0 = sdates_orig_masked_pym <= 0 + sdates_orig_masked_pym[np.where(sdates_le_0)] = np.nan + with warnings.catch_warnings(): + warnings.filterwarnings(action="ignore", message="All-NaN slice encountered") + last_sdate_first_n_gs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2) + last_hdate_first_n_gs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2) + with np.errstate(invalid="ignore"): + hdate_lt_sdate = last_hdate_first_n_gs_py < last_sdate_first_n_gs_py + last_sowing_not_harvested_sameyear_first_n_gs_py = hdate_lt_sdate | np.isnan( + last_hdate_first_n_gs_py + ) + inactive_last_n_gs_py = inactive_py[:, 1:] + last_sowing_never_harvested_first_n_gs_py = ( + last_sowing_not_harvested_sameyear_first_n_gs_py & inactive_last_n_gs_py + ) + last_sowing_never_harvested_py = np.concatenate( + (last_sowing_never_harvested_first_n_gs_py, np.full((n_patch, 1), False)), axis=1 + ) + last_sowing_never_harvested_pym = np.concatenate( + ( + np.full((n_patch, n_gs + 1, mxharvests - 1), False), + np.expand_dims(last_sowing_never_harvested_py, axis=2), + ), + axis=2, + ) + where_last_sowing_never_harvested_pym = last_sowing_never_harvested_pym + hdates_pym3 = hdates_pym2.copy() + sdates_pym3 = sdates_pym2.copy() + hdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf + sdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf + + hdates_pg = pym_to_pg(hdates_pym3.copy(), quiet=~verbose) + sdates_pg = pym_to_pg(sdates_pym3.copy(), quiet=True) + if verbose: + print( + "After 'In years with no sowing, pretend the first no-harvest is meaningful: " + + f"discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons" + ) + + return hdates_pym3, sdates_pym3, hdates_pg, sdates_pg + + +def ignore_harvests_planted_in_final_year( + this_ds, verbose, n_gs, expected_valid, mxharvests, hdates_pg, sdates_pg +): + """ + Ignore any harvests that were planted in the final year, because some cells will have + incomplete growing seasons for the final year. + """ + with np.errstate(invalid="ignore"): + hdates_ge_sdates = hdates_pg[:, -mxharvests:] >= sdates_pg[:, -mxharvests:] + lastyear_complete_season = hdates_ge_sdates | np.isinf(hdates_pg[:, -mxharvests:]) + + hdates_pg2 = ignore_lastyear_complete_season( + hdates_pg.copy(), lastyear_complete_season, mxharvests + ) + sdates_pg2 = ignore_lastyear_complete_season( + sdates_pg.copy(), lastyear_complete_season, mxharvests + ) + is_valid = ~np.isnan(hdates_pg2) + is_fake = np.isneginf(hdates_pg2) + is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], n_gs)) + discrepancy = np.sum(is_valid) - expected_valid + unique_n_seasons = np.unique(np.sum(is_valid, axis=1)) + if verbose: + print( + "After 'Ignore any harvests that were planted in the final year, because other cells " + + "will have incomplete growing seasons for the final year': discrepancy of " + + f"{discrepancy} patch-seasons" + ) + if "pandas" in sys.modules: + bincount = np.bincount(np.sum(is_valid, axis=1)) + bincount = bincount[bincount > 0] + dataframe = pd.DataFrame({"Ngs": unique_n_seasons, "Count": bincount}) + print(dataframe) + else: + print(f"unique N seasons = {unique_n_seasons}") + print(" ") + return hdates_pg2, sdates_pg2, is_valid, is_fake, discrepancy, unique_n_seasons + + +def create_dataset( + this_ds, + my_vars, + n_gs, + hdates_ymp, + hdates_pym, + sdates_ymp, + sdates_pym, + hdates_pym2, + sdates_pym2, + hdates_pym3, + sdates_pym3, + hdates_pg, + sdates_pg, + hdates_pg2, + sdates_pg2, + is_valid, + is_fake, + discrepancy, + unique_n_seasons, +): + """ + Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out + """ + if discrepancy == 0: + this_ds_gs = set_up_ds_with_gs_axis(this_ds) + for var in this_ds.data_vars: + if this_ds[var].dims != ("time", "mxharvests", "patch") or ( + my_vars and var not in my_vars + ): + continue + + # Set invalid values to NaN + da_yhp = this_ds[var].copy() + da_yhp = da_yhp.where(~np.isneginf(da_yhp)) + + # Remove the nans and reshape to patches*growingseasons + da_pyh = da_yhp.transpose("patch", "time", "mxharvests") + ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1)) + ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], n_gs)) + # Change -infs to nans + ar_valid_pg[is_fake] = np.nan + # Save as DataArray to new Dataset, stripping _PERHARV from variable name + newname = var.replace("_PERHARV", "") + if newname in this_ds_gs: + raise RuntimeError(f"{newname} already in dataset!") + da_pg = xr.DataArray( + data=ar_valid_pg, + coords=[this_ds_gs.coords["patch"], this_ds_gs.coords["gs"]], + name=newname, + attrs=da_yhp.attrs, + ) + this_ds_gs[newname] = da_pg + this_ds_gs[newname].attrs["units"] = this_ds[var].attrs["units"] + else: + # Print details about example bad patch(es) + if min(unique_n_seasons) < n_gs: + print(f"Too few seasons (min {min(unique_n_seasons)} < {n_gs})") + patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_n_seasons))[ + 0 + ][0] + print_onepatch_wrong_n_gs( + patch_index, + this_ds, + sdates_ymp, + hdates_ymp, + sdates_pym, + hdates_pym, + sdates_pym2, + hdates_pym2, + sdates_pym3, + hdates_pym3, + sdates_pg, + hdates_pg, + sdates_pg2, + hdates_pg2, + ) + if max(unique_n_seasons) > n_gs: + print(f"Too many seasons (max {max(unique_n_seasons)} > {n_gs})") + patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_n_seasons))[ + 0 + ][0] + print_onepatch_wrong_n_gs( + patch_index, + this_ds, + sdates_ymp, + hdates_ymp, + sdates_pym, + hdates_pym, + sdates_pym2, + hdates_pym2, + sdates_pym3, + hdates_pym3, + sdates_pg, + hdates_pg, + sdates_pg2, + hdates_pg2, + ) + raise RuntimeError( + "Can't convert time*mxharvests axes to growingseason axis: discrepancy of " + + f"{discrepancy} patch-seasons" + ) + + # Preserve units + for var_1 in this_ds_gs: + var_0 = var_1 + if var_0 not in this_ds: + var_0 += "_PERHARV" + if var_0 not in this_ds: + continue + if "units" in this_ds[var_0].attrs: + this_ds_gs[var_1].attrs["units"] = this_ds[var_0].attrs["units"] + return this_ds_gs + + +def convert_axis_time2gs(this_ds, verbose=False, my_vars=None, incl_orig=False): + """ + Convert time*mxharvests axes to growingseason axis + """ + + ( + n_patch, + n_gs, + expected_valid, + mxharvests, + hdates_ymp, + hdates_pym, + sdates_ymp, + sdates_pym, + ) = convert_axis_time2gs_setup(this_ds, verbose) + + # Find years where patch was inactive + inactive_py = np.transpose( + np.isnan(this_ds.HDATES).all(dim="mxharvests").values + & np.isnan(this_ds.SDATES_PERHARV).all(dim="mxharvests").values + ) + # Find seasons that were planted while the patch was inactive + with np.errstate(invalid="ignore"): + sown_inactive_py = inactive_py[:, :-1] & (hdates_pym[:, 1:, 0] < sdates_pym[:, 1:, 0]) + sown_inactive_py = np.concatenate((np.full((n_patch, 1), False), sown_inactive_py), axis=1) + + # "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was + # inactive" + with np.errstate(invalid="ignore"): + first_season_before_first_year_p = hdates_pym[:, 0, 0] < sdates_pym[:, 0, 0] + first_season_before_first_year_py = np.full(hdates_pym.shape[:-1], fill_value=False) + first_season_before_first_year_py[:, 0] = first_season_before_first_year_p + sown_prerun_or_inactive_py = first_season_before_first_year_py | sown_inactive_py + sown_prerun_or_inactive_pym = np.concatenate( + ( + np.expand_dims(sown_prerun_or_inactive_py, axis=2), + np.full((n_patch, n_gs + 1, mxharvests - 1), False), + ), + axis=2, + ) + where_sown_prerun_or_inactive_pym = np.where(sown_prerun_or_inactive_pym) + hdates_pym[where_sown_prerun_or_inactive_pym] = np.nan + sdates_pym[where_sown_prerun_or_inactive_pym] = np.nan + if verbose: + print( + "After 'Ignore harvests from before this output began: discrepancy of " + + f"{np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons'" + ) + + # We need to keep some non-seasons---it's possible that "the yearY growing season" never + # happened (sowing conditions weren't met), but we still need something there so that we can + # make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to + # -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs. + + # "In years with no sowing, pretend the first no-harvest is meaningful, unless that was + # intentionally ignored above." + sdates_orig_pym, hdates_pym2, sdates_pym2 = handle_years_with_no_sowing( + this_ds, mxharvests, hdates_pym, sdates_pym + ) + + # "In years with sowing that are followed by inactive years, check whether the last sowing was + # harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] + # no-harvest is meaningful." + hdates_pym3, sdates_pym3, hdates_pg, sdates_pg = handle_years_with_sowing_then_inactive( + verbose, + n_patch, + n_gs, + expected_valid, + mxharvests, + inactive_py, + sdates_orig_pym, + hdates_pym2, + sdates_pym2, + ) + + # "Ignore any harvests that were planted in the final year, because some cells will have + # incomplete growing seasons for the final year." + ( + hdates_pg2, + sdates_pg2, + is_valid, + is_fake, + discrepancy, + unique_n_seasons, + ) = ignore_harvests_planted_in_final_year( + this_ds, verbose, n_gs, expected_valid, mxharvests, hdates_pg, sdates_pg + ) + + # Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out + this_ds_gs = create_dataset( + this_ds, + my_vars, + n_gs, + hdates_ymp, + hdates_pym, + sdates_ymp, + sdates_pym, + hdates_pym2, + sdates_pym2, + hdates_pym3, + sdates_pym3, + hdates_pg, + sdates_pg, + hdates_pg2, + sdates_pg2, + is_valid, + is_fake, + discrepancy, + unique_n_seasons, + ) + + if incl_orig: + return this_ds_gs, this_ds + return this_ds_gs diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index 4fa3cdf5aa..aa3c5d469e 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -1,9 +1,7 @@ """ Helper functions for various crop calendar stuff """ -# pylint: disable=too-many-lines -import warnings import sys import os import glob @@ -18,11 +16,12 @@ ) sys.path.insert(1, _CTSM_PYTHON) import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position - -try: - import pandas as pd -except ModuleNotFoundError: - pass +from ctsm.crop_calendars.convert_axis_time2gs import ( # pylint: disable=wrong-import-position + convert_axis_time2gs, +) +from ctsm.crop_calendars.check_rx_obeyed import ( # pylint: disable=wrong-import-position + check_rx_obeyed, +) # Define conversion multipliers, {from: {to1, to2, ...}, ...} @@ -193,396 +192,6 @@ def open_lu_ds(filename, year_1, year_n, existing_ds, ungrid=True): return this_ds -def check_constant_vars( - this_ds, case, ignore_nan, const_growing_seasons=None, verbose=True, throw_error=True -): - """ - For variables that should stay constant, make sure they are - """ - if isinstance(case, str): - const_vars = [case] - elif isinstance(case, list): - const_vars = case - elif isinstance(case, dict): - const_vars = case["const_vars"] - else: - raise TypeError(f"case must be str or dict, not {type(case)}") - - if not const_vars: - return None - - if const_growing_seasons: - gs_0 = this_ds.gs.values[0] - gs_n = this_ds.gs.values[-1] - if const_growing_seasons.start > gs_0 or const_growing_seasons.stop < gs_n: - print( - f"❗ Only checking const_vars over {const_growing_seasons.start}-" - + f"{const_growing_seasons.stop} (run includes {gs_0}-{gs_n})" - ) - this_ds = this_ds.sel(gs=const_growing_seasons) - - any_bad = False - any_bad_before_checking_rx = False - if throw_error: - emojus = "❌" - else: - emojus = "❗" - if not isinstance(const_vars, list): - const_vars = [const_vars] - - for var in const_vars: - everything_ok = True - - if "gs" in this_ds[var].dims: - time_coord = "gs" - elif "time" in this_ds[var].dims: - time_coord = "time" - else: - raise RuntimeError(f"Which of these is the time coordinate? {this_ds[var].dims}") - i_time_coord = this_ds[var].dims.index(time_coord) - - this_da = this_ds[var] - ra_sp = np.moveaxis(this_da.copy().values, i_time_coord, 0) - incl_patches = [] - bad_patches = np.array([]) - str_list = [] - - # Read prescription file, if needed - rx_ds = None - if isinstance(case, dict): - if var == "GDDHARV" and "rx_gdds_file" in case: - rx_ds = import_rx_dates( - "gdd", case["rx_gdds_file"], this_ds, set_neg1_to_nan=False - ).squeeze() - - for time_1 in np.arange(this_ds.dims[time_coord] - 1): - condn = ~np.isnan(ra_sp[time_1, ...]) - if time_1 > 0: - condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:time_1, ...]), axis=0)) - these_patches = np.where(condn)[0] - if these_patches.size == 0: - continue - these_patches = list(np.where(condn)[0]) - incl_patches += these_patches - # print(f't1 {t1}: {thesePatches}') - - t1_yr = this_ds[time_coord].values[time_1] - t1_vals = np.squeeze(this_da.isel({time_coord: time_1, "patch": these_patches}).values) - - for timestep in np.arange(time_1 + 1, this_ds.dims[time_coord]): - t_yr = this_ds[time_coord].values[timestep] - t_vals = np.squeeze( - this_da.isel({time_coord: timestep, "patch": these_patches}).values - ) - ok_p = t1_vals == t_vals - - # If allowed, ignore where either t or t1 is NaN. Should only be used for runs where - # land use varies over time. - if ignore_nan: - ok_p = np.squeeze(np.bitwise_or(ok_p, np.isnan(t1_vals + t_vals))) - - if not np.all(ok_p): - any_bad_before_checking_rx = True - bad_patches_this_time = list(np.where(np.bitwise_not(ok_p))[0]) - bad_patches = np.concatenate( - (bad_patches, np.array(these_patches)[bad_patches_this_time]) - ) - if rx_ds: - found_in_rx = np.array([False for x in bad_patches]) - vary_patches = list(np.array(these_patches)[bad_patches_this_time]) - vary_lons = this_ds.patches1d_lon.values[bad_patches_this_time] - vary_lats = this_ds.patches1d_lat.values[bad_patches_this_time] - vary_crops = this_ds.patches1d_itype_veg_str.values[bad_patches_this_time] - vary_crops_int = this_ds.patches1d_itype_veg.values[bad_patches_this_time] - - any_bad_any_crop = False - for crop_int in np.unique(vary_crops_int): - rx_var = f"gs1_{crop_int}" - vary_lons_this_crop = vary_lons[np.where(vary_crops_int == crop_int)] - vary_lats_this_crop = vary_lats[np.where(vary_crops_int == crop_int)] - these_rx_vals = np.diag( - rx_ds[rx_var] - .sel(lon=vary_lons_this_crop, lat=vary_lats_this_crop) - .values - ) - if len(these_rx_vals) != len(vary_lats_this_crop): - raise RuntimeError( - f"Expected {len(vary_lats_this_crop)} rx values; got " - + f"{len(these_rx_vals)}" - ) - if not np.any(these_rx_vals != -1): - continue - any_bad_any_crop = True - break - if not any_bad_any_crop: - continue - - # This bit is pretty inefficient, but I'm not going to optimize it until I - # actually need to use it. - for i, patch in enumerate(bad_patches_this_time): - this_patch = vary_patches[i] - this_lon = vary_lons[i] - this_lat = vary_lats[i] - this_crop = vary_crops[i] - this_crop_int = vary_crops_int[i] - - # If prescribed input had missing value (-1), it's fine for it to vary. - if rx_ds: - rx_var = f"gs1_{this_crop_int}" - if this_lon in rx_ds.lon.values and this_lat in rx_ds.lat.values: - rx_vals = rx_ds[rx_var].sel(lon=this_lon, lat=this_lat).values - n_unique = len(np.unique(rx_vals)) - if n_unique == 1: - found_in_rx[i] = True - if rx_vals == -1: - continue - elif n_unique > 1: - raise RuntimeError( - f"How does lon {this_lon} lat {this_lat} {this_crop} have " - + f"time-varying {var}?" - ) - else: - raise RuntimeError( - f"lon {this_lon} lat {this_lat} {this_crop} not in rx dataset?" - ) - - # Print info (or save to print later) - any_bad = True - if verbose: - this_str = ( - f" Patch {this_patch} (lon {this_lon} lat {this_lat}) " - + f"{this_crop} ({this_crop_int})" - ) - if rx_ds and not found_in_rx[i]: - this_str = this_str.replace("(lon", "* (lon") - if not np.isnan(t1_vals[patch]): - t1_val_print = int(t1_vals[patch]) - else: - t1_val_print = "NaN" - if not np.isnan(t_vals[patch]): - t_val_print = int(t_vals[patch]) - else: - t_val_print = "NaN" - if var == "SDATES": - str_list.append( - f"{this_str}: Sowing {t1_yr} jday {t1_val_print}, {t_yr} " - + f"jday {t_val_print}" - ) - else: - str_list.append( - f"{this_str}: {t1_yr} {var} {t1_val_print}, {t_yr} {var} " - + f"{t_val_print}" - ) - else: - if everything_ok: - print(f"{emojus} CLM output {var} unexpectedly vary over time:") - everything_ok = False - print(f"{var} timestep {timestep} does not match timestep {time_1}") - break - if verbose and any_bad: - print(f"{emojus} CLM output {var} unexpectedly vary over time:") - str_list.sort() - if rx_ds and np.any(~found_in_rx): - str_list = [ - "*: Not found in prescribed input file (maybe minor lon/lat mismatch)" - ] + str_list - elif not rx_ds: - str_list = ["(No rx file checked)"] + str_list - print("\n".join(str_list)) - - # Make sure every patch was checked once (or is all-NaN except possibly final season) - incl_patches = np.sort(incl_patches) - if not np.array_equal(incl_patches, np.unique(incl_patches)): - raise RuntimeError("Patch(es) checked more than once!") - incl_patches = list(incl_patches) - incl_patches += list( - np.where( - np.all( - np.isnan( - ra_sp[ - :-1, - ] - ), - axis=0, - ) - )[0] - ) - incl_patches = np.sort(incl_patches) - if not np.array_equal(incl_patches, np.unique(incl_patches)): - raise RuntimeError("Patch(es) checked but also all-NaN??") - if not np.array_equal(incl_patches, np.arange(this_ds.dims["patch"])): - for patch in np.arange(this_ds.dims["patch"]): - if patch not in incl_patches: - break - raise RuntimeError( - f"Not all patches checked! E.g., {patch}: {this_da.isel(patch=patch).values}" - ) - - if not any_bad: - if any_bad_before_checking_rx: - print( - f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " - + "seasons of output (except for patch(es) with missing rx)." - ) - else: - print( - f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " - + "seasons of output." - ) - - if any_bad and throw_error: - raise RuntimeError("Stopping due to failed check_constant_vars().") - - bad_patches = np.unique(bad_patches) - return [int(p) for p in bad_patches] - - -def check_rx_obeyed( - vegtype_list, rx_ds, dates_ds, which_ds, output_var, gdd_min=None, verbose=False -): - """ - Check that prescribed crop calendars were obeyed - """ - all_ok = 2 - diff_str_list = [] - gdd_tolerance = 1 - - if "GDDHARV" in output_var and verbose: - harvest_reason_da = dates_ds["HARVEST_REASON"] - unique_harvest_reasons = np.unique( - harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] - ) - pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) - print( - f"{which_ds} harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harv at " - + "maturity)" - ) - - for vegtype_str in vegtype_list: - thisveg_patches = np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] - if thisveg_patches.size == 0: - continue - ds_thisveg = dates_ds.isel(patch=thisveg_patches) - patch_inds_lon_thisveg = ds_thisveg.patches1d_ixy.values.astype(int) - 1 - patch_inds_lat_thisveg = ds_thisveg.patches1d_jxy.values.astype(int) - 1 - patch_lons_thisveg = ds_thisveg.patches1d_lon - patch_lats_thisveg = ds_thisveg.patches1d_lat - - vegtype_int = utils.vegtype_str2int(vegtype_str)[0] - rx_da = rx_ds[f"gs1_{vegtype_int}"] - rx_array = rx_da.values[patch_inds_lat_thisveg, patch_inds_lon_thisveg] - rx_array = np.expand_dims(rx_array, axis=1) - sim_array = ds_thisveg[output_var].values - sim_array_dims = ds_thisveg[output_var].dims - - # Ignore patches without prescribed value - with np.errstate(invalid="ignore"): - rx_array[np.where(rx_array < 0)] = np.nan - - # Account for... - if "GDDHARV" in output_var: - # ...GDD harvest threshold minimum set in PlantCrop() - if gdd_min is None: - gdd_min = DEFAULT_GDD_MIN - print( - f"gdd_min not provided when doing check_rx_obeyed() for {output_var}; using " - + f"default {gdd_min}" - ) - with np.errstate(invalid="ignore"): - rx_array[(rx_array >= 0) & (rx_array < gdd_min)] = gdd_min - - # ...harvest reason - # 0: Should never happen in any simulation - # 1: Harvesting at maturity - # 2: Harvesting at max season length (mxmat) - # 3: Crop was incorrectly planted in last time step of Dec. 31 - # 4: Today was supposed to be the planting day, but the previous crop still hasn't been - # harvested. - # 5: Harvest the day before the next sowing date this year. - # 6: Same as #5. - # 7: Harvest the day before the next sowing date (today is Dec. 31 and the sowing date - # is Jan. 1) - harvest_reason_da = ds_thisveg["HARVEST_REASON"] - unique_harvest_reasons = np.unique( - harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] - ) - pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) - - if np.any(sim_array != rx_array): - diff_array = sim_array - rx_array - - # Allow negative GDDHARV values when harvest occurred because sowing was scheduled for - # the next day - if output_var == "GDDHARV_PERHARV": - diff_array = np.ma.masked_array( - diff_array, - mask=(diff_array < 0) & (ds_thisveg["HARVEST_REASON_PERHARV"].values == 5), - ) - elif output_var == "GDDHARV": - with np.errstate(invalid="ignore"): - diff_lt_0 = diff_array < 0 - harv_reason_5 = ds_thisveg["HARVEST_REASON"].values == 5 - diff_array = np.ma.masked_array(diff_array, mask=diff_lt_0 & harv_reason_5) - - with np.errstate(invalid="ignore"): - abs_gt_0 = abs(diff_array) > 0 - if np.any(np.abs(diff_array[abs_gt_0]) > 0): - min_diff, min_lon, min_lat, min_gs, min_rx = get_extreme_info( - diff_array, - rx_array, - np.nanmin, - sim_array_dims, - dates_ds.gs, - patch_lons_thisveg, - patch_lats_thisveg, - ) - max_diff, max_lon, max_lat, max_gs, max_rx = get_extreme_info( - diff_array, - rx_array, - np.nanmax, - sim_array_dims, - dates_ds.gs, - patch_lons_thisveg, - patch_lats_thisveg, - ) - - diffs_eg_txt = ( - f"{vegtype_str} ({vegtype_int}): diffs range {min_diff} (lon {min_lon}, lat " - + f"{min_lat}, gs {min_gs}, rx ~{min_rx}) to {max_diff} (lon {max_lon}, lat " - + f"{max_lat}, gs {max_gs}, rx ~{max_rx})" - ) - if "GDDHARV" in output_var: - diffs_eg_txt += ( - f"; harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}" - + "% harvested at maturity)" - ) - if "GDDHARV" in output_var and np.nanmax(abs(diff_array)) <= gdd_tolerance: - if all_ok > 0: - all_ok = 1 - diff_str_list.append(f" {diffs_eg_txt}") - else: - all_ok = 0 - if verbose: - print( - f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., " - + f"{diffs_eg_txt}" - ) - else: - break - - if all_ok == 2: - print(f"✅ {which_ds}: Prescribed {output_var} always obeyed") - elif all_ok == 1: - # print(f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable:") - # for x in diff_str_list: print(x) - print( - f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable (diffs <= " - + f"{gdd_tolerance})" - ) - elif not verbose: - print(f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., {diffs_eg_txt}") - - def check_v0_le_v1(this_ds, var_list, msg_txt=" ", both_nan_ok=False, throw_error=False): """ Make sure that, e.g., GDDACCUM_PERHARV is always <= HUI_PERHARV @@ -612,317 +221,6 @@ def check_v0_le_v1(this_ds, var_list, msg_txt=" ", both_nan_ok=False, throw_erro raise RuntimeError(msg) -def convert_axis_time2gs(this_ds, verbose=False, my_vars=None, incl_orig=False): - """ - Convert time*mxharvests axes to growingseason axis - """ - # How many non-NaN patch-seasons do we expect to have once we're done organizing things? - n_patch = this_ds.dims["patch"] - # Because some patches will be planted in the last year but not complete, we have to ignore any - # finalyear-planted seasons that do complete. - n_gs = this_ds.dims["time"] - 1 - expected_valid = n_patch * n_gs - - mxharvests = this_ds.dims["mxharvests"] - - if verbose: - print( - f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} " - + "patch-seasons" - ) - - # Set all non-positive date values to NaN. These are seasons that were never harvested - # (or never started): "non-seasons." - if this_ds.HDATES.dims != ("time", "mxharvests", "patch"): - raise RuntimeError( - "This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not " - + f"{this_ds.HDATES.dims}" - ) - hdates_ymp = this_ds.HDATES.copy().where(this_ds.HDATES > 0).values - hdates_pym = np.transpose(hdates_ymp.copy(), (2, 0, 1)) - sdates_ymp = this_ds.SDATES_PERHARV.copy().where(this_ds.SDATES_PERHARV > 0).values - sdates_pym = np.transpose(sdates_ymp.copy(), (2, 0, 1)) - with np.errstate(invalid="ignore"): - hdates_pym[hdates_pym <= 0] = np.nan - - # Find years where patch was inactive - inactive_py = np.transpose( - np.isnan(this_ds.HDATES).all(dim="mxharvests").values - & np.isnan(this_ds.SDATES_PERHARV).all(dim="mxharvests").values - ) - # Find seasons that were planted while the patch was inactive - with np.errstate(invalid="ignore"): - sown_inactive_py = inactive_py[:, :-1] & (hdates_pym[:, 1:, 0] < sdates_pym[:, 1:, 0]) - sown_inactive_py = np.concatenate((np.full((n_patch, 1), False), sown_inactive_py), axis=1) - - # "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was - # inactive" - with np.errstate(invalid="ignore"): - first_season_before_first_year_p = hdates_pym[:, 0, 0] < sdates_pym[:, 0, 0] - first_season_before_first_year_py = np.full(hdates_pym.shape[:-1], fill_value=False) - first_season_before_first_year_py[:, 0] = first_season_before_first_year_p - sown_prerun_or_inactive_py = first_season_before_first_year_py | sown_inactive_py - sown_prerun_or_inactive_pym = np.concatenate( - ( - np.expand_dims(sown_prerun_or_inactive_py, axis=2), - np.full((n_patch, n_gs + 1, mxharvests - 1), False), - ), - axis=2, - ) - where_sown_prerun_or_inactive_pym = np.where(sown_prerun_or_inactive_pym) - hdates_pym[where_sown_prerun_or_inactive_pym] = np.nan - sdates_pym[where_sown_prerun_or_inactive_pym] = np.nan - if verbose: - print( - "After 'Ignore harvests from before this output began: discrepancy of " - + f"{np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons'" - ) - - # We need to keep some non-seasons---it's possible that "the yearY growing season" never - # happened (sowing conditions weren't met), but we still need something there so that we can - # make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to - # -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs. - - # "In years with no sowing, pretend the first no-harvest is meaningful, unless that was - # intentionally ignored above." - sdates_orig_ymp = this_ds.SDATES.copy().values - sdates_orig_pym = np.transpose(sdates_orig_ymp.copy(), (2, 0, 1)) - hdates_pym2 = hdates_pym.copy() - sdates_pym2 = sdates_pym.copy() - with np.errstate(invalid="ignore"): - sdates_gt_0 = sdates_orig_pym > 0 - nosow_py = np.all(~sdates_gt_0, axis=2) - nosow_py_1st = nosow_py & np.isnan(hdates_pym[:, :, 0]) - where_nosow_py_1st = np.where(nosow_py_1st) - hdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf - sdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf - for harvest_index in np.arange(mxharvests - 1): - if harvest_index == 0: - continue - elif harvest_index == 1: - print("Warning: Untested with mxharvests > 2") - where_nosow_py = np.where( - nosow_py - & ~np.any(np.isnan(hdates_pym[:, :, 0:harvest_index]), axis=2) - & np.isnan(hdates_pym[:, :, harvest_index]) - ) - hdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf - sdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf - - # "In years with sowing that are followed by inactive years, check whether the last sowing was - # harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] - # no-harvest is meaningful." - sdates_orig_masked_pym = sdates_orig_pym.copy() - with np.errstate(invalid="ignore"): - sdates_le_0 = sdates_orig_masked_pym <= 0 - sdates_orig_masked_pym[np.where(sdates_le_0)] = np.nan - with warnings.catch_warnings(): - warnings.filterwarnings(action="ignore", message="All-NaN slice encountered") - last_sdate_first_n_gs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2) - last_hdate_first_n_gs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2) - with np.errstate(invalid="ignore"): - hdate_lt_sdate = last_hdate_first_n_gs_py < last_sdate_first_n_gs_py - last_sowing_not_harvested_sameyear_first_n_gs_py = hdate_lt_sdate | np.isnan( - last_hdate_first_n_gs_py - ) - inactive_last_n_gs_py = inactive_py[:, 1:] - last_sowing_never_harvested_first_n_gs_py = ( - last_sowing_not_harvested_sameyear_first_n_gs_py & inactive_last_n_gs_py - ) - last_sowing_never_harvested_py = np.concatenate( - (last_sowing_never_harvested_first_n_gs_py, np.full((n_patch, 1), False)), axis=1 - ) - last_sowing_never_harvested_pym = np.concatenate( - ( - np.full((n_patch, n_gs + 1, mxharvests - 1), False), - np.expand_dims(last_sowing_never_harvested_py, axis=2), - ), - axis=2, - ) - where_last_sowing_never_harvested_pym = last_sowing_never_harvested_pym - hdates_pym3 = hdates_pym2.copy() - sdates_pym3 = sdates_pym2.copy() - hdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf - sdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf - - # Convert to growingseason axis - def pym_to_pg(pym_array, quiet=False): - pg_array = np.reshape(pym_array, (pym_array.shape[0], -1)) - ok_pg = pg_array[~np.isnan(pg_array)] - if not quiet: - print( - f"{ok_pg.size} included; unique N seasons = " - + f"{np.unique(np.sum(~np.isnan(pg_array), axis=1))}" - ) - return pg_array - - hdates_pg = pym_to_pg(hdates_pym3.copy(), quiet=~verbose) - sdates_pg = pym_to_pg(sdates_pym3.copy(), quiet=True) - if verbose: - print( - "After 'In years with no sowing, pretend the first no-harvest is meaningful: " - + f"discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons" - ) - - # "Ignore any harvests that were planted in the final year, because some cells will have - # incomplete growing seasons for the final year." - with np.errstate(invalid="ignore"): - hdates_ge_sdates = hdates_pg[:, -mxharvests:] >= sdates_pg[:, -mxharvests:] - lastyear_complete_season = hdates_ge_sdates | np.isinf(hdates_pg[:, -mxharvests:]) - - def ignore_lastyear_complete_season(pg_array, excl, mxharvests): - tmp_l = pg_array[:, :-mxharvests] - tmp_r = pg_array[:, -mxharvests:] - tmp_r[np.where(excl)] = np.nan - pg_array = np.concatenate((tmp_l, tmp_r), axis=1) - return pg_array - - hdates_pg2 = ignore_lastyear_complete_season( - hdates_pg.copy(), lastyear_complete_season, mxharvests - ) - sdates_pg2 = ignore_lastyear_complete_season( - sdates_pg.copy(), lastyear_complete_season, mxharvests - ) - is_valid = ~np.isnan(hdates_pg2) - is_fake = np.isneginf(hdates_pg2) - is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], n_gs)) - discrepancy = np.sum(is_valid) - expected_valid - unique_n_seasons = np.unique(np.sum(is_valid, axis=1)) - if verbose: - print( - "After 'Ignore any harvests that were planted in the final year, because other cells " - + "will have incomplete growing seasons for the final year': discrepancy of " - + f"{discrepancy} patch-seasons" - ) - if "pandas" in sys.modules: - bincount = np.bincount(np.sum(is_valid, axis=1)) - bincount = bincount[bincount > 0] - dataframe = pd.DataFrame({"Ngs": unique_n_seasons, "Count": bincount}) - print(dataframe) - else: - print(f"unique N seasons = {unique_n_seasons}") - print(" ") - - # Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out - if discrepancy == 0: - this_ds_gs = set_up_ds_with_gs_axis(this_ds) - for var in this_ds.data_vars: - if this_ds[var].dims != ("time", "mxharvests", "patch") or ( - my_vars and var not in my_vars - ): - continue - - # Set invalid values to NaN - da_yhp = this_ds[var].copy() - da_yhp = da_yhp.where(~np.isneginf(da_yhp)) - - # Remove the nans and reshape to patches*growingseasons - da_pyh = da_yhp.transpose("patch", "time", "mxharvests") - ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1)) - ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], n_gs)) - # Change -infs to nans - ar_valid_pg[is_fake] = np.nan - # Save as DataArray to new Dataset, stripping _PERHARV from variable name - newname = var.replace("_PERHARV", "") - if newname in this_ds_gs: - raise RuntimeError(f"{newname} already in dataset!") - da_pg = xr.DataArray( - data=ar_valid_pg, - coords=[this_ds_gs.coords["patch"], this_ds_gs.coords["gs"]], - name=newname, - attrs=da_yhp.attrs, - ) - this_ds_gs[newname] = da_pg - this_ds_gs[newname].attrs["units"] = this_ds[var].attrs["units"] - else: - # Print details about example bad patch(es) - if min(unique_n_seasons) < n_gs: - print(f"Too few seasons (min {min(unique_n_seasons)} < {n_gs})") - patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_n_seasons))[ - 0 - ][0] - print_onepatch_wrong_n_gs( - patch_index, - this_ds, - sdates_ymp, - hdates_ymp, - sdates_pym, - hdates_pym, - sdates_pym2, - hdates_pym2, - sdates_pym3, - hdates_pym3, - sdates_pg, - hdates_pg, - sdates_pg2, - hdates_pg2, - ) - if max(unique_n_seasons) > n_gs: - print(f"Too many seasons (max {max(unique_n_seasons)} > {n_gs})") - patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_n_seasons))[ - 0 - ][0] - print_onepatch_wrong_n_gs( - patch_index, - this_ds, - sdates_ymp, - hdates_ymp, - sdates_pym, - hdates_pym, - sdates_pym2, - hdates_pym2, - sdates_pym3, - hdates_pym3, - sdates_pg, - hdates_pg, - sdates_pg2, - hdates_pg2, - ) - raise RuntimeError( - "Can't convert time*mxharvests axes to growingseason axis: discrepancy of " - + f"{discrepancy} patch-seasons" - ) - - # Preserve units - for var_1 in this_ds_gs: - var_0 = var_1 - if var_0 not in this_ds: - var_0 += "_PERHARV" - if var_0 not in this_ds: - continue - if "units" in this_ds[var_0].attrs: - this_ds_gs[var_1].attrs["units"] = this_ds[var_0].attrs["units"] - - if incl_orig: - return this_ds_gs, this_ds - return this_ds_gs - - -def get_extreme_info(diff_array, rx_array, mxn, dims, gs_da, patches1d_lon, patches1d_lat): - """ - Get information about extreme gridcells (for debugging) - """ - if mxn == np.min: # pylint: disable=comparison-with-callable - diff_array = np.ma.masked_array(diff_array, mask=np.abs(diff_array) == 0) - themxn = mxn(diff_array) - - # Find the first patch-gs that has the mxn value - matching_indices = np.where(diff_array == themxn) - first_indices = [x[0] for x in matching_indices] - - # Get the lon, lat, and growing season of that patch-gs - patch_index = first_indices[dims.index("patch")] - this_lon = patches1d_lon.values[patch_index] - this_lat = patches1d_lat.values[patch_index] - season_index = first_indices[dims.index("gs")] - this_gs = gs_da.values[season_index] - - # Get the prescribed value for this patch-gs - this_rx = rx_array[patch_index][0] - - return round(themxn, 3), round(this_lon, 3), round(this_lat, 3), this_gs, round(this_rx) - - def get_gs_len_da(this_da): """ Get growing season lengths from a DataArray of hdate-sdate @@ -936,23 +234,6 @@ def get_gs_len_da(this_da): return this_da -def get_pct_harv_at_mature(harvest_reason_da): - """ - Get percentage of harvests that happened at maturity - """ - n_harv_at_mature = len(np.where(harvest_reason_da.values == 1)[0]) - with np.errstate(invalid="ignore"): - harv_reason_gt_0 = harvest_reason_da.values > 0 - n_harv = len(np.where(harv_reason_gt_0)[0]) - if n_harv == 0: - return np.nan - pct_harv_at_mature = n_harv_at_mature / n_harv * 100 - pct_harv_at_mature = np.format_float_positional( - pct_harv_at_mature, precision=2, unique=False, fractional=False, trim="k" - ) # Round to 2 significant digits - return pct_harv_at_mature - - def import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver): """ Import maximum growing season length @@ -1030,6 +311,57 @@ def import_rx_dates(var_prefix, date_infile, dates_ds, set_neg1_to_nan=True): return this_ds +def check_no_negative(this_ds_in, varlist_no_negative, which_file, verbose): + """ + In import_output(), check that there are no unexpected negative values. + """ + tiny_neg_ok = 1e-12 + this_ds = this_ds_in.copy() + for var in this_ds: + if not any(x in var for x in varlist_no_negative): + continue + the_min = np.nanmin(this_ds[var].values) + if the_min < 0: + if np.abs(the_min) <= tiny_neg_ok: + if verbose: + print( + f"Tiny negative value(s) in {var} (abs <= {tiny_neg_ok}) being set to 0" + + f" ({which_file})" + ) + else: + print( + f"WARNING: Unexpected negative value(s) in {var}; minimum {the_min} " + + f"({which_file})" + ) + values = this_ds[var].copy().values + with np.errstate(invalid="ignore"): + do_setto_0 = (values < 0) & (values >= -tiny_neg_ok) + values[np.where(do_setto_0)] = 0 + this_ds[var] = xr.DataArray( + values, + coords=this_ds[var].coords, + dims=this_ds[var].dims, + attrs=this_ds[var].attrs, + ) + + elif verbose: + print(f"No negative value(s) in {var}; min {the_min} ({which_file})") + return this_ds + + +def check_no_zeros(this_ds, varlist_no_zero, which_file, verbose): + """ + In import_output(), check that there are no unexpected zeros. + """ + for var in this_ds: + if not any(x in var for x in varlist_no_zero): + continue + if np.any(this_ds[var].values == 0): + print(f"WARNING: Unexpected zero(s) in {var} ({which_file})") + elif verbose: + print(f"No zero value(s) in {var} ({which_file})") + + def import_output( filename, my_vars, @@ -1077,79 +409,11 @@ def import_output( # SDATES, but it does show up in SDATES_PERHARV. # I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going # to invalidate those "seasons." - # - # In all but the last calendar year, which patches had no sowing? - no_sowing_yp = np.all(np.isnan(this_ds.SDATES.values[:-1, :, :]), axis=1) - # In all but the first calendar year, which harvests' jdays are < their sowings' jdays? - # (Indicates sowing the previous calendar year.) - with np.errstate(invalid="ignore"): - hsdate1_gt_hdate1_yp = ( - this_ds.SDATES_PERHARV.values[1:, 0, :] > this_ds.HDATES.values[1:, 0, :] - ) - # Where both, we have the problem. - falsely_alive_yp = no_sowing_yp & hsdate1_gt_hdate1_yp - if np.any(falsely_alive_yp): - print( - f"Warning: {np.sum(falsely_alive_yp)} patch-seasons being ignored: Seemingly sown the " - + "year before harvest, but no sowings occurred that year." - ) - falsely_alive_yp = np.concatenate( - (np.full((1, this_ds.dims["patch"]), False), falsely_alive_yp), axis=0 - ) - falsely_alive_y1p = np.expand_dims(falsely_alive_yp, axis=1) - dummy_false_y1p = np.expand_dims(np.full_like(falsely_alive_yp, False), axis=1) - falsely_alive_yhp = np.concatenate((falsely_alive_y1p, dummy_false_y1p), axis=1) - for var in this_ds.data_vars: - if this_ds[var].dims != ("time", "mxharvests", "patch"): - continue - this_ds[var] = this_ds[var].where(~falsely_alive_yhp) - - def check_no_negative(this_ds_in, varlist_no_negative, which_file, verbose=False): - tiny_neg_ok = 1e-12 - this_ds = this_ds_in.copy() - for var in this_ds: - if not any(x in var for x in varlist_no_negative): - continue - the_min = np.nanmin(this_ds[var].values) - if the_min < 0: - if np.abs(the_min) <= tiny_neg_ok: - if verbose: - print( - f"Tiny negative value(s) in {var} (abs <= {tiny_neg_ok}) being set to 0" - + f" ({which_file})" - ) - else: - print( - f"WARNING: Unexpected negative value(s) in {var}; minimum {the_min} " - + f"({which_file})" - ) - values = this_ds[var].copy().values - with np.errstate(invalid="ignore"): - do_setto_0 = (values < 0) & (values >= -tiny_neg_ok) - values[np.where(do_setto_0)] = 0 - this_ds[var] = xr.DataArray( - values, - coords=this_ds[var].coords, - dims=this_ds[var].dims, - attrs=this_ds[var].attrs, - ) - - elif verbose: - print(f"No negative value(s) in {var}; min {the_min} ({which_file})") - return this_ds - - def check_no_zeros(this_ds, varlist_no_zero, which_file): - for var in this_ds: - if not any(x in var for x in varlist_no_zero): - continue - if np.any(this_ds[var].values == 0): - print(f"WARNING: Unexpected zero(s) in {var} ({which_file})") - elif verbose: - print(f"No zero value(s) in {var} ({which_file})") + this_ds = handle_zombie_crops(this_ds) # Check for no zero values where there shouldn't be varlist_no_zero = ["DATE", "YEAR"] - check_no_zeros(this_ds, varlist_no_zero, "original file") + check_no_zeros(this_ds, varlist_no_zero, "original file", verbose) # Convert time*mxharvests axes to growingseason axis this_ds_gs = convert_axis_time2gs(this_ds, verbose=verbose, incl_orig=False) @@ -1171,11 +435,11 @@ def check_no_zeros(this_ds, varlist_no_zero, which_file): # Avoid tiny negative values varlist_no_negative = ["GRAIN", "REASON", "GDD", "HUI", "YEAR", "DATE", "GSLEN"] - this_ds_gs = check_no_negative(this_ds_gs, varlist_no_negative, "new file", verbose=verbose) + this_ds_gs = check_no_negative(this_ds_gs, varlist_no_negative, "new file", verbose) # Check for no zero values where there shouldn't be varlist_no_zero = ["REASON", "DATE"] - check_no_zeros(this_ds_gs, varlist_no_zero, "new file") + check_no_zeros(this_ds_gs, varlist_no_zero, "new file", verbose) # Check that e.g., GDDACCUM <= HUI for var_list in [["GDDACCUM", "HUI"], ["SYEARS", "HYEARS"]]: @@ -1211,190 +475,37 @@ def check_no_zeros(this_ds, varlist_no_zero, which_file): return this_ds_gs -def print_onepatch_wrong_n_gs( - patch_index, - this_ds_orig, - sdates_ymp, - hdates_ymp, - sdates_pym, - hdates_pym, - sdates_pym2, - hdates_pym2, - sdates_pym3, - hdates_pym3, - sdates_pg, - hdates_pg, - sdates_pg2, - hdates_pg2, -): +def handle_zombie_crops(this_ds): """ - Print information about a patch (for debugging) + When doing transient runs, it's somehow possible for crops in newly-active patches to be + *already alive*. They even have a sowing date (idop)! This will of course not show up in + SDATES, but it does show up in SDATES_PERHARV. + I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going + to invalidate those "seasons." """ - - print( - f"patch {patch_index}: {this_ds_orig.patches1d_itype_veg_str.values[patch_index]}, lon " - f"{this_ds_orig.patches1d_lon.values[patch_index]} lat " - f"{this_ds_orig.patches1d_lat.values[patch_index]}" - ) - - print("Original SDATES (per sowing):") - print(this_ds_orig.SDATES.values[:, :, patch_index]) - - print("Original HDATES (per harvest):") - print(this_ds_orig.HDATES.values[:, :, patch_index]) - - if "pandas" in sys.modules: - - def print_pandas_ymp(msg, cols, arrs_tuple): - print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") - mxharvests = arrs_tuple[0].shape[1] - arrs_list2 = [] - cols2 = [] - for harvest_index in np.arange(mxharvests): - for i, array in enumerate(arrs_tuple): - arrs_list2.append(array[:, harvest_index]) - cols2.append(cols[i] + str(harvest_index)) - arrs_tuple2 = tuple(arrs_list2) - dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) - dataframe.columns = cols2 - print(dataframe) - - print_pandas_ymp( - "Original", - ["sdate", "hdate"], - ( - this_ds_orig.SDATES_PERHARV.values[:, :, patch_index], - this_ds_orig.HDATES.values[:, :, patch_index], - ), - ) - - print_pandas_ymp( - "Masked", - ["sdate", "hdate"], - (sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index]), - ) - - print_pandas_ymp( - 'After "Ignore harvests from before this output began"', - ["sdate", "hdate"], - ( - np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], - ), - ) - - print_pandas_ymp( - 'After "In years with no sowing, pretend the first no-harvest is meaningful"', - ["sdate", "hdate"], - ( - np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], - ), - ) - - print_pandas_ymp( - ( - 'After "In years with sowing that are followed by inactive years, check whether the' - " last sowing was harvested before the patch was deactivated. If not, pretend the" - ' LAST no-harvest is meaningful."' - ), - ["sdate", "hdate"], - ( - np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], - ), - ) - - def print_pandas_pg(msg, cols, arrs_tuple): - print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") - arrs_list = list(arrs_tuple) - for i, array in enumerate(arrs_tuple): - arrs_list[i] = np.reshape(array, (-1)) - arrs_tuple2 = tuple(arrs_list) - dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) - dataframe.columns = cols - print(dataframe) - - print_pandas_pg( - "Same, but converted to gs axis", - ["sdate", "hdate"], - (sdates_pg[patch_index, :], hdates_pg[patch_index, :]), - ) - - print_pandas_pg( - ( - 'After "Ignore any harvests that were planted in the final year, because some cells' - ' will have incomplete growing seasons for the final year"' - ), - ["sdate", "hdate"], - (sdates_pg2[patch_index, :], hdates_pg2[patch_index, :]), - ) - else: - print("Couldn't import pandas, so not displaying example bad patch ORIGINAL.") - - def print_nopandas(array_1, array_2, msg): - print(msg) - if array_1.ndim == 1: - # I don't know why these aren't side-by-side! - print(np.stack((array_1, array_2), axis=1)) - else: - print(np.concatenate((array_1, array_2), axis=1)) - - print_nopandas(sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index], "Masked:") - - print_nopandas( - np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], - 'After "Ignore harvests from before this output began"', - ) - - print_nopandas( - np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], - 'After "In years with no sowing, pretend the first no-harvest is meaningful"', - ) - - print_nopandas( - np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], - ( - 'After "In years with sowing that are followed by inactive years, check whether the' - " last sowing was harvested before the patch was deactivated. If not, pretend the" - ' LAST [easier to implement!] no-harvest is meaningful."' - ), + # In all but the last calendar year, which patches had no sowing? + no_sowing_yp = np.all(np.isnan(this_ds.SDATES.values[:-1, :, :]), axis=1) + # In all but the first calendar year, which harvests' jdays are < their sowings' jdays? + # (Indicates sowing the previous calendar year.) + with np.errstate(invalid="ignore"): + hsdate1_gt_hdate1_yp = ( + this_ds.SDATES_PERHARV.values[1:, 0, :] > this_ds.HDATES.values[1:, 0, :] ) - - print_nopandas( - sdates_pg[patch_index, :], hdates_pg[patch_index, :], "Same, but converted to gs axis" + # Where both, we have the problem. + falsely_alive_yp = no_sowing_yp & hsdate1_gt_hdate1_yp + if np.any(falsely_alive_yp): + print( + f"Warning: {np.sum(falsely_alive_yp)} patch-seasons being ignored: Seemingly sown the " + + "year before harvest, but no sowings occurred that year." ) - - print_nopandas( - sdates_pg2[patch_index, :], - hdates_pg2[patch_index, :], - ( - 'After "Ignore any harvests that were planted in the final year, because some cells' - ' will have incomplete growing seasons for the final year"' - ), + falsely_alive_yp = np.concatenate( + (np.full((1, this_ds.dims["patch"]), False), falsely_alive_yp), axis=0 ) - - print("\n\n") - - -def set_up_ds_with_gs_axis(ds_in): - """ - Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out. - - Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or - mxharvests. - """ - # Get the data variables to include in the new dataset - data_vars = {} - for var in ds_in.data_vars: - if not any(x in ["mxsowings", "mxharvests"] for x in ds_in[var].dims): - data_vars[var] = ds_in[var] - # Set up the new dataset - gs_years = [t.year - 1 for t in ds_in.time.values[:-1]] - coords = ds_in.coords - coords["gs"] = gs_years - ds_out = xr.Dataset(data_vars=data_vars, coords=coords, attrs=ds_in.attrs) - return ds_out + falsely_alive_y1p = np.expand_dims(falsely_alive_yp, axis=1) + dummy_false_y1p = np.expand_dims(np.full_like(falsely_alive_yp, False), axis=1) + falsely_alive_yhp = np.concatenate((falsely_alive_y1p, dummy_false_y1p), axis=1) + for var in this_ds.data_vars: + if this_ds[var].dims != ("time", "mxharvests", "patch"): + continue + this_ds[var] = this_ds[var].where(~falsely_alive_yhp) + return this_ds From 34b3320bd0c2dd787abbf0af64625cb202704a33 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 16:11:43 -0700 Subject: [PATCH 20/40] pylint: Allow variable names ax and im (common in matplotlib instructions). --- python/ctsm/.pylintrc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/ctsm/.pylintrc b/python/ctsm/.pylintrc index 2087913e8a..ceff04c7d8 100644 --- a/python/ctsm/.pylintrc +++ b/python/ctsm/.pylintrc @@ -436,7 +436,10 @@ good-names=i, _, # --- default list is above here, our own list is below here --- # Allow logger as a global name in each module, because this seems to follow general recommended convention: - logger + logger, +# Allow these names, which are commonly used in matplotlib instructions + ax, + im # Include a hint for the correct naming format with invalid-name. include-naming-hint=no From be18e3ea3e39dc70396c6b1fc9b1283fb11c289b Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 16:11:57 -0700 Subject: [PATCH 21/40] Satisfy pylint for cropcal_figs_module.py. --- .../crop_calendars/cropcal_figs_module.py | 47 ++++++++++++------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_figs_module.py b/python/ctsm/crop_calendars/cropcal_figs_module.py index 8d7f472fec..d820460175 100644 --- a/python/ctsm/crop_calendars/cropcal_figs_module.py +++ b/python/ctsm/crop_calendars/cropcal_figs_module.py @@ -1,5 +1,11 @@ +""" +Functions for making crop calendar figures +""" + import numpy as np +# It's fine if these can't be imported. The script using these will handle it. +# pylint: disable=import-error import cartopy.crs as ccrs import matplotlib.pyplot as plt import matplotlib.colors as mcolors @@ -23,6 +29,9 @@ # Cases (line and scatter plots) def cropcal_colors_cases(casename): + """ + Define colors for each case + """ case_color_dict = { "clm default": [x / 255 for x in [92, 219, 219]], "prescribed calendars": [x / 255 for x in [250, 102, 240]], @@ -32,11 +41,8 @@ def cropcal_colors_cases(casename): case_color_dict["5.0 lu"] = case_color_dict["clm default"] case_color_dict["5.2 lu"] = case_color_dict["prescribed calendars"] - case_color = None casename_for_colors = casename.lower().replace(" (0)", "").replace(" (1)", "") - if casename_for_colors in case_color_dict: - case_color = case_color_dict[casename_for_colors] - return case_color + return case_color_dict.get(casename_for_colors, None) def make_map( @@ -65,6 +71,9 @@ def make_map( vmin=None, vrange=None, ): + """ + Make map + """ if underlay is not None: if underlay_color is None: underlay_color = cropcal_colors["underlay"] @@ -147,23 +156,25 @@ def make_map( # Need to do this for subplot row labels set_ticks(-1, fontsize, "y") plt.yticks([]) - for x in ax.spines: - ax.spines[x].set_visible(False) + for spine in ax.spines: + ax.spines[spine].set_visible(False) if show_cbar: return im, cbar - else: - return im, None + return im, None def deal_with_ticklabels(cbar, cbar_max, ticklabels, ticklocations, units, im): + """ + Handle settings related to ticklabels + """ if ticklocations is not None: cbar.set_ticks(ticklocations) if units is not None and units.lower() == "month": cbar.set_ticklabels( ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] ) - units == "Month" + units = "Month" elif ticklabels is not None: cbar.set_ticklabels(ticklabels) if isinstance(im, mplcol.QuadMesh): @@ -173,7 +184,7 @@ def deal_with_ticklabels(cbar, cbar_max, ticklabels, ticklocations, units, im): if cbar_max is not None and clim_max > cbar_max: if ticklabels is not None: raise RuntimeError( - "How to handle this now that you are specifying ticklocations separate from ticklabels?" + "How to handle this now that ticklocations is specified separately from ticklabels?" ) ticks = cbar.get_ticks() if ticks[-2] > cbar_max: @@ -182,24 +193,28 @@ def deal_with_ticklabels(cbar, cbar_max, ticklabels, ticklocations, units, im): ) ticklabels = ticks.copy() ticklabels[-1] = cbar_max - for i, x in enumerate(ticklabels): - if x == int(x): - ticklabels[i] = str(int(x)) + for i, ticklabel in enumerate(ticklabels): + if ticklabel == int(ticklabel): + ticklabels[i] = str(int(ticklabel)) cbar.set_ticks( ticks - ) # Calling this before set_xticklabels() avoids "UserWarning: FixedFormatter should only be used together with FixedLocator" (https://stackoverflow.com/questions/63723514/userwarning-fixedformatter-should-only-be-used-together-with-fixedlocator) + ) # Calling this before set_xticklabels() avoids "UserWarning: FixedFormatter should only + # be used together with FixedLocator" (https://stackoverflow.com/questions/63723514) cbar.set_ticklabels(ticklabels) def set_ticks(lonlat_bin_width, fontsize, x_or_y): + """ + Plot tick marks + """ if x_or_y == "x": ticks = np.arange(-180, 181, lonlat_bin_width) else: ticks = np.arange(-60, 91, lonlat_bin_width) ticklabels = [str(x) for x in ticks] - for i, x in enumerate(ticks): - if x % 2: + for i, tick in enumerate(ticks): + if tick % 2: ticklabels[i] = "" if x_or_y == "x": From 5b3cad7c7e61e8c1d945b2a29dac15bf85573c4f Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 16:17:22 -0700 Subject: [PATCH 22/40] pylint: Resolve remaining wrong-import-position complaints. --- python/ctsm/crop_calendars/cropcal_utils.py | 6 ++++-- python/ctsm/crop_calendars/regrid_ggcmi_shdates.py | 9 ++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index 4d77d2ef66..f96efd1d99 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -1,5 +1,7 @@ -"""utility functions""" -"""copied from klindsay, https://github.com/klindsay28/CESM2_coup_carb_cycle_JAMES/blob/master/utils.py""" +""" +utility functions +copied from klindsay, https://github.com/klindsay28/CESM2_coup_carb_cycle_JAMES/blob/master/utils.py +""" import re import warnings diff --git a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py index 911b2f93a1..5c2e7f8820 100644 --- a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py @@ -11,9 +11,12 @@ _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) -from ctsm.utils import abort -from ctsm.ctsm_pylib_dependent_utils import import_coord_1d, import_coord_2d -from ctsm import ctsm_logging +from ctsm.utils import abort # pylint: disable=wrong-import-position +from ctsm.ctsm_pylib_dependent_utils import ( # pylint: disable=wrong-import-position + import_coord_1d, + import_coord_2d, +) +from ctsm import ctsm_logging # pylint: disable=wrong-import-position logger = logging.getLogger(__name__) From 89c3dade60cb0192e6003399efd0ae2b103f88b1 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 21:14:01 -0700 Subject: [PATCH 23/40] Post-pylint fixes to get RXCROPMATURITY working again. --- .../crop_calendars/check_constant_vars.py | 19 ++++++++++--- python/ctsm/crop_calendars/check_rx_obeyed.py | 2 +- .../ctsm/crop_calendars/cropcal_constants.py | 26 ++++++++++++++++++ python/ctsm/crop_calendars/cropcal_module.py | 27 +++---------------- 4 files changed, 45 insertions(+), 29 deletions(-) create mode 100644 python/ctsm/crop_calendars/cropcal_constants.py diff --git a/python/ctsm/crop_calendars/check_constant_vars.py b/python/ctsm/crop_calendars/check_constant_vars.py index 92e1819803..1a5a4e62c6 100644 --- a/python/ctsm/crop_calendars/check_constant_vars.py +++ b/python/ctsm/crop_calendars/check_constant_vars.py @@ -67,6 +67,7 @@ def loop_through_bad_patches( vary_lats, vary_crops, vary_crops_int, + any_bad, ): """ Loop through and check any patches that were "bad" according to check_constant_vars(). @@ -99,7 +100,7 @@ def loop_through_bad_patches( else: raise RuntimeError(f"lon {this_lon} lat {this_lat} {this_crop} not in rx dataset?") - # Print info (or save to print later) + # Print info (or save to print later) any_bad = True if verbose: this_str = ( @@ -181,10 +182,14 @@ def check_one_constant_var_loop_through_timesteps( these_patches, t1_yr, t1_vals, + any_bad, + any_bad_before_checking_rx, + bad_patches, ): """ In check_one_constant_var(), loop through timesteps """ + found_in_rx = None for timestep in np.arange(time_1 + 1, this_ds.dims[time_coord]): t_yr = this_ds[time_coord].values[timestep] t_vals = np.squeeze(this_da.isel({time_coord: timestep, "patch": these_patches}).values) @@ -250,13 +255,14 @@ def check_one_constant_var_loop_through_timesteps( vary_lats, vary_crops, vary_crops_int, + any_bad, ) return any_bad_before_checking_rx, bad_patches, found_in_rx, any_bad def check_one_constant_var( - this_ds, case, ignore_nan, verbose, emojus, var, any_bad_before_checking_rx + this_ds, case, ignore_nan, verbose, emojus, var, any_bad, any_bad_before_checking_rx ): """ Ensure that a variable that should be constant actually is @@ -306,12 +312,17 @@ def check_one_constant_var( these_patches, t1_yr, t1_vals, + any_bad, + any_bad_before_checking_rx, + bad_patches, ) if verbose and any_bad: print(f"{emojus} CLM output {var} unexpectedly vary over time:") str_list.sort() - if rx_ds and np.any(~found_in_rx): + if found_in_rx is None: + raise RuntimeError("Somehow any_bad True but found_in_rx None") + if rx_ds and np.any(~found_in_rx): # pylint: disable=invalid-unary-operand-type str_list = [ "*: Not found in prescribed input file (maybe minor lon/lat mismatch)" ] + str_list @@ -376,7 +387,7 @@ def check_constant_vars( for var in const_vars: any_bad, any_bad_before_checking_rx, bad_patches = check_one_constant_var( - this_ds, case, ignore_nan, verbose, emojus, var, any_bad_before_checking_rx + this_ds, case, ignore_nan, verbose, emojus, var, any_bad, any_bad_before_checking_rx ) if any_bad and throw_error: diff --git a/python/ctsm/crop_calendars/check_rx_obeyed.py b/python/ctsm/crop_calendars/check_rx_obeyed.py index c1ad5cfecc..3d769d3820 100644 --- a/python/ctsm/crop_calendars/check_rx_obeyed.py +++ b/python/ctsm/crop_calendars/check_rx_obeyed.py @@ -14,7 +14,7 @@ ) sys.path.insert(1, _CTSM_PYTHON) import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position -from ctsm.crop_calendars.cropcal_module import ( # pylint: disable=wrong-import-position +from ctsm.crop_calendars.cropcal_constants import ( # pylint: disable=wrong-import-position DEFAULT_GDD_MIN, ) diff --git a/python/ctsm/crop_calendars/cropcal_constants.py b/python/ctsm/crop_calendars/cropcal_constants.py new file mode 100644 index 0000000000..f015ac7db1 --- /dev/null +++ b/python/ctsm/crop_calendars/cropcal_constants.py @@ -0,0 +1,26 @@ +""" +Constants used in crop calendar scripts +""" + +# Define conversion multipliers, {from: {to1, to2, ...}, ...} +multiplier_dict = { + # Mass + "g": { + "Mt": 1e-12, + }, + "t": { + "Mt": 1e-6, + }, + # Volume + "m3": { + "km3": 1e-9, + }, + # Yield + "g/m2": { + "t/ha": 1e-6 * 1e4, + }, +} + +# Minimum harvest threshold allowed in PlantCrop() +# Was 50 before cropcal runs 2023-01-28 +DEFAULT_GDD_MIN = 1.0 diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index aa3c5d469e..91963aa269 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -22,30 +22,9 @@ from ctsm.crop_calendars.check_rx_obeyed import ( # pylint: disable=wrong-import-position check_rx_obeyed, ) - - -# Define conversion multipliers, {from: {to1, to2, ...}, ...} -multiplier_dict = { - # Mass - "g": { - "Mt": 1e-12, - }, - "t": { - "Mt": 1e-6, - }, - # Volume - "m3": { - "km3": 1e-9, - }, - # Yield - "g/m2": { - "t/ha": 1e-6 * 1e4, - }, -} - -# Minimum harvest threshold allowed in PlantCrop() -# Was 50 before cropcal runs 2023-01-28 -DEFAULT_GDD_MIN = 1.0 +from ctsm.crop_calendars.cropcal_constants import ( # pylint: disable=wrong-import-position + DEFAULT_GDD_MIN, +) def check_and_trim_years(year_1, year_n, ds_in): From 3e8b83504c00497ad0185abbacca92feca7b8035 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:31:55 -0700 Subject: [PATCH 24/40] pylint: Resolve remaining invalid-name complaints. --- python/ctsm/crop_calendars/cropcal_module.py | 4 +- python/ctsm/crop_calendars/cropcal_utils.py | 302 +++++++++--------- .../crop_calendars/generate_gdds_functions.py | 12 +- .../crop_calendars/regrid_ggcmi_shdates.py | 32 +- .../ctsm/test/test_unit_utils_import_coord.py | 4 +- 5 files changed, 180 insertions(+), 174 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index 91963aa269..b3b415b77c 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -266,7 +266,7 @@ def import_rx_dates(var_prefix, date_infile, dates_ds, set_neg1_to_nan=True): this_var = f"{var_prefix}{j+1}_{i}" date_varlist = date_varlist + [this_var] - this_ds = utils.import_ds(date_infile, myVars=date_varlist) + this_ds = utils.import_ds(date_infile, my_vars=date_varlist) did_warn = False for var in this_ds: @@ -355,7 +355,7 @@ def import_output( Import CLM output """ # Import - this_ds = utils.import_ds(filename, myVars=my_vars, myVegtypes=my_vegtypes) + this_ds = utils.import_ds(filename, my_vars=my_vars, my_vegtypes=my_vegtypes) # Trim to years of interest (do not include extra year needed for finishing last growing season) if year_1 and year_n: diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index f96efd1d99..2f84bd6739 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -250,8 +250,8 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None): indices = np.array([-1]) else: indices = np.full(len(vegtype_str), -1) - for v in np.unique(vegtype_str): - indices[np.where(vegtype_str == v)] = vegtype_mainlist.index(v) + for vegtype_str_2 in np.unique(vegtype_str): + indices[np.where(vegtype_str == vegtype_str_2)] = vegtype_mainlist.index(vegtype_str_2) if convert_to_ndarray: indices = [int(x) for x in indices] return indices @@ -331,8 +331,8 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr if slice_members == []: raise TypeError("slice is all None?") this_type = int - for x in slice_members: - if x < 0 or not isinstance(x, int): + for member in slice_members: + if member < 0 or not isinstance(member, int): this_type = "values" break elif isinstance(selection, np.ndarray): @@ -341,12 +341,12 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr else: is_inefficient = True this_type = None - for x in selection: - if x < 0 or x % 1 > 0: - if isinstance(x, int): + for member in selection: + if member < 0 or member % 1 > 0: + if isinstance(member, int): this_type = "values" else: - this_type = type(x) + this_type = type(member) break if this_type == None: this_type = int @@ -385,47 +385,47 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr # Trim along relevant 1d axes if isinstance(xr_object, xr.Dataset) and key in ["lat", "lon"]: if selection_type == "indices": - inclCoords = xr_object[key].values[selection] + incl_coords = xr_object[key].values[selection] elif selection_type == "values": if isinstance(selection, slice): - inclCoords = xr_object.sel({key: selection}, drop=False)[key].values + incl_coords = xr_object.sel({key: selection}, drop=False)[key].values else: - inclCoords = selection + incl_coords = selection else: raise TypeError(f"selection_type {selection_type} not recognized") if key == "lat": - thisXY = "jxy" + this_xy = "jxy" elif key == "lon": - thisXY = "ixy" + this_xy = "ixy" else: raise KeyError( f"Key '{key}' not recognized: What 1d_ suffix should I use for variable" " name?" ) - pattern = re.compile(f"1d_{thisXY}") + pattern = re.compile(f"1d_{this_xy}") matches = [x for x in list(xr_object.keys()) if pattern.search(x) != None] - for thisVar in matches: - if len(xr_object[thisVar].dims) != 1: + for var in matches: + if len(xr_object[var].dims) != 1: raise RuntimeError( - f"Expected {thisVar} to have 1 dimension, but it has" - f" {len(xr_object[thisVar].dims)}: {xr_object[thisVar].dims}" + f"Expected {var} to have 1 dimension, but it has" + f" {len(xr_object[var].dims)}: {xr_object[var].dims}" ) - thisVar_dim = xr_object[thisVar].dims[0] - # print(f"Variable {thisVar} has dimension {thisVar_dim}") - thisVar_coords = xr_object[key].values[ - xr_object[thisVar].values.astype(int) - 1 - ] - # print(f"{thisVar_dim} size before: {xr_object.sizes[thisVar_dim]}") + dim = xr_object[var].dims[0] + # print(f"Variable {var} has dimension {dim}") + coords = xr_object[key].values[xr_object[var].values.astype(int) - 1] + # print(f"{dim} size before: {xr_object.sizes[dim]}") ok_ind = [] - new_1d_thisXY = [] - for i, x in enumerate(thisVar_coords): - if x in inclCoords: + new_1d_this_xy = [] + for i, member in enumerate(coords): + if member in incl_coords: ok_ind = ok_ind + [i] - new_1d_thisXY = new_1d_thisXY + [(inclCoords == x).nonzero()[0] + 1] - xr_object = xr_object.isel({thisVar_dim: ok_ind}) - new_1d_thisXY = np.array(new_1d_thisXY).squeeze() - xr_object[thisVar].values = new_1d_thisXY - # print(f"{thisVar_dim} size after: {xr_object.sizes[thisVar_dim]}") + new_1d_this_xy = new_1d_this_xy + [ + (incl_coords == member).nonzero()[0] + 1 + ] + xr_object = xr_object.isel({dim: ok_ind}) + new_1d_this_xy = np.array(new_1d_this_xy).squeeze() + xr_object[var].values = new_1d_this_xy + # print(f"{dim} size after: {xr_object.sizes[dim]}") # Perform selection if selection_type == "indices": @@ -463,72 +463,71 @@ def get_patch_ivts(this_ds, this_pftlist): # Convert a list of strings with vegetation type names into a DataArray. Used to add vegetation type info in import_ds(). def get_vegtype_str_da(vegtype_str): nvt = len(vegtype_str) - thisName = "vegtype_str" vegtype_str_da = xr.DataArray( - vegtype_str, coords={"ivt": np.arange(0, nvt)}, dims=["ivt"], name=thisName + vegtype_str, coords={"ivt": np.arange(0, nvt)}, dims=["ivt"], name="vegtype_str" ) return vegtype_str_da # Function to drop unwanted variables in preprocessing of open_mfdataset(), making sure to NOT drop any unspecified variables that will be useful in gridding. Also adds vegetation type info in the form of a DataArray of strings. # Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be named like "patch". This can later be reversed, for compatibility with other code, using patch2pft(). -def mfdataset_preproc(ds, vars_to_import, vegtypes_to_import, timeSlice): +def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): # Rename "pft" dimension and variables to "patch", if needed - if "pft" in ds.dims: + if "pft" in ds_in.dims: pattern = re.compile("pft.*1d") - matches = [x for x in list(ds.keys()) if pattern.search(x) != None] + matches = [x for x in list(ds_in.keys()) if pattern.search(x) != None] pft2patch_dict = {"pft": "patch"} - for m in matches: - pft2patch_dict[m] = m.replace("pft", "patch").replace("patchs", "patches") - ds = ds.rename(pft2patch_dict) + for match in matches: + pft2patch_dict[match] = match.replace("pft", "patch").replace("patchs", "patches") + ds_in = ds_in.rename(pft2patch_dict) derived_vars = [] if vars_to_import != None: # Split vars_to_import into variables that are vs. aren't already in ds - derived_vars = [v for v in vars_to_import if v not in ds] - present_vars = [v for v in vars_to_import if v in ds] + derived_vars = [v for v in vars_to_import if v not in ds_in] + present_vars = [v for v in vars_to_import if v in ds_in] vars_to_import = present_vars # Get list of dimensions present in variables in vars_to_import. - dimList = [] - for thisVar in vars_to_import: + dim_list = [] + for var in vars_to_import: # list(set(x)) returns a list of the unique items in x - dimList = list(set(dimList + list(ds.variables[thisVar].dims))) + dim_list = list(set(dim_list + list(ds_in.variables[var].dims))) # Get any _1d variables that are associated with those dimensions. These will be useful in gridding. Also, if any dimension is "pft", set up to rename it and all like-named variables to "patch" - onedVars = [] - for thisDim in dimList: - pattern = re.compile(f"{thisDim}.*1d") - matches = [x for x in list(ds.keys()) if pattern.search(x) != None] - onedVars = list(set(onedVars + matches)) + oned_vars = [] + for dim in dim_list: + pattern = re.compile(f"{dim}.*1d") + matches = [x for x in list(ds_in.keys()) if pattern.search(x) != None] + oned_vars = list(set(oned_vars + matches)) # Add dimensions and _1d variables to vars_to_import - vars_to_import = list(set(vars_to_import + list(ds.dims) + onedVars)) + vars_to_import = list(set(vars_to_import + list(ds_in.dims) + oned_vars)) # Add any _bounds variables bounds_vars = [] - for v in vars_to_import: - bounds_var = v + "_bounds" - if bounds_var in ds: + for var in vars_to_import: + bounds_var = var + "_bounds" + if bounds_var in ds_in: bounds_vars = bounds_vars + [bounds_var] vars_to_import = vars_to_import + bounds_vars # Get list of variables to drop - varlist = list(ds.variables) + varlist = list(ds_in.variables) vars_to_drop = list(np.setdiff1d(varlist, vars_to_import)) # Drop them - ds = ds.drop_vars(vars_to_drop) + ds_in = ds_in.drop_vars(vars_to_drop) # Add vegetation type info - if "patches1d_itype_veg" in list(ds): + if "patches1d_itype_veg" in list(ds_in): this_pftlist = define_pftlist() get_patch_ivts( - ds, this_pftlist + ds_in, this_pftlist ) # Includes check of whether vegtype changes over time anywhere vegtype_da = get_vegtype_str_da(this_pftlist) patches1d_itype_veg_str = vegtype_da.values[ - ds.isel(time=0).patches1d_itype_veg.values.astype(int) + ds_in.isel(time=0).patches1d_itype_veg.values.astype(int) ] npatch = len(patches1d_itype_veg_str) patches1d_itype_veg_str = xr.DataArray( @@ -537,77 +536,82 @@ def mfdataset_preproc(ds, vars_to_import, vegtypes_to_import, timeSlice): dims=["patch"], name="patches1d_itype_veg_str", ) - ds = xr.merge([ds, vegtype_da, patches1d_itype_veg_str]) + ds_in = xr.merge([ds_in, vegtype_da, patches1d_itype_veg_str]) # Restrict to veg. types of interest, if any if vegtypes_to_import != None: - ds = xr_flexsel(ds, vegtype=vegtypes_to_import) + ds_in = xr_flexsel(ds_in, vegtype=vegtypes_to_import) # Restrict to time slice, if any - if timeSlice: - ds = safer_timeslice(ds, timeSlice) + if time_slice: + ds_in = safer_timeslice(ds_in, time_slice) # Finish import - ds = xr.decode_cf(ds, decode_times=True) + ds_in = xr.decode_cf(ds_in, decode_times=True) # Compute derived variables - for v in derived_vars: - if v == "HYEARS" and "HDATES" in ds and ds.HDATES.dims == ("time", "mxharvests", "patch"): - yearList = np.array([np.float32(x.year - 1) for x in ds.time.values]) - hyears = ds["HDATES"].copy() + for var in derived_vars: + if ( + var == "HYEARS" + and "HDATES" in ds_in + and ds_in.HDATES.dims == ("time", "mxharvests", "patch") + ): + year_list = np.array([np.float32(x.year - 1) for x in ds_in.time.values]) + hyears = ds_in["HDATES"].copy() hyears.values = np.tile( - np.expand_dims(yearList, (1, 2)), (1, ds.dims["mxharvests"], ds.dims["patch"]) + np.expand_dims(year_list, (1, 2)), + (1, ds_in.dims["mxharvests"], ds_in.dims["patch"]), ) with np.errstate(invalid="ignore"): - is_le_zero = ~np.isnan(ds.HDATES.values) & (ds.HDATES.values <= 0) - hyears.values[is_le_zero] = ds.HDATES.values[is_le_zero] - hyears.values[np.isnan(ds.HDATES.values)] = np.nan + is_le_zero = ~np.isnan(ds_in.HDATES.values) & (ds_in.HDATES.values <= 0) + hyears.values[is_le_zero] = ds_in.HDATES.values[is_le_zero] + hyears.values[np.isnan(ds_in.HDATES.values)] = np.nan hyears.attrs["long_name"] = "DERIVED: actual crop harvest years" hyears.attrs["units"] = "year" - ds["HYEARS"] = hyears + ds_in["HYEARS"] = hyears - return ds + return ds_in # Import a dataset that can be spread over multiple files, only including specified variables and/or vegetation types and/or timesteps, concatenating by time. DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or vegetation types. def import_ds( filelist, - myVars=None, - myVegtypes=None, - timeSlice=None, - myVars_missing_ok=[], + my_vars=None, + my_vegtypes=None, + time_slice=None, + my_vars_missing_ok=[], only_active_patches=False, rename_lsmlatlon=False, chunks=None, ): - # Convert myVegtypes here, if needed, to avoid repeating the process each time you read a file in xr.open_mfdataset(). - if myVegtypes is not None: - if not isinstance(myVegtypes, list): - myVegtypes = [myVegtypes] - if isinstance(myVegtypes[0], str): - myVegtypes = vegtype_str2int(myVegtypes) + # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file in xr.open_mfdataset(). + if my_vegtypes is not None: + if not isinstance(my_vegtypes, list): + my_vegtypes = [my_vegtypes] + if isinstance(my_vegtypes[0], str): + my_vegtypes = vegtype_str2int(my_vegtypes) # Same for these variables. - if myVars != None: - if not isinstance(myVars, list): - myVars = [myVars] - if myVars_missing_ok: - if not isinstance(myVars_missing_ok, list): - myVars_missing_ok = [myVars_missing_ok] + if my_vars != None: + if not isinstance(my_vars, list): + my_vars = [my_vars] + if my_vars_missing_ok: + if not isinstance(my_vars_missing_ok, list): + my_vars_missing_ok = [my_vars_missing_ok] # Make sure lists are actually lists if not isinstance(filelist, list): filelist = [filelist] - if not isinstance(myVars_missing_ok, list): - myVars_missing_ok = [myVars_missing_ok] + if not isinstance(my_vars_missing_ok, list): + my_vars_missing_ok = [my_vars_missing_ok] # Remove files from list if they don't contain requested timesteps. - # timeSlice should be in the format slice(start,end[,step]). start or end can be None to be unbounded on one side. Note that the standard slice() documentation suggests that only elements through end-1 will be selected, but that seems not to be the case in the xarray implementation. - if timeSlice: + # time_slice should be in the format slice(start,end[,step]). start or end can be None to be unbounded on one side. Note that the standard slice() documentation suggests that only elements through end-1 will be selected, but that seems not to be the case in the xarray implementation. + if time_slice: new_filelist = [] for file in sorted(filelist): filetime = xr.open_dataset(file).time - filetime_sel = safer_timeslice(filetime, timeSlice) + filetime_sel = safer_timeslice(filetime, time_slice) include_this_file = filetime_sel.size if include_this_file: new_filelist.append(file) @@ -616,11 +620,11 @@ def import_ds( elif new_filelist: break if not new_filelist: - raise RuntimeError(f"No files found in timeSlice {timeSlice}") + raise RuntimeError(f"No files found in time_slice {time_slice}") filelist = new_filelist - # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function allows this. Could also just allow mfdataset_preproc() to access myVars and myVegtypes directly, but that's bad practice as it could lead to scoping issues. - mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, myVars, myVegtypes, timeSlice) + # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function allows this. Could also just allow mfdataset_preproc() to access my_vars and my_vegtypes directly, but that's bad practice as it could lead to scoping issues. + mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, my_vars, my_vegtypes, time_slice) # Import if isinstance(filelist, list) and len(filelist) == 1: @@ -646,7 +650,7 @@ def import_ds( ) elif isinstance(filelist, str): this_ds = xr.open_dataset(filelist, chunks=chunks) - this_ds = mfdataset_preproc(this_ds, myVars, myVegtypes, timeSlice) + this_ds = mfdataset_preproc(this_ds, my_vars, my_vegtypes, time_slice) this_ds = this_ds.compute() # Include only active patches (or whatever) @@ -656,10 +660,10 @@ def import_ds( this_ds_active = this_ds.isel(patch=p_active) # Warn and/or error about variables that couldn't be imported or derived - if myVars: - missing_vars = [v for v in myVars if v not in this_ds] - ok_missing_vars = [v for v in missing_vars if v in myVars_missing_ok] - bad_missing_vars = [v for v in missing_vars if v not in myVars_missing_ok] + if my_vars: + missing_vars = [v for v in my_vars if v not in this_ds] + ok_missing_vars = [v for v in missing_vars if v in my_vars_missing_ok] + bad_missing_vars = [v for v in missing_vars if v not in my_vars_missing_ok] if ok_missing_vars: print( "Could not import some variables; either not present or not deriveable:" @@ -681,37 +685,37 @@ def import_ds( # Return a DataArray, with defined coordinates, for a given variable in a dataset. -def get_thisVar_da(thisVar, this_ds): +def get_thisvar_da(var, this_ds): # Make DataArray for this variable - thisvar_da = np.array(this_ds.variables[thisVar]) - theseDims = this_ds.variables[thisVar].dims - thisvar_da = xr.DataArray(thisvar_da, dims=theseDims) + thisvar_da = np.array(this_ds.variables[var]) + these_dims = this_ds.variables[var].dims + thisvar_da = xr.DataArray(thisvar_da, dims=these_dims) # Define coordinates of this variable's DataArray - dimsDict = dict() - for thisDim in theseDims: - dimsDict[thisDim] = this_ds[thisDim] - thisvar_da = thisvar_da.assign_coords(dimsDict) - thisvar_da.attrs = this_ds[thisVar].attrs + dims_dict = dict() + for dim in these_dims: + dims_dict[dim] = this_ds[dim] + thisvar_da = thisvar_da.assign_coords(dims_dict) + thisvar_da.attrs = this_ds[var].attrs return thisvar_da # Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, lon) of one variable within a Dataset. Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices along the specified ax(ie)s. # -# fillValue: Default None means grid will be filled with NaN, unless the variable in question already has a fillValue, in which case that will be used. -def grid_one_variable(this_ds, thisVar, fillValue=None, **kwargs): +# fill_value: Default None means grid will be filled with NaN, unless the variable in question already has a _FillValue, in which case that will be used. +def grid_one_variable(this_ds, var, fill_value=None, **kwargs): # Get this Dataset's values for selection(s), if provided this_ds = xr_flexsel(this_ds, **kwargs) # Get DataArrays needed for gridding - thisvar_da = get_thisVar_da(thisVar, this_ds) + thisvar_da = get_thisvar_da(var, this_ds) vt_da = None if "patch" in thisvar_da.dims: spatial_unit = "patch" xy_1d_prefix = "patches" if "patches1d_itype_veg" in this_ds: - vt_da = get_thisVar_da("patches1d_itype_veg", this_ds) + vt_da = get_thisvar_da("patches1d_itype_veg", this_ds) elif "gridcell" in thisvar_da.dims: spatial_unit = "gridcell" xy_1d_prefix = "grid" @@ -719,11 +723,11 @@ def grid_one_variable(this_ds, thisVar, fillValue=None, **kwargs): raise RuntimeError( f"What variables to use for _ixy and _jxy of variable with dims {thisvar_da.dims}?" ) - ixy_da = get_thisVar_da(xy_1d_prefix + "1d_ixy", this_ds) - jxy_da = get_thisVar_da(xy_1d_prefix + "1d_jxy", this_ds) + ixy_da = get_thisvar_da(xy_1d_prefix + "1d_ixy", this_ds) + jxy_da = get_thisvar_da(xy_1d_prefix + "1d_jxy", this_ds) - if not fillValue and "_FillValue" in thisvar_da.attrs: - fillValue = thisvar_da.attrs["_FillValue"] + if not fill_value and "_FillValue" in thisvar_da.attrs: + fill_value = thisvar_da.attrs["_FillValue"] # Renumber vt_da to work as indices on new ivt dimension, if needed. ### Ensures that the unique set of vt_da values begins with 1 and @@ -743,18 +747,18 @@ def grid_one_variable(this_ds, thisVar, fillValue=None, **kwargs): new_dims = new_dims + ["lat", "lon"] # Set up empty array - n_list = [] + dim_size_list = [] for dim in new_dims: if dim == "ivt_str": - n = this_ds.sizes["ivt"] + dim_size = this_ds.sizes["ivt"] elif dim in thisvar_da.coords: - n = thisvar_da.sizes[dim] + dim_size = thisvar_da.sizes[dim] else: - n = this_ds.sizes[dim] - n_list = n_list + [n] - thisvar_gridded = np.empty(n_list) - if fillValue: - thisvar_gridded[:] = fillValue + dim_size = this_ds.sizes[dim] + dim_size_list = dim_size_list + [dim_size] + thisvar_gridded = np.empty(dim_size_list) + if fill_value: + thisvar_gridded[:] = fill_value else: thisvar_gridded[:] = np.NaN @@ -790,45 +794,45 @@ def grid_one_variable(this_ds, thisVar, fillValue=None, **kwargs): else: values = this_ds[dim].values thisvar_gridded = thisvar_gridded.assign_coords({dim: values}) - thisvar_gridded.name = thisVar + thisvar_gridded.name = var # Add FillValue attribute - if fillValue: - thisvar_gridded.attrs["_FillValue"] = fillValue + if fill_value: + thisvar_gridded.attrs["_FillValue"] = fill_value return thisvar_gridded # ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) for some reason. This function tries to fall back to slicing by integers. It should work with both Datasets and DataArrays. -def safer_timeslice(ds, timeSlice, timeVar="time"): +def safer_timeslice(ds_in, time_slice, time_var="time"): try: - ds = ds.sel({timeVar: timeSlice}) + ds_in = ds_in.sel({time_var: time_slice}) except: # If the issue might have been slicing using strings, try to fall back to integer slicing if ( - isinstance(timeSlice.start, str) - and isinstance(timeSlice.stop, str) - and len(timeSlice.start.split("-")) == 3 - and timeSlice.start.split("-")[1:] == ["01", "01"] - and len(timeSlice.stop.split("-")) == 3 + isinstance(time_slice.start, str) + and isinstance(time_slice.stop, str) + and len(time_slice.start.split("-")) == 3 + and time_slice.start.split("-")[1:] == ["01", "01"] + and len(time_slice.stop.split("-")) == 3 and ( - timeSlice.stop.split("-")[1:] == ["12", "31"] - or timeSlice.stop.split("-")[1:] == ["01", "01"] + time_slice.stop.split("-")[1:] == ["12", "31"] + or time_slice.stop.split("-")[1:] == ["01", "01"] ) ): - fileyears = np.array([x.year for x in ds.time.values]) + fileyears = np.array([x.year for x in ds_in.time.values]) if len(np.unique(fileyears)) != len(fileyears): print("Could not fall back to integer slicing of years: Time axis not annual") raise - yStart = int(timeSlice.start.split("-")[0]) - yStop = int(timeSlice.stop.split("-")[0]) - where_in_timeSlice = np.where((fileyears >= yStart) & (fileyears <= yStop))[0] - ds = ds.isel({timeVar: where_in_timeSlice}) + y_start = int(time_slice.start.split("-")[0]) + y_stop = int(time_slice.stop.split("-")[0]) + where_in_timeslice = np.where((fileyears >= y_start) & (fileyears <= y_stop))[0] + ds_in = ds_in.isel({time_var: where_in_timeslice}) else: - print(f"Could not fall back to integer slicing for timeSlice {timeSlice}") + print(f"Could not fall back to integer slicing for time_slice {time_slice}") raise - return ds + return ds_in # Convert a longitude axis that's -180 to 180 around the international date line to one that's 0 to 360 around the prime meridian. If you pass in a Dataset or DataArray, the "lon" coordinates will be changed. Otherwise, it assumes you're passing in numeric data. @@ -878,9 +882,9 @@ def do_it(tmp): # Helper function to check that a list is strictly increasing -def is_strictly_increasing(L): +def is_strictly_increasing(this_list): # https://stackoverflow.com/a/4983359/2965321 - return all(x < y for x, y in zip(L, L[1:])) + return all(x < y for x, y in zip(this_list, this_list[1:])) # Ensure that longitude axis coordinates are monotonically increasing diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index 74e8fd57f4..0b8f1211b7 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -156,7 +156,7 @@ def import_rx_dates(s_or_h, date_infile, incl_patches1d_itype_veg, mxsowings, lo this_var = f"{s_or_h}date{n_sowing+1}_{i}" date_var_list = date_var_list + [this_var] - this_ds = utils.import_ds(date_infile, myVars=date_var_list) + this_ds = utils.import_ds(date_infile, my_vars=date_var_list) for var in this_ds: this_ds = this_ds.rename({var: var.replace(f"{s_or_h}date", "gs")}) @@ -272,9 +272,9 @@ def import_and_process_1yr( print(h1_filelist) dates_ds = utils.import_ds( h1_filelist, - myVars=["SDATES", "HDATES"], - myVegtypes=crops_to_read, - timeSlice=slice(f"{this_year}-01-01", f"{this_year}-12-31"), + my_vars=["SDATES", "HDATES"], + my_vegtypes=crops_to_read, + time_slice=slice(f"{this_year}-01-01", f"{this_year}-12-31"), chunks=chunks, ) @@ -541,8 +541,8 @@ def import_and_process_1yr( error(logger, f"No files found matching pattern '*h2.{this_year-1}-01-01*.nc(.base)'") h2_ds = utils.import_ds( h2_files, - myVars=my_vars, - myVegtypes=crops_to_read, + my_vars=my_vars, + my_vegtypes=crops_to_read, chunks=chunks, ) diff --git a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py index 5c2e7f8820..1a16387f7d 100644 --- a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py @@ -117,9 +117,9 @@ def regrid_ggcmi_shdates( # Import and format latitude if "lat" in template_ds_in: - lat, Nlat = import_coord_1d(template_ds_in, "lat") + lat, n_lat = import_coord_1d(template_ds_in, "lat") elif "LATIXY" in template_ds_in: - lat, Nlat = import_coord_2d(template_ds_in, "lat", "LATIXY") + lat, n_lat = import_coord_2d(template_ds_in, "lat", "LATIXY") lat.attrs["axis"] = "Y" else: abort("No latitude variable found in regrid template file") @@ -130,14 +130,14 @@ def regrid_ggcmi_shdates( # Import and format longitude if "lon" in template_ds_in: - lon, Nlon = import_coord_1d(template_ds_in, "lon") + lon, n_lon = import_coord_1d(template_ds_in, "lon") elif "LONGXY" in template_ds_in: - lon, Nlon = import_coord_2d(template_ds_in, "lon", "LONGXY") + lon, n_lon = import_coord_2d(template_ds_in, "lon", "LONGXY") lon.attrs["axis"] = "Y" else: abort("No longitude variable found in regrid template file") template_da_out = xr.DataArray( - data=np.full((Nlat, Nlon), 0.0), + data=np.full((n_lat, n_lon), 0.0), dims={"lat": lat, "lon": lon}, name="area", ) @@ -159,36 +159,38 @@ def regrid_ggcmi_shdates( if len(input_files) == 0: abort(f"No files found matching {os.path.join(os.getcwd(), pattern)}") input_files.sort() - for f in input_files: - this_crop = f[0:6] + for file in input_files: + this_crop = file[0:6] if crop_list is not None and this_crop not in crop_list: continue logger.info(" " + this_crop) - f2 = os.path.join(regrid_output_directory, f) - f3 = f2.replace(regrid_extension, f"_nninterp-{regrid_resolution}{regrid_extension}") + file_2 = os.path.join(regrid_output_directory, file) + file_3 = file_2.replace( + regrid_extension, f"_nninterp-{regrid_resolution}{regrid_extension}" + ) - if os.path.exists(f3): - os.remove(f3) + if os.path.exists(file_3): + os.remove(file_3) # Sometimes cdo fails for no apparent reason. In testing this never happened more than 3x in a row. try: run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" + f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) except: try: run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" + f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) except: try: run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" + f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) except: run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" + f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) # Delete template file, which is no longer needed diff --git a/python/ctsm/test/test_unit_utils_import_coord.py b/python/ctsm/test/test_unit_utils_import_coord.py index c5607356fd..6e339a913f 100755 --- a/python/ctsm/test/test_unit_utils_import_coord.py +++ b/python/ctsm/test/test_unit_utils_import_coord.py @@ -62,8 +62,8 @@ def test_importcoord1d(self): Tests importing a 1-d lat/lon variable """ ds = xr.open_dataset(self._1d_lonlat_file) - lat, Nlat = import_coord_1d(ds, "lat") - np.testing.assert_equal(Nlat, 360) + lat, n_lat = import_coord_1d(ds, "lat") + np.testing.assert_equal(n_lat, 360) np.testing.assert_array_equal(lat.values[:4], [89.75, 89.25, 88.75, 88.25]) np.testing.assert_array_equal(lat.values[-4:], [-88.25, -88.75, -89.25, -89.75]) From 5fc5bf2e5ba80ae2dbfe78b453807227230a5ed4 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:34:23 -0700 Subject: [PATCH 25/40] pylint: Resolve no-else-return and no-else-raise. --- python/ctsm/crop_calendars/cropcal_utils.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index 2f84bd6739..d7e7ff93f4 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -189,14 +189,13 @@ def is_this_vegtype(this_vegtype, this_filter, this_method): # Perform the comparison if this_method == "ok_contains": return any(n in this_vegtype for n in this_filter) - elif this_method == "notok_contains": + if this_method == "notok_contains": return not any(n in this_vegtype for n in this_filter) - elif this_method == "ok_exact": + if this_method == "ok_exact": return any(n == this_vegtype for n in this_filter) - elif this_method == "notok_exact": + if this_method == "notok_exact": return not any(n == this_vegtype for n in this_filter) - else: - raise ValueError(f"Unknown comparison method: '{this_method}'") + raise ValueError(f"Unknown comparison method: '{this_method}'") # Get boolean list of whether each vegetation type in list is a managed crop @@ -241,10 +240,9 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None): raise TypeError( f"Not sure how to handle vegtype_mainlist as list of {type(vegtype_mainlist[0])}" ) - else: - raise TypeError( - f"Not sure how to handle vegtype_mainlist as type {type(vegtype_mainlist[0])}" - ) + raise TypeError( + f"Not sure how to handle vegtype_mainlist as type {type(vegtype_mainlist[0])}" + ) if vegtype_str.shape == (): indices = np.array([-1]) @@ -847,10 +845,9 @@ def check_ok(tmp, fail_silently): if msg == "": return True - elif fail_silently: + if fail_silently: return False - else: - raise ValueError(msg) + raise ValueError(msg) def do_it(tmp): tmp = tmp + 360 From cbff7d928450a48c972627fbe18599966bd56782 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:35:38 -0700 Subject: [PATCH 26/40] pylint: Resolve singleton-comparison. --- python/ctsm/crop_calendars/cropcal_utils.py | 22 ++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index d7e7ff93f4..a67b2ed346 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -233,7 +233,7 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None): vegtype_mainlist = vegtype_mainlist.vegtype_str.values elif isinstance(vegtype_mainlist, xr.DataArray): vegtype_mainlist = vegtype_mainlist.values - elif vegtype_mainlist == None: + elif vegtype_mainlist is None: vegtype_mainlist = define_pftlist() if not isinstance(vegtype_mainlist, list) and isinstance(vegtype_mainlist[0], str): if isinstance(vegtype_mainlist, list): @@ -320,11 +320,11 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr slice_members = [] if selection == slice(0): raise ValueError("slice(0) will be empty") - if selection.start != None: + if selection.start is not None: slice_members = slice_members + [selection.start] - if selection.stop != None: + if selection.stop is not None: slice_members = slice_members + [selection.stop] - if selection.step != None: + if selection.step is not None: slice_members = slice_members + [selection.step] if slice_members == []: raise TypeError("slice is all None?") @@ -346,7 +346,7 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr else: this_type = type(member) break - if this_type == None: + if this_type is None: this_type = int selection = selection.astype(int) else: @@ -401,7 +401,7 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr " name?" ) pattern = re.compile(f"1d_{this_xy}") - matches = [x for x in list(xr_object.keys()) if pattern.search(x) != None] + matches = [x for x in list(xr_object.keys()) if pattern.search(x) is not None] for var in matches: if len(xr_object[var].dims) != 1: raise RuntimeError( @@ -473,14 +473,14 @@ def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): # Rename "pft" dimension and variables to "patch", if needed if "pft" in ds_in.dims: pattern = re.compile("pft.*1d") - matches = [x for x in list(ds_in.keys()) if pattern.search(x) != None] + matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] pft2patch_dict = {"pft": "patch"} for match in matches: pft2patch_dict[match] = match.replace("pft", "patch").replace("patchs", "patches") ds_in = ds_in.rename(pft2patch_dict) derived_vars = [] - if vars_to_import != None: + if vars_to_import is not None: # Split vars_to_import into variables that are vs. aren't already in ds derived_vars = [v for v in vars_to_import if v not in ds_in] present_vars = [v for v in vars_to_import if v in ds_in] @@ -496,7 +496,7 @@ def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): oned_vars = [] for dim in dim_list: pattern = re.compile(f"{dim}.*1d") - matches = [x for x in list(ds_in.keys()) if pattern.search(x) != None] + matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] oned_vars = list(set(oned_vars + matches)) # Add dimensions and _1d variables to vars_to_import @@ -537,7 +537,7 @@ def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): ds_in = xr.merge([ds_in, vegtype_da, patches1d_itype_veg_str]) # Restrict to veg. types of interest, if any - if vegtypes_to_import != None: + if vegtypes_to_import is not None: ds_in = xr_flexsel(ds_in, vegtype=vegtypes_to_import) # Restrict to time slice, if any @@ -590,7 +590,7 @@ def import_ds( my_vegtypes = vegtype_str2int(my_vegtypes) # Same for these variables. - if my_vars != None: + if my_vars is not None: if not isinstance(my_vars, list): my_vars = [my_vars] if my_vars_missing_ok: From 08ce92963a3307022afa0221cee296099179ea24 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:37:59 -0700 Subject: [PATCH 27/40] pylint: Resolve bare-except. --- python/ctsm/crop_calendars/cropcal_utils.py | 4 ++-- python/ctsm/crop_calendars/regrid_ggcmi_shdates.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index a67b2ed346..2efd3508c5 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -774,7 +774,7 @@ def grid_one_variable(this_ds, var, fill_value=None, **kwargs): fill_indices.append(Ellipsis) try: thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values - except: + except: # pylint: disable=bare-except thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values.transpose() if not np.any(np.bitwise_not(np.isnan(thisvar_gridded))): if np.all(np.isnan(thisvar_da.values)): @@ -805,7 +805,7 @@ def grid_one_variable(this_ds, var, fill_value=None, **kwargs): def safer_timeslice(ds_in, time_slice, time_var="time"): try: ds_in = ds_in.sel({time_var: time_slice}) - except: + except: # pylint: disable=bare-except # If the issue might have been slicing using strings, try to fall back to integer slicing if ( isinstance(time_slice.start, str) diff --git a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py index 1a16387f7d..c8e27a89f9 100644 --- a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py @@ -178,17 +178,17 @@ def regrid_ggcmi_shdates( run_and_check( f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) - except: + except: # pylint: disable=bare-except try: run_and_check( f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) - except: + except: # pylint: disable=bare-except try: run_and_check( f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) - except: + except: # pylint: disable=bare-except run_and_check( f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) From 58b75f121fb9bc5528f660312eeff32395e0c30e Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:39:43 -0700 Subject: [PATCH 28/40] pylint: Resolve unused-import. --- python/ctsm/crop_calendars/cropcal_utils.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index 2efd3508c5..0f0824d6e8 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -7,13 +7,6 @@ import warnings import importlib -with warnings.catch_warnings(): - warnings.filterwarnings(action="ignore", category=DeprecationWarning) - if importlib.find_loader("cf_units") is not None: - import cf_units as cf - if importlib.find_loader("cartopy") is not None: - from cartopy.util import add_cyclic_point -import cftime import numpy as np import xarray as xr From 3dae192c7424d857e4ed07d4a0b7a3afb5665abc Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:50:16 -0700 Subject: [PATCH 29/40] pylint: Add docstrings. --- python/ctsm/crop_calendars/cropcal_utils.py | 139 +++++++++++++----- .../crop_calendars/regrid_ggcmi_shdates.py | 15 +- 2 files changed, 113 insertions(+), 41 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index 0f0824d6e8..db0d1f4777 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -11,8 +11,10 @@ import xarray as xr -# List of PFTs used in CLM def define_pftlist(): + """ + Return list of PFTs used in CLM + """ pftlist = [ "not_vegetated", "needleleaf_evergreen_temperate_tree", @@ -97,8 +99,10 @@ def define_pftlist(): return pftlist -# Get CLM ivt number corresponding to a given name def ivt_str2int(ivt_str): + """ + Get CLM ivt number corresponding to a given name + """ pftlist = define_pftlist() if isinstance(ivt_str, str): ivt_int = pftlist.index(ivt_str) @@ -114,8 +118,10 @@ def ivt_str2int(ivt_str): return ivt_int -# Get CLM ivt name corresponding to a given number def ivt_int2str(ivt_int): + """ + Get CLM ivt name corresponding to a given number + """ pftlist = define_pftlist() if np.issubdtype(type(ivt_int), np.integer) or int(ivt_int) == ivt_int: ivt_str = pftlist[int(ivt_int)] @@ -133,19 +139,19 @@ def ivt_int2str(ivt_int): return ivt_str -# Does this vegetation type's name match (for a given comparison method) any member of a filtering list? -""" -Methods: +def is_this_vegtype(this_vegtype, this_filter, this_method): + """ + Does this vegetation type's name match (for a given comparison method) any member of a filtering + list? + + Methods: ok_contains: True if any member of this_filter is found in this_vegtype. notok_contains: True of no member of this_filter is found in this_vegtype. - ok_exact: True if this_vegtype matches any member of this_filter + ok_exact: True if this_vegtype matches any member of this_filter exactly. - notok_exact: True if this_vegtype does not match any member of + notok_exact: True if this_vegtype does not match any member of this_filter exactly. -""" - - -def is_this_vegtype(this_vegtype, this_filter, this_method): + """ # Make sure data type of this_vegtype is acceptable if isinstance(this_vegtype, float) and int(this_vegtype) == this_vegtype: this_vegtype = int(this_vegtype) @@ -191,33 +197,35 @@ def is_this_vegtype(this_vegtype, this_filter, this_method): raise ValueError(f"Unknown comparison method: '{this_method}'") -# Get boolean list of whether each vegetation type in list is a managed crop -""" - this_vegtypelist: The list of vegetation types whose members you want to - test. - this_filter: The list of strings against which you want to compare - each member of this_vegtypelist. - this_method: How you want to do the comparison. See is_this_vegtype(). -""" - - def is_each_vegtype(this_vegtypelist, this_filter, this_method): + """ + Get boolean list of whether each vegetation type in list is a managed crop + + this_vegtypelist: The list of vegetation types whose members you want to test. + this_filter: The list of strings against which you want to compare each member of + this_vegtypelist. + this_method: How you want to do the comparison. See is_this_vegtype(). + """ if isinstance(this_vegtypelist, xr.DataArray): this_vegtypelist = this_vegtypelist.values return [is_this_vegtype(x, this_filter, this_method) for x in this_vegtypelist] -# List (strings) of managed crops in CLM. def define_mgdcrop_list(): + """ + List (strings) of managed crops in CLM. + """ notcrop_list = ["tree", "grass", "shrub", "unmanaged", "not_vegetated"] defined_pftlist = define_pftlist() is_crop = is_each_vegtype(defined_pftlist, notcrop_list, "notok_contains") return [defined_pftlist[i] for i, x in enumerate(is_crop) if x] -# Convert list of vegtype strings to integer index equivalents. def vegtype_str2int(vegtype_str, vegtype_mainlist=None): + """ + Convert list of vegtype strings to integer index equivalents. + """ convert_to_ndarray = not isinstance(vegtype_str, np.ndarray) if convert_to_ndarray: vegtype_str = np.array(vegtype_str) @@ -248,9 +256,19 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None): return indices -# Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. Keyword arguments like dimension=selection. Selections can be individual values or slice()s. Optimize memory usage by beginning keyword argument list with the selections that will result in the largest reduction of object size. Use dimension "vegtype" to extract patches of designated vegetation type (can be string or integer). -# Can also do dimension=function---e.g., time=np.mean will take the mean over the time dimension. def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=True, **kwargs): + """ + Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. + + - Keyword arguments like dimension=selection. + - Selections can be individual values or slice()s. + - Optimize memory usage by beginning keyword argument list with the selections that will result + in the largest reduction of object size. + - Use dimension "vegtype" to extract patches of designated vegetation type (can be string or + integer). + - Can also do dimension=function---e.g., time=np.mean will take the mean over the time + dimension. + """ # Setup havewarned = False delimiter = "__" @@ -438,8 +456,10 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr return xr_object -# Get PFT of each patch, in both integer and string forms. def get_patch_ivts(this_ds, this_pftlist): + """ + Get PFT of each patch, in both integer and string forms. + """ # First, get all the integer values; should be time*pft or pft*time. We will eventually just take the first timestep. vegtype_int = this_ds.patches1d_itype_veg vegtype_int.values = vegtype_int.values.astype(int) @@ -451,8 +471,10 @@ def get_patch_ivts(this_ds, this_pftlist): return {"int": vegtype_int, "str": vegtype_str, "all_str": this_pftlist} -# Convert a list of strings with vegetation type names into a DataArray. Used to add vegetation type info in import_ds(). def get_vegtype_str_da(vegtype_str): + """ + Convert a list of strings with vegetation type names into a DataArray. Used to add vegetation type info in import_ds(). + """ nvt = len(vegtype_str) vegtype_str_da = xr.DataArray( vegtype_str, coords={"ivt": np.arange(0, nvt)}, dims=["ivt"], name="vegtype_str" @@ -460,9 +482,16 @@ def get_vegtype_str_da(vegtype_str): return vegtype_str_da -# Function to drop unwanted variables in preprocessing of open_mfdataset(), making sure to NOT drop any unspecified variables that will be useful in gridding. Also adds vegetation type info in the form of a DataArray of strings. -# Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be named like "patch". This can later be reversed, for compatibility with other code, using patch2pft(). def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): + """ + Function to drop unwanted variables in preprocessing of open_mfdataset(). + + - Makes sure to NOT drop any unspecified variables that will be useful in gridding. + - Also adds vegetation type info in the form of a DataArray of strings. + - Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be + named like "patch". This can later be reversed, for compatibility with other code, using + patch2pft(). + """ # Rename "pft" dimension and variables to "patch", if needed if "pft" in ds_in.dims: pattern = re.compile("pft.*1d") @@ -564,7 +593,6 @@ def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): return ds_in -# Import a dataset that can be spread over multiple files, only including specified variables and/or vegetation types and/or timesteps, concatenating by time. DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or vegetation types. def import_ds( filelist, my_vars=None, @@ -575,6 +603,13 @@ def import_ds( rename_lsmlatlon=False, chunks=None, ): + """ + Import a dataset that can be spread over multiple files, only including specified variables + and/or vegetation types and/or timesteps, concatenating by time. + + - DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or + vegetation types. + """ # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file in xr.open_mfdataset(). if my_vegtypes is not None: if not isinstance(my_vegtypes, list): @@ -675,8 +710,10 @@ def import_ds( return this_ds -# Return a DataArray, with defined coordinates, for a given variable in a dataset. def get_thisvar_da(var, this_ds): + """ + Return a DataArray, with defined coordinates, for a given variable in a dataset. + """ # Make DataArray for this variable thisvar_da = np.array(this_ds.variables[var]) these_dims = this_ds.variables[var].dims @@ -692,10 +729,16 @@ def get_thisvar_da(var, this_ds): return thisvar_da -# Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, lon) of one variable within a Dataset. Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices along the specified ax(ie)s. -# -# fill_value: Default None means grid will be filled with NaN, unless the variable in question already has a _FillValue, in which case that will be used. def grid_one_variable(this_ds, var, fill_value=None, **kwargs): + """ + Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, + lon) of one variable within a Dataset. + + - Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices + along the specified ax(ie)s. + - fill_value: Default None means grid will be filled with NaN, unless the variable in question + already has a _FillValue, in which case that will be used. + """ # Get this Dataset's values for selection(s), if provided this_ds = xr_flexsel(this_ds, **kwargs) @@ -794,8 +837,12 @@ def grid_one_variable(this_ds, var, fill_value=None, **kwargs): return thisvar_gridded -# ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) for some reason. This function tries to fall back to slicing by integers. It should work with both Datasets and DataArrays. def safer_timeslice(ds_in, time_slice, time_var="time"): + """ + ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) + for some reason. This function tries to fall back to slicing by integers. It should work with + both Datasets and DataArrays. + """ try: ds_in = ds_in.sel({time_var: time_slice}) except: # pylint: disable=bare-except @@ -826,8 +873,15 @@ def safer_timeslice(ds_in, time_slice, time_var="time"): return ds_in -# Convert a longitude axis that's -180 to 180 around the international date line to one that's 0 to 360 around the prime meridian. If you pass in a Dataset or DataArray, the "lon" coordinates will be changed. Otherwise, it assumes you're passing in numeric data. def lon_idl2pm(lons_in, fail_silently=False): + """ + Convert a longitude axis that's -180 to 180 around the international date line to one that's 0 + to 360 around the prime meridian. + + - If you pass in a Dataset or DataArray, the "lon" coordinates will be changed. Otherwise, it + assumes you're passing in numeric data. + """ + def check_ok(tmp, fail_silently): msg = "" @@ -871,14 +925,19 @@ def do_it(tmp): return lons_out -# Helper function to check that a list is strictly increasing def is_strictly_increasing(this_list): - # https://stackoverflow.com/a/4983359/2965321 + """ + Helper function to check that a list is strictly increasing + + https://stackoverflow.com/a/4983359/2965321 + """ return all(x < y for x, y in zip(this_list, this_list[1:])) -# Ensure that longitude axis coordinates are monotonically increasing def make_lon_increasing(xr_obj): + """ + Ensure that longitude axis coordinates are monotonically increasing + """ if not "lon" in xr_obj.dims: return xr_obj diff --git a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py index c8e27a89f9..8db38ddf71 100644 --- a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py @@ -1,3 +1,6 @@ +""" +Regrid GGCMI sowing and harvest date files +""" from subprocess import run import os import glob @@ -40,6 +43,9 @@ def main(): def run_and_check(cmd): + """ + Run a given shell command and check its result + """ result = run( cmd, shell=True, @@ -50,8 +56,12 @@ def run_and_check(cmd): abort(f"Trouble running `{result.args}` in shell:\n{result.stdout}\n{result.stderr}") -# Functionized because these are shared by process_ggcmi_shdates def define_arguments(parser): + """ + Set up arguments shared between regrid_ggcmi_shdates and process_ggcmi_shdates + + Functionized because these are shared by process_ggcmi_shdates + """ # Required parser.add_argument( "-rr", @@ -92,6 +102,9 @@ def regrid_ggcmi_shdates( regrid_extension, crop_list, ): + """ + Regrid GGCMI sowing and harvest date files + """ logger.info(f"Regridding GGCMI crop calendars to {regrid_resolution}:") # Ensure we can call necessary shell script(s) From 538ab01a593b81a29c100a9601816e5bdd531305 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 16:12:11 -0700 Subject: [PATCH 30/40] Move xr_flexsel to its own module; functionize bits of it. --- python/ctsm/crop_calendars/cropcal_utils.py | 201 +------------ .../crop_calendars/generate_gdds_functions.py | 5 +- python/ctsm/crop_calendars/xr_flexsel.py | 263 ++++++++++++++++++ 3 files changed, 267 insertions(+), 202 deletions(-) create mode 100644 python/ctsm/crop_calendars/xr_flexsel.py diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index db0d1f4777..171e0bae56 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -9,6 +9,7 @@ import numpy as np import xarray as xr +from ctsm.crop_calendars.xr_flexsel import xr_flexsel def define_pftlist(): @@ -256,206 +257,6 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None): return indices -def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=True, **kwargs): - """ - Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. - - - Keyword arguments like dimension=selection. - - Selections can be individual values or slice()s. - - Optimize memory usage by beginning keyword argument list with the selections that will result - in the largest reduction of object size. - - Use dimension "vegtype" to extract patches of designated vegetation type (can be string or - integer). - - Can also do dimension=function---e.g., time=np.mean will take the mean over the time - dimension. - """ - # Setup - havewarned = False - delimiter = "__" - - for key, selection in kwargs.items(): - if callable(selection): - # It would have been really nice to do selection(xr_object, axis=key), but numpy methods and xarray methods disagree on "axis" vs. "dimension." So instead, just do this manually. - if selection == np.mean: - try: - xr_object = xr_object.mean(dim=key) - except: - raise ValueError( - f"Failed to take mean of dimension {key}. Try doing so outside of" - " xr_flexsel()." - ) - else: - raise ValueError(f"xr_flexsel() doesn't recognize function {selection}") - - elif key == "vegtype": - # Convert to list, if needed - if not isinstance(selection, list): - selection = [selection] - - # Convert to indices, if needed - if isinstance(selection[0], str): - selection = vegtype_str2int(selection) - - # Get list of boolean(s) - if isinstance(selection[0], int): - if isinstance(patches1d_itype_veg, type(None)): - patches1d_itype_veg = xr_object.patches1d_itype_veg.values - elif isinstance(patches1d_itype_veg, xr.core.dataarray.DataArray): - patches1d_itype_veg = patches1d_itype_veg.values - is_vegtype = is_each_vegtype(patches1d_itype_veg, selection, "ok_exact") - elif isinstance(selection[0], bool): - if len(selection) != len(xr_object.patch): - raise ValueError( - "If providing boolean 'vegtype' argument to xr_flexsel(), it must be the" - f" same length as xr_object.patch ({len(selection)} vs." - f" {len(xr_object.patch)})" - ) - is_vegtype = selection - else: - raise TypeError(f"Not sure how to handle 'vegtype' of type {type(selection[0])}") - xr_object = xr_object.isel(patch=[i for i, x in enumerate(is_vegtype) if x]) - if "ivt" in xr_object: - xr_object = xr_object.isel( - ivt=is_each_vegtype(xr_object.ivt.values, selection, "ok_exact") - ) - - else: - # Parse selection type, if provided - if delimiter in key: - key, selection_type = key.split(delimiter) - - # Check type of selection - else: - is_inefficient = False - if isinstance(selection, slice): - slice_members = [] - if selection == slice(0): - raise ValueError("slice(0) will be empty") - if selection.start is not None: - slice_members = slice_members + [selection.start] - if selection.stop is not None: - slice_members = slice_members + [selection.stop] - if selection.step is not None: - slice_members = slice_members + [selection.step] - if slice_members == []: - raise TypeError("slice is all None?") - this_type = int - for member in slice_members: - if member < 0 or not isinstance(member, int): - this_type = "values" - break - elif isinstance(selection, np.ndarray): - if selection.dtype.kind in np.typecodes["AllInteger"]: - this_type = int - else: - is_inefficient = True - this_type = None - for member in selection: - if member < 0 or member % 1 > 0: - if isinstance(member, int): - this_type = "values" - else: - this_type = type(member) - break - if this_type is None: - this_type = int - selection = selection.astype(int) - else: - this_type = type(selection) - - warn_about_this_seltype_interp = warn_about_seltype_interp - if this_type == list and isinstance(selection[0], str): - selection_type = "values" - warn_about_this_seltype_interp = False - elif this_type == int: - selection_type = "indices" - else: - selection_type = "values" - - if warn_about_this_seltype_interp: - # Suggest suppressing selection type interpretation warnings - if not havewarned: - print( - "xr_flexsel(): Suppress all 'selection type interpretation' messages by" - " specifying warn_about_seltype_interp=False" - ) - havewarned = True - if is_inefficient: - extra = " This will also improve efficiency for large selections." - else: - extra = "" - print( - f"xr_flexsel(): Selecting {key} as {selection_type} because selection was" - f" interpreted as {this_type}. If not correct, specify selection type" - " ('indices' or 'values') in keyword like" - f" '{key}{delimiter}SELECTIONTYPE=...' instead of '{key}=...'.{extra}" - ) - - # Trim along relevant 1d axes - if isinstance(xr_object, xr.Dataset) and key in ["lat", "lon"]: - if selection_type == "indices": - incl_coords = xr_object[key].values[selection] - elif selection_type == "values": - if isinstance(selection, slice): - incl_coords = xr_object.sel({key: selection}, drop=False)[key].values - else: - incl_coords = selection - else: - raise TypeError(f"selection_type {selection_type} not recognized") - if key == "lat": - this_xy = "jxy" - elif key == "lon": - this_xy = "ixy" - else: - raise KeyError( - f"Key '{key}' not recognized: What 1d_ suffix should I use for variable" - " name?" - ) - pattern = re.compile(f"1d_{this_xy}") - matches = [x for x in list(xr_object.keys()) if pattern.search(x) is not None] - for var in matches: - if len(xr_object[var].dims) != 1: - raise RuntimeError( - f"Expected {var} to have 1 dimension, but it has" - f" {len(xr_object[var].dims)}: {xr_object[var].dims}" - ) - dim = xr_object[var].dims[0] - # print(f"Variable {var} has dimension {dim}") - coords = xr_object[key].values[xr_object[var].values.astype(int) - 1] - # print(f"{dim} size before: {xr_object.sizes[dim]}") - ok_ind = [] - new_1d_this_xy = [] - for i, member in enumerate(coords): - if member in incl_coords: - ok_ind = ok_ind + [i] - new_1d_this_xy = new_1d_this_xy + [ - (incl_coords == member).nonzero()[0] + 1 - ] - xr_object = xr_object.isel({dim: ok_ind}) - new_1d_this_xy = np.array(new_1d_this_xy).squeeze() - xr_object[var].values = new_1d_this_xy - # print(f"{dim} size after: {xr_object.sizes[dim]}") - - # Perform selection - if selection_type == "indices": - # Have to select like this instead of with index directly because otherwise assign_coords() will throw an error. Not sure why. - if isinstance(selection, int): - # Single integer? Turn it into a slice. - selection = slice(selection, selection + 1) - elif ( - isinstance(selection, np.ndarray) - and not selection.dtype.kind in np.typecodes["AllInteger"] - ): - selection = selection.astype(int) - xr_object = xr_object.isel({key: selection}) - elif selection_type == "values": - xr_object = xr_object.sel({key: selection}) - else: - raise TypeError(f"selection_type {selection_type} not recognized") - - return xr_object - - def get_patch_ivts(this_ds, this_pftlist): """ Get PFT of each patch, in both integer and string forms. diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index 0b8f1211b7..7c015f9dd6 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -20,6 +20,7 @@ sys.path.insert(1, _CTSM_PYTHON) import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position +from ctsm.crop_calendars.xr_flexsel import xr_flexsel # pylint: disable=wrong-import-position CAN_PLOT = True try: @@ -573,10 +574,10 @@ def import_and_process_1yr( continue vegtype_int = utils.vegtype_str2int(vegtype_str)[0] - this_crop_full_patchlist = list(utils.xr_flexsel(h2_ds, vegtype=vegtype_str).patch.values) + this_crop_full_patchlist = list(xr_flexsel(h2_ds, vegtype=vegtype_str).patch.values) # Get time series for each patch of this type - this_crop_ds = utils.xr_flexsel(h2_incl_ds, vegtype=vegtype_str) + this_crop_ds = xr_flexsel(h2_incl_ds, vegtype=vegtype_str) this_crop_gddaccum_da = this_crop_ds[clm_gdd_var] if save_figs: this_crop_gddharv_da = this_crop_ds["GDDHARV"] diff --git a/python/ctsm/crop_calendars/xr_flexsel.py b/python/ctsm/crop_calendars/xr_flexsel.py new file mode 100644 index 0000000000..1e30593946 --- /dev/null +++ b/python/ctsm/crop_calendars/xr_flexsel.py @@ -0,0 +1,263 @@ +""" +Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. +""" +import re +import numpy as np +import xarray as xr + +from ctsm.crop_calendars.cropcal_utils import vegtype_str2int, is_each_vegtype + + +def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=True, **kwargs): + """ + Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. + + - Keyword arguments like dimension=selection. + - Selections can be individual values or slice()s. + - Optimize memory usage by beginning keyword argument list with the selections that will result + in the largest reduction of object size. + - Use dimension "vegtype" to extract patches of designated vegetation type (can be string or + integer). + - Can also do dimension=function---e.g., time=np.mean will take the mean over the time + dimension. + """ + # Setup + havewarned = False + delimiter = "__" + + for key, selection in kwargs.items(): + if callable(selection): + xr_object = handle_callable(xr_object, key, selection) + + elif key == "vegtype": + xr_object = handle_vegtype(xr_object, patches1d_itype_veg, selection) + + else: + # Parse selection type, if provided + if delimiter in key: + key, selection_type = key.split(delimiter) + + # Check type of selection + else: + is_inefficient = False + if isinstance(selection, slice): + this_type = set_type_from_slice(selection) + elif isinstance(selection, np.ndarray): + selection, is_inefficient, this_type = set_type_from_ndarray(selection) + else: + this_type = type(selection) + + warn_about_this_seltype_interp = warn_about_seltype_interp + if this_type == list and isinstance(selection[0], str): + selection_type = "values" + warn_about_this_seltype_interp = False + elif this_type == int: + selection_type = "indices" + else: + selection_type = "values" + + if warn_about_this_seltype_interp: + do_warn_about_seltype_interp( + havewarned, delimiter, key, selection_type, is_inefficient, this_type + ) + + # Trim along relevant 1d axes + if isinstance(xr_object, xr.Dataset) and key in ["lat", "lon"]: + xr_object = trim_along_relevant_1d_axes(xr_object, selection, selection_type, key) + + # Perform selection + xr_object = perform_selection(xr_object, key, selection, selection_type) + + return xr_object + + +def perform_selection(xr_object, key, selection, selection_type): + """ + Perform selection + """ + if selection_type == "indices": + # Have to select like this instead of with index directly because otherwise assign_coords() + # will throw an error. Not sure why. + if isinstance(selection, int): + # Single integer? Turn it into a slice. + selection = slice(selection, selection + 1) + elif ( + isinstance(selection, np.ndarray) + and not selection.dtype.kind in np.typecodes["AllInteger"] + ): + selection = selection.astype(int) + xr_object = xr_object.isel({key: selection}) + elif selection_type == "values": + xr_object = xr_object.sel({key: selection}) + else: + raise TypeError(f"selection_type {selection_type} not recognized") + return xr_object + + +def trim_along_relevant_1d_axes(xr_object, selection, selection_type, key): + """ + Trim along relevant 1d axes + """ + if selection_type == "indices": + incl_coords = xr_object[key].values[selection] + elif selection_type == "values": + if isinstance(selection, slice): + incl_coords = xr_object.sel({key: selection}, drop=False)[key].values + else: + incl_coords = selection + else: + raise TypeError(f"selection_type {selection_type} not recognized") + if key == "lat": + this_xy = "jxy" + elif key == "lon": + this_xy = "ixy" + else: + raise KeyError( + f"Key '{key}' not recognized: What 1d_ suffix should I use for variable name?" + ) + pattern = re.compile(f"1d_{this_xy}") + matches = [x for x in list(xr_object.keys()) if pattern.search(x) is not None] + for var in matches: + if len(xr_object[var].dims) != 1: + raise RuntimeError( + f"Expected {var} to have 1 dimension, but it has" + f" {len(xr_object[var].dims)}: {xr_object[var].dims}" + ) + dim = xr_object[var].dims[0] + # print(f"Variable {var} has dimension {dim}") + coords = xr_object[key].values[xr_object[var].values.astype(int) - 1] + # print(f"{dim} size before: {xr_object.sizes[dim]}") + ok_ind = [] + new_1d_this_xy = [] + for i, member in enumerate(coords): + if member in incl_coords: + ok_ind = ok_ind + [i] + new_1d_this_xy = new_1d_this_xy + [(incl_coords == member).nonzero()[0] + 1] + xr_object = xr_object.isel({dim: ok_ind}) + new_1d_this_xy = np.array(new_1d_this_xy).squeeze() + xr_object[var].values = new_1d_this_xy + # print(f"{dim} size after: {xr_object.sizes[dim]}") + return xr_object + + +def do_warn_about_seltype_interp( + havewarned, delimiter, key, selection_type, is_inefficient, this_type +): + """ + Suggest suppressing selection type interpretation warnings + """ + if not havewarned: + print( + "xr_flexsel(): Suppress all 'selection type interpretation' messages by specifying" + + "warn_about_seltype_interp=False" + ) + havewarned = True + if is_inefficient: + extra = " This will also improve efficiency for large selections." + else: + extra = "" + print( + f"xr_flexsel(): Selecting {key} as {selection_type} because selection was" + f" interpreted as {this_type}. If not correct, specify selection type" + " ('indices' or 'values') in keyword like" + f" '{key}{delimiter}SELECTIONTYPE=...' instead of '{key}=...'.{extra}" + ) + + +def set_type_from_ndarray(selection): + """ + Sets selection type if given a Numpy array + """ + if selection.dtype.kind in np.typecodes["AllInteger"]: + this_type = int + else: + is_inefficient = True + this_type = None + for member in selection: + if member < 0 or member % 1 > 0: + if isinstance(member, int): + this_type = "values" + else: + this_type = type(member) + break + if this_type is None: + this_type = int + selection = selection.astype(int) + return selection, is_inefficient, this_type + + +def set_type_from_slice(selection): + """ + Sets selection type if given a slice + """ + slice_members = [] + if selection == slice(0): + raise ValueError("slice(0) will be empty") + if selection.start is not None: + slice_members = slice_members + [selection.start] + if selection.stop is not None: + slice_members = slice_members + [selection.stop] + if selection.step is not None: + slice_members = slice_members + [selection.step] + if not slice_members: + raise TypeError("slice is all None?") + this_type = int + for member in slice_members: + if member < 0 or not isinstance(member, int): + this_type = "values" + break + return this_type + + +def handle_vegtype(xr_object, patches1d_itype_veg, selection): + """ + Handle selection "vegtype + """ + # Convert to list, if needed + if not isinstance(selection, list): + selection = [selection] + + # Convert to indices, if needed + if isinstance(selection[0], str): + selection = vegtype_str2int(selection) + + # Get list of boolean(s) + if isinstance(selection[0], int): + if isinstance(patches1d_itype_veg, type(None)): + patches1d_itype_veg = xr_object.patches1d_itype_veg.values + elif isinstance(patches1d_itype_veg, xr.core.dataarray.DataArray): + patches1d_itype_veg = patches1d_itype_veg.values + is_vegtype = is_each_vegtype(patches1d_itype_veg, selection, "ok_exact") + elif isinstance(selection[0], bool): + if len(selection) != len(xr_object.patch): + raise ValueError( + "If providing boolean 'vegtype' argument to xr_flexsel(), it must be the" + f" same length as xr_object.patch ({len(selection)} vs." + f" {len(xr_object.patch)})" + ) + is_vegtype = selection + else: + raise TypeError(f"Not sure how to handle 'vegtype' of type {type(selection[0])}") + xr_object = xr_object.isel(patch=[i for i, x in enumerate(is_vegtype) if x]) + if "ivt" in xr_object: + xr_object = xr_object.isel(ivt=is_each_vegtype(xr_object.ivt.values, selection, "ok_exact")) + + return xr_object + + +def handle_callable(xr_object, key, selection): + """ + Handle selection that's a callable + """ + # It would have been really nice to do selection(xr_object, axis=key), but numpy methods and + # xarray methods disagree on "axis" vs. "dimension." So instead, just do this manually. + if selection == np.mean: # pylint: disable=comparison-with-callable + try: + xr_object = xr_object.mean(dim=key) + except: + raise ValueError( + f"Failed to take mean of dimension {key}. Try doing so outside of xr_flexsel()." + ) + else: + raise ValueError(f"xr_flexsel() doesn't recognize function {selection}") + return xr_object From f7ad444a95ff1c8be15b727ab9022101793927f1 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:02:21 -0700 Subject: [PATCH 31/40] Resolve pylint for regrid_ggcmi_shdates.py. --- .../crop_calendars/regrid_ggcmi_shdates.py | 102 ++++++++++-------- 1 file changed, 58 insertions(+), 44 deletions(-) diff --git a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py index 8db38ddf71..b1988aa8b5 100644 --- a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py @@ -6,9 +6,9 @@ import glob import argparse import sys +import logging import xarray as xr import numpy as np -import logging # -- add python/ctsm to path (needed if we want to run regrid_ggcmi_shdates stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) @@ -51,6 +51,7 @@ def run_and_check(cmd): shell=True, capture_output=True, text=True, + check=False, ) if result.returncode != 0: abort(f"Trouble running `{result.args}` in shell:\n{result.stdout}\n{result.stderr}") @@ -73,7 +74,11 @@ def define_arguments(parser): parser.add_argument( "-rt", "--regrid-template-file", - help="Template netCDF file to be used in regridding of inputs. This can be a CLM output file (i.e., something with 1-d lat and lon variables) or a CLM surface dataset (i.e., something with 2-d LATIXY and LONGXY variables).", + help=( + "Template netCDF file to be used in regridding of inputs. This can be a CLM output " + + "file (i.e., something with 1-d lat and lon variables) or a CLM surface dataset " + + "(i.e., something with 2-d LATIXY and LONGXY variables)." + ), type=str, required=True, ) @@ -88,7 +93,10 @@ def define_arguments(parser): parser.add_argument( "-c", "--crop-list", - help="List of GGCMI crops to process; e.g., '--crop-list mai_rf,mai_ir'. If not provided, will process all GGCMI crops.", + help=( + "List of GGCMI crops to process; e.g., '--crop-list mai_rf,mai_ir'. If not provided, " + + "will process all GGCMI crops." + ), default=None, ) return parser @@ -105,7 +113,7 @@ def regrid_ggcmi_shdates( """ Regrid GGCMI sowing and harvest date files """ - logger.info(f"Regridding GGCMI crop calendars to {regrid_resolution}:") + logger.info("Regridding GGCMI crop calendars to %s:", regrid_resolution) # Ensure we can call necessary shell script(s) for cmd in ["module load cdo; cdo"]: @@ -129,31 +137,7 @@ def regrid_ggcmi_shdates( regrid_extension = "." + regrid_extension # Import and format latitude - if "lat" in template_ds_in: - lat, n_lat = import_coord_1d(template_ds_in, "lat") - elif "LATIXY" in template_ds_in: - lat, n_lat = import_coord_2d(template_ds_in, "lat", "LATIXY") - lat.attrs["axis"] = "Y" - else: - abort("No latitude variable found in regrid template file") - - # Flip latitude, if needed - if lat.values[0] < lat.values[1]: - lat = lat.reindex(lat=list(reversed(lat["lat"]))) - - # Import and format longitude - if "lon" in template_ds_in: - lon, n_lon = import_coord_1d(template_ds_in, "lon") - elif "LONGXY" in template_ds_in: - lon, n_lon = import_coord_2d(template_ds_in, "lon", "LONGXY") - lon.attrs["axis"] = "Y" - else: - abort("No longitude variable found in regrid template file") - template_da_out = xr.DataArray( - data=np.full((n_lat, n_lon), 0.0), - dims={"lat": lat, "lon": lon}, - name="area", - ) + lat, lon, template_da_out = get_template_da_out(template_ds_in) # Save template Dataset for use by cdo template_ds_out = xr.Dataset( @@ -177,7 +161,7 @@ def regrid_ggcmi_shdates( if crop_list is not None and this_crop not in crop_list: continue - logger.info(" " + this_crop) + logger.info(" %s", this_crop) file_2 = os.path.join(regrid_output_directory, file) file_3 = file_2.replace( regrid_extension, f"_nninterp-{regrid_resolution}{regrid_extension}" @@ -186,31 +170,61 @@ def regrid_ggcmi_shdates( if os.path.exists(file_3): os.remove(file_3) - # Sometimes cdo fails for no apparent reason. In testing this never happened more than 3x in a row. + # Sometimes cdo fails for no apparent reason. In testing this never happened more than 3x + # in a row. + cdo_cmd = ( + f"module load cdo; cdo -L -remapnn,'{templatefile}' " + + f"-setmisstonn '{file}' '{file_3}'" + ) try: - run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" - ) + run_and_check(cdo_cmd) except: # pylint: disable=bare-except try: - run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" - ) + run_and_check(cdo_cmd) except: # pylint: disable=bare-except try: - run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" - ) + run_and_check(cdo_cmd) except: # pylint: disable=bare-except - run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" - ) + run_and_check(cdo_cmd) # Delete template file, which is no longer needed os.remove(templatefile) os.chdir(previous_dir) +def get_template_da_out(template_ds_in): + """ + Get template output DataArray from input Dataset + """ + if "lat" in template_ds_in: + lat, n_lat = import_coord_1d(template_ds_in, "lat") + elif "LATIXY" in template_ds_in: + lat, n_lat = import_coord_2d(template_ds_in, "lat", "LATIXY") + lat.attrs["axis"] = "Y" + else: + abort("No latitude variable found in regrid template file") + + # Flip latitude, if needed + if lat.values[0] < lat.values[1]: + lat = lat.reindex(lat=list(reversed(lat["lat"]))) + + # Import and format longitude + if "lon" in template_ds_in: + lon, n_lon = import_coord_1d(template_ds_in, "lon") + elif "LONGXY" in template_ds_in: + lon, n_lon = import_coord_2d(template_ds_in, "lon", "LONGXY") + lon.attrs["axis"] = "Y" + else: + abort("No longitude variable found in regrid template file") + template_da_out = xr.DataArray( + data=np.full((n_lat, n_lon), 0.0), + dims={"lat": lat, "lon": lon}, + name="area", + ) + + return lat, lon, template_da_out + + def regrid_ggcmi_shdates_arg_process(): """Process input arguments @@ -222,7 +236,7 @@ def regrid_ggcmi_shdates_arg_process(): ctsm_logging.setup_logging_pre_config() parser = argparse.ArgumentParser( - description="Regrids raw sowing and harvest date files provided by GGCMI to a target CLM resolution." + description=("Regrid raw sowing/harvest date files from GGCMI to a target CLM resolution."), ) # Define arguments From 35df413cdbbe5d1af714af7ef8429a4dd946332b Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:03:55 -0700 Subject: [PATCH 32/40] Move grid_one_variable() to its own module; functionize parts. --- python/ctsm/crop_calendars/cropcal_utils.py | 132 +------------ .../crop_calendars/generate_gdds_functions.py | 7 +- .../ctsm/crop_calendars/grid_one_variable.py | 179 ++++++++++++++++++ 3 files changed, 187 insertions(+), 131 deletions(-) create mode 100644 python/ctsm/crop_calendars/grid_one_variable.py diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index 171e0bae56..ebc275279c 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -511,133 +511,6 @@ def import_ds( return this_ds -def get_thisvar_da(var, this_ds): - """ - Return a DataArray, with defined coordinates, for a given variable in a dataset. - """ - # Make DataArray for this variable - thisvar_da = np.array(this_ds.variables[var]) - these_dims = this_ds.variables[var].dims - thisvar_da = xr.DataArray(thisvar_da, dims=these_dims) - - # Define coordinates of this variable's DataArray - dims_dict = dict() - for dim in these_dims: - dims_dict[dim] = this_ds[dim] - thisvar_da = thisvar_da.assign_coords(dims_dict) - thisvar_da.attrs = this_ds[var].attrs - - return thisvar_da - - -def grid_one_variable(this_ds, var, fill_value=None, **kwargs): - """ - Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, - lon) of one variable within a Dataset. - - - Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices - along the specified ax(ie)s. - - fill_value: Default None means grid will be filled with NaN, unless the variable in question - already has a _FillValue, in which case that will be used. - """ - # Get this Dataset's values for selection(s), if provided - this_ds = xr_flexsel(this_ds, **kwargs) - - # Get DataArrays needed for gridding - thisvar_da = get_thisvar_da(var, this_ds) - vt_da = None - if "patch" in thisvar_da.dims: - spatial_unit = "patch" - xy_1d_prefix = "patches" - if "patches1d_itype_veg" in this_ds: - vt_da = get_thisvar_da("patches1d_itype_veg", this_ds) - elif "gridcell" in thisvar_da.dims: - spatial_unit = "gridcell" - xy_1d_prefix = "grid" - else: - raise RuntimeError( - f"What variables to use for _ixy and _jxy of variable with dims {thisvar_da.dims}?" - ) - ixy_da = get_thisvar_da(xy_1d_prefix + "1d_ixy", this_ds) - jxy_da = get_thisvar_da(xy_1d_prefix + "1d_jxy", this_ds) - - if not fill_value and "_FillValue" in thisvar_da.attrs: - fill_value = thisvar_da.attrs["_FillValue"] - - # Renumber vt_da to work as indices on new ivt dimension, if needed. - ### Ensures that the unique set of vt_da values begins with 1 and - ### contains no missing steps. - if "ivt" in this_ds and vt_da is not None: - vt_da.values = np.array([np.where(this_ds.ivt.values == x)[0][0] for x in vt_da.values]) - - # Get new dimension list - new_dims = list(thisvar_da.dims) - ### Remove "[spatial_unit]". - if spatial_unit in new_dims: - new_dims.remove(spatial_unit) - # Add "ivt_str" (vegetation type, as string). This needs to go at the end, to avoid a possible situation where you wind up with multiple Ellipsis members of fill_indices. - if "ivt" in this_ds and spatial_unit == "patch": - new_dims.append("ivt_str") - ### Add lat and lon to end of list - new_dims = new_dims + ["lat", "lon"] - - # Set up empty array - dim_size_list = [] - for dim in new_dims: - if dim == "ivt_str": - dim_size = this_ds.sizes["ivt"] - elif dim in thisvar_da.coords: - dim_size = thisvar_da.sizes[dim] - else: - dim_size = this_ds.sizes[dim] - dim_size_list = dim_size_list + [dim_size] - thisvar_gridded = np.empty(dim_size_list) - if fill_value: - thisvar_gridded[:] = fill_value - else: - thisvar_gridded[:] = np.NaN - - # Fill with this variable - fill_indices = [] - for dim in new_dims: - if dim == "lat": - fill_indices.append(jxy_da.values.astype(int) - 1) - elif dim == "lon": - fill_indices.append(ixy_da.values.astype(int) - 1) - elif dim == "ivt_str": - fill_indices.append(vt_da) - elif not fill_indices: - # I.e., if fill_indices is empty. Could also do "elif len(fill_indices)==0". - fill_indices.append(Ellipsis) - try: - thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values - except: # pylint: disable=bare-except - thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values.transpose() - if not np.any(np.bitwise_not(np.isnan(thisvar_gridded))): - if np.all(np.isnan(thisvar_da.values)): - print("Warning: This DataArray (and thus map) is all NaN") - else: - raise RuntimeError("thisvar_gridded was not filled!") - - # Assign coordinates, attributes and name - thisvar_gridded = xr.DataArray(thisvar_gridded, dims=tuple(new_dims), attrs=thisvar_da.attrs) - for dim in new_dims: - if dim == "ivt_str": - values = this_ds.vegtype_str.values - elif dim in thisvar_da.coords: - values = thisvar_da[dim] - else: - values = this_ds[dim].values - thisvar_gridded = thisvar_gridded.assign_coords({dim: values}) - thisvar_gridded.name = var - - # Add FillValue attribute - if fill_value: - thisvar_gridded.attrs["_FillValue"] = fill_value - - return thisvar_gridded - - def safer_timeslice(ds_in, time_slice, time_var="time"): """ ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) @@ -648,7 +521,7 @@ def safer_timeslice(ds_in, time_slice, time_var="time"): ds_in = ds_in.sel({time_var: time_slice}) except: # pylint: disable=bare-except # If the issue might have been slicing using strings, try to fall back to integer slicing - if ( + can_try_integer_slicing = ( isinstance(time_slice.start, str) and isinstance(time_slice.stop, str) and len(time_slice.start.split("-")) == 3 @@ -658,7 +531,8 @@ def safer_timeslice(ds_in, time_slice, time_var="time"): time_slice.stop.split("-")[1:] == ["12", "31"] or time_slice.stop.split("-")[1:] == ["01", "01"] ) - ): + ) + if can_try_integer_slicing: fileyears = np.array([x.year for x in ds_in.time.values]) if len(np.unique(fileyears)) != len(fileyears): print("Could not fall back to integer slicing of years: Time axis not annual") diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index 7c015f9dd6..cb7315c00c 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -21,6 +21,9 @@ import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position from ctsm.crop_calendars.xr_flexsel import xr_flexsel # pylint: disable=wrong-import-position +from ctsm.crop_calendars.grid_one_variable import ( # pylint: disable=wrong-import-position + grid_one_variable, +) CAN_PLOT = True try: @@ -75,7 +78,7 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False): """ log(logger, " Checking that input and output sdates match...") - sdates_grid = utils.grid_one_variable(dates_ds, "SDATES") + sdates_grid = grid_one_variable(dates_ds, "SDATES") all_ok = True any_found = False @@ -212,7 +215,7 @@ def yp_list_to_ds(yp_list, daily_ds, incl_vegtypes_str, dates_rx, longname_prefi # Grid this crop this_ds["tmp"] = this_da - da_gridded = utils.grid_one_variable(this_ds, "tmp", vegtype=this_crop_str) + da_gridded = grid_one_variable(this_ds, "tmp", vegtype=this_crop_str) da_gridded = da_gridded.squeeze(drop=True) # Add singleton time dimension and save to output Dataset diff --git a/python/ctsm/crop_calendars/grid_one_variable.py b/python/ctsm/crop_calendars/grid_one_variable.py new file mode 100644 index 0000000000..cb5d330032 --- /dev/null +++ b/python/ctsm/crop_calendars/grid_one_variable.py @@ -0,0 +1,179 @@ +""" +Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, +lon) of one variable within a Dataset. + +- Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices + along the specified ax(ie)s. +- fill_value: Default None means grid will be filled with NaN, unless the variable in question + already has a _FillValue, in which case that will be used. +""" +import numpy as np +import xarray as xr +from ctsm.crop_calendars.xr_flexsel import xr_flexsel + + +def get_thisvar_da(var, this_ds): + """ + Return a DataArray, with defined coordinates, for a given variable in a dataset. + """ + # Make DataArray for this variable + thisvar_da = np.array(this_ds.variables[var]) + these_dims = this_ds.variables[var].dims + thisvar_da = xr.DataArray(thisvar_da, dims=these_dims) + + # Define coordinates of this variable's DataArray + dims_dict = dict() + for dim in these_dims: + dims_dict[dim] = this_ds[dim] + thisvar_da = thisvar_da.assign_coords(dims_dict) + thisvar_da.attrs = this_ds[var].attrs + + return thisvar_da + + +def convert_to_da(this_ds, var, fill_value, thisvar_da, new_dims, thisvar_gridded): + """ + Convert Numpy array to DataArray with coordinates, attributes and name + """ + thisvar_gridded = xr.DataArray(thisvar_gridded, dims=tuple(new_dims), attrs=thisvar_da.attrs) + for dim in new_dims: + if dim == "ivt_str": + values = this_ds.vegtype_str.values + elif dim in thisvar_da.coords: + values = thisvar_da[dim] + else: + values = this_ds[dim].values + thisvar_gridded = thisvar_gridded.assign_coords({dim: values}) + thisvar_gridded.name = var + + # Add FillValue attribute + if fill_value: + thisvar_gridded.attrs["_FillValue"] = fill_value + return thisvar_gridded + + +def grid_the_data(thisvar_da, vt_da, ixy_da, jxy_da, new_dims, thisvar_gridded): + """ + Fill lat-lon array with previously-ungridded data + """ + fill_indices = [] + for dim in new_dims: + if dim == "lat": + fill_indices.append(jxy_da.values.astype(int) - 1) + elif dim == "lon": + fill_indices.append(ixy_da.values.astype(int) - 1) + elif dim == "ivt_str": + fill_indices.append(vt_da) + elif not fill_indices: + # I.e., if fill_indices is empty. Could also do "elif len(fill_indices)==0". + fill_indices.append(Ellipsis) + try: + thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values + except: # pylint: disable=bare-except + thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values.transpose() + if not np.any(np.bitwise_not(np.isnan(thisvar_gridded))): + if np.all(np.isnan(thisvar_da.values)): + print("Warning: This DataArray (and thus map) is all NaN") + else: + raise RuntimeError("thisvar_gridded was not filled!") + + +def create_filled_array(this_ds, fill_value, thisvar_da, new_dims): + """ + Create a Numpy array to be filled with gridded data + """ + dim_size_list = [] + for dim in new_dims: + if dim == "ivt_str": + dim_size = this_ds.sizes["ivt"] + elif dim in thisvar_da.coords: + dim_size = thisvar_da.sizes[dim] + else: + dim_size = this_ds.sizes[dim] + dim_size_list = dim_size_list + [dim_size] + thisvar_gridded = np.empty(dim_size_list) + if fill_value: + thisvar_gridded[:] = fill_value + else: + thisvar_gridded[:] = np.NaN + return thisvar_gridded + + +def get_ixy_jxy_das(this_ds, var): + """ + Get DataArrays needed for gridding + """ + thisvar_da = get_thisvar_da(var, this_ds) + vt_da = None + if "patch" in thisvar_da.dims: + spatial_unit = "patch" + xy_1d_prefix = "patches" + if "patches1d_itype_veg" in this_ds: + vt_da = get_thisvar_da("patches1d_itype_veg", this_ds) + elif "gridcell" in thisvar_da.dims: + spatial_unit = "gridcell" + xy_1d_prefix = "grid" + else: + raise RuntimeError( + f"What variables to use for _ixy and _jxy of variable with dims {thisvar_da.dims}?" + ) + ixy_da = get_thisvar_da(xy_1d_prefix + "1d_ixy", this_ds) + jxy_da = get_thisvar_da(xy_1d_prefix + "1d_jxy", this_ds) + return thisvar_da, vt_da, spatial_unit, ixy_da, jxy_da + + +def get_new_dim_list(this_ds, thisvar_da, spatial_unit): + """ + Get new dimension list + """ + new_dims = list(thisvar_da.dims) + ### Remove "[spatial_unit]". + if spatial_unit in new_dims: + new_dims.remove(spatial_unit) + # Add "ivt_str" (vegetation type, as string). This needs to go at the end, to avoid a possible + # situation where you wind up with multiple Ellipsis members of fill_indices. + if "ivt" in this_ds and spatial_unit == "patch": + new_dims.append("ivt_str") + ### Add lat and lon to end of list + new_dims = new_dims + ["lat", "lon"] + return new_dims + + +def grid_one_variable(this_ds, var, fill_value=None, **kwargs): + """ + Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, + lon) of one variable within a Dataset. + + - Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices + along the specified ax(ie)s. + - fill_value: Default None means grid will be filled with NaN, unless the variable in question + already has a _FillValue, in which case that will be used. + """ + # Get this Dataset's values for selection(s), if provided + this_ds = xr_flexsel(this_ds, **kwargs) + + # Get DataArrays needed for gridding + thisvar_da, vt_da, spatial_unit, ixy_da, jxy_da = get_ixy_jxy_das(this_ds, var) + + if not fill_value and "_FillValue" in thisvar_da.attrs: + fill_value = thisvar_da.attrs["_FillValue"] + + # Renumber vt_da to work as indices on new ivt dimension, if needed. + ### Ensures that the unique set of vt_da values begins with 1 and + ### contains no missing steps. + if "ivt" in this_ds and vt_da is not None: + vt_da.values = np.array([np.where(this_ds.ivt.values == x)[0][0] for x in vt_da.values]) + + # Get new dimension list + new_dims = get_new_dim_list(this_ds, thisvar_da, spatial_unit) + + # Create a Numpy array to be filled with gridded data + thisvar_gridded = create_filled_array(this_ds, fill_value, thisvar_da, new_dims) + + # Fill lat-lon array with previously-ungridded data + grid_the_data(thisvar_da, vt_da, ixy_da, jxy_da, new_dims, thisvar_gridded) + + # Convert Numpy array to DataArray with coordinates, attributes and name + thisvar_gridded = convert_to_da(this_ds, var, fill_value, thisvar_da, new_dims, thisvar_gridded) + + return thisvar_gridded From 81ac26e9e25bb4b4edb33c99cee3b12dc3ca4641 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:12:54 -0700 Subject: [PATCH 33/40] Resolve most issues with import_ds(). --- python/ctsm/crop_calendars/cropcal_utils.py | 31 +++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index ebc275279c..fd35686fa1 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -5,7 +5,7 @@ import re import warnings -import importlib +from importlib.util import find_spec import numpy as np import xarray as xr @@ -399,8 +399,7 @@ def import_ds( my_vars=None, my_vegtypes=None, time_slice=None, - my_vars_missing_ok=[], - only_active_patches=False, + my_vars_missing_ok=None, rename_lsmlatlon=False, chunks=None, ): @@ -411,7 +410,10 @@ def import_ds( - DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or vegetation types. """ - # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file in xr.open_mfdataset(). + if my_vars_missing_ok is None: + my_vars_missing_ok = [] + # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file + # in xr.open_mfdataset(). if my_vegtypes is not None: if not isinstance(my_vegtypes, list): my_vegtypes = [my_vegtypes] @@ -433,7 +435,10 @@ def import_ds( my_vars_missing_ok = [my_vars_missing_ok] # Remove files from list if they don't contain requested timesteps. - # time_slice should be in the format slice(start,end[,step]). start or end can be None to be unbounded on one side. Note that the standard slice() documentation suggests that only elements through end-1 will be selected, but that seems not to be the case in the xarray implementation. + # time_slice should be in the format slice(start,end[,step]). start or end can be None to be + # unbounded on one side. Note that the standard slice() documentation suggests that only + # elements through end-1 will be selected, but that seems not to be the case in the xarray + # implementation. if time_slice: new_filelist = [] for file in sorted(filelist): @@ -443,14 +448,18 @@ def import_ds( if include_this_file: new_filelist.append(file) - # If you found some matching files, but then you find one that doesn't, stop going through the list. + # If you found some matching files, but then you find one that doesn't, stop going + # through the list. elif new_filelist: break if not new_filelist: raise RuntimeError(f"No files found in time_slice {time_slice}") filelist = new_filelist - # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function allows this. Could also just allow mfdataset_preproc() to access my_vars and my_vegtypes directly, but that's bad practice as it could lead to scoping issues. + # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one + # variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function + # allows this. Could also just allow mfdataset_preproc() to access my_vars and my_vegtypes + # directly, but that's bad practice as it could lead to scoping issues. mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, my_vars, my_vegtypes, time_slice) # Import @@ -459,7 +468,7 @@ def import_ds( if isinstance(filelist, list): with warnings.catch_warnings(): warnings.filterwarnings(action="ignore", category=DeprecationWarning) - if importlib.find_loader("dask") is None: + if find_spec("dask") is None: raise ModuleNotFoundError( "You have asked xarray to import a list of files as a single Dataset using" " open_mfdataset(), but this requires dask, which is not available.\nFile" @@ -480,12 +489,6 @@ def import_ds( this_ds = mfdataset_preproc(this_ds, my_vars, my_vegtypes, time_slice) this_ds = this_ds.compute() - # Include only active patches (or whatever) - if only_active_patches: - is_active = this_ds.patches1d_active.values - p_active = np.where(is_active)[0] - this_ds_active = this_ds.isel(patch=p_active) - # Warn and/or error about variables that couldn't be imported or derived if my_vars: missing_vars = [v for v in my_vars if v not in this_ds] From c2899bde3417f2373c408c1f5a567dc9d2b185d4 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:27:26 -0700 Subject: [PATCH 34/40] Move import_ds() to its own module. --- python/ctsm/crop_calendars/cropcal_module.py | 7 +- python/ctsm/crop_calendars/cropcal_utils.py | 239 +--------------- .../crop_calendars/generate_gdds_functions.py | 9 +- python/ctsm/crop_calendars/import_ds.py | 267 ++++++++++++++++++ 4 files changed, 279 insertions(+), 243 deletions(-) create mode 100644 python/ctsm/crop_calendars/import_ds.py diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index b3b415b77c..671a6334c7 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -25,6 +25,9 @@ from ctsm.crop_calendars.cropcal_constants import ( # pylint: disable=wrong-import-position DEFAULT_GDD_MIN, ) +from ctsm.crop_calendars.import_ds import ( # pylint: disable=wrong-import-position + import_ds, +) def check_and_trim_years(year_1, year_n, ds_in): @@ -266,7 +269,7 @@ def import_rx_dates(var_prefix, date_infile, dates_ds, set_neg1_to_nan=True): this_var = f"{var_prefix}{j+1}_{i}" date_varlist = date_varlist + [this_var] - this_ds = utils.import_ds(date_infile, my_vars=date_varlist) + this_ds = import_ds(date_infile, my_vars=date_varlist) did_warn = False for var in this_ds: @@ -355,7 +358,7 @@ def import_output( Import CLM output """ # Import - this_ds = utils.import_ds(filename, my_vars=my_vars, my_vegtypes=my_vegtypes) + this_ds = import_ds(filename, my_vars=my_vars, my_vegtypes=my_vegtypes) # Trim to years of interest (do not include extra year needed for finishing last growing season) if year_1 and year_n: diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index fd35686fa1..e2b94070a1 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -2,14 +2,8 @@ utility functions copied from klindsay, https://github.com/klindsay28/CESM2_coup_carb_cycle_JAMES/blob/master/utils.py """ - -import re -import warnings -from importlib.util import find_spec - import numpy as np import xarray as xr -from ctsm.crop_calendars.xr_flexsel import xr_flexsel def define_pftlist(): @@ -274,7 +268,7 @@ def get_patch_ivts(this_ds, this_pftlist): def get_vegtype_str_da(vegtype_str): """ - Convert a list of strings with vegetation type names into a DataArray. Used to add vegetation type info in import_ds(). + Convert a list of strings with vegetation type names into a DataArray. """ nvt = len(vegtype_str) vegtype_str_da = xr.DataArray( @@ -283,237 +277,6 @@ def get_vegtype_str_da(vegtype_str): return vegtype_str_da -def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): - """ - Function to drop unwanted variables in preprocessing of open_mfdataset(). - - - Makes sure to NOT drop any unspecified variables that will be useful in gridding. - - Also adds vegetation type info in the form of a DataArray of strings. - - Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be - named like "patch". This can later be reversed, for compatibility with other code, using - patch2pft(). - """ - # Rename "pft" dimension and variables to "patch", if needed - if "pft" in ds_in.dims: - pattern = re.compile("pft.*1d") - matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] - pft2patch_dict = {"pft": "patch"} - for match in matches: - pft2patch_dict[match] = match.replace("pft", "patch").replace("patchs", "patches") - ds_in = ds_in.rename(pft2patch_dict) - - derived_vars = [] - if vars_to_import is not None: - # Split vars_to_import into variables that are vs. aren't already in ds - derived_vars = [v for v in vars_to_import if v not in ds_in] - present_vars = [v for v in vars_to_import if v in ds_in] - vars_to_import = present_vars - - # Get list of dimensions present in variables in vars_to_import. - dim_list = [] - for var in vars_to_import: - # list(set(x)) returns a list of the unique items in x - dim_list = list(set(dim_list + list(ds_in.variables[var].dims))) - - # Get any _1d variables that are associated with those dimensions. These will be useful in gridding. Also, if any dimension is "pft", set up to rename it and all like-named variables to "patch" - oned_vars = [] - for dim in dim_list: - pattern = re.compile(f"{dim}.*1d") - matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] - oned_vars = list(set(oned_vars + matches)) - - # Add dimensions and _1d variables to vars_to_import - vars_to_import = list(set(vars_to_import + list(ds_in.dims) + oned_vars)) - - # Add any _bounds variables - bounds_vars = [] - for var in vars_to_import: - bounds_var = var + "_bounds" - if bounds_var in ds_in: - bounds_vars = bounds_vars + [bounds_var] - vars_to_import = vars_to_import + bounds_vars - - # Get list of variables to drop - varlist = list(ds_in.variables) - vars_to_drop = list(np.setdiff1d(varlist, vars_to_import)) - - # Drop them - ds_in = ds_in.drop_vars(vars_to_drop) - - # Add vegetation type info - if "patches1d_itype_veg" in list(ds_in): - this_pftlist = define_pftlist() - get_patch_ivts( - ds_in, this_pftlist - ) # Includes check of whether vegtype changes over time anywhere - vegtype_da = get_vegtype_str_da(this_pftlist) - patches1d_itype_veg_str = vegtype_da.values[ - ds_in.isel(time=0).patches1d_itype_veg.values.astype(int) - ] - npatch = len(patches1d_itype_veg_str) - patches1d_itype_veg_str = xr.DataArray( - patches1d_itype_veg_str, - coords={"patch": np.arange(0, npatch)}, - dims=["patch"], - name="patches1d_itype_veg_str", - ) - ds_in = xr.merge([ds_in, vegtype_da, patches1d_itype_veg_str]) - - # Restrict to veg. types of interest, if any - if vegtypes_to_import is not None: - ds_in = xr_flexsel(ds_in, vegtype=vegtypes_to_import) - - # Restrict to time slice, if any - if time_slice: - ds_in = safer_timeslice(ds_in, time_slice) - - # Finish import - ds_in = xr.decode_cf(ds_in, decode_times=True) - - # Compute derived variables - for var in derived_vars: - if ( - var == "HYEARS" - and "HDATES" in ds_in - and ds_in.HDATES.dims == ("time", "mxharvests", "patch") - ): - year_list = np.array([np.float32(x.year - 1) for x in ds_in.time.values]) - hyears = ds_in["HDATES"].copy() - hyears.values = np.tile( - np.expand_dims(year_list, (1, 2)), - (1, ds_in.dims["mxharvests"], ds_in.dims["patch"]), - ) - with np.errstate(invalid="ignore"): - is_le_zero = ~np.isnan(ds_in.HDATES.values) & (ds_in.HDATES.values <= 0) - hyears.values[is_le_zero] = ds_in.HDATES.values[is_le_zero] - hyears.values[np.isnan(ds_in.HDATES.values)] = np.nan - hyears.attrs["long_name"] = "DERIVED: actual crop harvest years" - hyears.attrs["units"] = "year" - ds_in["HYEARS"] = hyears - - return ds_in - - -def import_ds( - filelist, - my_vars=None, - my_vegtypes=None, - time_slice=None, - my_vars_missing_ok=None, - rename_lsmlatlon=False, - chunks=None, -): - """ - Import a dataset that can be spread over multiple files, only including specified variables - and/or vegetation types and/or timesteps, concatenating by time. - - - DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or - vegetation types. - """ - if my_vars_missing_ok is None: - my_vars_missing_ok = [] - # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file - # in xr.open_mfdataset(). - if my_vegtypes is not None: - if not isinstance(my_vegtypes, list): - my_vegtypes = [my_vegtypes] - if isinstance(my_vegtypes[0], str): - my_vegtypes = vegtype_str2int(my_vegtypes) - - # Same for these variables. - if my_vars is not None: - if not isinstance(my_vars, list): - my_vars = [my_vars] - if my_vars_missing_ok: - if not isinstance(my_vars_missing_ok, list): - my_vars_missing_ok = [my_vars_missing_ok] - - # Make sure lists are actually lists - if not isinstance(filelist, list): - filelist = [filelist] - if not isinstance(my_vars_missing_ok, list): - my_vars_missing_ok = [my_vars_missing_ok] - - # Remove files from list if they don't contain requested timesteps. - # time_slice should be in the format slice(start,end[,step]). start or end can be None to be - # unbounded on one side. Note that the standard slice() documentation suggests that only - # elements through end-1 will be selected, but that seems not to be the case in the xarray - # implementation. - if time_slice: - new_filelist = [] - for file in sorted(filelist): - filetime = xr.open_dataset(file).time - filetime_sel = safer_timeslice(filetime, time_slice) - include_this_file = filetime_sel.size - if include_this_file: - new_filelist.append(file) - - # If you found some matching files, but then you find one that doesn't, stop going - # through the list. - elif new_filelist: - break - if not new_filelist: - raise RuntimeError(f"No files found in time_slice {time_slice}") - filelist = new_filelist - - # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one - # variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function - # allows this. Could also just allow mfdataset_preproc() to access my_vars and my_vegtypes - # directly, but that's bad practice as it could lead to scoping issues. - mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, my_vars, my_vegtypes, time_slice) - - # Import - if isinstance(filelist, list) and len(filelist) == 1: - filelist = filelist[0] - if isinstance(filelist, list): - with warnings.catch_warnings(): - warnings.filterwarnings(action="ignore", category=DeprecationWarning) - if find_spec("dask") is None: - raise ModuleNotFoundError( - "You have asked xarray to import a list of files as a single Dataset using" - " open_mfdataset(), but this requires dask, which is not available.\nFile" - f" list: {filelist}" - ) - this_ds = xr.open_mfdataset( - sorted(filelist), - data_vars="minimal", - preprocess=mfdataset_preproc_closure, - compat="override", - coords="all", - concat_dim="time", - combine="nested", - chunks=chunks, - ) - elif isinstance(filelist, str): - this_ds = xr.open_dataset(filelist, chunks=chunks) - this_ds = mfdataset_preproc(this_ds, my_vars, my_vegtypes, time_slice) - this_ds = this_ds.compute() - - # Warn and/or error about variables that couldn't be imported or derived - if my_vars: - missing_vars = [v for v in my_vars if v not in this_ds] - ok_missing_vars = [v for v in missing_vars if v in my_vars_missing_ok] - bad_missing_vars = [v for v in missing_vars if v not in my_vars_missing_ok] - if ok_missing_vars: - print( - "Could not import some variables; either not present or not deriveable:" - f" {ok_missing_vars}" - ) - if bad_missing_vars: - raise RuntimeError( - "Could not import some variables; either not present or not deriveable:" - f" {bad_missing_vars}" - ) - - if rename_lsmlatlon: - if "lsmlat" in this_ds.dims: - this_ds = this_ds.rename({"lsmlat": "lat"}) - if "lsmlon" in this_ds.dims: - this_ds = this_ds.rename({"lsmlon": "lon"}) - - return this_ds - - def safer_timeslice(ds_in, time_slice, time_var="time"): """ ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index cb7315c00c..909e1f80a7 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -24,6 +24,9 @@ from ctsm.crop_calendars.grid_one_variable import ( # pylint: disable=wrong-import-position grid_one_variable, ) +from ctsm.crop_calendars.import_ds import ( # pylint: disable=wrong-import-position + import_ds, +) CAN_PLOT = True try: @@ -160,7 +163,7 @@ def import_rx_dates(s_or_h, date_infile, incl_patches1d_itype_veg, mxsowings, lo this_var = f"{s_or_h}date{n_sowing+1}_{i}" date_var_list = date_var_list + [this_var] - this_ds = utils.import_ds(date_infile, my_vars=date_var_list) + this_ds = import_ds(date_infile, my_vars=date_var_list) for var in this_ds: this_ds = this_ds.rename({var: var.replace(f"{s_or_h}date", "gs")}) @@ -274,7 +277,7 @@ def import_and_process_1yr( crops_to_read = utils.define_mgdcrop_list() print(h1_filelist) - dates_ds = utils.import_ds( + dates_ds = import_ds( h1_filelist, my_vars=["SDATES", "HDATES"], my_vegtypes=crops_to_read, @@ -543,7 +546,7 @@ def import_and_process_1yr( h2_files = glob.glob(pattern) if not h2_files: error(logger, f"No files found matching pattern '*h2.{this_year-1}-01-01*.nc(.base)'") - h2_ds = utils.import_ds( + h2_ds = import_ds( h2_files, my_vars=my_vars, my_vegtypes=crops_to_read, diff --git a/python/ctsm/crop_calendars/import_ds.py b/python/ctsm/crop_calendars/import_ds.py new file mode 100644 index 0000000000..77a22b626b --- /dev/null +++ b/python/ctsm/crop_calendars/import_ds.py @@ -0,0 +1,267 @@ +""" +Import a dataset that can be spread over multiple files, only including specified variables +and/or vegetation types and/or timesteps, concatenating by time. + +- DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or + vegetation types. +""" +import re +import warnings +from importlib.util import find_spec +import numpy as np +import xarray as xr +import ctsm.crop_calendars.cropcal_utils as utils +from ctsm.crop_calendars.xr_flexsel import xr_flexsel + + +def compute_derived_vars(ds_in, var): + """ + Compute derived variables + """ + if ( + var == "HYEARS" + and "HDATES" in ds_in + and ds_in.HDATES.dims == ("time", "mxharvests", "patch") + ): + year_list = np.array([np.float32(x.year - 1) for x in ds_in.time.values]) + hyears = ds_in["HDATES"].copy() + hyears.values = np.tile( + np.expand_dims(year_list, (1, 2)), + (1, ds_in.dims["mxharvests"], ds_in.dims["patch"]), + ) + with np.errstate(invalid="ignore"): + is_le_zero = ~np.isnan(ds_in.HDATES.values) & (ds_in.HDATES.values <= 0) + hyears.values[is_le_zero] = ds_in.HDATES.values[is_le_zero] + hyears.values[np.isnan(ds_in.HDATES.values)] = np.nan + hyears.attrs["long_name"] = "DERIVED: actual crop harvest years" + hyears.attrs["units"] = "year" + ds_in["HYEARS"] = hyears + else: + raise RuntimeError(f"Unable to compute derived variable {var}") + return ds_in + + +def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): + """ + Function to drop unwanted variables in preprocessing of open_mfdataset(). + + - Makes sure to NOT drop any unspecified variables that will be useful in gridding. + - Also adds vegetation type info in the form of a DataArray of strings. + - Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be + named like "patch". This can later be reversed, for compatibility with other code, using + patch2pft(). + """ + # Rename "pft" dimension and variables to "patch", if needed + if "pft" in ds_in.dims: + pattern = re.compile("pft.*1d") + matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] + pft2patch_dict = {"pft": "patch"} + for match in matches: + pft2patch_dict[match] = match.replace("pft", "patch").replace("patchs", "patches") + ds_in = ds_in.rename(pft2patch_dict) + + derived_vars = [] + if vars_to_import is not None: + # Split vars_to_import into variables that are vs. aren't already in ds + derived_vars = [v for v in vars_to_import if v not in ds_in] + present_vars = [v for v in vars_to_import if v in ds_in] + vars_to_import = present_vars + + # Get list of dimensions present in variables in vars_to_import. + dim_list = [] + for var in vars_to_import: + # list(set(x)) returns a list of the unique items in x + dim_list = list(set(dim_list + list(ds_in.variables[var].dims))) + + # Get any _1d variables that are associated with those dimensions. These will be useful in + # gridding. Also, if any dimension is "pft", set up to rename it and all like-named + # variables to "patch" + oned_vars = [] + for dim in dim_list: + pattern = re.compile(f"{dim}.*1d") + matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] + oned_vars = list(set(oned_vars + matches)) + + # Add dimensions and _1d variables to vars_to_import + vars_to_import = list(set(vars_to_import + list(ds_in.dims) + oned_vars)) + + # Add any _bounds variables + bounds_vars = [] + for var in vars_to_import: + bounds_var = var + "_bounds" + if bounds_var in ds_in: + bounds_vars = bounds_vars + [bounds_var] + vars_to_import = vars_to_import + bounds_vars + + # Get list of variables to drop + varlist = list(ds_in.variables) + vars_to_drop = list(np.setdiff1d(varlist, vars_to_import)) + + # Drop them + ds_in = ds_in.drop_vars(vars_to_drop) + + # Add vegetation type info + if "patches1d_itype_veg" in list(ds_in): + this_pftlist = utils.define_pftlist() + utils.get_patch_ivts( + ds_in, this_pftlist + ) # Includes check of whether vegtype changes over time anywhere + vegtype_da = utils.get_vegtype_str_da(this_pftlist) + patches1d_itype_veg_str = vegtype_da.values[ + ds_in.isel(time=0).patches1d_itype_veg.values.astype(int) + ] + npatch = len(patches1d_itype_veg_str) + patches1d_itype_veg_str = xr.DataArray( + patches1d_itype_veg_str, + coords={"patch": np.arange(0, npatch)}, + dims=["patch"], + name="patches1d_itype_veg_str", + ) + ds_in = xr.merge([ds_in, vegtype_da, patches1d_itype_veg_str]) + + # Restrict to veg. types of interest, if any + if vegtypes_to_import is not None: + ds_in = xr_flexsel(ds_in, vegtype=vegtypes_to_import) + + # Restrict to time slice, if any + if time_slice: + ds_in = utils.safer_timeslice(ds_in, time_slice) + + # Finish import + ds_in = xr.decode_cf(ds_in, decode_times=True) + + # Compute derived variables + for var in derived_vars: + ds_in = compute_derived_vars(ds_in, var) + + return ds_in + + +def process_inputs(filelist, my_vars, my_vegtypes, my_vars_missing_ok): + """ + Process inputs to import_ds() + """ + if my_vars_missing_ok is None: + my_vars_missing_ok = [] + # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file + # in xr.open_mfdataset(). + if my_vegtypes is not None: + if not isinstance(my_vegtypes, list): + my_vegtypes = [my_vegtypes] + if isinstance(my_vegtypes[0], str): + my_vegtypes = utils.vegtype_str2int(my_vegtypes) + + # Same for these variables. + if my_vars is not None: + if not isinstance(my_vars, list): + my_vars = [my_vars] + if my_vars_missing_ok: + if not isinstance(my_vars_missing_ok, list): + my_vars_missing_ok = [my_vars_missing_ok] + + # Make sure lists are actually lists + if not isinstance(filelist, list): + filelist = [filelist] + if not isinstance(my_vars_missing_ok, list): + my_vars_missing_ok = [my_vars_missing_ok] + return filelist, my_vars, my_vegtypes, my_vars_missing_ok + + +def import_ds( + filelist, + my_vars=None, + my_vegtypes=None, + time_slice=None, + my_vars_missing_ok=None, + rename_lsmlatlon=False, + chunks=None, +): + """ + Import a dataset that can be spread over multiple files, only including specified variables + and/or vegetation types and/or timesteps, concatenating by time. + + - DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or + vegetation types. + """ + filelist, my_vars, my_vegtypes, my_vars_missing_ok = process_inputs( + filelist, my_vars, my_vegtypes, my_vars_missing_ok + ) + + # Remove files from list if they don't contain requested timesteps. + # time_slice should be in the format slice(start,end[,step]). start or end can be None to be + # unbounded on one side. Note that the standard slice() documentation suggests that only + # elements through end-1 will be selected, but that seems not to be the case in the xarray + # implementation. + if time_slice: + new_filelist = [] + for file in sorted(filelist): + filetime = xr.open_dataset(file).time + filetime_sel = utils.safer_timeslice(filetime, time_slice) + include_this_file = filetime_sel.size + if include_this_file: + new_filelist.append(file) + + # If you found some matching files, but then you find one that doesn't, stop going + # through the list. + elif new_filelist: + break + if not new_filelist: + raise RuntimeError(f"No files found in time_slice {time_slice}") + filelist = new_filelist + + # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one + # variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function + # allows this. Could also just allow mfdataset_preproc() to access my_vars and my_vegtypes + # directly, but that's bad practice as it could lead to scoping issues. + mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, my_vars, my_vegtypes, time_slice) + + # Import + if isinstance(filelist, list) and len(filelist) == 1: + filelist = filelist[0] + if isinstance(filelist, list): + with warnings.catch_warnings(): + warnings.filterwarnings(action="ignore", category=DeprecationWarning) + if find_spec("dask") is None: + raise ModuleNotFoundError( + "You have asked xarray to import a list of files as a single Dataset using" + " open_mfdataset(), but this requires dask, which is not available.\nFile" + f" list: {filelist}" + ) + this_ds = xr.open_mfdataset( + sorted(filelist), + data_vars="minimal", + preprocess=mfdataset_preproc_closure, + compat="override", + coords="all", + concat_dim="time", + combine="nested", + chunks=chunks, + ) + elif isinstance(filelist, str): + this_ds = xr.open_dataset(filelist, chunks=chunks) + this_ds = mfdataset_preproc(this_ds, my_vars, my_vegtypes, time_slice) + this_ds = this_ds.compute() + + # Warn and/or error about variables that couldn't be imported or derived + if my_vars: + missing_vars = [v for v in my_vars if v not in this_ds] + ok_missing_vars = [v for v in missing_vars if v in my_vars_missing_ok] + bad_missing_vars = [v for v in missing_vars if v not in my_vars_missing_ok] + if ok_missing_vars: + print( + "Could not import some variables; either not present or not deriveable:" + f" {ok_missing_vars}" + ) + if bad_missing_vars: + raise RuntimeError( + "Could not import some variables; either not present or not deriveable:" + f" {bad_missing_vars}" + ) + + if rename_lsmlatlon: + if "lsmlat" in this_ds.dims: + this_ds = this_ds.rename({"lsmlat": "lat"}) + if "lsmlon" in this_ds.dims: + this_ds = this_ds.rename({"lsmlon": "lon"}) + + return this_ds From 605bb3b2ece854ff831313fe99a53acac8784413 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:30:37 -0700 Subject: [PATCH 35/40] Satisfy pylint for cropcal_utils.py. --- python/ctsm/crop_calendars/cropcal_utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index e2b94070a1..00ed2413d2 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -101,7 +101,7 @@ def ivt_str2int(ivt_str): pftlist = define_pftlist() if isinstance(ivt_str, str): ivt_int = pftlist.index(ivt_str) - elif isinstance(ivt_str, list) or isinstance(ivt_str, np.ndarray): + elif isinstance(ivt_str, (list, np.ndarray)): ivt_int = [ivt_str2int(x) for x in ivt_str] if isinstance(ivt_str, np.ndarray): ivt_int = np.array(ivt_int) @@ -120,7 +120,7 @@ def ivt_int2str(ivt_int): pftlist = define_pftlist() if np.issubdtype(type(ivt_int), np.integer) or int(ivt_int) == ivt_int: ivt_str = pftlist[int(ivt_int)] - elif isinstance(ivt_int, list) or isinstance(ivt_int, np.ndarray): + elif isinstance(ivt_int, (list, np.ndarray)): ivt_str = [ivt_int2str(x) for x in ivt_int] if isinstance(ivt_int, np.ndarray): ivt_str = np.array(ivt_str) @@ -150,7 +150,7 @@ def is_this_vegtype(this_vegtype, this_filter, this_method): # Make sure data type of this_vegtype is acceptable if isinstance(this_vegtype, float) and int(this_vegtype) == this_vegtype: this_vegtype = int(this_vegtype) - data_type_ok = lambda x: isinstance(x, str) or isinstance(x, int) or isinstance(x, np.int64) + data_type_ok = lambda x: isinstance(x, (int, np.int64, str)) ok_input = True if not data_type_ok(this_vegtype): if isinstance(this_vegtype, xr.core.dataarray.DataArray): @@ -255,7 +255,8 @@ def get_patch_ivts(this_ds, this_pftlist): """ Get PFT of each patch, in both integer and string forms. """ - # First, get all the integer values; should be time*pft or pft*time. We will eventually just take the first timestep. + # First, get all the integer values; should be time*pft or pft*time. We will eventually just + # take the first timestep. vegtype_int = this_ds.patches1d_itype_veg vegtype_int.values = vegtype_int.values.astype(int) From 3808b4b8391b6dad3e7c9bdfd8b936ad972f424e Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:33:18 -0700 Subject: [PATCH 36/40] Ignore raise-missing-from in xr_flexsel.py. --- python/ctsm/crop_calendars/xr_flexsel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ctsm/crop_calendars/xr_flexsel.py b/python/ctsm/crop_calendars/xr_flexsel.py index 1e30593946..d51d925985 100644 --- a/python/ctsm/crop_calendars/xr_flexsel.py +++ b/python/ctsm/crop_calendars/xr_flexsel.py @@ -254,7 +254,7 @@ def handle_callable(xr_object, key, selection): if selection == np.mean: # pylint: disable=comparison-with-callable try: xr_object = xr_object.mean(dim=key) - except: + except: # pylint: disable=raise-missing-from raise ValueError( f"Failed to take mean of dimension {key}. Try doing so outside of xr_flexsel()." ) From a90049d260de22ff3b7fbe5c3bf71423fc6e3b31 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:50:37 -0700 Subject: [PATCH 37/40] Remove unneeded '_CTSM_PYTHON =' bits. --- .../crop_calendars/check_constant_vars.py | 14 +--------- python/ctsm/crop_calendars/check_rx_obeyed.py | 15 ++--------- .../crop_calendars/convert_axis_time2gs.py | 9 ------- python/ctsm/crop_calendars/cropcal_module.py | 26 ++++--------------- .../crop_calendars/generate_gdds_functions.py | 22 ++++------------ 5 files changed, 13 insertions(+), 73 deletions(-) diff --git a/python/ctsm/crop_calendars/check_constant_vars.py b/python/ctsm/crop_calendars/check_constant_vars.py index 1a5a4e62c6..aa25a412fe 100644 --- a/python/ctsm/crop_calendars/check_constant_vars.py +++ b/python/ctsm/crop_calendars/check_constant_vars.py @@ -2,20 +2,8 @@ For variables that should stay constant, make sure they are """ -import sys -import os import numpy as np - -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script -# in the RUN phase seems to require the python/ directory to be manually added to path. -_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -sys.path.insert(1, _CTSM_PYTHON) -from ctsm.crop_calendars.cropcal_module import ( # pylint: disable=wrong-import-position - import_rx_dates, -) +from ctsm.crop_calendars.cropcal_module import import_rx_dates def check_one_constant_var_setup(this_ds, case, var): diff --git a/python/ctsm/crop_calendars/check_rx_obeyed.py b/python/ctsm/crop_calendars/check_rx_obeyed.py index 3d769d3820..99b8d80bde 100644 --- a/python/ctsm/crop_calendars/check_rx_obeyed.py +++ b/python/ctsm/crop_calendars/check_rx_obeyed.py @@ -2,21 +2,10 @@ Check that prescribed crop calendars were obeyed """ -import sys -import os import numpy as np -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script -# in the RUN phase seems to require the python/ directory to be manually added to path. -_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position -from ctsm.crop_calendars.cropcal_constants import ( # pylint: disable=wrong-import-position - DEFAULT_GDD_MIN, -) +import ctsm.crop_calendars.cropcal_utils as utils +from ctsm.crop_calendars.cropcal_constants import DEFAULT_GDD_MIN def get_pct_harv_at_mature(harvest_reason_da): diff --git a/python/ctsm/crop_calendars/convert_axis_time2gs.py b/python/ctsm/crop_calendars/convert_axis_time2gs.py index f311d39e05..d48514370d 100644 --- a/python/ctsm/crop_calendars/convert_axis_time2gs.py +++ b/python/ctsm/crop_calendars/convert_axis_time2gs.py @@ -3,18 +3,9 @@ """ import warnings import sys -import os import numpy as np import xarray as xr -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script -# in the RUN phase seems to require the python/ directory to be manually added to path. -_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -sys.path.insert(1, _CTSM_PYTHON) - try: import pandas as pd except ModuleNotFoundError: diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index 671a6334c7..3fe6942f94 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -2,32 +2,16 @@ Helper functions for various crop calendar stuff """ -import sys import os import glob import numpy as np import xarray as xr -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script -# in the RUN phase seems to require the python/ directory to be manually added to path. -_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position -from ctsm.crop_calendars.convert_axis_time2gs import ( # pylint: disable=wrong-import-position - convert_axis_time2gs, -) -from ctsm.crop_calendars.check_rx_obeyed import ( # pylint: disable=wrong-import-position - check_rx_obeyed, -) -from ctsm.crop_calendars.cropcal_constants import ( # pylint: disable=wrong-import-position - DEFAULT_GDD_MIN, -) -from ctsm.crop_calendars.import_ds import ( # pylint: disable=wrong-import-position - import_ds, -) +import ctsm.crop_calendars.cropcal_utils as utils +from ctsm.crop_calendars.convert_axis_time2gs import convert_axis_time2gs +from ctsm.crop_calendars.check_rx_obeyed import check_rx_obeyed +from ctsm.crop_calendars.cropcal_constants import DEFAULT_GDD_MIN +from ctsm.crop_calendars.import_ds import import_ds def check_and_trim_years(year_1, year_n, ds_in): diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index 909e1f80a7..8af2fdc049 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -4,29 +4,17 @@ # pylint: disable=too-many-lines,too-many-statements import warnings import os -import sys import glob import datetime as dt from importlib import util as importlib_util import numpy as np import xarray as xr -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script -# in the RUN phase seems to require the python/ directory to be manually added to path. -_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position -import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position -from ctsm.crop_calendars.xr_flexsel import xr_flexsel # pylint: disable=wrong-import-position -from ctsm.crop_calendars.grid_one_variable import ( # pylint: disable=wrong-import-position - grid_one_variable, -) -from ctsm.crop_calendars.import_ds import ( # pylint: disable=wrong-import-position - import_ds, -) +import ctsm.crop_calendars.cropcal_utils as utils +import ctsm.crop_calendars.cropcal_module as cc +from ctsm.crop_calendars.xr_flexsel import xr_flexsel +from ctsm.crop_calendars.grid_one_variable import grid_one_variable +from ctsm.crop_calendars.import_ds import import_ds CAN_PLOT = True try: From 657cc5c45fc5ca65e84746c15d0a70097c29ce20 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 15:10:55 -0700 Subject: [PATCH 38/40] Fix imports in check_rxboth_run.py. --- python/ctsm/crop_calendars/check_rxboth_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ctsm/crop_calendars/check_rxboth_run.py b/python/ctsm/crop_calendars/check_rxboth_run.py index 126ef98bbc..ae4decde30 100644 --- a/python/ctsm/crop_calendars/check_rxboth_run.py +++ b/python/ctsm/crop_calendars/check_rxboth_run.py @@ -6,7 +6,6 @@ import glob import os import numpy as np -import cropcal_module as cc # pylint: disable=import-error # Import the CTSM Python utilities. # sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script @@ -15,6 +14,7 @@ os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" ) sys.path.insert(1, _CTSM_PYTHON) +import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position from ctsm.crop_calendars.check_rx_obeyed import ( # pylint: disable=wrong-import-position check_rx_obeyed, ) From b671ed77779d2296d62564ee2b7eeb8655354b81 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 13 Feb 2024 16:24:17 -0700 Subject: [PATCH 39/40] FSURDATMODIFYCTSM test (aux_clm/clm_pymods) moved from cheyenne_intel to derecho_gnu. derecho_intel doesn't currently work with debug mode on. --- cime_config/testdefs/testlist_clm.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index 4761a2111f..d8ef6357cd 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -3308,8 +3308,8 @@ - - + + From 9b43482202383badada23dd865bdd8ccdc27aba7 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 22 Feb 2024 09:42:06 -0700 Subject: [PATCH 40/40] Update ChangeLog and ChangeSum. --- doc/ChangeLog | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++ doc/ChangeSum | 1 + 2 files changed, 72 insertions(+) diff --git a/doc/ChangeLog b/doc/ChangeLog index fcdf87d053..d26715f699 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,4 +1,75 @@ =============================================================== +Tag name: ctsm5.1.dev169 +Originator(s): samrabin (Sam Rabin, UCAR/TSS, samrabin@ucar.edu) +Date: Thu 22 Feb 2024 09:42:57 AM MST +One-line Summary: Merge b4b-dev + +Purpose and description of changes +---------------------------------- + +Brings in 3 PRs from b4b-dev to master: +- Do not crash "make all" even if pylint isn't clean (ESCOMP/CTSM#2353; Sam Rabin) +- Resolve pylint issues (ESCOMP/CTSM#2354; Sam Rabin) +- Move FSURDATMODIFYCTSM test to Derecho (ESCOMP/CTSM#2364; Sam Rabin) + +Significant changes to scientifically-supported configurations +-------------------------------------------------------------- + +Does this tag change answers significantly for any of the following physics configurations? +(Details of any changes will be given in the "Answer changes" section below.) + +[ ] clm5_1 + +[ ] clm5_0 + +[ ] ctsm5_0-nwp + +[ ] clm4_5 + + +Bugs fixed +---------- + +CTSM issues fixed: +- Fixes ESCOMP/CTSM#2255: make lint is not clean in ctsm5.1.dev152 +- Fixes ESCOMP/CTSM#2316: "make all" doesn't run black if lint fails +- FIXES ESCOMP/CTSM#2362: FSURDATMODIFYCTSM test should be moved to Derecho or Izumi + + +Notes of particular relevance for developers: +--------------------------------------------- + +Changes to tests or testing: +- FSURDATMODIFYCTSM test changed from derecho_intel (didn't work in debug mode) to derecho_gnu. I.e., from + FSURDATMODIFYCTSM_D_Mmpi-serial_Ld1.5x5_amazon.I2000Clm50SpRs.derecho_intel + to + FSURDATMODIFYCTSM_D_Mmpi-serial_Ld1.5x5_amazon.I2000Clm50SpRs.derecho_gnu + + +Testing summary: +---------------- + + [PASS means all tests PASS; OK means tests PASS other than expected fails.] + + regular tests (aux_clm: https://github.com/ESCOMP/CTSM/wiki/System-Testing-Guide#pre-merge-system-testing): + + derecho ----- OK + izumi ------- OK + + any other testing (give details below): + - "make all" in python/ is clean. + + +Other details +------------- + +Pull Requests that document the changes (include PR ids): +- ESCOMP/CTSM#2353: Do not crash "make all" even if pylint isn't clean (https://github.com/ESCOMP/CTSM/pull/2353) +- ESCOMP/CTSM#2354: Resolve pylint issues (https://github.com/ESCOMP/CTSM/pull/2354) +- ESCOMP/CTSM#2364: Move FSURDATMODIFYCTSM test to Derecho (https://github.com/ESCOMP/CTSM/pull/2364) + +=============================================================== +=============================================================== Tag name: ctsm5.1.dev168 Originator(s): slevis (Samuel Levis,UCAR/TSS,303-665-1310) Date: Fri 16 Feb 2024 01:27:41 PM MST diff --git a/doc/ChangeSum b/doc/ChangeSum index 56a460ea85..18ae34626f 100644 --- a/doc/ChangeSum +++ b/doc/ChangeSum @@ -1,5 +1,6 @@ Tag Who Date Summary ============================================================================================================================ + ctsm5.1.dev169 samrabin 02/22/2024 Merge b4b-dev ctsm5.1.dev168 slevis 02/16/2024 Remove a source of negative snocan in CanopyFluxesMod ctsm5.1.dev167 samrabin 02/08/2024 Delete _FillValue and history from parameter files ctsm5.1.dev166 multiple 01/24/2024 BFB merge tag