From 368b017ddec5bae528bf98bc72bba9f781be7b49 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Tue, 6 Jul 2021 21:37:27 +0200 Subject: [PATCH 1/8] Fixed tests related to fx file retrieval --- doc/recipe/preprocessor.rst | 182 ++++++++++++++-------------- esmvalcore/_recipe.py | 18 ++- tests/integration/test_recipe.py | 196 ++++++++++++++++--------------- 3 files changed, 200 insertions(+), 196 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 5ec6ca5aa8..5fa78818d8 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -180,14 +180,62 @@ To get an overview on data fixes and how to implement new ones, please go to Fx variables as cell measures or ancillary variables ==================================================== -The following preprocessor may require the use of ``fx_variables`` -to be able to perform the computations: +The following preprocessor may require the use of ``fx_variables`` to be able +to perform the computations: - - ``area_statistics`` - - ``mask_landsea`` - - ``mask_landseaice`` - - ``volume_statistics`` - - ``weighting_landsea_fraction`` + - area_statistics_ (``areacella``, ``areacello``) + - :ref:`mask_landsea` (``sftlf``, ``sftof``) + - :ref:`mask_landseaice` (``sftgif``) + - volume_statistics_ (``volcello``) + - :ref:`weighting_landsea_fraction` (``sftlf``, ``sftof``) + +If no ``fx_variables`` are specified for these preprocessors, the fx variables +given in brackets are used. If given, the ``fx_variables`` argument specifies +the fx variables that the user wishes to input to the corresponding +preprocessor function; the user may specify it calling the variables e.g. + +.. code-block:: yaml + + fx_variables: + areacello: + volcello: + +or calling the variables and adding specific variable parameters (the key-value +pair may be as specific as a CMOR variable can permit): + +.. code-block:: yaml + + fx_variables: + areacello: + mip: Omon + volcello: + mip: fx + +Alternatively, the ``fx_variables`` argument can also be specified as a list: + +.. code-block:: yaml + + fx_variables: ['areacello', 'volcello'] + +or as a list of dictionaries: + +.. code-block:: yaml + + fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}] + +The recipe parser will automatically find the data files that are associated +with these variables and pass them to the function for loading and processing. + +If ``mip`` is not given, ESMValTool will search for the fx variable in all +available tables of the specified project (in alphabetical order) and return +the first match. + +.. note:: + Some fx variables exist in more than one table (e.g., ``volcello`` exists in + CMIP6's ``Ofx`` and ``Omon`` table or ``sftgif`` exists in CMIP6's ``fx`` + and ``IyrAnt`` table). If a specific table is desired, this needs to be + specified by ``mip``, otherwise the table is selected based on the + alphabetical order and data availability. The preprocessor step ``add_fx_variables`` loads the required ``fx_variables``, checks them against CMOR standards and adds them either as ``cell_measure`` @@ -195,7 +243,7 @@ or ``ancillary_variable`` inside the cube data. This ensures that the defined preprocessor chain is applied to both ``variables`` and ``fx_variables``. Note that when calling steps that require ``fx_variables`` inside diagnostic -scripts, the variables are expected to contain the required ``cell_measures`` or +scripts, the variables are expected to contain the required ``cell_measures`` or ``ancillary_variables``. If missing, they can be added using the following functions: .. code-block:: @@ -209,7 +257,7 @@ scripts, the variables are expected to contain the required ``cell_measures`` or cube_with_ancillary_sftlf = add_ancillary_variable(cube, sftlf_cube) cube_with_ancillary_sftgif = add_ancillary_variable(cube, sftgif_cube) - + Details on the arguments needed for each step can be found in the following sections. .. _Vertical interpolation: @@ -351,8 +399,8 @@ is for example useful for climate models which do not offer land/sea fraction files. This arguments also accepts the special dataset specifiers ``reference_dataset`` and ``alternative_dataset``. -Optionally you can specify your own custom fx variable to be used in cases when e.g. a certain -experiment is preferred for fx data retrieval: +Optionally you can specify your own custom fx variable to be used in cases when +e.g. a certain experiment is preferred for fx data retrieval: .. code-block:: yaml @@ -361,7 +409,7 @@ experiment is preferred for fx data retrieval: weighting_landsea_fraction: area_type: land exclude: ['CanESM2', 'reference_dataset'] - fx_variables: + fx_variables: sftlf: exp: piControl sftof: @@ -377,10 +425,13 @@ or alternatively: area_type: land exclude: ['CanESM2', 'reference_dataset'] fx_variables: [ - {'short_name': 'sftlf', 'exp': 'piControl'}, + {'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl'} ] +More details on the argument ``fx_variables`` and its default values are given +:ref:`here`. + See also :func:`esmvalcore.preprocessor.weighting_landsea_fraction`. @@ -426,11 +477,6 @@ To mask out a certain domain (e.g., sea) in the preprocessor, and requires only one argument: ``mask_out``: either ``land`` or ``sea``. -The preprocessor automatically retrieves the corresponding mask (``fx: stfof`` -in this case) and applies it so that sea-covered grid cells are set to -missing. Conversely, it retrieves the ``fx: sftlf`` mask when land needs to be -masked out, respectively. - Optionally you can specify your own custom fx variable to be used in cases when e.g. a certain experiment is preferred for fx data retrieval. Note that it is possible to specify as many tags for the fx variable as required: @@ -442,8 +488,8 @@ for the fx variable as required: landmask: mask_landsea: mask_out: sea - fx_variables: - sftlf: + fx_variables: + sftlf: exp: piControl sftof: exp: piControl @@ -458,10 +504,13 @@ or alternatively: mask_landsea: mask_out: sea fx_variables: [ - {'short_name': 'sftlf', 'exp': 'piControl'}, + {'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl', 'ensemble': 'r2i1p1f1'} ] +More details on the argument ``fx_variables`` and its default values are given +:ref:`here`. + If the corresponding fx file is not found (which is the case for some models and almost all observational datasets), the preprocessor attempts to mask the data using Natural Earth mask files (that are @@ -471,6 +520,8 @@ land and glaciated areas and 50m for ocean masks). See also :func:`esmvalcore.preprocessor.mask_landsea`. +.. _ice masking: + Ice masking ----------- @@ -487,11 +538,8 @@ losing generality. To mask ice out, ``mask_landseaice`` can be used: and requires only one argument: ``mask_out``: either ``landsea`` or ``ice``. -As in the case of ``mask_landsea``, the preprocessor automatically retrieves -the ``fx_variables: [sftgif]`` mask. - -Optionally you can specify your own custom fx variable to be used in cases when e.g. a certain -experiment is preferred for fx data retrieval: +Optionally you can specify your own custom fx variable to be used in cases when +e.g. a certain experiment is preferred for fx data retrieval: .. code-block:: yaml @@ -500,7 +548,7 @@ experiment is preferred for fx data retrieval: landseaicemask: mask_landseaice: mask_out: sea - fx_variables: + fx_variables: sftgif: exp: piControl @@ -514,6 +562,9 @@ or alternatively: mask_out: sea fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] +More details on the argument ``fx_variables`` and its default values are given +:ref:`here`. + See also :func:`esmvalcore.preprocessor.mask_landseaice`. Glaciated masking @@ -1420,40 +1471,9 @@ Note that this function is applied over the entire dataset. If only a specific region, depth layer or time period is required, then those regions need to be removed using other preprocessor operations in advance. -The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function; -the user may specify it calling the variables e.g. - -.. code-block:: yaml - - fx_variables: - areacello: - volcello: - -or calling the variables and adding specific variable parameters (the key-value pair may be as specific -as a CMOR variable can permit): - -.. code-block:: yaml - - fx_variables: - areacello: - mip: Omon - volcello: - mip: fx - -Alternatively, the ``fx_variables`` argument can also be specified as a list: - -.. code-block:: yaml - - fx_variables: ['areacello', 'volcello'] - -or as a list of dictionaries: - -.. code-block:: yaml - - fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}] - -The recipe parser will automatically find the data files that are associated with these -variables and pass them to the function for loading and processing. +The optional ``fx_variables`` argument specifies the fx variables that the user +wishes to input to the function. More details on this are given +:ref:`here`. See also :func:`esmvalcore.preprocessor.area_statistics`. @@ -1496,42 +1516,10 @@ This function takes the argument: ``operator``, which defines the operation to apply over the volume. No depth coordinate is required as this is determined by Iris. This function -works best when the ``fx_variables`` provide the cell volume. - -The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function; -the user may specify it calling the variables e.g. - -.. code-block:: yaml - - fx_variables: - areacello: - volcello: - -or calling the variables and adding specific variable parameters (the key-value pair may be as specific -as a CMOR variable can permit): - -.. code-block:: yaml - - fx_variables: - areacello: - mip: Omon - volcello: - mip: fx - -Alternatively, the ``fx_variables`` argument can also be specified as a list: - -.. code-block:: yaml - - fx_variables: ['areacello', 'volcello'] - -or as a list of dictionaries: - -.. code-block:: yaml - - fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}] - -The recipe parser will automatically find the data files that are associated with these -variables and pass them to the function for loading and processing. +works best when the ``fx_variables`` provide the cell volume. The optional +``fx_variables`` argument specifies the fx variables that the user wishes to +input to the function. More details on this are given +:ref:`here`. See also :func:`esmvalcore.preprocessor.volume_statistics`. diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index fcf06502f4..26577cc619 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -363,12 +363,12 @@ def _search_fx_mip(tables, found_mip, variable, fx_info, config_user): if fx_files: logger.debug("Found fx variables '%s':\n%s", fx_info['short_name'], pformat(fx_files)) + break return found_mip, fx_info, fx_files def _get_fx_files(variable, fx_info, config_user): """Get fx files (searching all possible mips).""" - # assemble info from master variable var_project = variable['project'] # check if project in config-developer @@ -380,10 +380,13 @@ def _get_fx_files(variable, fx_info, config_user): f"a '{var_project}' project in config-developer.") project_tables = CMOR_TABLES[var_project].tables - # force only the mip declared by user + # Force only the mip declared by user. If mip is not given, search all + # available tables in alphabetical order found_mip = False if not fx_info['mip']: - found_mip, fx_info, fx_files = _search_fx_mip(project_tables, + sorted_project_tables = dict(sorted(deepcopy(project_tables).items(), + key=lambda item: item[0])) + found_mip, fx_info, fx_files = _search_fx_mip(sorted_project_tables, found_mip, variable, fx_info, config_user) else: @@ -395,9 +398,13 @@ def _get_fx_files(variable, fx_info, config_user): # If fx variable was not found in any table, raise exception if not found_mip: + if fx_info['mip'] is None: + raise RecipeError( + f"Requested fx variable '{fx_info['short_name']}' " + f"not available in any CMOR table for '{var_project}'") raise RecipeError( - f"Requested fx variable '{fx_info['short_name']}' " - f"not available in any CMOR table for '{var_project}'") + f"fx variable '{fx_info['short_name']}' not available in CMOR " + f"table '{fx_info['mip']}' for '{var_project}'") # flag a warning if not fx_files: @@ -470,7 +477,6 @@ def _fx_list_to_dict(fx_vars): def _update_fx_settings(settings, variable, config_user): """Update fx settings depending on the needed method.""" - # get fx variables either from user defined attribute or fixed def _get_fx_vars_from_attribute(step_settings, step_name): user_fx_vars = step_settings.get('fx_variables') diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 75af43bacb..9d3fe3de5f 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -10,7 +10,6 @@ from PIL import Image import esmvalcore -from esmvalcore import __version__ as current_version from esmvalcore._config import TAGS from esmvalcore._recipe import TASKSEP, read_recipe_file from esmvalcore._recipe_checks import RecipeError @@ -296,6 +295,8 @@ def find_files(_, filenames): return [] if 'sftlf' in filename: return [] + if 'IyrAnt_' in filename: + return [] return _get_filenames(tmp_path, filenames, tracking_id) monkeypatch.setattr(esmvalcore._data_finder, 'find_files', find_files) @@ -2143,36 +2144,22 @@ def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): assert not any(fx_variables) -# TODO remove after fixing test below -def test_fx_vars_mip_change_cmip6_incomplete(tmp_path, - patched_datafinder, - config_user): - TAGS.set_tag_values(TAGS_FOR_TESTING) - +def test_fx_vars_not_all_available(tmp_path, patched_failing_datafinder, + config_user): + """Check that IyrGre is used for sftgif if IyrAnt is not available.""" content = dedent(""" preprocessors: preproc: - area_statistics: - operator: mean - fx_variables: - areacella: - ensemble: r2i1p1f1 - areacello: - clayfrac: - sftlf: - sftgif: - mip: fx - sftof: - mask_landsea: + mask_landseaice: mask_out: sea diagnostics: diagnostic_name: variables: - tas: + tos: preprocessor: preproc project: CMIP6 - mip: Amon + mip: Omon exp: historical start_year: 2000 end_year: 2005 @@ -2187,35 +2174,28 @@ def test_fx_vars_mip_change_cmip6_incomplete(tmp_path, # Check generated tasks assert len(recipe.tasks) == 1 task = recipe.tasks.pop() - assert task.name == 'diagnostic_name' + TASKSEP + 'tas' + assert task.name == 'diagnostic_name' + TASKSEP + 'tos' assert len(task.products) == 1 product = task.products.pop() - # Check area_statistics - assert 'area_statistics' in product.settings - settings = product.settings['area_statistics'] + # Check mask_landseaice + assert 'mask_landseaice' in product.settings + settings = product.settings['mask_landseaice'] assert len(settings) == 1 - assert settings['operator'] == 'mean' + assert settings['mask_out'] == 'sea' + + # Check add_fx_variables fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - assert len(fx_variables) == 6 - assert '_fx_' in fx_variables['areacella']['filename'] - assert '_r2i1p1f1_' in fx_variables['areacella']['filename'] - assert '_Ofx_' in fx_variables['areacello']['filename'] - assert '_Efx_' in fx_variables['clayfrac']['filename'] - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_Ofx_' in fx_variables['sftof']['filename'] - - # Check mask_landsea - assert 'mask_landsea' in product.settings - settings = product.settings['mask_landsea'] - assert len(settings) == 1 - assert settings['mask_out'] == 'sea' + assert len(fx_variables) == 1 + filenames = fx_variables['sftgif']['filename'] + assert isinstance(filenames, list) # assert list since frequency != fx + assert len(filenames) == 1 + assert '_IyrGre_' in filenames[0] -@pytest.mark.skipif(current_version < "2.4.0", - reason="github.com/ESMValGroup/ESMValCore/issues/1159") -def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): +def test_fx_vars_fixed_mip_cmip6(tmp_path, patched_datafinder, config_user): + """Test fx variables with given mips.""" TAGS.set_tag_values(TAGS_FOR_TESTING) content = dedent(""" @@ -2224,16 +2204,11 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): area_statistics: operator: mean fx_variables: - areacella: - ensemble: r2i1p1f1 - areacello: - clayfrac: - sftlf: sftgif: mip: fx - sftof: - mask_landsea: - mask_out: sea + volcello: + ensemble: r2i1p1f1 + mip: Ofx diagnostics: diagnostic_name: @@ -2265,43 +2240,70 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): settings = product.settings['area_statistics'] assert len(settings) == 1 assert settings['operator'] == 'mean' + + # Check add_fx_variables fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - assert len(fx_variables) == 6 - assert '_fx_' in fx_variables['areacella']['filename'] - assert '_r2i1p1f1_' in fx_variables['areacella']['filename'] - assert '_Ofx_' in fx_variables['areacello']['filename'] - assert '_Efx_' in fx_variables['clayfrac']['filename'] - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_fx_' in fx_variables['sftgif']['filename'] # fails, see skipif - assert '_Ofx_' in fx_variables['sftof']['filename'] + assert len(fx_variables) == 2 + assert '_fx_' in fx_variables['sftgif']['filename'] + assert '_r2i1p1f1_' in fx_variables['volcello']['filename'] + assert '_Ofx_' in fx_variables['volcello']['filename'] - # Check mask_landsea - assert 'mask_landsea' in product.settings - settings = product.settings['mask_landsea'] - assert len(settings) == 1 - assert settings['mask_out'] == 'sea' +def test_fx_vars_invalid_mip_cmip6(tmp_path, patched_datafinder, config_user): + """Test fx variables with invalid mip.""" + TAGS.set_tag_values(TAGS_FOR_TESTING) -# TODO remove when you fix test below -def test_fx_list_mip_change_cmip6_incomplete(tmp_path, - patched_datafinder, - config_user): content = dedent(""" preprocessors: preproc: area_statistics: operator: mean - fx_variables: [ - 'areacella', - 'areacello', - 'clayfrac', - 'sftlf', - 'sftgif', - 'sftof', - ] + fx_variables: + areacella: + mip: Lmon + + diagnostics: + diagnostic_name: + variables: + tas: + preprocessor: preproc + project: CMIP6 + mip: Amon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + msg = ("fx variable 'areacella' not available in CMOR table 'Lmon' for " + "'CMIP6'") + with pytest.raises(RecipeError) as rec_err_exp: + get_recipe(tmp_path, content, config_user) + assert str(rec_err_exp.value) == INITIALIZATION_ERROR_MSG + assert msg in rec_err_exp.value.failed_tasks[0].message + + +def test_fx_vars_mip_search_cmip6(tmp_path, patched_datafinder, config_user): + """Test mip tables search for different fx variables.""" + TAGS.set_tag_values(TAGS_FOR_TESTING) + + content = dedent(""" + preprocessors: + preproc: + area_statistics: + operator: mean + fx_variables: + areacella: + areacello: + clayfrac: mask_landsea: mask_out: sea + volume_statistics: + operator: mean diagnostics: diagnostic_name: @@ -2333,6 +2335,20 @@ def test_fx_list_mip_change_cmip6_incomplete(tmp_path, settings = product.settings['area_statistics'] assert len(settings) == 1 assert settings['operator'] == 'mean' + + # Check mask_landsea + assert 'mask_landsea' in product.settings + settings = product.settings['mask_landsea'] + assert len(settings) == 1 + assert settings['mask_out'] == 'sea' + + # Check volume_statistics + assert 'volume_statistics' in product.settings + settings = product.settings['volume_statistics'] + assert len(settings) == 1 + assert settings['operator'] == 'mean' + + # Check add_fx_variables fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 6 @@ -2341,17 +2357,14 @@ def test_fx_list_mip_change_cmip6_incomplete(tmp_path, assert '_Efx_' in fx_variables['clayfrac']['filename'] assert '_fx_' in fx_variables['sftlf']['filename'] assert '_Ofx_' in fx_variables['sftof']['filename'] + volcello_files = fx_variables['volcello']['filename'] + assert isinstance(volcello_files, list) + assert len(volcello_files) == 1 + assert '_Odec_' in volcello_files[0] - # Check mask_landsea - assert 'mask_landsea' in product.settings - settings = product.settings['mask_landsea'] - assert len(settings) == 1 - assert settings['mask_out'] == 'sea' - -@pytest.mark.skipif(current_version < "2.4.0", - reason="github.com/ESMValGroup/ESMValCore/issues/1159") -def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): +def test_fx_list_mip_search_cmip6(tmp_path, patched_datafinder, config_user): + """Test mip tables search for list of different fx variables.""" content = dedent(""" preprocessors: preproc: @@ -2365,8 +2378,6 @@ def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): 'sftgif', 'sftof', ] - mask_landsea: - mask_out: sea diagnostics: diagnostic_name: @@ -2398,6 +2409,8 @@ def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): settings = product.settings['area_statistics'] assert len(settings) == 1 assert settings['operator'] == 'mean' + + # Check add_fx_variables fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 6 @@ -2405,14 +2418,11 @@ def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): assert '_Ofx_' in fx_variables['areacello']['filename'] assert '_Efx_' in fx_variables['clayfrac']['filename'] assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_fx_' in fx_variables['sftgif']['filename'] # fails, see skipif assert '_Ofx_' in fx_variables['sftof']['filename'] - - # Check mask_landsea - assert 'mask_landsea' in product.settings - settings = product.settings['mask_landsea'] - assert len(settings) == 1 - assert settings['mask_out'] == 'sea' + sftgif_files = fx_variables['sftgif']['filename'] + assert isinstance(sftgif_files, list) + assert len(sftgif_files) == 1 + assert '_IyrAnt_' in sftgif_files[0] def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, From 4cca1e51dc9366a0dcc17429089214f2b9ebed12 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 7 Jul 2021 16:55:28 +0200 Subject: [PATCH 2/8] Raise error when fx variable is present in multiple tables --- doc/recipe/preprocessor.rst | 12 ++- esmvalcore/_recipe.py | 89 ++++++++++--------- tests/integration/test_recipe.py | 144 ++++++++++++++++--------------- 3 files changed, 129 insertions(+), 116 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 5fa78818d8..4304bdc696 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -227,15 +227,13 @@ The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. If ``mip`` is not given, ESMValTool will search for the fx variable in all -available tables of the specified project (in alphabetical order) and return -the first match. +available tables of the specified project. .. note:: Some fx variables exist in more than one table (e.g., ``volcello`` exists in - CMIP6's ``Ofx`` and ``Omon`` table or ``sftgif`` exists in CMIP6's ``fx`` - and ``IyrAnt`` table). If a specific table is desired, this needs to be - specified by ``mip``, otherwise the table is selected based on the - alphabetical order and data availability. + CMIP6's ``Ofx``, ``Oyr``, ``Odec`` and ``Omon`` tables or ``sftgif`` exists + in CMIP6's ``fx``, ``LImon``, ``IyrGre`` and ``IyrAnt`` tables). In these + cases, ``mip`` needs to be specified, otherwise an error is raised. The preprocessor step ``add_fx_variables`` loads the required ``fx_variables``, checks them against CMOR standards and adds them either as ``cell_measure`` @@ -1072,7 +1070,7 @@ See also :func:`esmvalcore.preprocessor.decadal_statistics`. ---------------------- This function produces statistics for the whole dataset. It can produce scalars -(if the full period is chosen) or daily, monthly or seasonal statics. +(if the full period is chosen) or daily, monthly or seasonal statistics. Parameters: * operator: operation to apply. Accepted values are 'mean', 'median', diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 26577cc619..9dbaf52935 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -349,22 +349,37 @@ def _add_fxvar_keys(fx_info, variable): return fx_variable -def _search_fx_mip(tables, found_mip, variable, fx_info, config_user): - fx_files = None - for mip in tables: - fx_cmor = tables[mip].get(fx_info['short_name']) - if fx_cmor: - found_mip = True - fx_info['mip'] = mip - fx_info = _add_fxvar_keys(fx_info, variable) - logger.debug("For fx variable '%s', found table '%s'", - fx_info['short_name'], mip) - fx_files = _get_input_files(fx_info, config_user)[0] - if fx_files: - logger.debug("Found fx variables '%s':\n%s", - fx_info['short_name'], pformat(fx_files)) - break - return found_mip, fx_info, fx_files +def _search_fx_mip(tables, variable, fx_info, config_user): + """Search mip for fx variable.""" + mips_with_fx_var = [] + for (mip, table) in tables.items(): + if fx_info['short_name'] in table: + mips_with_fx_var.append(mip) + + # List is empty -> no table includes the fx variable + if not mips_with_fx_var: + raise RecipeError( + f"Requested fx variable '{fx_info['short_name']}' not available " + f"in any CMOR table for '{variable['project']}'") + + # List contains more than one element -> ambiguity + if len(mips_with_fx_var) > 1: + raise RecipeError( + f"Requested fx variable '{fx_info['short_name']}' is available in " + f"more than one CMOR table for '{variable['project']}': " + f"{mips_with_fx_var}") + + # Dict contains exactly one element -> ok + mip = mips_with_fx_var[0] + fx_info['mip'] = mip + fx_info = _add_fxvar_keys(fx_info, variable) + logger.debug("For fx variable '%s', found table '%s'", + fx_info['short_name'], mip) + fx_files = _get_input_files(fx_info, config_user)[0] + if fx_files: + logger.debug("Found fx variables '%s':\n%s", + fx_info['short_name'], pformat(fx_files)) + return fx_info, fx_files def _get_fx_files(variable, fx_info, config_user): @@ -380,38 +395,32 @@ def _get_fx_files(variable, fx_info, config_user): f"a '{var_project}' project in config-developer.") project_tables = CMOR_TABLES[var_project].tables - # Force only the mip declared by user. If mip is not given, search all - # available tables in alphabetical order - found_mip = False + # If mip is not given, search all available tables. If the variable is not + # found or available in more than one table, raise error if not fx_info['mip']: - sorted_project_tables = dict(sorted(deepcopy(project_tables).items(), - key=lambda item: item[0])) - found_mip, fx_info, fx_files = _search_fx_mip(sorted_project_tables, - found_mip, variable, - fx_info, config_user) + fx_info, fx_files = _search_fx_mip(project_tables, variable, fx_info, + config_user) else: - fx_cmor = project_tables[fx_info['mip']].get(fx_info['short_name']) - if fx_cmor: - found_mip = True - fx_info = _add_fxvar_keys(fx_info, variable) - fx_files = _get_input_files(fx_info, config_user)[0] - - # If fx variable was not found in any table, raise exception - if not found_mip: - if fx_info['mip'] is None: + mip = fx_info['mip'] + if mip not in project_tables: raise RecipeError( - f"Requested fx variable '{fx_info['short_name']}' " - f"not available in any CMOR table for '{var_project}'") - raise RecipeError( - f"fx variable '{fx_info['short_name']}' not available in CMOR " - f"table '{fx_info['mip']}' for '{var_project}'") + f"Requested mip table '{mip}' for fx variable " + f"'{fx_info['short_name']}' not available for project " + f"'{var_project}'") + fx_cmor = project_tables[mip].get(fx_info['short_name']) + if fx_cmor is None: + raise RecipeError( + f"fx variable '{fx_info['short_name']}' not available in CMOR " + f"table '{mip}' for '{var_project}'") + fx_info = _add_fxvar_keys(fx_info, variable) + fx_files = _get_input_files(fx_info, config_user)[0] - # flag a warning + # Flag a warning if no files are found if not fx_files: logger.warning("Missing data for fx variable '%s'", fx_info['short_name']) - # allow for empty lists corrected for by NE masks + # If frequency = fx, only allow a single file if fx_files: if fx_info['frequency'] == 'fx': fx_files = fx_files[0] diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 9d3fe3de5f..cadca1f2a7 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -295,8 +295,6 @@ def find_files(_, filenames): return [] if 'sftlf' in filename: return [] - if 'IyrAnt_' in filename: - return [] return _get_filenames(tmp_path, filenames, tracking_id) monkeypatch.setattr(esmvalcore._data_finder, 'find_files', find_files) @@ -2144,22 +2142,29 @@ def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): assert not any(fx_variables) -def test_fx_vars_not_all_available(tmp_path, patched_failing_datafinder, - config_user): - """Check that IyrGre is used for sftgif if IyrAnt is not available.""" +def test_fx_vars_fixed_mip_cmip6(tmp_path, patched_datafinder, config_user): + """Test fx variables with given mips.""" + TAGS.set_tag_values(TAGS_FOR_TESTING) + content = dedent(""" preprocessors: preproc: - mask_landseaice: - mask_out: sea + area_statistics: + operator: mean + fx_variables: + sftgif: + mip: fx + volcello: + ensemble: r2i1p1f1 + mip: Ofx diagnostics: diagnostic_name: variables: - tos: + tas: preprocessor: preproc project: CMIP6 - mip: Omon + mip: Amon exp: historical start_year: 2000 end_year: 2005 @@ -2174,28 +2179,27 @@ def test_fx_vars_not_all_available(tmp_path, patched_failing_datafinder, # Check generated tasks assert len(recipe.tasks) == 1 task = recipe.tasks.pop() - assert task.name == 'diagnostic_name' + TASKSEP + 'tos' + assert task.name == 'diagnostic_name' + TASKSEP + 'tas' assert len(task.products) == 1 product = task.products.pop() - # Check mask_landseaice - assert 'mask_landseaice' in product.settings - settings = product.settings['mask_landseaice'] + # Check area_statistics + assert 'area_statistics' in product.settings + settings = product.settings['area_statistics'] assert len(settings) == 1 - assert settings['mask_out'] == 'sea' + assert settings['operator'] == 'mean' # Check add_fx_variables fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - filenames = fx_variables['sftgif']['filename'] - assert isinstance(filenames, list) # assert list since frequency != fx - assert len(filenames) == 1 - assert '_IyrGre_' in filenames[0] + assert len(fx_variables) == 2 + assert '_fx_' in fx_variables['sftgif']['filename'] + assert '_r2i1p1f1_' in fx_variables['volcello']['filename'] + assert '_Ofx_' in fx_variables['volcello']['filename'] -def test_fx_vars_fixed_mip_cmip6(tmp_path, patched_datafinder, config_user): - """Test fx variables with given mips.""" +def test_fx_vars_invalid_mip_cmip6(tmp_path, patched_datafinder, config_user): + """Test fx variables with invalid mip.""" TAGS.set_tag_values(TAGS_FOR_TESTING) content = dedent(""" @@ -2204,11 +2208,8 @@ def test_fx_vars_fixed_mip_cmip6(tmp_path, patched_datafinder, config_user): area_statistics: operator: mean fx_variables: - sftgif: - mip: fx - volcello: - ensemble: r2i1p1f1 - mip: Ofx + areacella: + mip: INVALID diagnostics: diagnostic_name: @@ -2226,32 +2227,17 @@ def test_fx_vars_fixed_mip_cmip6(tmp_path, patched_datafinder, config_user): - {dataset: CanESM5} scripts: null """) - recipe = get_recipe(tmp_path, content, config_user) - - # Check generated tasks - assert len(recipe.tasks) == 1 - task = recipe.tasks.pop() - assert task.name == 'diagnostic_name' + TASKSEP + 'tas' - assert len(task.products) == 1 - product = task.products.pop() - - # Check area_statistics - assert 'area_statistics' in product.settings - settings = product.settings['area_statistics'] - assert len(settings) == 1 - assert settings['operator'] == 'mean' - - # Check add_fx_variables - fx_variables = product.settings['add_fx_variables']['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 2 - assert '_fx_' in fx_variables['sftgif']['filename'] - assert '_r2i1p1f1_' in fx_variables['volcello']['filename'] - assert '_Ofx_' in fx_variables['volcello']['filename'] + msg = ("Requested mip table 'INVALID' for fx variable 'areacella' not " + "available for project 'CMIP6'") + with pytest.raises(RecipeError) as rec_err_exp: + get_recipe(tmp_path, content, config_user) + assert str(rec_err_exp.value) == INITIALIZATION_ERROR_MSG + assert msg in rec_err_exp.value.failed_tasks[0].message -def test_fx_vars_invalid_mip_cmip6(tmp_path, patched_datafinder, config_user): - """Test fx variables with invalid mip.""" +def test_fx_vars_invalid_mip_for_var_cmip6(tmp_path, patched_datafinder, + config_user): + """Test fx variables with invalid mip for variable.""" TAGS.set_tag_values(TAGS_FOR_TESTING) content = dedent(""" @@ -2302,8 +2288,6 @@ def test_fx_vars_mip_search_cmip6(tmp_path, patched_datafinder, config_user): clayfrac: mask_landsea: mask_out: sea - volume_statistics: - operator: mean diagnostics: diagnostic_name: @@ -2342,25 +2326,15 @@ def test_fx_vars_mip_search_cmip6(tmp_path, patched_datafinder, config_user): assert len(settings) == 1 assert settings['mask_out'] == 'sea' - # Check volume_statistics - assert 'volume_statistics' in product.settings - settings = product.settings['volume_statistics'] - assert len(settings) == 1 - assert settings['operator'] == 'mean' - # Check add_fx_variables fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - assert len(fx_variables) == 6 + assert len(fx_variables) == 5 assert '_fx_' in fx_variables['areacella']['filename'] assert '_Ofx_' in fx_variables['areacello']['filename'] assert '_Efx_' in fx_variables['clayfrac']['filename'] assert '_fx_' in fx_variables['sftlf']['filename'] assert '_Ofx_' in fx_variables['sftof']['filename'] - volcello_files = fx_variables['volcello']['filename'] - assert isinstance(volcello_files, list) - assert len(volcello_files) == 1 - assert '_Odec_' in volcello_files[0] def test_fx_list_mip_search_cmip6(tmp_path, patched_datafinder, config_user): @@ -2375,7 +2349,6 @@ def test_fx_list_mip_search_cmip6(tmp_path, patched_datafinder, config_user): 'areacello', 'clayfrac', 'sftlf', - 'sftgif', 'sftof', ] @@ -2413,16 +2386,12 @@ def test_fx_list_mip_search_cmip6(tmp_path, patched_datafinder, config_user): # Check add_fx_variables fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - assert len(fx_variables) == 6 + assert len(fx_variables) == 5 assert '_fx_' in fx_variables['areacella']['filename'] assert '_Ofx_' in fx_variables['areacello']['filename'] assert '_Efx_' in fx_variables['clayfrac']['filename'] assert '_fx_' in fx_variables['sftlf']['filename'] assert '_Ofx_' in fx_variables['sftof']['filename'] - sftgif_files = fx_variables['sftgif']['filename'] - assert isinstance(sftgif_files, list) - assert len(sftgif_files) == 1 - assert '_IyrAnt_' in sftgif_files[0] def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -2753,6 +2722,7 @@ def test_wrong_project(tmp_path, patched_datafinder, config_user): def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user): + """Test that error is raised for invalid fx variable.""" TAGS.set_tag_values(TAGS_FOR_TESTING) content = dedent(""" @@ -2788,6 +2758,42 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user): assert msg in rec_err_exp.value.failed_tasks[0].message +def test_ambiguous_fx_var_cmip6(tmp_path, patched_datafinder, config_user): + """Test that error is raised for fx variable available in multiple mips.""" + TAGS.set_tag_values(TAGS_FOR_TESTING) + + content = dedent(""" + preprocessors: + preproc: + area_statistics: + operator: mean + fx_variables: + volcello: + + diagnostics: + diagnostic_name: + variables: + tas: + preprocessor: preproc + project: CMIP6 + mip: Amon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + msg = ("Requested fx variable 'volcello' is available in more than one " + "CMOR table for 'CMIP6': ['Ofx', 'Oyr', 'Odec', 'Omon']") + with pytest.raises(RecipeError) as rec_err_exp: + get_recipe(tmp_path, content, config_user) + assert str(rec_err_exp.value) == INITIALIZATION_ERROR_MSG + assert msg in rec_err_exp.value.failed_tasks[0].message + + def test_multimodel_mask(tmp_path, patched_datafinder, config_user): """Test ``mask_multimodel``.""" content = dedent(""" From a9b85da9e1e9dbe7dc4788ccd8eb163a40582cdc Mon Sep 17 00:00:00 2001 From: Manuel Schlund <32543114+schlunma@users.noreply.github.com> Date: Wed, 7 Jul 2021 17:09:46 +0200 Subject: [PATCH 3/8] Update doc/recipe/preprocessor.rst Co-authored-by: Valeriu Predoi --- doc/recipe/preprocessor.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 4304bdc696..8d8d9c662c 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -229,7 +229,7 @@ with these variables and pass them to the function for loading and processing. If ``mip`` is not given, ESMValTool will search for the fx variable in all available tables of the specified project. -.. note:: +.. warning:: Some fx variables exist in more than one table (e.g., ``volcello`` exists in CMIP6's ``Ofx``, ``Oyr``, ``Odec`` and ``Omon`` tables or ``sftgif`` exists in CMIP6's ``fx``, ``LImon``, ``IyrGre`` and ``IyrAnt`` tables). In these From 2e0d9dc06be7a9e8aba8896687af5833ab17d8a0 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 8 Jul 2021 10:24:45 +0200 Subject: [PATCH 4/8] Updated documentation on fx variables --- doc/recipe/preprocessor.rst | 70 ++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 8d8d9c662c..632cabbc18 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -180,19 +180,24 @@ To get an overview on data fixes and how to implement new ones, please go to Fx variables as cell measures or ancillary variables ==================================================== -The following preprocessor may require the use of ``fx_variables`` to be able +The following preprocessors may require the use of ``fx_variables`` to be able to perform the computations: - - area_statistics_ (``areacella``, ``areacello``) - - :ref:`mask_landsea` (``sftlf``, ``sftof``) - - :ref:`mask_landseaice` (``sftgif``) - - volume_statistics_ (``volcello``) - - :ref:`weighting_landsea_fraction` (``sftlf``, ``sftof``) +============================================================== ===================== +Preprocessor Default fx variables +============================================================== ===================== +area_statistics_ ``areacella``, ``areacello`` +:ref:`mask_landsea` ``sftlf``, ``sftof`` +:ref:`mask_landseaice` ``sftgif`` +volume_statistics_ ``volcello`` +:ref:`weighting_landsea_fraction` ``sftlf``, ``sftof`` +============================================================== ===================== If no ``fx_variables`` are specified for these preprocessors, the fx variables -given in brackets are used. If given, the ``fx_variables`` argument specifies -the fx variables that the user wishes to input to the corresponding -preprocessor function; the user may specify it calling the variables e.g. +in the second column are used. If given, the ``fx_variables`` argument +specifies the fx variables that the user wishes to input to the corresponding +preprocessor function. The user may specify these by simply adding the names of +the variables, e.g., .. code-block:: yaml @@ -200,16 +205,16 @@ preprocessor function; the user may specify it calling the variables e.g. areacello: volcello: -or calling the variables and adding specific variable parameters (the key-value -pair may be as specific as a CMOR variable can permit): +or by explicitly adding variable parameters (the key-value pair may be as +specific as a CMOR variable can permit): .. code-block:: yaml fx_variables: areacello: - mip: Omon + mip: Ofx volcello: - mip: fx + mip: Omon Alternatively, the ``fx_variables`` argument can also be specified as a list: @@ -221,7 +226,7 @@ or as a list of dictionaries: .. code-block:: yaml - fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}] + fx_variables: [{'short_name': 'areacello', 'mip': 'Ofx'}, {'short_name': 'volcello', 'mip': 'Omon'}] The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. @@ -231,14 +236,21 @@ available tables of the specified project. .. warning:: Some fx variables exist in more than one table (e.g., ``volcello`` exists in - CMIP6's ``Ofx``, ``Oyr``, ``Odec`` and ``Omon`` tables or ``sftgif`` exists - in CMIP6's ``fx``, ``LImon``, ``IyrGre`` and ``IyrAnt`` tables). In these - cases, ``mip`` needs to be specified, otherwise an error is raised. - -The preprocessor step ``add_fx_variables`` loads the required ``fx_variables``, -checks them against CMOR standards and adds them either as ``cell_measure`` -or ``ancillary_variable`` inside the cube data. This ensures that the -defined preprocessor chain is applied to both ``variables`` and ``fx_variables``. + CMIP6's ``Odec``, ``Ofx``, ``Omon``, and ``Oyr`` tables; ``sftgif`` exists + in CMIP6's ``fx``, ``IyrAnt`` and ``IyrGre``, and ``LImon`` tables). In + these cases, ``mip`` needs to be specified, otherwise an error is raised. + +Internally, the required ``fx_variables`` are automatically loaded by the +preprocessor step ``add_fx_variables`` which also checks them against CMOR +standards and adds them either as ``cell_measure`` (see `CF conventions on cell +measures +`_ +and :class:`iris.coords.CellMeasure`) or ``ancillary_variable`` (see `CF +conventions on ancillary variables +`_ +and :class:`iris.coords.AncillaryVariable`) inside the cube data. This ensures +that the defined preprocessor chain is applied to both ``variables`` and +``fx_variables``. Note that when calling steps that require ``fx_variables`` inside diagnostic scripts, the variables are expected to contain the required ``cell_measures`` or @@ -428,7 +440,7 @@ or alternatively: ] More details on the argument ``fx_variables`` and its default values are given -:ref:`here`. +in :ref:`Fx variables as cell measures or ancillary variables`. See also :func:`esmvalcore.preprocessor.weighting_landsea_fraction`. @@ -507,7 +519,7 @@ or alternatively: ] More details on the argument ``fx_variables`` and its default values are given -:ref:`here`. +in :ref:`Fx variables as cell measures or ancillary variables`. If the corresponding fx file is not found (which is the case for some models and almost all observational datasets), the @@ -561,7 +573,7 @@ or alternatively: fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] More details on the argument ``fx_variables`` and its default values are given -:ref:`here`. +in :ref:`Fx variables as cell measures or ancillary variables`. See also :func:`esmvalcore.preprocessor.mask_landseaice`. @@ -1470,8 +1482,8 @@ region, depth layer or time period is required, then those regions need to be removed using other preprocessor operations in advance. The optional ``fx_variables`` argument specifies the fx variables that the user -wishes to input to the function. More details on this are given -:ref:`here`. +wishes to input to the function. More details on this are given in :ref:`Fx +variables as cell measures or ancillary variables`. See also :func:`esmvalcore.preprocessor.area_statistics`. @@ -1516,8 +1528,8 @@ apply over the volume. No depth coordinate is required as this is determined by Iris. This function works best when the ``fx_variables`` provide the cell volume. The optional ``fx_variables`` argument specifies the fx variables that the user wishes to -input to the function. More details on this are given -:ref:`here`. +input to the function. More details on this are given in :ref:`Fx variables as +cell measures or ancillary variables`. See also :func:`esmvalcore.preprocessor.volume_statistics`. From cc603325ba93f6e1137b654f307818fb0f277687 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 8 Jul 2021 10:39:04 +0200 Subject: [PATCH 5/8] Used :ref: to link area_statistics and volume_statistics --- doc/recipe/preprocessor.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 632cabbc18..df884d718b 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -186,10 +186,10 @@ to perform the computations: ============================================================== ===================== Preprocessor Default fx variables ============================================================== ===================== -area_statistics_ ``areacella``, ``areacello`` +:ref:`area_statistics` ``areacella``, ``areacello`` :ref:`mask_landsea` ``sftlf``, ``sftof`` :ref:`mask_landseaice` ``sftgif`` -volume_statistics_ ``volcello`` +:ref:`volume_statistics` ``volcello`` :ref:`weighting_landsea_fraction` ``sftlf``, ``sftof`` ============================================================== ===================== @@ -1467,6 +1467,8 @@ argument: See also :func:`esmvalcore.preprocessor.meridional_means`. +.. _area_statistics: + ``area_statistics`` ------------------- @@ -1516,6 +1518,8 @@ as the Iris cube. That is, if the cube has `z`-coordinate as negative, then See also :func:`esmvalcore.preprocessor.extract_volume`. +.. _volume_statistics: + ``volume_statistics`` --------------------- From 9dfd16dc9e63e230706e278075ba3b4eb29ae4ca Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 8 Jul 2021 15:29:22 +0200 Subject: [PATCH 6/8] Optimized fx doc --- doc/recipe/preprocessor.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index df884d718b..3072efad7b 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -205,17 +205,21 @@ the variables, e.g., areacello: volcello: -or by explicitly adding variable parameters (the key-value pair may be as -specific as a CMOR variable can permit): +or by additionally specifying further keys that are used to define the fx +datsets, e.g., .. code-block:: yaml fx_variables: areacello: mip: Ofx + exp: piControl volcello: mip: Omon +This might be useful to select fx files from a specific ``mip`` table or from a +specific ``exp`` in case not all experiments provide the fx variable. + Alternatively, the ``fx_variables`` argument can also be specified as a list: .. code-block:: yaml @@ -226,7 +230,7 @@ or as a list of dictionaries: .. code-block:: yaml - fx_variables: [{'short_name': 'areacello', 'mip': 'Ofx'}, {'short_name': 'volcello', 'mip': 'Omon'}] + fx_variables: [{'short_name': 'areacello', 'mip': 'Ofx', 'exp': 'piControl'}, {'short_name': 'volcello', 'mip': 'Omon'}] The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. From ca039cfaaa20b311ca4ee830793f29cb3366dfaf Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 8 Jul 2021 16:16:07 +0200 Subject: [PATCH 7/8] Only raise ambiguity error if fx files are found in mutiple tables --- doc/recipe/preprocessor.rst | 7 ++-- esmvalcore/_recipe.py | 55 +++++++++++++++--------- tests/integration/test_recipe.py | 72 +++++++++++++++++++++++++++++--- 3 files changed, 107 insertions(+), 27 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 3072efad7b..02de1aa0a0 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -206,7 +206,7 @@ the variables, e.g., volcello: or by additionally specifying further keys that are used to define the fx -datsets, e.g., +datasets, e.g., .. code-block:: yaml @@ -241,8 +241,9 @@ available tables of the specified project. .. warning:: Some fx variables exist in more than one table (e.g., ``volcello`` exists in CMIP6's ``Odec``, ``Ofx``, ``Omon``, and ``Oyr`` tables; ``sftgif`` exists - in CMIP6's ``fx``, ``IyrAnt`` and ``IyrGre``, and ``LImon`` tables). In - these cases, ``mip`` needs to be specified, otherwise an error is raised. + in CMIP6's ``fx``, ``IyrAnt`` and ``IyrGre``, and ``LImon`` tables). If (for + a given dataset) fx files are found in more than one table, ``mip`` needs to + be specified, otherwise an error is raised. Internally, the required ``fx_variables`` are automatically loaded by the preprocessor step ``add_fx_variables`` which also checks them against CMOR diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 9dbaf52935..1a04844c31 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -351,6 +351,7 @@ def _add_fxvar_keys(fx_info, variable): def _search_fx_mip(tables, variable, fx_info, config_user): """Search mip for fx variable.""" + # Get all mips that offer that specific fx variable mips_with_fx_var = [] for (mip, table) in tables.items(): if fx_info['short_name'] in table: @@ -362,23 +363,40 @@ def _search_fx_mip(tables, variable, fx_info, config_user): f"Requested fx variable '{fx_info['short_name']}' not available " f"in any CMOR table for '{variable['project']}'") - # List contains more than one element -> ambiguity - if len(mips_with_fx_var) > 1: + # Iterate through all possible mips and check if files are available; in + # case of ambiguity raise an error + fx_files_for_mips = {} + for mip in mips_with_fx_var: + fx_info['mip'] = mip + fx_info = _add_fxvar_keys(fx_info, variable) + logger.debug("For fx variable '%s', found table '%s'", + fx_info['short_name'], mip) + fx_files = _get_input_files(fx_info, config_user)[0] + if fx_files: + logger.debug("Found fx variables '%s':\n%s", + fx_info['short_name'], pformat(fx_files)) + fx_files_for_mips[mip] = fx_files + + # Dict contains more than one element -> ambiguity + if len(fx_files_for_mips) > 1: raise RecipeError( - f"Requested fx variable '{fx_info['short_name']}' is available in " - f"more than one CMOR table for '{variable['project']}': " - f"{mips_with_fx_var}") - - # Dict contains exactly one element -> ok - mip = mips_with_fx_var[0] - fx_info['mip'] = mip - fx_info = _add_fxvar_keys(fx_info, variable) - logger.debug("For fx variable '%s', found table '%s'", - fx_info['short_name'], mip) - fx_files = _get_input_files(fx_info, config_user)[0] - if fx_files: - logger.debug("Found fx variables '%s':\n%s", - fx_info['short_name'], pformat(fx_files)) + f"Requested fx variable '{fx_info['short_name']}' for dataset " + f"'{variable['dataset']}' of project '{variable['project']}' is " + f"available in more than one CMOR table for " + f"'{variable['project']}': {list(fx_files_for_mips)}") + + # Dict is empty -> no files found -> handled at later stage + if not fx_files_for_mips: + fx_info['mip'] = variable['mip'] + fx_files = [] + + # Dict contains one element -> ok + else: + mip = list(fx_files_for_mips)[0] + fx_info['mip'] = mip + fx_info = _add_fxvar_keys(fx_info, variable) + fx_files = fx_files_for_mips[mip] + return fx_info, fx_files @@ -396,7 +414,7 @@ def _get_fx_files(variable, fx_info, config_user): project_tables = CMOR_TABLES[var_project].tables # If mip is not given, search all available tables. If the variable is not - # found or available in more than one table, raise error + # found or files are available in more than one table, raise error if not fx_info['mip']: fx_info, fx_files = _search_fx_mip(project_tables, variable, fx_info, config_user) @@ -407,8 +425,7 @@ def _get_fx_files(variable, fx_info, config_user): f"Requested mip table '{mip}' for fx variable " f"'{fx_info['short_name']}' not available for project " f"'{var_project}'") - fx_cmor = project_tables[mip].get(fx_info['short_name']) - if fx_cmor is None: + if fx_info['short_name'] not in project_tables[mip]: raise RecipeError( f"fx variable '{fx_info['short_name']}' not available in CMOR " f"table '{mip}' for '{var_project}'") diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index cadca1f2a7..e116c2fcfb 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -295,6 +295,10 @@ def find_files(_, filenames): return [] if 'sftlf' in filename: return [] + if 'IyrAnt_' in filename: + return [] + if 'IyrGre_' in filename: + return [] return _get_filenames(tmp_path, filenames, tracking_id) monkeypatch.setattr(esmvalcore._data_finder, 'find_files', find_files) @@ -2009,8 +2013,8 @@ def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): settings = product.settings['mask_landseaice'] assert len(settings) == 1 assert settings['mask_out'] == 'sea' - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_piControl_' in fx_variables['sftlf']['filename'] + assert '_fx_' in fx_variables['sftgif']['filename'] + assert '_piControl_' in fx_variables['sftgif']['filename'] # volume statistics settings = product.settings['volume_statistics'] @@ -2759,7 +2763,7 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user): def test_ambiguous_fx_var_cmip6(tmp_path, patched_datafinder, config_user): - """Test that error is raised for fx variable available in multiple mips.""" + """Test that error is raised for fx files available in multiple mips.""" TAGS.set_tag_values(TAGS_FOR_TESTING) content = dedent(""" @@ -2786,14 +2790,72 @@ def test_ambiguous_fx_var_cmip6(tmp_path, patched_datafinder, config_user): - {dataset: CanESM5} scripts: null """) - msg = ("Requested fx variable 'volcello' is available in more than one " - "CMOR table for 'CMIP6': ['Ofx', 'Oyr', 'Odec', 'Omon']") + msg = ("Requested fx variable 'volcello' for dataset 'CanESM5' of project " + "'CMIP6' is available in more than one CMOR table for 'CMIP6': " + "['Ofx', 'Oyr', 'Odec', 'Omon']") with pytest.raises(RecipeError) as rec_err_exp: get_recipe(tmp_path, content, config_user) assert str(rec_err_exp.value) == INITIALIZATION_ERROR_MSG assert msg in rec_err_exp.value.failed_tasks[0].message +def test_unique_fx_var_in_multiple_mips_cmip6(tmp_path, + patched_failing_datafinder, + config_user): + """Test that no error is raised for fx files available in one mip.""" + TAGS.set_tag_values(TAGS_FOR_TESTING) + + content = dedent(""" + preprocessors: + preproc: + area_statistics: + operator: mean + fx_variables: + sftgif: + + diagnostics: + diagnostic_name: + variables: + tas: + preprocessor: preproc + project: CMIP6 + mip: Amon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check generated tasks + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert task.name == 'diagnostic_name' + TASKSEP + 'tas' + assert len(task.products) == 1 + product = task.products.pop() + + # Check area_statistics + assert 'area_statistics' in product.settings + settings = product.settings['area_statistics'] + assert len(settings) == 1 + assert settings['operator'] == 'mean' + + # Check add_fx_variables + # Due to failing datafinder, only files in LImon are found even though + # sftgif is available in the tables fx, IyrAnt, IyrGre and LImon + fx_variables = product.settings['add_fx_variables']['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + sftgif_files = fx_variables['sftgif']['filename'] + assert isinstance(sftgif_files, list) + assert len(sftgif_files) == 1 + assert'_LImon_' in sftgif_files[0] + + def test_multimodel_mask(tmp_path, patched_datafinder, config_user): """Test ``mask_multimodel``.""" content = dedent(""" From 8cd5dcfd7af0b43eedcf9b8b2de2dd44502cb28a Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 9 Jul 2021 13:14:16 +0200 Subject: [PATCH 8/8] Fixed machine-dependent error-message --- esmvalcore/_recipe.py | 2 +- tests/integration/test_recipe.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 1a04844c31..10b4691591 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -383,7 +383,7 @@ def _search_fx_mip(tables, variable, fx_info, config_user): f"Requested fx variable '{fx_info['short_name']}' for dataset " f"'{variable['dataset']}' of project '{variable['project']}' is " f"available in more than one CMOR table for " - f"'{variable['project']}': {list(fx_files_for_mips)}") + f"'{variable['project']}': {sorted(list(fx_files_for_mips))}") # Dict is empty -> no files found -> handled at later stage if not fx_files_for_mips: diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index e116c2fcfb..5f7bb2ce58 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2792,7 +2792,7 @@ def test_ambiguous_fx_var_cmip6(tmp_path, patched_datafinder, config_user): """) msg = ("Requested fx variable 'volcello' for dataset 'CanESM5' of project " "'CMIP6' is available in more than one CMOR table for 'CMIP6': " - "['Ofx', 'Oyr', 'Odec', 'Omon']") + "['Odec', 'Ofx', 'Omon', 'Oyr']") with pytest.raises(RecipeError) as rec_err_exp: get_recipe(tmp_path, content, config_user) assert str(rec_err_exp.value) == INITIALIZATION_ERROR_MSG