diff --git a/config-user-example.yml b/config-user-example.yml index 4eb31be542..918d66d88c 100644 --- a/config-user-example.yml +++ b/config-user-example.yml @@ -5,9 +5,14 @@ # Rootpaths to the data from different projects (lists are also possible) rootpath: + CMIP3: [~/cmip3_inputpath1, ~/cmip3_inputpath2] CMIP5: [~/cmip5_inputpath1, ~/cmip5_inputpath2] CMIP6: [~/cmip6_inputpath1, ~/cmip6_inputpath2] OBS: ~/obs_inputpath + OBS6: ~/obs6_inputpath + obs4mips: ~/obs4mips_inputpath + ana4mips: ~/ana4mips_inputpath + native6: ~/native6_inputpath RAWOBS: ~/rawobs_inputpath default: ~/default_inputpath diff --git a/doc/sphinx/source/esmvaldiag/observations.rst b/doc/sphinx/source/esmvaldiag/observations.rst index 7d230cbf34..a829519ff3 100644 --- a/doc/sphinx/source/esmvaldiag/observations.rst +++ b/doc/sphinx/source/esmvaldiag/observations.rst @@ -18,6 +18,14 @@ data set for the use in ESMValTool. | `6. Naming convention of the observational data files`_ | `7. Test the cmorized dataset`_ +.. note:: + **CMORization as a fix.** As of early 2020, we've started implementing cmorization as + *fixes*. As compared to the workflow described below, this has the advantage that + the user does not need to store a duplicate (CMORized) copy of the data. Instead, the + CMORization is performed 'on the fly' when running a recipe. **ERA5** is the first dataset + for which this 'CMORization on the fly' is supported. For more information about fixes, + see: :ref:`fixing data ` + 1. Check if your variable is CMOR standard ========================================== diff --git a/esmvaltool/cmorizers/obs/cmor_config/ERA5.yml b/esmvaltool/cmorizers/obs/cmor_config/ERA5.yml deleted file mode 100644 index 74de6a2027..0000000000 --- a/esmvaltool/cmorizers/obs/cmor_config/ERA5.yml +++ /dev/null @@ -1,108 +0,0 @@ ---- -# Global attributes of NetCDF file -attributes: - dataset_id: ERA5 - project_id: OBS6 - tier: 3 - version: '1' - modeling_realm: reanaly - source: 'https://www.ecmwf.int/en/forecasts/datasets/reanalysis-datasets/era5' - reference: 'era5' - comment: 'Contains modified Copernicus Climate Change Service Information {year}' - -# Variables to CMORize -variables: - # time independent - orog: - mip: fx - raw: z - file: 'era5_orography_*_hourly.nc' - # hourly frequency - clt: - mip: E1hr - raw: tcc - file: 'era5_total_cloud_cover_*_hourly.nc' - evspsbl: - mip: E1hr - raw: e - file: 'era5_evaporation_*_hourly.nc' - evspsblpot: - mip: E1hr - raw: pev - file: 'era5_potential_evaporation_*_hourly.nc' - mrro: - mip: E1hr - raw: ro - file: 'era5_runoff_*_hourly.nc' - pr: - mip: E1hr - raw: tp - file: 'era5_total_precipitation_*_hourly.nc' - prsn: - mip: E1hr - raw: sf - file: 'era5_snowfall_*_hourly.nc' - ps: - mip: E1hr - raw: sp - file: 'era5_surface_pressure_*_hourly.nc' - psl: - mip: E1hr - raw: msl - file: 'era5_mean_sea_level_pressure_*_hourly.nc' - ptype: - mip: E1hr - raw: ptype - file: 'era5_precipitation_type_*_hourly.nc' - rls: - mip: E1hr - raw: msnlwrf - file: 'era5_mean_surface_net_long_wave_radiation_flux_*_hourly.nc' - rlds: - mip: E1hr - raw: strd - file: 'era5_surface_thermal_radiation_downwards_*_hourly.nc' - rsds: - mip: E1hr - raw: ssrd - file: 'era5_surface_solar_radiation_downwards_*_hourly.nc' - rsdt: - mip: E1hr - raw: tisr - file: 'era5_toa_incident_solar_radiation_*_hourly.nc' - rss: - mip: E1hr - raw: ssr - file: 'era5_surface_net_solar_radiation_*_hourly.nc' - uas: - mip: E1hr - raw: u10 - file: 'era5_10m_u_component_of_wind_*_hourly.nc' - vas: - mip: E1hr - raw: v10 - file: 'era5_10m_v_component_of_wind_*_hourly.nc' - tas: - mip: E1hr - raw: t2m - file: 'era5_2m_temperature_*_hourly.nc' - tasmax: - mip: E1hr - raw: mx2t - file: 'era5_maximum_2m_temperature_since_previous_post_processing_*_hourly.nc' - tasmin: - mip: E1hr - raw: mn2t - file: 'era5_minimum_2m_temperature_since_previous_post_processing_*_hourly.nc' - tdps: - mip: E1hr - raw: d2m - file: 'era5_2m_dewpoint_temperature_*_hourly.nc' - ts: - mip: E1hr - raw: skt - file: 'era5_skin_temperature_*_hourly.nc' - tsn: - mip: E1hr - raw: tsn - file: 'era5_temperature_of_snow_layer_*_hourly.nc' diff --git a/esmvaltool/cmorizers/obs/cmorize_obs_era5.py b/esmvaltool/cmorizers/obs/cmorize_obs_era5.py deleted file mode 100644 index c9c7519275..0000000000 --- a/esmvaltool/cmorizers/obs/cmorize_obs_era5.py +++ /dev/null @@ -1,227 +0,0 @@ -"""ESMValTool CMORizer for ERA5 data. - -Tier - Tier 3: restricted datasets (i.e., dataset which requires a registration - to be retrieved or provided upon request to the respective contact or PI). - -Source - https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels - https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels - -Last access - 20190718 - -Download and processing instructions - This cmorization script currently supports hourly data of the following -variables: - 10m_u_component_of_wind - 10m_v_component_of_wind - 2m_dewpoint_temperature - 2m_temperature - evaporation - maximum_2m_temperature_since_previous_post_processing - mean_sea_level_pressure - mean_surface_net_long_wave_radiation_flux - minimum_2m_temperature_since_previous_post_processing - orography [#1] - potential_evaporation - precipitation_type - runoff - skin_temperature - snowfall - surface_net_solar_radiation - surface_pressure - surface_solar_radiation_downwards - surface_thermal_radiation_downwards - temperature_of_snow_layer - toa_incident_solar_radiation - total_cloud_cover - total_precipitation - - Downloading ERA5 data can either be done via the Climate Data Store (cds) -web form or era5cli: - $pip install era5cli - $era5cli hourly --variables total_precipitation --startyear 1990 - - [#1]: orography is a time-invariant variable, but ERA5 makes it available - as an hourly variable (which is the same for each time step). The CMORizer - will remove the time dimension from the data and hence only works for 2D - data with a time dimension of length 1. Request example: - $era5cli hourly --variables orography - > --startyear 1989 --months 1 --days 1 --hours 12 - Note that the filename will still follow the pattern: - "era5_orography_yyyy_hourly.nc" - -""" - -import logging -import re -from concurrent.futures import ProcessPoolExecutor, as_completed -from copy import deepcopy -from datetime import datetime -from os import cpu_count -from pathlib import Path -from warnings import catch_warnings, filterwarnings - -import iris -import numpy as np -from dask import array as da - -from esmvalcore.cmor.table import CMOR_TABLES - -from . import utilities as utils - -logger = logging.getLogger(__name__) - - -def _load_cube(in_file, raw_name): - """Load variable `raw_name` from file.""" - invalid_units = { - 'e': 'm of water equivalent', - 'ptype': 'code table (4.201)', - 'sf': 'm of water equivalent', - 'tcc': '(0 - 1)', - } - with catch_warnings(): - for var_name, units in invalid_units.items(): - filterwarnings( - action='ignore', - message=(f"Ignoring netCDF variable '{var_name}' " - f"invalid units '{re.escape(units)}'"), - category=UserWarning, - module='iris', - ) - cube = iris.load_cube( - str(in_file), - constraint=utils.var_name_constraint(raw_name), - ) - return cube - - -def _fix_units(cube): - """Correct the units.""" - if cube.var_name in {'e', 'sf'}: - # Change evaporation and snowfall units from - # 'm of water equivalent' to m - cube.units = 'm' - if cube.var_name == 'tcc': - # Change cloud cover units '(0 - 1)' to valid fraction unit - cube.units = 1 - if cube.var_name in {'e', 'ro', 'sf', 'tp', 'pev'}: - # Change units from meters of water to kg of water - # and add missing 'per hour' - cube.units = cube.units * 'kg m-3 h-1' - cube.data = cube.core_data() * 1000. - if cube.var_name in {'ssr', 'ssrd', 'strd', 'tisr'}: - # Add missing 'per hour' - cube.units = cube.units * 'h-1' - if cube.var_name in {'msnlwrf', 'ssrd', 'strd', 'tisr', 'ssr'}: - # Radiation fluxes are positive in downward direction - cube.attributes['positive'] = 'down' - if cube.var_name == 'ptype': - cube.units = 1 - # Fix rounding errors and mask out 0 (reserved value) - cube.data = da.ma.masked_equal(da.round(cube.core_data()), 0) - - return cube - - -def _extract_variable(in_file, var, cfg, out_dir): - logger.info("CMORizing variable '%s' from input file '%s'", - var['short_name'], in_file) - attributes = deepcopy(cfg['attributes']) - attributes['mip'] = var['mip'] - cmor_table = CMOR_TABLES[attributes['project_id']] - definition = cmor_table.get_variable(var['mip'], var['short_name']) - - cube = _load_cube(in_file, var['raw']) - - # Set global attributes - utils.set_global_atts(cube, attributes) - - # Fix missing/invalid units - _fix_units(cube) - - # Remove time dimension and coordinate from invariant variables - if 'fx' in var['mip']: - cube = iris.util.squeeze(cube) - cube.remove_coord('time') - - # Set correct names - cube.var_name = definition.short_name - if definition.standard_name: - cube.standard_name = definition.standard_name - cube.long_name = definition.long_name - - # Fix data type - cube.data = cube.core_data().astype('float32') - - # Fix coordinates - cube.coord('latitude').var_name = 'lat' - cube.coord('longitude').var_name = 'lon' - - # Make latitude increasing - cube = cube[..., ::-1, :] - - for coord_name in 'latitude', 'longitude', 'time': - try: - coord = cube.coord(coord_name) - coord.points = coord.core_points().astype('float64') - coord.guess_bounds() - except iris.exceptions.CoordinateNotFoundError: - if 'fx' in var['mip']: - pass - else: - raise - - if cube.var_name in {'zg', 'orog'}: - # Divide by acceleration of gravity [m s-2], - # required for geopotential height, see: - # https://apps.ecmwf.int/codes/grib/param-db?id=129 - cube.units = cube.units / 'm s-2' - cube.data = cube.core_data() / 9.80665 - - if 'height2m' in definition.dimensions: - utils.add_scalar_height_coord(cube, 2.) - if 'height10m' in definition.dimensions: - utils.add_scalar_height_coord(cube, 10.) - - # Convert units if required - cube.convert_units(definition.units) - - logger.info("Saving cube\n%s", cube) - logger.info("Expected output size is %.1fGB", - np.prod(cube.shape) * 4 / 2**30) - utils.save_variable( - cube, - cube.var_name, - out_dir, - attributes, - local_keys=['positive'], - ) - - -def cmorization(in_dir, out_dir, cfg, _): - """Cmorization func call.""" - cfg['attributes']['comment'] = cfg['attributes']['comment'].format( - year=datetime.now().year) - cfg.pop('cmor_table') - - n_workers = int(cpu_count() / 1.5) - logger.info("Using at most %s workers", n_workers) - futures = {} - with ProcessPoolExecutor(max_workers=1) as executor: - for short_name, var in cfg['variables'].items(): - var['short_name'] = short_name - for in_file in sorted(Path(in_dir).glob(var['file'])): - future = executor.submit(_extract_variable, in_file, var, cfg, - out_dir) - futures[future] = in_file - - for future in as_completed(futures): - try: - future.result() - except: # noqa - logger.error("Failed to CMORize %s", futures[future]) - raise - logger.info("Finished CMORizing %s", futures[future]) diff --git a/esmvaltool/diag_scripts/cmorizers/era5.py b/esmvaltool/diag_scripts/cmorizers/era5.py new file mode 100644 index 0000000000..ceefc1cb24 --- /dev/null +++ b/esmvaltool/diag_scripts/cmorizers/era5.py @@ -0,0 +1,41 @@ +"""native6 diagnostic.""" + +import logging +import shutil +from pathlib import Path + +from esmvaltool.diag_scripts.shared import (get_diagnostic_filename, + run_diagnostic) + +from esmvalcore.cmor.table import CMOR_TABLES + +logger = logging.getLogger(Path(__file__).name) + + +def main(cfg): + """Rename preprocessed native6 file.""" + fixed_files = cfg['input_data'] + + for file, info in fixed_files.items(): + stem = Path(file).stem + basename = stem.replace('native', 'OBS') + + if info['diagnostic'] == 'daily': + for mip in ['day', 'Eday', 'CFday']: + if CMOR_TABLES['CMIP6'].get_variable(mip, info['short_name']): + basename = basename.replace('E1hr', mip) + basename = basename.replace('E1hr', 'day') + + if 'fx' not in basename: + end_year = basename[-4:] + basename = basename.replace(end_year, f'{int(end_year) - 1}') + + outfile = get_diagnostic_filename(basename, cfg) + logger.info('Moving %s to %s', file, outfile) + shutil.move(file, outfile) + + +if __name__ == '__main__': + + with run_diagnostic() as config: + main(config) diff --git a/esmvaltool/recipes/cmorizers/recipe_era5.yml b/esmvaltool/recipes/cmorizers/recipe_era5.yml new file mode 100644 index 0000000000..cdd434f99a --- /dev/null +++ b/esmvaltool/recipes/cmorizers/recipe_era5.yml @@ -0,0 +1,330 @@ +# ESMValTool +# recipe_era5.yml +--- +documentation: + description: CMORize ERA5 data + + authors: + - andela_bouwe + - kalverla_peter + + references: + - acknow_project + + projects: + - ewatercycle + +datasets: + # For the daily diagnostic, always add the next year, otherwise the last day is not cmor compatible + - {dataset: ERA5, project: native6, type: reanaly, version: '1', tier: 3, start_year: 1990, end_year: 1991} + +preprocessors: + add_one_day: &add_one_day + extract_time: + end_year: 1991 + end_month: 1 + end_day: 1 + + daily_mean: + <<: *add_one_day + daily_statistics: + operator: mean + daily_min: + <<: *add_one_day + daily_statistics: + operator: min + daily_max: + <<: *add_one_day + daily_statistics: + operator: max + +diagnostics: + hourly: + description: CMORize hourly ERA5 data + scripts: + rename: + script: cmorizers/era5.py + variables: + clt: + mip: E1hr + era5_name: total_cloud_cover + era5_freq: hourly + evspsbl: + mip: E1hr + era5_name: evaporation + era5_freq: hourly + evspsblpot: + mip: E1hr + era5_name: potential_evaporation + era5_freq: hourly + mrro: + mip: E1hr + era5_name: runoff + era5_freq: hourly + orog: + mip: fx + era5_name: orography + era5_freq: hourly + pr: + mip: E1hr + era5_name: total_precipitation + era5_freq: hourly + prsn: + mip: E1hr + era5_name: snowfall + era5_freq: hourly + psl: + mip: E1hr + era5_name: mean_sea_level_pressure + era5_freq: hourly + ps: + mip: E1hr + era5_name: surface_pressure + era5_freq: hourly + ptype: + mip: E1hr + era5_name: precipitation_type + era5_freq: hourly + rlds: + mip: E1hr + era5_name: surface_thermal_radiation_downwards + era5_freq: hourly + rls: + mip: E1hr + era5_name: mean_surface_net_long_wave_radiation_flux + era5_freq: hourly + rsds: + mip: E1hr + era5_name: surface_solar_radiation_downwards + era5_freq: hourly + rsdt: + mip: E1hr + era5_name: toa_incident_solar_radiation + era5_freq: hourly + rss: + mip: E1hr + era5_name: surface_net_solar_radiation + era5_freq: hourly + tas: + mip: E1hr + era5_name: 2m_temperature + era5_freq: hourly + tasmax: + mip: E1hr + era5_name: maximum_2m_temperature_since_previous_post_processing + era5_freq: hourly + tasmin: + mip: E1hr + era5_name: minimum_2m_temperature_since_previous_post_processing + era5_freq: hourly + tdps: + mip: E1hr + era5_name: 2m_dewpoint_temperature + era5_freq: hourly + ts: + mip: E1hr + era5_name: skin_temperature + era5_freq: hourly + tsn: + mip: E1hr + era5_name: temperature_of_snow_layer + era5_freq: hourly + uas: + mip: E1hr + era5_name: 10m_u_component_of_wind + era5_freq: hourly + vas: + mip: E1hr + era5_name: 10m_v_component_of_wind + era5_freq: hourly + + daily: + description: Create daily ERA5 data + scripts: + rename: + script: cmorizers/era5.py + variables: + clt: + mip: E1hr + era5_name: total_cloud_cover + era5_freq: hourly + preprocessor: daily_mean + evspsbl: + mip: E1hr + era5_name: evaporation + era5_freq: hourly + preprocessor: daily_mean + evspsblpot: + mip: E1hr + era5_name: potential_evaporation + era5_freq: hourly + preprocessor: daily_mean + mrro: + mip: E1hr + era5_name: runoff + era5_freq: hourly + preprocessor: daily_mean + pr: + mip: E1hr + era5_name: total_precipitation + era5_freq: hourly + preprocessor: daily_mean + prsn: + mip: E1hr + era5_name: snowfall + era5_freq: hourly + preprocessor: daily_mean + ps: + mip: E1hr + era5_name: surface_pressure + era5_freq: hourly + preprocessor: daily_mean + psl: + mip: E1hr + era5_name: mean_sea_level_pressure + era5_freq: hourly + preprocessor: daily_mean + rlds: + mip: E1hr + era5_name: surface_thermal_radiation_downwards + era5_freq: hourly + preprocessor: daily_mean + rls: + mip: E1hr + era5_name: mean_surface_net_long_wave_radiation_flux + era5_freq: hourly + preprocessor: daily_mean + rsds: + mip: E1hr + era5_name: surface_solar_radiation_downwards + era5_freq: hourly + preprocessor: daily_mean + rsdt: + mip: E1hr + era5_name: toa_incident_solar_radiation + era5_freq: hourly + preprocessor: daily_mean + rss: + mip: E1hr + era5_name: surface_net_solar_radiation + era5_freq: hourly + preprocessor: daily_mean + tas: + mip: E1hr + era5_name: 2m_temperature + era5_freq: hourly + preprocessor: daily_mean + tasmax: + mip: E1hr + era5_name: maximum_2m_temperature_since_previous_post_processing + era5_freq: hourly + preprocessor: daily_max + tasmin: + mip: E1hr + era5_name: minimum_2m_temperature_since_previous_post_processing + era5_freq: hourly + preprocessor: daily_min + tdps: + mip: E1hr + era5_name: 2m_dewpoint_temperature + era5_freq: hourly + preprocessor: daily_mean + ts: + mip: E1hr + era5_name: skin_temperature + era5_freq: hourly + preprocessor: daily_mean + tsn: + mip: E1hr + era5_name: temperature_of_snow_layer + era5_freq: hourly + preprocessor: daily_mean + uas: + mip: E1hr + era5_name: 10m_u_component_of_wind + era5_freq: hourly + preprocessor: daily_mean + vas: + mip: E1hr + era5_name: 10m_v_component_of_wind + era5_freq: hourly + preprocessor: daily_mean + + monthly: + description: CMORize monthly ERA5 data + scripts: + rename: + script: cmorizers/era5.py + variables: + clt: + mip: Amon + era5_name: total_cloud_cover + era5_freq: monthly + evspsbl: + mip: Amon + era5_name: evaporation + era5_freq: monthly + evspsblpot: + mip: Amon + era5_name: potential_evaporation + era5_freq: monthly + mrro: + mip: Amon + era5_name: runoff + era5_freq: monthly + pr: + mip: Amon + era5_name: total_precipitation + era5_freq: monthly + prsn: + mip: Amon + era5_name: snowfall + era5_freq: monthly + psl: + mip: Amon + era5_name: mean_sea_level_pressure + era5_freq: monthly + ps: + mip: Amon + era5_name: surface_pressure + era5_freq: monthly + ptype: + mip: Amon + era5_name: precipitation_type + era5_freq: monthly + rlds: + mip: Amon + era5_name: surface_thermal_radiation_downwards + era5_freq: monthly + rls: + mip: Amon + era5_name: mean_surface_net_long_wave_radiation_flux + era5_freq: monthly + rsds: + mip: Amon + era5_name: surface_solar_radiation_downwards + era5_freq: monthly + rsdt: + mip: Amon + era5_name: toa_incident_solar_radiation + era5_freq: monthly + rss: + mip: Amon + era5_name: surface_net_solar_radiation + era5_freq: monthly + tas: + mip: Amon + era5_name: 2m_temperature + era5_freq: monthly + tdps: + mip: Amon + era5_name: 2m_dewpoint_temperature + era5_freq: monthly + ts: + mip: Amon + era5_name: skin_temperature + era5_freq: monthly + tsn: + mip: Amon + era5_name: temperature_of_snow_layer + era5_freq: monthly diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index e8913521b1..ca7136cb1f 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -775,7 +775,7 @@ diagnostics: type: reanaly, version: Oct13, start_year: 1989, end_year: 2005} scripts: null - + MAC-LWP: description: MAC-LWP check variables: @@ -786,7 +786,7 @@ diagnostics: type: sat, version: v1, start_year: 1988, end_year: 2016} scripts: null - + MERRA2: description: MERRA2 check variables: