From 5fa786948db1e98c0e7c8694477387f25bc97860 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Sat, 19 Oct 2024 18:08:45 +0200 Subject: [PATCH 01/20] Drop ERA Interim support and update ERA5 CLI innterface --- setup.cfg | 18 +- src/ecmwf_models/cli.py | 365 +++++++++++++++++++ src/ecmwf_models/era5/download.py | 201 +---------- src/ecmwf_models/era5/era5-land_lut.csv | 1 + src/ecmwf_models/era5/era5_lut.csv | 7 +- src/ecmwf_models/era5/interface.py | 9 +- src/ecmwf_models/era5/reshuffle.py | 128 +------ src/ecmwf_models/erainterim/__init__.py | 1 - src/ecmwf_models/erainterim/download.py | 378 -------------------- src/ecmwf_models/erainterim/eraint_lut.csv | 80 ----- src/ecmwf_models/erainterim/interface.py | 193 ---------- src/ecmwf_models/erainterim/reshuffle.py | 218 ----------- src/ecmwf_models/glob.py | 16 + src/ecmwf_models/grid.py | 10 + src/ecmwf_models/interface.py | 9 +- src/ecmwf_models/utils.py | 121 ++++--- tests/tests_era5/test_era5_download.py | 3 +- tests/tests_eraint/test_eraint_download.py | 92 ----- tests/tests_eraint/test_eraint_interface.py | 288 --------------- tests/tests_eraint/test_eraint_reshuffle.py | 61 ---- 20 files changed, 497 insertions(+), 1702 deletions(-) create mode 100644 src/ecmwf_models/cli.py delete mode 100644 src/ecmwf_models/erainterim/__init__.py delete mode 100644 src/ecmwf_models/erainterim/download.py delete mode 100644 src/ecmwf_models/erainterim/eraint_lut.csv delete mode 100644 src/ecmwf_models/erainterim/interface.py delete mode 100644 src/ecmwf_models/erainterim/reshuffle.py create mode 100644 src/ecmwf_models/glob.py delete mode 100644 tests/tests_eraint/test_eraint_download.py delete mode 100644 tests/tests_eraint/test_eraint_interface.py delete mode 100644 tests/tests_eraint/test_eraint_reshuffle.py diff --git a/setup.cfg b/setup.cfg index 3a398b3..d5de20e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,7 +6,7 @@ name = ecmwf_models description = Downloading, reading and TS conversion of ECMWF 
reanalysis data author = TU Wien -author-email = remote.sensing@geo.tuwien.ac.at +author-email = support@qa4sm.eu license = mit long-description = file: README.rst long-description-content-type = text/x-rst; charset=UTF-8 @@ -34,19 +34,11 @@ install_requires = numpy pandas netcdf4 - pyresample - xarray + click cdsapi>=0.6.1 - ecmwf-api-client - datedown>=0.3 - pygeobase - pygeogrids repurpose - pynetcf>=0.5.0 - pygrib - configobj - requests - more_itertools + pygeogrids + # The usage of test_requires is discouraged, see `Dependency Management` docs # tests_require = pytest; pytest-cov # Require a specific Python version, e.g. Python 2.7 or >= 3.4 @@ -73,9 +65,7 @@ testing = # script_name = ecmwf_models.module:function # For example: console_scripts = - eraint_download = ecmwf_models.erainterim.download:run era5_download = ecmwf_models.era5.download:run - eraint_reshuffle = ecmwf_models.erainterim.reshuffle:run era5_reshuffle = ecmwf_models.era5.reshuffle:run # And any other entry points, for example: # pyscaffold.cli = diff --git a/src/ecmwf_models/cli.py b/src/ecmwf_models/cli.py new file mode 100644 index 0000000..cd4bc98 --- /dev/null +++ b/src/ecmwf_models/cli.py @@ -0,0 +1,365 @@ +import click +from datetime import datetime + +from ecmwf_models.era5.download import download_and_move +from ecmwf_models.utils import default_variables +from ecmwf_models.era5.reshuffle import img2ts + +@click.command( + "download", + context_settings={'show_default': True}, + short_help="Download ERA5 reanalysis data images between two " + "dates. Before this program can be used, you have to " + "register at the CDS and setup your .cdsapirc file " + "as described here: " + "https://cds.climate.copernicus.eu/api-how-to") +@click.argument("PATH", type=click.Path(writable=True)) +@click.option( + '--start', '-s', + type=click.STRING, + default=str(datetime(1979, 1, 1)), + help="First date of the period to download data for. 
Format: YYYY-MM-DD") +@click.option( + '--end', '-e', + type=click.STRING, + default=str(datetime.now().date()), + help="Last date of the period to download data for. Format: YYYY-MM-DD") +@click.option( + '--variables', + '-var', + multiple=True, + type=click.STRING, + default=default_variables('era5'), + help="Name of variables to download. For all available variables see: " + "https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation#ERA5:datadocumentation-Parameterlistings") +@click.option( + "-k", + "--keep", + type=click.BOOL, + default=False, + help="Also keep the original image image stack as downloaded from CDS " + "instead of deleting it after extracting individual images. " + "Default is False.") +@click.option( + "-grb", + "--as_grib", + type=click.BOOL, + default=False, + help="Download data in grib format instead of netcdf. " + "Pass either True or False. Default is False.") +@click.option( + "--h_steps", + type=int, + multiple=True, + default=[0, 6, 12, 18], + nargs=4, + help="Temporal sampling of downloaded images. " + "Pass a set of full hours here, like '--h_steps 0 12' to download " + "two images for each day, at 0:00 and 12:00 respectively. " + "By default, we download 6-hourly images starting at 0:00 UTC, " + "(i.e., 0 6 12 18)") +@click.option( + "--max_request_size", + type=int, + default=1000, + nargs=4, + help="Maximum number of requests that the CDS API allows. " + "The default is 1000, but depends on server side settings. " + "Server settings may change at some point. Change " + "accordingly here in case that 'the request is too large'. " + "A smaller number will results in smaller download chunks.") +def cli_download_era5(path, start, end, variables, keep_original, as_grib, + h_steps, max_request_size): + """ + Download ERA5 image data within a chosen period. 
NOTE: Before using this + program, create a CDS account and set up a `.cdsapirc` file as described + here: https://cds.climate.copernicus.eu/api-how-to + + \b + Required Parameters + ------------------- + PATH: string (required) + Root of local filesystem where the downloaded data will be stored. + Make sure to set up the CDS API for your account as describe in + https://cds.climate.copernicus.eu/api-how-to + """ + status_code = download_and_move( + target_path=path, + startdate=start, + enddate=end, + product="era5", + variables=variables, + h_steps=h_steps, + grb=as_grib, + keep_original=keep_original, + stepsize='month', + n_max_request=max_request_size, + ) + + return status_code + + +@click.command( + "download", + context_settings={'show_default': True}, + short_help="Download ERA5-LAND reanalysis data images between two " + "dates. Before this program can be used, you have to " + "register at the CDS and setup your .cdsapirc file " + "as described here: " + "https://cds.climate.copernicus.eu/api-how-to " + "Alternatively, you can pass your UID:KEY here using the " + "cds_token option.") +@click.argument("PATH", type=click.Path(writable=True)) +@click.option( + '--start', '-s', + type=click.STRING, + default=str(datetime(1979, 1, 1)), + help="First date of the period to download data for. Format: YYYY-MM-DD") +@click.option( + '--end', '-e', + type=click.STRING, + default=str(datetime.now().date()), + help="Last date of the period to download data for. Format: YYYY-MM-DD") +@click.option( + '--variables', + '-var', + multiple=True, + type=click.STRING, + default=default_variables('era5-land'), + help="Name of variables to download. 
For all available variables see: " + "https://confluence.ecmwf.int/display/CKB/ERA5-Land%3A+data+documentation#ERA5Land:datadocumentation-parameterlistingParameterlistings") +@click.option( + "-k", + "--keep", + type=click.BOOL, + default=False, + help="Also keep the original image image stack as downloaded from CDS " + "instead of deleting it after extracting individual images. " + "Default is False.") +@click.option( + "-grb", + "--as_grib", + type=click.BOOL, + default=False, + help="Download data in grib format instead of netcdf. " + "Pass either True or False. Default is False.") +@click.option( + "--h_steps", + type=int, + multiple=True, + default=[0, 6, 12, 18], + nargs=4, + help="Temporal sampling of downloaded images. " + "Pass a set of full hours here, like '--h_steps 0 12' to download " + "two images for each day, at 0:00 and 12:00 respectively. " + "By default, we download 6-hourly images starting at 0:00 UTC, " + "(i.e., 0 6 12 18)") +@click.option( + "--max_request_size", + type=int, + default=1000, + nargs=4, + help="Maximum number of requests that the CDS API allows. " + "The default is 1000, but depends on server side settings. " + "Server settings may change at some point. Change " + "accordingly here in case that 'the request is too large'. " + "A smaller number will results in smaller download chunks.") +@click.option( + "--cds_token", + type=click.STRING, + default=None, + help="To identify with the CDS. Only required/used if no .cdsapi file " + "exists in your home directory (see documentation)." + "Consists of your UID and API Key . Both can be " + "found under your CDS User profile page.") +def cli_download_era5land(path, start, end, variables, keep_original, as_grib, + h_steps, max_request_size): + """ + Download ERA5-Land image data within a chosen period. 
NOTE: Before using this + program, create a CDS account and set up a `.cdsapirc` file as described + here: https://cds.climate.copernicus.eu/api-how-to + + \b + Required Parameters + ------------------- + PATH: string (required) + Root of local filesystem where the downloaded data will be stored. + Make sure to set up the CDS API for your account as describe in + https://cds.climate.copernicus.eu/api-how-to + """ + status_code = download_and_move( + target_path=path, + startdate=start, + enddate=end, + product="era5-land", + variables=variables, + h_steps=h_steps, + grb=as_grib, + keep_original=keep_original, + stepsize='month', + n_max_request=max_request_size, + ) + + return status_code + + +@click.command( + "update_img", + context_settings={'show_default': True}, + short_help="Extend an existing set of images by downloading new data.") +@click.argument("path", type=click.Path(writable=True)) +@click.option( + "--cds_token", + type=click.STRING, + default=None, + help="To identify with the CDS. Required if no .cdsapi file exists. " + "In the home directory (see documentation)." + "Consists of your UID and API Key . Both can be " + "found under your CDS User profile page.") +def cli_update_img(path, fntempl, cds_token=None): + """ + Extend a locally existing C3S SM record by downloading new files that + don't yet exist locally. + This will find the latest available local file, and download all + available extensions. + NOTE: Use the `c3s_sm download` program first do create a local record + to update with this function. + + \b + Required Parameters + ------------------- + PATH: string + Path where previously downloaded C3S SM images are stored. + Make sure to set up the CDS API for your account as describe in + https://cds.climate.copernicus.eu/api-how-to + """ + # The docstring above is slightly different to the normal python one to + # display it properly on the command line. 
+ + # if not set, use URL from const + if 'CDSAPI_URL' not in os.environ: + os.environ['CDSAPI_URL'] = cds_api_url + + if cds_token is not None: + os.environ["CDSAPI_KEY"] = cds_token + + check_api_read() + + props = img_infer_file_props(path, fntempl=fntempl, start_from='last') + + freq = props['freq'].lower() + product = props['product'].lower() + version = props['version'] + + startdate = first_missing_date(props['datetime'], freq=freq) + + print( + f"Update C3S SM images: " + f"Fetching latest data for C3S SM CDR/ICDR {freq} {product} {version} " + f"after {startdate.isoformat()} into {path}.") + + download_and_extract( + path, + startdate=startdate, + freq=freq, + version=version, + product=product, + keep_original=False) + + +@click.command( + "reshuffle", + context_settings={'show_default': True}, + short_help="Convert downloaded ERA5/ERA5-Land image data into time series.") +@click.argument("IMG_PATH", type=click.Path(readable=True)) +@click.argument("TS_PATH", type=click.Path(writable=True)) +@click.argument("START", type=click.STRING) +@click.argument("END", type=click.STRING) +@click.option( + '--variables', + '-var', + multiple=True, + type=click.STRING, + default=None, + help="Subset of variables to convert (short names, as in the input " + "images). If not specified, all variables are be used.") +@click.option( + '--land_points', '-l', is_flag=True, show_default=True, + default=False, + help="Store only time series for points that are over land. " + "Default is False.") +@click.option( + '--bbox', + nargs=4, + type=click.FLOAT, + help="4 NUMBERS | min_lon min_lat max_lon max_lat. " + "Set Bounding Box (lower left and upper right corner) " + "of area to reshuffle (WGS84). [default: -180 -90 180 90]") +@click.option( + "--h_steps", + type=int, + multiple=True, + default=[0, 6, 12, 18], + nargs=4, + help="Full hour time stamps to include in time series. E.g. 
" + "'--h_steps 0 12' will only include data from images at 0:00 and " + "12:00 UTC and ignore all other available time stamps." + "By default, we include data for every 6th hour per day, " + "(i.e., 0 6 12 18)") +@click.option( + '--imgbuffer', + '-b', + type=click.INT, + default=50, + help="NUMBER | Number of images to read into memory at once before " + "conversion to time series. A larger buffer means faster" + " processing but requires more memory.") +def cli_reshuffle(img_path, ts_path, start, end, variables, land_points, bbox, + h_steps, imgbuffer): + """ + Convert downloaded ERA5 or ERA5-Land image data into a time series format. + + \b + Required Parameters + ------------------- + IMG_PATH: string (required) + Root of local filesystem where the downloaded image data is stored. + TS_PATH: string (required) + Root of local filesystem where the time series should be stored. + START: string (required) + Date of the first image to include in the conversion. Format: YYYY-MM-DD + END: string (required) + Date of the last image to include in the conversion. 
Format: YYYY-MM-DD + """ + + print(f"Converting ERA5/ERA5-Land data from {start} to {end} into " + f"{ts_path}.") + + img2ts( + input_root=img_path, + outputpath=ts_path, + startdate=start, + enddate=end, + variables=variables, + bbox=bbox, + h_steps=h_steps, + land_points=land_points, + imgbuffer=imgbuffer, + ) + +@click.group(short_help="ERA5 Command Line Programs imported from the " + "`ecmwf_models` pip package.") +def era5(): + pass + + +@click.group(short_help="ERA5-Land Command Line Programs imported from the " + "`ecmwf_models` pip package.") +def era5land(): + pass + + +era5.add_command(cli_download_era5) +era5.add_command(cli_reshuffle) +era5land.add_command(cli_download_era5land) +era5land.add_command(cli_reshuffle) \ No newline at end of file diff --git a/src/ecmwf_models/era5/download.py b/src/ecmwf_models/era5/download.py index 36572a9..9ca0b57 100644 --- a/src/ecmwf_models/era5/download.py +++ b/src/ecmwf_models/era5/download.py @@ -5,17 +5,12 @@ import pandas as pd from ecmwf_models.utils import ( - load_var_table, lookup, save_gribs_from_grib, save_ncs_from_nc, - mkdate, - str2bool, + default_variables, split_array ) import warnings -import errno -import argparse -import sys import os import logging from datetime import datetime, time @@ -25,43 +20,6 @@ import numpy as np -def default_variables(product="era5"): - """ - These variables are being downloaded, when None are passed by the user - - Parameters - --------- - product : str, optional (default: 'era5') - Name of the era5 product to read the default variables for. - Either 'era5' or 'era5-land'. - """ - lut = load_var_table(name=product) - defaults = lut.loc[lut["default"] == 1]["dl_name"].values - return defaults.tolist() - - -def split_array(array, chunk_size): - """ - Split an array into chunks of a given size. 
- - Parameters - ---------- - array : array-like - Array to split into chunks - chunk_size : int - Size of each chunk - - Returns - ------- - chunks : list - List of chunks - """ - chunks = [] - for i in range(0, len(array), chunk_size): - chunks.append(array[i:i + chunk_size]) - return chunks - - def split_chunk(timestamps, n_vars, n_hsteps, @@ -254,7 +212,7 @@ def download_and_move( enddate: datetime last date to download product : str, optional (default: ERA5) - Either ERA5 or ERA5Land + Either ERA5 or ERA5-Land variables : list, optional (default: None) Name of variables to download keep_original: bool (default: False) @@ -313,11 +271,20 @@ def download_and_move( variables = lookup(name=product, variables=variables) variables = variables["dl_name"].values.tolist() + logger = logging.getLogger('dl_logger') + if dry_run: warnings.warn("Dry run does not create connection to CDS") c = None cds_status_tracker = None else: + if not check_api_read(): + raise ValueError( + "Cannot establish connection to CDS. Please set up" + "your CDS API key as described at " + "https://cds.climate.copernicus.eu/api-how-to") + + os.makedirs(target_path, exist_ok=True) cds_status_tracker = CDSStatusTracker() c = cdsapi.Client( error_callback=cds_status_tracker.handle_error_function) @@ -438,149 +405,3 @@ def download_and_move( # if any of the sub-periods was successful we want the function to return 0 consolidated_status_code = max(all_status_codes) return consolidated_status_code - - -def parse_args(args): - """ - Parse command line parameters for recursive download - - Parameters - ---------- - args : list - Command line parameters as list of strings - - Returns - ---------- - clparams : argparse.Namespace - Parsed command line parameters - """ - - parser = argparse.ArgumentParser( - description="Download ERA 5 reanalysis data images between two " - "dates. 
Before this program can be used, you have to " - "register at the CDS and setup your .cdsapirc file " - "as described here: " - "https://cds.climate.copernicus.eu/api-how-to") - parser.add_argument( - "localroot", - help="Root of local filesystem where the downloaded data will be " - "stored.", - ) - parser.add_argument( - "-s", - "--start", - type=mkdate, - default=datetime(1979, 1, 1), - help=("Startdate in format YYYY-MM-DD. " - "If no data is found there then the first available date of the " - "product is used."), - ) - parser.add_argument( - "-e", - "--end", - type=mkdate, - default=datetime.now(), - help=("Enddate in format YYYY-MM-DD. " - "If not given then the current date is used."), - ) - parser.add_argument( - "-p", - "--product", - type=str, - default="ERA5", - help=("The ERA5 product to download. Choose either ERA5 or ERA5-Land. " - "Default is ERA5."), - ) - parser.add_argument( - "-var", - "--variables", - metavar="variables", - type=str, - default=None, - nargs="+", - help=("Name of variables to download. If None are passed, we use the " - "default ones from the " - "era5_lut.csv resp. era5-land_lut.csv files in this package. " - "See the ERA5/ERA5-LAND documentation for more variable names: " - " https://confluence.ecmwf.int/display/CKB/" - "ERA5+data+documentation " - " https://confluence.ecmwf.int/display/CKB/" - "ERA5-Land+data+documentation"), - ) - parser.add_argument( - "-keep", - "--keep_original", - type=str2bool, - default="False", - help=("Also keep the downloaded image stack as retrieved from CDS - " - "before slicing it into single images - instead of deleting it " - "after extracting all images. " - "Pass either True or False. Default is False."), - ) - parser.add_argument( - "-grb", - "--as_grib", - type=str2bool, - default="False", - help=("Download data in grib format instead of netcdf. " - "Pass either True or False. 
Default is False."), - ) - parser.add_argument( - "--h_steps", - type=int, - default=[0, 6, 12, 18], - nargs="+", - help=("Temporal resolution of downloaded images. " - "Pass a set of full hours here, like '--h_steps 0 12'. " - "By default 6H images (starting at 0:00 UTC, i.e. 0 6 12 18) " - "will be downloaded"), - ) - - parser.add_argument( - "--max_request_size", - type=int, - default=1000, - help=("Maximum number of requests that the CDS API allows. " - "The default is 1000, but depends on server side settings. " - "Server settings may change at some point. Change " - "accordingly here in case that 'the request is too large'. " - "A smaller number will results in smaller download chunks.")) - - args = parser.parse_args(args) - - print("Downloading {p} {f} files between {s} and {e} into folder {root}" - .format( - p=args.product, - f="grib" if args.as_grib is True else "netcdf", - s=args.start.isoformat(), - e=args.end.isoformat(), - root=args.localroot, - )) - return args - - -def main(args): - args = parse_args(args) - status_code = download_and_move( - target_path=args.localroot, - startdate=args.start, - enddate=args.end, - product=args.product, - variables=args.variables, - h_steps=args.h_steps, - grb=args.as_grib, - keep_original=args.keep_original, - stepsize='month', - n_max_request=args.max_request_size, - ) - return status_code - - -def run(): - status_code = main(sys.argv[1:]) - if status_code == -10: - return_code = errno.ENODATA # Default no data status code of 61 - else: - return_code = status_code - - sys.exit(return_code) diff --git a/src/ecmwf_models/era5/era5-land_lut.csv b/src/ecmwf_models/era5/era5-land_lut.csv index be6911d..ea14871 100644 --- a/src/ecmwf_models/era5/era5-land_lut.csv +++ b/src/ecmwf_models/era5/era5-land_lut.csv @@ -43,6 +43,7 @@ surface_sensible_heat_flux,Surface sensible heat flux,sshf,1 surface_solar_radiation_downwards,Surface solar radiation downwards,ssrd,1 surface_thermal_radiation_downwards,Surface thermal radiation 
downwards,strd,1 temperature_of_snow_layer,Temperature of snow layer,tsn,1 +total_evaporation,Evaporation,e,1 total_precipitation,Total precipitation,tp,1 volumetric_soil_water_layer_1,Volumetric soil water layer 1,swvl1,1 volumetric_soil_water_layer_2,Volumetric soil water layer 2,swvl2,1 diff --git a/src/ecmwf_models/era5/era5_lut.csv b/src/ecmwf_models/era5/era5_lut.csv index 48393e3..5038aec 100644 --- a/src/ecmwf_models/era5/era5_lut.csv +++ b/src/ecmwf_models/era5/era5_lut.csv @@ -5,15 +5,10 @@ dl_name,long_name,short_name,default 10m_u_component_of_wind,10 metre U wind component,u10,0 10m_v_component_of_neutral_wind,Neutral wind at 10 m v-component,v10n,0 10m_v_component_of_wind,10 metre V wind component,v10,0 -10m_wind_direction,10 metre wind direction,dwi,0 10m_wind_gust_since_previous_post_processing,10 metre wind gust since previous post-processing,fg10,0 -10m_wind_speed,10 metre wind speed,wind,0 2m_dewpoint_temperature,2 metre dewpoint temperature,d2m,0 2m_temperature,2 metre temperature,t2m,0 air_density_over_the_oceans,Air density over the oceans,p140209,0 -altimeter_corrected_wave_height,Altimeter corrected wave height,acwh,0 -altimeter_range_relative_correction,Altimeter range relative correction,arrc,0 -altimeter_wave_height,Altimeter wave height,awh,0 angle_of_sub_gridscale_orography,Angle of sub-gridscale orography,anor,0 anisotropy_of_sub_gridscale_orography,Anisotropy of sub-gridscale orography,isor,0 benjamin_feir_index,Benjamin-Feir index,bfi,0 @@ -39,6 +34,7 @@ forecast_logarithm_of_surface_roughness_for_heat,Forecast logarithm of surface r forecast_surface_roughness,Forecast surface roughness,fsr,0 free_convective_velocity_over_the_oceans,Free convective velocity over the oceans,p140208,0 friction_velocity,Friction velocity,zust,0 +geopotential,Geopotential,z,0 gravity_wave_dissipation,Gravity wave dissipation,gwd,0 high_cloud_cover,High cloud cover,hcc,0 high_vegetation_cover,High vegetation cover,cvh,0 @@ -147,7 +143,6 @@ 
normalized_energy_flux_into_waves,Normalized energy flux into waves,phiaw,0 normalized_stress_into_ocean,Normalized stress into ocean,tauoc,0 northward_gravity_wave_surface_stress,Northward gravity wave surface stress,mgws,0 northward_turbulent_surface_stress,Northward turbulent surface stress,nsss,0 -orography,Geopotential,z,0 peak_wave_period,Peak wave period,pp1d,0 period_corresponding_to_maximum_individual_wave_height,Period corresponding to maximum individual wave height,tmax,0 potential_evaporation,Potential evaporation,pev,1 diff --git a/src/ecmwf_models/era5/interface.py b/src/ecmwf_models/era5/interface.py index 48c85a6..5cf6c2d 100644 --- a/src/ecmwf_models/era5/interface.py +++ b/src/ecmwf_models/era5/interface.py @@ -5,19 +5,18 @@ and grib base classes, that are used for reading all ecmwf products. """ -from ecmwf_models.interface import ERANcImg, ERANcDs, ERAGrbImg, ERAGrbDs from typing import Optional, Collection from typing_extensions import Literal from pygeogrids.grids import CellGrid -# ERA5 products supported by the reader. 
-_supported_products = ['era5', 'era5-land'] +from ecmwf_models.interface import ERANcImg, ERANcDs, ERAGrbImg, ERAGrbDs +from ecmwf_models.glob import SUPPORTED_PRODUCTS def _assert_product(product: str) -> str: - if product not in _supported_products: + if product not in SUPPORTED_PRODUCTS: raise ValueError(f"Got product {product} but expected one of " - f"{_supported_products}") + f"{SUPPORTED_PRODUCTS}") return product diff --git a/src/ecmwf_models/era5/reshuffle.py b/src/ecmwf_models/era5/reshuffle.py index e0b10bd..5ed98d8 100644 --- a/src/ecmwf_models/era5/reshuffle.py +++ b/src/ecmwf_models/era5/reshuffle.py @@ -6,18 +6,19 @@ """ import os +import pandas as pd import sys import argparse import numpy as np +from datetime import time, datetime from repurpose.img2ts import Img2Ts + from ecmwf_models.era5.interface import ERA5NcDs, ERA5GrbDs from ecmwf_models.grid import ERA_RegularImgGrid, ERA5_RegularImgLandGrid from ecmwf_models.utils import mkdate, parse_filetype, parse_product, str2bool -from datetime import time, datetime - -def reshuffle( +def img2ts( input_root, outputpath, startdate, @@ -39,10 +40,12 @@ def reshuffle( Input path where ERA image data was downloaded to. outputpath : str Output path, where the reshuffled netcdf time series are stored. - startdate : datetime + startdate : str Start date, from which images are read and time series are generated. - enddate : datetime + Format YYYY-mm-dd + enddate : str End date, from which images are read and time series are generated. + Format YYYY-mm-dd variables: tuple or list or str Variables to read from the passed images and convert into time series format. @@ -67,6 +70,9 @@ def reshuffle( a single image. 
""" + startdate = pd.to_datetime(startdate).to_pydatetime().date() + enddate = pd.to_datetime(enddate).to_pydatetime().date() + if h_steps is None: h_steps = (0, 6, 12, 18) @@ -124,7 +130,7 @@ def reshuffle( global_attr = {f"product": f"{product.upper()} (from {filetype})"} # get time series attributes from first day of data. - first_date_time = datetime.combine(startdate.date(), time(h_steps[0], 0)) + first_date_time = datetime.combine(startdate, time(h_steps[0], 0)) # get time series attributes from first day of data. data = input_dataset.read(first_date_time) @@ -133,8 +139,8 @@ def reshuffle( reshuffler = Img2Ts( input_dataset=input_dataset, outputpath=outputpath, - startdate=startdate, - enddate=enddate, + startdate=str(startdate), + enddate=str(enddate), input_grid=grid, imgbuffer=imgbuffer, cellsize_lat=5.0, @@ -144,109 +150,7 @@ def reshuffle( zlib=True, unlim_chunksize=1000, ts_attributes=ts_attributes, + backend='multiprocessing', + n_proc=1, ) reshuffler.calc() - - -def parse_args(args): - """ - Parse command line parameters for conversion from image to time series. - - Parameters - ---------- - args: list - command line parameters as list of strings - - Returns - ---------- - args : argparse.Namespace - Parsed command line parameters - """ - - parser = argparse.ArgumentParser( - description="Convert downloaded ERA5/ERA5-Land image data into time " - "series format.") - parser.add_argument( - "dataset_root", - help="Root of local filesystem where the image data is stored.", - ) - parser.add_argument( - "timeseries_root", - help="Root of local filesystem where the time series should " - "be stored.", - ) - parser.add_argument( - "start", type=mkdate, help=("Startdate in format YYYY-MM-DD")) - parser.add_argument( - "end", type=mkdate, help=("Enddate in format YYYY-MM-DD")) - parser.add_argument( - "variables", - metavar="variables", - nargs="+", - help=("Short name of variables as stored in the images, " - "which are reshuffled. 
" - "See documentation on image download for resp. ERA products, " - "for more information on variable names of the product. "), - ) - parser.add_argument( - "--land_points", - type=str2bool, - default="False", - help=("Store only time series for points that are over land. " - "Default is False."), - ) - parser.add_argument( - "--bbox", - type=float, - default=None, - nargs=4, - help=("min_lon min_lat max_lon max_lat. " - "Bounding Box (lower left and upper right corner) " - "of area to reshuffle (WGS84). By default all data is loaded."), - ) - parser.add_argument( - "--h_steps", - type=int, - default=None, - nargs="+", - help=( - "Time steps (full hours) of images that will be reshuffled " - "(must be in the images). " - "By default 6H images (starting at 0:00 UTC) will be reshuffled: " - "0 6 12 18"), - ) - parser.add_argument( - "--imgbuffer", - type=int, - default=50, - help=( - "How many images to read at once. Bigger numbers make the " - "conversion faster but consume more memory. Choose this according " - "to your system and the size of a single image. 
Default is 50."), - ) - args = parser.parse_args(args) - - print("Converting ERA5/ERA5-Land data from {} to {} into {}.".format( - args.start.isoformat(), args.end.isoformat(), args.timeseries_root)) - - return args - - -def main(args): - args = parse_args(args) - - reshuffle( - input_root=args.dataset_root, - outputpath=args.timeseries_root, - startdate=args.start, - enddate=args.end, - variables=args.variables, - bbox=args.bbox, - h_steps=args.h_steps, - land_points=args.land_points, - imgbuffer=args.imgbuffer, - ) - - -def run(): - main(sys.argv[1:]) diff --git a/src/ecmwf_models/erainterim/__init__.py b/src/ecmwf_models/erainterim/__init__.py deleted file mode 100644 index 40a96af..0000000 --- a/src/ecmwf_models/erainterim/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- diff --git a/src/ecmwf_models/erainterim/download.py b/src/ecmwf_models/erainterim/download.py deleted file mode 100644 index 07986d3..0000000 --- a/src/ecmwf_models/erainterim/download.py +++ /dev/null @@ -1,378 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Module to download ERA Interim from terminal. 
-""" - -from ecmwfapi import ECMWFDataServer -import argparse -import sys -from datetime import datetime, timedelta -import shutil -import os -import warnings - -from ecmwf_models.utils import ( - load_var_table, - save_ncs_from_nc, - save_gribs_from_grib, - lookup, - mkdate, - str2bool -) - - -def default_variables() -> list: - "These variables are being downloaded, when None are passed by the user" - lut = load_var_table(name="ERAINT") - defaults = lut.loc[lut["default"] == 1]["dl_name"].values - return defaults.tolist() - - -def download_eraint( - target_path, - start, - end, - variables, - grid_size=None, - type="fc", - h_steps=(0, 6, 12, 18), - grb=False, - dry_run=False, - steps=(0,), -): - """ - Download era interim data - - Parameters - ---------- - target_path : str - path at which to save the downloaded grib file - start : date - start date - end : date - end date - variables : list - parameter ids, see wiki - product : str, optional - Name of the model, "ERA-interim" (default) or "ERA5" - grid_size: [float,float], optional - size of the grid in form (lon, lat), which the data is resampled to - If None is passed the minimum grid for the accoring product is chosen - h_steps: tuple, optional (default: (0, 6, 12, 18)) - List of full hours to download data at the selected dates - grb: bool, optional (default: False) - Download data as grb files instead of nc files - dry_run: bool - Do not download anything, this is just used for testing the functions - """ - if dry_run: - warnings.warn("Dry run does not create connection to ECMWF") - server = None - else: - server = ECMWFDataServer() - - param_strings = [] - - dataset = "interim" - dataclass = "ei" - - for variable in variables: - param_strings.append(str(variable)) - - timestep_strings = [] - for timestep in h_steps: - timestep_strings.append("%02d" % timestep) - - param_string = "/".join(param_strings) - timestep_string = "/".join(timestep_strings) - date_string = "%s/to/%s" % ( - 
start.strftime("%Y-%m-%d"), - end.strftime("%Y-%m-%d"), - ) - - grid_size = "%f/%f" % (grid_size[0], grid_size[1]) if grid_size else None - - step = "/".join([str(s) for s in steps]) - # ATTENTION: When downloading netcdf files steps and times - # must not overlap!! see: - # https://software.ecmwf.int/wiki/display/CKB/What+to+do+with+ECCODES+ERROR+%3A+Try+using+the+-T+option # noqa: E501 - - dl_params = { - "class": dataclass, - "dataset": dataset, - "expver": "1", - "stream": "oper", - "type": type, - "levtype": "sfc", - "param": param_string, - "date": date_string, - "time": timestep_string, - "step": step, - "grid": grid_size, - "format": "grib1" if grb else "netcdf", - "target": target_path, - } - - if not grid_size: - if not grb: - grid_size = "%f/%f" % (0.75, 0.75) - dl_params["grid"] = grid_size - else: - del dl_params["grid"] - else: - if any(size < 0.75 for size in grid_size): - raise Warning( - "Custom grid smaller than original ERA Interim resolution. " - "See https://software.ecmwf.int/wiki/display/CKB/" - "Does+downloading+data+at+higher+resolution+improve+the+output" # noqa: E501 - ) - if not dry_run: - server.retrieve(dl_params) - - -def download_and_move( - target_path, - startdate, - enddate, - variables=None, - keep_original=False, - grid_size=None, - type="an", - h_steps=(0, 6, 12, 18), - steps=(0,), - grb=False, - dry_run=False, -): - """ - Downloads the data from the ECMWF servers and moves them to the target - path. This is done in 30 days increments between start and end date to - be efficient with the MARS system. - See the recommendation for doing it this way in - https://software.ecmwf.int/wiki/display/WEBAPI/ERA-Interim+daily+retrieval+efficiency - - The files are then extracted into separate grib/nc files and stored in - yearly folders under the target_path. 
- - Parameters - ---------- - target_path: str - Path to which to copy the extracted parameter files - startdate: datetime - First date to download - enddate: datetime - Last date to download - variables : list, optional (default: None) - List of variable ids to pass to the client, if None are passed, - the default variable ids will be downloaded. - keep_original: bool, optional (default: False) - Keep the original downloaded data - grid_size: list, optional (default: None) - [lon, lat] extent of the grid (regular for netcdf, at lat=0 for grib) - If None is passed, the default grid size for the data product is used. - type : str, optional (default: 'an') - Data stream, model to download data for (fc=forecase) - h_steps: list, optional (default: [0, 6, 12, 18]) - List of full hours to download data at the selected dates - grb: bool, optional (default: False) - Download data as grib files instead of netcdf files - dry_run: bool - Do not download anything, this is just used for testing the functions - """ - product = "eraint" - if variables is None: - variables = default_variables() - else: - # find the dl_names - variables = lookup(name=product, variables=variables) - variables = variables["dl_name"].values.tolist() - - td = timedelta(days=30) - current_start = startdate - - while current_start <= enddate: - current_end = current_start + td - if current_end >= enddate: - current_end = enddate - - fname = "{start}_{end}.{ext}".format( - start=current_start.strftime("%Y%m%d"), - end=current_end.strftime("%Y%m%d"), - ext="grb" if grb else "nc", - ) - - downloaded_data_path = os.path.join(target_path, "temp_downloaded") - if not os.path.exists(downloaded_data_path): - os.mkdir(downloaded_data_path) - - dl_file = os.path.join(downloaded_data_path, fname) - - download_eraint( - dl_file, - current_start, - current_end, - variables, - grid_size=grid_size, - h_steps=h_steps, - type=type, - steps=steps, - grb=grb, - dry_run=dry_run, - ) - - if grb: - 
save_gribs_from_grib(dl_file, target_path, product.upper()) - else: - save_ncs_from_nc(dl_file, target_path, product.upper()) - - if not keep_original: - shutil.rmtree(downloaded_data_path) - current_start = current_end + timedelta(days=1) - - -def parse_args(args): - """ - Parse command line parameters for recursive download - - Parameters - ---------- - args : list - Command line parameters as list of strings - - Returns - ---------- - clparams : argparse.Namespace - Parsed command line parameters - """ - - parser = argparse.ArgumentParser( - description="Download ERA Interim data (6H) between two dates. " - "Before this program can be used, you have to register at ECMWF " - "and setup your .ecmwfapirc file as described here: " - "https://confluence.ecmwf.int//display/WEBAPI/Access+ECMWF+Public+Datasets#AccessECMWFPublicDatasets-key" # noqa: E501 - ) - parser.add_argument( - "localroot", - help="Root of local filesystem where the downloaded data is stored.", - ) - parser.add_argument( - "-s", - "--start", - type=mkdate, - default=datetime(1979, 1, 1), - help=( - "Startdate in format YYYY-MM-DD. " - "If no data is found there then the first available date of " - "the product is used." - ), - ) - parser.add_argument( - "-e", - "--end", - type=mkdate, - default=datetime.now(), - help=( - "Enddate in format YYYY-MM-DD. " - "If not given then the current date is used." - ), - ) - parser.add_argument( - "-var", - "--variables", - metavar="variables", - type=str, - default=None, - nargs="+", - help=( - "Name of variables to download. " - "A list of possible IDs is available at " - "https://github.com/TUW-GEO/ecmwf_models/tree/master/ecmwf_models/erainterim/eraint_lut.csv " # noqa: E501 - "or by using the 'View MARS request' option in the web based " - "ordering system." 
- ), - ) - parser.add_argument( - "-keep", - "--keep_original", - type=str2bool, - default="False", - help=( - "Keep the originally, temporally downloaded file as it is " - "instead of deleting it afterwards" - ), - ) - parser.add_argument( - "-grb", - "--as_grib", - type=str2bool, - default="False", - help=( - "Download data in grib1 format instead of the default " - "netcdf format" - ), - ) - parser.add_argument( - "--h_steps", - type=int, - default=None, - nargs="+", - help=("Manually change the temporal resolution of donwloaded images"), - ) - parser.add_argument( - "--steps", - type=int, - default=None, - nargs="+", - help=("Manually change the steps"), - ) - parser.add_argument( - "--type", - type=str, - default="an", - help=("Manually set the data stream, e.g. 'an' (default) or 'fc'"), - ) - parser.add_argument( - "--grid_size", - type=float, - default=None, - nargs="+", - help=( - "lon lat. Size of the grid that the data is stored to. " - "Should be at least (and is by default) " - "(0.75, 0.75) for ERA-Interim " - ), - ) - - args = parser.parse_args(args) - - print("ERA Interim data is deprecated. 
Use ERA5 instead.") - print( - "Downloading ERA Interim {} data from {} to {} into folder {}".format( - "grib" if args.as_grib is True else "netcdf", - args.start.isoformat(), - args.end.isoformat(), - args.localroot, - ) - ) - - return args - - -def main(args): - args = parse_args(args) - - download_and_move( - target_path=args.localroot, - startdate=args.start, - enddate=args.end, - variables=args.variables, - keep_original=args.keep_original, - grid_size=args.grid_size, - h_steps=args.h_steps, - type=args.type, - grb=args.as_grib, - ) - - -def run(): - main(sys.argv[1:]) diff --git a/src/ecmwf_models/erainterim/eraint_lut.csv b/src/ecmwf_models/erainterim/eraint_lut.csv deleted file mode 100644 index 1f77147..0000000 --- a/src/ecmwf_models/erainterim/eraint_lut.csv +++ /dev/null @@ -1,80 +0,0 @@ -dl_name,long_name,levtype,short_name,default -31.128,Sea ice area fraction,an,ci, -32.128,Snow albedo,an,asn, -33.128,Snow density,an,rsn,1 -34.128,Sea surface temperature,an,sst, -35.128,Ice temperature layer 1,an,istl1, -36.128,Ice temperature layer 2,an,istl2, -37.128,Ice temperature layer 3,an,istl3, -38.128,Ice temperature layer 4,an,istl4, -39.128,Volumetric soil water layer 1,an,swvl1,1 -40.128,Volumetric soil water layer 2,an,swvl2,1 -41.128,Volumetric soil water layer 3,an,swvl3,1 -42.128,Volumetric soil water layer 4,an,swvl4,1 -53.162,Vertical integral of mass of atmosphere,an,p53.162, -54.162,Vertical integral of temperature,an,p54.162, -55.162,Vertical integral of water vapour,an,p55.162, -56.162,Vertical integral of cloud liquid water,an,p56.162, -57.162,Vertical integral of cloud frozen water,an,p57.162, -58.162,Vertical integral of ozone,an,p58.162, -59.162,Vertical integral of kinetic energy,an,p59.162, -60.162,Vertical integral of thermal energy,an,p60.162, -61.162,Vertical integral of potential+internal energy,an,p61.162, -62.162,Vertical integral of potential+internal+latent energy,an,p62.162, -63.162,Vertical integral of total energy,an,p63.162, 
-64.162,Vertical integral of energy conversion,an,p64.162, -65.162,Vertical integral of eastward mass flux,an,p65.162, -66.162,Vertical integral of northward mass flux,an,p66.162, -67.162,Vertical integral of eastward kinetic energy flux,an,p67.162, -68.162,Vertical integral of northward kinetic energy flux,an,p68.162, -69.162,Vertical integral of eastward heat flux,an,p69.162, -70.162,Vertical integral of northward heat flux,an,p70.162, -71.162,Vertical integral of eastward water vapour flux,an,p71.162, -72.162,Vertical integral of northward water vapour flux,an,p72.162, -73.162,Vertical integral of eastward geopotential flux,an,p73.162, -74.162,Vertical integral of northward geopotential flux,an,p74.162, -75.162,Vertical integral of eastward total energy flux,an,p75.162, -76.162,Vertical integral of northward total energy flux,an,p76.162, -77.162,Vertical integral of eastward ozone flux,an,p77.162, -78.162,Vertical integral of northward ozone flux,an,p78.162, -79.162,Vertical integral of divergence of cloud liquid water flux,an,p79.162, -80.162,Vertical integral of divergence of cloud frozen water flux,an,p80.162, -81.162,Vertical integral of divergence of mass flux,an,p81.162, -82.162,Vertical integral of divergence of kinetic energy flux,an,p82.162, -83.162,Vertical integral of divergence of thermal energy flux,an,p83.162, -84.162,Vertical integral of divergence of moisture flux,an,p84.162, -85.162,Vertical integral of divergence of geopotential flux,an,p85.162, -86.162,Vertical integral of divergence of total energy flux,an,p86.162, -87.162,Vertical integral of divergence of ozone flux,an,p87.162, -88.162,Vertical integral of eastward cloud liquid water flux,an,p88.162, -89.162,Vertical integral of northward cloud liquid water flux,an,p89.162, -90.162,Vertical integral of eastward cloud frozen water flux,an,p90.162, -91.162,Vertical integral of northward cloud frozen water flux,an,p91.162, -92.162,Vertical integral of mass tendency,an,p92.162, -134.128,Surface 
pressure,an,sp, -136.128,Total column water,an,tcw, -137.128,Total column water vapour,an,tcwv, -139.128,Soil temperature level 1,an,stl1,1 -141.128,Snow depth,an,sd,1 -148.128,Charnock,an,chnk, -151.128,Mean sea level pressure,an,msl, -164.128,Total cloud cover,an,tcc, -165.128,10 metre U wind component,an,u10, -166.128,10 metre V wind component,an,v10, -167.128,2 metre temperature,an,t2m, -168.128,2 metre dewpoint temperature,an,d2m, -169.128,Surface solar radiation downwards,fc,ssrd, -170.128,Soil temperature level 2,an,stl2,1 -172.128,Land-sea mask,an,lsm,1 -173.128,Surface roughness,an,sr, -174.128,Albedo,an,al, -183.128,Soil temperature level 3,an,stl3,1 -186.128,Low cloud cover,an,lcc, -187.128,Medium cloud cover,an,mcc, -188.128,High cloud cover,an,hcc, -198.128,Skin reservoir content,an,src, -206.128,Total column ozone,an,tco3, -234.128,Logarithm of surface roughness length for heat,an,lsrh, -235.128,Skin temperature,an,skt, -236.128,Soil temperature level 4,an,stl4,1 -238.128,Temperature of snow layer,an,tsn, \ No newline at end of file diff --git a/src/ecmwf_models/erainterim/interface.py b/src/ecmwf_models/erainterim/interface.py deleted file mode 100644 index 752202c..0000000 --- a/src/ecmwf_models/erainterim/interface.py +++ /dev/null @@ -1,193 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -This module contains ERA Interim specific child classes of the netcdf and grib -base classes, that are used for reading all ecmwf products. -""" - -from ecmwf_models.interface import ERANcImg, ERANcDs, ERAGrbImg, ERAGrbDs -import warnings -from typing import Collection, Optional -from pygeogrids.grids import CellGrid - - -class ERAIntNcImg(ERANcImg): - def __init__( - self, - filename: str, - parameter: Optional[Collection[str]] = ("swvl1", "swvl2"), - mode: Optional[str] = "r", - subgrid: Optional[CellGrid] = None, - mask_seapoints: Optional[bool] = False, - array_1D: Optional[bool] = False, - ): - """ - Reader for a single ERA INT netcdf image file. 
- - Parameters - ---------- - filename: str - Path to the image file to read. - parameter: list or str, optional (default: ('swvl1', 'swvl2')) - Name of parameters to read from the image file. - subgrid: CellGrid, optional (default: None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Read the land-sea mask to mask points over water and - set them to nan. This option needs the 'lsm' parameter to be - in the file! - array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. - """ - warnings.warn( - "ERA Interim data is deprecated. Use ERA5 instead.", - DeprecationWarning, - ) - product = "ERAINT" - super(ERAIntNcImg, self).__init__( - filename=filename, - product=product, - parameter=parameter, - mode=mode, - subgrid=subgrid, - mask_seapoints=mask_seapoints, - array_1D=array_1D, - ) - - -class ERAIntNcDs(ERANcDs): - def __init__( - self, - root_path: str, - parameter: Optional[Collection[str]] = ("swvl1", "swvl2"), - subgrid: Optional[CellGrid] = None, - mask_seapoints: Optional[bool] = False, - h_steps: Collection[int] = (0, 6, 12, 18), - array_1D: Optional[bool] = False, - ): - - """ - Reader for a stack of ERA INT netcdf image files. - - Parameters - ---------- - root_path: str - Path to the image files to read. - parameter: list or str, optional (default: ('swvl1', 'swvl2')) - Name of parameters to read from the image file. - subgrid: pygeogrids.CellGrid, optional (default: None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Read the land-sea mask to mask points over water and set them - to nan. This option needs the 'lsm' parameter to be in the file! - h_steps : list, optional (default: (0,6,12,18)) - List of full hours to read images for. - array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. 
- """ - - warnings.warn( - "ERA Interim data is deprecated. Use ERA5 instead.", - DeprecationWarning, - ) - product = "ERAINT" - super(ERAIntNcDs, self).__init__( - root_path=root_path, - product=product, - parameter=parameter, - subgrid=subgrid, - mask_seapoints=mask_seapoints, - h_steps=h_steps, - array_1D=array_1D, - ) - - -class ERAIntGrbImg(ERAGrbImg): - def __init__( - self, - filename: str, - parameter: Optional[Collection[str]] = ("swvl1", "swvl2"), - mode: Optional[str] = "r", - subgrid: Optional[CellGrid] = None, - mask_seapoints: Optional[bool] = False, - array_1D: Optional[bool] = False, - ): - """ - Reader for a single ERA5 grib image file. - - Parameters - ---------- - filename: str - Path to the image file to read. - parameter: list or str, optional (default: ['swvl1', 'swvl2']) - Name of parameters to read from the image file. - mode: str, optional (default: "r") - File mode, better don't change... - subgrid: pygeogrids.CellGrid, optional (default: None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Read the land-sea mask to mask points over water and set - them to nan. This option needs the 'lsm' parameter to be in - the file! - array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. - """ - warnings.warn( - "ERA Interim data is deprecated. 
Use ERA5 instead.", - DeprecationWarning, - ) - product = "ERAINT" - super(ERAIntGrbImg, self).__init__( - filename=filename, - product=product, - parameter=parameter, - mode=mode, - subgrid=subgrid, - mask_seapoints=mask_seapoints, - array_1D=array_1D, - ) - - -class ERAIntGrbDs(ERAGrbDs): - def __init__( - self, - root_path: str, - parameter: Collection[str] = ("swvl1", "swvl2"), - subgrid: Optional[CellGrid] = None, - mask_seapoints: Optional[bool] = False, - h_steps: Collection[int] = (0, 6, 12, 18), - array_1D: Optional[bool] = False, - ): - """ - Reader for a stack of ERA INT grib image file. - - Parameters - ---------- - root_path: str - Path to the image files to read. - parameter: list or str, optional (default: ['swvl1', 'swvl2']) - Name of parameters to read from the image file. - subgrid: CellGrid, optional (default: None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Read the land-sea mask to mask points over water and set them - to nan. This option needs the 'lsm' parameter to be in the file! - h_steps : list, optional (default: [0,6,12,18]) - List of full hours to read images for. - array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. - """ - warnings.warn( - "ERA Interim data is deprecated. 
Use ERA5 instead.", - DeprecationWarning, - ) - product = "ERAINT" - super(ERAIntGrbDs, self).__init__( - root_path=root_path, - product=product, - parameter=parameter, - subgrid=subgrid, - mask_seapoints=mask_seapoints, - h_steps=h_steps, - array_1D=array_1D, - ) diff --git a/src/ecmwf_models/erainterim/reshuffle.py b/src/ecmwf_models/erainterim/reshuffle.py deleted file mode 100644 index edfb4ef..0000000 --- a/src/ecmwf_models/erainterim/reshuffle.py +++ /dev/null @@ -1,218 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Module for a command line interface to convert the ERA Interim data into a -time series format using the repurpose package -""" - -import os -import sys -import argparse -import numpy as np - -from pygeogrids import BasicGrid - -from repurpose.img2ts import Img2Ts -from ecmwf_models.erainterim.interface import ERAIntGrbDs, ERAIntNcDs -from ecmwf_models.utils import mkdate, parse_filetype -from datetime import time, datetime - - -def reshuffle( - input_root, - outputpath, - startdate, - enddate, - variables, - mask_seapoints=False, - h_steps=(0, 6, 12, 18), - imgbuffer=50, -): - """ - Reshuffle method applied to ERA images for conversion into netcdf time - series format. - - Parameters - ---------- - input_root: str - Input path where ERA image data was downloaded to. - outputpath : str - Output path, where the reshuffled netcdf time series are stored. - startdate : datetime - Start date, from which images are read and time series are generated. - enddate : datetime - End date, from which images are read and time series are generated. - variables: list or str or tuple - Variables to read from the passed images and convert into time - series format. - mask_seapoints: bool, optional (default: False) - Mask points over sea, replace them with nan. - h_steps: tuple, optional (default: (0,6,12,18)) - Full hours for which images are available. - imgbuffer: int, optional (default: 50) - How many images to read at once before writing time series. 
This number - affects how many images are stored in memory and should be chosen according - to the available amount of memory and the size of a single image. - """ - - filetype = parse_filetype(input_root) - - if filetype == "grib": - input_dataset = ERAIntGrbDs( - root_path=input_root, - parameter=variables, - subgrid=None, - array_1D=True, - mask_seapoints=mask_seapoints, - h_steps=h_steps, - ) - elif filetype == "netcdf": - input_dataset = ERAIntNcDs( - root_path=input_root, - parameter=variables, - subgrid=None, - array_1D=True, - mask_seapoints=mask_seapoints, - h_steps=h_steps, - ) - else: - raise Exception("Unknown file format") - - if not os.path.exists(outputpath): - os.makedirs(outputpath) - - global_attr = {"product": "ERA Interim (from {})".format(filetype)} - - # get time series attributes from first day of data. - first_date_time = datetime.combine(startdate.date(), time(h_steps[0], 0)) - - data = input_dataset.read(first_date_time) - ts_attributes = data.metadata - - grid = BasicGrid(data.lon, data.lat) - - reshuffler = Img2Ts( - input_dataset=input_dataset, - outputpath=outputpath, - startdate=startdate, - enddate=enddate, - input_grid=grid, - imgbuffer=imgbuffer, - cellsize_lat=5.0, - cellsize_lon=5.0, - ts_dtypes=np.dtype("float32"), - global_attr=global_attr, - zlib=True, - unlim_chunksize=1000, - ts_attributes=ts_attributes, - ) - reshuffler.calc() - - -def parse_args(args): - """ - Parse command line parameters for conversion from image to time series. - - Parameters - ---------- - args: list - command line parameters as list of strings - - Returns - ---------- - args : argparse.Namespace - Parsed command line parameters - """ - - parser = argparse.ArgumentParser( - description="Convert downloaded ERA image data into time series " - "format." 
- ) - parser.add_argument( - "dataset_root", - help="Root of local filesystem where the image data is stored.", - ) - parser.add_argument( - "timeseries_root", - help="Root of local filesystem where the time series " - "should be stored.", - ) - parser.add_argument( - "start", type=mkdate, help=("Startdate in format YYYY-MM-DD") - ) - parser.add_argument( - "end", type=mkdate, help=("Enddate in format YYYY-MM-DD") - ) - parser.add_argument( - "variables", - metavar="variables", - nargs="+", - help=( - "Short name of variables as stored in the images, " - "which are reshuffled. " - "See documentation on image download for resp. ERA products, " - "for more information on variable names of the product. " - ), - ) - parser.add_argument( - "--mask_seapoints", - type=bool, - default=False, - help=( - "Replace points over water with nan. This option needs the " - "LandSeaMask (lsm) variable in the image data (will use mask " - "from first available file). " - "To use a dynamic LSM, reshuffle the LSM variable to time series." - ), - ) - parser.add_argument( - "--h_steps", - type=int, - default=None, - nargs="+", - help=( - "Time steps (full hours) of images that will be reshuffled " - "(must be in the images). " - "By default 6H images (starting at 0:00 UTC) will be reshuffled." - ), - ) - parser.add_argument( - "--imgbuffer", - type=int, - default=50, - help=( - "How many images to read at once. Bigger numbers make the " - "conversion faster but consume more memory. " - "Choose this according to your system and the size of a " - "single image." - ), - ) - args = parser.parse_args(args) - - print("ERA Interim data is deprecated. 
Use ERA5 instead.") - print( - "Converting data from {} to {} into {}.".format( - args.start.isoformat(), args.end.isoformat(), args.timeseries_root - ) - ) - - return args - - -def main(args): - args = parse_args(args) - - reshuffle( - input_root=args.dataset_root, - outputpath=args.timeseries_root, - startdate=args.start, - enddate=args.end, - variables=args.variables, - mask_seapoints=args.mask_seapoints, - h_steps=args.h_steps, - imgbuffer=args.imgbuffer, - ) - - -def run(): - main(sys.argv[1:]) diff --git a/src/ecmwf_models/glob.py b/src/ecmwf_models/glob.py new file mode 100644 index 0000000..7fc736b --- /dev/null +++ b/src/ecmwf_models/glob.py @@ -0,0 +1,16 @@ +import os +from pathlib import Path + +# ERA5 products supported by the reader. +SUPPORTED_PRODUCTS = ['era5', 'era5-land'] + +CDS_API_URL = "https://cds.climate.copernicus.eu/api/v2" + +# CDSAPI_RC variable must be set or we use home dir +DOTRC = os.environ.get('CDSAPI_RC', os.path.join(Path.home(), '.cdsapirc')) + +class CdoNotFoundError(ModuleNotFoundError): + def __init__(self, msg=None): + _default_msg = "cdo and/or python-cdo not installed. " \ + "Use conda to install it them under Linux." + self.msg = _default_msg if msg is None else msg diff --git a/src/ecmwf_models/grid.py b/src/ecmwf_models/grid.py index b1d4ce7..9f8104f 100644 --- a/src/ecmwf_models/grid.py +++ b/src/ecmwf_models/grid.py @@ -143,6 +143,16 @@ def ERA_IrregularImgGrid( ) -> CellGrid: """ Create a irregular grid from the passed coordinates. + + Parameters + ---------- + lons : np.ndarray + List of longitude values + lats: np.ndarray + List of latitude values + bbox : Tuple[float, float, float], optional (default: None) + Keep only points in the passed bounding box. 
+ latmin, latmax, lonmin, lonmax """ lons_gt_180 = np.where(lons > 180.0) lons[lons_gt_180] = lons[lons_gt_180] - 360 diff --git a/src/ecmwf_models/interface.py b/src/ecmwf_models/interface.py index 6d59486..293d33e 100644 --- a/src/ecmwf_models/interface.py +++ b/src/ecmwf_models/interface.py @@ -20,6 +20,10 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +""" +Base classes for reading downloaded ERA netcdf and grib images and stacks +""" + import warnings import os from pygeobase.io_base import ImageBase, MultiTemporalImageBase @@ -43,9 +47,6 @@ import pygrib except ImportError: warnings.warn("pygrib has not been imported") -""" -Base classes for reading downloaded ERA netcdf and grib images and stacks -""" class ERANcImg(ImageBase): @@ -57,7 +58,7 @@ class ERANcImg(ImageBase): filename: str Path to the image file to read. product : str - 'era5' or 'era5-land' or 'eraint' + 'era5' or 'era5-land' parameter: list or str, optional (default: ['swvl1', 'swvl2']) Name of parameters to read from the image file. subgrid: pygeogrids.CellGrid, optional (default: None) diff --git a/src/ecmwf_models/utils.py b/src/ecmwf_models/utils.py index 8b573de..03aef85 100644 --- a/src/ecmwf_models/utils.py +++ b/src/ecmwf_models/utils.py @@ -31,11 +31,12 @@ import xarray as xr import pandas as pd from datedown.fname_creator import create_dt_fpath -import argparse import numpy as np from netCDF4 import Dataset from collections import OrderedDict +from ecmwf_models.glob import CdoNotFoundError, DOTRC + try: from cdo import Cdo cdo_available = True @@ -49,36 +50,6 @@ pygrib_available = False -class CdoNotFoundError(ModuleNotFoundError): - - def __init__(self, msg=None): - _default_msg = "cdo and/or python-cdo not installed. " \ - "Use conda to install it them under Linux." 
- self.msg = _default_msg if msg is None else msg - - -def str2bool(v): - """ - Parse a string to True/False - - Parameters - --------- - v : str - String to parse, must be part of the lists below. - - Return - --------- - str2bool : bool - The parsed bool from the passed string - """ - if v.lower() in ("yes", "true", "t", "y", "1"): - return True - elif v.lower() in ("no", "false", "f", "n", "0"): - return False - else: - raise argparse.ArgumentTypeError("Boolean value expected.") - - def save_ncs_from_nc( input_nc, output_path, @@ -216,25 +187,28 @@ def save_gribs_from_grib( os.remove(input_grib) -def mkdate(datestring): - """ - Turns a datetime string into a datetime object - - Parameters - ----------- - datestring: str - Input datetime string - - Returns - ----------- - datetime : datetime - Converted string - """ - if len(datestring) == 10: - return datetime.strptime(datestring, "%Y-%m-%d") - if len(datestring) == 16: - return datetime.strptime(datestring, "%Y-%m-%dT%H:%M") - +def check_api_read() -> bool: + if not os.path.isfile(DOTRC): + url = os.environ.get('CDSAPI_URL') + key = os.environ.get('CDSAPI_KEY') + if url is None or key is None: + ValueError('CDS API KEY or .cdsapirc file not found, ' + 'download will not work! ' + 'Please create a .cdsapirc file with your credentials' + 'or pass your uid/key to the command line tool ' + 'See: ' + 'https://cds.climate.copernicus.eu/api-how-to') + api_ready = False + elif ":" not in key: + raise ValueError( + 'Your CDS token is not valid. 
It must be in the format ' + ':, both of which are found on your CDS' + 'profile page.') + else: + api_ready = True + else: + api_ready = True + return api_ready def parse_product(inpath: str) -> str: """ @@ -263,8 +237,6 @@ def parse_product(inpath: str) -> str: return "era5-land" elif "ERA5" in parts: return "era5" - elif "ERAINT" in parts: - return "eraint" else: continue @@ -323,12 +295,6 @@ def load_var_table(name="era5", lut=False): "era5", "era5-land_lut.csv", ) - elif name == "eraint": - era_vars_csv = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - "erainterim", - "eraint_lut.csv", - ) else: raise ValueError(name, "No LUT for the selected dataset found.") @@ -343,7 +309,7 @@ def load_var_table(name="era5", lut=False): def lookup(name, variables): """ Search the passed elements in the lookup table, if one does not exists, - raise a Warning + print a Warning """ lut = load_var_table(name=name, lut=True) @@ -455,3 +421,40 @@ def make_era5_land_definition_file( ds_in.close() ds_out.close() + + +def default_variables(product="era5"): + """ + These variables are being downloaded, when None are passed by the user + + Parameters + --------- + product : str, optional (default: 'era5') + Name of the era5 product to read the default variables for. + Either 'era5' or 'era5-land'. + """ + lut = load_var_table(name=product) + defaults = lut.loc[lut["default"] == 1]["dl_name"].values + return defaults.tolist() + + +def split_array(array, chunk_size): + """ + Split an array into chunks of a given size. 
+ + Parameters + ---------- + array : array-like + Array to split into chunks + chunk_size : int + Size of each chunk + + Returns + ------- + chunks : list + List of chunks + """ + chunks = [] + for i in range(0, len(array), chunk_size): + chunks.append(array[i:i + chunk_size]) + return chunks diff --git a/tests/tests_era5/test_era5_download.py b/tests/tests_era5/test_era5_download.py index fe80a99..3e9a0e0 100644 --- a/tests/tests_era5/test_era5_download.py +++ b/tests/tests_era5/test_era5_download.py @@ -25,7 +25,8 @@ ''' from ecmwf_models.era5.download import download_and_move, save_ncs_from_nc -from ecmwf_models.utils import CdoNotFoundError, cdo_available +from ecmwf_models.utils import cdo_available +from ecmwf_models.glob import CdoNotFoundError import os import shutil diff --git a/tests/tests_eraint/test_eraint_download.py b/tests/tests_eraint/test_eraint_download.py deleted file mode 100644 index c137cfd..0000000 --- a/tests/tests_eraint/test_eraint_download.py +++ /dev/null @@ -1,92 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Tests for transferring downloaded data to netcdf or grib files -''' - -from ecmwf_models.erainterim.download import download_and_move - -import os -import shutil -from datetime import datetime -from tempfile import TemporaryDirectory - - -def test_dry_download_nc_eraint(): - - with TemporaryDirectory() as dl_path: - - dldir = os.path.join(dl_path, 'temp_downloaded') - os.makedirs(dldir, exist_ok=True) - - thefile = os.path.join(os.path.dirname(os.path.abspath(__file__)), - '..', "ecmwf_models-test-data", "download", - "eraint_example_downloaded_raw.nc") - shutil.copyfile(thefile, os.path.join(dl_path, 'temp_downloaded', - '20000101_20000101.nc')) - - startdate = enddate = datetime(2000, 1, 1) - - download_and_move( - dl_path, - startdate, - enddate, - variables=['swvl1', 'swvl2', 'lsm'], - keep_original=False, - h_steps=[0, 12], - grb=False, - dry_run=True) - - assert (os.listdir(dl_path) == ['2000']) - assert 
(os.listdir(os.path.join(dl_path, '2000')) == ['001']) - assert(len(os.listdir(os.path.join(dl_path, '2000', '001'))) == 2) - - should_dlfiles = [ - 'ERAINT_AN_20000101_0000.nc', - 'ERAINT_AN_20000101_1200.nc', - ] - - assert (len(os.listdir(os.path.join(dl_path, '2000', - '001'))) == len(should_dlfiles)) - - assert (sorted(os.listdir(os.path.join( - dl_path, '2000', '001'))) == sorted(should_dlfiles)) - - -def test_dry_download_grb_eraint(): - - with TemporaryDirectory() as dl_path: - - dldir = os.path.join(dl_path, 'temp_downloaded') - os.makedirs(dldir, exist_ok=True) - - thefile = os.path.join(os.path.dirname(os.path.abspath(__file__)), - '..', "ecmwf_models-test-data", "download", - "eraint_example_downloaded_raw.grb") - shutil.copyfile(thefile, os.path.join(dl_path, 'temp_downloaded', - '20000101_20000101.grb')) - - startdate = enddate = datetime(2000, 1, 1) - - download_and_move( - dl_path, - startdate, - enddate, - variables=['swvl1', 'swvl2', 'lsm'], - keep_original=False, - h_steps=[0, 6, 12, 18], - grb=True, - dry_run=True) - - assert (os.listdir(dl_path) == ['2000']) - assert (os.listdir(os.path.join(dl_path, '2000')) == ['001']) - assert(len(os.listdir(os.path.join(dl_path, '2000', '001'))) == 2) - - should_dlfiles = [ - 'ERAINT_AN_20000101_0000.grb', - 'ERAINT_AN_20000101_1200.grb', - ] - assert (len(os.listdir(os.path.join(dl_path, '2000', - '001'))) == len(should_dlfiles)) - - assert (sorted(os.listdir(os.path.join( - dl_path, '2000', '001'))) == sorted(should_dlfiles)) diff --git a/tests/tests_eraint/test_eraint_interface.py b/tests/tests_eraint/test_eraint_interface.py deleted file mode 100644 index 669c0b8..0000000 --- a/tests/tests_eraint/test_eraint_interface.py +++ /dev/null @@ -1,288 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -import numpy.testing as nptest -from ecmwf_models.erainterim.interface import (ERAIntGrbImg, ERAIntNcImg, - ERAIntGrbDs, ERAIntNcDs) -import numpy as np -from datetime import datetime - - -def test_ERAInt_nc_image(): 
- fname = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "netcdf", "2000", "001", - 'ERAINT_AN_20000101_0000.nc') - - dset = ERAIntNcImg( - fname, parameter=['swvl1', 'swvl2'], mask_seapoints=True) - data = dset.read() - assert data.data['swvl1'].shape == (241, 480) - assert data.data['swvl2'].shape == (241, 480) - assert data.lon.shape == (241, 480) - assert data.lat.shape == (241, 480) - metadata_should = { - 'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl1']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl1']['units'] == metadata_should['units'] - - metadata_should = { - 'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl2']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl2']['units'] == metadata_should['units'] - - # data over land - nptest.assert_allclose(data.lon[100, 64], 48.0, rtol=1e-5) - nptest.assert_allclose(data.lat[100, 64], 15.0, rtol=1e-5) - nptest.assert_allclose(data.data['swvl1'][100, 64], 0.17185359, rtol=1e-5) - nptest.assert_allclose(data.data['swvl2'][100, 64], 0.17981644, rtol=1e-5) - # data over water - nptest.assert_allclose(data.lon[136, 108], 81.0, rtol=1e-5) - nptest.assert_allclose(data.lat[136, 108], -12.0, rtol=1e-5) - nptest.assert_allclose(data.data['swvl1'][136, 108], np.nan) - nptest.assert_allclose(data.data['swvl2'][136, 108], np.nan) - - #boundaries - nptest.assert_allclose(data.lat[0, 0], 90.0) - nptest.assert_allclose(data.lat[-1, 0], -90.0) - nptest.assert_allclose(data.lon[0, 0], 0.0) - nptest.assert_allclose(data.lon[0, 240], 180.0) # middle of image - - -def test_ERAInt_nc_image_1d(): - fname = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "netcdf", "2000", "001", - 'ERAINT_AN_20000101_0000.nc') - - dset = ERAIntNcImg( - fname, - 
parameter=['swvl1', 'swvl2'], - mask_seapoints=True, - array_1D=True) - data = dset.read() - assert data.data['swvl1'].shape == (241 * 480,) - assert data.data['swvl2'].shape == (241 * 480,) - assert data.lon.shape == (241 * 480,) - assert data.lat.shape == (241 * 480,) - metadata_should = { - 'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl1']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl1']['units'] == metadata_should['units'] - - metadata_should = { - 'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl2']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl2']['units'] == metadata_should['units'] - - # data over land - nptest.assert_allclose(data.lon[100 * 480 + 64], 48.0) - nptest.assert_allclose(data.lat[100 * 480 + 64], 15.0) - nptest.assert_allclose( - data.data['swvl1'][100 * 480 + 64], 0.17185359, rtol=1e-5) - nptest.assert_allclose( - data.data['swvl2'][100 * 480 + 64], 0.17981644, rtol=1e-5) - # data over water - nptest.assert_allclose(data.lon[136 * 480 + 108], 81.0) - nptest.assert_allclose(data.lat[136 * 480 + 108], -12.0) - nptest.assert_allclose(data.data['swvl1'][136 * 480 + 108], np.nan) - nptest.assert_allclose(data.data['swvl2'][136 * 480 + 108], np.nan) - - #boundaries - nptest.assert_allclose(data.lat[0], 90.0) - nptest.assert_allclose(data.lat[-1], -90.0) - nptest.assert_allclose(data.lon[0], 0.0) - nptest.assert_allclose(data.lon[len(data.lon) // 2], - 180.0) # middle of image - - -def test_ERAInt_grb_image(): - fname = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "grib", "2000", "001", - 'ERAINT_AN_20000101_0000.grb') - - dset = ERAIntGrbImg( - fname, - parameter=['swvl1', 'swvl2'], - mask_seapoints=True, - array_1D=False) - data = dset.read() - assert data.data['swvl1'].shape == (256, 512) - assert 
data.data['swvl2'].shape == (256, 512) - assert data.lon.shape == (256, 512) - assert data.lat.shape == (256, 512) - metadata_should = { - 'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl1']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl1']['units'] == metadata_should['units'] - - metadata_should = { - 'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl2']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl2']['units'] == metadata_should['units'] - - # data over land - nptest.assert_allclose(data.data['swvl1'][106, 68], 0.171760559) - nptest.assert_allclose(data.data['swvl2'][106, 68], 0.178138732) - nptest.assert_allclose(data.lon[106, 68], 47.81238356, rtol=1e-5) - nptest.assert_allclose(data.lat[106, 68], 15.08768995, rtol=1e-5) - # data over water - nptest.assert_allclose(data.lon[145, 115], 80.85917808219178, rtol=1e-5) - nptest.assert_allclose(data.lat[145, 115], -12.280678058301, rtol=1e-5) - nptest.assert_allclose(data.data['swvl1'][145, 115], np.nan) - nptest.assert_allclose(data.data['swvl2'][145, 115], np.nan) - - # boundaries - nptest.assert_allclose(data.lat[0, 0], 89.46282157) - nptest.assert_allclose(data.lat[-1, 0], -89.4628215685774) - nptest.assert_allclose(data.lon[0, 0], 0.0) - nptest.assert_allclose(data.lon[0, 256], - 179.99956164383) # middle of image - - -def test_ERAInt_grb_image_1d(): - fname = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "grib", "2000", "001", - 'ERAINT_AN_20000101_0000.grb') - - dset = ERAIntGrbImg( - fname, - parameter=['swvl1', 'swvl2'], - mask_seapoints=True, - array_1D=True) - data = dset.read() - assert data.data['swvl1'].shape == (256 * 512,) - assert data.data['swvl2'].shape == (256 * 512,) - assert data.lon.shape == (256 * 512,) - assert data.lat.shape == (256 * 512,) - metadata_should = { - 
'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl1']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl1']['units'] == metadata_should['units'] - - metadata_should = { - 'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl2']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl2']['units'] == metadata_should['units'] - - # data over land - nptest.assert_allclose(data.lon[106 * 512 + 68], 47.81238356, rtol=1e-5) - nptest.assert_allclose(data.lat[106 * 512 + 68], 15.08768995, rtol=1e-5) - nptest.assert_allclose( - data.data['swvl1'][106 * 512 + 68], 0.171760559, rtol=1e-5) - nptest.assert_allclose( - data.data['swvl2'][106 * 512 + 68], 0.178138732, rtol=1e-5) - # data over water - nptest.assert_allclose(data.lon[145 * 512 + 115], 80.8591780, rtol=1e-5) - nptest.assert_allclose(data.lat[145 * 512 + 115], -12.280678058, rtol=1e-5) - nptest.assert_allclose(data.data['swvl1'][145 * 512 + 115], np.nan) - nptest.assert_allclose(data.data['swvl2'][145 * 512 + 115], np.nan) - - #boundaries - nptest.assert_allclose(data.lat[0], 89.46282157) - nptest.assert_allclose(data.lat[-1], -89.4628215685774) - nptest.assert_allclose(data.lon[0], 0.0) - nptest.assert_allclose(data.lon[256], 179.99956164383) # middle of image - - -def test_ERAInt_nc_ds(): - root_path = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "netcdf") - - tstamps_should = [datetime(2000, 1, 1), datetime(2000, 1, 1, 12)] - - ds = ERAIntNcDs( - root_path, - parameter=['swvl1', 'swvl2'], - array_1D=True, - mask_seapoints=True, - h_steps=[0, 12]) - - for data, tstamp_should in zip( - ds.iter_images(datetime(2000, 1, 1), datetime(2000, 1, 1)), - tstamps_should): - assert data.data['swvl1'].shape == (241 * 480,) - assert data.data['swvl2'].shape == (241 * 480,) - assert data.lon.shape == (241 * 480,) - assert 
data.lat.shape == (241 * 480,) - assert data.timestamp == tstamp_should - metadata_should = { - 'swvl1': { - 'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3' - }, - 'swvl2': { - 'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3' - } - } - for var in ['swvl1', 'swvl2']: - assert data.metadata[var]['long_name'] == metadata_should[var][ - 'long_name'] - assert data.metadata[var]['units'] == metadata_should[var]['units'] - - nptest.assert_allclose(data.lat[0], 90.0) - nptest.assert_allclose(data.lat[-1], -90.0) - nptest.assert_allclose(data.lon[0], 0.0) - nptest.assert_allclose(data.lon[240], 180.0) # middle of image - - -def test_ERAInt_grb_ds(): - root_path = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "grib") - - tstamps_should = [datetime(2000, 1, 1), datetime(2000, 1, 1, 12)] - - ds = ERAIntGrbDs( - root_path, - parameter=['swvl1', 'swvl2'], - array_1D=True, - mask_seapoints=True, - h_steps=[0, 12]) - - for data, tstamp_should in zip( - ds.iter_images(datetime(2000, 1, 1), datetime(2000, 1, 1)), - tstamps_should): - assert data.data['swvl1'].shape == (256 * 512,) - assert data.data['swvl2'].shape == (256 * 512,) - assert data.lon.shape == (256 * 512,) - assert data.lat.shape == (256 * 512,) - assert data.timestamp == tstamp_should - metadata_should = \ - { - 'swvl1': {'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3'}, - 'swvl2': {'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3'} - } - for var in ['swvl1', 'swvl2']: - assert data.metadata[var]['long_name'] == metadata_should[var][ - 'long_name'] - assert data.metadata[var]['units'] == metadata_should[var][ - 'units'] - - nptest.assert_allclose(data.lat[0], 89.46282157) - nptest.assert_allclose(data.lat[-1], -89.4628215685774) - nptest.assert_allclose(data.lon[0], 0.0) - nptest.assert_allclose(data.lon[256], - 179.99956164383) # middle of image diff --git 
a/tests/tests_eraint/test_eraint_reshuffle.py b/tests/tests_eraint/test_eraint_reshuffle.py deleted file mode 100644 index 038666e..0000000 --- a/tests/tests_eraint/test_eraint_reshuffle.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Test module for image to time series conversion. -''' - -import os -import glob -from tempfile import TemporaryDirectory -import numpy as np -import numpy.testing as nptest - -from ecmwf_models.erainterim.reshuffle import main -from ecmwf_models import ERATs - - -def test_ERAInterim_reshuffle_grb(): - # test reshuffling era interim grib images to time series - inpath = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "grib") - startdate = '2000-01-01' - enddate = '2000-01-01' - parameters = ["swvl1", "swvl2"] - h_steps = ['--h_steps', '0', '12'] - - with TemporaryDirectory() as ts_path: - args = [inpath, ts_path, startdate, enddate] + parameters + h_steps - - main(args) - - assert len(glob.glob(os.path.join(ts_path, "*.nc"))) == 2593 - ds = ERATs(ts_path, ioclass_kws={'read_bulk': True}) - ts = ds.read(48, 15) - ds.close() - swvl1_should = np.array([0.171761, 0.171738], dtype=np.float32) - nptest.assert_allclose(ts['swvl1'].values, swvl1_should, rtol=1e-5) - swvl2_should = np.array([0.178139, 0.178200], dtype=np.float32) - nptest.assert_allclose(ts['swvl2'].values, swvl2_should, rtol=1e-5) - - -def test_ERAInterim_reshuffle_nc(): - # test reshuffling era interim grib images to time series - inpath = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "netcdf") - startdate = '2000-01-01' - enddate = '2000-01-01' - parameters = ["swvl1", "swvl2"] - h_steps = ['--h_steps', '0', '12'] - - with TemporaryDirectory() as ts_path: - args = [inpath, ts_path, startdate, enddate] + parameters + h_steps - main(args) - assert len(glob.glob(os.path.join(ts_path, "*.nc"))) == 2593 - ds = ERATs(ts_path, 
ioclass_kws={'read_bulk': True}) - ts = ds.read(48, 15) - ds.close() - swvl1_should = np.array([0.171854, 0.171738], dtype=np.float32) - nptest.assert_allclose(ts['swvl1'].values, swvl1_should, rtol=1e-5) - swvl2_should = np.array([0.179816, 0.179860], dtype=np.float32) - nptest.assert_allclose(ts['swvl2'].values, swvl2_should, rtol=1e-5) From aca1227e67ac27c13a2d91472e5c7e2110a20975 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Thu, 24 Oct 2024 22:47:05 +0200 Subject: [PATCH 02/20] Update reader classes --- .github/workflows/ci.yml | 132 +++-- README.rst | 4 + setup.cfg | 7 +- src/ecmwf_models/cli.py | 203 ++++--- src/ecmwf_models/era5/download.py | 236 ++++++--- .../era5/{interface.py => reader.py} | 43 +- src/ecmwf_models/era5/reshuffle.py | 36 +- src/ecmwf_models/glob.py | 16 - src/ecmwf_models/globals.py | 24 + src/ecmwf_models/grid.py | 138 +++-- src/ecmwf_models/interface.py | 495 ++++++++++-------- src/ecmwf_models/utils.py | 475 ++++++++++++----- tests/test_grid.py | 19 +- tests/test_utils.py | 18 - tests/tests_era5/test_era5_download.py | 62 ++- tests/tests_era5/test_era5_download_cds.py | 29 +- tests/tests_era5/test_era5_interface.py | 7 +- tests/tests_era5/test_era5_reshuffle.py | 117 ++--- 18 files changed, 1246 insertions(+), 815 deletions(-) rename src/ecmwf_models/era5/{interface.py => reader.py} (82%) delete mode 100644 src/ecmwf_models/glob.py create mode 100644 src/ecmwf_models/globals.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 01a4f6f..88c0785 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,45 +4,44 @@ # For more information see: # https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Automated Tests +name: CI/CD on: push: pull_request: workflow_dispatch: - schedule: # only upstream, won't trigger on forks! 
- - cron: '0 0 * * *' # daily + schedule: + - cron: '0 0 * * *' # nightly build + +env: # Required to upload docker image + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + jobs: build: strategy: matrix: - python-version: [ '3.8', '3.9', '3.10' ] - os: [ "ubuntu-latest" ] + python-version: [ '3.8', '3.12' ] + os: [ "ubuntu-latest" , "windows-latest", "macos-latest"] ymlfile: [ "latest.yml" ] - include: - - os: "windows-latest" - python-version: "3.10" - ymlfile: "latest.yml" - name: Py${{ matrix.python-version }}@${{ matrix.os }}|${{ matrix.ymlfile }} + name: Py${{ matrix.python-version }}@${{ matrix.os }} runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: true fetch-depth: 0 lfs: true - name: Checkout LFS objects run: git lfs checkout - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: - miniconda-version: "latest" auto-update-conda: true - channel-priority: flexible python-version: ${{ matrix.python-version }} environment-file: environment/${{ matrix.ymlfile }} - mamba-version: "*" - activate-environment: ecmwf_models # todo: must match with name in latest.yml + channel-priority: flexible + activate-environment: ecmwf_models auto-activate-base: false - name: Print Infos shell: bash -l {0} @@ -52,22 +51,27 @@ jobs: pip list which pip which python - - name: Export Environment + - name: Install dependencies and package shell: bash -l {0} run: | - mkdir -p .artifacts - if [ ${{ matrix.ymlfile }} == "latest.yml" ] + # On macos pykdtree was built with OpenMP (C-level parallelization library) support + if [ ${{ matrix.os }} == "macos-latest" ] then - filename=pinned_${{ matrix.python-version }}_${{ matrix.os }}.yml - conda env export --no-builds | grep -v "prefix" > .artifacts/$filename + conda install -c conda-forge pykdtree fi - - name: Install package and test + pip install -e .[testing] + - name: Run all tests env: - CDSAPI_KEY: ${{ secrets.CDSAPI_KEY }} 
+ CDS_APIKEY: ${{ secrets.CDS_APIKEY }} shell: bash -l {0} - run: | - pip install . + run: | pytest + - name: Export Environment + shell: bash -l {0} + run: | + mkdir -p artifacts + filename=env_py${{ matrix.python-version }}_${{ matrix.os }}.yml + conda env export --no-builds | grep -v "prefix" > artifacts/$filename - name: Upload Coverage shell: bash -l {0} run: | @@ -79,23 +83,25 @@ jobs: - name: Create wheel and dist package shell: bash -l {0} run: | - pip install setuptools_scm + pip install setuptools_scm twine if [ ${{ matrix.os }} == "windows-latest" ] then # build whls on windows pip install wheel - python setup.py bdist_wheel --dist-dir .artifacts/dist + python setup.py bdist_wheel --dist-dir artifacts/dist else # build dist on linux - python setup.py sdist --dist-dir .artifacts/dist + python setup.py sdist --dist-dir artifacts/dist fi - ls .artifacts/dist + ls artifacts/dist + twine check artifacts/dist/* - name: Upload Artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: Artifacts - path: .artifacts/* - coveralls: + name: Artifacts-py${{ matrix.python-version }}-${{ matrix.os }} + path: artifacts/* + + publish-coverage: name: Submit Coveralls 👚 needs: build runs-on: ubuntu-latest @@ -106,9 +112,13 @@ jobs: pip3 install --upgrade coveralls && coveralls --service=github --finish env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - publish: - name: Upload to PyPI - if: startsWith(github.ref, 'refs/tags/v') && startsWith(github.repository, 'TUW-GEO') + + publish-pypi-package: + name: Publish PyPI 🚀 + # Will only trigger when Tests have passed on release/tag from the TUW-GEO repository + if: | + startsWith(github.ref, 'refs/tags/v') && + startsWith(github.repository, 'TUW-GEO') needs: build runs-on: ubuntu-latest steps: @@ -117,7 +127,11 @@ jobs: echo "GITHUB_REF = $GITHUB_REF" echo "GITHUB_REPOSITORY = $GITHUB_REPOSITORY" - name: Download Artifacts - uses: actions/download-artifact@v4.1.7 + uses: 
actions/download-artifact@v4 + with: + path: Artifacts + pattern: Artifacts-* + merge-multiple: true - name: Display downloaded files run: ls -aR - name: Upload to PyPI @@ -128,4 +142,46 @@ jobs: verify_metadata: true packages_dir: Artifacts/dist/ user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} # todo: add pypi token to GHA + password: ${{ secrets.PYPI_API_TOKEN }} # this needs to be uploaded to github actions secrets + + publish-docker-image: + name: Publish Docker 📦 + # Will only trigger when Tests have passed on release/tag from the TUW-GEO repository + if: | + startsWith(github.ref, 'refs/tags/v') && + startsWith(github.repository, 'TUW-GEO') + needs: build + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + attestations: write + id-token: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + - name: Build and push Docker image + id: push + uses: docker/build-push-action@v6 + with: + context: . + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + - name: Generate artifact attestation + uses: actions/attest-build-provenance@v1 + with: + subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}} + subject-digest: ${{ steps.push.outputs.digest }} + push-to-registry: true \ No newline at end of file diff --git a/README.rst b/README.rst index 096c1c7..d0721c6 100644 --- a/README.rst +++ b/README.rst @@ -68,6 +68,10 @@ dependencies. The file `pinned` is a fallback option and should always work. 
python setup.py develop pytest +API Setup +========= + +https://cds.climate.copernicus.eu/how-to-api Supported Products ================== diff --git a/setup.cfg b/setup.cfg index d5de20e..995386d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,7 +35,8 @@ install_requires = pandas netcdf4 click - cdsapi>=0.6.1 + parse + cdsapi>=0.7.3 repurpose pygeogrids @@ -65,8 +66,8 @@ testing = # script_name = ecmwf_models.module:function # For example: console_scripts = - era5_download = ecmwf_models.era5.download:run - era5_reshuffle = ecmwf_models.era5.reshuffle:run + era5 = ecmwf_models.cli:era5 + era5land = ecmwf_models.cli:era5land # And any other entry points, for example: # pyscaffold.cli = # awesome = pyscaffoldext.awesome.extension:AwesomeExtension diff --git a/src/ecmwf_models/cli.py b/src/ecmwf_models/cli.py index cd4bc98..421dd6c 100644 --- a/src/ecmwf_models/cli.py +++ b/src/ecmwf_models/cli.py @@ -1,9 +1,15 @@ +import os import click from datetime import datetime -from ecmwf_models.era5.download import download_and_move -from ecmwf_models.utils import default_variables +from ecmwf_models.era5.download import download_and_move, download_record_extension +from ecmwf_models.utils import ( + default_variables, + img_infer_file_props, +) + from ecmwf_models.era5.reshuffle import img2ts +from ecmwf_models.globals import IMG_FNAME_TEMPLATE @click.command( "download", @@ -12,7 +18,7 @@ "dates. Before this program can be used, you have to " "register at the CDS and setup your .cdsapirc file " "as described here: " - "https://cds.climate.copernicus.eu/api-how-to") + "https://cds.climate.copernicus.eu/how-to-api") @click.argument("PATH", type=click.Path(writable=True)) @click.option( '--start', '-s', @@ -26,15 +32,15 @@ help="Last date of the period to download data for. Format: YYYY-MM-DD") @click.option( '--variables', - '-var', - multiple=True, + '-v', type=click.STRING, - default=default_variables('era5'), - help="Name of variables to download. 
For all available variables see: " - "https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation#ERA5:datadocumentation-Parameterlistings") + default=','.join(default_variables('era5', 'short_name')), + help="Name of variables to download. To download multiple variables pass " + "comma-separated names (e.g. `-v swvl1,stl1`). " + "For all available variables see the docs. ") @click.option( "-k", - "--keep", + "--keep_original", type=click.BOOL, default=False, help="Also keep the original image image stack as downloaded from CDS " @@ -49,31 +55,37 @@ "Pass either True or False. Default is False.") @click.option( "--h_steps", - type=int, - multiple=True, - default=[0, 6, 12, 18], - nargs=4, - help="Temporal sampling of downloaded images. " - "Pass a set of full hours here, like '--h_steps 0 12' to download " + type=str, + default="0,6,12,18", + help="Temporal sampling of downloaded images. Multiple values are " + "comma-separated!" + "Pass a set of full hours here, like '--h_steps 0,12' to download " "two images for each day, at 0:00 and 12:00 respectively. " "By default, we download 6-hourly images starting at 0:00 UTC, " - "(i.e., 0 6 12 18)") + "(i.e., 0,6,12,18)") @click.option( "--max_request_size", type=int, default=1000, - nargs=4, help="Maximum number of requests that the CDS API allows. " "The default is 1000, but depends on server side settings. " "Server settings may change at some point. Change " "accordingly here in case that 'the request is too large'. " "A smaller number will results in smaller download chunks.") +@click.option( + "--cds_token", + type=click.STRING, + default=None, + help="To identify with the CDS. Required if no .cdsapirc file exists in " + "the home directory (see documentation). You can find your token/key " + "on your CDS user profile page. 
Alternatively, you can also set" + "the an environment variable CDSAPI_KEY that contains your token.") def cli_download_era5(path, start, end, variables, keep_original, as_grib, - h_steps, max_request_size): + h_steps, max_request_size, cds_token): """ Download ERA5 image data within a chosen period. NOTE: Before using this program, create a CDS account and set up a `.cdsapirc` file as described - here: https://cds.climate.copernicus.eu/api-how-to + here: https://cds.climate.copernicus.eu/how-to-api \b Required Parameters @@ -81,8 +93,11 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, PATH: string (required) Root of local filesystem where the downloaded data will be stored. Make sure to set up the CDS API for your account as describe in - https://cds.climate.copernicus.eu/api-how-to + https://cds.climate.copernicus.eu/how-to-api """ + h_steps = [int(h.strip()) for h in h_steps.split(',')] + variables = [str(v.strip()) for v in variables.split(',')] + status_code = download_and_move( target_path=path, startdate=start, @@ -94,6 +109,7 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, keep_original=keep_original, stepsize='month', n_max_request=max_request_size, + cds_token=cds_token, ) return status_code @@ -106,9 +122,9 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, "dates. 
Before this program can be used, you have to " "register at the CDS and setup your .cdsapirc file " "as described here: " - "https://cds.climate.copernicus.eu/api-how-to " - "Alternatively, you can pass your UID:KEY here using the " - "cds_token option.") + "https://cds.climate.copernicus.eu/how-to-api " + "Alternatively, you can pass your KEY/TOKEN here using the " + "--cds_token option.") @click.argument("PATH", type=click.Path(writable=True)) @click.option( '--start', '-s', @@ -122,15 +138,15 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, help="Last date of the period to download data for. Format: YYYY-MM-DD") @click.option( '--variables', - '-var', - multiple=True, + '-v', type=click.STRING, - default=default_variables('era5-land'), - help="Name of variables to download. For all available variables see: " - "https://confluence.ecmwf.int/display/CKB/ERA5-Land%3A+data+documentation#ERA5Land:datadocumentation-parameterlistingParameterlistings") + default=','.join(default_variables('era5-land', 'short_name')), + help="Name of variables to download. To download multiple variables pass " + "comma-separated names (e.g. `-v swvl1,stl1`). " + "For all available variables see the docs. ") @click.option( "-k", - "--keep", + "--keep_original", type=click.BOOL, default=False, help="Also keep the original image image stack as downloaded from CDS " @@ -145,20 +161,16 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, "Pass either True or False. Default is False.") @click.option( "--h_steps", - type=int, - multiple=True, - default=[0, 6, 12, 18], - nargs=4, + type=click.STRING, + default='0,6,12,18', help="Temporal sampling of downloaded images. " - "Pass a set of full hours here, like '--h_steps 0 12' to download " + "Pass a set of full hours here, like '--h_steps 0,12' to download " "two images for each day, at 0:00 and 12:00 respectively. 
" - "By default, we download 6-hourly images starting at 0:00 UTC, " - "(i.e., 0 6 12 18)") + "By default, we download 6-hourly images starting at 0:00 UTC") @click.option( "--max_request_size", type=int, default=1000, - nargs=4, help="Maximum number of requests that the CDS API allows. " "The default is 1000, but depends on server side settings. " "Server settings may change at some point. Change " @@ -168,16 +180,16 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, "--cds_token", type=click.STRING, default=None, - help="To identify with the CDS. Only required/used if no .cdsapi file " - "exists in your home directory (see documentation)." - "Consists of your UID and API Key . Both can be " - "found under your CDS User profile page.") + help="To identify with the CDS. Required if no .cdsapirc file exists in " + "the home directory (see documentation). You can find your token/key " + "on your CDS user profile page. Alternatively, you can also set" + "the an environment variable CDSAPI_KEY that contains your token.") def cli_download_era5land(path, start, end, variables, keep_original, as_grib, - h_steps, max_request_size): + h_steps, max_request_size, cds_token): """ Download ERA5-Land image data within a chosen period. NOTE: Before using this program, create a CDS account and set up a `.cdsapirc` file as described - here: https://cds.climate.copernicus.eu/api-how-to + here: https://cds.climate.copernicus.eu/how-to-api \b Required Parameters @@ -185,8 +197,11 @@ def cli_download_era5land(path, start, end, variables, keep_original, as_grib, PATH: string (required) Root of local filesystem where the downloaded data will be stored. 
Make sure to set up the CDS API for your account as describe in - https://cds.climate.copernicus.eu/api-how-to + https://cds.climate.copernicus.eu/how-to-api """ + h_steps = [int(h.strip()) for h in h_steps.split(',')] + variables = [str(v.strip()) for v in variables.split(',')] + status_code = download_and_move( target_path=path, startdate=start, @@ -198,6 +213,7 @@ def cli_download_era5land(path, start, end, variables, keep_original, as_grib, keep_original=keep_original, stepsize='month', n_max_request=max_request_size, + cds_token=cds_token ) return status_code @@ -212,59 +228,32 @@ def cli_download_era5land(path, start, end, variables, keep_original, as_grib, "--cds_token", type=click.STRING, default=None, - help="To identify with the CDS. Required if no .cdsapi file exists. " - "In the home directory (see documentation)." - "Consists of your UID and API Key . Both can be " - "found under your CDS User profile page.") -def cli_update_img(path, fntempl, cds_token=None): + help="To identify with the CDS. Required if no .cdsapirc file exists in " + "the home directory (see documentation). You can find your token/key " + "on your CDS user profile page. Or set your token under the " + "CDSAPI_KEY environment variable") +def cli_update_img(path, cds_token=None): """ - Extend a locally existing C3S SM record by downloading new files that + Extend a locally existing ERA record by downloading new files that don't yet exist locally. This will find the latest available local file, and download all available extensions. - NOTE: Use the `c3s_sm download` program first do create a local record - to update with this function. + NOTE: Use the `era5 download` or `era5land download` program first do + create a local record to update with this function. \b Required Parameters ------------------- PATH: string - Path where previously downloaded C3S SM images are stored. + Path where previously downloaded C3S SM images are stored. 
There must + be a file `overview.yml` which contains the download settings. Make sure to set up the CDS API for your account as describe in - https://cds.climate.copernicus.eu/api-how-to + https://cds.climate.copernicus.eu/how-to-api, """ # The docstring above is slightly different to the normal python one to # display it properly on the command line. - # if not set, use URL from const - if 'CDSAPI_URL' not in os.environ: - os.environ['CDSAPI_URL'] = cds_api_url - - if cds_token is not None: - os.environ["CDSAPI_KEY"] = cds_token - - check_api_read() - - props = img_infer_file_props(path, fntempl=fntempl, start_from='last') - - freq = props['freq'].lower() - product = props['product'].lower() - version = props['version'] - - startdate = first_missing_date(props['datetime'], freq=freq) - - print( - f"Update C3S SM images: " - f"Fetching latest data for C3S SM CDR/ICDR {freq} {product} {version} " - f"after {startdate.isoformat()} into {path}.") - - download_and_extract( - path, - startdate=startdate, - freq=freq, - version=version, - product=product, - keep_original=False) + download_record_extension(path, cds_token=cds_token) @click.command( @@ -277,12 +266,13 @@ def cli_update_img(path, fntempl, cds_token=None): @click.argument("END", type=click.STRING) @click.option( '--variables', - '-var', - multiple=True, + '-v', type=click.STRING, default=None, - help="Subset of variables to convert (short names, as in the input " - "images). If not specified, all variables are be used.") + help="Subset of variables to convert. Pass comma separated names" + "to select multiple variables (short names, as in the input images, " + "e.g. `-v swvl1,stl1`). If not specified, all variables from the first" + "image file are used.") @click.option( '--land_points', '-l', is_flag=True, show_default=True, default=False, @@ -293,27 +283,26 @@ def cli_update_img(path, fntempl, cds_token=None): nargs=4, type=click.FLOAT, help="4 NUMBERS | min_lon min_lat max_lon max_lat. 
" - "Set Bounding Box (lower left and upper right corner) " - "of area to reshuffle (WGS84). [default: -180 -90 180 90]") + "Set Bounding Box (lower left and upper right corner) " + "of area to reshuffle (WGS84). [default: -180 -90 180 90]") @click.option( "--h_steps", - type=int, - multiple=True, - default=[0, 6, 12, 18], - nargs=4, - help="Full hour time stamps to include in time series. E.g. " - "'--h_steps 0 12' will only include data from images at 0:00 and " - "12:00 UTC and ignore all other available time stamps." - "By default, we include data for every 6th hour per day, " - "(i.e., 0 6 12 18)") + type=str, + default="0,6,12,18", + nargs=1, + help="Full hour time stamps to include in time series. To select multiple," + "pass comma-separated values here, e.g. '--h_steps 0,12' will only " + "include data from images at 0:00 and 12:00 UTC and ignore all other " + "available time stamps." + "By default, we include data for every 6th hour each day.") @click.option( '--imgbuffer', '-b', type=click.INT, default=50, help="NUMBER | Number of images to read into memory at once before " - "conversion to time series. A larger buffer means faster" - " processing but requires more memory.") + "conversion to time series. A larger buffer means faster " + "processing but requires more memory.") def cli_reshuffle(img_path, ts_path, start, end, variables, land_points, bbox, h_steps, imgbuffer): """ @@ -332,8 +321,11 @@ def cli_reshuffle(img_path, ts_path, start, end, variables, land_points, bbox, Date of the last image to include in the conversion. 
Format: YYYY-MM-DD """ - print(f"Converting ERA5/ERA5-Land data from {start} to {end} into " - f"{ts_path}.") + h_steps = [int(h.strip()) for h in h_steps.split(',')] + variables = [str(v.strip()) for v in variables.split(',')] + + print(f"Converting ERA5/ERA5-Land images in period {start} to {end} into " + f"time series to {ts_path}.") img2ts( input_root=img_path, @@ -345,6 +337,7 @@ def cli_reshuffle(img_path, ts_path, start, end, variables, land_points, bbox, h_steps=h_steps, land_points=land_points, imgbuffer=imgbuffer, + product=None, # Infer product from files ) @click.group(short_help="ERA5 Command Line Programs imported from the " @@ -353,13 +346,15 @@ def era5(): pass +era5.add_command(cli_download_era5) +era5.add_command(cli_reshuffle) + + @click.group(short_help="ERA5-Land Command Line Programs imported from the " "`ecmwf_models` pip package.") def era5land(): pass -era5.add_command(cli_download_era5) -era5.add_command(cli_reshuffle) era5land.add_command(cli_download_era5land) era5land.add_command(cli_reshuffle) \ No newline at end of file diff --git a/src/ecmwf_models/era5/download.py b/src/ecmwf_models/era5/download.py index 9ca0b57..e444b6c 100644 --- a/src/ecmwf_models/era5/download.py +++ b/src/ecmwf_models/era5/download.py @@ -6,18 +6,22 @@ from ecmwf_models.utils import ( lookup, - save_gribs_from_grib, - save_ncs_from_nc, - default_variables, split_array + update_image_summary_file, + save_ncs_from_nc, save_gribs_from_grib, + default_variables, split_array, check_api_ready, + read_summary_yml ) +from repurpose.process import parallel_process +from repurpose.misc import delete_empty_directories + import warnings import os import logging -from datetime import datetime, time +from datetime import datetime, time, timedelta import shutil import cdsapi -import multiprocessing import numpy as np +import traceback def split_chunk(timestamps, @@ -115,8 +119,8 @@ def download_era5( default variables will be downloaded. 
target : str File name, where the data is stored. - geb : bool, optional (default: False) - Download data in grib format + grb : bool, optional (default: False) + Download data in grib format instead of netcdf product : str ERA5 data product to download, either era5 or era5-land dry_run: bool, optional (default: False) @@ -136,15 +140,17 @@ def download_era5( request = { "format": "grib" if grb else "netcdf", + "download_format": "unarchived", "variable": variables, "year": [str(y) for y in years], "month": [str(m).zfill(2) for m in months], "day": [str(d).zfill(2) for d in days], "time": [time(h, 0).strftime("%H:%M") for h in h_steps], } + request.update(cds_kwds) if product == "era5": - request["product_type"] = "reanalysis" + request["product_type"] = ["reanalysis"] c.retrieve("reanalysis-era5-single-levels", request, target) elif product == "era5-land": c.retrieve("reanalysis-era5-land", request, target) @@ -164,8 +170,9 @@ class CDSStatusTracker: statuscode_error = -1 statuscode_unavailable = 10 - def __init__(self): + def __init__(self, logger=logging.getLogger()): self.download_statuscode = self.statuscode_ok + self.logger = logger def handle_error_function(self, *args, **kwargs): message_prefix = args[0] @@ -176,7 +183,7 @@ def handle_error_function(self, *args, **kwargs): self.download_statuscode = self.statuscode_unavailable else: self.download_statuscode = self.statuscode_error - logging.error(*args, **kwargs) + self.logger.error(*args, **kwargs) def download_and_move( @@ -191,9 +198,11 @@ def download_and_move( dry_run=False, grid=None, remap_method="bil", - cds_kwds={}, + cds_kwds=None, stepsize="month", n_max_request=1000, + keep_prelim=True, + cds_token=None, ) -> int: """ Downloads the data from the ECMWF servers and moves them to the target @@ -211,16 +220,21 @@ def download_and_move( first date to download enddate: datetime last date to download - product : str, optional (default: ERA5) - Either ERA5 or ERA5-Land + product : str, optional 
(default: era5) + Either era5 or era5-land variables : list, optional (default: None) - Name of variables to download + Name of variables to download, see the documentation for all variable + names. If None is chosen, then the 'default' variables are downloaded. keep_original: bool (default: False) - keep the original downloaded data - h_steps: list or tuple - List of full hours to download data at the selected dates e.g [0, 12] + If True, keep the original downloaded data stack as received from CDS + after slicing individual time stamps. + h_steps: tuple, optional (default: (0, 6, 12, 18)) + List of full hours to download data for at the selected dates e.g + [0, 12] would download at 0:00 and 12:00. Only full hours are possible. grb: bool, optional (default: False) - Download data as grib files + Download data as grib files instead of netcdf. + Note that downloading in grib format, does not allow on-the-fly + resampling (`grid` argument) dry_run: bool Do not download anything, this is just used for testing the functions grid : dict, optional (default: None) @@ -238,6 +252,7 @@ def download_and_move( } Default is to use no regridding. + To use this option, it is necessary that CDO is installed. remap_method : str, optional (dafault: 'bil') Method to be used for regridding. Available methods are: - "bil": bilinear (default) @@ -247,22 +262,41 @@ def download_and_move( - "con": 1st order conservative remapping - "con2": 2nd order conservative remapping - "laf": largest area fraction remapping - cds_kwds: dict, optional (default: {}) - Additional arguments to be passed to the CDS API retrieve request. + cds_kwds: dict, optional (default: None) + Additional keyword arguments to be passed to the CDS API request. + This might be useful in the future, when new server-side options are + added which are not yet directly supported by this package. n_max_request : int, optional (default: 1000) Maximum size that a request can have to be processed by CDS. 
At the moment of writing this is 1000 (N_timstamps * N_variables in a request) but as this is a server side settings, it can change. + keep_prelim: bool, optional (default: True) + Keep preliminary data from ERA5T under a different file name. + These data are not yet final and might change if an issue is detected. + If False is chosen, then the preliminary data will be discarded and + not stored. + cds_token: str, optional (default: None) + To identify with the CDS. Required if no .cdsapirc file exists in + the home directory (see documentation). You can find your token/key + on your CDS user profile page. Alternatively, the CDSAPI_KEY + environment variable can be set manually instead of passing the token + here. Returns ------- status_code: int - 0 : Downloaded data ok - -1 : Error + Status code summary from all requests: + 0 : All Downloaded data ok + -1 : Error in at least one request -10 : No data available for requested time period """ + if cds_token is not None: + os.environ["CDSAPI_KEY"] = cds_token + check_api_ready() + h_steps = list(h_steps) product = product.lower() + cds_kwds = cds_kwds or dict() if variables is None: variables = default_variables(product=product) @@ -271,18 +305,15 @@ def download_and_move( variables = lookup(name=product, variables=variables) variables = variables["dl_name"].values.tolist() - logger = logging.getLogger('dl_logger') + # this logger name is also used by CDS API, don't change it + logger = logging.getLogger('cdsapi') if dry_run: warnings.warn("Dry run does not create connection to CDS") c = None cds_status_tracker = None else: - if not check_api_read(): - raise ValueError( - "Cannot establish connection to CDS. 
Please set up" - "your CDS API key as described at " - "https://cds.climate.copernicus.eu/api-how-to") + check_api_ready() os.makedirs(target_path, exist_ok=True) cds_status_tracker = CDSStatusTracker() @@ -295,22 +326,23 @@ def download_and_move( for t in pd.date_range(startdate, enddate, freq='D') ])) - daily_request = True if stepsize == "day" else False req_periods = split_chunk( timestamps, n_vars=len(variables), n_hsteps=len(h_steps), max_req_size=n_max_request, reduce=True, - daily_request=daily_request) + daily_request=True if stepsize == "day" else False + ) - # Since we download month/month or day/day we need to - # collect all the status codes to return a valid - # status code for the whole time period - all_status_codes = [] + logger.info(f"Request is split into {len(req_periods)} chunks") + logger.info(f"Target directory {target_path}") + + downloaded_data_path = os.path.join(target_path, "temp_downloaded") + if not os.path.exists(downloaded_data_path): + os.mkdir(downloaded_data_path) - pool = multiprocessing.Pool(1) - for curr_start, curr_end in req_periods: + def _download(curr_start, curr_end): curr_start = pd.to_datetime(curr_start).to_pydatetime() curr_end = pd.to_datetime(curr_end).to_pydatetime() @@ -319,12 +351,9 @@ def download_and_move( fname = "{start}_{end}.{ext}".format( start=curr_start.strftime("%Y%m%d"), end=curr_end.strftime("%Y%m%d"), - ext="grb" if grb else "nc", + ext="grb" if grb else "nc" ) - downloaded_data_path = os.path.join(target_path, "temp_downloaded") - if not os.path.exists(downloaded_data_path): - os.mkdir(downloaded_data_path) dl_file = os.path.join(downloaded_data_path, fname) finished, i = False, 0 @@ -363,37 +392,44 @@ def download_and_move( if status_code == 0: if grb: - pool.apply_async( - save_gribs_from_grib, - args=(dl_file, target_path), - kwds=dict( - product_name=product.upper(), - keep_original=keep_original, - ), + save_gribs_from_grib( + dl_file, + target_path, + product_name=product.upper(), + 
keep_original=keep_original, + keep_prelim=keep_prelim ) else: - pool.apply_async( - save_ncs_from_nc, - args=( - dl_file, - target_path, - ), - kwds=dict( - product_name=product.upper(), - grid=grid, - remap_method=remap_method, - keep_original=keep_original, - ), + save_ncs_from_nc( + dl_file, + target_path, + product_name=product.upper(), + grid=grid, + remap_method=remap_method, + keep_original=keep_original, + keep_prelim=keep_prelim ) - all_status_codes.append(status_code) + return status_code - pool.close() - pool.join() + + + # Since we download month/month or day/day we need to + # collect all the status codes to return a valid + # status code for the whole time period + all_status_codes = parallel_process( + _download, + ITER_KWARGS={ + 'curr_start': [p[0] for p in req_periods], + 'curr_end': [p[1] for p in req_periods] + }, + logger_name='cdsapi', loglevel='DEBUG' + ) # remove temporary files if not keep_original: shutil.rmtree(downloaded_data_path) + if grid is not None: gridpath = os.path.join(target_path, "grid.txt") if os.path.exists(gridpath): @@ -402,6 +438,82 @@ def download_and_move( if os.path.exists(weightspath): os.unlink(weightspath) + delete_empty_directories(target_path) + + dl_settings = { + 'product': product, + 'variables': variables, + 'keep_original': keep_original, + 'h_steps': h_steps, + 'grb': grb, + 'grid': grid, + 'remap_method': remap_method, + 'cds_kwds': cds_kwds, + 'stepsize': stepsize, + 'n_max_request': n_max_request, + 'keep_prelim': keep_prelim, + } + + update_image_summary_file(target_path, dl_settings) + + handlers = logger.handlers[:] + + for handler in handlers: + logger.removeHandler(handler) + handler.close() + handlers.clear() + # if any of the sub-periods was successful we want the function to return 0 consolidated_status_code = max(all_status_codes) return consolidated_status_code + + +def download_record_extension(path, dry_run=False, cds_token=None): + """ + Uses information from an existing record to download 
additional data + from CDS. + + Parameters + ---------- + path: str + Path where the image data to extend is stored. Must also contain + a `summary.yml` file. + dry_run: bool, optional + Do not download anything, this is just used for testing the functions + cds_token: str, optional (default: None) + To identify with the CDS. Required if no `.cdsapirc` file exists in + the home directory (see documentation). You can find your token/key + on your CDS user profile page. Alternatively, the CDSAPI_KEY + environment variable can be set manually instead of passing the token + here. + + Returns + ------- + status_code: int + Status code summary from all requests: + 0 : All Downloaded data ok + -1 : Error in at least one request + -10 : No data available for requested time period + """ + props = read_summary_yml(path) + + startdate = pd.to_datetime(props['period_to']).to_pydatetime() \ + + timedelta(days=1) # next day + + enddate = pd.to_datetime(datetime.now().date()).to_pydatetime() \ + - timedelta(days=1) # yesterday + + return download_and_move( + path, startdate=startdate, enddate=enddate, + cds_token=cds_token, dry_run=dry_run, + **props['download_settings']) + + +if __name__ == '__main__': + download_record_extension('/tmp/era5/nc') + # download_and_move( + # '/tmp/era5/nc', startdate='2024-07-01', + # enddate='2024-07-10', product='era5', keep_original=False, + # variables=['swvl1'], h_steps=[0,23], grb=False, dry_run=False, + # keep_prelim=False + # ) \ No newline at end of file diff --git a/src/ecmwf_models/era5/interface.py b/src/ecmwf_models/era5/reader.py similarity index 82% rename from src/ecmwf_models/era5/interface.py rename to src/ecmwf_models/era5/reader.py index 5cf6c2d..b364863 100644 --- a/src/ecmwf_models/era5/interface.py +++ b/src/ecmwf_models/era5/reader.py @@ -9,15 +9,8 @@ from typing_extensions import Literal from pygeogrids.grids import CellGrid +from ecmwf_models.utils import assert_product from ecmwf_models.interface import ERANcImg, ERANcDs, 
ERAGrbImg, ERAGrbDs -from ecmwf_models.glob import SUPPORTED_PRODUCTS - - -def _assert_product(product: str) -> str: - if product not in SUPPORTED_PRODUCTS: - raise ValueError(f"Got product {product} but expected one of " - f"{SUPPORTED_PRODUCTS}") - return product class ERA5NcImg(ERANcImg): @@ -25,7 +18,7 @@ class ERA5NcImg(ERANcImg): def __init__( self, filename: str, - parameter: Optional[Collection[str]] = ("swvl1", "swvl2"), + parameter: Collection[str] = None, product: Literal['era5', 'era5-land'] = 'era5', subgrid: Optional[CellGrid] = None, mask_seapoints: Optional[bool] = False, @@ -38,8 +31,9 @@ def __init__( ---------- filename: str Path to the image file to read. - parameter: list or str, optional (default: ['swvl1', 'swvl2']) + parameter: list[str] or str, optional (default: None) Name of parameters to read from the image file. + None means all parameter product: str, optional (default: 'era5') What era5 product, either era5 or era5-land. subgrid: pygeogrids.CellGrid, optional (default: None) @@ -54,7 +48,7 @@ def __init__( super(ERA5NcImg, self).__init__( filename=filename, - product=_assert_product(product), + product=assert_product(product.lower()), parameter=parameter, subgrid=subgrid, mask_seapoints=mask_seapoints, @@ -70,10 +64,11 @@ class ERA5NcDs(ERANcDs): ---------- root_path: str Path to the image files to read. - parameter: list or str, optional (default: ('swvl1', 'swvl2')) + parameter: list[str] or str, optional (default: None) Name of parameters to read from the image file. + None means all parameter product: str, optional (default: 'era5') - What era5 product, either era5 or era5-land. + What era5 product, either `era5` or `era5-land`. h_steps : list, optional (default: (0,6,12,18)) List of full hours to read images for. 
subgrid: pygeogrids.CellGrid, optional (default: None) @@ -88,8 +83,8 @@ class ERA5NcDs(ERANcDs): def __init__( self, root_path: str, - parameter: Collection[str] = ("swvl1", "swvl2"), - product: Literal['era5', 'era5-land'] = 'era5', + parameter: Collection[str] = None, + product: str = 'era5', h_steps: Collection[int] = (0, 6, 12, 18), subgrid: Optional[CellGrid] = None, mask_seapoints: Optional[bool] = False, @@ -97,7 +92,7 @@ def __init__( ): super(ERA5NcDs, self).__init__( root_path=root_path, - product=_assert_product(product), + product=assert_product(product.lower()), parameter=parameter, subgrid=subgrid, h_steps=h_steps, @@ -111,7 +106,7 @@ class ERA5GrbImg(ERAGrbImg): def __init__( self, filename: str, - parameter: Optional[Collection[str]] = ("swvl1", "swvl2"), + parameter: Collection[str] = None, subgrid: Optional[CellGrid] = None, mask_seapoints: Optional[bool] = False, array_1D=False, @@ -123,8 +118,9 @@ def __init__( ---------- filename: str Path to the image file to read. - parameter: list or str, optional (default: ['swvl1', 'swvl2']) + parameter: list[str] or str, optional (default: None) Name of parameters to read from the image file. + None means all parameter subgrid: pygeogrids.CellGrid, optional (default: None) Read only data for points of this grid and not global values. mask_seapoints : bool, optional (default: False) @@ -149,9 +145,9 @@ class ERA5GrbDs(ERAGrbDs): def __init__( self, root_path: str, - parameter: Collection[str] = ("swvl1", "swvl2"), + parameter: Collection[str] = None, h_steps: Collection[int] = (0, 6, 12, 18), - product: Literal['era5', 'era5-land'] = "era5", + product: str = "era5", subgrid: Optional[CellGrid] = None, mask_seapoints: Optional[bool] = False, array_1D: Optional[bool] = False, @@ -163,12 +159,13 @@ def __init__( ---------- root_path: str Path to the image files to read. 
- parameter: list or str, optional (default: ['swvl1', 'swvl2']) + parameter: list[str] or str, optional (default: None) Name of parameters to read from the image file. + None means all parameter h_steps : list, optional (default: [0,6,12,18]) List of full hours to read images for. product: str, optional (default: 'era5') - What era5 product, either era5 or era5-land. + What era5 product, either `era5` or `era5-land`. subgrid: pygeogrids.CellGrid, optional (default: None) Read only data for points of this grid and not global values. mask_seapoints : bool, optional (default: False) @@ -180,7 +177,7 @@ def __init__( super(ERA5GrbDs, self).__init__( root_path=root_path, - product=_assert_product(product), + product=assert_product(product.lower()), parameter=parameter, subgrid=subgrid, h_steps=h_steps, diff --git a/src/ecmwf_models/era5/reshuffle.py b/src/ecmwf_models/era5/reshuffle.py index 5ed98d8..ad17865 100644 --- a/src/ecmwf_models/era5/reshuffle.py +++ b/src/ecmwf_models/era5/reshuffle.py @@ -7,23 +7,21 @@ import os import pandas as pd -import sys -import argparse import numpy as np from datetime import time, datetime from repurpose.img2ts import Img2Ts -from ecmwf_models.era5.interface import ERA5NcDs, ERA5GrbDs +from ecmwf_models.era5.reader import ERA5NcDs, ERA5GrbDs from ecmwf_models.grid import ERA_RegularImgGrid, ERA5_RegularImgLandGrid -from ecmwf_models.utils import mkdate, parse_filetype, parse_product, str2bool +from ecmwf_models.utils import parse_filetype, parse_product def img2ts( input_root, outputpath, startdate, enddate, - variables, + variables=None, product=None, bbox=None, h_steps=(0, 6, 12, 18), @@ -46,7 +44,7 @@ def img2ts( enddate : str End date, from which images are read and time series are generated. Format YYYY-mm-dd - variables: tuple or list or str + variables: tuple or str Variables to read from the passed images and convert into time series format. 
product : str, optional (default: None) @@ -63,6 +61,8 @@ def img2ts( Reshuffle only land points. Uses the ERA5 land mask to create a land grid. The land grid is fixed to 0.25*0.25 or 0.1*0.1 deg for now. + include_prelim: bool, optional (default: True) + Include preliminary data (from Era5-T and ERA5-Land-T files) imgbuffer: int, optional (default: 200) How many images to read at once before writing time series. This number affects how many images are stored in memory and should @@ -73,26 +73,23 @@ def img2ts( startdate = pd.to_datetime(startdate).to_pydatetime().date() enddate = pd.to_datetime(enddate).to_pydatetime().date() - if h_steps is None: - h_steps = (0, 6, 12, 18) - filetype = parse_filetype(input_root) - product = parse_product(input_root) if not product else product + product: str = parse_product(input_root) if product is None else product if land_points: if product == "era5": grid = ERA5_RegularImgLandGrid( - res_lat=0.25, res_lon=0.25, bbox=bbox) + resolution=0.25, bbox=bbox) elif product == "era5-land": - grid = ERA5_RegularImgLandGrid(res_lat=0.1, res_lon=0.1, bbox=bbox) + grid = ERA5_RegularImgLandGrid(resolution=0.1, bbox=bbox) else: raise NotImplementedError( product, "Land grid not implemented for product.") else: - if product == "era5": - grid = ERA_RegularImgGrid(res_lat=0.25, res_lon=0.25, bbox=bbox) - elif product == "era5-land": - grid = ERA_RegularImgGrid(res_lat=0.1, res_lon=0.1, bbox=bbox) + if product.lower() in ['era5-land', 'era5-land-t']: + grid = ERA_RegularImgGrid(resolution=0.1, bbox=bbox) + elif product.lower() in ['era5', 'era5-t']: + grid = ERA_RegularImgGrid(resolution=0.25, bbox=bbox) else: raise NotImplementedError(product, "Grid not implemented for product.") @@ -154,3 +151,10 @@ def img2ts( n_proc=1, ) reshuffler.calc() + + +if __name__ == '__main__': + img2ts("/home/wpreimes/shares/home/code/ecmwf_models/tests/ecmwf_models-test-data/ERA5/grib", + '/tmp/era5/ts', variables=['swvl1', "swvl2"], + startdate='2010-01-01', 
enddate='2010-01-01', land_points=False, + h_steps=[0, 12], bbox=(12, 46, 17, 50)) \ No newline at end of file diff --git a/src/ecmwf_models/glob.py b/src/ecmwf_models/glob.py deleted file mode 100644 index 7fc736b..0000000 --- a/src/ecmwf_models/glob.py +++ /dev/null @@ -1,16 +0,0 @@ -import os -from pathlib import Path - -# ERA5 products supported by the reader. -SUPPORTED_PRODUCTS = ['era5', 'era5-land'] - -CDS_API_URL = "https://cds.climate.copernicus.eu/api/v2" - -# CDSAPI_RC variable must be set or we use home dir -DOTRC = os.environ.get('CDSAPI_RC', os.path.join(Path.home(), '.cdsapirc')) - -class CdoNotFoundError(ModuleNotFoundError): - def __init__(self, msg=None): - _default_msg = "cdo and/or python-cdo not installed. " \ - "Use conda to install it them under Linux." - self.msg = _default_msg if msg is None else msg diff --git a/src/ecmwf_models/globals.py b/src/ecmwf_models/globals.py new file mode 100644 index 0000000..9432cd1 --- /dev/null +++ b/src/ecmwf_models/globals.py @@ -0,0 +1,24 @@ +import os +from pathlib import Path + + +IMG_FNAME_TEMPLATE = "{product}_{type}_{datetime}.{ext}" +IMG_FNAME_DATETIME_FORMAT = "%Y%m%d_%H%M" + +# ERA5 products supported by the reader. +SUPPORTED_PRODUCTS = ['era5', 'era5-land'] + +CDS_API_URL = "https://cds.climate.copernicus.eu/api" + +# CDSAPI_RC variable must be set or we use home dir +DOTRC = os.environ.get('CDSAPI_RC', os.path.join(Path.home(), '.cdsapirc')) + +EXPVER = {'0005': 'T', '0001': ''} + + +class CdoNotFoundError(ModuleNotFoundError): + def __init__(self, msg=None): + _default_msg = ("cdo and/or python-cdo not installed. 
" + "Pleas run `conda install -c conda-forge cdo` and also " + "`pip install cdo`.") + self.msg = _default_msg if msg is None else msg \ No newline at end of file diff --git a/src/ecmwf_models/grid.py b/src/ecmwf_models/grid.py index 9f8104f..9eeee5a 100644 --- a/src/ecmwf_models/grid.py +++ b/src/ecmwf_models/grid.py @@ -4,11 +4,20 @@ """ import numpy as np -from pygeogrids.grids import BasicGrid, CellGrid -from netCDF4 import Dataset +from pygeogrids.grids import BasicGrid, CellGrid, gridfromdims import os -from typing import Tuple +from typing import Tuple, Literal +import xarray as xr +def trafo_lon(lon): + lons_gt_180 = np.where(lon > 180.) + lon[lons_gt_180] = lon[lons_gt_180] - 360.0 + return lon + +def safe_arange(start, stop, step): + f_step = (1. / float(step)) + vals = np.arange(float(start) * f_step, float(stop) * f_step , float(step) * f_step) + return vals / f_step def get_grid_resolution(lats: np.ndarray, lons: np.ndarray) -> (float, float): """ @@ -38,8 +47,7 @@ def get_grid_resolution(lats: np.ndarray, lons: np.ndarray) -> (float, float): def ERA5_RegularImgLandGrid( - res_lat: float = 0.25, - res_lon: float = 0.25, + resolution: float = 0.25, bbox: Tuple[float, float, float, float] = None, ) -> CellGrid: """ @@ -48,49 +56,53 @@ def ERA5_RegularImgLandGrid( Parameters ---------- - res_lat: float, optional (default: 0.25) - Grid resolution (in degrees) in latitude direction. - res_lon: float, optional (default: 0.25) - Grid resolution (in degrees) in longitude direction. + resolution: float, optional (default: 0.25) + Grid resolution in degrees. Either 0.25 (ERA5) or 0.1 (ERA5-Land) bbox: tuple, optional (default: None) - (min_lon, min_lat, max_lon, max_lat) - wgs84 - bbox to cut the global grid to. 
+ WGS84 (min_lon, min_lat, max_lon, max_lat) + Values must be between -180 to 180 and -90 to 90 + bbox to cut the global grid to + + Returns + ------- + landgrid: CellGrid + ERA Land grid at the given resolution, cut to the given bounding box """ + if resolution not in [0.25, 0.1]: + raise ValueError("Unsupported resolution. Choose one of 0.25 or 0.1") try: - ds = Dataset( + ds = xr.open_dataset( os.path.join( os.path.abspath(os.path.dirname(__file__)), "era5", "land_definition_files", - f"landmask_{res_lat}_{res_lon}.nc", - )) + f"landmask_{resolution}_{resolution}.nc", + ))["land"] + ds = ds.assign_coords({'longitude': trafo_lon(ds['longitude'].values)}) + if bbox is not None: + ds = ds.sel(latitude=slice(bbox[3], bbox[1])) + ds = ds.isel(longitude=np.where(( + (ds['longitude'].values >= bbox[0]) & + (ds['longitude'].values <= bbox[2]) + ))[0]) + + land_mask = np.array(ds.values == 1.0) + except FileNotFoundError: raise FileNotFoundError( "Land definition for this grid resolution not yet available. 
" "Please create and add it.") - global_grid = ERA_RegularImgGrid(res_lat, res_lon, bbox=None) - - land_mask = ds.variables["land"][:].flatten().filled(0.0) == 1.0 - land_points = np.ma.masked_array(global_grid.get_grid_points()[0], - ~land_mask) + full_grid = ERA_RegularImgGrid(resolution, bbox=bbox) - land_grid = global_grid.subgrid_from_gpis( - land_points[~land_points.mask].filled().astype("int")) - - land_grid = land_grid.to_cell_grid(5.0, 5.0) - - if bbox is not None: - gpis = land_grid.get_bbox_grid_points( - lonmin=bbox[0], latmin=bbox[1], lonmax=bbox[2], latmax=bbox[3]) - land_grid = land_grid.subgrid_from_gpis(gpis) + land_gpis = full_grid.get_grid_points()[0][land_mask.flatten()] + land_grid = full_grid.subgrid_from_gpis(land_gpis) return land_grid def ERA_RegularImgGrid( - res_lat: float = 0.25, - res_lon: float = 0.25, + resolution: float = 0.25, bbox: Tuple[float, float, float, float] = None, ) -> CellGrid: """ @@ -99,12 +111,12 @@ def ERA_RegularImgGrid( Parameters ---------- - res_lat: float, optional (default: 0.25) - Grid resolution (in degrees) in latitude direction. - res_lon: float, optional (default: 0.25) - Grid resolution (in degrees) in longitude direction. + resolution: float, optional (default: 0.25) + Grid resolution (in degrees) in both directions. + Either 0.25 (ERA5) or 0.1 (ERA5-Land) bbox: tuple, optional (default: None) - (min_lon, min_lat, max_lon, max_lat) - wgs84 + (min_lon, min_lat, max_lon, max_lat) + wgs84 (Lon -180 to 180) bbox to cut the global grid to. Returns @@ -113,55 +125,23 @@ def ERA_RegularImgGrid( Regular, CellGrid with 5DEG*5DEG cells for the passed bounding box. """ - # np.arange is not precise... 
- f_lon = 1.0 / res_lon - f_lat = 1.0 / res_lat - res_lon = res_lon * f_lon - res_lat = res_lat * f_lat - lon = np.arange(0.0, 360.0 * f_lon, res_lon) - lat = np.arange(90.0 * f_lat, -90 * f_lat - res_lat, -1 * res_lat) - lons_gt_180 = np.where(lon > (180.0 * f_lon)) - lon[lons_gt_180] = lon[lons_gt_180] - (360.0 * f_lon) - - lon, lat = np.meshgrid(lon, lat) - - glob_basic_grid = BasicGrid(lon.flatten() / f_lon, lat.flatten() / f_lat) - glob_cell_grid = glob_basic_grid.to_cell_grid(cellsize=5.0) + # to get precise coordinates + lon = safe_arange(-180, 180, resolution) + lat = safe_arange(-90, 90+resolution, resolution)[::-1] if bbox is not None: - gpis = glob_cell_grid.get_bbox_grid_points( - lonmin=bbox[0], latmin=bbox[1], lonmax=bbox[2], latmax=bbox[3]) - glob_cell_grid = glob_cell_grid.subgrid_from_gpis(gpis) + lon = lon[np.where((lon >= bbox[0]) & (lon <= bbox[2]))] + lat = lat[np.where((lat >= bbox[1]) & (lat <= bbox[3]))] - return glob_cell_grid + # ERA grid LLC point has Lon=0 + lon = np.roll(lon, int(len(lon) * 0.5)) + glob_basic_grid = gridfromdims(lon, lat, origin='top') + glob_cell_grid = glob_basic_grid.to_cell_grid(cellsize=5.0) -def ERA_IrregularImgGrid( - lons: np.ndarray, - lats: np.ndarray, - bbox: Tuple[float, float, float, float] = None, -) -> CellGrid: - """ - Create a irregular grid from the passed coordinates. + return glob_cell_grid - Parameters - ---------- - lons : np.ndarray - List of longitude values - lats: np.ndarray - List of latitude values - bbox : Tuple[float, float, float], optional (default: None) - Keep only points in the passed bounding box. 
- latmin, latmax, lonmin, lonmax - """ - lons_gt_180 = np.where(lons > 180.0) - lons[lons_gt_180] = lons[lons_gt_180] - 360 - grid = BasicGrid(lons.flatten(), lats.flatten())\ - .to_cell_grid(cellsize=5.0) - if bbox is not None: - gpis = grid.get_bbox_grid_points( - lonmin=bbox[0], latmin=bbox[1], lonmax=bbox[2], latmax=bbox[3]) - grid = grid.subgrid_from_gpis(gpis) - return grid +if __name__ == '__main__': + a = ERA5_RegularImgLandGrid(0.25) \ No newline at end of file diff --git a/src/ecmwf_models/interface.py b/src/ecmwf_models/interface.py index 293d33e..e8276a9 100644 --- a/src/ecmwf_models/interface.py +++ b/src/ecmwf_models/interface.py @@ -23,11 +23,11 @@ """ Base classes for reading downloaded ERA netcdf and grib images and stacks """ - import warnings import os from pygeobase.io_base import ImageBase, MultiTemporalImageBase from pygeobase.object_base import Image +from pygeogrids.grids import BasicGrid, gridfromdims import numpy as np from datetime import timedelta, datetime # noqa: F401 @@ -36,9 +36,10 @@ from ecmwf_models.grid import ( ERA_RegularImgGrid, get_grid_resolution, - ERA_IrregularImgGrid, + trafo_lon ) from ecmwf_models.utils import lookup +from ecmwf_models.globals import IMG_FNAME_TEMPLATE, SUPPORTED_PRODUCTS import glob import xarray as xr @@ -51,7 +52,9 @@ class ERANcImg(ImageBase): """ - Reader for a single ERA netcdf file. + Reader for a single ERA netcdf file. The main purpose of this class is + to use it in the time series conversion routine. To read downloaded image + files, we recommend using xarray (https://docs.xarray.dev/en/stable/). Parameters ---------- @@ -61,8 +64,9 @@ class ERANcImg(ImageBase): 'era5' or 'era5-land' parameter: list or str, optional (default: ['swvl1', 'swvl2']) Name of parameters to read from the image file. - subgrid: pygeogrids.CellGrid, optional (default: None) - Read only data for points of this grid and not global values. 
+ subgrid: ERA_RegularImgGrid or ERA_RegularImgLandGrid, optional (default: None) + Read only data for points of this grid. + If None is passed, we read all points from the file. mask_seapoints : bool, optional (default: False) Read the land-sea mask to mask points over water and set them to nan. This option needs the 'lsm' parameter to be in the file! @@ -71,27 +75,26 @@ class ERANcImg(ImageBase): mode : str, optional (default: 'r') Mode in which to open the file, changing this can cause data loss. This argument should not be changed! - """ def __init__( - self, - filename, - product, - parameter=["swvl1", "swvl2"], - subgrid=None, - mask_seapoints=False, - array_1D=False, - mode="r", + self, + filename, + product, + parameter=None, + subgrid=None, + mask_seapoints=False, + array_1D=False, + mode='r', ): super(ERANcImg, self).__init__(filename, mode=mode) - if type(parameter) == str: - parameter = [parameter] - - # look up short names - self.parameter = lookup(product, parameter)["short_name"].values + if parameter is not None: + # look up short names + self.parameter = lookup(product, np.atleast_1d(parameter))["short_name"].values + else: + self.parameter = None self.mask_seapoints = mask_seapoints self.array_1D = array_1D @@ -111,24 +114,16 @@ def read(self, timestamp=None): timestamp : datetime, optional (default: None) Specific date (time) to read the data for. 
""" - return_img = {} - return_metadata = {} try: dataset = xr.open_dataset( self.filename, engine="netcdf4", mask_and_scale=True) except IOError as e: - print(e) print(" ".join([self.filename, "can not be opened"])) raise e - res_lat, res_lon = get_grid_resolution( - dataset.variables["latitude"][:], - dataset.variables["longitude"][:]) - - grid = ( - ERA_RegularImgGrid(res_lat, res_lon) - if not self.subgrid else self.subgrid) + if self.parameter is None: + self.parameter = list(dataset.data_vars) if self.mask_seapoints: if "lsm" not in dataset.variables.keys(): @@ -136,66 +131,84 @@ def read(self, timestamp=None): " passed image for masking.") else: sea_mask = dataset.variables["lsm"].values + else: + sea_mask = None - for name in dataset.variables: - if name in self.parameter: + return_img = {} + return_metadata = {} + + grid = gridfromdims(trafo_lon(dataset['longitude'].values), + dataset['latitude'].values, origin='top') + + if self.subgrid is not None: + gpis = grid.find_nearest_gpi(self.subgrid.activearrlon, + self.subgrid.activearrlat)[0] + else: + gpis = None + + for name in self.parameter: + try: variable = dataset[name] + except KeyError: + path, f = os.path.split(self.filename) + warnings.warn( + f"Cannot load variable {name} from file {f}. 
" + f"Filling image with NaNs.") + dat = np.full(grid.shape if gpis is None else len(gpis), + np.nan) + return_img[name] = dat + continue - if 'expver' in variable.dims and (variable.data.ndim == 3): - warnings.warn( - f"Found experimental data in {self.filename}") - param_data = variable.data[-1] - for vers_data in variable.data: - if not np.all(np.isnan(vers_data)): - param_data = vers_data - else: - param_data = variable.data - - if self.mask_seapoints: - param_data = np.ma.array( - param_data, - mask=np.logical_not(sea_mask), - fill_value=np.nan, - ) - param_data = param_data.filled() + if 'expver' in variable.dims and (variable.data.ndim == 3): + warnings.warn(f"Found experimental data in {self.filename}") + param_data = variable.data[-1] + for vers_data in variable.data: + if not np.all(np.isnan(vers_data)): + param_data = vers_data + else: + param_data = variable.data - param_data = param_data.flatten() + if self.mask_seapoints: + param_data = np.ma.array( + param_data, + mask=np.logical_not(sea_mask), + fill_value=np.nan, + ) + param_data = param_data.filled() - return_metadata[name] = variable.attrs - return_img.update( - dict([(str(name), param_data[grid.activegpis])])) + param_data = param_data.flatten() + + if gpis is not None: + param_data = param_data[gpis] + + return_metadata[name] = variable.attrs + return_img[name] = param_data - try: - return_img[name] - except KeyError: - path, thefile = os.path.split(self.filename) - warnings.warn( - "Cannot load variable {var} from file {thefile}. 
" - "Filling image with NaNs.".format( - var=name, thefile=thefile)) - return_img[name] = np.empty(grid.activegpis.size).fill( - np.nan) dataset.close() + if self.subgrid is None: + self.subgrid = grid + if self.array_1D: return Image( - grid.activearrlon, - grid.activearrlat, + self.subgrid.activearrlon, + self.subgrid.activearrlat, return_img, return_metadata, timestamp, ) else: - nlat = np.unique(grid.activearrlat).size - nlon = np.unique(grid.activearrlon).size + if len(self.subgrid.shape) != 2: + raise ValueError("Passed subgrid does not have a 2d shape." + "Did you mean to read data as 1d arrays?") for key in return_img: - return_img[key] = return_img[key].reshape((nlat, nlon)) + return_img[key] = return_img[key].reshape(self.subgrid.shape) return Image( - grid.activearrlon.reshape(nlat, nlon), - grid.activearrlat.reshape(nlat, nlon), + self.subgrid.activearrlon.reshape(self.subgrid.shape), + self.subgrid.activearrlat.reshape(self.subgrid.shape), return_img, return_metadata, timestamp, @@ -213,29 +226,41 @@ def close(self): class ERANcDs(MultiTemporalImageBase): """ - Class for reading ERA 5 images in nc format. + Reader to extract individual images from a multi-image netcdf dataset. + The main purpose of this class is to use it in the time series conversion + routine. To read downloaded image files, we recommend using + xarray (https://docs.xarray.dev/en/stable/). Parameters ---------- root_path: str Root path where image data is stored. - parameter: list or str, optional (default: ['swvl1', 'swvl2']) - Parameter or list of parameters to read from image files. - subgrid: pygeogrids.CellGrid, optional (default: None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Use the land-sea-mask parameter in the file to mask points over water. - h_steps : list, optional (default: [0,6,12,18]) - List of full hours for which images exist. 
- array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. + product: str + ERA5 or ERA5-LAND + parameter: list[str] or str, optional (default: None) + Parameter or list of parameters to read. None reads all available + Parameters. + subgrid: ERA_RegularImgGrid or ERA_RegularImgLandGrid, optional + Read only data for points of this grid. + If None is passed, we read all points from the file. + mask_seapoints: bool, optional (default: False) + All points that are not over land are replaced with NaN values. + This requires that the land sea mask (lsm) parameter is included + in the image files! + h_steps: tuple, optional (default: (0, 6, 12, 18)) + Time stamps available for each day. Numbers refer to full hours. + array_1D: bool, optional (default: True) + Read data as 1d arrays. This is required when the passed subgrid + is 1-dimensional (e.g. when only landpoints are read). Otherwise + when a 2d (subgrid) is used, this switch means that the extracted + image data is also 2-dimensional (lon, lat). 
""" def __init__( self, root_path, product, - parameter=("swvl1", "swvl2"), + parameter=None, subgrid=None, mask_seapoints=False, h_steps=(0, 6, 12, 18), @@ -245,8 +270,11 @@ def __init__( self.h_steps = h_steps subpath_templ = ["%Y", "%j"] - if type(parameter) == str: - parameter = [parameter] + if parameter is not None: + # look up short names + self.parameter = lookup(product, np.atleast_1d(parameter))["short_name"].values + else: + self.parameter = None ioclass_kws = { 'product': product, @@ -256,15 +284,22 @@ def __init__( 'array_1D': array_1D } - # the goal is to use expERA5*.nc if necessary, but perfer ERA5*.nc + # the goal is to use ERA5-T*.nc if necessary, but prefer ERA5*.nc self.fn_templ_priority = [ - 'ERA5_*_{datetime}.nc', 'expERA5_*_{datetime}.nc' + IMG_FNAME_TEMPLATE.format(product=(p + ext).upper(), + type='*', + datetime="{datetime}", + ext='nc') + for ext in ['', '-T'] for p in SUPPORTED_PRODUCTS ] super(ERANcDs, self).__init__( root_path, ERANcImg, - fname_templ='*_{datetime}.nc', + fname_templ=IMG_FNAME_TEMPLATE.format(product='*', + type='*', + datetime='{datetime}', + ext='nc'), datetime_format="%Y%m%d_%H%M", subpath_templ=subpath_templ, exact_templ=False, @@ -352,47 +387,52 @@ def tstamps_for_daterange(self, start_date, end_date): class ERAGrbImg(ImageBase): - """ - Base class for reader for a single ERA Grib file. - - Parameters - ---------- - filename: str - Path to the image file to read. - product : str - ERA5 or ERAINT - parameter: list or str, optional (default: ['swvl1', 'swvl2']) - Name of parameters to read from the image file. - subgrid: pygeogrids.CellGrid, optional (default:None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Read the land-sea mask to mask points over water and set them to nan. - This option needs the 'lsm' parameter to be in the file! - array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. 
- mode : str, optional (default: 'r') - Mode in which to open the file, changing this can cause data loss. - This argument should not be changed! - """ def __init__( - self, - filename, - product, - parameter=("swvl1", "swvl2"), - subgrid=None, - mask_seapoints=False, - array_1D=True, - mode="r", + self, + filename, + product, + parameter=None, + subgrid=None, + mask_seapoints=False, + array_1D=True, + mode='r' ): + """ + Reader for a single ERA grib file. The main purpose of this class is + to use it in the time series conversion routine. To read downloaded image + files, we recommend using xarray (https://docs.xarray.dev/en/stable/). + Parameters + ---------- + filename: str + Path to the image file to read. + product : str + ERA5 or ERA5-LAND + parameter: list or str, optional + Name of parameters to read from the image file. None means all + available data variables. + subgrid: ERA_RegularImgGrid or ERA_RegularImgLandGrid, optional (default: None) + Read only data for points of this grid. + If None is passed, we read all points from the file. + mask_seapoints : bool, optional (default: False) + Read the land-sea mask to mask points over water and set them to nan. + This option needs the 'lsm' parameter to be in the file! + array_1D: bool, optional (default: False) + Read data as list, instead of 2D array, used for reshuffling. + mode : str, optional (default: 'r') + Mode in which to open the file, changing this can cause data loss. + This argument should not be changed! 
+ """ super(ERAGrbImg, self).__init__(filename, mode=mode) - if type(parameter) == str: - parameter = [parameter] + if parameter is None: + self.parameter = None + else: + # look up short names + self.parameter = lookup( + product, np.atleast_1d(parameter))["short_name"].values - self.parameter = lookup( - product, parameter)["short_name"].values # look up short names self.product = product self.mask_seapoints = mask_seapoints @@ -410,51 +450,46 @@ def read(self, timestamp=None): """ grbs = pygrib.open(self.filename) - grid = self.subgrid - return_img = {} return_metadata = {} - var_msg_lut = {p: None for p in self.parameter} - sea_mask = None - for N in range(grbs.messages): - n = N + 1 - message = grbs.message(n) - param_name = str(message.cfVarNameECMF) + grid = None - if param_name == "lsm": - if self.mask_seapoints and sea_mask is None: - sea_mask = message.values.flatten() - - if param_name not in self.parameter: - continue + for n in range(1, grbs.messages + 1): + message = grbs.message(n) + try: + param_name = str(message.cfVarNameECMF) # old field? 
+ except RuntimeError: + param_name = str(message.shortName) + + if self.mask_seapoints and (param_name == "lsm"): + pass + elif self.parameter is None: + pass + elif param_name in self.parameter: + pass else: - var_msg_lut[param_name] = n - - # available variables - shape = None - for param_name, n in var_msg_lut.items(): - if n is None: continue return_metadata[param_name] = {} message = grbs.message(n) - - param_data = message.values.flatten() - if not shape: - shape = param_data.shape - return_img[param_name] = param_data + lats, lons = message.latlons() + param_data = message.values if grid is None: - lats, lons = message.latlons() - try: - res_lat, res_lon = get_grid_resolution(lats, lons) - grid = ERA_RegularImgGrid(res_lat, res_lon) - except ValueError: # when grid not regular - lons_gt_180 = np.where(lons > 180.0) - lons[lons_gt_180] = lons[lons_gt_180] - 360 - grid = ERA_IrregularImgGrid(lons, lats) + grid = BasicGrid(trafo_lon(lons).flatten(), lats.flatten(), + shape=param_data.shape) + + param_data = param_data.flatten() + + if self.subgrid is not None: + gpis = grid.find_nearest_gpi(self.subgrid.activearrlon, + self.subgrid.activearrlat)[0] + + param_data = param_data[gpis] + + return_img[param_name] = param_data return_metadata[param_name]["units"] = message["units"] return_metadata[param_name]["long_name"] = \ @@ -464,56 +499,67 @@ def read(self, timestamp=None): return_metadata[param_name]["depth"] = "{:} cm".format( message["levels"]) + grbs.close() + + # Set data for non-land points to NaN if self.mask_seapoints: - if sea_mask is None: + if 'lsm' not in return_img: raise IOError( "No land sea mask parameter (lsm) in passed image" " for masking.") else: # mask the loaded data + mask = np.logical_not(return_img['lsm'].flatten()) for name in return_img.keys(): param_data = return_img[name] param_data = np.ma.array( param_data, - mask=np.logical_not(sea_mask), + mask=mask, fill_value=np.nan, ) param_data = param_data.filled() return_img[name] = 
param_data
 
-        grbs.close()
+        if (self.parameter is not None) and ('lsm' not in self.parameter):
+            return_img.pop('lsm')
+            return_metadata.pop('lsm')
 
-        # missing variables
-        for param_name, n in var_msg_lut.items():
-            if n is not None:
-                continue
-            param_data = np.full(shape, np.nan)
-            warnings.warn("Cannot load variable {var} from file {thefile}. "
-                          "Filling image with NaNs.".format(
-                              var=param_name, thefile=self.filename))
-            return_img[param_name] = param_data
-            return_metadata[param_name] = {}
-            return_metadata[param_name]["long_name"] = lookup(
-                self.product, [param_name]).iloc[0]["long_name"]
+        if self.subgrid is None:
+            self.subgrid = grid
+
+        # Add empty arrays for missing variables
+        if self.parameter is not None:
+            for p in self.parameter:
+                if p not in return_img:
+                    param_data = np.full(np.prod(self.subgrid.shape), np.nan)
+                    warnings.warn(
+                        f"Cannot load variable {p} from file "
+                        f"{self.filename}. Filling image with NaNs."
+                    )
+                    return_img[p] = param_data
+                    return_metadata[p] = {}
+                    return_metadata[p]["long_name"] = lookup(
+                        self.product, [p]).iloc[0]["long_name"]
 
         if self.array_1D:
             return Image(
-                grid.activearrlon,
-                grid.activearrlat,
+                self.subgrid.activearrlon,
+                self.subgrid.activearrlat,
                 return_img,
                 return_metadata,
                 timestamp,
             )
         else:
-            nlat = np.unique(grid.activearrlat).size
-            nlon = np.unique(grid.activearrlon).size
+            if len(self.subgrid.shape) != 2:
+                raise ValueError("Passed subgrid does not have a 2d shape."
+ "Did you mean to read data as 1d arrays?") for key in return_img: - return_img[key] = return_img[key].reshape((nlat, nlon)) + return_img[key] = return_img[key].reshape(self.subgrid.shape) return Image( - grid.activearrlon.reshape(nlat, nlon), - grid.activearrlat.reshape(nlat, nlon), + self.subgrid.activearrlon.reshape(self.subgrid.shape), + self.subgrid.activearrlat.reshape(self.subgrid.shape), return_img, return_metadata, timestamp, @@ -530,40 +576,53 @@ def close(self): class ERAGrbDs(MultiTemporalImageBase): - """ - Reader for a stack of ERA grib files. - - Parameters - ---------- - root_path: string - Root path where the data is stored - product : str - ERA5 or ERAINT - parameter: list or str, optional (default: ['swvl1', 'swvl2']) - Parameter or list of parameters to read - expand_grid: bool, optional (default: True) - If the reduced gaussian grid should be expanded to a full - gaussian grid. - """ - def __init__( self, root_path, product, - parameter=("swvl1", "swvl2"), + parameter=None, subgrid=None, mask_seapoints=False, h_steps=(0, 6, 12, 18), array_1D=True, ): + """ + Reader to extract individual images from a multi-image grib dataset. + The main purpose of this class is to use it in the time series conversion + routine. To read downloaded image files, we recommend using + xarray (https://docs.xarray.dev/en/stable/). + Parameters + ---------- + root_path: str + Root path where the downloaded image data is stored in grib format. + We assume that image files are organized in subfolders by year, + with each year containing subfolders for individual days of the + year. + product : str + ERA5 or ERA5-Land + parameter: list[str] or str, optional (default: None) + Parameter or list of parameters to read. None reads all available + Parameters. + subgrid: ERA_RegularImgGrid or ERA_RegularImgLandGrid, optional + Read only data for points of this grid. + If None is passed, we read all points from the file. 
+ mask_seapoints: bool, optional (default: False) + All points that are not over land are replaced with NaN values. + This requires that the land sea mask (lsm) parameter is included + in the image files! + h_steps: tuple, optional (default: (0, 6, 12, 18)) + Time stamps available for each day. Numbers refer to full hours. + array_1D: bool, optional (default: True) + Read data as 1d arrays. This is required when the passed subgrid + is 1-dimensional (e.g. when only landpoints are read). Otherwise + when a 2d (subgrid) is used, this switch means that the extracted + image data is also 2-dimensional (lon, lat). + """ self.h_steps = h_steps subpath_templ = ["%Y", "%j"] - if type(parameter) == str: - parameter = [parameter] - ioclass_kws = { "product": product, "parameter": parameter, @@ -572,10 +631,15 @@ def __init__( "array_1D": array_1D, } + fname_templ = IMG_FNAME_TEMPLATE.format(product="*", + type="*", + datetime="{datetime}", + ext="grb") + super(ERAGrbDs, self).__init__( root_path, ERAGrbImg, - fname_templ="*_{datetime}.grb", + fname_templ=fname_templ, datetime_format="%Y%m%d_%H%M", subpath_templ=subpath_templ, exact_templ=False, @@ -596,7 +660,8 @@ def tstamps_for_daterange(self, start_date, end_date): Returns ---------- timestamps : list - List of datetimes + List of datetime values (between start and end date) for all + required time stamps. """ img_offsets = np.array([timedelta(hours=h) for h in self.h_steps]) @@ -612,21 +677,24 @@ def tstamps_for_daterange(self, start_date, end_date): class ERATs(GriddedNcOrthoMultiTs): """ Time series reader for all reshuffled ERA reanalysis products in time - series format. + series format (pynetcf OrthoMultiTs format) Use the read_ts(lon, lat) resp. read_ts(gpi) function of this class - to read data for locations! + to read data for a location! 
+ """ - Parameters - ---------- - ts_path : str - Directory where the netcdf time series files are stored - grid_path : str, optional (default: None) - Path to grid file, that is used to organize the location of time - series to read. If None is passed, grid.nc is searched for in the - ts_path. - - Optional keyword arguments that are passed to the Gridded Base when used - ------------------------------------------------------------------------ + def __init__(self, ts_path, grid_path=None, **kwargs): + """ + Parameters + ---------- + ts_path : str + Directory where the netcdf time series files are stored + grid_path : str, optional (default: None) + Path to grid file, that is used to organize the location of time + series to read. If None is passed, grid.nc is searched for in the + ts_path. + + Optional keyword arguments that are passed to the Gridded Base when used + ------------------------------------------------------------------------ parameters : list, optional (default: None) Specific variable names to read, if None are selected, all are read. @@ -646,12 +714,9 @@ class ERATs(GriddedNcOrthoMultiTs): If false, dates will not be read automatically but only on specific request useable for bulk reading because currently the netCDF num2date routine is very slow for big datasets. - """ - - def __init__(self, ts_path, grid_path=None, **kwargs): - + """ if grid_path is None: grid_path = os.path.join(ts_path, "grid.nc") - grid = load_grid(grid_path) + super(ERATs, self).__init__(ts_path, grid, **kwargs) diff --git a/src/ecmwf_models/utils.py b/src/ecmwf_models/utils.py index 03aef85..e4f8449 100644 --- a/src/ecmwf_models/utils.py +++ b/src/ecmwf_models/utils.py @@ -24,9 +24,8 @@ """ Utility functions for all data products in this package. 
""" - -import warnings import os +import logging from datetime import datetime import xarray as xr import pandas as pd @@ -34,30 +33,37 @@ import numpy as np from netCDF4 import Dataset from collections import OrderedDict +from parse import parse +import yaml +from repurpose.misc import find_first_at_depth -from ecmwf_models.glob import CdoNotFoundError, DOTRC +from ecmwf_models.globals import ( + CdoNotFoundError, DOTRC, EXPVER, CDS_API_URL, IMG_FNAME_TEMPLATE, + IMG_FNAME_DATETIME_FORMAT, SUPPORTED_PRODUCTS +) try: from cdo import Cdo + cdo_available = True except ImportError: cdo_available = False try: import pygrib + pygrib_available = True except ImportError: pygrib_available = False - def save_ncs_from_nc( - input_nc, - output_path, - product_name, - filename_templ="{product}_AN_%Y%m%d_%H%M.nc", - grid=None, - keep_original=True, - remap_method="bil", + input_nc, + output_path, + product_name, + grid=None, + keep_original=True, + remap_method="bil", + keep_prelim=True, ): """ Split the downloaded netcdf file into daily files and add to folder @@ -71,14 +77,25 @@ def save_ncs_from_nc( Where to save the resulting netcdf files product_name : str Name of the ECMWF model (only for filename generation) - filename_templ : str, optional (default: "{product}_AN_%Y%m%d_%H%M.nc") - Template for naming each separated nc file keep_original: bool - keep the original downloaded data + keep the original downloaded data too, before it is sliced into + individual images. + keep_prelim: bool, optional (default: True) + True to keep preliminary data from ERA5T with a different file name, or + False drop these files and only keep the final records. 
""" localsubdirs = ["%Y", "%j"] + _filename_templ = IMG_FNAME_TEMPLATE.format( + product="{product}", + type='AN', + datetime=IMG_FNAME_DATETIME_FORMAT, + ext='nc' + ) + nc_in = xr.open_dataset(input_nc, mask_and_scale=True) + if 'valid_time' in nc_in.variables: + nc_in = nc_in.rename_vars({"valid_time": 'time'}) if grid is not None: if not cdo_available: @@ -92,19 +109,32 @@ def save_ncs_from_nc( for k, v in grid.items(): f.write(f"{k} = {v}\n") - for time in nc_in.time.values: - subset = nc_in.sel(time=time) - if (abs((datetime(2022, 1, 1) - datetime.now()).days) < 90): - warnings.warn( - f'Data for {time} may contain experimental versions of ' - f'variables') - if 'expver' in subset.dims: - subset_merge = subset.sel(expver=subset['expver'].values[0]) - for e in subset['expver'].values[1:]: - subset_merge = subset_merge.combine_first(subset.sel(expver=e)) - subset = subset_merge + for time in nc_in["time"].values: + subset = nc_in.sel({"time": time}) + + # Expver identifies preliminary data + if 'expver' in subset: + expver = str(subset['expver'].values) + subset = subset.drop_vars('expver') + try: + ext = EXPVER[expver] + except KeyError: + ext = '' + else: + ext = '' + + if len(ext) > 0 and not keep_prelim: + logging.info(f"Dropping preliminary data {time}") + continue + + if len(ext) > 0: + filename_templ = _filename_templ.format( + product=product_name + '-' + ext) else: - filename_templ = filename_templ.format(product=product_name) + filename_templ = _filename_templ.format(product=product_name) + + if 'number' in subset.variables: + subset = subset.drop_vars('number') timestamp = pd.Timestamp(time).to_pydatetime() filepath = create_dt_fpath( @@ -113,6 +143,7 @@ def save_ncs_from_nc( fname=filename_templ, subdirs=localsubdirs, ) + if not os.path.exists(os.path.dirname(filepath)): os.makedirs(os.path.dirname(filepath)) @@ -141,11 +172,11 @@ def save_ncs_from_nc( def save_gribs_from_grib( - input_grib, - output_path, - product_name, - 
filename_templ="{product}_AN_%Y%m%d_%H%M.grb", - keep_original=True, + input_grib, + output_path, + product_name, + keep_original=True, + keep_prelim=True, ): """ Split the downloaded grib file into daily files and add to folder structure @@ -159,57 +190,68 @@ def save_gribs_from_grib( Where to save the resulting grib files product_name : str Name of the ECMWF model (only for filename generation) - filename_templ : str, optional (default: product_OPER_0001_AN_date_time) - Template for naming each separated grb file + keep_original: bool + keep the original downloaded data too, before it is sliced into + individual images. + keep_prelim: bool, optional (default: True) + True to keep preliminary data from ERA5T with a different file name, or + False drop these files and only keep the final records. """ localsubdirs = ["%Y", "%j"] grib_in = pygrib.open(input_grib) + _filename_templ = IMG_FNAME_TEMPLATE.format( + product="{product}", + type='AN', + datetime=IMG_FNAME_DATETIME_FORMAT, + ext='grb' + ) + grib_in.seek(0) + prev_date = None + for grb in grib_in: - template = filename_templ filedate = datetime(grb["year"], grb["month"], grb["day"], grb["hour"]) - template = template.format(product=product_name) + expver = grb['expver'] + + try: + ext = EXPVER[expver] + except KeyError: + ext = '' + + if len(ext) > 0 and not keep_prelim: + logging.info(f"Dropping preliminary data {filedate}") + continue + + if len(ext) > 0: + filename_templ = _filename_templ.format( + product=product_name + '-' + ext) + else: + filename_templ = _filename_templ.format(product=product_name) filepath = create_dt_fpath( - filedate, root=output_path, fname=template, subdirs=localsubdirs) + filedate, root=output_path, fname=filename_templ, + subdirs=localsubdirs) if not os.path.exists(os.path.dirname(filepath)): os.makedirs(os.path.dirname(filepath)) - grb_out = open(filepath, "ab") + if prev_date != filedate: # to overwrite old files + mode = 'wb' + prev_date = filedate + else: + mode = "ab" + + 
with open(filepath, mode) as grb_out: + grb_out.write(grb.tostring()) - grb_out.write(grb.tostring()) - grb_out.close() grib_in.close() + if not keep_original: os.remove(input_grib) -def check_api_read() -> bool: - if not os.path.isfile(DOTRC): - url = os.environ.get('CDSAPI_URL') - key = os.environ.get('CDSAPI_KEY') - if url is None or key is None: - ValueError('CDS API KEY or .cdsapirc file not found, ' - 'download will not work! ' - 'Please create a .cdsapirc file with your credentials' - 'or pass your uid/key to the command line tool ' - 'See: ' - 'https://cds.climate.copernicus.eu/api-how-to') - api_ready = False - elif ":" not in key: - raise ValueError( - 'Your CDS token is not valid. It must be in the format ' - ':, both of which are found on your CDS' - 'profile page.') - else: - api_ready = True - else: - api_ready = True - return api_ready - def parse_product(inpath: str) -> str: """ Tries to find out what product is stored in the path. This is done based @@ -218,61 +260,78 @@ def parse_product(inpath: str) -> str: Parameters ---------- inpath: str - Input path where ERA data was downloaded to + Input path where ERA data was downloaded to. Contains annual folders. 
def parse_filetype(inpath: str) -> str:
    """
    Tries to find out the file type by parsing filenames in the passed
    directory. Does not open any files.

    Parameters
    ----------
    inpath: str
        Input path where ERA data was downloaded to. Contains annual folders.

    Returns
    -------
    filetype : str
        File type string, either 'grib' or 'netcdf'.
    """
    props = img_infer_file_props(inpath)
    # Downloaded images use the extensions 'grb' (grib) or 'nc' (netcdf);
    # anything that is not grib is treated as netcdf.
    if props['ext'] == 'grb':
        return 'grib'
    else:
        return 'netcdf'
def default_variables(product="era5", format='dl_name'):
    """
    These variables are being downloaded, when None are passed by the user.

    Parameters
    ----------
    product : str, optional (default: 'era5')
        Name of the era5 product to read the default variables for.
        Either 'era5' or 'era5-land'.
    format: str, optional (default: 'dl_name')
        'dl_name' for name as in the downloaded image data
        'short_name' for short name
        'long_name' for long name

    Returns
    -------
    defaults : list
        Names (in the chosen `format`) of all variables that are flagged
        as default (default == 1) in the product's variable lookup table.
    """
    lut = load_var_table(name=product)
    # Single .loc[rows, col] lookup instead of chained indexing
    # (lut.loc[...][format]); same result, avoids an intermediate frame.
    defaults = lut.loc[lut["default"] == 1, format].values
    return defaults.tolist()
threshold: float, optional (default: 0.5) Threshold value below which a point is declared water, @@ -383,7 +461,8 @@ def make_era5_land_definition_file( for dim_name in ds_in.dimensions.keys(): ds_out.createDimension(dim_name, size=ds_in.dimensions[dim_name].size) - ds_out.createVariable(dim_name, "float32", (dim_name,), zlib=True) + ds_out.createVariable(dim_name, "float32", (dim_name,), + zlib=True) ds_out.variables[dim_name][:] = ds_in.variables[dim_name][:] ref = ds_in.variables[ref_var] @@ -399,12 +478,13 @@ def make_era5_land_definition_file( if exclude_antarctica: cut_off_lat = -60.0 index_thres_lat = ((180.0 / data_file_y_res) + 1) - ( - (90.0 + cut_off_lat) / data_file_y_res) + (90.0 + cut_off_lat) / data_file_y_res) land_mask[int(index_thres_lat):, :] = np.nan else: cut_off_lat = None - ds_out.createVariable("land", "float32", (lat_name, lon_name), zlib=True) + ds_out.createVariable("land", "float32", + (lat_name, lon_name), zlib=True) ds_out.variables["land"][:] = land_mask land_attrs = OrderedDict([ @@ -423,21 +503,6 @@ def make_era5_land_definition_file( ds_out.close() -def default_variables(product="era5"): - """ - These variables are being downloaded, when None are passed by the user - - Parameters - --------- - product : str, optional (default: 'era5') - Name of the era5 product to read the default variables for. - Either 'era5' or 'era5-land'. - """ - lut = load_var_table(name=product) - defaults = lut.loc[lut["default"] == 1]["dl_name"].values - return defaults.tolist() - - def split_array(array, chunk_size): """ Split an array into chunks of a given size. @@ -458,3 +523,163 @@ def split_array(array, chunk_size): for i in range(0, len(array), chunk_size): chunks.append(array[i:i + chunk_size]) return chunks + + +def check_api_ready() -> bool: + """ + Verify that the API is ready to be used. Otherwise raise an Error. 
+
+    Returns:
+    --------
+    api_ready: bool
+        True if api is ready
+    """
+    if not os.path.isfile(DOTRC):
+        key = os.environ.get('CDSAPI_KEY')
+        if "CDSAPI_URL" not in os.environ:
+            os.environ['CDSAPI_URL'] = CDS_API_URL
+
+        if key is None:
+            raise ValueError(
+                'Neither CDSAPI_KEY variable nor .cdsapirc file found, '
+                'download will not work! '
+                'Please create a .cdsapirc file with your API key. '
+                'See: https://cds.climate.copernicus.eu/how-to-api'
+            )
+        else:
+            return True
+    else:
+        if "CDSAPI_URL" in os.environ:
+            os.environ.pop("CDSAPI_URL")  # Use URL from file
+        return True
+
+
+def img_infer_file_props(img_root_path: str,
+                         fntempl: str = IMG_FNAME_TEMPLATE,
+                         start_from_last=False) -> dict:
+    """
+    Parse file names to retrieve properties from fntempl.
+    Does not open any files.
+
+    Parameters
+    ----------
+    img_root_path: str
+        Root directory where annual directories are located
+    fntempl: str, optional
+        Filename template to parse filenames with
+    start_from_last: bool, optional
+        Use the last available file instead of the first one.
+    """
+    fname = find_first_at_depth(img_root_path, 2, reverse=start_from_last)
+
+    if fname is None:
+        raise ValueError(f"No matching files for chosen template found in "
+                         f"the directory {img_root_path}")
+    else:
+        file_args = parse(fntempl, fname)
+        return file_args.named
+
+
+def get_first_last_image_date(path, start_from_last=True):
+    """
+    Parse files in the given directory (or any subdir) using the passed
+    filename template. props will contain all fields specified in the template.
+    the `datetime` field is required and used to determine the last image date.
+
+    Parameters
+    ----------
+    path: str
+        Path to the directory containing the image files
+    start_from_last: bool, optional (default: True)
+        Get date from last available file instead of the first available one.
+
+    Returns
+    -------
+    date: str
+        Parse date from the last found image file that matches `fntempl`.
+ """ + try: + props = img_infer_file_props(path, + fntempl=IMG_FNAME_TEMPLATE, + start_from_last=start_from_last) + dt = datetime.strptime(props['datetime'], IMG_FNAME_DATETIME_FORMAT) + except ValueError: + raise ValueError('Could not infer date from filenames. ' + 'Check filename template.') + + return str(dt) + + +def update_image_summary_file(data_path: str, + other_props: dict = None, + out_file=None): + """ + Summarize image metadata as yml file + + Parameters + ---------- + data_path: str + Root path to the image archive + other_props: dict, optional (default: None) + Other properties to write into the yml file. E.g. download + options to enable time series update. + out_file: str, optional (default: None) + Path to summary file. File will be created/updated. + If not specified, then `data_path` is used. If a file already exists, + it will be overwritten. + """ + first_image_date = get_first_last_image_date(data_path, + start_from_last='first') + last_image_date = get_first_last_image_date(data_path, + start_from_last='last') + + props = img_infer_file_props(data_path, start_from_last=False) + _ = props.pop("datetime") + props['period_from'] = str(pd.to_datetime(first_image_date).date()) + props['period_to'] = str(pd.to_datetime(last_image_date).date()) + + props['last_update'] = str(datetime.now()) + + props['download_settings'] = {} + + if other_props is not None: + for k, v in other_props.items(): + props['download_settings'][k] = v + + if out_file is None: + out_file = os.path.join(data_path, f"overview.yml") + + with open(out_file, 'w') as f: + yaml.dump(props, f, default_flow_style=False) + + +def assert_product(product: str) -> str: + if product not in SUPPORTED_PRODUCTS: + raise ValueError(f"Got product {product} but expected one of " + f"{SUPPORTED_PRODUCTS}") + return product + + +def read_summary_yml(path: str) -> dict: + """ + Read image summary and return fields as dict. 
+ """ + path = os.path.join(path, 'overview.yml') + + if not os.path.isfile(path): + raise FileNotFoundError( + "No overview.yml file found in the passed directory. " + "This file is required to use the same settings to extend an " + "existing record. NOTE: Use the `era5 download` or " + "`era5land download` programs first." + ) + + with open(path, 'r') as stream: + props = yaml.safe_load(stream) + + return props + + +if __name__ == '__main__': + save_gribs_from_grib("/tmp/era5/grb/temp_downloaded/20240730_20240731.grb", + output_path='/tmp/era5/grb', product_name='ERA5') \ No newline at end of file diff --git a/tests/test_grid.py b/tests/test_grid.py index 00e8911..c422d14 100644 --- a/tests/test_grid.py +++ b/tests/test_grid.py @@ -6,14 +6,13 @@ from ecmwf_models.grid import ( ERA_RegularImgGrid, get_grid_resolution, - ERA_IrregularImgGrid, ERA5_RegularImgLandGrid, ) import numpy as np def test_ERA_regular_grid(): - reg_grid = ERA_RegularImgGrid(0.3, 0.3) + reg_grid = ERA_RegularImgGrid(0.3) assert np.unique(reg_grid.activearrlat).size == 601 assert np.unique(reg_grid.activearrlon).size == 1200 assert get_grid_resolution(reg_grid.activearrlat, @@ -22,27 +21,15 @@ def test_ERA_regular_grid(): 0.3, ) - -def test_ERA_irregular_grid(): - # we test this with a regular grid, because it's easier - lon = np.arange(0, 360 - 1.0 / 2, 1.0) - lat = np.arange(90, -1 * 90 - 1.0 / 2, -1 * 1.0) - lons, lats = np.meshgrid(lon, lat) - - grid = ERA_IrregularImgGrid(lons, lats) - - assert grid == ERA_RegularImgGrid(1.0, 1.0) - - def test_ERA5_landgrid_025(): - grid = ERA5_RegularImgLandGrid(0.25, 0.25) # 0.25*0.25 + grid = ERA5_RegularImgLandGrid(0.25) # 0.25*0.25 assert grid.get_grid_points()[0].size == 244450 assert grid.find_nearest_gpi(16.25, 48.25) == (240545, 0.0) assert grid.gpi2cell(240545) == 1431 def test_ERA5_landgrid_01(): - grid = ERA5_RegularImgLandGrid(0.1, 0.1) # 0.1*0.1 + grid = ERA5_RegularImgLandGrid(0.1) # 0.1*0.1 assert grid.get_grid_points()[0].size == 1544191 
assert grid.find_nearest_gpi(16.4, 48.1) == (1508564, 0.0) np.testing.assert_almost_equal(grid.gpi2lonlat(1508564)[0], 16.4) diff --git a/tests/test_utils.py b/tests/test_utils.py index e587fcb..a217d75 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,8 +4,6 @@ ''' from ecmwf_models.utils import ( - str2bool, - mkdate, parse_filetype, parse_product, load_var_table, @@ -19,16 +17,6 @@ from netCDF4 import Dataset -def test_str2bool(): - assert str2bool('true') - assert not str2bool('false') - - -def test_mkdate(): - assert mkdate('2000-01-01') == datetime(2000, 1, 1) - assert mkdate('2000-01-01T06:00') == datetime(2000, 1, 1, 6) - - def test_parse_product(): inpath = os.path.join( os.path.dirname(os.path.abspath(__file__)), "ecmwf_models-test-data", @@ -55,12 +43,6 @@ def test_load_var_table(): assert table.loc[45].dl_name == 'volumetric_soil_water_layer_1' assert table.loc[45].short_name == 'swvl1' - table = load_var_table('eraint') - assert table.index.size == 79 - assert table.loc[8].dl_name == 39.128 - assert table.loc[8].long_name == 'Volumetric soil water layer 1' - assert table.loc[8].short_name == 'swvl1' - def test_lookup(): lut = lookup('era5', ['swvl1', 'stl1']) diff --git a/tests/tests_era5/test_era5_download.py b/tests/tests_era5/test_era5_download.py index 3e9a0e0..6a738d1 100644 --- a/tests/tests_era5/test_era5_download.py +++ b/tests/tests_era5/test_era5_download.py @@ -23,11 +23,6 @@ ''' Tests for transferring downloaded data to netcdf or grib files ''' - -from ecmwf_models.era5.download import download_and_move, save_ncs_from_nc -from ecmwf_models.utils import cdo_available -from ecmwf_models.glob import CdoNotFoundError - import os import shutil from datetime import datetime @@ -35,7 +30,10 @@ import xarray as xr import pytest import tempfile -import time + +from ecmwf_models.era5.download import download_and_move, save_ncs_from_nc +from ecmwf_models.utils import cdo_available, read_summary_yml +from ecmwf_models.globals import 
CdoNotFoundError grid = { "gridtype": "lonlat", @@ -76,20 +74,25 @@ def test_dry_download_nc_era5(): assert os.path.isfile(trgt) startdate = enddate = datetime(2010, 1, 1) + variables, h_steps = ['swvl1', 'swvl2', 'lsm'], [0, 12] download_and_move( dl_path, startdate, enddate, - variables=['swvl1', 'swvl2', 'lsm'], + variables=variables, keep_original=False, - h_steps=[0, 12], + h_steps=h_steps, grb=False, dry_run=True) - assert (os.listdir(dl_path) == ['2010']) - assert (os.listdir(os.path.join(dl_path, '2010')) == ['001']) - assert (len(os.listdir(os.path.join(dl_path, '2010', '001'))) == 2) + cont = os.listdir(dl_path) + assert len(cont) == 2 + assert 'overview.yml' in cont + assert '2010' in cont + + assert os.listdir(os.path.join(dl_path, '2010')) == ['001'] + assert len(os.listdir(os.path.join(dl_path, '2010', '001'))) == 2 should_dlfiles = [ 'ERA5_AN_20100101_0000.nc', 'ERA5_AN_20100101_1200.nc' @@ -98,9 +101,12 @@ def test_dry_download_nc_era5(): assert (sorted(os.listdir(os.path.join( dl_path, '2010', '001'))) == sorted(should_dlfiles)) - assert (os.listdir(dl_path) == ['2010']) - assert (os.listdir(os.path.join(dl_path, '2010')) == ['001']) - assert (len(os.listdir(os.path.join(dl_path, '2010', '001'))) == 2) + props = read_summary_yml(dl_path) + assert props['product'].lower() == 'era5' + assert props['download_settings']['grb'] is False + assert props['period_to'] == props['period_from'] == '2010-01-01' + assert len(props['download_settings']['variables']) == len(variables) + assert props['download_settings']['h_steps'] == h_steps def test_dry_download_grb_era5(): @@ -118,18 +124,23 @@ def test_dry_download_grb_era5(): os.path.join(dl_path, 'temp_downloaded', '20100101_20100101.grb')) startdate = enddate = datetime(2010, 1, 1) + variables, h_steps = ['swvl1', 'swvl2', 'lsm'], [0, 12] download_and_move( dl_path, startdate, enddate, - variables=['swvl1', 'swvl2', 'lsm'], + variables=variables, keep_original=False, - h_steps=[0, 12], + h_steps=h_steps, 
grb=True, dry_run=True) - assert (os.listdir(dl_path) == ['2010']) + cont = os.listdir(dl_path) + assert len(cont) == 2 + assert 'overview.yml' in cont + assert '2010' in cont + assert (os.listdir(os.path.join(dl_path, '2010')) == ['001']) assert (len(os.listdir(os.path.join(dl_path, '2010', '001'))) == 2) @@ -140,6 +151,13 @@ def test_dry_download_grb_era5(): assert (sorted(os.listdir(os.path.join( dl_path, '2010', '001'))) == sorted(should_dlfiles)) + props = read_summary_yml(dl_path) + assert props['product'].lower() == 'era5' + assert props['download_settings']['grb'] is True + assert props['period_to'] == props['period_from'] == '2010-01-01' + assert len(props['download_settings']['variables']) == len(variables) + assert props['download_settings']['h_steps'] == h_steps + @pytest.mark.skipif(not cdo_available, reason="CDO is not installed") def test_download_nc_era5_regridding(): @@ -168,7 +186,11 @@ def test_download_nc_era5_regridding(): dry_run=True, grid=grid) - assert (os.listdir(dl_path) == ['2010']) + cont = os.listdir(dl_path) + assert len(cont) == 2 + assert 'overview.yml' in cont + assert '2010' in cont + assert (os.listdir(os.path.join(dl_path, '2010')) == ['001']) assert (len(os.listdir(os.path.join(dl_path, '2010', '001'))) == 2) @@ -181,9 +203,11 @@ def test_download_nc_era5_regridding(): for f in os.listdir(os.path.join(dl_path, "2010", "001")): ds = xr.open_dataset(os.path.join(dl_path, "2010", "001", f)) - assert dict(ds.dims) == {"lon": 720, "lat": 360} + assert ds['lon'].shape == (720,) + assert ds['lat'].shape == (360,) assert np.all(np.arange(89.75, -90, -0.5) == ds.lat) assert np.all(np.arange(-179.75, 180, 0.5) == ds.lon) + if __name__ == '__main__': test_dry_download_nc_era5() diff --git a/tests/tests_era5/test_era5_download_cds.py b/tests/tests_era5/test_era5_download_cds.py index 053e4a3..29c519f 100644 --- a/tests/tests_era5/test_era5_download_cds.py +++ b/tests/tests_era5/test_era5_download_cds.py @@ -4,40 +4,41 @@ import os import 
tempfile - -from ecmwf_models.era5.download import main -from ecmwf_models.era5.interface import ERA5NcImg import unittest from datetime import datetime import pytest +import subprocess + +from ecmwf_models.era5.reader import ERA5NcImg +from ecmwf_models.utils import check_api_ready +try: + check_api_ready() + api_ready = True +except ValueError: + api_ready = False class DownloadTest(unittest.TestCase): # these tests only run if a username and pw are set in the environment # variables. To manually set them: `export USERNAME="my_username"` etc. + @unittest.skipIf( - os.environ.get('CDSAPI_KEY') is None, + os.environ.get('CDSAPI_KEY') is None and not api_ready, 'CDSAPI_KEY not found. Make sure the environment variable exists.' ) @pytest.mark.wget - def test_full_download(self): - - home = os.path.expanduser('~') - - with open(os.path.join(home, '.cdsapirc'), 'w') as f: - f.write("url: https://cds.climate.copernicus.eu/api/v2\n") - f.write(f"key: {os.environ.get('CDSAPI_KEY')}") + def test_cli_download(self): with tempfile.TemporaryDirectory() as dl_path: startdate = enddate = "2023-01-01" args = [ - dl_path, '-s', startdate, '-e', enddate, '-p', 'ERA5', - '-var', 'swvl1', '--h_steps', '0' + dl_path, '-s', startdate, '-e', enddate, + '-v', 'swvl1', '--h_steps', '0' ] - main(args) + subprocess.call(['era5', 'download'] + args) out_path = os.path.join(dl_path, '2023', '001') assert(os.path.exists(out_path)) diff --git a/tests/tests_era5/test_era5_interface.py b/tests/tests_era5/test_era5_interface.py index 6c63c30..b71579f 100644 --- a/tests/tests_era5/test_era5_interface.py +++ b/tests/tests_era5/test_era5_interface.py @@ -2,7 +2,7 @@ import os import numpy.testing as nptest -from ecmwf_models.era5.interface import ( +from ecmwf_models.era5.reader import ( ERA5NcDs, ERA5NcImg, ERA5GrbImg, ERA5GrbDs) import numpy as np from datetime import datetime @@ -14,7 +14,7 @@ def test_ERA5_nc_image_landpoints(): os.path.dirname(os.path.abspath(__file__)), '..', 
"ecmwf_models-test-data", "ERA5", "netcdf", "2010", "001", 'ERA5_AN_20100101_0000.nc') - subgrid = ERA5_RegularImgLandGrid(0.25, 0.25) + subgrid = ERA5_RegularImgLandGrid(0.25) dset = ERA5NcImg( fname, parameter=['swvl1', 'swvl2'], @@ -328,3 +328,6 @@ def test_ERA5_grb_ds(): nptest.assert_allclose(data.lat[-1], -90.0) nptest.assert_allclose(data.lon[0], 0.0) nptest.assert_allclose(data.lon[720], 180.0) # middle of image + +if __name__ == '__main__': + test_ERA5_grb_image() \ No newline at end of file diff --git a/tests/tests_era5/test_era5_reshuffle.py b/tests/tests_era5/test_era5_reshuffle.py index ad76f43..a23417e 100644 --- a/tests/tests_era5/test_era5_reshuffle.py +++ b/tests/tests_era5/test_era5_reshuffle.py @@ -29,69 +29,59 @@ import tempfile import numpy as np import numpy.testing as nptest -from datetime import datetime +import subprocess -from ecmwf_models.era5.reshuffle import main +from ecmwf_models.era5.reshuffle import img2ts from ecmwf_models import ERATs -from ecmwf_models.era5.reshuffle import parse_args - - -def test_parse_args(): - - args = parse_args([ - "/in", - "/out", - "2000-01-01", - "2010-12-31", - "swvl1", - "swvl2", - "--land_points", - "True", - "--imgbuffer", - "1000", - "--bbox", - "12", - "46", - "17", - "50", - ]) - - assert isinstance(args.dataset_root, str) and args.dataset_root == "/in" - assert (isinstance(args.timeseries_root, str) and - args.timeseries_root == "/out") - assert isinstance(args.start, datetime) and args.start == datetime( - 2000, 1, 1) - assert isinstance(args.end, datetime) and args.end == datetime( - 2010, 12, 31) - assert isinstance(args.variables, list) and len(args.variables) == 2 - assert isinstance(args.land_points, bool) and args.land_points is True - assert isinstance(args.imgbuffer, int) and args.imgbuffer == 1000 - assert (isinstance(args.bbox, list) and len(args.bbox) == 4 and - all([isinstance(a, float) for a in args.bbox])) +from ecmwf_models.utils import read_summary_yml +inpath = os.path.join( + 
os.path.dirname(os.path.abspath(__file__)),
+    "..",
+    "ecmwf_models-test-data",
+    "ERA5",
+)
+
+def test_cli_reshuffle():
+    path = os.path.join(inpath, 'netcdf')
+    with tempfile.TemporaryDirectory() as ts_path:
+        subprocess.call(["era5", "reshuffle", path, ts_path, "2010-01-01",
+                         "2010-01-01", "-v", "swvl1,swvl2", "-l",
+                         "--bbox", "12.0", "46.0", "17.0", "50.0",
+                         "--h_steps", "0"])
+
+        sum = read_summary_yml(ts_path)
+        # TODO: change summary
+
+        # TODO: write new summary
+
+        # TODO: call update with manipulated summary
+
+        subprocess.call(["era5", "update_ts", path, ts_path, "2010-01-01",
+                         "2010-01-01", "-v", "swvl1,swvl2", "-l",
+                         "--bbox", "12.0", "46.0", "17.0", "50.0",
+                         "--h_steps", "12"])
+
+        ds = ERATs(ts_path, ioclass_kws={"read_bulk": True})
+        ts = ds.read(15, 48)
+        swvl1_values_should = np.array([0.402825, 0.390983], dtype=np.float32)
+        nptest.assert_allclose(
+            ts["swvl1"].values, swvl1_values_should, rtol=1e-5)
 
 
 def test_ERA5_reshuffle_nc():
     # test reshuffling era5 netcdf images to time series
 
-    inpath = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)),
-        "..",
-        "ecmwf_models-test-data",
-        "ERA5",
-        "netcdf",
-    )
-
     startdate = "2010-01-01"
     enddate = "2010-01-01"
     parameters = ["swvl1", "swvl2"]
-    h_steps = ["--h_steps", "0", "12"]
-    landpoints = ["--land_points", "True"]
-    bbox = ["--bbox", "12", "46", "17", "50"]
+    h_steps = [0, 12]
+    landpoints = True
+    bbox = (12, 46, 17, 50)
 
     with tempfile.TemporaryDirectory() as ts_path:
-        args = ([inpath, ts_path, startdate, enddate] + parameters + h_steps +
-                landpoints + bbox)
-        main(args)
+        img2ts(os.path.join(inpath, 'netcdf'), ts_path, startdate, enddate,
+               variables=parameters, h_steps=h_steps, land_points=landpoints,
+               bbox=bbox)
 
         assert (len(glob.glob(os.path.join(ts_path, "*.nc"))) == 5)
         # less files because only land points and bbox
         ds = ERATs(ts_path, ioclass_kws={"read_bulk": True})
@@ -108,26 +98,19 @@ def test_ERA5_reshuffle_grb():
     # test reshuffling era5 netcdf images to time series
- inpath = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - "..", - "ecmwf_models-test-data", - "ERA5", - "netcdf", - ) + startdate = "2010-01-01" enddate = "2010-01-01" parameters = ["swvl1", "swvl2"] - h_steps = ["--h_steps", "0", "12"] - landpoints = ["--land_points", "False"] - bbox = ["--bbox", "12", "46", "17", "50"] + h_steps = [0, 12] + landpoints = False + bbox = (12, 46, 17, 50) with tempfile.TemporaryDirectory() as ts_path: - args = ([inpath, ts_path, startdate, enddate] + parameters + h_steps + - landpoints + bbox) - - main(args) + img2ts(os.path.join(inpath, 'grib'), ts_path, startdate, enddate, + variables=parameters, + h_steps=h_steps, land_points=landpoints, bbox=bbox) assert len(glob.glob(os.path.join(ts_path, "*.nc"))) == 5 ds = ERATs(ts_path, ioclass_kws={"read_bulk": True}) @@ -139,3 +122,7 @@ def test_ERA5_reshuffle_grb(): swvl2_values_should = np.array([0.390514, 0.390980], dtype=np.float32) nptest.assert_allclose( ts["swvl2"].values, swvl2_values_should, rtol=1e-5) + + +if __name__ == '__main__': + test_cli_reshuffle() \ No newline at end of file From 893fc66959f18b733ae64342500eb1ba4c3773bc Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Fri, 25 Oct 2024 17:28:46 +0200 Subject: [PATCH 03/20] Refactor for automatic time series updates --- environment/latest.yml | 32 --- environment/pinned_linux.yml | 163 ------------ setup.cfg | 5 +- src/ecmwf_models/cli.py | 313 +++++++++++++++--------- src/ecmwf_models/era5/download.py | 35 +-- src/ecmwf_models/era5/reshuffle.py | 108 ++++++-- src/ecmwf_models/extract.py | 211 ++++++++++++++++ src/ecmwf_models/globals.py | 24 ++ src/ecmwf_models/grid.py | 61 +++-- src/ecmwf_models/interface.py | 50 ++-- src/ecmwf_models/utils.py | 277 +-------------------- tests/test_utils.py | 29 +-- tests/tests_era5/test_era5_download.py | 11 +- tests/tests_era5/test_era5_reshuffle.py | 64 +++-- 14 files changed, 670 insertions(+), 713 deletions(-) delete mode 100644 environment/latest.yml delete 
mode 100644 environment/pinned_linux.yml create mode 100644 src/ecmwf_models/extract.py diff --git a/environment/latest.yml b/environment/latest.yml deleted file mode 100644 index 04cd718..0000000 --- a/environment/latest.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: ecmwf_models -channels: -- conda-forge -- defaults -dependencies: -- numpy -- pandas -- configobj -- pygrib>=2.0.1 -- hdf5 -- netcdf4>=1.4.2 -- scipy -- pyresample -- xarray -- pip -- pip: - - pygeobase - - pynetcf>=0.5.0 - - repurpose - - pygeogrids - - cdsapi>=0.6.1 - - ecmwf-api-client - - datedown>=0.3 - - requests - - more_itertools - - typing_extensions - - pre-commit - - sphinx_rtd_theme - - toml - - yapf - - pytest - - pytest-cov diff --git a/environment/pinned_linux.yml b/environment/pinned_linux.yml deleted file mode 100644 index da04d60..0000000 --- a/environment/pinned_linux.yml +++ /dev/null @@ -1,163 +0,0 @@ -name: ecmwf_models -channels: - - conda-forge - - defaults -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - blosc=1.21.3=hafa529b_0 - - brotlipy=0.7.0=py39hb9d737c_1005 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.18.1=h7f98852_0 - - ca-certificates=2023.01.10=h06a4308_0 - - certifi=2023.5.7=pyhd8ed1ab_0 - - cffi=1.15.1=py39he91dace_3 - - cftime=1.6.2=py39h2ae25f5_1 - - charset-normalizer=3.1.0=pyhd8ed1ab_0 - - configobj=5.0.8=pyhd8ed1ab_0 - - cryptography=40.0.2=py39h079d5ae_0 - - curl=8.0.1=h588be90_0 - - eccodes=2.30.0=h95353b9_0 - - freeglut=3.2.2=h9c3ff4c_1 - - geos=3.11.2=hcb278e6_0 - - hdf4=4.2.15=h501b40f_6 - - hdf5=1.14.0=nompi_hb72d44e_103 - - icu=72.1=hcb278e6_0 - - idna=3.4=pyhd8ed1ab_0 - - importlib-metadata=6.6.0=pyha770c72_0 - - importlib_metadata=6.6.0=hd8ed1ab_0 - - jasper=4.0.0=h32699f2_1 - - keyutils=1.6.1=h166bdaf_0 - - krb5=1.20.1=h81ceb04_0 - - ld_impl_linux-64=2.38=h1181459_1 - - lerc=4.0.0=h27087fc_0 - - libaec=1.0.6=hcb278e6_1 - - libblas=3.9.0=16_linux64_openblas - - libcblas=3.9.0=16_linux64_openblas - - libcurl=8.0.1=h588be90_0 - - 
libdeflate=1.18=h0b41bf4_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libffi=3.4.2=h6a678d5_6 - - libgcc-ng=12.2.0=h65d4601_19 - - libgfortran-ng=12.2.0=h69a702a_19 - - libgfortran5=12.2.0=h337968e_19 - - libglu=9.0.0=he1b5a44_1001 - - libgomp=12.2.0=h65d4601_19 - - libiconv=1.17=h166bdaf_0 - - libjpeg-turbo=2.1.5.1=h0b41bf4_0 - - liblapack=3.9.0=16_linux64_openblas - - libnetcdf=4.9.2=nompi_hdf9a29f_104 - - libnghttp2=1.52.0=h61bc06f_0 - - libnsl=2.0.0=h7f98852_0 - - libopenblas=0.3.21=pthreads_h78a6416_3 - - libpng=1.6.39=h753d276_0 - - libsqlite=3.40.0=h753d276_1 - - libssh2=1.10.0=hf14f497_3 - - libstdcxx-ng=12.2.0=h46fd767_19 - - libtiff=4.5.0=ha587672_6 - - libuuid=2.38.1=h0b41bf4_0 - - libwebp-base=1.3.0=h0b41bf4_0 - - libxcb=1.13=h7f98852_1004 - - libxml2=2.10.4=hfdac1af_0 - - libzip=1.9.2=hc929e4a_1 - - libzlib=1.2.13=h166bdaf_4 - - lz4-c=1.9.4=hcb278e6_0 - - ncurses=6.4=h6a678d5_0 - - netcdf4=1.6.3=nompi_py39h369ccc5_102 - - numpy=1.24.3=py39h6183b62_0 - - openssl=3.1.0=hd590300_3 - - packaging=23.1=pyhd8ed1ab_0 - - pandas=2.0.1=py39h40cae4c_1 - - pip=23.1.2=pyhd8ed1ab_0 - - platformdirs=3.5.0=pyhd8ed1ab_0 - - pooch=1.7.0=pyha770c72_3 - - proj=9.2.0=h8ffa02c_0 - - pthread-stubs=0.4=h36c2ea0_1001 - - pycparser=2.21=pyhd8ed1ab_0 - - pygrib=2.1.4=py39h0e146ac_7 - - pykdtree=1.3.7.post0=py39h389d5f1_0 - - pyopenssl=23.1.1=pyhd8ed1ab_0 - - pyproj=3.5.0=py39h718ffca_1 - - pyresample=1.26.1=py39h2ad29b5_0 - - pysocks=1.7.1=pyha2e5f31_6 - - python=3.9.16=h2782a2a_0_cpython - - python-dateutil=2.8.2=pyhd8ed1ab_0 - - python-tzdata=2023.3=pyhd8ed1ab_0 - - python_abi=3.9=3_cp39 - - pytz=2023.3=pyhd8ed1ab_0 - - pyyaml=6.0=py39hb9d737c_5 - - readline=8.2=h5eee18b_0 - - scipy=1.10.1=py39he83f1e1_1 - - setuptools=66.0.0=py39h06a4308_0 - - shapely=2.0.1=py39hf1c3bca_1 - - six=1.16.0=pyh6c4a22f_0 - - snappy=1.1.10=h9fff704_0 - - sqlite=3.41.2=h5eee18b_0 - - tk=8.6.12=h1ccaba5_0 - - typing-extensions=4.5.0=hd8ed1ab_0 - - 
typing_extensions=4.5.0=pyha770c72_0 - - tzdata=2023c=h04d1e81_0 - - urllib3=1.26.15=pyhd8ed1ab_0 - - wheel=0.38.4=py39h06a4308_0 - - xarray=2023.4.2=pyhd8ed1ab_0 - - xorg-fixesproto=5.0=h7f98852_1002 - - xorg-inputproto=2.3.2=h7f98852_1002 - - xorg-kbproto=1.0.7=h7f98852_1002 - - xorg-libx11=1.8.4=h0b41bf4_0 - - xorg-libxau=1.0.9=h7f98852_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xorg-libxext=1.3.4=h0b41bf4_2 - - xorg-libxfixes=5.0.3=h7f98852_1004 - - xorg-libxi=1.7.10=h7f98852_0 - - xorg-xextproto=7.3.0=h0b41bf4_1003 - - xorg-xproto=7.0.31=h7f98852_1007 - - xz=5.4.2=h5eee18b_0 - - yaml=0.2.5=h7f98852_2 - - zipp=3.15.0=pyhd8ed1ab_0 - - zlib=1.2.13=h166bdaf_4 - - zstd=1.5.2=h3eb15da_6 - - pip: - - alabaster==0.7.13 - - babel==2.12.1 - - cdsapi==0.6.1 - - cfgv==3.3.1 - - coverage==7.2.5 - - datedown==0.4 - - distlib==0.3.6 - - docutils==0.18.1 - - ecmwf-api-client==1.6.3 - - exceptiongroup==1.1.1 - - filelock==3.12.0 - - identify==2.5.24 - - imagesize==1.4.1 - - iniconfig==2.0.0 - - jinja2==3.1.2 - - markupsafe==2.1.2 - - more-itertools==9.1.0 - - nodeenv==1.7.0 - - pluggy==1.0.0 - - pre-commit==3.3.1 - - pygeobase==0.6.2 - - pygeogrids==0.4.2 - - pygments==2.15.1 - - pynetcf==0.5.0 - - pytest==7.3.1 - - pytest-cov==4.0.0 - - repurpose==0.9 - - requests==2.30.0 - - snowballstemmer==2.2.0 - - sphinx==6.2.1 - - sphinx-rtd-theme==1.2.0 - - sphinxcontrib-applehelp==1.0.4 - - sphinxcontrib-devhelp==1.0.2 - - sphinxcontrib-htmlhelp==2.0.1 - - sphinxcontrib-jquery==4.1 - - sphinxcontrib-jsmath==1.0.1 - - sphinxcontrib-qthelp==1.0.3 - - sphinxcontrib-serializinghtml==1.1.5 - - toml==0.10.2 - - tomli==2.0.1 - - tqdm==4.65.0 - - virtualenv==20.23.0 - - yapf==0.33.0 diff --git a/setup.cfg b/setup.cfg index 995386d..03c07be 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,8 +36,11 @@ install_requires = netcdf4 click parse + datedown + pygrib + c3s_sm>=0.3.1 cdsapi>=0.7.3 - repurpose + repurpose>=0.13 pygeogrids # The usage of test_requires is discouraged, see `Dependency 
Management` docs diff --git a/src/ecmwf_models/cli.py b/src/ecmwf_models/cli.py index 421dd6c..1269660 100644 --- a/src/ecmwf_models/cli.py +++ b/src/ecmwf_models/cli.py @@ -1,23 +1,23 @@ -import os import click from datetime import datetime from ecmwf_models.era5.download import download_and_move, download_record_extension from ecmwf_models.utils import ( - default_variables, - img_infer_file_props, + default_variables ) -from ecmwf_models.era5.reshuffle import img2ts -from ecmwf_models.globals import IMG_FNAME_TEMPLATE +from ecmwf_models.era5.reshuffle import img2ts, extend_ts + @click.command( "download", - context_settings={'show_default': True}, - short_help="Download ERA5 reanalysis data images between two " - "dates. Before this program can be used, you have to " - "register at the CDS and setup your .cdsapirc file " - "as described here: " + context_settings={'show_default': True, + 'help_option_names': ['-h', '--help']}, + short_help="Download ERA5 reanalysis image data between two " + "dates from the Copernicus Climate Data Store (CDS). " + "Before this program can be used, you have to " + "register an account at CDS, and should setup a `.cdsapirc` " + "file as described here: " "https://cds.climate.copernicus.eu/how-to-api") @click.argument("PATH", type=click.Path(writable=True)) @click.option( @@ -43,8 +43,8 @@ "--keep_original", type=click.BOOL, default=False, - help="Also keep the original image image stack as downloaded from CDS " - "instead of deleting it after extracting individual images. " + help="Also keep the original image stacks as downloaded from CDS, " + "instead of deleting them after extracting individual images. " "Default is False.") @click.option( "-grb", @@ -52,79 +52,83 @@ type=click.BOOL, default=False, help="Download data in grib format instead of netcdf. " - "Pass either True or False. 
Default is False.") + "Default is False.") @click.option( "--h_steps", type=str, default="0,6,12,18", - help="Temporal sampling of downloaded images. Multiple values are " - "comma-separated!" + help="Temporal sampling of downloaded images. To download multiple time " + "stamps or each day, pass comma-separated values. " "Pass a set of full hours here, like '--h_steps 0,12' to download " "two images for each day, at 0:00 and 12:00 respectively. " "By default, we download 6-hourly images starting at 0:00 UTC, " - "(i.e., 0,6,12,18)") + "(i.e. `--h_steps 0,6,12,18`") @click.option( "--max_request_size", type=int, default=1000, - help="Maximum number of requests that the CDS API allows. " - "The default is 1000, but depends on server side settings. " - "Server settings may change at some point. Change " + help="Maximum number of requests to pass to the CDS API. " + "The default is 1000, but what is allowed, depends on server " + "settings. Server settings may change at some point. Change " "accordingly here in case that 'the request is too large'. " - "A smaller number will results in smaller download chunks.") + "A smaller number will results in smaller download chunks (slower).") @click.option( "--cds_token", type=click.STRING, default=None, - help="To identify with the CDS. Required if no .cdsapirc file exists in " - "the home directory (see documentation). You can find your token/key " - "on your CDS user profile page. Alternatively, you can also set" - "the an environment variable CDSAPI_KEY that contains your token.") + help="To identify with the CDS. Required only if no `.cdsapirc` file " + "exists in the home directory (see documentation). " + "You can find your token/key on your CDS user profile page. 
" + "Alternatively, you can also set an environment variable " + "`CDSAPI_KEY` with your token.") def cli_download_era5(path, start, end, variables, keep_original, as_grib, h_steps, max_request_size, cds_token): """ - Download ERA5 image data within a chosen period. NOTE: Before using this + Download ERA5 image data within the chosen period. NOTE: Before using this program, create a CDS account and set up a `.cdsapirc` file as described here: https://cds.climate.copernicus.eu/how-to-api \b Required Parameters ------------------- - PATH: string (required) - Root of local filesystem where the downloaded data will be stored. - Make sure to set up the CDS API for your account as describe in - https://cds.climate.copernicus.eu/how-to-api + > PATH: string (required) + Root of local filesystem where the downloaded data will be stored. + Make sure to set up the CDS API for your account as described at + https://cds.climate.copernicus.eu/how-to-api """ + # The docstring above is slightly different to the normal python one to + # display it properly on the command line. + h_steps = [int(h.strip()) for h in h_steps.split(',')] variables = [str(v.strip()) for v in variables.split(',')] status_code = download_and_move( - target_path=path, - startdate=start, - enddate=end, - product="era5", - variables=variables, - h_steps=h_steps, - grb=as_grib, - keep_original=keep_original, - stepsize='month', - n_max_request=max_request_size, - cds_token=cds_token, - ) + target_path=path, + startdate=start, + enddate=end, + product="era5", + variables=variables, + h_steps=h_steps, + grb=as_grib, + keep_original=keep_original, + stepsize='month', + n_max_request=max_request_size, + cds_token=cds_token, + ) return status_code @click.command( "download", - context_settings={'show_default': True}, - short_help="Download ERA5-LAND reanalysis data images between two " - "dates. 
Before this program can be used, you have to " - "register at the CDS and setup your .cdsapirc file " - "as described here: " - "https://cds.climate.copernicus.eu/how-to-api " - "Alternatively, you can pass your KEY/TOKEN here using the " - "--cds_token option.") + context_settings={'show_default': True, + 'help_option_names': ['-h', '--help']}, + short_help="Download ERA5 reanalysis image data between two " + "dates from the Copernicus Climate Data Store (CDS). " + "Before this program can be used, you have to " + "register an account at CDS, and should setup a `.cdsapirc` " + "file as described here: " + "https://cds.climate.copernicus.eu/how-to-api") @click.argument("PATH", type=click.Path(writable=True)) @click.option( '--start', '-s', @@ -149,8 +153,8 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, "--keep_original", type=click.BOOL, default=False, - help="Also keep the original image image stack as downloaded from CDS " - "instead of deleting it after extracting individual images. " + help="Also keep the original image stacks as downloaded from CDS, " + "instead of deleting them after extracting individual images. " "Default is False.") @click.option( "-grb", @@ -158,97 +162,107 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, type=click.BOOL, default=False, help="Download data in grib format instead of netcdf. " - "Pass either True or False. Default is False.") + "Default is False.") @click.option( "--h_steps", type=click.STRING, default='0,6,12,18', - help="Temporal sampling of downloaded images. " + help="Temporal sampling of downloaded images. To download multiple time " + "stamps or each day, pass comma-separated values. " "Pass a set of full hours here, like '--h_steps 0,12' to download " "two images for each day, at 0:00 and 12:00 respectively. " - "By default, we download 6-hourly images starting at 0:00 UTC") + "By default, we download 6-hourly images starting at 0:00 UTC, " + "(i.e. 
`--h_steps 0,6,12,18`") @click.option( "--max_request_size", type=int, default=1000, - help="Maximum number of requests that the CDS API allows. " - "The default is 1000, but depends on server side settings. " - "Server settings may change at some point. Change " + help="Maximum number of requests to pass to the CDS API. " + "The default is 1000, but what is allowed, depends on server " + "settings. Server settings may change at some point. Change " "accordingly here in case that 'the request is too large'. " - "A smaller number will results in smaller download chunks.") + "A smaller number will results in smaller download chunks (slower).") @click.option( "--cds_token", type=click.STRING, default=None, - help="To identify with the CDS. Required if no .cdsapirc file exists in " - "the home directory (see documentation). You can find your token/key " - "on your CDS user profile page. Alternatively, you can also set" - "the an environment variable CDSAPI_KEY that contains your token.") + help="To identify with the CDS. Required only if no `.cdsapirc` file " + "exists in the home directory (see documentation). " + "You can find your token/key on your CDS user profile page. " + "Alternatively, you can also set an environment variable " + "`CDSAPI_KEY` with your token.") def cli_download_era5land(path, start, end, variables, keep_original, as_grib, h_steps, max_request_size, cds_token): """ - Download ERA5-Land image data within a chosen period. NOTE: Before using this - program, create a CDS account and set up a `.cdsapirc` file as described - here: https://cds.climate.copernicus.eu/how-to-api + Download ERA5-Land image data within a chosen period. + NOTE: Before using this program, create a CDS account and set up a + `.cdsapirc` file as described here: + https://cds.climate.copernicus.eu/how-to-api \b Required Parameters ------------------- - PATH: string (required) - Root of local filesystem where the downloaded data will be stored. 
- Make sure to set up the CDS API for your account as describe in - https://cds.climate.copernicus.eu/how-to-api + > PATH: string (required) + Root of local filesystem where the downloaded data will be stored. + Make sure to set up the CDS API for your account as described at + https://cds.climate.copernicus.eu/how-to-api """ + # The docstring above is slightly different to the normal python one to + # display it properly on the command line. + h_steps = [int(h.strip()) for h in h_steps.split(',')] variables = [str(v.strip()) for v in variables.split(',')] status_code = download_and_move( - target_path=path, - startdate=start, - enddate=end, - product="era5-land", - variables=variables, - h_steps=h_steps, - grb=as_grib, - keep_original=keep_original, - stepsize='month', - n_max_request=max_request_size, - cds_token=cds_token - ) + target_path=path, + startdate=start, + enddate=end, + product="era5-land", + variables=variables, + h_steps=h_steps, + grb=as_grib, + keep_original=keep_original, + stepsize='month', + n_max_request=max_request_size, + cds_token=cds_token + ) return status_code @click.command( "update_img", - context_settings={'show_default': True}, - short_help="Extend an existing set of images by downloading new data.") + context_settings={'show_default': True, + 'help_option_names': ['-h', '--help']}, + short_help="Extend an existing set of images by downloading new data " + "with the same settings as before." + "NOTE: First use the `era5 download` or `era5land download` " + "programs.") @click.argument("path", type=click.Path(writable=True)) @click.option( "--cds_token", type=click.STRING, default=None, - help="To identify with the CDS. Required if no .cdsapirc file exists in " - "the home directory (see documentation). You can find your token/key " - "on your CDS user profile page. Or set your token under the " - "CDSAPI_KEY environment variable") + help="To identify with the CDS. 
Required only if no `.cdsapirc` file " + "exists in the home directory (see documentation). " + "You can find your token/key on your CDS user profile page. " + "Alternatively, you can also set an environment variable " + "`CDSAPI_KEY` with your token.") def cli_update_img(path, cds_token=None): """ - Extend a locally existing ERA record by downloading new files that - don't yet exist locally. - This will find the latest available local file, and download all - available extensions. - NOTE: Use the `era5 download` or `era5land download` program first do - create a local record to update with this function. + Download new images from CDS to your existing local archive. Use the same + settings as before. + NOTE: First use the `era5 download` or `era5land download` programs. + The so-created archive can then be updated using this program. \b Required Parameters ------------------- - PATH: string - Path where previously downloaded C3S SM images are stored. There must - be a file `overview.yml` which contains the download settings. - Make sure to set up the CDS API for your account as describe in - https://cds.climate.copernicus.eu/how-to-api, + > PATH: string (required) + Path where previously downloaded images are stored. There must + be a file `overview.yml` which contains the download settings. + Make sure to set up the CDS API for your account as described at + https://cds.climate.copernicus.eu/how-to-api, """ # The docstring above is slightly different to the normal python one to # display it properly on the command line. 
@@ -258,8 +272,10 @@ def cli_update_img(path, cds_token=None): @click.command( "reshuffle", - context_settings={'show_default': True}, - short_help="Convert downloaded ERA5/ERA5-Land image data into time series.") + context_settings={'show_default': True, + 'help_option_names': ['-h', '--help']}, + short_help="Convert previously downloaded ERA5/ERA5-Land image " + "data into a time series format.") @click.argument("IMG_PATH", type=click.Path(readable=True)) @click.argument("TS_PATH", type=click.Path(writable=True)) @click.argument("START", type=click.STRING) @@ -269,10 +285,10 @@ def cli_update_img(path, cds_token=None): '-v', type=click.STRING, default=None, - help="Subset of variables to convert. Pass comma separated names" + help="Subset of variables to convert. Pass comma-separated names" "to select multiple variables (short names, as in the input images, " - "e.g. `-v swvl1,stl1`). If not specified, all variables from the first" - "image file are used.") + "e.g. `-v swvl1,stl1`). If not specified, all variables from the " + "first image file are used.") @click.option( '--land_points', '-l', is_flag=True, show_default=True, default=False, @@ -282,43 +298,47 @@ def cli_update_img(path, cds_token=None): '--bbox', nargs=4, type=click.FLOAT, + default=None, help="4 NUMBERS | min_lon min_lat max_lon max_lat. " "Set Bounding Box (lower left and upper right corner) " - "of area to reshuffle (WGS84). [default: -180 -90 180 90]") + "of area to reshuffle (WGS84). Default is global.") @click.option( "--h_steps", - type=str, + type=click.STRING, default="0,6,12,18", - nargs=1, - help="Full hour time stamps to include in time series. To select multiple," - "pass comma-separated values here, e.g. '--h_steps 0,12' will only " - "include data from images at 0:00 and 12:00 UTC and ignore all other " - "available time stamps." + help="Full hour time stamps of images to include in time series. " + "To select multiple, pass comma-separated values here, " + "e.g. 
'--h_steps 0,12' will only include data from images at " + "0:00 and 12:00 UTC and ignore all other available time stamps." "By default, we include data for every 6th hour each day.") @click.option( '--imgbuffer', '-b', type=click.INT, default=50, - help="NUMBER | Number of images to read into memory at once before " + help="Number of images to read into memory at once before " "conversion to time series. A larger buffer means faster " "processing but requires more memory.") def cli_reshuffle(img_path, ts_path, start, end, variables, land_points, bbox, h_steps, imgbuffer): """ - Convert downloaded ERA5 or ERA5-Land image data into a time series format. + Convert previously downloaded ERA5 or ERA5-Land image data into a + time series format. \b Required Parameters ------------------- - IMG_PATH: string (required) - Root of local filesystem where the downloaded image data is stored. - TS_PATH: string (required) - Root of local filesystem where the time series should be stored. - START: string (required) - Date of the first image to include in the conversion. Format: YYYY-MM-DD - END: string (required) - Date of the last image to include in the conversion. Format: YYYY-MM-DD + > IMG_PATH: string (required) + Directory where the downloaded image data is stored, i.e., where + annual folders are found. + > TS_PATH: string (required) + Root of local filesystem where the time series should be stored. + > START: string (required) + Date of the first image to include in the conversion. + Format: YYYY-MM-DD + > END: string (required) + Date of the last image to include in the conversion. 
+ Format: YYYY-MM-DD """ h_steps = [int(h.strip()) for h in h_steps.split(',')] @@ -337,9 +357,56 @@ def cli_reshuffle(img_path, ts_path, start, end, variables, land_points, bbox, h_steps=h_steps, land_points=land_points, imgbuffer=imgbuffer, - product=None, # Infer product from files + product=None, # Infer product automatically from files ) + +@click.command( + "update_ts", + context_settings={'show_default': True, + 'help_option_names': ['-h', '--help']}, + short_help="Append new image data to an existing time series archive.") +@click.argument("TS_PATH", type=click.Path(writable=True)) +@click.option( + '--imgpath', + '-p', + type=click.STRING, + default=None, + help="Manually specify where the image data to extend the time " + "series are located. If this is not specified, we use the previously " + "used path from the `overview.yml` file stored with the time series " + "data.") +@click.option( + '--imgbuffer', + '-b', + type=click.INT, + default=50, + help="Number of images to read into memory at once before " + "conversion to time series. A larger buffer means faster " + "processing but requires more memory.") +def cli_extend_ts(ts_path, imgpath, imgbuffer): + """ + Append new image data to an existing time series archive. The archive must + be created first using the `reshuffle` program. We will use the same settings + as in the initial run (see `overview.yml` in TS_PATH) for consistent + extensions. + + \b + Required Parameters + ------------------- + > TS_PATH: string (required) + Root of local filesystem where the time series from a previous call + of `reshuffle` are stored. New data will be appended to the existing + files! 
+ """ + kwargs = dict(ts_path=ts_path, imgbuffer=imgbuffer) + + if imgpath is not None: # otherwise use path from yml + kwargs["input_root"] = imgpath + + extend_ts(**kwargs) + + @click.group(short_help="ERA5 Command Line Programs imported from the " "`ecmwf_models` pip package.") def era5(): @@ -348,6 +415,7 @@ def era5(): era5.add_command(cli_download_era5) era5.add_command(cli_reshuffle) +era5.add_command(cli_extend_ts) @click.group(short_help="ERA5-Land Command Line Programs imported from the " @@ -357,4 +425,5 @@ def era5land(): era5land.add_command(cli_download_era5land) -era5land.add_command(cli_reshuffle) \ No newline at end of file +era5land.add_command(cli_reshuffle) +era5land.add_command(cli_extend_ts) diff --git a/src/ecmwf_models/era5/download.py b/src/ecmwf_models/era5/download.py index e444b6c..727f34d 100644 --- a/src/ecmwf_models/era5/download.py +++ b/src/ecmwf_models/era5/download.py @@ -2,18 +2,6 @@ """ Module to download ERA5 from terminal in netcdf and grib format. 
""" -import pandas as pd - -from ecmwf_models.utils import ( - lookup, - update_image_summary_file, - save_ncs_from_nc, save_gribs_from_grib, - default_variables, split_array, check_api_ready, - read_summary_yml -) -from repurpose.process import parallel_process -from repurpose.misc import delete_empty_directories - import warnings import os import logging @@ -21,7 +9,18 @@ import shutil import cdsapi import numpy as np -import traceback +import pandas as pd + +from c3s_sm.misc import read_summary_yml +from repurpose.process import parallel_process +from repurpose.misc import delete_empty_directories + +from ecmwf_models.utils import ( + lookup, + update_image_summary_file, + default_variables, split_array, check_api_ready, +) +from ecmwf_models.extract import save_ncs_from_nc, save_gribs_from_grib def split_chunk(timestamps, @@ -507,13 +506,3 @@ def download_record_extension(path, dry_run=False, cds_token=None): path, startdate=startdate, enddate=enddate, cds_token=cds_token, dry_run=dry_run, **props['download_settings']) - - -if __name__ == '__main__': - download_record_extension('/tmp/era5/nc') - # download_and_move( - # '/tmp/era5/nc', startdate='2024-07-01', - # enddate='2024-07-10', product='era5', keep_original=False, - # variables=['swvl1'], h_steps=[0,23], grb=False, dry_run=False, - # keep_prelim=False - # ) \ No newline at end of file diff --git a/src/ecmwf_models/era5/reshuffle.py b/src/ecmwf_models/era5/reshuffle.py index ad17865..020dcec 100644 --- a/src/ecmwf_models/era5/reshuffle.py +++ b/src/ecmwf_models/era5/reshuffle.py @@ -1,26 +1,74 @@ # -*- coding: utf-8 -*- """ -Module for a command line interface to convert the ERA Interim data into a -time series format using the repurpose package +Image to time series conversion tools. 
""" +import warnings import os import pandas as pd import numpy as np -from datetime import time, datetime +from datetime import time, datetime, timedelta from repurpose.img2ts import Img2Ts +from c3s_sm.misc import update_ts_summary_file, read_summary_yml from ecmwf_models.era5.reader import ERA5NcDs, ERA5GrbDs from ecmwf_models.grid import ERA_RegularImgGrid, ERA5_RegularImgLandGrid -from ecmwf_models.utils import parse_filetype, parse_product +from ecmwf_models.utils import parse_filetype, parse_product, get_first_last_image_date + +def extend_ts(ts_path, **img2ts_kwargs): + """ + Append any new data from the image path to the time series data. + This function is only applied to time series file that were created + using the img2ts function. + This will use the start from the previously written metadata, + and process only parameters that are already present in the time series + files. + + Parameters + ---------- + ts_path: str + Directory containing time series files to extend. It is also + expected that there is a `overview.yml` file in this directory. + img2ts_kwargs: + All kwargs are optional, if they are not given, we use them from the + previously created `overview.yml` file. If they are passed here, + they will override the values from the yml file. + """ + if not os.path.isfile(os.path.join(ts_path, 'overview.yml')): + raise FileNotFoundError( + "No overview.yml file found in the passed directory. " + "This file is required to use the same settings to extend an " + "existing record. NOTE: Use the `era5 download` or " + "`era5land download` programs first." 
+ ) + + ts_props = read_summary_yml(ts_path) + + for k, v in ts_props['img2ts_kwargs'].items(): + if k not in img2ts_kwargs: + if k == 'startdate': + old_enddate = pd.to_datetime( + ts_props["img2ts_kwargs"]["enddate"]).to_pydatetime() + startdate = old_enddate + timedelta(days=1) + img2ts_kwargs['startdate'] = startdate + elif k in ['enddate', 'outputpath']: # fields from yml to ignore + continue + else: + img2ts_kwargs[k] = v + + if 'enddate' not in img2ts_kwargs: + img2ts_kwargs['enddate'] = None # All available images + + img2ts(outputpath=ts_path, **img2ts_kwargs) + def img2ts( input_root, outputpath, startdate, - enddate, + enddate=None, variables=None, product=None, bbox=None, @@ -31,6 +79,8 @@ def img2ts( """ Reshuffle method applied to ERA images for conversion into netcdf time series format. + Note: ERA5 and ERA5-Land files are preferred over their T-counterpart, + in case that multiple files for a time stamp are present! Parameters ---------- @@ -41,9 +91,10 @@ def img2ts( startdate : str Start date, from which images are read and time series are generated. Format YYYY-mm-dd - enddate : str + enddate : str, optional (default: None) End date, from which images are read and time series are generated. - Format YYYY-mm-dd + Format YYYY-mm-dd. If None is passed, then the last available image + date is used. variables: tuple or str Variables to read from the passed images and convert into time series format. @@ -61,18 +112,25 @@ def img2ts( Reshuffle only land points. Uses the ERA5 land mask to create a land grid. The land grid is fixed to 0.25*0.25 or 0.1*0.1 deg for now. - include_prelim: bool, optional (default: True) - Include preliminary data (from Era5-T and ERA5-Land-T files) - imgbuffer: int, optional (default: 200) + imgbuffer: int, optional (default: 50) How many images to read at once before writing time series. 
This number affects how many images are stored in memory and should be chosen according to the available amount of memory and the size of a single image. """ - startdate = pd.to_datetime(startdate).to_pydatetime().date() + + if enddate is None: + enddate = get_first_last_image_date(input_root) + enddate = pd.to_datetime(enddate).to_pydatetime().date() + if startdate > enddate: + warnings.warn( + f"Start date {startdate} is greater than end date {enddate}. Abort." + ) + return + filetype = parse_filetype(input_root) product: str = parse_product(input_root) if product is None else product @@ -95,10 +153,6 @@ def img2ts( "Grid not implemented for product.") if filetype == "grib": - if land_points: - raise NotImplementedError( - "Reshuffling land points only implemented for netcdf files") - input_dataset = ERA5GrbDs( root_path=input_root, parameter=variables, @@ -133,6 +187,23 @@ def img2ts( data = input_dataset.read(first_date_time) ts_attributes = data.metadata + props = {'product': product, 'filetype': filetype, + 'last_update': datetime.now()} + + kwargs = { + 'input_root': input_root, + 'outputpath': outputpath, + 'variables': None if variables is None else list(variables), + 'land_points': land_points, + 'startdate': startdate, + 'enddate': enddate, + 'h_steps': h_steps, + 'bbox': None if bbox is None else list(bbox), + 'imgbuffer': imgbuffer + } + + props["img2ts_kwargs"] = kwargs + reshuffler = Img2Ts( input_dataset=input_dataset, outputpath=outputpath, @@ -150,11 +221,8 @@ def img2ts( backend='multiprocessing', n_proc=1, ) + reshuffler.calc() + update_ts_summary_file(outputpath, props) -if __name__ == '__main__': - img2ts("/home/wpreimes/shares/home/code/ecmwf_models/tests/ecmwf_models-test-data/ERA5/grib", - '/tmp/era5/ts', variables=['swvl1', "swvl2"], - startdate='2010-01-01', enddate='2010-01-01', land_points=False, - h_steps=[0, 12], bbox=(12, 46, 17, 50)) \ No newline at end of file diff --git a/src/ecmwf_models/extract.py b/src/ecmwf_models/extract.py 
new file mode 100644 index 0000000..d01e574 --- /dev/null +++ b/src/ecmwf_models/extract.py @@ -0,0 +1,211 @@ +from datetime import datetime +import logging +import os +import pandas as pd +import xarray as xr +from datedown.fname_creator import create_dt_fpath + +from ecmwf_models.globals import ( + IMG_FNAME_TEMPLATE, + IMG_FNAME_DATETIME_FORMAT, + EXPVER, SUBDIRS +) +from ecmwf_models.globals import ( + Cdo, cdo_available, CdoNotFoundError, + pygrib, pygrib_available, PygribNotFoundError, +) + + +def save_ncs_from_nc( + input_nc, + output_path, + product_name, + grid=None, + keep_original=True, + remap_method="bil", + keep_prelim=True, +): + """ + Split the downloaded netcdf file into daily files and add to folder + structure necessary for reshuffling. + + Parameters + ---------- + input_nc : str + Filepath of the downloaded .nc file + output_path : str + Where to save the resulting netcdf files + product_name : str + Name of the ECMWF model (only for filename generation) + keep_original: bool + keep the original downloaded data too, before it is sliced into + individual images. + keep_prelim: bool, optional (default: True) + True to keep preliminary data from ERA5T with a different file name, or + False drop these files and only keep the final records. 
+ """ + _filename_templ = IMG_FNAME_TEMPLATE.format( + product="{product}", + type='AN', + datetime=IMG_FNAME_DATETIME_FORMAT, + ext='nc' + ) + + nc_in = xr.open_dataset(input_nc, mask_and_scale=True) + if 'valid_time' in nc_in.variables: + nc_in = nc_in.rename_vars({"valid_time": 'time'}) + + if grid is not None: + if not cdo_available: + raise CdoNotFoundError() + cdo = Cdo() + + gridpath = os.path.join(output_path, "grid.txt") + weightspath = os.path.join(output_path, "remap_weights.nc") + if not os.path.exists(gridpath): + with open(gridpath, "w") as f: + for k, v in grid.items(): + f.write(f"{k} = {v}\n") + + for time in nc_in["time"].values: + subset = nc_in.sel({"time": time}) + + # Expver identifies preliminary data + if 'expver' in subset: + expver = str(subset['expver'].values) + subset = subset.drop_vars('expver') + try: + ext = EXPVER[expver] + except KeyError: + ext = '' + else: + ext = '' + + if len(ext) > 0 and not keep_prelim: + logging.info(f"Dropping preliminary data {time}") + continue + + if len(ext) > 0: + filename_templ = _filename_templ.format( + product=product_name + '-' + ext) + else: + filename_templ = _filename_templ.format(product=product_name) + + if 'number' in subset.variables: + subset = subset.drop_vars('number') + + timestamp = pd.Timestamp(time).to_pydatetime() + filepath = create_dt_fpath( + timestamp, + root=output_path, + fname=filename_templ, + subdirs=SUBDIRS, + ) + + if not os.path.exists(os.path.dirname(filepath)): + os.makedirs(os.path.dirname(filepath)) + + if grid is not None: + if not os.path.exists(weightspath): + # create weights file + getattr(cdo, "gen" + remap_method)( + gridpath, input=subset, output=weightspath) + subset = cdo.remap( + ",".join([gridpath, weightspath]), + input=subset, + returnXDataset=True, + ) + + # same compression for all variables + var_encode = {"zlib": True, "complevel": 6} + subset.to_netcdf( + filepath, encoding={var: var_encode for var in subset.variables}) + + nc_in.close() + + if not 
keep_original: + os.remove(input_nc) + if grid is not None: + cdo.cleanTempDir() + + +def save_gribs_from_grib( + input_grib, + output_path, + product_name, + keep_original=True, + keep_prelim=True, +): + """ + Split the downloaded grib file into daily files and add to folder structure + necessary for reshuffling. + + Parameters + ---------- + input_grib : str + Filepath of the downloaded .grb file + output_path : str + Where to save the resulting grib files + product_name : str + Name of the ECMWF model (only for filename generation) + keep_original: bool + keep the original downloaded data too, before it is sliced into + individual images. + keep_prelim: bool, optional (default: True) + True to keep preliminary data from ERA5T with a different file name, or + False drop these files and only keep the final records. + """ + if not pygrib_available: + raise PygribNotFoundError() + grib_in = pygrib.open(input_grib) + + _filename_templ = IMG_FNAME_TEMPLATE.format( + product="{product}", + type='AN', + datetime=IMG_FNAME_DATETIME_FORMAT, + ext='grb' + ) + + grib_in.seek(0) + prev_date = None + + for grb in grib_in: + filedate = datetime(grb["year"], grb["month"], grb["day"], grb["hour"]) + + expver = grb['expver'] + + try: + ext = EXPVER[expver] + except KeyError: + ext = '' + + if len(ext) > 0 and not keep_prelim: + logging.info(f"Dropping preliminary data {filedate}") + continue + + if len(ext) > 0: + filename_templ = _filename_templ.format( + product=product_name + '-' + ext) + else: + filename_templ = _filename_templ.format(product=product_name) + + filepath = create_dt_fpath( + filedate, root=output_path, fname=filename_templ, + subdirs=SUBDIRS) + + if not os.path.exists(os.path.dirname(filepath)): + os.makedirs(os.path.dirname(filepath)) + + if prev_date != filedate: # to overwrite old files + mode = 'wb' + prev_date = filedate + else: + mode = "ab" + + with open(filepath, mode) as grb_out: + grb_out.write(grb.tostring()) + + grib_in.close() + + if not 
keep_original: + os.remove(input_grib) diff --git a/src/ecmwf_models/globals.py b/src/ecmwf_models/globals.py index 9432cd1..eb9c535 100644 --- a/src/ecmwf_models/globals.py +++ b/src/ecmwf_models/globals.py @@ -15,6 +15,30 @@ EXPVER = {'0005': 'T', '0001': ''} +SUBDIRS = ["%Y", "%j"] + +try: + import pygrib + pygrib_available = True +except ImportError: + pygrib_available = False + pygrib = None + +try: + from cdo import Cdo + cdo_available = True +except ImportError: + cdo_available = False + Cdo = None + + +class PygribNotFoundError(ModuleNotFoundError): + def __init__(self, msg=None): + _default_msg = ("pygrib could not be imported. " + "Pleas run `conda install -c conda-forge pygrib` to " + "install it.") + self.msg = _default_msg if msg is None else msg + class CdoNotFoundError(ModuleNotFoundError): def __init__(self, msg=None): diff --git a/src/ecmwf_models/grid.py b/src/ecmwf_models/grid.py index 9eeee5a..39f0087 100644 --- a/src/ecmwf_models/grid.py +++ b/src/ecmwf_models/grid.py @@ -4,19 +4,51 @@ """ import numpy as np -from pygeogrids.grids import BasicGrid, CellGrid, gridfromdims +from pygeogrids.grids import CellGrid, gridfromdims import os -from typing import Tuple, Literal +from typing import Tuple import xarray as xr def trafo_lon(lon): + """ + 0...360 -> 0...180...-180 + + Parameters + ---------- + lon: np.array + Longitude array + + Returns + ------- + lon_transformed: np.array + Transformed longitude array + """ lons_gt_180 = np.where(lon > 180.) lon[lons_gt_180] = lon[lons_gt_180] - 360.0 return lon def safe_arange(start, stop, step): + """ + Like numpy.arange, but floating point precision is kept. + Compare: `np.arange(0, 100, 0.01)[-1]` vs `safe_arange(0, 100, 0.01)[-1]` + + Parameters + ---------- + start: float + Start of interval + stop: float + End of interval (not included) + step: float + Stepsize + + Returns + ------- + arange: np.array + Range of values in interval at the given step size / sampling + """ f_step = (1. 
/ float(step)) - vals = np.arange(float(start) * f_step, float(stop) * f_step , float(step) * f_step) + vals = np.arange(float(start) * f_step, float(stop) * f_step, + float(step) * f_step) return vals / f_step def get_grid_resolution(lats: np.ndarray, lons: np.ndarray) -> (float, float): @@ -124,24 +156,19 @@ def ERA_RegularImgGrid( CellGrid : CellGrid Regular, CellGrid with 5DEG*5DEG cells for the passed bounding box. """ - - # to get precise coordinates + # to get precise coordinates... lon = safe_arange(-180, 180, resolution) - lat = safe_arange(-90, 90+resolution, resolution)[::-1] - - if bbox is not None: - lon = lon[np.where((lon >= bbox[0]) & (lon <= bbox[2]))] - lat = lat[np.where((lat >= bbox[1]) & (lat <= bbox[3]))] + lat = safe_arange(-90, 90 + resolution, resolution)[::-1] # ERA grid LLC point has Lon=0 lon = np.roll(lon, int(len(lon) * 0.5)) - glob_basic_grid = gridfromdims(lon, lat, origin='top') - - glob_cell_grid = glob_basic_grid.to_cell_grid(cellsize=5.0) - - return glob_cell_grid + grid = gridfromdims(lon, lat, origin='top') + grid = grid.to_cell_grid(cellsize=5.0) + if bbox is not None: + subgpis = grid.get_bbox_grid_points(latmin=bbox[1], latmax=bbox[3], + lonmin=bbox[0], lonmax=bbox[2]) + grid = grid.subgrid_from_gpis(subgpis) -if __name__ == '__main__': - a = ERA5_RegularImgLandGrid(0.25) \ No newline at end of file + return grid diff --git a/src/ecmwf_models/interface.py b/src/ecmwf_models/interface.py index e8276a9..eed3da6 100644 --- a/src/ecmwf_models/interface.py +++ b/src/ecmwf_models/interface.py @@ -25,29 +25,26 @@ """ import warnings import os +import glob +from datetime import timedelta, datetime # noqa: F401 +import numpy as np +import xarray as xr + from pygeobase.io_base import ImageBase, MultiTemporalImageBase from pygeobase.object_base import Image +from pynetcf.time_series import GriddedNcOrthoMultiTs from pygeogrids.grids import BasicGrid, gridfromdims -import numpy as np -from datetime import timedelta, datetime # noqa: 
F401 - from pygeogrids.netcdf import load_grid -from pynetcf.time_series import GriddedNcOrthoMultiTs -from ecmwf_models.grid import ( - ERA_RegularImgGrid, - get_grid_resolution, - trafo_lon -) -from ecmwf_models.utils import lookup -from ecmwf_models.globals import IMG_FNAME_TEMPLATE, SUPPORTED_PRODUCTS -import glob - -import xarray as xr -try: - import pygrib -except ImportError: - warnings.warn("pygrib has not been imported") +from ecmwf_models.grid import trafo_lon +from ecmwf_models.utils import lookup +from ecmwf_models.globals import ( + IMG_FNAME_TEMPLATE, IMG_FNAME_DATETIME_FORMAT, + SUPPORTED_PRODUCTS, SUBDIRS, +) +from ecmwf_models.globals import ( + pygrib, pygrib_available, PygribNotFoundError +) class ERANcImg(ImageBase): @@ -92,7 +89,8 @@ def __init__( if parameter is not None: # look up short names - self.parameter = lookup(product, np.atleast_1d(parameter))["short_name"].values + self.parameter = lookup( + product, np.atleast_1d(parameter))["short_name"].values else: self.parameter = None @@ -184,7 +182,6 @@ def read(self, timestamp=None): return_metadata[name] = variable.attrs return_img[name] = param_data - dataset.close() if self.subgrid is None: @@ -268,7 +265,6 @@ def __init__( ): self.h_steps = h_steps - subpath_templ = ["%Y", "%j"] if parameter is not None: # look up short names @@ -300,8 +296,8 @@ def __init__( type='*', datetime='{datetime}', ext='nc'), - datetime_format="%Y%m%d_%H%M", - subpath_templ=subpath_templ, + datetime_format=IMG_FNAME_DATETIME_FORMAT, + subpath_templ=SUBDIRS, exact_templ=False, ioclass_kws=ioclass_kws) @@ -448,6 +444,8 @@ def read(self, timestamp=None): timestamp : datetime, optional (default: None) Specific date (time) to read the data for. 
""" + if not pygrib_available: + raise PygribNotFoundError() grbs = pygrib.open(self.filename) return_img = {} @@ -621,8 +619,6 @@ def __init__( """ self.h_steps = h_steps - subpath_templ = ["%Y", "%j"] - ioclass_kws = { "product": product, "parameter": parameter, @@ -640,8 +636,8 @@ def __init__( root_path, ERAGrbImg, fname_templ=fname_templ, - datetime_format="%Y%m%d_%H%M", - subpath_templ=subpath_templ, + datetime_format=IMG_FNAME_DATETIME_FORMAT, + subpath_templ=SUBDIRS, exact_templ=False, ioclass_kws=ioclass_kws, ) diff --git a/src/ecmwf_models/utils.py b/src/ecmwf_models/utils.py index e4f8449..f132cee 100644 --- a/src/ecmwf_models/utils.py +++ b/src/ecmwf_models/utils.py @@ -25,233 +25,22 @@ Utility functions for all data products in this package. """ import os -import logging from datetime import datetime -import xarray as xr import pandas as pd -from datedown.fname_creator import create_dt_fpath import numpy as np from netCDF4 import Dataset from collections import OrderedDict from parse import parse import yaml + +from ecmwf_models.extract import save_gribs_from_grib from repurpose.misc import find_first_at_depth from ecmwf_models.globals import ( - CdoNotFoundError, DOTRC, EXPVER, CDS_API_URL, IMG_FNAME_TEMPLATE, + DOTRC, CDS_API_URL, IMG_FNAME_TEMPLATE, IMG_FNAME_DATETIME_FORMAT, SUPPORTED_PRODUCTS ) -try: - from cdo import Cdo - - cdo_available = True -except ImportError: - cdo_available = False - -try: - import pygrib - - pygrib_available = True -except ImportError: - pygrib_available = False - -def save_ncs_from_nc( - input_nc, - output_path, - product_name, - grid=None, - keep_original=True, - remap_method="bil", - keep_prelim=True, -): - """ - Split the downloaded netcdf file into daily files and add to folder - structure necessary for reshuffling. 
- - Parameters - ---------- - input_nc : str - Filepath of the downloaded .nc file - output_path : str - Where to save the resulting netcdf files - product_name : str - Name of the ECMWF model (only for filename generation) - keep_original: bool - keep the original downloaded data too, before it is sliced into - individual images. - keep_prelim: bool, optional (default: True) - True to keep preliminary data from ERA5T with a different file name, or - False drop these files and only keep the final records. - """ - localsubdirs = ["%Y", "%j"] - - _filename_templ = IMG_FNAME_TEMPLATE.format( - product="{product}", - type='AN', - datetime=IMG_FNAME_DATETIME_FORMAT, - ext='nc' - ) - - nc_in = xr.open_dataset(input_nc, mask_and_scale=True) - if 'valid_time' in nc_in.variables: - nc_in = nc_in.rename_vars({"valid_time": 'time'}) - - if grid is not None: - if not cdo_available: - raise CdoNotFoundError() - - cdo = Cdo() - gridpath = os.path.join(output_path, "grid.txt") - weightspath = os.path.join(output_path, "remap_weights.nc") - if not os.path.exists(gridpath): - with open(gridpath, "w") as f: - for k, v in grid.items(): - f.write(f"{k} = {v}\n") - - for time in nc_in["time"].values: - subset = nc_in.sel({"time": time}) - - # Expver identifies preliminary data - if 'expver' in subset: - expver = str(subset['expver'].values) - subset = subset.drop_vars('expver') - try: - ext = EXPVER[expver] - except KeyError: - ext = '' - else: - ext = '' - - if len(ext) > 0 and not keep_prelim: - logging.info(f"Dropping preliminary data {time}") - continue - - if len(ext) > 0: - filename_templ = _filename_templ.format( - product=product_name + '-' + ext) - else: - filename_templ = _filename_templ.format(product=product_name) - - if 'number' in subset.variables: - subset = subset.drop_vars('number') - - timestamp = pd.Timestamp(time).to_pydatetime() - filepath = create_dt_fpath( - timestamp, - root=output_path, - fname=filename_templ, - subdirs=localsubdirs, - ) - - if not 
os.path.exists(os.path.dirname(filepath)): - os.makedirs(os.path.dirname(filepath)) - - if grid is not None: - if not os.path.exists(weightspath): - # create weights file - getattr(cdo, "gen" + remap_method)( - gridpath, input=subset, output=weightspath) - subset = cdo.remap( - ",".join([gridpath, weightspath]), - input=subset, - returnXDataset=True, - ) - - # same compression for all variables - var_encode = {"zlib": True, "complevel": 6} - subset.to_netcdf( - filepath, encoding={var: var_encode for var in subset.variables}) - - nc_in.close() - - if not keep_original: - os.remove(input_nc) - if grid is not None: - cdo.cleanTempDir() - - -def save_gribs_from_grib( - input_grib, - output_path, - product_name, - keep_original=True, - keep_prelim=True, -): - """ - Split the downloaded grib file into daily files and add to folder structure - necessary for reshuffling. - - Parameters - ---------- - input_grib : str - Filepath of the downloaded .grb file - output_path : str - Where to save the resulting grib files - product_name : str - Name of the ECMWF model (only for filename generation) - keep_original: bool - keep the original downloaded data too, before it is sliced into - individual images. - keep_prelim: bool, optional (default: True) - True to keep preliminary data from ERA5T with a different file name, or - False drop these files and only keep the final records. 
- """ - localsubdirs = ["%Y", "%j"] - grib_in = pygrib.open(input_grib) - - _filename_templ = IMG_FNAME_TEMPLATE.format( - product="{product}", - type='AN', - datetime=IMG_FNAME_DATETIME_FORMAT, - ext='grb' - ) - - grib_in.seek(0) - prev_date = None - - for grb in grib_in: - filedate = datetime(grb["year"], grb["month"], grb["day"], grb["hour"]) - - expver = grb['expver'] - - try: - ext = EXPVER[expver] - except KeyError: - ext = '' - - if len(ext) > 0 and not keep_prelim: - logging.info(f"Dropping preliminary data {filedate}") - continue - - if len(ext) > 0: - filename_templ = _filename_templ.format( - product=product_name + '-' + ext) - else: - filename_templ = _filename_templ.format(product=product_name) - - filepath = create_dt_fpath( - filedate, root=output_path, fname=filename_templ, - subdirs=localsubdirs) - - if not os.path.exists(os.path.dirname(filepath)): - os.makedirs(os.path.dirname(filepath)) - - if prev_date != filedate: # to overwrite old files - mode = 'wb' - prev_date = filedate - else: - mode = "ab" - - with open(filepath, mode) as grb_out: - grb_out.write(grb.tostring()) - - grib_in.close() - - if not keep_original: - os.remove(input_grib) - - def parse_product(inpath: str) -> str: """ Tries to find out what product is stored in the path. This is done based @@ -299,41 +88,6 @@ def parse_filetype(inpath: str) -> str: return 'netcdf' - -# def parse_filetype(inpath): -# """ -# Tries to find out the file type by searching for -# grib or nc files two subdirectories into the passed input path. -# If function fails, grib is assumed. -# -# Parameters -# ---------- -# inpath: str -# Input path where ERA data was downloaded to -# -# Returns -# ------- -# filetype : str -# File type string. 
-# """ -# onedown = os.path.join(inpath, os.listdir(inpath)[0]) -# twodown = os.path.join(onedown, os.listdir(onedown)[0]) -# -# filelist = [] -# for path, subdirs, files in os.walk(twodown): -# for name in files: -# filename, extension = os.path.splitext(name) -# filelist.append(extension) -# -# if ".nc" in filelist and ".grb" not in filelist: -# return "netcdf" -# elif ".grb" in filelist and ".nc" not in filelist: -# return "grib" -# else: -# # if file type cannot be detected, guess grib -# return "grib" - - def load_var_table(name="era5", lut=False): """ Load the variables table for supported variables to download. @@ -590,7 +344,7 @@ def get_first_last_image_date(path, start_from_last=True): ---------- path: str Path to the directory containing the image files - start_from_last: str, optional (default: True') + start_from_last: bool, optional (default: True') Get date from last available file instead of the first available one. Returns @@ -629,9 +383,9 @@ def update_image_summary_file(data_path: str, it will be overwritten. """ first_image_date = get_first_last_image_date(data_path, - start_from_last='first') + start_from_last=False) last_image_date = get_first_last_image_date(data_path, - start_from_last='last') + start_from_last=True) props = img_infer_file_props(data_path, start_from_last=False) _ = props.pop("datetime") @@ -660,25 +414,6 @@ def assert_product(product: str) -> str: return product -def read_summary_yml(path: str) -> dict: - """ - Read image summary and return fields as dict. - """ - path = os.path.join(path, 'overview.yml') - - if not os.path.isfile(path): - raise FileNotFoundError( - "No overview.yml file found in the passed directory. " - "This file is required to use the same settings to extend an " - "existing record. NOTE: Use the `era5 download` or " - "`era5land download` programs first." 
- ) - - with open(path, 'r') as stream: - props = yaml.safe_load(stream) - - return props - if __name__ == '__main__': save_gribs_from_grib("/tmp/era5/grb/temp_downloaded/20240730_20240731.grb", diff --git a/tests/test_utils.py b/tests/test_utils.py index a217d75..6d4f2a8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,18 +3,19 @@ Testing the utility functions ''' +import os +import tempfile +import numpy as np +from netCDF4 import Dataset + from ecmwf_models.utils import ( parse_filetype, parse_product, load_var_table, lookup, get_default_params, - make_era5_land_definition_file) -import os -from datetime import datetime -import tempfile -import numpy as np -from netCDF4 import Dataset + make_era5_land_definition_file, +) def test_parse_product(): @@ -33,21 +34,21 @@ def test_parse_filetype(): def test_load_var_table(): table = load_var_table('era5') - assert table.index.size == 265 + assert table.index.size == 260 assert table.loc[ - 100].dl_name == 'mean_surface_direct_short_wave_radiation_flux' - assert table.loc[100].short_name == 'msdrswrf' + 100].dl_name == 'mean_surface_downward_short_wave_radiation_flux' + assert table.loc[100].short_name == 'msdwswrf' table = load_var_table('era5-land') - assert table.index.size == 49 - assert table.loc[45].dl_name == 'volumetric_soil_water_layer_1' - assert table.loc[45].short_name == 'swvl1' + assert table.index.size == 50 + assert table.loc[46].dl_name == 'volumetric_soil_water_layer_1' + assert table.loc[46].short_name == 'swvl1' def test_lookup(): lut = lookup('era5', ['swvl1', 'stl1']) - assert lut.loc[254].dl_name == 'volumetric_soil_water_layer_1' - assert lut.loc[171].dl_name == 'soil_temperature_level_1' + assert lut.loc[249].dl_name == 'volumetric_soil_water_layer_1' + assert lut.loc[166].dl_name == 'soil_temperature_level_1' def test_get_default_params(): diff --git a/tests/tests_era5/test_era5_download.py b/tests/tests_era5/test_era5_download.py index 6a738d1..3004048 100644 --- 
a/tests/tests_era5/test_era5_download.py +++ b/tests/tests_era5/test_era5_download.py @@ -31,9 +31,14 @@ import pytest import tempfile -from ecmwf_models.era5.download import download_and_move, save_ncs_from_nc -from ecmwf_models.utils import cdo_available, read_summary_yml -from ecmwf_models.globals import CdoNotFoundError +from c3s_sm.misc import read_summary_yml + +from ecmwf_models.era5.download import download_and_move +from ecmwf_models.extract import save_ncs_from_nc +from ecmwf_models.globals import ( + cdo_available, + CdoNotFoundError +) grid = { "gridtype": "lonlat", diff --git a/tests/tests_era5/test_era5_reshuffle.py b/tests/tests_era5/test_era5_reshuffle.py index a23417e..191f6c2 100644 --- a/tests/tests_era5/test_era5_reshuffle.py +++ b/tests/tests_era5/test_era5_reshuffle.py @@ -30,10 +30,16 @@ import numpy as np import numpy.testing as nptest import subprocess +import shutil +from pathlib import Path +import xarray as xr +import yaml +from datetime import datetime + +from c3s_sm.misc import read_summary_yml from ecmwf_models.era5.reshuffle import img2ts from ecmwf_models import ERATs -from ecmwf_models.utils import read_summary_yml inpath = os.path.join( os.path.dirname(os.path.abspath(__file__)), @@ -42,32 +48,54 @@ "ERA5", ) -def test_cli_reshuffle(): - path = os.path.join(inpath, 'netcdf') - with tempfile.TemporaryDirectory() as ts_path: - subprocess.call(["era5", "reshuffle", path, ts_path, "2010-01-01", +def test_cli_reshuffle_and_update(): + testdata_path = Path(os.path.join(inpath, 'netcdf')) + with tempfile.TemporaryDirectory() as tempdir: + tempdir = Path(tempdir) + img_path = tempdir / 'images' + shutil.copytree(testdata_path, img_path) + + # we duplicate 1 file to check whether prioritizing final images over T images works + ds = xr.open_dataset(testdata_path / '2010' / '001' / "ERA5_AN_20100101_0000.nc") + ds['swvl1'].values = np.full(ds['swvl1'].values.shape, 99) + ds.to_netcdf(img_path / '2010' / '001' / "ERA5-T_AN_20100101_0000.nc") 
+ + ts_path = tempdir / 'ts' + + subprocess.call(["era5", "reshuffle", img_path, ts_path, "2010-01-01", "2010-01-01", "-v", "swvl1,swvl2", "-l", "--bbox", "12.0", "46.0", "17.0", "50.0", "--h_steps", "0"]) - sum = read_summary_yml(ts_path) - change summary + ts_reader = ERATs(ts_path) + ts = ts_reader.read(15, 48) + assert 99 not in ts['swvl1'].values # verify ERA5-T was NOT used! + swvl1_values_should = np.array([0.402825], dtype=np.float32) + nptest.assert_allclose( + ts["swvl1"].values, swvl1_values_should, rtol=1e-5) - write new summary + ts_reader.close() - call update with manipulated summary + # Manipulate settings to update with different time stamp for same day + props = read_summary_yml(ts_path) + props['img2ts_kwargs']['h_steps'] = [12] + props['img2ts_kwargs']['startdate'] = datetime(2009,12,31) + props['img2ts_kwargs']['enddate'] = datetime(2009,12,31) - subprocess.call(["era5", "update_ts", path, ts_path, "2010-01-01", - "2010-01-01", "-v", "swvl1,swvl2", "-l", - "--bbox", "12.0", "46.0", "17.0", "50.0", - "--h_steps", "12"]) + with open(ts_path / 'overview.yml', 'w') as f: + yaml.dump(props, f, default_flow_style=False, sort_keys=False) - ds = ERATs(ts_path, ioclass_kws={"read_bulk": True}) - ts = ds.read(15, 48) + subprocess.call(["era5", "update_ts", ts_path]) + ts_reader = ERATs(ts_path) + ts = ts_reader.read(15, 48) swvl1_values_should = np.array([0.402825, 0.390983], dtype=np.float32) nptest.assert_allclose( ts["swvl1"].values, swvl1_values_should, rtol=1e-5) + assert 'swvl2' in ts.columns + + ts_reader.close() + def test_ERA5_reshuffle_nc(): # test reshuffling era5 netcdf images to time series @@ -121,8 +149,4 @@ def test_ERA5_reshuffle_grb(): ts["swvl1"].values, swvl1_values_should, rtol=1e-5) swvl2_values_should = np.array([0.390514, 0.390980], dtype=np.float32) nptest.assert_allclose( - ts["swvl2"].values, swvl2_values_should, rtol=1e-5) - - -if __name__ == '__main__': - test_cli_reshuffle() \ No newline at end of file + ts["swvl2"].values, 
swvl2_values_should, rtol=1e-5) \ No newline at end of file From 5b222f32e0682d6bdbc4147e3720998d003d1cda Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Fri, 25 Oct 2024 17:34:16 +0200 Subject: [PATCH 04/20] Update build --- .github/workflows/ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 88c0785..2d58c1c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,6 @@ jobs: matrix: python-version: [ '3.8', '3.12' ] os: [ "ubuntu-latest" , "windows-latest", "macos-latest"] - ymlfile: [ "latest.yml" ] name: Py${{ matrix.python-version }}@${{ matrix.os }} runs-on: ${{ matrix.os }} @@ -39,7 +38,6 @@ jobs: with: auto-update-conda: true python-version: ${{ matrix.python-version }} - environment-file: environment/${{ matrix.ymlfile }} channel-priority: flexible activate-environment: ecmwf_models auto-activate-base: false From e079468c0b1ffd7b84e3bfc161a16ef2868cc555 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Fri, 25 Oct 2024 20:22:46 +0200 Subject: [PATCH 05/20] Fix tests --- .github/workflows/ci.yml | 2 +- src/ecmwf_models/era5/download.py | 6 +-- tests/tests_era5/test_era5_download.py | 61 ++++++++++++---------- tests/tests_era5/test_era5_download_cds.py | 3 +- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d58c1c..218ea6a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: build: strategy: matrix: - python-version: [ '3.8', '3.12' ] + python-version: [ '3.9', '3.12'] os: [ "ubuntu-latest" , "windows-latest", "macos-latest"] name: Py${{ matrix.python-version }}@${{ matrix.os }} runs-on: ${{ matrix.os }} diff --git a/src/ecmwf_models/era5/download.py b/src/ecmwf_models/era5/download.py index 727f34d..a554410 100644 --- a/src/ecmwf_models/era5/download.py +++ b/src/ecmwf_models/era5/download.py @@ -289,10 +289,6 @@ def download_and_move( 
-1 : Error in at least one request -10 : No data available for requested time period """ - if cds_token is not None: - os.environ["CDSAPI_KEY"] = cds_token - check_api_ready() - h_steps = list(h_steps) product = product.lower() cds_kwds = cds_kwds or dict() @@ -312,6 +308,8 @@ def download_and_move( c = None cds_status_tracker = None else: + if cds_token is not None: + os.environ["CDSAPI_KEY"] = cds_token check_api_ready() os.makedirs(target_path, exist_ok=True) diff --git a/tests/tests_era5/test_era5_download.py b/tests/tests_era5/test_era5_download.py index 3004048..9f1d309 100644 --- a/tests/tests_era5/test_era5_download.py +++ b/tests/tests_era5/test_era5_download.py @@ -62,7 +62,6 @@ def test_download_with_cdo_not_installed(): save_ncs_from_nc( infile, out_path, 'ERA5', grid=grid, keep_original=True) - def test_dry_download_nc_era5(): with tempfile.TemporaryDirectory() as dl_path: dl_path = os.path.join(dl_path, 'era5') @@ -81,15 +80,16 @@ def test_dry_download_nc_era5(): startdate = enddate = datetime(2010, 1, 1) variables, h_steps = ['swvl1', 'swvl2', 'lsm'], [0, 12] - download_and_move( - dl_path, - startdate, - enddate, - variables=variables, - keep_original=False, - h_steps=h_steps, - grb=False, - dry_run=True) + with pytest.warns(UserWarning, match='Dry run*'): + download_and_move( + dl_path, + startdate, + enddate, + variables=variables, + keep_original=False, + h_steps=h_steps, + grb=False, + dry_run=True) cont = os.listdir(dl_path) assert len(cont) == 2 @@ -131,15 +131,16 @@ def test_dry_download_grb_era5(): startdate = enddate = datetime(2010, 1, 1) variables, h_steps = ['swvl1', 'swvl2', 'lsm'], [0, 12] - download_and_move( - dl_path, - startdate, - enddate, - variables=variables, - keep_original=False, - h_steps=h_steps, - grb=True, - dry_run=True) + with pytest.warns(UserWarning, match="Dry run*"): + download_and_move( + dl_path, + startdate, + enddate, + variables=variables, + keep_original=False, + h_steps=h_steps, + grb=True, + dry_run=True) 
cont = os.listdir(dl_path) assert len(cont) == 2 @@ -180,16 +181,18 @@ def test_download_nc_era5_regridding(): os.path.join(dl_path, 'temp_downloaded', '20100101_20100101.nc')) startdate = enddate = datetime(2010, 1, 1) - download_and_move( - dl_path, - startdate, - enddate, - variables=['swvl1', 'swvl2', 'lsm'], - keep_original=False, - h_steps=[0, 12], - grb=False, - dry_run=True, - grid=grid) + + with pytest.warns(UserWarning, match="Dry run*"): + download_and_move( + dl_path, + startdate, + enddate, + variables=['swvl1', 'swvl2', 'lsm'], + keep_original=False, + h_steps=[0, 12], + grb=False, + dry_run=True, + grid=grid) cont = os.listdir(dl_path) assert len(cont) == 2 diff --git a/tests/tests_era5/test_era5_download_cds.py b/tests/tests_era5/test_era5_download_cds.py index 29c519f..a05ab8f 100644 --- a/tests/tests_era5/test_era5_download_cds.py +++ b/tests/tests_era5/test_era5_download_cds.py @@ -25,9 +25,8 @@ class DownloadTest(unittest.TestCase): @unittest.skipIf( os.environ.get('CDSAPI_KEY') is None and not api_ready, - 'CDSAPI_KEY not found. Make sure the environment variable exists.' + 'CDSAPI_KEY not found.' 
) - @pytest.mark.wget def test_cli_download(self): with tempfile.TemporaryDirectory() as dl_path: From ecac878966fe236972ea79b23ebec6a982647370 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Fri, 25 Oct 2024 20:26:58 +0200 Subject: [PATCH 06/20] Fix tests --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 218ea6a..bb53d1a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: build: strategy: matrix: - python-version: [ '3.9', '3.12'] + python-version: ['3.12'] os: [ "ubuntu-latest" , "windows-latest", "macos-latest"] name: Py${{ matrix.python-version }}@${{ matrix.os }} runs-on: ${{ matrix.os }} From 43381187630142c6e34515cc832c77fbcabeba63 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Fri, 25 Oct 2024 20:32:46 +0200 Subject: [PATCH 07/20] Fix tests --- .github/workflows/ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bb53d1a..a969f36 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,6 +57,11 @@ jobs: then conda install -c conda-forge pykdtree fi + # And on windows pygrib wheels are not available + if [ ${{ matrix.os }} == "windows-latest" ] + then + conda install -c conda-forge pygrib + fi pip install -e .[testing] - name: Run all tests env: From 0c92eb3b92e089ce70ca85811805cae356722cdb Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Fri, 25 Oct 2024 22:02:09 +0200 Subject: [PATCH 08/20] Update docs --- .github/workflows/ci.yml | 2 +- CONTRIBUTING.rst | 2 +- README.rst | 104 +++++++++++++++++++++++---------------- src/ecmwf_models/cli.py | 4 +- 4 files changed, 67 insertions(+), 45 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a969f36..0571245 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: build: strategy: 
matrix: - python-version: ['3.12'] + python-version: ['3.9', '3.12'] os: [ "ubuntu-latest" , "windows-latest", "macos-latest"] name: Py${{ matrix.python-version }}@${{ matrix.os }} runs-on: ${{ matrix.os }} diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 96aab72..9f6cd88 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -87,7 +87,7 @@ Before you start coding, we recommend creating an isolated `virtual environment`_ to avoid any problems with your installed Python packages. This can easily be done via Miniconda_:: - conda env create -f environment/latest.yml + conda create -n ecmwf_models python=3.12 pygrib netcdf4 pyresample pykdtree conda activate ecmwf_models Clone the repository diff --git a/README.rst b/README.rst index d0721c6..ef6bfd1 100644 --- a/README.rst +++ b/README.rst @@ -3,90 +3,110 @@ ecmwf_models ============ -.. image:: https://github.com/TUW-GEO/ecmwf_models/workflows/Automated%20Tests/badge.svg?branch=master - :target: https://github.com/TUW-GEO/ecmwf_models/actions +|ci| |cov| |pip| |doc| -.. image:: https://coveralls.io/repos/github/TUW-GEO/ecmwf_models/badge.svg?branch=master - :target: https://coveralls.io/github/TUW-GEO/ecmwf_models?branch=master +.. |ci| image:: https://github.com/TUW-GEO/ecmwf_models/actions/workflows/ci.yml/badge.svg?branch=master + :target: https://github.com/ecmwf_models/c3s_sm/actions -.. image:: https://badge.fury.io/py/ecmwf-models.svg +.. |cov| image:: https://coveralls.io/repos/TUW-GEO/ecmwf_models/badge.png?branch=master + :target: https://coveralls.io/r/TUW-GEO/ecmwf_models?branch=master + +.. |pip| image:: https://badge.fury.io/py/ecmwf-models.svg :target: https://badge.fury.io/py/ecmwf-models -.. image:: https://readthedocs.org/projects/ecmwf-models/badge/?version=latest +.. |doc| image:: https://readthedocs.org/projects/ecmwf-models/badge/?version=latest :target: https://ecmwf-models.readthedocs.io/en/latest/ + Readers and converters for data from the `ECMWF reanalysis models `_. 
Written in Python. Works great in combination with `pytesmo `_. -Citation -======== - -.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.593533.svg - :target: https://doi.org/10.5281/zenodo.593533 - -If you use the software in a publication then please cite it using the Zenodo DOI. -Be aware that this badge links to the latest package version. - -Please select your specific version at https://doi.org/10.5281/zenodo.593533 to get the DOI of that version. -You should normally always use the DOI for the specific version of your record in citations. -This is to ensure that other researchers can access the exact research artefact you used for reproducibility. - -You can find additional information regarding DOI versioning at http://help.zenodo.org/#versioning Installation ============ Install required C-libraries via conda. For installation we recommend -`Miniconda `_. So please install it according -to the official installation instructions. As soon as you have the ``conda`` -command in your shell you can continue: +`Miniconda `_: .. code:: - conda install -c conda-forge pandas pygrib netcdf4 pyresample xarray + conda install -c conda-forge pygrib netcdf4 pyresample pykdtree -The following command will download and install all the needed pip packages as well -as the ecmwf-model package itself. +Afterwards the following command will install all remaining dependencies as +well as the ecmwf_models package itself. .. code:: pip install ecmwf_models -To create a full development environment with conda, the `yml` files inside -the folder `environment/` in this repository can be used. Both environements -should work. The file `latest` should install the newest version of most -dependencies. The file `pinned` is a fallback option and should always work. +Quickstart +========== -.. code:: +Download image data from CDS using the `era5 download` and `era5land download` +shell command ... + +.. 
code-block:: shell + + era5 download /tmp/era5/img -s 2024-04-01 -e 2024-04-05 -v swvl1,swvl2 --h_steps 0,12 - git clone --recursive git@github.com:TUW-GEO/ecmwf_models.git ecmwf_models - cd ecmwf_models - conda env create -f environment/latest.yml - source activate ecmwf_models - python setup.py develop - pytest +... and convert them to time series -API Setup -========= +.. code-block:: shell -https://cds.climate.copernicus.eu/how-to-api + era5 reshuffle /tmp/era5/img /tmp/era5/ts 2024-04-01 2024-04-05 --land_points + +Finally, in python, read the time series data for a location as a pandas +DataFrame. + +.. code-block:: python + + >> from ecmwf_models.interface import ERATs + >> ds = ERATs('/tmp/era5/ts') + >> ds.read(18, 48) + + swvl1 swvl2 + 2024-04-01 00:00:00 0.299046 0.303497 + 2024-04-01 12:00:00 0.286958 0.294542 + 2024-04-02 00:00:00 0.332938 0.292352 + ... ... ... + 2024-04-04 12:00:00 0.323561 0.313509 + 2024-04-05 00:00:00 0.331137 0.313139 + 2024-04-05 12:00:00 0.323436 0.309964 + + +CDS API Setup +============= + +In order to download data from CDS, this package uses the CDS API +(https://pypi.org/project/cdsapi/). You can either pass your credentials +directly on the command line (which might be unsafe) or set up a +.cdsapirc file in your home directory (recommended). +Please see the description at https://cds.climate.copernicus.eu/how-to-api. Supported Products ================== At the moment this package supports -- **ERA Interim** (deprecated) - **ERA5** - **ERA5-Land** reanalysis data in **grib** and **netcdf** format (download, reading, time series creation) with a default spatial -sampling of 0.75 degrees (ERA Interim), 0.25 degrees (ERA5), resp. 0.1 degrees (ERA5-Land). +sampling of 0.25 degrees (ERA5), and 0.1 degrees (ERA5-Land). It should be easy to extend the package to support other ECMWF reanalysis products. This will be done as need arises. +Citation +======== + +.. 
image:: https://zenodo.org/badge/DOI/10.5281/zenodo.593533.svg + :target: https://doi.org/10.5281/zenodo.593533 + +If you use the software in a publication then please cite it using the Zenodo DOI. +Be aware that this badge links to the latest package version. + Contribute ========== diff --git a/src/ecmwf_models/cli.py b/src/ecmwf_models/cli.py index 1269660..bd2a33e 100644 --- a/src/ecmwf_models/cli.py +++ b/src/ecmwf_models/cli.py @@ -342,7 +342,9 @@ def cli_reshuffle(img_path, ts_path, start, end, variables, land_points, bbox, """ h_steps = [int(h.strip()) for h in h_steps.split(',')] - variables = [str(v.strip()) for v in variables.split(',')] + + if variables is not None: + variables = [str(v.strip()) for v in variables.split(',')] print(f"Converting ERA5/ERA5-Land images in period {start} to {end} into " f"time series to {ts_path}.") From 7077aa0ff0aac6d425b902a7812157311e725c2d Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Fri, 25 Oct 2024 22:05:09 +0200 Subject: [PATCH 09/20] Update readme --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index ef6bfd1..fe7f9fe 100644 --- a/README.rst +++ b/README.rst @@ -35,7 +35,7 @@ Install required C-libraries via conda. For installation we recommend conda install -c conda-forge pygrib netcdf4 pyresample pykdtree Afterwards the following command will install all remaining dependencies as -well as the ecmwf_models package itself. +well as the ``ecmwf_models`` package itself. .. code:: @@ -44,7 +44,7 @@ well as the ecmwf_models package itself. Quickstart ========== -Download image data from CDS using the `era5 download` and `era5land download` +Download image data from CDS using the ``era5 download`` and ``era5land download`` shell command ... .. 
code-block:: shell From 1dfd85d030c3cc053115de3b1eeb8a20e12f57d8 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Fri, 25 Oct 2024 22:17:55 +0200 Subject: [PATCH 10/20] Update readme --- README.rst | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/README.rst b/README.rst index fe7f9fe..fafc955 100644 --- a/README.rst +++ b/README.rst @@ -45,17 +45,17 @@ Quickstart ========== Download image data from CDS using the ``era5 download`` and ``era5land download`` -shell command ... +shell command (see ``era5 download --help`` for all options) ... .. code-block:: shell - era5 download /tmp/era5/img -s 2024-04-01 -e 2024-04-05 -v swvl1,swvl2 --h_steps 0,12 + era5land download /tmp/era5/img -s 2024-04-01 -e 2024-04-05 -v swvl1,swvl2 --h_steps 0,12 -... and convert them to time series +... and convert them to time series (ideally for a longer period). Check ``era5 reshuffle --help`` .. code-block:: shell - era5 reshuffle /tmp/era5/img /tmp/era5/ts 2024-04-01 2024-04-05 --land_points + era5land reshuffle /tmp/era5/img /tmp/era5/ts 2024-04-01 2024-04-05 --land_points Finally, in python, read the time series data for a location as a pandas DataFrame. @@ -64,16 +64,16 @@ DataFrame. >> from ecmwf_models.interface import ERATs >> ds = ERATs('/tmp/era5/ts') - >> ds.read(18, 48) + >> ds.read(18, 48) # (lon, lat) swvl1 swvl2 - 2024-04-01 00:00:00 0.299046 0.303497 - 2024-04-01 12:00:00 0.286958 0.294542 - 2024-04-02 00:00:00 0.332938 0.292352 - ... ... ... - 2024-04-04 12:00:00 0.323561 0.313509 - 2024-04-05 00:00:00 0.331137 0.313139 - 2024-04-05 12:00:00 0.323436 0.309964 + 2024-04-01 00:00:00 0.318054 0.329590 + 2024-04-01 12:00:00 0.310715 0.325958 + 2024-04-02 00:00:00 0.360229 0.323502 + ... ... ... 
+ 2024-04-04 12:00:00 0.343353 0.348755 + 2024-04-05 00:00:00 0.350266 0.346558 + 2024-04-05 12:00:00 0.343994 0.344498 CDS API Setup From 3988a49d78132df356a8a3ea9d2e8aac261ce448 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Fri, 25 Oct 2024 23:27:20 +0200 Subject: [PATCH 11/20] Code formatting --- README.rst | 5 +- src/ecmwf_models/cli.py | 200 +++++++++++++++-------------- src/ecmwf_models/era5/download.py | 29 ++--- src/ecmwf_models/era5/reader.py | 1 - src/ecmwf_models/era5/reshuffle.py | 18 +-- src/ecmwf_models/extract.py | 56 ++++---- src/ecmwf_models/globals.py | 12 +- src/ecmwf_models/grid.py | 20 +-- src/ecmwf_models/interface.py | 98 +++++++------- src/ecmwf_models/utils.py | 55 ++++---- 10 files changed, 252 insertions(+), 242 deletions(-) diff --git a/README.rst b/README.rst index fafc955..54bc8ae 100644 --- a/README.rst +++ b/README.rst @@ -2,11 +2,10 @@ ecmwf_models ============ - |ci| |cov| |pip| |doc| .. |ci| image:: https://github.com/TUW-GEO/ecmwf_models/actions/workflows/ci.yml/badge.svg?branch=master - :target: https://github.com/ecmwf_models/c3s_sm/actions + :target: https://github.com/TUW-GEO/ecmwf_models/actions .. |cov| image:: https://coveralls.io/repos/TUW-GEO/ecmwf_models/badge.png?branch=master :target: https://coveralls.io/r/TUW-GEO/ecmwf_models?branch=master @@ -18,7 +17,7 @@ ecmwf_models :target: https://ecmwf-models.readthedocs.io/en/latest/ -Readers and converters for data from the `ECMWF reanalysis models +Readers and converters for `ECMWF reanalysis (ERA) data `_. Written in Python. Works great in combination with `pytesmo `_. 
diff --git a/src/ecmwf_models/cli.py b/src/ecmwf_models/cli.py index bd2a33e..38c0cbe 100644 --- a/src/ecmwf_models/cli.py +++ b/src/ecmwf_models/cli.py @@ -2,31 +2,33 @@ from datetime import datetime from ecmwf_models.era5.download import download_and_move, download_record_extension -from ecmwf_models.utils import ( - default_variables -) +from ecmwf_models.utils import (default_variables) from ecmwf_models.era5.reshuffle import img2ts, extend_ts @click.command( "download", - context_settings={'show_default': True, - 'help_option_names': ['-h', '--help']}, + context_settings={ + 'show_default': True, + 'help_option_names': ['-h', '--help'] + }, short_help="Download ERA5 reanalysis image data between two " - "dates from the Copernicus Climate Data Store (CDS). " - "Before this program can be used, you have to " - "register an account at CDS, and should setup a `.cdsapirc` " - "file as described here: " - "https://cds.climate.copernicus.eu/how-to-api") + "dates from the Copernicus Climate Data Store (CDS). " + "Before this program can be used, you have to " + "register an account at CDS, and should setup a `.cdsapirc` " + "file as described here: " + "https://cds.climate.copernicus.eu/how-to-api") @click.argument("PATH", type=click.Path(writable=True)) @click.option( - '--start', '-s', + '--start', + '-s', type=click.STRING, default=str(datetime(1979, 1, 1)), help="First date of the period to download data for. Format: YYYY-MM-DD") @click.option( - '--end', '-e', + '--end', + '-e', type=click.STRING, default=str(datetime.now().date()), help="Last date of the period to download data for. Format: YYYY-MM-DD") @@ -36,51 +38,51 @@ type=click.STRING, default=','.join(default_variables('era5', 'short_name')), help="Name of variables to download. To download multiple variables pass " - "comma-separated names (e.g. `-v swvl1,stl1`). " - "For all available variables see the docs. ") + "comma-separated names (e.g. `-v swvl1,stl1`). 
" + "For all available variables see the docs. ") @click.option( "-k", "--keep_original", type=click.BOOL, default=False, help="Also keep the original image stacks as downloaded from CDS, " - "instead of deleting them after extracting individual images. " - "Default is False.") + "instead of deleting them after extracting individual images. " + "Default is False.") @click.option( "-grb", "--as_grib", type=click.BOOL, default=False, help="Download data in grib format instead of netcdf. " - "Default is False.") + "Default is False.") @click.option( "--h_steps", type=str, default="0,6,12,18", help="Temporal sampling of downloaded images. To download multiple time " - "stamps or each day, pass comma-separated values. " - "Pass a set of full hours here, like '--h_steps 0,12' to download " - "two images for each day, at 0:00 and 12:00 respectively. " - "By default, we download 6-hourly images starting at 0:00 UTC, " - "(i.e. `--h_steps 0,6,12,18`") + "stamps or each day, pass comma-separated values. " + "Pass a set of full hours here, like '--h_steps 0,12' to download " + "two images for each day, at 0:00 and 12:00 respectively. " + "By default, we download 6-hourly images starting at 0:00 UTC, " + "(i.e. `--h_steps 0,6,12,18`") @click.option( "--max_request_size", type=int, default=1000, help="Maximum number of requests to pass to the CDS API. " - "The default is 1000, but what is allowed, depends on server " - "settings. Server settings may change at some point. Change " - "accordingly here in case that 'the request is too large'. " - "A smaller number will results in smaller download chunks (slower).") + "The default is 1000, but what is allowed, depends on server " + "settings. Server settings may change at some point. Change " + "accordingly here in case that 'the request is too large'. " + "A smaller number will results in smaller download chunks (slower).") @click.option( "--cds_token", type=click.STRING, default=None, help="To identify with the CDS. 
Required only if no `.cdsapirc` file " - "exists in the home directory (see documentation). " - "You can find your token/key on your CDS user profile page. " - "Alternatively, you can also set an environment variable " - "`CDSAPI_KEY` with your token.") + "exists in the home directory (see documentation). " + "You can find your token/key on your CDS user profile page. " + "Alternatively, you can also set an environment variable " + "`CDSAPI_KEY` with your token.") def cli_download_era5(path, start, end, variables, keep_original, as_grib, h_steps, max_request_size, cds_token): """ @@ -121,22 +123,26 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, @click.command( "download", - context_settings={'show_default': True, - 'help_option_names': ['-h', '--help']}, + context_settings={ + 'show_default': True, + 'help_option_names': ['-h', '--help'] + }, short_help="Download ERA5 reanalysis image data between two " - "dates from the Copernicus Climate Data Store (CDS). " - "Before this program can be used, you have to " - "register an account at CDS, and should setup a `.cdsapirc` " - "file as described here: " - "https://cds.climate.copernicus.eu/how-to-api") + "dates from the Copernicus Climate Data Store (CDS). " + "Before this program can be used, you have to " + "register an account at CDS, and should setup a `.cdsapirc` " + "file as described here: " + "https://cds.climate.copernicus.eu/how-to-api") @click.argument("PATH", type=click.Path(writable=True)) @click.option( - '--start', '-s', + '--start', + '-s', type=click.STRING, default=str(datetime(1979, 1, 1)), help="First date of the period to download data for. Format: YYYY-MM-DD") @click.option( - '--end', '-e', + '--end', + '-e', type=click.STRING, default=str(datetime.now().date()), help="Last date of the period to download data for. 
Format: YYYY-MM-DD") @@ -146,51 +152,51 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, type=click.STRING, default=','.join(default_variables('era5-land', 'short_name')), help="Name of variables to download. To download multiple variables pass " - "comma-separated names (e.g. `-v swvl1,stl1`). " - "For all available variables see the docs. ") + "comma-separated names (e.g. `-v swvl1,stl1`). " + "For all available variables see the docs. ") @click.option( "-k", "--keep_original", type=click.BOOL, default=False, help="Also keep the original image stacks as downloaded from CDS, " - "instead of deleting them after extracting individual images. " - "Default is False.") + "instead of deleting them after extracting individual images. " + "Default is False.") @click.option( "-grb", "--as_grib", type=click.BOOL, default=False, help="Download data in grib format instead of netcdf. " - "Default is False.") + "Default is False.") @click.option( "--h_steps", type=click.STRING, default='0,6,12,18', help="Temporal sampling of downloaded images. To download multiple time " - "stamps or each day, pass comma-separated values. " - "Pass a set of full hours here, like '--h_steps 0,12' to download " - "two images for each day, at 0:00 and 12:00 respectively. " - "By default, we download 6-hourly images starting at 0:00 UTC, " - "(i.e. `--h_steps 0,6,12,18`") + "stamps or each day, pass comma-separated values. " + "Pass a set of full hours here, like '--h_steps 0,12' to download " + "two images for each day, at 0:00 and 12:00 respectively. " + "By default, we download 6-hourly images starting at 0:00 UTC, " + "(i.e. `--h_steps 0,6,12,18`") @click.option( "--max_request_size", type=int, default=1000, help="Maximum number of requests to pass to the CDS API. " - "The default is 1000, but what is allowed, depends on server " - "settings. Server settings may change at some point. Change " - "accordingly here in case that 'the request is too large'. 
" - "A smaller number will results in smaller download chunks (slower).") + "The default is 1000, but what is allowed, depends on server " + "settings. Server settings may change at some point. Change " + "accordingly here in case that 'the request is too large'. " + "A smaller number will results in smaller download chunks (slower).") @click.option( "--cds_token", type=click.STRING, default=None, help="To identify with the CDS. Required only if no `.cdsapirc` file " - "exists in the home directory (see documentation). " - "You can find your token/key on your CDS user profile page. " - "Alternatively, you can also set an environment variable " - "`CDSAPI_KEY` with your token.") + "exists in the home directory (see documentation). " + "You can find your token/key on your CDS user profile page. " + "Alternatively, you can also set an environment variable " + "`CDSAPI_KEY` with your token.") def cli_download_era5land(path, start, end, variables, keep_original, as_grib, h_steps, max_request_size, cds_token): """ @@ -224,30 +230,31 @@ def cli_download_era5land(path, start, end, variables, keep_original, as_grib, keep_original=keep_original, stepsize='month', n_max_request=max_request_size, - cds_token=cds_token - ) + cds_token=cds_token) return status_code @click.command( "update_img", - context_settings={'show_default': True, - 'help_option_names': ['-h', '--help']}, + context_settings={ + 'show_default': True, + 'help_option_names': ['-h', '--help'] + }, short_help="Extend an existing set of images by downloading new data " - "with the same settings as before." - "NOTE: First use the `era5 download` or `era5land download` " - "programs.") + "with the same settings as before." + "NOTE: First use the `era5 download` or `era5land download` " + "programs.") @click.argument("path", type=click.Path(writable=True)) @click.option( "--cds_token", type=click.STRING, default=None, help="To identify with the CDS. 
Required only if no `.cdsapirc` file " - "exists in the home directory (see documentation). " - "You can find your token/key on your CDS user profile page. " - "Alternatively, you can also set an environment variable " - "`CDSAPI_KEY` with your token.") + "exists in the home directory (see documentation). " + "You can find your token/key on your CDS user profile page. " + "Alternatively, you can also set an environment variable " + "`CDSAPI_KEY` with your token.") def cli_update_img(path, cds_token=None): """ Download new images from CDS to your existing local archive. Use the same @@ -272,10 +279,12 @@ def cli_update_img(path, cds_token=None): @click.command( "reshuffle", - context_settings={'show_default': True, - 'help_option_names': ['-h', '--help']}, + context_settings={ + 'show_default': True, + 'help_option_names': ['-h', '--help'] + }, short_help="Convert previously downloaded ERA5/ERA5-Land image " - "data into a time series format.") + "data into a time series format.") @click.argument("IMG_PATH", type=click.Path(readable=True)) @click.argument("TS_PATH", type=click.Path(writable=True)) @click.argument("START", type=click.STRING) @@ -286,39 +295,42 @@ def cli_update_img(path, cds_token=None): type=click.STRING, default=None, help="Subset of variables to convert. Pass comma-separated names" - "to select multiple variables (short names, as in the input images, " - "e.g. `-v swvl1,stl1`). If not specified, all variables from the " - "first image file are used.") + "to select multiple variables (short names, as in the input images, " + "e.g. `-v swvl1,stl1`). If not specified, all variables from the " + "first image file are used.") @click.option( - '--land_points', '-l', is_flag=True, show_default=True, + '--land_points', + '-l', + is_flag=True, + show_default=True, default=False, help="Store only time series for points that are over land. 
" - "Default is False.") + "Default is False.") @click.option( '--bbox', nargs=4, type=click.FLOAT, default=None, help="4 NUMBERS | min_lon min_lat max_lon max_lat. " - "Set Bounding Box (lower left and upper right corner) " - "of area to reshuffle (WGS84). Default is global.") + "Set Bounding Box (lower left and upper right corner) " + "of area to reshuffle (WGS84). Default is global.") @click.option( "--h_steps", type=click.STRING, default="0,6,12,18", help="Full hour time stamps of images to include in time series. " - "To select multiple, pass comma-separated values here, " - "e.g. '--h_steps 0,12' will only include data from images at " - "0:00 and 12:00 UTC and ignore all other available time stamps." - "By default, we include data for every 6th hour each day.") + "To select multiple, pass comma-separated values here, " + "e.g. '--h_steps 0,12' will only include data from images at " + "0:00 and 12:00 UTC and ignore all other available time stamps." + "By default, we include data for every 6th hour each day.") @click.option( '--imgbuffer', '-b', type=click.INT, default=50, help="Number of images to read into memory at once before " - "conversion to time series. A larger buffer means faster " - "processing but requires more memory.") + "conversion to time series. 
A larger buffer means faster " + "processing but requires more memory.") def cli_reshuffle(img_path, ts_path, start, end, variables, land_points, bbox, h_steps, imgbuffer): """ @@ -365,8 +377,10 @@ def cli_reshuffle(img_path, ts_path, start, end, variables, land_points, bbox, @click.command( "update_ts", - context_settings={'show_default': True, - 'help_option_names': ['-h', '--help']}, + context_settings={ + 'show_default': True, + 'help_option_names': ['-h', '--help'] + }, short_help="Append new image data to an existing time series archive.") @click.argument("TS_PATH", type=click.Path(writable=True)) @click.option( @@ -375,17 +389,17 @@ def cli_reshuffle(img_path, ts_path, start, end, variables, land_points, bbox, type=click.STRING, default=None, help="Manually specify where the image data to extend the time " - "series are located. If this is not specified, we use the previously " - "used path from the `overview.yml` file stored with the time series " - "data.") + "series are located. If this is not specified, we use the previously " + "used path from the `overview.yml` file stored with the time series " + "data.") @click.option( '--imgbuffer', '-b', type=click.INT, default=50, help="Number of images to read into memory at once before " - "conversion to time series. A larger buffer means faster " - "processing but requires more memory.") + "conversion to time series. A larger buffer means faster " + "processing but requires more memory.") def cli_extend_ts(ts_path, imgpath, imgbuffer): """ Append new image data to an existing time series archive. 
The archive must @@ -410,7 +424,7 @@ def cli_extend_ts(ts_path, imgpath, imgbuffer): @click.group(short_help="ERA5 Command Line Programs imported from the " - "`ecmwf_models` pip package.") + "`ecmwf_models` pip package.") def era5(): pass @@ -421,7 +435,7 @@ def era5(): @click.group(short_help="ERA5-Land Command Line Programs imported from the " - "`ecmwf_models` pip package.") + "`ecmwf_models` pip package.") def era5land(): pass diff --git a/src/ecmwf_models/era5/download.py b/src/ecmwf_models/era5/download.py index a554410..13be6d8 100644 --- a/src/ecmwf_models/era5/download.py +++ b/src/ecmwf_models/era5/download.py @@ -18,7 +18,9 @@ from ecmwf_models.utils import ( lookup, update_image_summary_file, - default_variables, split_array, check_api_ready, + default_variables, + split_array, + check_api_ready, ) from ecmwf_models.extract import save_ncs_from_nc, save_gribs_from_grib @@ -329,8 +331,7 @@ def download_and_move( n_hsteps=len(h_steps), max_req_size=n_max_request, reduce=True, - daily_request=True if stepsize == "day" else False - ) + daily_request=True if stepsize == "day" else False) logger.info(f"Request is split into {len(req_periods)} chunks") logger.info(f"Target directory {target_path}") @@ -348,8 +349,7 @@ def _download(curr_start, curr_end): fname = "{start}_{end}.{ext}".format( start=curr_start.strftime("%Y%m%d"), end=curr_end.strftime("%Y%m%d"), - ext="grb" if grb else "nc" - ) + ext="grb" if grb else "nc") dl_file = os.path.join(downloaded_data_path, fname) @@ -394,8 +394,7 @@ def _download(curr_start, curr_end): target_path, product_name=product.upper(), keep_original=keep_original, - keep_prelim=keep_prelim - ) + keep_prelim=keep_prelim) else: save_ncs_from_nc( dl_file, @@ -404,13 +403,10 @@ def _download(curr_start, curr_end): grid=grid, remap_method=remap_method, keep_original=keep_original, - keep_prelim=keep_prelim - ) + keep_prelim=keep_prelim) return status_code - - # Since we download month/month or day/day we need to # collect all 
the status codes to return a valid # status code for the whole time period @@ -420,8 +416,8 @@ def _download(curr_start, curr_end): 'curr_start': [p[0] for p in req_periods], 'curr_end': [p[1] for p in req_periods] }, - logger_name='cdsapi', loglevel='DEBUG' - ) + logger_name='cdsapi', + loglevel='DEBUG') # remove temporary files if not keep_original: @@ -501,6 +497,9 @@ def download_record_extension(path, dry_run=False, cds_token=None): - timedelta(days=1) # yesterday return download_and_move( - path, startdate=startdate, enddate=enddate, - cds_token=cds_token, dry_run=dry_run, + path, + startdate=startdate, + enddate=enddate, + cds_token=cds_token, + dry_run=dry_run, **props['download_settings']) diff --git a/src/ecmwf_models/era5/reader.py b/src/ecmwf_models/era5/reader.py index b364863..7992b6d 100644 --- a/src/ecmwf_models/era5/reader.py +++ b/src/ecmwf_models/era5/reader.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - """ This module contains ERA5/ERA5-Land specific child classes of the netcdf and grib base classes, that are used for reading all ecmwf products. diff --git a/src/ecmwf_models/era5/reshuffle.py b/src/ecmwf_models/era5/reshuffle.py index 020dcec..2e21e39 100644 --- a/src/ecmwf_models/era5/reshuffle.py +++ b/src/ecmwf_models/era5/reshuffle.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - """ Image to time series conversion tools. """ @@ -17,6 +16,7 @@ from ecmwf_models.grid import ERA_RegularImgGrid, ERA5_RegularImgLandGrid from ecmwf_models.utils import parse_filetype, parse_product, get_first_last_image_date + def extend_ts(ts_path, **img2ts_kwargs): """ Append any new data from the image path to the time series data. @@ -41,8 +41,7 @@ def extend_ts(ts_path, **img2ts_kwargs): "No overview.yml file found in the passed directory. " "This file is required to use the same settings to extend an " "existing record. NOTE: Use the `era5 download` or " - "`era5land download` programs first." 
- ) + "`era5land download` programs first.") ts_props = read_summary_yml(ts_path) @@ -59,7 +58,7 @@ def extend_ts(ts_path, **img2ts_kwargs): img2ts_kwargs[k] = v if 'enddate' not in img2ts_kwargs: - img2ts_kwargs['enddate'] = None # All available images + img2ts_kwargs['enddate'] = None # All available images img2ts(outputpath=ts_path, **img2ts_kwargs) @@ -136,8 +135,7 @@ def img2ts( if land_points: if product == "era5": - grid = ERA5_RegularImgLandGrid( - resolution=0.25, bbox=bbox) + grid = ERA5_RegularImgLandGrid(resolution=0.25, bbox=bbox) elif product == "era5-land": grid = ERA5_RegularImgLandGrid(resolution=0.1, bbox=bbox) else: @@ -187,8 +185,11 @@ def img2ts( data = input_dataset.read(first_date_time) ts_attributes = data.metadata - props = {'product': product, 'filetype': filetype, - 'last_update': datetime.now()} + props = { + 'product': product, + 'filetype': filetype, + 'last_update': datetime.now() + } kwargs = { 'input_root': input_root, @@ -225,4 +226,3 @@ def img2ts( reshuffler.calc() update_ts_summary_file(outputpath, props) - diff --git a/src/ecmwf_models/extract.py b/src/ecmwf_models/extract.py index d01e574..e5648f6 100644 --- a/src/ecmwf_models/extract.py +++ b/src/ecmwf_models/extract.py @@ -5,25 +5,26 @@ import xarray as xr from datedown.fname_creator import create_dt_fpath +from ecmwf_models.globals import (IMG_FNAME_TEMPLATE, + IMG_FNAME_DATETIME_FORMAT, EXPVER, SUBDIRS) from ecmwf_models.globals import ( - IMG_FNAME_TEMPLATE, - IMG_FNAME_DATETIME_FORMAT, - EXPVER, SUBDIRS -) -from ecmwf_models.globals import ( - Cdo, cdo_available, CdoNotFoundError, - pygrib, pygrib_available, PygribNotFoundError, + Cdo, + cdo_available, + CdoNotFoundError, + pygrib, + pygrib_available, + PygribNotFoundError, ) def save_ncs_from_nc( - input_nc, - output_path, - product_name, - grid=None, - keep_original=True, - remap_method="bil", - keep_prelim=True, + input_nc, + output_path, + product_name, + grid=None, + keep_original=True, + remap_method="bil", + 
keep_prelim=True, ): """ Split the downloaded netcdf file into daily files and add to folder @@ -48,8 +49,7 @@ def save_ncs_from_nc( product="{product}", type='AN', datetime=IMG_FNAME_DATETIME_FORMAT, - ext='nc' - ) + ext='nc') nc_in = xr.open_dataset(input_nc, mask_and_scale=True) if 'valid_time' in nc_in.variables: @@ -86,8 +86,8 @@ def save_ncs_from_nc( continue if len(ext) > 0: - filename_templ = _filename_templ.format( - product=product_name + '-' + ext) + filename_templ = _filename_templ.format(product=product_name + + '-' + ext) else: filename_templ = _filename_templ.format(product=product_name) @@ -130,11 +130,11 @@ def save_ncs_from_nc( def save_gribs_from_grib( - input_grib, - output_path, - product_name, - keep_original=True, - keep_prelim=True, + input_grib, + output_path, + product_name, + keep_original=True, + keep_prelim=True, ): """ Split the downloaded grib file into daily files and add to folder structure @@ -163,8 +163,7 @@ def save_gribs_from_grib( product="{product}", type='AN', datetime=IMG_FNAME_DATETIME_FORMAT, - ext='grb' - ) + ext='grb') grib_in.seek(0) prev_date = None @@ -184,14 +183,13 @@ def save_gribs_from_grib( continue if len(ext) > 0: - filename_templ = _filename_templ.format( - product=product_name + '-' + ext) + filename_templ = _filename_templ.format(product=product_name + + '-' + ext) else: filename_templ = _filename_templ.format(product=product_name) filepath = create_dt_fpath( - filedate, root=output_path, fname=filename_templ, - subdirs=SUBDIRS) + filedate, root=output_path, fname=filename_templ, subdirs=SUBDIRS) if not os.path.exists(os.path.dirname(filepath)): os.makedirs(os.path.dirname(filepath)) diff --git a/src/ecmwf_models/globals.py b/src/ecmwf_models/globals.py index eb9c535..c573416 100644 --- a/src/ecmwf_models/globals.py +++ b/src/ecmwf_models/globals.py @@ -1,7 +1,6 @@ import os from pathlib import Path - IMG_FNAME_TEMPLATE = "{product}_{type}_{datetime}.{ext}" IMG_FNAME_DATETIME_FORMAT = "%Y%m%d_%H%M" @@ -33,6 
+32,7 @@ class PygribNotFoundError(ModuleNotFoundError): + def __init__(self, msg=None): _default_msg = ("pygrib could not be imported. " "Pleas run `conda install -c conda-forge pygrib` to " @@ -41,8 +41,10 @@ def __init__(self, msg=None): class CdoNotFoundError(ModuleNotFoundError): + def __init__(self, msg=None): - _default_msg = ("cdo and/or python-cdo not installed. " - "Pleas run `conda install -c conda-forge cdo` and also " - "`pip install cdo`.") - self.msg = _default_msg if msg is None else msg \ No newline at end of file + _default_msg = ( + "cdo and/or python-cdo not installed. " + "Pleas run `conda install -c conda-forge cdo` and also " + "`pip install cdo`.") + self.msg = _default_msg if msg is None else msg diff --git a/src/ecmwf_models/grid.py b/src/ecmwf_models/grid.py index 39f0087..a5fc178 100644 --- a/src/ecmwf_models/grid.py +++ b/src/ecmwf_models/grid.py @@ -9,6 +9,7 @@ from typing import Tuple import xarray as xr + def trafo_lon(lon): """ 0...360 -> 0...180...-180 @@ -27,6 +28,7 @@ def trafo_lon(lon): lon[lons_gt_180] = lon[lons_gt_180] - 360.0 return lon + def safe_arange(start, stop, step): """ Like numpy.arange, but floating point precision is kept. @@ -47,10 +49,13 @@ def safe_arange(start, stop, step): Range of values in interval at the given step size / sampling """ f_step = (1. / float(step)) - vals = np.arange(float(start) * f_step, float(stop) * f_step, - float(step) * f_step) + vals = np.arange( + float(start) * f_step, + float(stop) * f_step, + float(step) * f_step) return vals / f_step + def get_grid_resolution(lats: np.ndarray, lons: np.ndarray) -> (float, float): """ try to derive the grid resolution from given coords. 
@@ -113,10 +118,9 @@ def ERA5_RegularImgLandGrid( ds = ds.assign_coords({'longitude': trafo_lon(ds['longitude'].values)}) if bbox is not None: ds = ds.sel(latitude=slice(bbox[3], bbox[1])) - ds = ds.isel(longitude=np.where(( - (ds['longitude'].values >= bbox[0]) & - (ds['longitude'].values <= bbox[2]) - ))[0]) + ds = ds.isel( + longitude=np.where(((ds['longitude'].values >= bbox[0]) + & (ds['longitude'].values <= bbox[2])))[0]) land_mask = np.array(ds.values == 1.0) @@ -167,8 +171,8 @@ def ERA_RegularImgGrid( grid = grid.to_cell_grid(cellsize=5.0) if bbox is not None: - subgpis = grid.get_bbox_grid_points(latmin=bbox[1], latmax=bbox[3], - lonmin=bbox[0], lonmax=bbox[2]) + subgpis = grid.get_bbox_grid_points( + latmin=bbox[1], latmax=bbox[3], lonmin=bbox[0], lonmax=bbox[2]) grid = grid.subgrid_from_gpis(subgpis) return grid diff --git a/src/ecmwf_models/interface.py b/src/ecmwf_models/interface.py index eed3da6..3af58df 100644 --- a/src/ecmwf_models/interface.py +++ b/src/ecmwf_models/interface.py @@ -19,7 +19,6 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
- """ Base classes for reading downloaded ERA netcdf and grib images and stacks """ @@ -39,12 +38,13 @@ from ecmwf_models.grid import trafo_lon from ecmwf_models.utils import lookup from ecmwf_models.globals import ( - IMG_FNAME_TEMPLATE, IMG_FNAME_DATETIME_FORMAT, - SUPPORTED_PRODUCTS, SUBDIRS, -) -from ecmwf_models.globals import ( - pygrib, pygrib_available, PygribNotFoundError + IMG_FNAME_TEMPLATE, + IMG_FNAME_DATETIME_FORMAT, + SUPPORTED_PRODUCTS, + SUBDIRS, ) +from ecmwf_models.globals import (pygrib, pygrib_available, + PygribNotFoundError) class ERANcImg(ImageBase): @@ -75,14 +75,14 @@ class ERANcImg(ImageBase): """ def __init__( - self, - filename, - product, - parameter=None, - subgrid=None, - mask_seapoints=False, - array_1D=False, - mode='r', + self, + filename, + product, + parameter=None, + subgrid=None, + mask_seapoints=False, + array_1D=False, + mode='r', ): super(ERANcImg, self).__init__(filename, mode=mode) @@ -90,7 +90,7 @@ def __init__( if parameter is not None: # look up short names self.parameter = lookup( - product, np.atleast_1d(parameter))["short_name"].values + product, np.atleast_1d(parameter))["short_name"].values else: self.parameter = None @@ -135,8 +135,10 @@ def read(self, timestamp=None): return_img = {} return_metadata = {} - grid = gridfromdims(trafo_lon(dataset['longitude'].values), - dataset['latitude'].values, origin='top') + grid = gridfromdims( + trafo_lon(dataset['longitude'].values), + dataset['latitude'].values, + origin='top') if self.subgrid is not None: gpis = grid.find_nearest_gpi(self.subgrid.activearrlon, @@ -149,11 +151,10 @@ def read(self, timestamp=None): variable = dataset[name] except KeyError: path, f = os.path.split(self.filename) - warnings.warn( - f"Cannot load variable {name} from file {f}. " - f"Filling image with NaNs.") - dat = np.full(grid.shape if gpis is None else len(gpis), - np.nan) + warnings.warn(f"Cannot load variable {name} from file {f}. 
" + f"Filling image with NaNs.") + dat = np.full( + grid.shape if gpis is None else len(gpis), np.nan) return_img[name] = dat continue @@ -268,7 +269,8 @@ def __init__( if parameter is not None: # look up short names - self.parameter = lookup(product, np.atleast_1d(parameter))["short_name"].values + self.parameter = lookup( + product, np.atleast_1d(parameter))["short_name"].values else: self.parameter = None @@ -282,20 +284,18 @@ def __init__( # the goal is to use ERA5-T*.nc if necessary, but prefer ERA5*.nc self.fn_templ_priority = [ - IMG_FNAME_TEMPLATE.format(product=(p + ext).upper(), - type='*', - datetime="{datetime}", - ext='nc') - for ext in ['', '-T'] for p in SUPPORTED_PRODUCTS + IMG_FNAME_TEMPLATE.format( + product=(p + ext).upper(), + type='*', + datetime="{datetime}", + ext='nc') for ext in ['', '-T'] for p in SUPPORTED_PRODUCTS ] super(ERANcDs, self).__init__( root_path, ERANcImg, - fname_templ=IMG_FNAME_TEMPLATE.format(product='*', - type='*', - datetime='{datetime}', - ext='nc'), + fname_templ=IMG_FNAME_TEMPLATE.format( + product='*', type='*', datetime='{datetime}', ext='nc'), datetime_format=IMG_FNAME_DATETIME_FORMAT, subpath_templ=SUBDIRS, exact_templ=False, @@ -384,16 +384,14 @@ def tstamps_for_daterange(self, start_date, end_date): class ERAGrbImg(ImageBase): - def __init__( - self, - filename, - product, - parameter=None, - subgrid=None, - mask_seapoints=False, - array_1D=True, - mode='r' - ): + def __init__(self, + filename, + product, + parameter=None, + subgrid=None, + mask_seapoints=False, + array_1D=True, + mode='r'): """ Reader for a single ERA grib file. The main purpose of this class is to use it in the time series conversion routine. 
To read downloaded image @@ -476,8 +474,10 @@ def read(self, timestamp=None): param_data = message.values if grid is None: - grid = BasicGrid(trafo_lon(lons).flatten(), lats.flatten(), - shape=param_data.shape) + grid = BasicGrid( + trafo_lon(lons).flatten(), + lats.flatten(), + shape=param_data.shape) param_data = param_data.flatten() @@ -532,8 +532,7 @@ def read(self, timestamp=None): param_data = np.full(np.prod(self.subgrid.shape), np.nan) warnings.warn( f"Cannot load variable {param_name} from file " - f"{self.filename}. Filling image with NaNs." - ) + f"{self.filename}. Filling image with NaNs.") return_img[param_name] = param_data return_metadata[param_name] = {} return_metadata[param_name]["long_name"] = lookup( @@ -574,6 +573,7 @@ def close(self): class ERAGrbDs(MultiTemporalImageBase): + def __init__( self, root_path, @@ -627,10 +627,8 @@ def __init__( "array_1D": array_1D, } - fname_templ = IMG_FNAME_TEMPLATE.format(product="*", - type="*", - datetime="{datetime}", - ext="grb") + fname_templ = IMG_FNAME_TEMPLATE.format( + product="*", type="*", datetime="{datetime}", ext="grb") super(ERAGrbDs, self).__init__( root_path, diff --git a/src/ecmwf_models/utils.py b/src/ecmwf_models/utils.py index f132cee..7839b95 100644 --- a/src/ecmwf_models/utils.py +++ b/src/ecmwf_models/utils.py @@ -20,7 +20,6 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - """ Utility functions for all data products in this package. 
""" @@ -36,10 +35,10 @@ from ecmwf_models.extract import save_gribs_from_grib from repurpose.misc import find_first_at_depth -from ecmwf_models.globals import ( - DOTRC, CDS_API_URL, IMG_FNAME_TEMPLATE, - IMG_FNAME_DATETIME_FORMAT, SUPPORTED_PRODUCTS -) +from ecmwf_models.globals import (DOTRC, CDS_API_URL, IMG_FNAME_TEMPLATE, + IMG_FNAME_DATETIME_FORMAT, + SUPPORTED_PRODUCTS) + def parse_product(inpath: str) -> str: """ @@ -63,7 +62,8 @@ def parse_product(inpath: str) -> str: elif "era5" in props['product'].lower(): return "era5" # also era5-t else: - raise ValueError(f"Could not derive product name from data in {inpath}") + raise ValueError( + f"Could not derive product name from data in {inpath}") def parse_filetype(inpath: str) -> str: @@ -176,12 +176,12 @@ def default_variables(product="era5", format='dl_name'): def make_era5_land_definition_file( - data_file, - out_file, - data_file_y_res=0.25, - ref_var="lsm", - threshold=0.5, - exclude_antarctica=True, + data_file, + out_file, + data_file_y_res=0.25, + ref_var="lsm", + threshold=0.5, + exclude_antarctica=True, ): """ Create a land grid definition file from a variable within a downloaded, @@ -215,8 +215,7 @@ def make_era5_land_definition_file( for dim_name in ds_in.dimensions.keys(): ds_out.createDimension(dim_name, size=ds_in.dimensions[dim_name].size) - ds_out.createVariable(dim_name, "float32", (dim_name,), - zlib=True) + ds_out.createVariable(dim_name, "float32", (dim_name,), zlib=True) ds_out.variables[dim_name][:] = ds_in.variables[dim_name][:] ref = ds_in.variables[ref_var] @@ -232,13 +231,12 @@ def make_era5_land_definition_file( if exclude_antarctica: cut_off_lat = -60.0 index_thres_lat = ((180.0 / data_file_y_res) + 1) - ( - (90.0 + cut_off_lat) / data_file_y_res) + (90.0 + cut_off_lat) / data_file_y_res) land_mask[int(index_thres_lat):, :] = np.nan else: cut_off_lat = None - ds_out.createVariable("land", "float32", - (lat_name, lon_name), zlib=True) + ds_out.createVariable("land", "float32", 
(lat_name, lon_name), zlib=True) ds_out.variables["land"][:] = land_mask land_attrs = OrderedDict([ @@ -298,8 +296,7 @@ def check_api_ready() -> bool: 'Neither CDSAPI_KEY variable nor .cdsapirc file found, ' 'download will not work! ' 'Please create a .cdsapirc file with your API key. ' - 'See: https://cds.climate.copernicus.eu/how-to-api' - ) + 'See: https://cds.climate.copernicus.eu/how-to-api') else: return True else: @@ -353,9 +350,8 @@ def get_first_last_image_date(path, start_from_last=True): Parse date from the last found image file that matches `fntempl`. """ try: - props = img_infer_file_props(path, - fntempl=IMG_FNAME_TEMPLATE, - start_from_last=start_from_last) + props = img_infer_file_props( + path, fntempl=IMG_FNAME_TEMPLATE, start_from_last=start_from_last) dt = datetime.strptime(props['datetime'], IMG_FNAME_DATETIME_FORMAT) except ValueError: raise ValueError('Could not infer date from filenames. ' @@ -382,10 +378,10 @@ def update_image_summary_file(data_path: str, If not specified, then `data_path` is used. If a file already exists, it will be overwritten. 
""" - first_image_date = get_first_last_image_date(data_path, - start_from_last=False) - last_image_date = get_first_last_image_date(data_path, - start_from_last=True) + first_image_date = get_first_last_image_date( + data_path, start_from_last=False) + last_image_date = get_first_last_image_date( + data_path, start_from_last=True) props = img_infer_file_props(data_path, start_from_last=False) _ = props.pop("datetime") @@ -414,7 +410,8 @@ def assert_product(product: str) -> str: return product - if __name__ == '__main__': - save_gribs_from_grib("/tmp/era5/grb/temp_downloaded/20240730_20240731.grb", - output_path='/tmp/era5/grb', product_name='ERA5') \ No newline at end of file + save_gribs_from_grib( + "/tmp/era5/grb/temp_downloaded/20240730_20240731.grb", + output_path='/tmp/era5/grb', + product_name='ERA5') From 204e2647f0b5d0e40f404b01152ad443e2deaaf3 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Sat, 26 Oct 2024 20:07:30 +0200 Subject: [PATCH 12/20] Update docs --- .readthedocs.yml | 24 +- docs/download.rst | 79 ++-- docs/env.yml | 7 + docs/img2ts.rst | 41 +-- docs/img_read.rst | 76 ++-- docs/index.rst | 10 +- docs/ts_read.rst | 25 +- docs/variables_era5.rst | 582 ++++++++++++++++++++++++++++++ docs/variables_era5land.rst | 114 ++++++ setup.cfg | 10 + src/ecmwf_models/era5/download.py | 18 +- 11 files changed, 833 insertions(+), 153 deletions(-) create mode 100644 docs/env.yml create mode 100644 docs/variables_era5.rst create mode 100644 docs/variables_era5land.rst diff --git a/.readthedocs.yml b/.readthedocs.yml index a7c33b6..4093fff 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,19 +1,17 @@ version: 2 -sphinx: - configuration: docs/conf.py - -formats: - - pdf - -submodules: - include: all - -conda: - environment: environment/latest.yml +build: + os: ubuntu-22.04 + tools: + python: mambaforge-4.10 python: - version: 3.7 install: - method: pip - path: . 
+ path: .[docs] + +sphinx: + configuration: docs/conf.py + +conda: + environment: docs/env.yml \ No newline at end of file diff --git a/docs/download.rst b/docs/download.rst index 600bb6c..0ee917c 100644 --- a/docs/download.rst +++ b/docs/download.rst @@ -1,70 +1,41 @@ -Downloading ERA5 Data -========================== +Downloading ERA5 and ERA5-Land data +=================================== ERA5 (and ERA5-Land) data can be downloaded manually from the `Copernicus Data Store (CDS) `_ or automatically via the CDS api, -as done in the download module (era5_download). Before you can use this, you -have to set up an `account at the CDS -`_ and setup -the `CDS key `_. - -Then you can use the program ``era5_download`` to download ERA5 images between -a passed start and end date. -``era5_download --help`` will show additional information on using the command. +as done in the download modules (``era5 download`` and ``era5land download``). +Before you can use this, you have to set up an `account at the CDS +`_ and get your +`API key `_. +Then you can use the programs ``era5 download`` and ``era5land download`` to +download ERA5 images between a passed start and end date. +Passing ``--help`` will show additional information on using the commands. For example, the following command in your terminal would download ERA5 images for all available layers of soil moisture in netcdf format, between -January 1st and February 1st 2000 in grib format into ``/path/to/storage``. +January 1st and February 1st 2000 in netcdf format into ``/path/to/storage``. The data will be stored in subfolders of the format ``YYYY/jjj``. The temporal -resolution of the images is 6 hours by default. +resolution of the images is 6 hours by default, but can be changed using the +``--h_steps`` option. .. 
code-block:: shell

-    era5_download /path/to/storage -s 2000-01-01 -e 2000-02-01 --variables swvl1 swvl2 swvl3 swvl4
+    era5 download /path/to/storage -s 2000-01-01 -e 2000-02-01 \
+        --variables swvl1,swvl2,swvl3,swvl4 --h_steps 0,6,12,18

The names of the variables to download can be its long names, the short names
-(as in the example) or the parameter IDs. We use the ``era5_lut.csv`` file to
-look up the right name for the CDS API.
-Other flags, that can be activated in ``era5_download`` are:
-
-- **-h (--help)** : shows the help text for the download function
-- **-p (--product)**: specify the ERA5 product to download. Choose either ERA5 or ERA5-Land. Default is ERA5.
-- **-keep (--keep_original)** : keeps the originally downloaded files as well.
-  We split the downloaded, monthly stacks into single images and discard the original
-  files by default.
-- **-grb (--as_grib)** : download the data in grib format instead of the default nc4
-  format (grib reading is not supported on Windows OS).
-- **--h_steps** : full hours for which images are downloaded (e.g. --h_steps 0
-  would download only data at 00:00 UTC). By default we use 0, 6, 12 and 18.
-
-
-Downloading ERA Interim Data
-=================================
-**ERA-Interim has been decommissioned. Use ERA5 instead.**
+(as in the example) or the parameter IDs. We use the file
+``ecmwf_models/era5/era5_lut.csv`` and ``ecmwf_models/era5/era5-land_lut.csv``
+to look up the right name for the CDS API.
-
-ERA-Interim data can be downloaded manually from the ECMWF servers. It can also
-be done automatically using the ECMWF API. To use the ECMWF API you have to be
-registered, install the ecmwf-api Python package and setup the ECMWF API Key. A
-guide for this is provided by `ECMWF
-`_.
-
-After that you can use the command line program ``eraint_download`` to download
-images with a temporal resoltuion of 6 hours between a passed start and end date.
-``eraint_download --help`` will show additional information on using the command. - -For example, the following command in your terminal would download ERA Interim -soil moisture images of all available layers (see the -`Variable DB `_) in netcdf format on -the default gaussian grid for ERA-Interim (0.75°x0.75°) into -the folder ``/path/to/storage`` between January 1st and February 1st 2000. -The data will be stored in subfolders of the format ``YYYY/jjj``, where ``YYYY`` -describes the year and ``jjj`` the day of the year for the downloaded files. - -.. code-block:: shell +By default, the command expects that you have set up your ``.cdsapirc`` file +to identify with the data store as described above. Alternatively you can pass +your token directly with the download command using the ``--cds_token`` option. +Or you can set an environment variable ``CDSAPI_KEY`` that contains your token. - eraint_download /path/to/storage -s 2000-01-01 -e 2000-02-01 --variables swvl1 swvl2 swvl3 swvl4 +We recommend downloading data in netcdf format, however, using the ``--as_grib`` +option, you can also download data in grib format. -Additional optional parameters allow downloading images in netcdf format, and -in a different spatial resolution (see the --help function and descriptions for -downloading ERA5 data) \ No newline at end of file +For all other available options, type ``era5 download --help``, +or ``era5land download --help`` respectively. \ No newline at end of file diff --git a/docs/env.yml b/docs/env.yml new file mode 100644 index 0000000..a379691 --- /dev/null +++ b/docs/env.yml @@ -0,0 +1,7 @@ +# To keep the RTD build as small as possible, we define a separate .yml here +name: docs +channels: +- conda-forge +- defaults +dependencies: +- python=3.12 \ No newline at end of file diff --git a/docs/img2ts.rst b/docs/img2ts.rst index fd1d1b1..e712574 100644 --- a/docs/img2ts.rst +++ b/docs/img2ts.rst @@ -7,44 +7,35 @@ often need for e.g. validation studies. 
This can be done by stacking the images into a netCDF file and choosing the correct chunk sizes or a lot of other methods. We have chosen to do it in the following way: -- Store only the reduced gaußian grid points (for grib data) since that saves space. -- Store the time series in netCDF4 in the Climate and Forecast convention +- Store the time series as netCDF4 Climate and Forecast convention (CF) `Orthogonal multidimensional array representation `_ -- Store the time series in 5x5 degree cells. This means there will be 2566 cell +- Store the time series in 5x5 degree cells. This means there will be up to 2566 cell files and a file called ``grid.nc`` which contains the information about which grid point is stored in which file. This allows us to read a whole 5x5 degree area into memory and iterate over the time series quickly. .. image:: 5x5_cell_partitioning.png - :target: _images/5x5_cell_partitioning.png + :target: 5x5_cell_partitioning.png -This conversion can be performed using the ``era5_reshuffle`` (respectively -``eraint_reshuffle``) command line program. An example would be: +This conversion can be performed using the ``era5 reshuffle`` (respectively +``era5land reshuffle``) command line program. An example would be: .. code-block:: shell - era5_reshuffle /era_data /timeseries/data 2000-01-01 2001-01-01 swvl1 swvl2 + era5 reshuffle /path/to/img /out/ts/path 2000-01-01 2000-12-31 \ + -v swvl1,swvl2 --h_steps 0,12 --bbox -10 30 30 60 --land_points -Which would take 6-hourly ERA5 images stored in ``/era_data`` from January -1st 2000 to January 1st 2001 and store the parameters "swvl1" and "swvl2" as time -series in the folder ``/timeseries/data``. If you time series should have a different -resolution than 6H, use the ``h_steps`` flag here accordingly (images to use for time -series generation have to be in the downloaded raw data). -The passed names have to correspond with the names in the downloaded file, -i.e. use the variable short names here. 
-Other flags, that can be used in ``era5_reshuffle`` are:
+Which would take (previously downloaded) ERA5 images (at time stamps 0:00 and 12:00 UTC)
+stored in `/path/to/img` from January 1st 2000 to December 31st 2000 and store the
+data within land points of the selected bounding box of variables "swvl1" and
+"swvl2" as time series in the folder ``/out/ts/path``.

-- **-h (--help)** : Shows the help text for the reshuffle function
-- **--land_points** : Reshuffle and store only data over land points.
-- **-h_steps (--as_grib)** : full hours for which images are reshuffled (e.g. --h_steps 0
-  would reshuffle only data at 00:00 UTC). By default we use 0, 6, 12 and 18.
-- **--imgbuffer** : The number of images that are read into memory before converting
-  them into time series. Bigger numbers make the conversion faster but consume more memory.
+The passed variable names (``-v``) have to correspond with the names in the
+downloaded file, i.e. use the variable short names here.
+For all other options see the output of ``era5 reshuffle --help`` and
+``era5land reshuffle --help``.

Conversion to time series is performed by the `repurpose package
-`_ in the background. For custom settings
-or other options see the `repurpose documentation
-`_ and the code in
-``ecmwf_models.reshuffle``.
+`_ in the background.
diff --git a/docs/img_read.rst b/docs/img_read.rst
index 2e55c57..67a9d1e 100644
--- a/docs/img_read.rst
+++ b/docs/img_read.rst
@@ -1,48 +1,44 @@
Reading data
============
-After downloading the data for ERA Interim or ERA5 via ``eraint_download`` resp.
-``era5_download``, images can be read with the ``ERA5GrbDs`` and
-``ERA5NcDs`` (for grib and netcdf image stacks), respectively the
-``ERA5GrbImg`` and ``ERA5NcImg`` (for single grib and netcdf images) classes.
-The respective functions for reading images are defined in
-``ecmwf_models.erainterim.interface`` ``ecmwf_models.era5.interface``.
+To read the downloaded image data in python we recommend standard libraries such +as `xarray `_ or +`netCDF4 `_. -The following examples are shown for ERA5 data, but work the same way with the -respective ERA Interim functions. +However, you can also use the internal classes from this package. The main +purpose of these, however, is to use them in the time series conversion +module. -For example, you can read the image for a single variable at a specific date. -In this case for a stack of downloaded image files: +For example, you can read the image for some variables at a specific date. +In this case for a stack of downloaded image files (the chosen date +must be available of course): .. code-block:: python - # Script to load a stack of downloaded netcdf images - # and read a variable for a single date. - from ecmwf_models.era5.interface import ERA5NcDs - root_path = "/path/to/netcdf_storage" - ds = ERA5NcDs(root_path, parameter='swvl1') - data = ds.read(datetime(2010, 1, 1, 0)) - - # Script to load a stack of downloaded grib images - # and read a variable for a single date. - from ecmwf_models.era5.interface import ERA5GrbDs - root_path = "/path/to/grib_storage" - ds = ERA5GrbDs(root_path, parameter='swvl1') - data = ds.read(datetime(2010, 1, 1, 0)) - - -You can also read multiple variables at a specific date by passing a list of parameters. -In this case for a set of netcdf files: - -.. code-block:: python - - # Script to load a stack of downloaded netcdf images - # and read two variables for a single date. - from ecmwf_models.era5.interface import ERA5NcDs - root_path = "/path/to/storage" - ds = ERA5NcDs(root_path, parameter=['swvl1', 'swvl2']) - data = ds.read(datetime(2000, 1, 1, 0)) - - -All images between two given dates can be read using the -``iter_images`` methods of all the image stack reader classes. 
\ No newline at end of file
+    >> from ecmwf_models.era5.reader import ERA5NcDs
+    >> root_path = "/path/to/netcdf_storage"
+    >> ds = ERA5NcDs(root_path, parameter=['swvl1'])
+    >> img = ds.read(datetime(2010, 1, 1, 0))
+
+    # To read the coordinates
+    >> img.lat # also: img.lon
+    array([[ 90. , 90. , 90. , ..., 90. , 90. , 90. ],
+    [ 89.9, 89.9, 89.9, ..., 89.9, 89.9, 89.9],
+    [ 89.8, 89.8, 89.8, ..., 89.8, 89.8, 89.8],
+    ...,
+    [-89.8, -89.8, -89.8, ..., -89.8, -89.8, -89.8],
+    [-89.9, -89.9, -89.9, ..., -89.9, -89.9, -89.9],
+    [-90. , -90. , -90. , ..., -90. , -90. , -90. ]])
+
+    # To read the data variables
+    >> img.data['swvl1']
+    array([[ nan, nan, nan, ..., nan, nan, nan],
+    [ nan, nan, nan, ..., nan, nan, nan],
+    [ nan, nan, nan, ..., nan, nan, nan],
+    ...,
+    [0.159 , 0.1589, 0.1588, ..., 0.1595, 0.1594, 0.1592],
+    [0.1582, 0.1582, 0.1581, ..., 0.1588, 0.1587, 0.1584],
+    [0.206 , 0.206 , 0.206 , ..., 0.206 , 0.206 , 0.206 ]])
+
+
+The equivalent class to read grib files is called ``ERA5GrbDs``.
diff --git a/docs/index.rst b/docs/index.rst
index 0ec6abf..04cfd6b 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,12 +1,10 @@
-.. include:: ../README.rst
-
+.. include:: readme.rst
.. include:: download.rst
-
.. include:: img_read.rst
-
.. include:: img2ts.rst
-
.. include:: ts_read.rst
+.. include:: variables_era5.rst
+.. include:: variables_era5land.rst

Contents
========
@@ -18,6 +16,8 @@ Contents
   Image Reading
   Time Series Creation
   Time Series Reading
+   All variables available for ERA5
+   All variables available for ERA5-Land
   License
   Authors
   Changelog
diff --git a/docs/ts_read.rst b/docs/ts_read.rst
index d8f0b37..fa81123 100644
--- a/docs/ts_read.rst
+++ b/docs/ts_read.rst
@@ -1,21 +1,32 @@
-
Reading converted time series data
----------------------------------
-For reading time series data, that the ``era5_reshuffle`` and ``eraint_reshuffle``
-command produces, the class ``ERATs`` can be used.
-Optional arguments that are passed to the parent class +For reading time series data, that the ``era5 reshuffle`` and ``era5land reshuffle`` +command produces, the class ``ERATs`` can be used. This will return a time series +of values for the chosen location. + +Optional arguments that are forwarded to the parent class (``OrthoMultiTs``, as defined in `pynetcf.time_series `_) can be passed as well: .. code-block:: python - from ecmwf_models import ERATs + >> from ecmwf_models import ERATs # read_bulk reads full files into memory # read_ts takes either lon, lat coordinates to perform a nearest neighbour search # or a grid point index (from the grid.nc file) and returns a pandas.DataFrame. - ds = ERATs(ts_path, ioclass_kws={'read_bulk': True}) - ts = ds.read_ts(45, 15) + >> ds = ERATs(ts_path, ioclass_kws={'read_bulk': True}) + + >> ds.read(18, 48) # (lon, lat) + + swvl1 swvl2 + 2024-04-01 00:00:00 0.318054 0.329590 + 2024-04-01 12:00:00 0.310715 0.325958 + 2024-04-02 00:00:00 0.360229 0.323502 + ... ... ... + 2024-04-04 12:00:00 0.343353 0.348755 + 2024-04-05 00:00:00 0.350266 0.346558 + 2024-04-05 12:00:00 0.343994 0.344498 Bulk reading speeds up reading multiple points from a cell file by storing the file in memory for subsequent calls. Either Longitude and Latitude can be passed diff --git a/docs/variables_era5.rst b/docs/variables_era5.rst new file mode 100644 index 0000000..c0d505a --- /dev/null +++ b/docs/variables_era5.rst @@ -0,0 +1,582 @@ +ERA5 Variables +-------------- + +The following table contains all variable names that can be used for the +``era5 download`` and ``era5 reshuffle`` programs. + +Note that the “default” column indicates which variables are downloaded +if none are specified. 
+ ++------------------------------------------------+-----------+--------+ +| **Variable** | **Short | **Def | +| | Name** | ault** | ++================================================+===========+========+ +| 100 metre U wind component | u100 | 0 | ++------------------------------------------------+-----------+--------+ +| 100 metre V wind component | v100 | 0 | ++------------------------------------------------+-----------+--------+ +| Neutral wind at 10 m u-component | u10n | 0 | ++------------------------------------------------+-----------+--------+ +| 10 metre U wind component | u10 | 0 | ++------------------------------------------------+-----------+--------+ +| Neutral wind at 10 m v-component | v10n | 0 | ++------------------------------------------------+-----------+--------+ +| 10 metre V wind component | v10 | 0 | ++------------------------------------------------+-----------+--------+ +| 10 metre wind gust since previous | fg10 | 0 | +| post-processing | | | ++------------------------------------------------+-----------+--------+ +| 2 metre dewpoint temperature | d2m | 0 | ++------------------------------------------------+-----------+--------+ +| 2 metre temperature | t2m | 0 | ++------------------------------------------------+-----------+--------+ +| Air density over the oceans | p140209 | 0 | ++------------------------------------------------+-----------+--------+ +| Angle of sub-gridscale orography | anor | 0 | ++------------------------------------------------+-----------+--------+ +| Anisotropy of sub-gridscale orography | isor | 0 | ++------------------------------------------------+-----------+--------+ +| Benjamin-Feir index | bfi | 0 | ++------------------------------------------------+-----------+--------+ +| Boundary layer dissipation | bld | 0 | ++------------------------------------------------+-----------+--------+ +| Boundary layer height | blh | 0 | ++------------------------------------------------+-----------+--------+ +| Charnock | 
chnk | 0 | ++------------------------------------------------+-----------+--------+ +| Clear-sky direct solar radiation at surface | cdir | 0 | ++------------------------------------------------+-----------+--------+ +| Cloud base height | cbh | 0 | ++------------------------------------------------+-----------+--------+ +| Coefficient of drag with waves | cdww | 0 | ++------------------------------------------------+-----------+--------+ +| Convective available potential energy | cape | 0 | ++------------------------------------------------+-----------+--------+ +| Convective inhibition | cin | 0 | ++------------------------------------------------+-----------+--------+ +| Convective precipitation | cp | 0 | ++------------------------------------------------+-----------+--------+ +| Convective rain rate | crr | 0 | ++------------------------------------------------+-----------+--------+ +| Convective snowfall | csf | 0 | ++------------------------------------------------+-----------+--------+ +| Convective snowfall rate water equivalent | csfr | 1 | ++------------------------------------------------+-----------+--------+ +| Downward UV radiation at the surface | uvb | 0 | ++------------------------------------------------+-----------+--------+ +| Duct base height | dctb | 0 | ++------------------------------------------------+-----------+--------+ +| Eastward gravity wave surface stress | lgws | 0 | ++------------------------------------------------+-----------+--------+ +| Eastward turbulent surface stress | ewss | 0 | ++------------------------------------------------+-----------+--------+ +| Evaporation | e | 1 | ++------------------------------------------------+-----------+--------+ +| Forecast albedo | fal | 0 | ++------------------------------------------------+-----------+--------+ +| Forecast logarithm of surface roughness for | flsr | 0 | +| heat | | | ++------------------------------------------------+-----------+--------+ +| Forecast surface roughness 
| fsr | 0 | ++------------------------------------------------+-----------+--------+ +| Free convective velocity over the oceans | p140208 | 0 | ++------------------------------------------------+-----------+--------+ +| Friction velocity | zust | 0 | ++------------------------------------------------+-----------+--------+ +| Geopotential | z | 0 | ++------------------------------------------------+-----------+--------+ +| Gravity wave dissipation | gwd | 0 | ++------------------------------------------------+-----------+--------+ +| High cloud cover | hcc | 0 | ++------------------------------------------------+-----------+--------+ +| High vegetation cover | cvh | 0 | ++------------------------------------------------+-----------+--------+ +| Ice temperature layer 1 | istl1 | 0 | ++------------------------------------------------+-----------+--------+ +| Ice temperature layer 2 | istl2 | 0 | ++------------------------------------------------+-----------+--------+ +| Ice temperature layer 3 | istl3 | 0 | ++------------------------------------------------+-----------+--------+ +| Ice temperature layer 4 | istl4 | 0 | ++------------------------------------------------+-----------+--------+ +| Instantaneous 10 metre wind gust | i10fg | 0 | ++------------------------------------------------+-----------+--------+ +| Instantaneous eastward turbulent surface | iews | 0 | +| stress | | | ++------------------------------------------------+-----------+--------+ +| Instantaneous large-scale surface | ilspf | 0 | +| precipitation fraction | | | ++------------------------------------------------+-----------+--------+ +| Instantaneous moisture flux | ie | 0 | ++------------------------------------------------+-----------+--------+ +| Instantaneous northward turbulent surface | inss | 0 | +| stress | | | ++------------------------------------------------+-----------+--------+ +| Instantaneous surface sensible heat flux | ishf | 0 | 
++------------------------------------------------+-----------+--------+ +| K index | kx | 0 | ++------------------------------------------------+-----------+--------+ +| Lake bottom temperature | lblt | 0 | ++------------------------------------------------+-----------+--------+ +| Lake cover | cl | 0 | ++------------------------------------------------+-----------+--------+ +| Lake depth | dl | 0 | ++------------------------------------------------+-----------+--------+ +| Lake ice depth | licd | 0 | ++------------------------------------------------+-----------+--------+ +| Lake ice temperature | lict | 0 | ++------------------------------------------------+-----------+--------+ +| Lake mix-layer depth | lmld | 0 | ++------------------------------------------------+-----------+--------+ +| Lake mix-layer temperature | lmlt | 0 | ++------------------------------------------------+-----------+--------+ +| Lake shape factor | lshf | 0 | ++------------------------------------------------+-----------+--------+ +| Lake total layer temperature | ltlt | 0 | ++------------------------------------------------+-----------+--------+ +| Land-sea mask | lsm | 1 | ++------------------------------------------------+-----------+--------+ +| Large-scale precipitation | lsp | 0 | ++------------------------------------------------+-----------+--------+ +| Large-scale precipitation fraction | lspf | 0 | ++------------------------------------------------+-----------+--------+ +| Large scale rain rate | lsrr | 0 | ++------------------------------------------------+-----------+--------+ +| Large-scale snowfall | lsf | 0 | ++------------------------------------------------+-----------+--------+ +| Large scale snowfall rate water equivalent | lssfr | 1 | ++------------------------------------------------+-----------+--------+ +| Leaf area index, high vegetation | lai_hv | 0 | ++------------------------------------------------+-----------+--------+ +| Leaf area index, low vegetation | 
lai_lv | 0 | ++------------------------------------------------+-----------+--------+ +| Low cloud cover | lcc | 0 | ++------------------------------------------------+-----------+--------+ +| Low vegetation cover | cvl | 0 | ++------------------------------------------------+-----------+--------+ +| Maximum temperature at 2 metres since previous | mx2t | 0 | +| post-processing | | | ++------------------------------------------------+-----------+--------+ +| Maximum individual wave height | hmax | 0 | ++------------------------------------------------+-----------+--------+ +| Maximum total precipitation rate since | mxtpr | 0 | +| previous post-processing | | | ++------------------------------------------------+-----------+--------+ +| Mean boundary layer dissipation | mbld | 0 | ++------------------------------------------------+-----------+--------+ +| Mean convective precipitation rate | mcpr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean convective snowfall rate | mcsr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean direction of total swell | mdts | 0 | ++------------------------------------------------+-----------+--------+ +| Mean direction of wind waves | mdww | 0 | ++------------------------------------------------+-----------+--------+ +| Mean eastward gravity wave surface stress | megwss | 0 | ++------------------------------------------------+-----------+--------+ +| Mean eastward turbulent surface stress | metss | 0 | ++------------------------------------------------+-----------+--------+ +| Mean evaporation rate | mer | 0 | ++------------------------------------------------+-----------+--------+ +| Mean gravity wave dissipation | mgwd | 0 | ++------------------------------------------------+-----------+--------+ +| Mean large-scale precipitation fraction | mlspf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean large-scale precipitation 
rate | mlspr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean large-scale snowfall rate | mlssr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean northward gravity wave surface stress | mngwss | 0 | ++------------------------------------------------+-----------+--------+ +| Mean northward turbulent surface stress | mntss | 0 | ++------------------------------------------------+-----------+--------+ +| Mean period of total swell | mpts | 0 | ++------------------------------------------------+-----------+--------+ +| Mean period of wind waves | mpww | 0 | ++------------------------------------------------+-----------+--------+ +| Mean potential evaporation rate | mper | 0 | ++------------------------------------------------+-----------+--------+ +| Mean runoff rate | mror | 0 | ++------------------------------------------------+-----------+--------+ +| Mean sea level pressure | msl | 0 | ++------------------------------------------------+-----------+--------+ +| Mean snow evaporation rate | mser | 0 | ++------------------------------------------------+-----------+--------+ +| Mean snowfall rate | msr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean snowmelt rate | msmr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean square slope of waves | msqs | 0 | ++------------------------------------------------+-----------+--------+ +| Mean sub-surface runoff rate | mssror | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface direct short-wave radiation flux | msdrswrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface direct short-wave radiation flux, | m | 0 | +| clear sky | sdrswrfcs | | ++------------------------------------------------+-----------+--------+ +| Mean surface downward long-wave radiation flux | msdwlwrf | 0 | 
++------------------------------------------------+-----------+--------+ +| Mean surface downward long-wave radiation | m | 0 | +| flux, clear sky | sdwlwrfcs | | ++------------------------------------------------+-----------+--------+ +| Mean surface downward short-wave radiation | msdwswrf | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Mean surface downward short-wave radiation | m | 0 | +| flux, clear sky | sdwswrfcs | | ++------------------------------------------------+-----------+--------+ +| Mean surface downward UV radiation flux | msdwuvrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface latent heat flux | mslhf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface net long-wave radiation flux | msnlwrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface net long-wave radiation flux, | msnlwrfcs | 0 | +| clear sky | | | ++------------------------------------------------+-----------+--------+ +| Mean surface net short-wave radiation flux | msnswrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface net short-wave radiation flux, | msnswrfcs | 0 | +| clear sky | | | ++------------------------------------------------+-----------+--------+ +| Mean surface runoff rate | msror | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface sensible heat flux | msshf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean top downward short-wave radiation flux | mtdwswrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean top net long-wave radiation flux | mtnlwrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean top net long-wave radiation flux, clear | mtnlwrfcs | 0 | +| sky | | | 
++------------------------------------------------+-----------+--------+ +| Mean top net short-wave radiation flux | mtnswrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean top net short-wave radiation flux, clear | mtnswrfcs | 0 | +| sky | | | ++------------------------------------------------+-----------+--------+ +| Mean total precipitation rate | mtpr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean vertical gradient of refractivity inside | dndza | 0 | +| trapping layer | | | ++------------------------------------------------+-----------+--------+ +| Mean vertically integrated moisture divergence | mvimd | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave direction | mwd | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave direction of first swell partition | p140122 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave direction of second swell partition | p140125 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave direction of third swell partition | p140128 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave period | mwp | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave period based on first moment | mp1 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave period based on first moment for | p1ps | 0 | +| swell | | | ++------------------------------------------------+-----------+--------+ +| Mean wave period based on first moment for | p1ww | 0 | +| wind waves | | | ++------------------------------------------------+-----------+--------+ +| Mean wave period based on second moment for | p2ps | 0 | +| swell | | | ++------------------------------------------------+-----------+--------+ +| Mean wave period based 
on second moment for | p2ww | 0 | +| wind waves | | | ++------------------------------------------------+-----------+--------+ +| Mean wave period of first swell partition | p140123 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave period of second swell partition | p140126 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave period of third swell partition | p140129 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean zero-crossing wave period | mp2 | 0 | ++------------------------------------------------+-----------+--------+ +| Medium cloud cover | mcc | 0 | ++------------------------------------------------+-----------+--------+ +| Minimum temperature at 2 metres since previous | mn2t | 0 | +| post-processing | | | ++------------------------------------------------+-----------+--------+ +| Minimum total precipitation rate since | mntpr | 0 | +| previous post-processing | | | ++------------------------------------------------+-----------+--------+ +| Minimum vertical gradient of refractivity | dndzn | 0 | +| inside trapping layer | | | ++------------------------------------------------+-----------+--------+ +| Model bathymetry | wmb | 0 | ++------------------------------------------------+-----------+--------+ +| Near IR albedo for diffuse radiation | alnid | 0 | ++------------------------------------------------+-----------+--------+ +| Near IR albedo for direct radiation | alnip | 0 | ++------------------------------------------------+-----------+--------+ +| Normalized energy flux into ocean | phioc | 0 | ++------------------------------------------------+-----------+--------+ +| Normalized energy flux into waves | phiaw | 0 | ++------------------------------------------------+-----------+--------+ +| Normalized stress into ocean | tauoc | 0 | ++------------------------------------------------+-----------+--------+ +| Northward gravity wave 
surface stress | mgws | 0 | ++------------------------------------------------+-----------+--------+ +| Northward turbulent surface stress | nsss | 0 | ++------------------------------------------------+-----------+--------+ +| Peak wave period | pp1d | 0 | ++------------------------------------------------+-----------+--------+ +| Period corresponding to maximum individual | tmax | 0 | +| wave height | | | ++------------------------------------------------+-----------+--------+ +| Potential evaporation | pev | 1 | ++------------------------------------------------+-----------+--------+ +| Precipitation type | ptype | 0 | ++------------------------------------------------+-----------+--------+ +| Runoff | ro | 0 | ++------------------------------------------------+-----------+--------+ +| Sea ice area fraction | siconc | 0 | ++------------------------------------------------+-----------+--------+ +| Sea surface temperature | sst | 0 | ++------------------------------------------------+-----------+--------+ +| Significant height of combined wind waves and | swh | 0 | +| swell | | | ++------------------------------------------------+-----------+--------+ +| Significant height of total swell | shts | 0 | ++------------------------------------------------+-----------+--------+ +| Significant height of wind waves | shww | 0 | ++------------------------------------------------+-----------+--------+ +| Significant wave height of first swell | p140121 | 0 | +| partition | | | ++------------------------------------------------+-----------+--------+ +| Significant wave height of second swell | p140124 | 0 | +| partition | | | ++------------------------------------------------+-----------+--------+ +| Significant wave height of third swell | p140127 | 0 | +| partition | | | ++------------------------------------------------+-----------+--------+ +| Skin reservoir content | src | 0 | ++------------------------------------------------+-----------+--------+ +| Skin temperature | 
skt | 0 | ++------------------------------------------------+-----------+--------+ +| Slope of sub-gridscale orography | slor | 0 | ++------------------------------------------------+-----------+--------+ +| Snow albedo | asn | 0 | ++------------------------------------------------+-----------+--------+ +| Snow density | rsn | 1 | ++------------------------------------------------+-----------+--------+ +| Snow depth | sd | 1 | ++------------------------------------------------+-----------+--------+ +| Snow evaporation | es | 0 | ++------------------------------------------------+-----------+--------+ +| Snowfall | sf | 0 | ++------------------------------------------------+-----------+--------+ +| Snowmelt | smlt | 0 | ++------------------------------------------------+-----------+--------+ +| Soil temperature level 1 | stl1 | 1 | ++------------------------------------------------+-----------+--------+ +| Soil temperature level 2 | stl2 | 1 | ++------------------------------------------------+-----------+--------+ +| Soil temperature level 3 | stl3 | 1 | ++------------------------------------------------+-----------+--------+ +| Soil temperature level 4 | stl4 | 1 | ++------------------------------------------------+-----------+--------+ +| Soil type | slt | 1 | ++------------------------------------------------+-----------+--------+ +| Standard deviation of filtered subgrid | sdfor | 0 | +| orography | | | ++------------------------------------------------+-----------+--------+ +| Standard deviation of orography | sdor | 0 | ++------------------------------------------------+-----------+--------+ +| Sub-surface runoff | ssro | 0 | ++------------------------------------------------+-----------+--------+ +| Surface latent heat flux | slhf | 0 | ++------------------------------------------------+-----------+--------+ +| Surface net solar radiation | ssr | 0 | ++------------------------------------------------+-----------+--------+ +| Surface net solar radiation, 
clear sky | ssrc | 0 | ++------------------------------------------------+-----------+--------+ +| Surface net thermal radiation | str | 0 | ++------------------------------------------------+-----------+--------+ +| Surface net thermal radiation, clear sky | strc | 0 | ++------------------------------------------------+-----------+--------+ +| Surface pressure | sp | 0 | ++------------------------------------------------+-----------+--------+ +| Surface runoff | sro | 0 | ++------------------------------------------------+-----------+--------+ +| Surface sensible heat flux | sshf | 0 | ++------------------------------------------------+-----------+--------+ +| Surface solar radiation downward clear-sky | ssrdc | 0 | ++------------------------------------------------+-----------+--------+ +| Surface solar radiation downwards | ssrd | 0 | ++------------------------------------------------+-----------+--------+ +| Surface thermal radiation downward clear-sky | strdc | 0 | ++------------------------------------------------+-----------+--------+ +| Surface thermal radiation downwards | strd | 0 | ++------------------------------------------------+-----------+--------+ +| Temperature of snow layer | tsn | 0 | ++------------------------------------------------+-----------+--------+ +| TOA incident solar radiation | tisr | 0 | ++------------------------------------------------+-----------+--------+ +| Top net solar radiation | tsr | 0 | ++------------------------------------------------+-----------+--------+ +| Top net solar radiation, clear sky | tsrc | 0 | ++------------------------------------------------+-----------+--------+ +| Top net thermal radiation | ttr | 0 | ++------------------------------------------------+-----------+--------+ +| Top net thermal radiation, clear sky | ttrc | 0 | ++------------------------------------------------+-----------+--------+ +| Total cloud cover | tcc | 0 | ++------------------------------------------------+-----------+--------+ +| 
Total column cloud ice water | tciw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column cloud liquid water | tclw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column ozone | tco3 | 0 | ++------------------------------------------------+-----------+--------+ +| Total column rain water | tcrw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column snow water | tcsw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column supercooled liquid water | tcslw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column water | tcw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column water vapour | tcwv | 0 | ++------------------------------------------------+-----------+--------+ +| Total precipitation | tp | 1 | ++------------------------------------------------+-----------+--------+ +| Total sky direct solar radiation at surface | fdir | 0 | ++------------------------------------------------+-----------+--------+ +| Total totals index | totalx | 0 | ++------------------------------------------------+-----------+--------+ +| Trapping layer base height | tplb | 0 | ++------------------------------------------------+-----------+--------+ +| Trapping layer top height | tplt | 0 | ++------------------------------------------------+-----------+--------+ +| Type of high vegetation | tvh | 0 | ++------------------------------------------------+-----------+--------+ +| Type of low vegetation | tvl | 0 | ++------------------------------------------------+-----------+--------+ +| U-component stokes drift | ust | 0 | ++------------------------------------------------+-----------+--------+ +| UV visible albedo for diffuse radiation | aluvd | 0 | ++------------------------------------------------+-----------+--------+ +| UV visible albedo for direct 
radiation | aluvp | 0 | ++------------------------------------------------+-----------+--------+ +| V-component stokes drift | vst | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of cloud | p80.162 | 0 | +| frozen water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of cloud | p79.162 | 0 | +| liquid water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of | p85.162 | 0 | +| geopotential flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of kinetic | p82.162 | 0 | +| energy flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of mass flux | p81.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of moisture | p84.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of ozone flux | p87.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of thermal | p83.162 | 0 | +| energy flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of total | p86.162 | 0 | +| energy flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward cloud frozen | p90.162 | 0 | +| water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward cloud liquid | p88.162 | 0 | +| water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward geopotential | p73.162 | 0 | +| flux | | | 
++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward heat flux | p69.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward kinetic energy | p67.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward mass flux | p65.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward ozone flux | p77.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward total energy | p75.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward water vapour | p71.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of energy conversion | p64.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of kinetic energy | p59.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of mass of atmosphere | p53.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of mass tendency | p92.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward cloud frozen | p91.162 | 0 | +| water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward cloud liquid | p89.162 | 0 | +| water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward geopotential | p74.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward heat flux | p70.162 | 0 | 
++------------------------------------------------+-----------+--------+ +| Vertical integral of northward kinetic energy | p68.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward mass flux | p66.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward ozone flux | p78.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward total energy | p76.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward water vapour | p72.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of potential+internal energy | p61.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of potential+internal+latent | p62.162 | 0 | +| energy | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of temperature | p54.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of thermal energy | p60.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of total energy | p63.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertically integrated moisture divergence | vimd | 0 | ++------------------------------------------------+-----------+--------+ +| Volumetric soil water layer 1 | swvl1 | 1 | ++------------------------------------------------+-----------+--------+ +| Volumetric soil water layer 2 | swvl2 | 1 | ++------------------------------------------------+-----------+--------+ +| Volumetric soil water layer 3 | swvl3 | 1 | ++------------------------------------------------+-----------+--------+ +| Volumetric soil water layer 4 | swvl4 | 
1 | ++------------------------------------------------+-----------+--------+ +| Wave spectral directional width | wdw | 0 | ++------------------------------------------------+-----------+--------+ +| Wave spectral directional width for swell | dwps | 0 | ++------------------------------------------------+-----------+--------+ +| Wave spectral directional width for wind waves | dwww | 0 | ++------------------------------------------------+-----------+--------+ +| Wave spectral kurtosis | wsk | 0 | ++------------------------------------------------+-----------+--------+ +| Wave spectral peakedness | wsp | 0 | ++------------------------------------------------+-----------+--------+ +| Wave Spectral Skewness | wss | 0 | ++------------------------------------------------+-----------+--------+ +| Zero degree level | deg0l | 0 | ++------------------------------------------------+-----------+--------+ diff --git a/docs/variables_era5land.rst b/docs/variables_era5land.rst new file mode 100644 index 0000000..cf9d8fc --- /dev/null +++ b/docs/variables_era5land.rst @@ -0,0 +1,114 @@ +ERA5-Land Variables +------------------- + +The following table contains all variable names that can be used for the +``era5land download`` and ``era5land reshuffle`` programs. + +Note that the “default” column indicates which variables are downloaded +if none are specified. 
+ ++----------------------------------------------+------------+----------+ +| **Variable** | **Short | **D | +| | Name** | efault** | ++==============================================+============+==========+ +| 10 metre U wind component | u10 | 0 | ++----------------------------------------------+------------+----------+ +| 10 metre V wind component | v10 | 0 | ++----------------------------------------------+------------+----------+ +| 2 metre dewpoint temperature | d2m | 1 | ++----------------------------------------------+------------+----------+ +| 2 metre temperature | t2m | 1 | ++----------------------------------------------+------------+----------+ +| Evaporation from bare soil | evabs | 1 | ++----------------------------------------------+------------+----------+ +| Evaporation from open water surfaces | evaow | 1 | +| excluding oceans | | | ++----------------------------------------------+------------+----------+ +| Evaporation from the top of canopy | evatc | 1 | ++----------------------------------------------+------------+----------+ +| Evaporation from vegetation transpiration | evavt | 1 | ++----------------------------------------------+------------+----------+ +| Forecast albedo | fal | 1 | ++----------------------------------------------+------------+----------+ +| Lake bottom temperature | lblt | 0 | ++----------------------------------------------+------------+----------+ +| Lake ice depth | licd | 0 | ++----------------------------------------------+------------+----------+ +| Lake ice temperature | lict | 0 | ++----------------------------------------------+------------+----------+ +| Lake mix-layer depth | lmld | 0 | ++----------------------------------------------+------------+----------+ +| Lake mix-layer temperature | lmlt | 0 | ++----------------------------------------------+------------+----------+ +| Lake shape factor | lshf | 0 | ++----------------------------------------------+------------+----------+ +| Lake total layer temperature 
| ltlt | 0 | ++----------------------------------------------+------------+----------+ +| Leaf area index high vegetation | lai_hv | 1 | ++----------------------------------------------+------------+----------+ +| Leaf area index low vegetation | lai_lv | 1 | ++----------------------------------------------+------------+----------+ +| Potential evaporation | pev | 1 | ++----------------------------------------------+------------+----------+ +| Runoff | ro | 1 | ++----------------------------------------------+------------+----------+ +| Skin reservoir content | src | 1 | ++----------------------------------------------+------------+----------+ +| Skin temperature | skt | 1 | ++----------------------------------------------+------------+----------+ +| Snow albedo | asn | 1 | ++----------------------------------------------+------------+----------+ +| Snow Cover | snowc | 1 | ++----------------------------------------------+------------+----------+ +| Snow density | rsn | 1 | ++----------------------------------------------+------------+----------+ +| Snow depth | sd | 1 | ++----------------------------------------------+------------+----------+ +| Snow Depth Water Equivalent | swe | 1 | ++----------------------------------------------+------------+----------+ +| Snow evaporation | es | 1 | ++----------------------------------------------+------------+----------+ +| Snowfall | sf | 1 | ++----------------------------------------------+------------+----------+ +| Snowmelt | smlt | 1 | ++----------------------------------------------+------------+----------+ +| Soil temperature level 1 | stl1 | 1 | ++----------------------------------------------+------------+----------+ +| Soil temperature level 2 | stl2 | 1 | ++----------------------------------------------+------------+----------+ +| Soil temperature level 3 | stl3 | 1 | ++----------------------------------------------+------------+----------+ +| Soil temperature level 4 | stl4 | 1 | 
++----------------------------------------------+------------+----------+ +| Sub-surface runoff | ssro | 1 | ++----------------------------------------------+------------+----------+ +| Surface latent heat flux | slhf | 1 | ++----------------------------------------------+------------+----------+ +| Surface net solar radiation | ssr | 1 | ++----------------------------------------------+------------+----------+ +| Surface net thermal radiation | str | 1 | ++----------------------------------------------+------------+----------+ +| Surface pressure | sp | 1 | ++----------------------------------------------+------------+----------+ +| Surface runoff | sro | 1 | ++----------------------------------------------+------------+----------+ +| Surface sensible heat flux | sshf | 1 | ++----------------------------------------------+------------+----------+ +| Surface solar radiation downwards | ssrd | 1 | ++----------------------------------------------+------------+----------+ +| Surface thermal radiation downwards | strd | 1 | ++----------------------------------------------+------------+----------+ +| Temperature of snow layer | tsn | 1 | ++----------------------------------------------+------------+----------+ +| Evaporation | e | 1 | ++----------------------------------------------+------------+----------+ +| Total precipitation | tp | 1 | ++----------------------------------------------+------------+----------+ +| Volumetric soil water layer 1 | swvl1 | 1 | ++----------------------------------------------+------------+----------+ +| Volumetric soil water layer 2 | swvl2 | 1 | ++----------------------------------------------+------------+----------+ +| Volumetric soil water layer 3 | swvl3 | 1 | ++----------------------------------------------+------------+----------+ +| Volumetric soil water layer 4 | swvl4 | 1 | ++----------------------------------------------+------------+----------+ diff --git a/setup.cfg b/setup.cfg index 03c07be..509f8ca 100644 --- a/setup.cfg 
+++ b/setup.cfg @@ -63,6 +63,16 @@ testing = coverage pytest +docs = + sphinx<7 + sphinx_rtd_theme + mock + pillow + recommonmark + readthedocs-sphinx-ext + myst_parser + nbsphinx + [options.entry_points] # Add here console scripts like: # console_scripts = diff --git a/src/ecmwf_models/era5/download.py b/src/ecmwf_models/era5/download.py index 13be6d8..e59f85d 100644 --- a/src/ecmwf_models/era5/download.py +++ b/src/ecmwf_models/era5/download.py @@ -242,15 +242,15 @@ def download_and_move( A grid on which to remap the data using CDO. This must be a dictionary using CDO's grid description format, e.g.:: - grid = { - "gridtype": "lonlat", - "xsize": 720, - "ysize": 360, - "xfirst": -179.75, - "yfirst": 89.75, - "xinc": 0.5, - "yinc": -0.5, - } + grid = { + "gridtype": "lonlat", + "xsize": 720, + "ysize": 360, + "xfirst": -179.75, + "yfirst": 89.75, + "xinc": 0.5, + "yinc": -0.5, + } Default is to use no regridding. To use this option, it is necessary that CDO is installed. From a7559ad0f78bd39b52187b44818f06195698fa39 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Sat, 26 Oct 2024 20:14:46 +0200 Subject: [PATCH 13/20] Add dockerfile --- Dockerfile | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..5e55b08 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +FROM mambaorg/micromamba:1.3.1-alpine +MAINTAINER Wolfgang Preimesberger + +USER root + +RUN apk update && \ + apk upgrade && \ + apk add git && \ + apk add build-base && \ + apk add g++ && \ + apk add bsd-compat-headers && \ + apk add tiff + +WORKDIR /app + +COPY . /app + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +RUN micromamba install -y -n base -c conda-forge python=3.12 pygrib netcdf4 pyresample pykdtree +RUN pip install /app/. 
+ +RUN micromamba clean --all --yes + +# Clean up the src code, as it is installed now +RUN rm -rf /app + +ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"] \ No newline at end of file From c7019be028ce94fa7f2ff1cbdacdcfc5deafa62c Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Sat, 26 Oct 2024 20:18:15 +0200 Subject: [PATCH 14/20] Add CDO --- .github/workflows/ci.yml | 1 + setup.cfg | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0571245..ed809ac 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,6 +52,7 @@ jobs: - name: Install dependencies and package shell: bash -l {0} run: | + conda install -c conda-forge cdo # On macos pykdtree was built with OpenMP (C-level parallelization library) support if [ ${{ matrix.os }} == "macos-latest" ] then diff --git a/setup.cfg b/setup.cfg index 509f8ca..ee38985 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,6 +42,7 @@ install_requires = cdsapi>=0.7.3 repurpose>=0.13 pygeogrids + cdo # The usage of test_requires is discouraged, see `Dependency Management` docs # tests_require = pytest; pytest-cov From 202236eda7c4ba7bcecc8b95ac67aabbd3747d46 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Sat, 26 Oct 2024 20:44:23 +0200 Subject: [PATCH 15/20] Add CDO --- .github/workflows/ci.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ed809ac..4139f45 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,6 @@ jobs: - name: Install dependencies and package shell: bash -l {0} run: | - conda install -c conda-forge cdo # On macos pykdtree was built with OpenMP (C-level parallelization library) support if [ ${{ matrix.os }} == "macos-latest" ] then @@ -63,6 +62,12 @@ jobs: then conda install -c conda-forge pygrib fi + # CDO we can test on ubuntu + if [ ${{ matrix.os }} == "ubuntu-latest" ] + then + conda install -c conda-forge cdo 
+ fi + pip install -e .[testing] - name: Run all tests env: From 0f907c99d0e0ba224cc92249ab98f8a9d7ab6549 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Sat, 26 Oct 2024 21:02:55 +0200 Subject: [PATCH 16/20] Add CDO --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ee38985..509f8ca 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,7 +42,6 @@ install_requires = cdsapi>=0.7.3 repurpose>=0.13 pygeogrids - cdo # The usage of test_requires is discouraged, see `Dependency Management` docs # tests_require = pytest; pytest-cov From c7387e3736c1a80a028f3e1d2ae9d3f7f4ae63f6 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Sat, 26 Oct 2024 21:53:09 +0200 Subject: [PATCH 17/20] Added CLI for update_img --- docs/download.rst | 10 ++++- src/ecmwf_models/cli.py | 54 ++++++++++++++++++------- src/ecmwf_models/era5/download.py | 8 ++-- src/ecmwf_models/utils.py | 13 ++++-- tests/tests_era5/test_era5_reshuffle.py | 2 +- 5 files changed, 63 insertions(+), 24 deletions(-) diff --git a/docs/download.rst b/docs/download.rst index 0ee917c..f4bb2d1 100644 --- a/docs/download.rst +++ b/docs/download.rst @@ -38,4 +38,12 @@ We recommend downloading data in netcdf format, however, using the ``--as_grib`` option, you can also download data in grib format. For all other available options, type ``era5 download --help``, -or ``era5land download --help`` respectively. \ No newline at end of file +or ``era5land download --help`` respectively + +Updating an existing record +--------------------------- +After some time, new ERA data will become available. You can then use the +program ``era5 update_img`` with a path to the existing record, to download +new images with the same settings that became available since the last time +the record was downloaded. You might even set up a cron job to check for new +data in regular intervals to keep your copy up-to-date. 
\ No newline at end of file diff --git a/src/ecmwf_models/cli.py b/src/ecmwf_models/cli.py index 38c0cbe..4531823 100644 --- a/src/ecmwf_models/cli.py +++ b/src/ecmwf_models/cli.py @@ -45,16 +45,16 @@ "--keep_original", type=click.BOOL, default=False, - help="Also keep the original image stacks as downloaded from CDS, " - "instead of deleting them after extracting individual images. " - "Default is False.") + help="To keep the original image stacks as downloaded from CDS, " + "instead of deleting them after extracting individual images, pass " + "`--keep_original True`") @click.option( "-grb", "--as_grib", type=click.BOOL, default=False, - help="Download data in grib format instead of netcdf. " - "Default is False.") + help="To download data in grib format instead of netcdf pass " + "`--as_grib True`.") @click.option( "--h_steps", type=str, @@ -65,6 +65,16 @@ "two images for each day, at 0:00 and 12:00 respectively. " "By default, we download 6-hourly images starting at 0:00 UTC, " "(i.e. `--h_steps 0,6,12,18`") +@click.option( + "--keep_prelim", + type=click.BOOL, + default=True, + help="The last 1-2 month of data are usually 'preliminary' (labelled as " + "'ERA5-T' and 'ERA5-Land-T') and might be changed if an issue is " + "detected. When this option is deactivated (`--keep_prelim False`), " + "only the final data will be kept, while the ERA5-T data is discarded " + "after download. By default, we also keep preliminary files, but they " + "get a different file name as the final data.") @click.option( "--max_request_size", type=int, @@ -84,7 +94,7 @@ "Alternatively, you can also set an environment variable " "`CDSAPI_KEY` with your token.") def cli_download_era5(path, start, end, variables, keep_original, as_grib, - h_steps, max_request_size, cds_token): + h_steps, keep_prelim, max_request_size, cds_token): """ Download ERA5 image data within the chosen period. 
NOTE: Before using this program, create a CDS account and set up a `.cdsapirc` file as described @@ -115,6 +125,7 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, keep_original=keep_original, stepsize='month', n_max_request=max_request_size, + keep_prelim=keep_prelim, cds_token=cds_token, ) @@ -159,16 +170,16 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, "--keep_original", type=click.BOOL, default=False, - help="Also keep the original image stacks as downloaded from CDS, " - "instead of deleting them after extracting individual images. " - "Default is False.") + help="To keep the original image stacks as downloaded from CDS, " + "instead of deleting them after extracting individual images, pass " + "`--keep_original True`") @click.option( "-grb", "--as_grib", type=click.BOOL, default=False, - help="Download data in grib format instead of netcdf. " - "Default is False.") + help="To download data in grib format instead of netcdf pass " + "`--as_grib True`.") @click.option( "--h_steps", type=click.STRING, @@ -179,6 +190,16 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, "two images for each day, at 0:00 and 12:00 respectively. " "By default, we download 6-hourly images starting at 0:00 UTC, " "(i.e. `--h_steps 0,6,12,18`") +@click.option( + "--keep_prelim", + type=click.BOOL, + default=True, + help="The last 1-2 month of data are usually 'preliminary' (labelled as " + "'ERA5-T' and 'ERA5-Land-T') and might be changed if an issue is " + "detected. When this option is deactivated (`--keep_prelim False`), " + "only the final data will be kept, while the ERA5-T data is discarded " + "after download. 
By default, we also keep preliminary files, but they " + "get a different file name as the final data.") @click.option( "--max_request_size", type=int, @@ -198,7 +219,7 @@ def cli_download_era5(path, start, end, variables, keep_original, as_grib, "Alternatively, you can also set an environment variable " "`CDSAPI_KEY` with your token.") def cli_download_era5land(path, start, end, variables, keep_original, as_grib, - h_steps, max_request_size, cds_token): + h_steps, keep_prelim, max_request_size, cds_token): """ Download ERA5-Land image data within a chosen period. NOTE: Before using this program, create a CDS account and set up a @@ -230,6 +251,7 @@ def cli_download_era5land(path, start, end, variables, keep_original, as_grib, keep_original=keep_original, stepsize='month', n_max_request=max_request_size, + keep_prelim=keep_prelim, cds_token=cds_token) return status_code @@ -301,11 +323,11 @@ def cli_update_img(path, cds_token=None): @click.option( '--land_points', '-l', - is_flag=True, + type=click.BOOL, show_default=True, default=False, - help="Store only time series for points that are over land. " - "Default is False.") + help="To store only time series for points that are over land, pass " + "`--land_points True`. 
") @click.option( '--bbox', nargs=4, @@ -430,6 +452,7 @@ def era5(): era5.add_command(cli_download_era5) +era5.add_command(cli_update_img) era5.add_command(cli_reshuffle) era5.add_command(cli_extend_ts) @@ -441,5 +464,6 @@ def era5land(): era5land.add_command(cli_download_era5land) +era5land.add_command(cli_update_img) era5land.add_command(cli_reshuffle) era5land.add_command(cli_extend_ts) diff --git a/src/ecmwf_models/era5/download.py b/src/ecmwf_models/era5/download.py index e59f85d..af4f601 100644 --- a/src/ecmwf_models/era5/download.py +++ b/src/ecmwf_models/era5/download.py @@ -21,6 +21,7 @@ default_variables, split_array, check_api_ready, + get_first_last_image_date ) from ecmwf_models.extract import save_ncs_from_nc, save_gribs_from_grib @@ -387,7 +388,7 @@ def _download(curr_start, curr_end): i += 1 continue - if status_code == 0: + if status_code == 0 and os.path.exists(dl_file): if grb: save_gribs_from_grib( dl_file, @@ -490,8 +491,9 @@ def download_record_extension(path, dry_run=False, cds_token=None): """ props = read_summary_yml(path) - startdate = pd.to_datetime(props['period_to']).to_pydatetime() \ - + timedelta(days=1) # next day + last_img = get_first_last_image_date(path) + + startdate = pd.to_datetime(last_img).to_pydatetime() + timedelta(days=1) enddate = pd.to_datetime(datetime.now().date()).to_pydatetime() \ - timedelta(days=1) # yesterday diff --git a/src/ecmwf_models/utils.py b/src/ecmwf_models/utils.py index 7839b95..218d2e7 100644 --- a/src/ecmwf_models/utils.py +++ b/src/ecmwf_models/utils.py @@ -24,6 +24,7 @@ Utility functions for all data products in this package. """ import os +import warnings from datetime import datetime import pandas as pd import numpy as np @@ -378,10 +379,14 @@ def update_image_summary_file(data_path: str, If not specified, then `data_path` is used. If a file already exists, it will be overwritten. 
""" - first_image_date = get_first_last_image_date( - data_path, start_from_last=False) - last_image_date = get_first_last_image_date( - data_path, start_from_last=True) + try: + first_image_date = get_first_last_image_date( + data_path, start_from_last=False) + last_image_date = get_first_last_image_date( + data_path, start_from_last=True) + except ValueError: + warnings.warn(f"Could not infer date from filenames in {data_path}") + return props = img_infer_file_props(data_path, start_from_last=False) _ = props.pop("datetime") diff --git a/tests/tests_era5/test_era5_reshuffle.py b/tests/tests_era5/test_era5_reshuffle.py index 191f6c2..9e53b1a 100644 --- a/tests/tests_era5/test_era5_reshuffle.py +++ b/tests/tests_era5/test_era5_reshuffle.py @@ -63,7 +63,7 @@ def test_cli_reshuffle_and_update(): ts_path = tempdir / 'ts' subprocess.call(["era5", "reshuffle", img_path, ts_path, "2010-01-01", - "2010-01-01", "-v", "swvl1,swvl2", "-l", + "2010-01-01", "-v", "swvl1,swvl2", "-l", "True" "--bbox", "12.0", "46.0", "17.0", "50.0", "--h_steps", "0"]) From 2b56a2041450095ca1bd9eab1c120dbeccb20e34 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Sun, 27 Oct 2024 11:43:17 +0100 Subject: [PATCH 18/20] Update test --- src/ecmwf_models/utils.py | 2 +- tests/tests_era5/test_era5_download_cds.py | 8 ++++++++ tests/tests_era5/test_era5_reshuffle.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/ecmwf_models/utils.py b/src/ecmwf_models/utils.py index 218d2e7..373aeb4 100644 --- a/src/ecmwf_models/utils.py +++ b/src/ecmwf_models/utils.py @@ -386,7 +386,7 @@ def update_image_summary_file(data_path: str, data_path, start_from_last=True) except ValueError: warnings.warn(f"Could not infer date from filenames in {data_path}") - return + return props = img_infer_file_props(data_path, start_from_last=False) _ = props.pop("datetime") diff --git a/tests/tests_era5/test_era5_download_cds.py b/tests/tests_era5/test_era5_download_cds.py index a05ab8f..1f9d240 
100644 --- a/tests/tests_era5/test_era5_download_cds.py +++ b/tests/tests_era5/test_era5_download_cds.py @@ -11,6 +11,7 @@ from ecmwf_models.era5.reader import ERA5NcImg from ecmwf_models.utils import check_api_ready +from ecmwf_models.era5.download import download_and_move try: check_api_ready() @@ -39,6 +40,9 @@ def test_cli_download(self): subprocess.call(['era5', 'download'] + args) + # download_and_move(dl_path, startdate, enddate, variables=['swvl1'], + # h_steps=[0]) + out_path = os.path.join(dl_path, '2023', '001') assert(os.path.exists(out_path)) imgs = os.listdir(out_path) @@ -47,3 +51,7 @@ def test_cli_download(self): ds = ERA5NcImg(os.path.join(out_path, imgs[0]), parameter='swvl1') img = ds.read(datetime(2023, 1, 1)) assert img.data['swvl1'].shape == (721, 1440) + + +if __name__ == '__main__': + DownloadTest().test_cli_download() \ No newline at end of file diff --git a/tests/tests_era5/test_era5_reshuffle.py b/tests/tests_era5/test_era5_reshuffle.py index 9e53b1a..1461211 100644 --- a/tests/tests_era5/test_era5_reshuffle.py +++ b/tests/tests_era5/test_era5_reshuffle.py @@ -63,7 +63,7 @@ def test_cli_reshuffle_and_update(): ts_path = tempdir / 'ts' subprocess.call(["era5", "reshuffle", img_path, ts_path, "2010-01-01", - "2010-01-01", "-v", "swvl1,swvl2", "-l", "True" + "2010-01-01", "-v", "swvl1,swvl2", "-l", "True", "--bbox", "12.0", "46.0", "17.0", "50.0", "--h_steps", "0"]) From cf45bdffad35f62e02dba93d301a02971c6e6fd4 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Sun, 27 Oct 2024 11:50:35 +0100 Subject: [PATCH 19/20] Update test --- .github/workflows/ci.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4139f45..5fa0d30 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,6 +52,8 @@ jobs: - name: Install dependencies and package shell: bash -l {0} run: | + conda install -c conda-forge pygrib netcdf4 pyresample pykdtree + # On 
macos pykdtree was built with OpenMP (C-level parallelization library) support if [ ${{ matrix.os }} == "macos-latest" ] then @@ -62,11 +64,6 @@ then conda install -c conda-forge pygrib fi - # CDO we can test on ubuntu - if [ ${{ matrix.os }} == "ubuntu-latest" ] - then - conda install -c conda-forge cdo - fi pip install -e .[testing] - name: Run all tests From 893b4dc9dd1f890a3feaba0e23656b00a13f7909 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Sun, 27 Oct 2024 14:04:12 +0100 Subject: [PATCH 20/20] Update docs --- README.rst | 3 +++ docs/download.rst | 8 ++++---- docs/img2ts.rst | 23 ++++++++++++++++++++++- docs/variables_era5.rst | 2 ++ docs/variables_era5land.rst | 2 ++ 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 54bc8ae..42346fd 100644 --- a/README.rst +++ b/README.rst @@ -74,6 +74,9 @@ DataFrame. 2024-04-05 00:00:00 0.350266 0.346558 2024-04-05 12:00:00 0.343994 0.344498 +More programs are available to keep an existing image and time series record +up-to-date. Type ``era5 --help`` and ``era5land --help`` to see all available +programs. CDS API Setup ============= diff --git a/docs/download.rst b/docs/download.rst index f4bb2d1..2c4dc9d 100644 --- a/docs/download.rst +++ b/docs/download.rst @@ -1,5 +1,5 @@ Downloading ERA5 and ERA5-Land data -=================================== +----------------------------------- ERA5 (and ERA5-Land) data can be downloaded manually from the `Copernicus Data Store (CDS) `_ or automatically via the CDS api, @@ -25,9 +25,9 @@ resolution of the images is 6 hours by default, but can be changed using the --variables swvl1,swvl2,swvl3,swvl4 --h_steps 0,6,18,24 The names of the variables to download can be its long names, the short names -(as in the example) or the parameter IDs. We use the file -``ecmwf_models/era5/era5_lut.csv`` and ``ecmwf_models/era5/era5-land_lut.csv`` -to look up the right name for the CDS API. +(as in the example). 
See the :ref:`ERA5 variable table ` +and :ref:`ERA5-Land variable table ` to look up the right +name for the CDS API. By default, the command expects that you have set up your ``.cdsapirc`` file to identify with the data store as described above. Alternatively you can pass diff --git a/docs/img2ts.rst b/docs/img2ts.rst index e712574..1299742 100644 --- a/docs/img2ts.rst +++ b/docs/img2ts.rst @@ -1,5 +1,5 @@ Conversion to time series format -================================ +-------------------------------- For a lot of applications it is favorable to convert the image based format into a format which is optimized for fast time series retrieval. This is what we @@ -39,3 +39,24 @@ For all other option see the output up ``era5 reshuffle --help`` and Conversion to time series is performed by the `repurpose package `_ in the background. + +Append new image data to existing time series +--------------------------------------------- +Similar to the ``update_img`` program, we also provide programs to +simplify updating an existing time series record with newly downloaded +images via the ``era5 update_ts`` and ``era5land update_ts`` programs. +This will use the settings file created during the initial time series +conversion (with ``reshuffle``) and look for new image data in the same path +that is not yet available in the given time series record. + +This option is ideally used together with the ``update_img`` program in, e.g. +a cron job, to first download new images, and then append them to their time +series counterpart. + +.. code-block:: + + era5 update_ts /existing/ts/record + +Alternatively, you can also use the ``reshuffle`` command, with a target path +that already contains time series. This will also append new data (but make sure +you use the same settings as before). 
\ No newline at end of file diff --git a/docs/variables_era5.rst b/docs/variables_era5.rst index c0d505a..d2459bf 100644 --- a/docs/variables_era5.rst +++ b/docs/variables_era5.rst @@ -1,3 +1,5 @@ +.. _variables_era5: + ERA5 Variables -------------- diff --git a/docs/variables_era5land.rst b/docs/variables_era5land.rst index cf9d8fc..20d866a 100644 --- a/docs/variables_era5land.rst +++ b/docs/variables_era5land.rst @@ -1,3 +1,5 @@ +.. _variables_era5land: + ERA5-Land Variables -------------------