From 9a6c3f7fe27543896becffe2e5cfd782d21f5cfb Mon Sep 17 00:00:00 2001 From: veenstrajelmer <60435591+veenstrajelmer@users.noreply.github.com> Date: Wed, 25 Sep 2024 12:23:31 +0200 Subject: [PATCH] improved authentication check for cds (#1004) * improved authentication check for cds * updated whatsnew * fixed mindeps test * made test independent of cds * skip slow era5 tests on github --- .github/workflows/pytest-py313.yml | 2 +- .github/workflows/pytest-py39-mindeps.yml | 3 +- .github/workflows/pytest.yml | 2 +- dfm_tools/download.py | 19 ++-- docs/whats-new.md | 3 + pyproject.toml | 1 + tests/conftest.py | 12 +++ tests/test_download.py | 104 +++++++++++----------- tests/test_modelbuilder.py | 14 +-- tests/test_xarray_helpers.py | 1 + 10 files changed, 87 insertions(+), 74 deletions(-) diff --git a/.github/workflows/pytest-py313.yml b/.github/workflows/pytest-py313.yml index 354af4b97..64c0b7754 100644 --- a/.github/workflows/pytest-py313.yml +++ b/.github/workflows/pytest-py313.yml @@ -41,4 +41,4 @@ jobs: COPERNICUS_MARINE_SERVICE_USERNAME: ${{ secrets.COPERNICUS_MARINE_SERVICE_USERNAME }} COPERNICUS_MARINE_SERVICE_PASSWORD: ${{ secrets.COPERNICUS_MARINE_SERVICE_PASSWORD }} run: | - pytest -m "not requireslocaldata" --cov=dfm_tools --cov-report xml --cov-report term + pytest -m "not requireslocaldata and not era5slow" --cov=dfm_tools --cov-report xml --cov-report term diff --git a/.github/workflows/pytest-py39-mindeps.yml b/.github/workflows/pytest-py39-mindeps.yml index 5369b631e..89164b286 100644 --- a/.github/workflows/pytest-py39-mindeps.yml +++ b/.github/workflows/pytest-py39-mindeps.yml @@ -29,6 +29,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install -r requirements.txt + pip install -e . - name: list env contents run: | conda info @@ -46,4 +47,4 @@ jobs: COPERNICUS_MARINE_SERVICE_USERNAME: ${{ secrets.COPERNICUS_MARINE_SERVICE_USERNAME }} COPERNICUS_MARINE_SERVICE_PASSWORD: ${{ secrets.COPERNICUS_MARINE_SERVICE_PASSWORD }} run: | - pytest -m "not requireslocaldata" --cov=dfm_tools --cov-report xml --cov-report term + pytest -m "not requireslocaldata and not era5slow" --cov=dfm_tools --cov-report xml --cov-report term diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 417e3aae5..b262217b5 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -45,7 +45,7 @@ jobs: COPERNICUS_MARINE_SERVICE_USERNAME: ${{ secrets.COPERNICUS_MARINE_SERVICE_USERNAME }} COPERNICUS_MARINE_SERVICE_PASSWORD: ${{ secrets.COPERNICUS_MARINE_SERVICE_PASSWORD }} run: | - pytest -m "not requireslocaldata" --cov=dfm_tools --cov-report xml --cov-report term + pytest -m "not requireslocaldata and not era5slow" --cov=dfm_tools --cov-report xml --cov-report term - uses: codecov/codecov-action@v4 env: CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}} \ No newline at end of file diff --git a/dfm_tools/download.py b/dfm_tools/download.py index af152590b..699d92355 100644 --- a/dfm_tools/download.py +++ b/dfm_tools/download.py @@ -11,6 +11,7 @@ import shutil import subprocess import sys +from requests import HTTPError __all__ = [ "download_ERA5", @@ -129,18 +130,11 @@ def cds_credentials(): # check if the authentication works # TODO: requested "check authentication" method in https://github.com/ecmwf/cdsapi/issues/111 try: - # checks whether CDS apikey is in environment variable or ~/.cdsapirc file c = cdsapi.Client() - # checks whether authentication is succesful (correct combination of url and apikey) - c.retrieve(name='dummy', request={}) - except Exception as e: - if "dummy not found" in str(e): # Exception - # catching incorrect name, but authentication was successful - print('found ECMWF API-key and authorization successful') - elif "Authentication failed" in str(e): # HTTPError - cds_remove_credentials_raise(reason='Authentication failed') - else: - raise e + c.client.check_authentication() + print('found ECMWF API-key and authorization successful') + except HTTPError as e: + cds_remove_credentials_raise(reason=str(e)) def cds_get_file(): @@ -181,7 +175,8 @@ def cds_remove_credentials_raise(reason=''): if os.path.isfile(file_cds_credentials): os.remove(file_cds_credentials) - raise ValueError(f"{reason}. The CDS/ECMWF apikey environment variables and rcfile were deleted. Please try again.") + raise ValueError(f"{reason}. The CDS/ECMWF apikey file (~/.cdsapirc) was deleted. " + "Please try again and provide your apikey when prompted.") def download_CMEMS(varkey, diff --git a/docs/whats-new.md b/docs/whats-new.md index 348b12e29..64f1395bc 100644 --- a/docs/whats-new.md +++ b/docs/whats-new.md @@ -2,6 +2,9 @@ ## UNRELEASED +### Fix +- more robust CDS/ECMWF authentication check and support for new cdsapi versions in [#1004](https://github.com/Deltares/dfm_tools/pull/1004) + ## 0.27.0 (2024-09-09) diff --git a/pyproject.toml b/pyproject.toml index 30c4466c3..bcb497170 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,7 @@ markers = [ "acceptance: mark a test as acceptance. Used for non-functional requirements and data that needs to be human-verified", "requireslocaldata: mark a test that cannot run on Github", "requiressecrets: mark a test that requires environment variables to be set (e.g. via Github secrets)", + "era5slow: mark a test that causes timeout errors to prevent failing github testbanks due to badly performing cds-beta", ] [tool.flake8] diff --git a/tests/conftest.py b/tests/conftest.py index 49a8bd299..e1854308d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,8 @@ import pytest import dfm_tools as dfmt import os +import xarray as xr +import pandas as pd @pytest.mark.requiressecrets @@ -25,3 +27,13 @@ def file_nc_era5_pattern(tmp_path): # assert downloaded files file_nc_era5_pattern = os.path.join(tmp_path, "*.nc") return file_nc_era5_pattern + + +@pytest.fixture +def ds_era5_empty(): + # create dummy dataset + ds_era5_empty = xr.Dataset() + ds_era5_empty['longitude'] = xr.DataArray() + time_data = pd.date_range('2010-01-31', '2010-02-01', freq="3h") + ds_era5_empty['time'] = xr.DataArray(time_data, dims='time') + return ds_era5_empty diff --git a/tests/test_download.py b/tests/test_download.py index fe0cc9cdc..9283641ea 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -76,7 +76,7 @@ def test_cds_credentials_newurl_incorrectkey_rcfile(): cds_credentials() set_cds_credentials_ifnot_none(cds_url, cds_apikey) assert "Authentication failed" in str(e.value) - assert "The CDS/ECMWF apikey environment variables and rcfile were deleted" in str(e.value) + assert "The CDS/ECMWF apikey file (~/.cdsapirc) was deleted" in str(e.value) @pytest.mark.unittest @@ -90,7 +90,7 @@ def test_cds_credentials_newurl_incorrectkey_envvars(): cds_credentials() set_cds_credentials_ifnot_none(cds_url, cds_apikey) assert "Authentication failed" in str(e.value) - assert "The CDS/ECMWF apikey environment variables and rcfile were deleted" in str(e.value) + assert "The CDS/ECMWF apikey file (~/.cdsapirc) was deleted" in str(e.value) @pytest.mark.unittest @@ -109,7 +109,7 @@ def test_cds_credentials_oldurl_incorrectkey_rcfile(): cds_credentials() set_cds_credentials_ifnot_none(cds_url, cds_apikey) assert "Authentication failed" in str(e.value) # should actually be "Old CDS URL found", but the url from the file is ignored, which is acceptable - assert "The CDS/ECMWF apikey environment variables and rcfile were deleted" in str(e.value) + assert "The CDS/ECMWF apikey file (~/.cdsapirc) was deleted" in str(e.value) @pytest.mark.unittest @@ -123,7 +123,7 @@ def test_cds_credentials_oldurl_incorrectkey_envvars(): cds_credentials() set_cds_credentials_ifnot_none(cds_url, cds_apikey) assert "Old CDS URL found" in str(e.value) - assert "The CDS/ECMWF apikey environment variables and rcfile were deleted" in str(e.value) + assert "The CDS/ECMWF apikey file (~/.cdsapirc) was deleted" in str(e.value) @pytest.mark.unittest @@ -142,7 +142,7 @@ def test_cds_credentials_newurl_oldkey_rcfile(): cds_credentials() set_cds_credentials_ifnot_none(cds_url, cds_apikey) assert "Old CDS API-key found (with :)" in str(e.value) - assert "The CDS/ECMWF apikey environment variables and rcfile were deleted" in str(e.value) + assert "The CDS/ECMWF apikey file (~/.cdsapirc) was deleted" in str(e.value) @pytest.mark.unittest @@ -156,7 +156,7 @@ def test_cds_credentials_newurl_oldkey_envvars(): cds_credentials() set_cds_credentials_ifnot_none(cds_url, cds_apikey) assert "Old CDS API-key found (with :)" in str(e.value) - assert "The CDS/ECMWF apikey environment variables and rcfile were deleted" in str(e.value) + assert "The CDS/ECMWF apikey file (~/.cdsapirc) was deleted" in str(e.value) @pytest.mark.requiressecrets @@ -165,52 +165,6 @@ def test_copernicusmarine_credentials(): copernicusmarine_credentials() -@pytest.mark.requiressecrets -@pytest.mark.unittest -@pytest.mark.timeout(60) # useful since CDS downloads are terribly slow sometimes, so skip in that case -def test_download_era5(file_nc_era5_pattern): - # file_nc_era5_pattern comes from conftest.py - file_list = glob.glob(file_nc_era5_pattern) - assert len(file_list) == 2 - - ds = xr.open_mfdataset(file_nc_era5_pattern) - - assert 'valid_time' in ds.dims # TODO: if this fails, remove the exception below and in preprocess_ERA5 - - timedim = 'time' - # datasets retrieved with new cds-beta have valid_time instead of time dimn/varn - # https://forum.ecmwf.int/t/new-time-format-in-era5-netcdf-files/3796/5?u=jelmer_veenstra - # TODO: can be removed after https://github.com/Unidata/netcdf4-python/issues/1357 or https://forum.ecmwf.int/t/3796 is fixed - if 'valid_time' in ds.dims: - timedim = 'valid_time' - - assert ds.sizes[timedim] == 1416 - assert ds[timedim].to_pandas().iloc[0] == pd.Timestamp('2010-01-01') - assert ds[timedim].to_pandas().iloc[-1] == pd.Timestamp('2010-02-28 23:00') - - # check if there are no integers in the dataset anymore - # this was the case before CDS-beta in https://github.com/Deltares/dfm_tools/issues/239 - msl_encoding = ds['msl'].encoding - assert str(msl_encoding['dtype']) == 'float32' - assert 'scale_factor' not in msl_encoding.keys() - - -@pytest.mark.requiressecrets -@pytest.mark.unittest -@pytest.mark.timeout(60) # useful since CDS downloads are terribly slow sometimes, so skip in that case -def test_download_era5_unsupported_varkey(): - date_min = '2010-01-31' - date_max = '2010-02-01' - longitude_min, longitude_max, latitude_min, latitude_max = 2, 3, 51, 52 #test domain - varkey = 'unexisting' - with pytest.raises(KeyError) as e: - dfmt.download_ERA5(varkey, - longitude_min=longitude_min, longitude_max=longitude_max, latitude_min=latitude_min, latitude_max=latitude_max, - date_min=date_min, date_max=date_max, - dir_output='.', overwrite=True) - assert '"unexisting" not available' in str(e.value) - - @pytest.mark.requiressecrets @pytest.mark.unittest def test_download_cmems_my(tmp_path): @@ -280,3 +234,49 @@ def test_download_hycom(tmp_path): assert ds.sizes["time"] == 2 assert ds.time.to_pandas().iloc[0] == pd.Timestamp('2010-01-01') assert ds.time.to_pandas().iloc[-1] == pd.Timestamp('2010-01-02') + + +@pytest.mark.requiressecrets +@pytest.mark.unittest +def test_download_era5_unsupported_varkey(): + date_min = '2010-01-31' + date_max = '2010-02-01' + longitude_min, longitude_max, latitude_min, latitude_max = 2, 3, 51, 52 #test domain + varkey = 'unexisting' + with pytest.raises(KeyError) as e: + dfmt.download_ERA5(varkey, + longitude_min=longitude_min, longitude_max=longitude_max, latitude_min=latitude_min, latitude_max=latitude_max, + date_min=date_min, date_max=date_max, + dir_output='.', overwrite=True) + assert '"unexisting" not available' in str(e.value) + + +@pytest.mark.requiressecrets +@pytest.mark.unittest +@pytest.mark.era5slow # temporarily skip these on github +@pytest.mark.timeout(60) # useful since CDS downloads are terribly slow sometimes, so skip in that case +def test_download_era5(file_nc_era5_pattern): + # file_nc_era5_pattern comes from conftest.py + file_list = glob.glob(file_nc_era5_pattern) + assert len(file_list) == 2 + + ds = xr.open_mfdataset(file_nc_era5_pattern) + + assert 'valid_time' in ds.dims # TODO: if this fails, remove the exception below and in preprocess_ERA5 + + timedim = 'time' + # datasets retrieved with new cds-beta have valid_time instead of time dimn/varn + # https://forum.ecmwf.int/t/new-time-format-in-era5-netcdf-files/3796/5?u=jelmer_veenstra + # TODO: can be removed after https://github.com/Unidata/netcdf4-python/issues/1357 or https://forum.ecmwf.int/t/3796 is fixed + if 'valid_time' in ds.dims: + timedim = 'valid_time' + + assert ds.sizes[timedim] == 1416 + assert ds[timedim].to_pandas().iloc[0] == pd.Timestamp('2010-01-01') + assert ds[timedim].to_pandas().iloc[-1] == pd.Timestamp('2010-02-28 23:00') + + # check if there are no integers in the dataset anymore + # this was the case before CDS-beta in https://github.com/Deltares/dfm_tools/issues/239 + msl_encoding = ds['msl'].encoding + assert str(msl_encoding['dtype']) == 'float32' + assert 'scale_factor' not in msl_encoding.keys() diff --git a/tests/test_modelbuilder.py b/tests/test_modelbuilder.py index e7ca62e36..6dc52cf9e 100644 --- a/tests/test_modelbuilder.py +++ b/tests/test_modelbuilder.py @@ -186,18 +186,18 @@ def test_make_paths_relative(tmp_path): @pytest.mark.unittest -@pytest.mark.requiressecrets -@pytest.mark.timeout(60) # useful since CDS downloads are terribly slow sometimes, so skip in that case -def test_preprocess_merge_meteofiles_era5_unsupported_varlist(file_nc_era5_pattern, tmp_path): +def test_preprocess_merge_meteofiles_era5_unsupported_varlist(tmp_path, ds_era5_empty): + file_nc = os.path.join(tmp_path,"era5_msl_empty.nc") + ds_era5_empty.to_netcdf(file_nc) + ext_old = None # this won't be reached, so not relevant what to supply - date_min = '2010-01-31' - date_max = '2010-02-01' + date_min = ds_era5_empty.time.to_pandas().iloc[0] + date_max = ds_era5_empty.time.to_pandas().iloc[-1] varlist_list = ['msl'] - dir_output_data_era5 = os.path.dirname(file_nc_era5_pattern) with pytest.raises(KeyError) as e: ext_old = dfmt.preprocess_merge_meteofiles_era5(ext_old=ext_old, varkey_list=varlist_list, - dir_data=dir_output_data_era5, + dir_data=tmp_path, dir_output=tmp_path, time_slice=slice(date_min, date_max)) assert "is not supported by dfmt.preprocess_merge_meteofiles_era5" in str(e.value) diff --git a/tests/test_xarray_helpers.py b/tests/test_xarray_helpers.py index 2fda86e9b..2361feb1d 100644 --- a/tests/test_xarray_helpers.py +++ b/tests/test_xarray_helpers.py @@ -14,6 +14,7 @@ @pytest.mark.unittest @pytest.mark.requiressecrets +@pytest.mark.era5slow # temporarily skip these on github @pytest.mark.timeout(60) # useful since CDS downloads are terribly slow sometimes, so skip in that case def test_merge_meteofiles(file_nc_era5_pattern): # file_nc_era5_pattern comes from file_nc_era5_pattern() in conftest.py