diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 01a4f6f..5fa0d30 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,45 +4,42 @@ # For more information see: # https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Automated Tests +name: CI/CD on: push: pull_request: workflow_dispatch: - schedule: # only upstream, won't trigger on forks! - - cron: '0 0 * * *' # daily + schedule: + - cron: '0 0 * * *' # nightly build + +env: # Required to upload docker image + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + jobs: build: strategy: matrix: - python-version: [ '3.8', '3.9', '3.10' ] - os: [ "ubuntu-latest" ] - ymlfile: [ "latest.yml" ] - include: - - os: "windows-latest" - python-version: "3.10" - ymlfile: "latest.yml" - name: Py${{ matrix.python-version }}@${{ matrix.os }}|${{ matrix.ymlfile }} + python-version: ['3.9', '3.12'] + os: [ "ubuntu-latest" , "windows-latest", "macos-latest"] + name: Py${{ matrix.python-version }}@${{ matrix.os }} runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: true fetch-depth: 0 lfs: true - name: Checkout LFS objects run: git lfs checkout - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: - miniconda-version: "latest" auto-update-conda: true - channel-priority: flexible python-version: ${{ matrix.python-version }} - environment-file: environment/${{ matrix.ymlfile }} - mamba-version: "*" - activate-environment: ecmwf_models # todo: must match with name in latest.yml + channel-priority: flexible + activate-environment: ecmwf_models auto-activate-base: false - name: Print Infos shell: bash -l {0} @@ -52,22 +49,35 @@ jobs: pip list which pip which python - - name: Export Environment + - name: Install dependencies and package shell: bash -l {0} run: | - mkdir -p .artifacts - if [ ${{ matrix.ymlfile }} == "latest.yml" ] + conda install -c conda-forge pygrib netcdf4 pyresample pykdtree + + # On macos pykdtree was built with OpenMP (C-level parallelization library) support + if [ ${{ matrix.os }} == "macos-latest" ] then - filename=pinned_${{ matrix.python-version }}_${{ matrix.os }}.yml - conda env export --no-builds | grep -v "prefix" > .artifacts/$filename + conda install -c conda-forge pykdtree fi - - name: Install package and test + # And on windows pygrib wheels are not available + if [ ${{ matrix.os }} == "windows-latest" ] + then + conda install -c conda-forge pygrib + fi + + pip install -e .[testing] + - name: Run all tests env: - CDSAPI_KEY: ${{ secrets.CDSAPI_KEY }} + CDS_APIKEY: ${{ secrets.CDS_APIKEY }} shell: bash -l {0} - run: | - pip install . + run: | pytest + - name: Export Environment + shell: bash -l {0} + run: | + mkdir -p artifacts + filename=env_py${{ matrix.python-version }}_${{ matrix.os }}.yml + conda env export --no-builds | grep -v "prefix" > artifacts/$filename - name: Upload Coverage shell: bash -l {0} run: | @@ -79,23 +89,25 @@ jobs: - name: Create wheel and dist package shell: bash -l {0} run: | - pip install setuptools_scm + pip install setuptools_scm twine if [ ${{ matrix.os }} == "windows-latest" ] then # build whls on windows pip install wheel - python setup.py bdist_wheel --dist-dir .artifacts/dist + python setup.py bdist_wheel --dist-dir artifacts/dist else # build dist on linux - python setup.py sdist --dist-dir .artifacts/dist + python setup.py sdist --dist-dir artifacts/dist fi - ls .artifacts/dist + ls artifacts/dist + twine check artifacts/dist/* - name: Upload Artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: Artifacts - path: .artifacts/* - coveralls: + name: Artifacts-py${{ matrix.python-version }}-${{ matrix.os }} + path: artifacts/* + + publish-coverage: name: Submit Coveralls 👚 needs: build runs-on: ubuntu-latest @@ -106,9 +118,13 @@ jobs: pip3 install --upgrade coveralls && coveralls --service=github --finish env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - publish: - name: Upload to PyPI - if: startsWith(github.ref, 'refs/tags/v') && startsWith(github.repository, 'TUW-GEO') + + publish-pypi-package: + name: Publish PyPI 🚀 + # Will only trigger when Tests have passed on release/tag from the TUW-GEO repository + if: | + startsWith(github.ref, 'refs/tags/v') && + startsWith(github.repository, 'TUW-GEO') needs: build runs-on: ubuntu-latest steps: @@ -117,7 +133,11 @@ jobs: echo "GITHUB_REF = $GITHUB_REF" echo "GITHUB_REPOSITORY = $GITHUB_REPOSITORY" - name: Download Artifacts - uses: actions/download-artifact@v4.1.7 + uses: actions/download-artifact@v4 + with: + path: Artifacts + pattern: Artifacts-* + merge-multiple: true - name: Display downloaded files run: ls -aR - name: Upload to PyPI @@ -128,4 +148,46 @@ jobs: verify_metadata: true packages_dir: Artifacts/dist/ user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} # todo: add pypi token to GHA + password: ${{ secrets.PYPI_API_TOKEN }} # this needs to be uploaded to github actions secrets + + publish-docker-image: + name: Publish Docker 📦 + # Will only trigger when Tests have passed on release/tag from the TUW-GEO repository + if: | + startsWith(github.ref, 'refs/tags/v') && + startsWith(github.repository, 'TUW-GEO') + needs: build + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + attestations: write + id-token: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + - name: Build and push Docker image + id: push + uses: docker/build-push-action@v6 + with: + context: . + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + - name: Generate artifact attestation + uses: actions/attest-build-provenance@v1 + with: + subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}} + subject-digest: ${{ steps.push.outputs.digest }} + push-to-registry: true \ No newline at end of file diff --git a/.readthedocs.yml b/.readthedocs.yml index a7c33b6..4093fff 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,19 +1,17 @@ version: 2 -sphinx: - configuration: docs/conf.py - -formats: - - pdf - -submodules: - include: all - -conda: - environment: environment/latest.yml +build: + os: ubuntu-22.04 + tools: + python: mambaforge-4.10 python: - version: 3.7 install: - method: pip - path: . + path: .[docs] + +sphinx: + configuration: docs/conf.py + +conda: + environment: docs/env.yml \ No newline at end of file diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 96aab72..9f6cd88 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -87,7 +87,7 @@ Before you start coding, we recommend creating an isolated `virtual environment`_ to avoid any problems with your installed Python packages. This can easily be done via Miniconda_:: - conda env create -f environment/latest.yml + conda create -n ecmwf_models python=3.12 pygrib netcdf4 pyresample pykdtree conda activate ecmwf_models Clone the repository diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..5e55b08 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +FROM mambaorg/micromamba:1.3.1-alpine +MAINTAINER Wolfgang Preimesberger + +USER root + +RUN apk update && \ + apk upgrade && \ + apk add git && \ + apk add build-base && \ + apk add g++ && \ + apk add bsd-compat-headers && \ + apk add tiff + +WORKDIR /app + +COPY . /app + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +RUN micromamba install -y -n base -c conda-forge python=3.12 pygrib netcdf4 pyresample pykdtree +RUN pip install /app/. + +RUN micromamba clean --all --yes + +# Clean up the src code, as it is installed now +RUN rm -rf /app + +ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"] \ No newline at end of file diff --git a/README.rst b/README.rst index 096c1c7..42346fd 100644 --- a/README.rst +++ b/README.rst @@ -2,87 +2,113 @@ ecmwf_models ============ +|ci| |cov| |pip| |doc| -.. image:: https://github.com/TUW-GEO/ecmwf_models/workflows/Automated%20Tests/badge.svg?branch=master +.. |ci| image:: https://github.com/TUW-GEO/ecmwf_models/actions/workflows/ci.yml/badge.svg?branch=master :target: https://github.com/TUW-GEO/ecmwf_models/actions -.. image:: https://coveralls.io/repos/github/TUW-GEO/ecmwf_models/badge.svg?branch=master - :target: https://coveralls.io/github/TUW-GEO/ecmwf_models?branch=master +.. |cov| image:: https://coveralls.io/repos/TUW-GEO/ecmwf_models/badge.png?branch=master + :target: https://coveralls.io/r/TUW-GEO/ecmwf_models?branch=master -.. image:: https://badge.fury.io/py/ecmwf-models.svg +.. |pip| image:: https://badge.fury.io/py/ecmwf-models.svg :target: https://badge.fury.io/py/ecmwf-models -.. image:: https://readthedocs.org/projects/ecmwf-models/badge/?version=latest +.. |doc| image:: https://readthedocs.org/projects/ecmwf-models/badge/?version=latest :target: https://ecmwf-models.readthedocs.io/en/latest/ -Readers and converters for data from the `ECMWF reanalysis models + +Readers and converters for `ECMWF reanalysis (ERA) data `_. Written in Python. Works great in combination with `pytesmo `_. -Citation -======== - -.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.593533.svg - :target: https://doi.org/10.5281/zenodo.593533 - -If you use the software in a publication then please cite it using the Zenodo DOI. -Be aware that this badge links to the latest package version. - -Please select your specific version at https://doi.org/10.5281/zenodo.593533 to get the DOI of that version. -You should normally always use the DOI for the specific version of your record in citations. -This is to ensure that other researchers can access the exact research artefact you used for reproducibility. - -You can find additional information regarding DOI versioning at http://help.zenodo.org/#versioning Installation ============ Install required C-libraries via conda. For installation we recommend -`Miniconda `_. So please install it according -to the official installation instructions. As soon as you have the ``conda`` -command in your shell you can continue: +`Miniconda `_: .. code:: - conda install -c conda-forge pandas pygrib netcdf4 pyresample xarray + conda install -c conda-forge pygrib netcdf4 pyresample pykdtree -The following command will download and install all the needed pip packages as well -as the ecmwf-model package itself. +Afterwards the following command will install all remaining dependencies as +well as the ``ecmwf_models`` package itself. .. code:: pip install ecmwf_models -To create a full development environment with conda, the `yml` files inside -the folder `environment/` in this repository can be used. Both environements -should work. The file `latest` should install the newest version of most -dependencies. The file `pinned` is a fallback option and should always work. +Quickstart +========== + +Download image data from CDS using the ``era5 download`` and ``era5land download`` +shell command (see ``era5 download --help`` for all options) ... -.. code:: +.. code-block:: shell + + era5land download /tmp/era5/img -s 2024-04-01 -e 2024-04-05 -v swvl1,swvl2 --h_steps 0,12 + +... and convert them to time series (ideally for a longer period). Check ``era5 reshuffle --help`` - git clone --recursive git@github.com:TUW-GEO/ecmwf_models.git ecmwf_models - cd ecmwf_models - conda env create -f environment/latest.yml - source activate ecmwf_models - python setup.py develop - pytest +.. code-block:: shell + era5land reshuffle /tmp/era5/img /tmp/era5/ts 2024-04-01 2024-04-05 --land_points + +Finally, in python, read the time series data for a location as a pandas +DataFrame. + +.. code-block:: python + + >> from ecmwf_models.interface import ERATs + >> ds = ERATs('/tmp/era5/ts') + >> ds.read(18, 48) # (lon, lat) + + swvl1 swvl2 + 2024-04-01 00:00:00 0.318054 0.329590 + 2024-04-01 12:00:00 0.310715 0.325958 + 2024-04-02 00:00:00 0.360229 0.323502 + ... ... ... + 2024-04-04 12:00:00 0.343353 0.348755 + 2024-04-05 00:00:00 0.350266 0.346558 + 2024-04-05 12:00:00 0.343994 0.344498 + +More programs are available to keep an exisiting image and time series record +up-to-date. Type ``era5 --help`` and ``era5land --help`` to see all available +programs. + +CDS API Setup +============= + +In order to download data from CDS, this package uses the CDS API +(https://pypi.org/project/cdsapi/). You can either pass your credentials +directly on the command line (which might be unsafe) or set up a +.cdsapirc file in your home directory (recommended). +Please see the description at https://cds.climate.copernicus.eu/how-to-api. Supported Products ================== At the moment this package supports -- **ERA Interim** (deprecated) - **ERA5** - **ERA5-Land** reanalysis data in **grib** and **netcdf** format (download, reading, time series creation) with a default spatial -sampling of 0.75 degrees (ERA Interim), 0.25 degrees (ERA5), resp. 0.1 degrees (ERA5-Land). +sampling of 0.25 degrees (ERA5), and 0.1 degrees (ERA5-Land). It should be easy to extend the package to support other ECMWF reanalysis products. This will be done as need arises. +Citation +======== + +.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.593533.svg + :target: https://doi.org/10.5281/zenodo.593533 + +If you use the software in a publication then please cite it using the Zenodo DOI. +Be aware that this badge links to the latest package version. + Contribute ========== diff --git a/docs/download.rst b/docs/download.rst index 600bb6c..2c4dc9d 100644 --- a/docs/download.rst +++ b/docs/download.rst @@ -1,70 +1,49 @@ -Downloading ERA5 Data -========================== +Downloading ERA5 and ERA5-Land data +----------------------------------- ERA5 (and ERA5-Land) data can be downloaded manually from the `Copernicus Data Store (CDS) `_ or automatically via the CDS api, -as done in the download module (era5_download). Before you can use this, you -have to set up an `account at the CDS -`_ and setup -the `CDS key `_. - -Then you can use the program ``era5_download`` to download ERA5 images between -a passed start and end date. -``era5_download --help`` will show additional information on using the command. +as done in the download modules (``era5 download`` and ``era5land download``). +Before you can use this, you have to set up an `account at the CDS +`_ and get your +`API key `_. +Then you can use the programs ``era5 download`` and ``era5land download`` to +download ERA5 images between a passed start and end date. +Passing ``--help`` will show additional information on using the commands. For example, the following command in your terminal would download ERA5 images for all available layers of soil moisture in netcdf format, between -January 1st and February 1st 2000 in grib format into ``/path/to/storage``. +January 1st and February 1st 2000 in netcdf format into ``/path/to/storage``. The data will be stored in subfolders of the format ``YYYY/jjj``. The temporal -resolution of the images is 6 hours by default. +resolution of the images is 6 hours by default, but can be changed using the +``--h_steps`` option. .. code-block:: shell - era5_download /path/to/storage -s 2000-01-01 -e 2000-02-01 --variables swvl1 swvl2 swvl3 swvl4 + era5 download /path/to/storage -s 2000-01-01 -e 2000-02-01 \ + --variables swvl1,swvl2,swvl3,swvl4 --h_steps 0,6,18,24 The names of the variables to download can be its long names, the short names -(as in the example) or the parameter IDs. We use the ``era5_lut.csv`` file to -look up the right name for the CDS API. -Other flags, that can be activated in ``era5_download`` are: - -- **-h (--help)** : shows the help text for the download function -- **-p (--product)**: specify the ERA5 product to download. Choose either ERA5 or ERA5-Land. Default is ERA5. -- **-keep (--keep_original)** : keeps the originally downloaded files as well. - We split the downloaded, monthly stacks into single images and discard the original - files by default. -- **-grb (--as_grib)** : download the data in grib format instead of the default nc4 - format (grib reading is not supported on Windows OS). -- **--h_steps** : full hours for which images are downloaded (e.g. --h_steps 0 - would download only data at 00:00 UTC). By default we use 0, 6, 12 and 18. - - -Downloading ERA Interim Data -================================= -**ERA-Interim has been decommissioned. Use ERA5 instead.** - -ERA-Interim data can be downloaded manually from the ECMWF servers. It can also -be done automatically using the ECMWF API. To use the ECMWF API you have to be -registered, install the ecmwf-api Python package and setup the ECMWF API Key. A -guide for this is provided by `ECMWF -`_. - -After that you can use the command line program ``eraint_download`` to download -images with a temporal resoltuion of 6 hours between a passed start and end date. -``eraint_download --help`` will show additional information on using the command. - -For example, the following command in your terminal would download ERA Interim -soil moisture images of all available layers (see the -`Variable DB `_) in netcdf format on -the default gaussian grid for ERA-Interim (0.75°x0.75°) into -the folder ``/path/to/storage`` between January 1st and February 1st 2000. -The data will be stored in subfolders of the format ``YYYY/jjj``, where ``YYYY`` -describes the year and ``jjj`` the day of the year for the downloaded files. - -.. code-block:: shell - - eraint_download /path/to/storage -s 2000-01-01 -e 2000-02-01 --variables swvl1 swvl2 swvl3 swvl4 - -Additional optional parameters allow downloading images in netcdf format, and -in a different spatial resolution (see the --help function and descriptions for -downloading ERA5 data) \ No newline at end of file +(as in the example). See the :ref:`ERA5 variable table ` +and :ref:`ERA5-Land variable table ` to look up the right +name for the CDS API. + +By default, the command expects that you have set up your ``.cdsapirc`` file +to identify with the data store as described above. Alternatively you can pass +your token directly with the download command using the ``--cds_token`` option. +Or you can set an environment variable ``CDSAPI_KEY`` that contains your token. + +We recommend downloading data in netcdf format, however, using the ``--as_grib`` +option, you can also download data in grib format. + +For all other available options, type ``era5 download --help``, +or ``era5land download --help`` respectively + +Updating an existing record +--------------------------- +After some time, new ERA data will become available. You can then use the +program ``era5 update_img`` with a path to the existing record, to download +new images with the same settings that became available since the last time +the record was downloaded. You might even set up a cron job to check for new +data in regular intervals to keep your copy up-to-date. \ No newline at end of file diff --git a/docs/env.yml b/docs/env.yml new file mode 100644 index 0000000..a379691 --- /dev/null +++ b/docs/env.yml @@ -0,0 +1,7 @@ +# To keep the RTD build as small as possible, we define a separate .yml here +name: docs +channels: +- conda-forge +- defaults +dependencies: +- python=3.12 \ No newline at end of file diff --git a/docs/img2ts.rst b/docs/img2ts.rst index fd1d1b1..1299742 100644 --- a/docs/img2ts.rst +++ b/docs/img2ts.rst @@ -1,5 +1,5 @@ Conversion to time series format -================================ +-------------------------------- For a lot of applications it is favorable to convert the image based format into a format which is optimized for fast time series retrieval. This is what we @@ -7,44 +7,56 @@ often need for e.g. validation studies. This can be done by stacking the images into a netCDF file and choosing the correct chunk sizes or a lot of other methods. We have chosen to do it in the following way: -- Store only the reduced gaußian grid points (for grib data) since that saves space. -- Store the time series in netCDF4 in the Climate and Forecast convention +- Store the time series as netCDF4 Climate and Forecast convention (CF) `Orthogonal multidimensional array representation `_ -- Store the time series in 5x5 degree cells. This means there will be 2566 cell +- Store the time series in 5x5 degree cells. This means there will be up to 2566 cell files and a file called ``grid.nc`` which contains the information about which grid point is stored in which file. This allows us to read a whole 5x5 degree area into memory and iterate over the time series quickly. .. image:: 5x5_cell_partitioning.png - :target: _images/5x5_cell_partitioning.png + :target: 5x5_cell_partitioning.png -This conversion can be performed using the ``era5_reshuffle`` (respectively -``eraint_reshuffle``) command line program. An example would be: +This conversion can be performed using the ``era5 reshuffle`` (respectively +``era5land reshuffle``) command line program. An example would be: .. code-block:: shell - era5_reshuffle /era_data /timeseries/data 2000-01-01 2001-01-01 swvl1 swvl2 + era5 reshuffle /path/to/img /out/ts/path 2000-01-01 2000-12-31 \ + -v swvl1,swvl2 --h_steps 0,12 --bbox -10 30 30 60 --land_points -Which would take 6-hourly ERA5 images stored in ``/era_data`` from January -1st 2000 to January 1st 2001 and store the parameters "swvl1" and "swvl2" as time -series in the folder ``/timeseries/data``. If you time series should have a different -resolution than 6H, use the ``h_steps`` flag here accordingly (images to use for time -series generation have to be in the downloaded raw data). -The passed names have to correspond with the names in the downloaded file, -i.e. use the variable short names here. -Other flags, that can be used in ``era5_reshuffle`` are: +Which would take (previously downloaded) ERA5 images (at time stamps 0:00 and 12:00 UTC) +stored in `/path/to/img` from January 1st 2000 to December 31st 2000 and store the +data within land points of the selected bounding box of variables "swvl1" and +"swvl2" as time series in the folder ``/out/ts/path``. -- **-h (--help)** : Shows the help text for the reshuffle function -- **--land_points** : Reshuffle and store only data over land points. -- **-h_steps (--as_grib)** : full hours for which images are reshuffled (e.g. --h_steps 0 - would reshuffle only data at 00:00 UTC). By default we use 0, 6, 12 and 18. -- **--imgbuffer** : The number of images that are read into memory before converting - them into time series. Bigger numbers make the conversion faster but consume more memory. +The passed variable names (``-v``) have to correspond with the names in the +downloaded file, i.e. use the variable short names here. +For all other option see the output up ``era5 reshuffle --help`` and +``era5land reshuffle --help`` Conversion to time series is performed by the `repurpose package -`_ in the background. For custom settings -or other options see the `repurpose documentation -`_ and the code in -``ecmwf_models.reshuffle``. +`_ in the background. + +Append new image data to existing time series +--------------------------------------------- +Similar to the ``update_img`` program, we also provide programs to +simplify updating an existing time series record with newly downloaded +images via the ``era5 update_ts`` and ``era5land update_ts`` programs. +This will use the settings file created during the initial time series +conversion (with ``reshuffle``) and look for new image data in the same path +that is not yet available in the given time series record. + +This option is ideally used together with the ``update_img`` program in, e.g. +a cron job, to first download new images, and then append them to their time +series counterpart. + +.. code-block:: + + era5 update_ts /existing/ts/record + +Alternatively, you can also use the ``reshuffle`` command, with a target path +that already contains time series. This will also append new data (but make sure +you use the same settings as before). \ No newline at end of file diff --git a/docs/img_read.rst b/docs/img_read.rst index 2e55c57..67a9d1e 100644 --- a/docs/img_read.rst +++ b/docs/img_read.rst @@ -1,48 +1,44 @@ Reading data ============ -After downloading the data for ERA Interim or ERA5 via ``eraint_download`` resp. -``era5_download``, images can be read with the ``ERA5GrbDs`` and -``ERA5NcDs`` (for grib and netcdf image stacks), respectively the -``ERA5GrbImg`` and ``ERA5NcImg`` (for single grib and netcdf images) classes. -The respective functions for reading images are defined in -``ecmwf_models.erainterim.interface`` ``ecmwf_models.era5.interface``. +To read the downloaded image data in python we recommend standard libraries such +as `xarray `_ or +`netCDF4 `_. -The following examples are shown for ERA5 data, but work the same way with the -respective ERA Interim functions. +However, you can also use the internal classes from this package. The main +purpose of these, however, is to use them in the time series conversion +module. -For example, you can read the image for a single variable at a specific date. -In this case for a stack of downloaded image files: +For example, you can read the image for some variables at a specific date. +In this case for a stack of downloaded image files (the chosen date +must be available of course): .. code-block:: python - # Script to load a stack of downloaded netcdf images - # and read a variable for a single date. - from ecmwf_models.era5.interface import ERA5NcDs - root_path = "/path/to/netcdf_storage" - ds = ERA5NcDs(root_path, parameter='swvl1') - data = ds.read(datetime(2010, 1, 1, 0)) - - # Script to load a stack of downloaded grib images - # and read a variable for a single date. - from ecmwf_models.era5.interface import ERA5GrbDs - root_path = "/path/to/grib_storage" - ds = ERA5GrbDs(root_path, parameter='swvl1') - data = ds.read(datetime(2010, 1, 1, 0)) - - -You can also read multiple variables at a specific date by passing a list of parameters. -In this case for a set of netcdf files: - -.. code-block:: python - - # Script to load a stack of downloaded netcdf images - # and read two variables for a single date. - from ecmwf_models.era5.interface import ERA5NcDs - root_path = "/path/to/storage" - ds = ERA5NcDs(root_path, parameter=['swvl1', 'swvl2']) - data = ds.read(datetime(2000, 1, 1, 0)) - - -All images between two given dates can be read using the -``iter_images`` methods of all the image stack reader classes. \ No newline at end of file + >> from ecmwf_models.era5.reader import ERA5NcDs + >> root_path = "/path/to/netcdf_storage" + >> ds = ERA5NcDs(root_path, parameter=['swvl1']) + >> img = ds.read(datetime(2010, 1, 1, 0)) + + # To read the coordinates + >> img.lat # also: img.lon + array([[ 90. , 90. , 90. , ..., 90. , 90. , 90. ], + [ 89.9, 89.9, 89.9, ..., 89.9, 89.9, 89.9], + [ 89.8, 89.8, 89.8, ..., 89.8, 89.8, 89.8], + ..., + [-89.8, -89.8, -89.8, ..., -89.8, -89.8, -89.8], + [-89.9, -89.9, -89.9, ..., -89.9, -89.9, -89.9], + [-90. , -90. , -90. , ..., -90. , -90. , -90. ]]) + + # To read the data variables + >> img.data['swvl1'] + array([[ nan, nan, nan, ..., nan, nan, nan], + [ nan, nan, nan, ..., nan, nan, nan], + [ nan, nan, nan, ..., nan, nan, nan], + ..., + [0.159 , 0.1589, 0.1588, ..., 0.1595, 0.1594, 0.1592], + [0.1582, 0.1582, 0.1581, ..., 0.1588, 0.1587, 0.1584], + [0.206 , 0.206 , 0.206 , ..., 0.206 , 0.206 , 0.206 ]]) + + +The equivalent class to read grib files is called in ``ERA5GrbDs``. diff --git a/docs/index.rst b/docs/index.rst index 0ec6abf..04cfd6b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,12 +1,10 @@ -.. include:: ../README.rst - +.. include:: readme.rst .. include:: download.rst - .. include:: img_read.rst - .. include:: img2ts.rst - .. include:: ts_read.rst +.. include:: variables_era5 +.. include:: variables_era5land Contents ======== @@ -18,6 +16,8 @@ Contents Image Reading Time Series Creation Time Series Reading + All variables available for ERA5 + All variables available for ERA5-Land License Authors Changelog diff --git a/docs/ts_read.rst b/docs/ts_read.rst index d8f0b37..fa81123 100644 --- a/docs/ts_read.rst +++ b/docs/ts_read.rst @@ -1,21 +1,32 @@ - Reading converted time series data ---------------------------------- -For reading time series data, that the ``era5_reshuffle`` and ``eraint_reshuffle`` -command produces, the class ``ERATs`` can be used. -Optional arguments that are passed to the parent class +For reading time series data, that the ``era5 reshuffle`` and ``era5land reshuffle`` +command produces, the class ``ERATs`` can be used. This will return a time series +of values for the chosen location. + +Optional arguments that are forwarded to the parent class (``OrthoMultiTs``, as defined in `pynetcf.time_series `_) can be passed as well: .. code-block:: python - from ecmwf_models import ERATs + >> from ecmwf_models import ERATs # read_bulk reads full files into memory # read_ts takes either lon, lat coordinates to perform a nearest neighbour search # or a grid point index (from the grid.nc file) and returns a pandas.DataFrame. - ds = ERATs(ts_path, ioclass_kws={'read_bulk': True}) - ts = ds.read_ts(45, 15) + >> ds = ERATs(ts_path, ioclass_kws={'read_bulk': True}) + + >> ds.read(18, 48) # (lon, lat) + + swvl1 swvl2 + 2024-04-01 00:00:00 0.318054 0.329590 + 2024-04-01 12:00:00 0.310715 0.325958 + 2024-04-02 00:00:00 0.360229 0.323502 + ... ... ... + 2024-04-04 12:00:00 0.343353 0.348755 + 2024-04-05 00:00:00 0.350266 0.346558 + 2024-04-05 12:00:00 0.343994 0.344498 Bulk reading speeds up reading multiple points from a cell file by storing the file in memory for subsequent calls. Either Longitude and Latitude can be passed diff --git a/docs/variables_era5.rst b/docs/variables_era5.rst new file mode 100644 index 0000000..d2459bf --- /dev/null +++ b/docs/variables_era5.rst @@ -0,0 +1,584 @@ +.. _variables_era5: + +ERA5 Variables +-------------- + +The following table contains all variable names that can be used for the +``era5 download`` and ``era5 reshuffle`` programs. + +Note that the “default” column indicates which variables are downloaded +if none are specified. + ++------------------------------------------------+-----------+--------+ +| **Variable** | **Short | **Def | +| | Name** | ault** | ++================================================+===========+========+ +| 100 metre U wind component | u100 | 0 | ++------------------------------------------------+-----------+--------+ +| 100 metre V wind component | v100 | 0 | ++------------------------------------------------+-----------+--------+ +| Neutral wind at 10 m u-component | u10n | 0 | ++------------------------------------------------+-----------+--------+ +| 10 metre U wind component | u10 | 0 | ++------------------------------------------------+-----------+--------+ +| Neutral wind at 10 m v-component | v10n | 0 | ++------------------------------------------------+-----------+--------+ +| 10 metre V wind component | v10 | 0 | ++------------------------------------------------+-----------+--------+ +| 10 metre wind gust since previous | fg10 | 0 | +| post-processing | | | ++------------------------------------------------+-----------+--------+ +| 2 metre dewpoint temperature | d2m | 0 | ++------------------------------------------------+-----------+--------+ +| 2 metre temperature | t2m | 0 | ++------------------------------------------------+-----------+--------+ +| Air density over the oceans | p140209 | 0 | ++------------------------------------------------+-----------+--------+ +| Angle of sub-gridscale orography | anor | 0 | ++------------------------------------------------+-----------+--------+ +| Anisotropy of sub-gridscale orography | isor | 0 | ++------------------------------------------------+-----------+--------+ +| Benjamin-Feir index | bfi | 0 | ++------------------------------------------------+-----------+--------+ +| Boundary layer dissipation | bld | 0 | ++------------------------------------------------+-----------+--------+ +| Boundary layer height | blh | 0 | ++------------------------------------------------+-----------+--------+ +| Charnock | chnk | 0 | ++------------------------------------------------+-----------+--------+ +| Clear-sky direct solar radiation at surface | cdir | 0 | ++------------------------------------------------+-----------+--------+ +| Cloud base height | cbh | 0 | ++------------------------------------------------+-----------+--------+ +| Coefficient of drag with waves | cdww | 0 | ++------------------------------------------------+-----------+--------+ +| Convective available potential energy | cape | 0 | ++------------------------------------------------+-----------+--------+ +| Convective inhibition | cin | 0 | ++------------------------------------------------+-----------+--------+ +| Convective precipitation | cp | 0 | ++------------------------------------------------+-----------+--------+ +| Convective rain rate | crr | 0 | ++------------------------------------------------+-----------+--------+ +| Convective snowfall | csf | 0 | ++------------------------------------------------+-----------+--------+ +| Convective snowfall rate water equivalent | csfr | 1 | ++------------------------------------------------+-----------+--------+ +| Downward UV radiation at the surface | uvb | 0 | ++------------------------------------------------+-----------+--------+ +| Duct base height | dctb | 0 | ++------------------------------------------------+-----------+--------+ +| Eastward gravity wave surface stress | lgws | 0 | ++------------------------------------------------+-----------+--------+ +| Eastward turbulent surface stress | ewss | 0 | ++------------------------------------------------+-----------+--------+ +| Evaporation | e | 1 | ++------------------------------------------------+-----------+--------+ +| Forecast albedo | fal | 0 | ++------------------------------------------------+-----------+--------+ +| Forecast logarithm of surface roughness for | flsr | 0 | +| heat | | | ++------------------------------------------------+-----------+--------+ +| Forecast surface roughness | fsr | 0 | ++------------------------------------------------+-----------+--------+ +| Free convective velocity over the oceans | p140208 | 0 | ++------------------------------------------------+-----------+--------+ +| Friction velocity | zust | 0 | ++------------------------------------------------+-----------+--------+ +| Geopotential | z | 0 | ++------------------------------------------------+-----------+--------+ +| Gravity wave dissipation | gwd | 0 | ++------------------------------------------------+-----------+--------+ +| High cloud cover | hcc | 0 | ++------------------------------------------------+-----------+--------+ +| High vegetation cover | cvh | 0 | ++------------------------------------------------+-----------+--------+ +| Ice temperature layer 1 | istl1 | 0 | ++------------------------------------------------+-----------+--------+ +| Ice temperature layer 2 | istl2 | 0 | ++------------------------------------------------+-----------+--------+ +| Ice temperature layer 3 | istl3 | 0 | ++------------------------------------------------+-----------+--------+ +| Ice temperature layer 4 | istl4 | 0 | ++------------------------------------------------+-----------+--------+ +| Instantaneous 10 metre wind gust | i10fg | 0 | ++------------------------------------------------+-----------+--------+ +| Instantaneous eastward turbulent surface | iews | 0 | +| stress | | | ++------------------------------------------------+-----------+--------+ +| Instantaneous large-scale surface | ilspf | 0 | +| precipitation fraction | | | ++------------------------------------------------+-----------+--------+ +| Instantaneous moisture flux | ie | 0 | ++------------------------------------------------+-----------+--------+ +| Instantaneous northward turbulent surface | inss | 0 | +| stress | | | ++------------------------------------------------+-----------+--------+ +| Instantaneous surface sensible heat flux | ishf | 0 | ++------------------------------------------------+-----------+--------+ +| K index | kx | 0 | ++------------------------------------------------+-----------+--------+ +| Lake bottom temperature | lblt | 0 | ++------------------------------------------------+-----------+--------+ +| Lake cover | cl | 0 | ++------------------------------------------------+-----------+--------+ +| Lake depth | dl | 0 | ++------------------------------------------------+-----------+--------+ +| Lake ice depth | licd | 0 | ++------------------------------------------------+-----------+--------+ +| Lake ice temperature | lict | 0 | ++------------------------------------------------+-----------+--------+ +| Lake mix-layer depth | lmld | 0 | ++------------------------------------------------+-----------+--------+ +| Lake mix-layer temperature | lmlt | 0 | ++------------------------------------------------+-----------+--------+ +| Lake shape factor | lshf | 0 | ++------------------------------------------------+-----------+--------+ +| Lake total layer temperature | ltlt | 0 | ++------------------------------------------------+-----------+--------+ +| Land-sea mask | lsm | 1 | ++------------------------------------------------+-----------+--------+ +| Large-scale precipitation | lsp | 0 | ++------------------------------------------------+-----------+--------+ +| Large-scale precipitation fraction | lspf | 0 | ++------------------------------------------------+-----------+--------+ +| Large scale rain rate | lsrr | 0 | ++------------------------------------------------+-----------+--------+ +| Large-scale snowfall | lsf | 0 | ++------------------------------------------------+-----------+--------+ +| Large scale snowfall rate water equivalent | lssfr | 1 | ++------------------------------------------------+-----------+--------+ +| Leaf area index, high vegetation | lai_hv | 0 | ++------------------------------------------------+-----------+--------+ +| Leaf area index, low vegetation | lai_lv | 0 | ++------------------------------------------------+-----------+--------+ +| Low cloud cover | lcc | 0 | ++------------------------------------------------+-----------+--------+ +| Low vegetation cover | cvl | 0 | ++------------------------------------------------+-----------+--------+ +| Maximum temperature at 2 metres since previous | mx2t | 0 | +| post-processing | | | ++------------------------------------------------+-----------+--------+ +| Maximum individual wave height | hmax | 0 | ++------------------------------------------------+-----------+--------+ +| Maximum total precipitation rate since | mxtpr | 0 | +| previous post-processing | | | ++------------------------------------------------+-----------+--------+ +| Mean boundary layer dissipation | mbld | 0 | ++------------------------------------------------+-----------+--------+ +| Mean convective precipitation rate | mcpr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean convective snowfall rate | mcsr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean direction of total swell | mdts | 0 | ++------------------------------------------------+-----------+--------+ +| Mean direction of wind waves | mdww | 0 | ++------------------------------------------------+-----------+--------+ +| Mean eastward gravity wave surface stress | megwss | 0 | ++------------------------------------------------+-----------+--------+ +| Mean eastward turbulent surface stress | metss | 0 | ++------------------------------------------------+-----------+--------+ +| Mean evaporation rate | mer | 0 | ++------------------------------------------------+-----------+--------+ +| Mean gravity wave dissipation | mgwd | 0 | ++------------------------------------------------+-----------+--------+ +| Mean large-scale precipitation fraction | mlspf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean large-scale precipitation rate | mlspr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean large-scale snowfall rate | mlssr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean northward gravity wave surface stress | mngwss | 0 | ++------------------------------------------------+-----------+--------+ +| Mean northward turbulent surface stress | mntss | 0 | ++------------------------------------------------+-----------+--------+ +| Mean period of total swell | mpts | 0 | ++------------------------------------------------+-----------+--------+ +| Mean period of wind waves | mpww | 0 | ++------------------------------------------------+-----------+--------+ +| Mean potential evaporation rate | mper | 0 | ++------------------------------------------------+-----------+--------+ +| Mean runoff rate | mror | 0 | ++------------------------------------------------+-----------+--------+ +| Mean sea level pressure | msl | 0 | ++------------------------------------------------+-----------+--------+ +| Mean snow evaporation rate | mser | 0 | ++------------------------------------------------+-----------+--------+ +| Mean snowfall rate | msr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean snowmelt rate | msmr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean square slope of waves | msqs | 0 | ++------------------------------------------------+-----------+--------+ +| Mean sub-surface runoff rate | mssror | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface direct short-wave radiation flux | msdrswrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface direct short-wave radiation flux, | m | 0 | +| clear sky | sdrswrfcs | | ++------------------------------------------------+-----------+--------+ +| Mean surface downward long-wave radiation flux | msdwlwrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface downward long-wave radiation | m | 0 | +| flux, clear sky | sdwlwrfcs | | ++------------------------------------------------+-----------+--------+ +| Mean surface downward short-wave radiation | msdwswrf | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Mean surface downward short-wave radiation | m | 0 | +| flux, clear sky | sdwswrfcs | | ++------------------------------------------------+-----------+--------+ +| Mean surface downward UV radiation flux | msdwuvrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface latent heat flux | mslhf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface net long-wave radiation flux | msnlwrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface net long-wave radiation flux, | msnlwrfcs | 0 | +| clear sky | | | ++------------------------------------------------+-----------+--------+ +| Mean surface net short-wave radiation flux | msnswrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface net short-wave radiation flux, | msnswrfcs | 0 | +| clear sky | | | ++------------------------------------------------+-----------+--------+ +| Mean surface runoff rate | msror | 0 | ++------------------------------------------------+-----------+--------+ +| Mean surface sensible heat flux | msshf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean top downward short-wave radiation flux | mtdwswrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean top net long-wave radiation flux | mtnlwrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean top net long-wave radiation flux, clear | mtnlwrfcs | 0 | +| sky | | | ++------------------------------------------------+-----------+--------+ +| Mean top net short-wave radiation flux | mtnswrf | 0 | ++------------------------------------------------+-----------+--------+ +| Mean top net short-wave radiation flux, clear | mtnswrfcs | 0 | +| sky | | | ++------------------------------------------------+-----------+--------+ +| Mean total precipitation rate | mtpr | 0 | ++------------------------------------------------+-----------+--------+ +| Mean vertical gradient of refractivity inside | dndza | 0 | +| trapping layer | | | ++------------------------------------------------+-----------+--------+ +| Mean vertically integrated moisture divergence | mvimd | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave direction | mwd | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave direction of first swell partition | p140122 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave direction of second swell partition | p140125 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave direction of third swell partition | p140128 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave period | mwp | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave period based on first moment | mp1 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave period based on first moment for | p1ps | 0 | +| swell | | | ++------------------------------------------------+-----------+--------+ +| Mean wave period based on first moment for | p1ww | 0 | +| wind waves | | | ++------------------------------------------------+-----------+--------+ +| Mean wave period based on second moment for | p2ps | 0 | +| swell | | | ++------------------------------------------------+-----------+--------+ +| Mean wave period based on second moment for | p2ww | 0 | +| wind waves | | | ++------------------------------------------------+-----------+--------+ +| Mean wave period of first swell partition | p140123 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave period of second swell partition | p140126 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean wave period of third swell partition | p140129 | 0 | ++------------------------------------------------+-----------+--------+ +| Mean zero-crossing wave period | mp2 | 0 | ++------------------------------------------------+-----------+--------+ +| Medium cloud cover | mcc | 0 | ++------------------------------------------------+-----------+--------+ +| Minimum temperature at 2 metres since previous | mn2t | 0 | +| post-processing | | | ++------------------------------------------------+-----------+--------+ +| Minimum total precipitation rate since | mntpr | 0 | +| previous post-processing | | | ++------------------------------------------------+-----------+--------+ +| Minimum vertical gradient of refractivity | dndzn | 0 | +| inside trapping layer | | | ++------------------------------------------------+-----------+--------+ +| Model bathymetry | wmb | 0 | ++------------------------------------------------+-----------+--------+ +| Near IR albedo for diffuse radiation | alnid | 0 | ++------------------------------------------------+-----------+--------+ +| Near IR albedo for direct radiation | alnip | 0 | ++------------------------------------------------+-----------+--------+ +| Normalized energy flux into ocean | phioc | 0 | ++------------------------------------------------+-----------+--------+ +| Normalized energy flux into waves | phiaw | 0 | ++------------------------------------------------+-----------+--------+ +| Normalized stress into ocean | tauoc | 0 | ++------------------------------------------------+-----------+--------+ +| Northward gravity wave surface stress | mgws | 0 | ++------------------------------------------------+-----------+--------+ +| Northward turbulent surface stress | nsss | 0 | ++------------------------------------------------+-----------+--------+ +| Peak wave period | pp1d | 0 | ++------------------------------------------------+-----------+--------+ +| Period corresponding to maximum individual | tmax | 0 | +| wave height | | | ++------------------------------------------------+-----------+--------+ +| Potential evaporation | pev | 1 | ++------------------------------------------------+-----------+--------+ +| Precipitation type | ptype | 0 | ++------------------------------------------------+-----------+--------+ +| Runoff | ro | 0 | ++------------------------------------------------+-----------+--------+ +| Sea ice area fraction | siconc | 0 | ++------------------------------------------------+-----------+--------+ +| Sea surface temperature | sst | 0 | ++------------------------------------------------+-----------+--------+ +| Significant height of combined wind waves and | swh | 0 | +| swell | | | ++------------------------------------------------+-----------+--------+ +| Significant height of total swell | shts | 0 | ++------------------------------------------------+-----------+--------+ +| Significant height of wind waves | shww | 0 | ++------------------------------------------------+-----------+--------+ +| Significant wave height of first swell | p140121 | 0 | +| partition | | | ++------------------------------------------------+-----------+--------+ +| Significant wave height of second swell | p140124 | 0 | +| partition | | | ++------------------------------------------------+-----------+--------+ +| Significant wave height of third swell | p140127 | 0 | +| partition | | | ++------------------------------------------------+-----------+--------+ +| Skin reservoir content | src | 0 | ++------------------------------------------------+-----------+--------+ +| Skin temperature | skt | 0 | ++------------------------------------------------+-----------+--------+ +| Slope of sub-gridscale orography | slor | 0 | ++------------------------------------------------+-----------+--------+ +| Snow albedo | asn | 0 | ++------------------------------------------------+-----------+--------+ +| Snow density | rsn | 1 | ++------------------------------------------------+-----------+--------+ +| Snow depth | sd | 1 | ++------------------------------------------------+-----------+--------+ +| Snow evaporation | es | 0 | ++------------------------------------------------+-----------+--------+ +| Snowfall | sf | 0 | ++------------------------------------------------+-----------+--------+ +| Snowmelt | smlt | 0 | ++------------------------------------------------+-----------+--------+ +| Soil temperature level 1 | stl1 | 1 | ++------------------------------------------------+-----------+--------+ +| Soil temperature level 2 | stl2 | 1 | ++------------------------------------------------+-----------+--------+ +| Soil temperature level 3 | stl3 | 1 | ++------------------------------------------------+-----------+--------+ +| Soil temperature level 4 | stl4 | 1 | ++------------------------------------------------+-----------+--------+ +| Soil type | slt | 1 | ++------------------------------------------------+-----------+--------+ +| Standard deviation of filtered subgrid | sdfor | 0 | +| orography | | | ++------------------------------------------------+-----------+--------+ +| Standard deviation of orography | sdor | 0 | ++------------------------------------------------+-----------+--------+ +| Sub-surface runoff | ssro | 0 | ++------------------------------------------------+-----------+--------+ +| Surface latent heat flux | slhf | 0 | ++------------------------------------------------+-----------+--------+ +| Surface net solar radiation | ssr | 0 | ++------------------------------------------------+-----------+--------+ +| Surface net solar radiation, clear sky | ssrc | 0 | ++------------------------------------------------+-----------+--------+ +| Surface net thermal radiation | str | 0 | ++------------------------------------------------+-----------+--------+ +| Surface net thermal radiation, clear sky | strc | 0 | ++------------------------------------------------+-----------+--------+ +| Surface pressure | sp | 0 | ++------------------------------------------------+-----------+--------+ +| Surface runoff | sro | 0 | ++------------------------------------------------+-----------+--------+ +| Surface sensible heat flux | sshf | 0 | ++------------------------------------------------+-----------+--------+ +| Surface solar radiation downward clear-sky | ssrdc | 0 | ++------------------------------------------------+-----------+--------+ +| Surface solar radiation downwards | ssrd | 0 | ++------------------------------------------------+-----------+--------+ +| Surface thermal radiation downward clear-sky | strdc | 0 | ++------------------------------------------------+-----------+--------+ +| Surface thermal radiation downwards | strd | 0 | ++------------------------------------------------+-----------+--------+ +| Temperature of snow layer | tsn | 0 | ++------------------------------------------------+-----------+--------+ +| TOA incident solar radiation | tisr | 0 | ++------------------------------------------------+-----------+--------+ +| Top net solar radiation | tsr | 0 | ++------------------------------------------------+-----------+--------+ +| Top net solar radiation, clear sky | tsrc | 0 | ++------------------------------------------------+-----------+--------+ +| Top net thermal radiation | ttr | 0 | ++------------------------------------------------+-----------+--------+ +| Top net thermal radiation, clear sky | ttrc | 0 | ++------------------------------------------------+-----------+--------+ +| Total cloud cover | tcc | 0 | ++------------------------------------------------+-----------+--------+ +| Total column cloud ice water | tciw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column cloud liquid water | tclw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column ozone | tco3 | 0 | ++------------------------------------------------+-----------+--------+ +| Total column rain water | tcrw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column snow water | tcsw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column supercooled liquid water | tcslw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column water | tcw | 0 | ++------------------------------------------------+-----------+--------+ +| Total column water vapour | tcwv | 0 | ++------------------------------------------------+-----------+--------+ +| Total precipitation | tp | 1 | ++------------------------------------------------+-----------+--------+ +| Total sky direct solar radiation at surface | fdir | 0 | ++------------------------------------------------+-----------+--------+ +| Total totals index | totalx | 0 | ++------------------------------------------------+-----------+--------+ +| Trapping layer base height | tplb | 0 | ++------------------------------------------------+-----------+--------+ +| Trapping layer top height | tplt | 0 | ++------------------------------------------------+-----------+--------+ +| Type of high vegetation | tvh | 0 | ++------------------------------------------------+-----------+--------+ +| Type of low vegetation | tvl | 0 | ++------------------------------------------------+-----------+--------+ +| U-component stokes drift | ust | 0 | ++------------------------------------------------+-----------+--------+ +| UV visible albedo for diffuse radiation | aluvd | 0 | ++------------------------------------------------+-----------+--------+ +| UV visible albedo for direct radiation | aluvp | 0 | ++------------------------------------------------+-----------+--------+ +| V-component stokes drift | vst | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of cloud | p80.162 | 0 | +| frozen water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of cloud | p79.162 | 0 | +| liquid water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of | p85.162 | 0 | +| geopotential flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of kinetic | p82.162 | 0 | +| energy flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of mass flux | p81.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of moisture | p84.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of ozone flux | p87.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of thermal | p83.162 | 0 | +| energy flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of divergence of total | p86.162 | 0 | +| energy flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward cloud frozen | p90.162 | 0 | +| water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward cloud liquid | p88.162 | 0 | +| water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward geopotential | p73.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward heat flux | p69.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward kinetic energy | p67.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward mass flux | p65.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward ozone flux | p77.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward total energy | p75.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of eastward water vapour | p71.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of energy conversion | p64.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of kinetic energy | p59.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of mass of atmosphere | p53.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of mass tendency | p92.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward cloud frozen | p91.162 | 0 | +| water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward cloud liquid | p89.162 | 0 | +| water flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward geopotential | p74.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward heat flux | p70.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward kinetic energy | p68.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward mass flux | p66.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward ozone flux | p78.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward total energy | p76.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of northward water vapour | p72.162 | 0 | +| flux | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of potential+internal energy | p61.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of potential+internal+latent | p62.162 | 0 | +| energy | | | ++------------------------------------------------+-----------+--------+ +| Vertical integral of temperature | p54.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of thermal energy | p60.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertical integral of total energy | p63.162 | 0 | ++------------------------------------------------+-----------+--------+ +| Vertically integrated moisture divergence | vimd | 0 | ++------------------------------------------------+-----------+--------+ +| Volumetric soil water layer 1 | swvl1 | 1 | ++------------------------------------------------+-----------+--------+ +| Volumetric soil water layer 2 | swvl2 | 1 | ++------------------------------------------------+-----------+--------+ +| Volumetric soil water layer 3 | swvl3 | 1 | ++------------------------------------------------+-----------+--------+ +| Volumetric soil water layer 4 | swvl4 | 1 | ++------------------------------------------------+-----------+--------+ +| Wave spectral directional width | wdw | 0 | ++------------------------------------------------+-----------+--------+ +| Wave spectral directional width for swell | dwps | 0 | ++------------------------------------------------+-----------+--------+ +| Wave spectral directional width for wind waves | dwww | 0 | ++------------------------------------------------+-----------+--------+ +| Wave spectral kurtosis | wsk | 0 | ++------------------------------------------------+-----------+--------+ +| Wave spectral peakedness | wsp | 0 | ++------------------------------------------------+-----------+--------+ +| Wave Spectral Skewness | wss | 0 | ++------------------------------------------------+-----------+--------+ +| Zero degree level | deg0l | 0 | ++------------------------------------------------+-----------+--------+ diff --git a/docs/variables_era5land.rst b/docs/variables_era5land.rst new file mode 100644 index 0000000..20d866a --- /dev/null +++ b/docs/variables_era5land.rst @@ -0,0 +1,116 @@ +.. _variables_era5land: + +ERA5-Land Variables +------------------- + +The following table contains all variable names that can be used for the +``era5land download`` and ``era5land reshuffle`` programs. + +Note that the “default” column indicates which variables are downloaded +if none are specified. + ++----------------------------------------------+------------+----------+ +| **Variable** | **Short | **D | +| | Name** | efault** | ++==============================================+============+==========+ +| 10 metre U wind component | u10 | 0 | ++----------------------------------------------+------------+----------+ +| 10 metre V wind component | v10 | 0 | ++----------------------------------------------+------------+----------+ +| 2 metre dewpoint temperature | d2m | 1 | ++----------------------------------------------+------------+----------+ +| 2 metre temperature | t2m | 1 | ++----------------------------------------------+------------+----------+ +| Evaporation from bare soil | evabs | 1 | ++----------------------------------------------+------------+----------+ +| Evaporation from open water surfaces | evaow | 1 | +| excluding oceans | | | ++----------------------------------------------+------------+----------+ +| Evaporation from the top of canopy | evatc | 1 | ++----------------------------------------------+------------+----------+ +| Evaporation from vegetation transpiration | evavt | 1 | ++----------------------------------------------+------------+----------+ +| Forecast albedo | fal | 1 | ++----------------------------------------------+------------+----------+ +| Lake bottom temperature | lblt | 0 | ++----------------------------------------------+------------+----------+ +| Lake ice depth | licd | 0 | ++----------------------------------------------+------------+----------+ +| Lake ice temperature | lict | 0 | ++----------------------------------------------+------------+----------+ +| Lake mix-layer depth | lmld | 0 | ++----------------------------------------------+------------+----------+ +| Lake mix-layer temperature | lmlt | 0 | ++----------------------------------------------+------------+----------+ +| Lake shape factor | lshf | 0 | ++----------------------------------------------+------------+----------+ +| Lake total layer temperature | ltlt | 0 | ++----------------------------------------------+------------+----------+ +| Leaf area index high vegetation | lai_hv | 1 | ++----------------------------------------------+------------+----------+ +| Leaf area index low vegetation | lai_lv | 1 | ++----------------------------------------------+------------+----------+ +| Potential evaporation | pev | 1 | ++----------------------------------------------+------------+----------+ +| Runoff | ro | 1 | ++----------------------------------------------+------------+----------+ +| Skin reservoir content | src | 1 | ++----------------------------------------------+------------+----------+ +| Skin temperature | skt | 1 | ++----------------------------------------------+------------+----------+ +| Snow albedo | asn | 1 | ++----------------------------------------------+------------+----------+ +| Snow Cover | snowc | 1 | ++----------------------------------------------+------------+----------+ +| Snow density | rsn | 1 | ++----------------------------------------------+------------+----------+ +| Snow depth | sd | 1 | ++----------------------------------------------+------------+----------+ +| Snow Depth Water Equivalent | swe | 1 | ++----------------------------------------------+------------+----------+ +| Snow evaporation | es | 1 | ++----------------------------------------------+------------+----------+ +| Snowfall | sf | 1 | ++----------------------------------------------+------------+----------+ +| Snowmelt | smlt | 1 | ++----------------------------------------------+------------+----------+ +| Soil temperature level 1 | stl1 | 1 | ++----------------------------------------------+------------+----------+ +| Soil temperature level 2 | stl2 | 1 | ++----------------------------------------------+------------+----------+ +| Soil temperature level 3 | stl3 | 1 | ++----------------------------------------------+------------+----------+ +| Soil temperature level 4 | stl4 | 1 | ++----------------------------------------------+------------+----------+ +| Sub-surface runoff | ssro | 1 | ++----------------------------------------------+------------+----------+ +| Surface latent heat flux | slhf | 1 | ++----------------------------------------------+------------+----------+ +| Surface net solar radiation | ssr | 1 | ++----------------------------------------------+------------+----------+ +| Surface net thermal radiation | str | 1 | ++----------------------------------------------+------------+----------+ +| Surface pressure | sp | 1 | ++----------------------------------------------+------------+----------+ +| Surface runoff | sro | 1 | ++----------------------------------------------+------------+----------+ +| Surface sensible heat flux | sshf | 1 | ++----------------------------------------------+------------+----------+ +| Surface solar radiation downwards | ssrd | 1 | ++----------------------------------------------+------------+----------+ +| Surface thermal radiation downwards | strd | 1 | ++----------------------------------------------+------------+----------+ +| Temperature of snow layer | tsn | 1 | ++----------------------------------------------+------------+----------+ +| Evaporation | e | 1 | ++----------------------------------------------+------------+----------+ +| Total precipitation | tp | 1 | ++----------------------------------------------+------------+----------+ +| Volumetric soil water layer 1 | swvl1 | 1 | ++----------------------------------------------+------------+----------+ +| Volumetric soil water layer 2 | swvl2 | 1 | ++----------------------------------------------+------------+----------+ +| Volumetric soil water layer 3 | swvl3 | 1 | ++----------------------------------------------+------------+----------+ +| Volumetric soil water layer 4 | swvl4 | 1 | ++----------------------------------------------+------------+----------+ diff --git a/environment/latest.yml b/environment/latest.yml deleted file mode 100644 index 04cd718..0000000 --- a/environment/latest.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: ecmwf_models -channels: -- conda-forge -- defaults -dependencies: -- numpy -- pandas -- configobj -- pygrib>=2.0.1 -- hdf5 -- netcdf4>=1.4.2 -- scipy -- pyresample -- xarray -- pip -- pip: - - pygeobase - - pynetcf>=0.5.0 - - repurpose - - pygeogrids - - cdsapi>=0.6.1 - - ecmwf-api-client - - datedown>=0.3 - - requests - - more_itertools - - typing_extensions - - pre-commit - - sphinx_rtd_theme - - toml - - yapf - - pytest - - pytest-cov diff --git a/environment/pinned_linux.yml b/environment/pinned_linux.yml deleted file mode 100644 index da04d60..0000000 --- a/environment/pinned_linux.yml +++ /dev/null @@ -1,163 +0,0 @@ -name: ecmwf_models -channels: - - conda-forge - - defaults -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - blosc=1.21.3=hafa529b_0 - - brotlipy=0.7.0=py39hb9d737c_1005 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.18.1=h7f98852_0 - - ca-certificates=2023.01.10=h06a4308_0 - - certifi=2023.5.7=pyhd8ed1ab_0 - - cffi=1.15.1=py39he91dace_3 - - cftime=1.6.2=py39h2ae25f5_1 - - charset-normalizer=3.1.0=pyhd8ed1ab_0 - - configobj=5.0.8=pyhd8ed1ab_0 - - cryptography=40.0.2=py39h079d5ae_0 - - curl=8.0.1=h588be90_0 - - eccodes=2.30.0=h95353b9_0 - - freeglut=3.2.2=h9c3ff4c_1 - - geos=3.11.2=hcb278e6_0 - - hdf4=4.2.15=h501b40f_6 - - hdf5=1.14.0=nompi_hb72d44e_103 - - icu=72.1=hcb278e6_0 - - idna=3.4=pyhd8ed1ab_0 - - importlib-metadata=6.6.0=pyha770c72_0 - - importlib_metadata=6.6.0=hd8ed1ab_0 - - jasper=4.0.0=h32699f2_1 - - keyutils=1.6.1=h166bdaf_0 - - krb5=1.20.1=h81ceb04_0 - - ld_impl_linux-64=2.38=h1181459_1 - - lerc=4.0.0=h27087fc_0 - - libaec=1.0.6=hcb278e6_1 - - libblas=3.9.0=16_linux64_openblas - - libcblas=3.9.0=16_linux64_openblas - - libcurl=8.0.1=h588be90_0 - - libdeflate=1.18=h0b41bf4_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libffi=3.4.2=h6a678d5_6 - - libgcc-ng=12.2.0=h65d4601_19 - - libgfortran-ng=12.2.0=h69a702a_19 - - libgfortran5=12.2.0=h337968e_19 - - libglu=9.0.0=he1b5a44_1001 - - libgomp=12.2.0=h65d4601_19 - - libiconv=1.17=h166bdaf_0 - - libjpeg-turbo=2.1.5.1=h0b41bf4_0 - - liblapack=3.9.0=16_linux64_openblas - - libnetcdf=4.9.2=nompi_hdf9a29f_104 - - libnghttp2=1.52.0=h61bc06f_0 - - libnsl=2.0.0=h7f98852_0 - - libopenblas=0.3.21=pthreads_h78a6416_3 - - libpng=1.6.39=h753d276_0 - - libsqlite=3.40.0=h753d276_1 - - libssh2=1.10.0=hf14f497_3 - - libstdcxx-ng=12.2.0=h46fd767_19 - - libtiff=4.5.0=ha587672_6 - - libuuid=2.38.1=h0b41bf4_0 - - libwebp-base=1.3.0=h0b41bf4_0 - - libxcb=1.13=h7f98852_1004 - - libxml2=2.10.4=hfdac1af_0 - - libzip=1.9.2=hc929e4a_1 - - libzlib=1.2.13=h166bdaf_4 - - lz4-c=1.9.4=hcb278e6_0 - - ncurses=6.4=h6a678d5_0 - - netcdf4=1.6.3=nompi_py39h369ccc5_102 - - numpy=1.24.3=py39h6183b62_0 - - openssl=3.1.0=hd590300_3 - - packaging=23.1=pyhd8ed1ab_0 - - pandas=2.0.1=py39h40cae4c_1 - - pip=23.1.2=pyhd8ed1ab_0 - - platformdirs=3.5.0=pyhd8ed1ab_0 - - pooch=1.7.0=pyha770c72_3 - - proj=9.2.0=h8ffa02c_0 - - pthread-stubs=0.4=h36c2ea0_1001 - - pycparser=2.21=pyhd8ed1ab_0 - - pygrib=2.1.4=py39h0e146ac_7 - - pykdtree=1.3.7.post0=py39h389d5f1_0 - - pyopenssl=23.1.1=pyhd8ed1ab_0 - - pyproj=3.5.0=py39h718ffca_1 - - pyresample=1.26.1=py39h2ad29b5_0 - - pysocks=1.7.1=pyha2e5f31_6 - - python=3.9.16=h2782a2a_0_cpython - - python-dateutil=2.8.2=pyhd8ed1ab_0 - - python-tzdata=2023.3=pyhd8ed1ab_0 - - python_abi=3.9=3_cp39 - - pytz=2023.3=pyhd8ed1ab_0 - - pyyaml=6.0=py39hb9d737c_5 - - readline=8.2=h5eee18b_0 - - scipy=1.10.1=py39he83f1e1_1 - - setuptools=66.0.0=py39h06a4308_0 - - shapely=2.0.1=py39hf1c3bca_1 - - six=1.16.0=pyh6c4a22f_0 - - snappy=1.1.10=h9fff704_0 - - sqlite=3.41.2=h5eee18b_0 - - tk=8.6.12=h1ccaba5_0 - - typing-extensions=4.5.0=hd8ed1ab_0 - - typing_extensions=4.5.0=pyha770c72_0 - - tzdata=2023c=h04d1e81_0 - - urllib3=1.26.15=pyhd8ed1ab_0 - - wheel=0.38.4=py39h06a4308_0 - - xarray=2023.4.2=pyhd8ed1ab_0 - - xorg-fixesproto=5.0=h7f98852_1002 - - xorg-inputproto=2.3.2=h7f98852_1002 - - xorg-kbproto=1.0.7=h7f98852_1002 - - xorg-libx11=1.8.4=h0b41bf4_0 - - xorg-libxau=1.0.9=h7f98852_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xorg-libxext=1.3.4=h0b41bf4_2 - - xorg-libxfixes=5.0.3=h7f98852_1004 - - xorg-libxi=1.7.10=h7f98852_0 - - xorg-xextproto=7.3.0=h0b41bf4_1003 - - xorg-xproto=7.0.31=h7f98852_1007 - - xz=5.4.2=h5eee18b_0 - - yaml=0.2.5=h7f98852_2 - - zipp=3.15.0=pyhd8ed1ab_0 - - zlib=1.2.13=h166bdaf_4 - - zstd=1.5.2=h3eb15da_6 - - pip: - - alabaster==0.7.13 - - babel==2.12.1 - - cdsapi==0.6.1 - - cfgv==3.3.1 - - coverage==7.2.5 - - datedown==0.4 - - distlib==0.3.6 - - docutils==0.18.1 - - ecmwf-api-client==1.6.3 - - exceptiongroup==1.1.1 - - filelock==3.12.0 - - identify==2.5.24 - - imagesize==1.4.1 - - iniconfig==2.0.0 - - jinja2==3.1.2 - - markupsafe==2.1.2 - - more-itertools==9.1.0 - - nodeenv==1.7.0 - - pluggy==1.0.0 - - pre-commit==3.3.1 - - pygeobase==0.6.2 - - pygeogrids==0.4.2 - - pygments==2.15.1 - - pynetcf==0.5.0 - - pytest==7.3.1 - - pytest-cov==4.0.0 - - repurpose==0.9 - - requests==2.30.0 - - snowballstemmer==2.2.0 - - sphinx==6.2.1 - - sphinx-rtd-theme==1.2.0 - - sphinxcontrib-applehelp==1.0.4 - - sphinxcontrib-devhelp==1.0.2 - - sphinxcontrib-htmlhelp==2.0.1 - - sphinxcontrib-jquery==4.1 - - sphinxcontrib-jsmath==1.0.1 - - sphinxcontrib-qthelp==1.0.3 - - sphinxcontrib-serializinghtml==1.1.5 - - toml==0.10.2 - - tomli==2.0.1 - - tqdm==4.65.0 - - virtualenv==20.23.0 - - yapf==0.33.0 diff --git a/setup.cfg b/setup.cfg index 3a398b3..509f8ca 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,7 +6,7 @@ name = ecmwf_models description = Downloading, reading and TS conversion of ECMWF reanalysis data author = TU Wien -author-email = remote.sensing@geo.tuwien.ac.at +author-email = support@qa4sm.eu license = mit long-description = file: README.rst long-description-content-type = text/x-rst; charset=UTF-8 @@ -34,19 +34,15 @@ install_requires = numpy pandas netcdf4 - pyresample - xarray - cdsapi>=0.6.1 - ecmwf-api-client - datedown>=0.3 - pygeobase - pygeogrids - repurpose - pynetcf>=0.5.0 + click + parse + datedown pygrib - configobj - requests - more_itertools + c3s_sm>=0.3.1 + cdsapi>=0.7.3 + repurpose>=0.13 + pygeogrids + # The usage of test_requires is discouraged, see `Dependency Management` docs # tests_require = pytest; pytest-cov # Require a specific Python version, e.g. Python 2.7 or >= 3.4 @@ -67,16 +63,24 @@ testing = coverage pytest +docs = + sphinx<7 + sphinx_rtd_theme + mock + pillow + recommonmark + readthedocs-sphinx-ext + myst_parser + nbsphinx + [options.entry_points] # Add here console scripts like: # console_scripts = # script_name = ecmwf_models.module:function # For example: console_scripts = - eraint_download = ecmwf_models.erainterim.download:run - era5_download = ecmwf_models.era5.download:run - eraint_reshuffle = ecmwf_models.erainterim.reshuffle:run - era5_reshuffle = ecmwf_models.era5.reshuffle:run + era5 = ecmwf_models.cli:era5 + era5land = ecmwf_models.cli:era5land # And any other entry points, for example: # pyscaffold.cli = # awesome = pyscaffoldext.awesome.extension:AwesomeExtension diff --git a/src/ecmwf_models/cli.py b/src/ecmwf_models/cli.py new file mode 100644 index 0000000..4531823 --- /dev/null +++ b/src/ecmwf_models/cli.py @@ -0,0 +1,469 @@ +import click +from datetime import datetime + +from ecmwf_models.era5.download import download_and_move, download_record_extension +from ecmwf_models.utils import (default_variables) + +from ecmwf_models.era5.reshuffle import img2ts, extend_ts + + +@click.command( + "download", + context_settings={ + 'show_default': True, + 'help_option_names': ['-h', '--help'] + }, + short_help="Download ERA5 reanalysis image data between two " + "dates from the Copernicus Climate Data Store (CDS). " + "Before this program can be used, you have to " + "register an account at CDS, and should setup a `.cdsapirc` " + "file as described here: " + "https://cds.climate.copernicus.eu/how-to-api") +@click.argument("PATH", type=click.Path(writable=True)) +@click.option( + '--start', + '-s', + type=click.STRING, + default=str(datetime(1979, 1, 1)), + help="First date of the period to download data for. Format: YYYY-MM-DD") +@click.option( + '--end', + '-e', + type=click.STRING, + default=str(datetime.now().date()), + help="Last date of the period to download data for. Format: YYYY-MM-DD") +@click.option( + '--variables', + '-v', + type=click.STRING, + default=','.join(default_variables('era5', 'short_name')), + help="Name of variables to download. To download multiple variables pass " + "comma-separated names (e.g. `-v swvl1,stl1`). " + "For all available variables see the docs. ") +@click.option( + "-k", + "--keep_original", + type=click.BOOL, + default=False, + help="To keep the original image stacks as downloaded from CDS, " + "instead of deleting them after extracting individual images, pass " + "`--keep_original True`") +@click.option( + "-grb", + "--as_grib", + type=click.BOOL, + default=False, + help="To download data in grib format instead of netcdf pass " + "`--as_grib True`.") +@click.option( + "--h_steps", + type=str, + default="0,6,12,18", + help="Temporal sampling of downloaded images. To download multiple time " + "stamps or each day, pass comma-separated values. " + "Pass a set of full hours here, like '--h_steps 0,12' to download " + "two images for each day, at 0:00 and 12:00 respectively. " + "By default, we download 6-hourly images starting at 0:00 UTC, " + "(i.e. `--h_steps 0,6,12,18`") +@click.option( + "--keep_prelim", + type=click.BOOL, + default=True, + help="The last 1-2 month of data are usually 'preliminary' (labelled as " + "'ERA5-T' and 'ERA5-Land-T') and might be changed if an issue is " + "detected. When this option is deactivated (`--keep_prelim False`), " + "only the final data will be kept, while the ERA5-T data is discarded " + "after download. By default, we also keep preliminary files, but they " + "get a different file name as the final data.") +@click.option( + "--max_request_size", + type=int, + default=1000, + help="Maximum number of requests to pass to the CDS API. " + "The default is 1000, but what is allowed, depends on server " + "settings. Server settings may change at some point. Change " + "accordingly here in case that 'the request is too large'. " + "A smaller number will results in smaller download chunks (slower).") +@click.option( + "--cds_token", + type=click.STRING, + default=None, + help="To identify with the CDS. Required only if no `.cdsapirc` file " + "exists in the home directory (see documentation). " + "You can find your token/key on your CDS user profile page. " + "Alternatively, you can also set an environment variable " + "`CDSAPI_KEY` with your token.") +def cli_download_era5(path, start, end, variables, keep_original, as_grib, + h_steps, keep_prelim, max_request_size, cds_token): + """ + Download ERA5 image data within the chosen period. NOTE: Before using this + program, create a CDS account and set up a `.cdsapirc` file as described + here: https://cds.climate.copernicus.eu/how-to-api + + \b + Required Parameters + ------------------- + > PATH: string (required) + Root of local filesystem where the downloaded data will be stored. + Make sure to set up the CDS API for your account as described at + https://cds.climate.copernicus.eu/how-to-api + """ + # The docstring above is slightly different to the normal python one to + # display it properly on the command line. + + h_steps = [int(h.strip()) for h in h_steps.split(',')] + variables = [str(v.strip()) for v in variables.split(',')] + + status_code = download_and_move( + target_path=path, + startdate=start, + enddate=end, + product="era5", + variables=variables, + h_steps=h_steps, + grb=as_grib, + keep_original=keep_original, + stepsize='month', + n_max_request=max_request_size, + keep_prelim=keep_prelim, + cds_token=cds_token, + ) + + return status_code + + +@click.command( + "download", + context_settings={ + 'show_default': True, + 'help_option_names': ['-h', '--help'] + }, + short_help="Download ERA5 reanalysis image data between two " + "dates from the Copernicus Climate Data Store (CDS). " + "Before this program can be used, you have to " + "register an account at CDS, and should setup a `.cdsapirc` " + "file as described here: " + "https://cds.climate.copernicus.eu/how-to-api") +@click.argument("PATH", type=click.Path(writable=True)) +@click.option( + '--start', + '-s', + type=click.STRING, + default=str(datetime(1979, 1, 1)), + help="First date of the period to download data for. Format: YYYY-MM-DD") +@click.option( + '--end', + '-e', + type=click.STRING, + default=str(datetime.now().date()), + help="Last date of the period to download data for. Format: YYYY-MM-DD") +@click.option( + '--variables', + '-v', + type=click.STRING, + default=','.join(default_variables('era5-land', 'short_name')), + help="Name of variables to download. To download multiple variables pass " + "comma-separated names (e.g. `-v swvl1,stl1`). " + "For all available variables see the docs. ") +@click.option( + "-k", + "--keep_original", + type=click.BOOL, + default=False, + help="To keep the original image stacks as downloaded from CDS, " + "instead of deleting them after extracting individual images, pass " + "`--keep_original True`") +@click.option( + "-grb", + "--as_grib", + type=click.BOOL, + default=False, + help="To download data in grib format instead of netcdf pass " + "`--as_grib True`.") +@click.option( + "--h_steps", + type=click.STRING, + default='0,6,12,18', + help="Temporal sampling of downloaded images. To download multiple time " + "stamps or each day, pass comma-separated values. " + "Pass a set of full hours here, like '--h_steps 0,12' to download " + "two images for each day, at 0:00 and 12:00 respectively. " + "By default, we download 6-hourly images starting at 0:00 UTC, " + "(i.e. `--h_steps 0,6,12,18`") +@click.option( + "--keep_prelim", + type=click.BOOL, + default=True, + help="The last 1-2 month of data are usually 'preliminary' (labelled as " + "'ERA5-T' and 'ERA5-Land-T') and might be changed if an issue is " + "detected. When this option is deactivated (`--keep_prelim False`), " + "only the final data will be kept, while the ERA5-T data is discarded " + "after download. By default, we also keep preliminary files, but they " + "get a different file name as the final data.") +@click.option( + "--max_request_size", + type=int, + default=1000, + help="Maximum number of requests to pass to the CDS API. " + "The default is 1000, but what is allowed, depends on server " + "settings. Server settings may change at some point. Change " + "accordingly here in case that 'the request is too large'. " + "A smaller number will results in smaller download chunks (slower).") +@click.option( + "--cds_token", + type=click.STRING, + default=None, + help="To identify with the CDS. Required only if no `.cdsapirc` file " + "exists in the home directory (see documentation). " + "You can find your token/key on your CDS user profile page. " + "Alternatively, you can also set an environment variable " + "`CDSAPI_KEY` with your token.") +def cli_download_era5land(path, start, end, variables, keep_original, as_grib, + h_steps, keep_prelim, max_request_size, cds_token): + """ + Download ERA5-Land image data within a chosen period. + NOTE: Before using this program, create a CDS account and set up a + `.cdsapirc` file as described here: + https://cds.climate.copernicus.eu/how-to-api + + \b + Required Parameters + ------------------- + > PATH: string (required) + Root of local filesystem where the downloaded data will be stored. + Make sure to set up the CDS API for your account as described at + https://cds.climate.copernicus.eu/how-to-api + """ + # The docstring above is slightly different to the normal python one to + # display it properly on the command line. + + h_steps = [int(h.strip()) for h in h_steps.split(',')] + variables = [str(v.strip()) for v in variables.split(',')] + + status_code = download_and_move( + target_path=path, + startdate=start, + enddate=end, + product="era5-land", + variables=variables, + h_steps=h_steps, + grb=as_grib, + keep_original=keep_original, + stepsize='month', + n_max_request=max_request_size, + keep_prelim=keep_prelim, + cds_token=cds_token) + + return status_code + + +@click.command( + "update_img", + context_settings={ + 'show_default': True, + 'help_option_names': ['-h', '--help'] + }, + short_help="Extend an existing set of images by downloading new data " + "with the same settings as before." + "NOTE: First use the `era5 download` or `era5land download` " + "programs.") +@click.argument("path", type=click.Path(writable=True)) +@click.option( + "--cds_token", + type=click.STRING, + default=None, + help="To identify with the CDS. Required only if no `.cdsapirc` file " + "exists in the home directory (see documentation). " + "You can find your token/key on your CDS user profile page. " + "Alternatively, you can also set an environment variable " + "`CDSAPI_KEY` with your token.") +def cli_update_img(path, cds_token=None): + """ + Download new images from CDS to your existing local archive. Use the same + settings as before. + NOTE: First use the `era5 download` or `era5land download` programs. + The so-created archive can then be updated using this program. + + \b + Required Parameters + ------------------- + > PATH: string (required) + Path where previously downloaded images are stored. There must + be a file `overview.yml` which contains the download settings. + Make sure to set up the CDS API for your account as described at + https://cds.climate.copernicus.eu/how-to-api, + """ + # The docstring above is slightly different to the normal python one to + # display it properly on the command line. + + download_record_extension(path, cds_token=cds_token) + + +@click.command( + "reshuffle", + context_settings={ + 'show_default': True, + 'help_option_names': ['-h', '--help'] + }, + short_help="Convert previously downloaded ERA5/ERA5-Land image " + "data into a time series format.") +@click.argument("IMG_PATH", type=click.Path(readable=True)) +@click.argument("TS_PATH", type=click.Path(writable=True)) +@click.argument("START", type=click.STRING) +@click.argument("END", type=click.STRING) +@click.option( + '--variables', + '-v', + type=click.STRING, + default=None, + help="Subset of variables to convert. Pass comma-separated names" + "to select multiple variables (short names, as in the input images, " + "e.g. `-v swvl1,stl1`). If not specified, all variables from the " + "first image file are used.") +@click.option( + '--land_points', + '-l', + type=click.BOOL, + show_default=True, + default=False, + help="To store only time series for points that are over land, pass " + "`--land_points True`. ") +@click.option( + '--bbox', + nargs=4, + type=click.FLOAT, + default=None, + help="4 NUMBERS | min_lon min_lat max_lon max_lat. " + "Set Bounding Box (lower left and upper right corner) " + "of area to reshuffle (WGS84). Default is global.") +@click.option( + "--h_steps", + type=click.STRING, + default="0,6,12,18", + help="Full hour time stamps of images to include in time series. " + "To select multiple, pass comma-separated values here, " + "e.g. '--h_steps 0,12' will only include data from images at " + "0:00 and 12:00 UTC and ignore all other available time stamps." + "By default, we include data for every 6th hour each day.") +@click.option( + '--imgbuffer', + '-b', + type=click.INT, + default=50, + help="Number of images to read into memory at once before " + "conversion to time series. A larger buffer means faster " + "processing but requires more memory.") +def cli_reshuffle(img_path, ts_path, start, end, variables, land_points, bbox, + h_steps, imgbuffer): + """ + Convert previously downloaded ERA5 or ERA5-Land image data into a + time series format. + + \b + Required Parameters + ------------------- + > IMG_PATH: string (required) + Directory where the downloaded image data is stored, i.e., where + annual folders are found. + > TS_PATH: string (required) + Root of local filesystem where the time series should be stored. + > START: string (required) + Date of the first image to include in the conversion. + Format: YYYY-MM-DD + > END: string (required) + Date of the last image to include in the conversion. + Format: YYYY-MM-DD + """ + + h_steps = [int(h.strip()) for h in h_steps.split(',')] + + if variables is not None: + variables = [str(v.strip()) for v in variables.split(',')] + + print(f"Converting ERA5/ERA5-Land images in period {start} to {end} into " + f"time series to {ts_path}.") + + img2ts( + input_root=img_path, + outputpath=ts_path, + startdate=start, + enddate=end, + variables=variables, + bbox=bbox, + h_steps=h_steps, + land_points=land_points, + imgbuffer=imgbuffer, + product=None, # Infer product automatically from files + ) + + +@click.command( + "update_ts", + context_settings={ + 'show_default': True, + 'help_option_names': ['-h', '--help'] + }, + short_help="Append new image data to an existing time series archive.") +@click.argument("TS_PATH", type=click.Path(writable=True)) +@click.option( + '--imgpath', + '-p', + type=click.STRING, + default=None, + help="Manually specify where the image data to extend the time " + "series are located. If this is not specified, we use the previously " + "used path from the `overview.yml` file stored with the time series " + "data.") +@click.option( + '--imgbuffer', + '-b', + type=click.INT, + default=50, + help="Number of images to read into memory at once before " + "conversion to time series. A larger buffer means faster " + "processing but requires more memory.") +def cli_extend_ts(ts_path, imgpath, imgbuffer): + """ + Append new image data to an existing time series archive. The archive must + be created first using the `reshuffle` program. We will use the same settings + as in the initial run (see `overview.yml` in TS_PATH) for consistent + extensions. + + \b + Required Parameters + ------------------- + > TS_PATH: string (required) + Root of local filesystem where the time series from a previous call + of `reshuffle` are stored. New data will be appended to the existing + files! + """ + kwargs = dict(ts_path=ts_path, imgbuffer=imgbuffer) + + if imgpath is not None: # otherwise use path from yml + kwargs["input_root"] = imgpath + + extend_ts(**kwargs) + + +@click.group(short_help="ERA5 Command Line Programs imported from the " + "`ecmwf_models` pip package.") +def era5(): + pass + + +era5.add_command(cli_download_era5) +era5.add_command(cli_update_img) +era5.add_command(cli_reshuffle) +era5.add_command(cli_extend_ts) + + +@click.group(short_help="ERA5-Land Command Line Programs imported from the " + "`ecmwf_models` pip package.") +def era5land(): + pass + + +era5land.add_command(cli_download_era5land) +era5land.add_command(cli_update_img) +era5land.add_command(cli_reshuffle) +era5land.add_command(cli_extend_ts) diff --git a/src/ecmwf_models/era5/download.py b/src/ecmwf_models/era5/download.py index 36572a9..af4f601 100644 --- a/src/ecmwf_models/era5/download.py +++ b/src/ecmwf_models/era5/download.py @@ -2,64 +2,28 @@ """ Module to download ERA5 from terminal in netcdf and grib format. """ -import pandas as pd - -from ecmwf_models.utils import ( - load_var_table, - lookup, - save_gribs_from_grib, - save_ncs_from_nc, - mkdate, - str2bool, -) import warnings -import errno -import argparse -import sys import os import logging -from datetime import datetime, time +from datetime import datetime, time, timedelta import shutil import cdsapi -import multiprocessing import numpy as np +import pandas as pd +from c3s_sm.misc import read_summary_yml +from repurpose.process import parallel_process +from repurpose.misc import delete_empty_directories -def default_variables(product="era5"): - """ - These variables are being downloaded, when None are passed by the user - - Parameters - --------- - product : str, optional (default: 'era5') - Name of the era5 product to read the default variables for. - Either 'era5' or 'era5-land'. - """ - lut = load_var_table(name=product) - defaults = lut.loc[lut["default"] == 1]["dl_name"].values - return defaults.tolist() - - -def split_array(array, chunk_size): - """ - Split an array into chunks of a given size. - - Parameters - ---------- - array : array-like - Array to split into chunks - chunk_size : int - Size of each chunk - - Returns - ------- - chunks : list - List of chunks - """ - chunks = [] - for i in range(0, len(array), chunk_size): - chunks.append(array[i:i + chunk_size]) - return chunks +from ecmwf_models.utils import ( + lookup, + update_image_summary_file, + default_variables, + split_array, + check_api_ready, + get_first_last_image_date +) +from ecmwf_models.extract import save_ncs_from_nc, save_gribs_from_grib def split_chunk(timestamps, @@ -157,8 +121,8 @@ def download_era5( default variables will be downloaded. target : str File name, where the data is stored. - geb : bool, optional (default: False) - Download data in grib format + grb : bool, optional (default: False) + Download data in grib format instead of netcdf product : str ERA5 data product to download, either era5 or era5-land dry_run: bool, optional (default: False) @@ -178,15 +142,17 @@ def download_era5( request = { "format": "grib" if grb else "netcdf", + "download_format": "unarchived", "variable": variables, "year": [str(y) for y in years], "month": [str(m).zfill(2) for m in months], "day": [str(d).zfill(2) for d in days], "time": [time(h, 0).strftime("%H:%M") for h in h_steps], } + request.update(cds_kwds) if product == "era5": - request["product_type"] = "reanalysis" + request["product_type"] = ["reanalysis"] c.retrieve("reanalysis-era5-single-levels", request, target) elif product == "era5-land": c.retrieve("reanalysis-era5-land", request, target) @@ -206,8 +172,9 @@ class CDSStatusTracker: statuscode_error = -1 statuscode_unavailable = 10 - def __init__(self): + def __init__(self, logger=logging.getLogger()): self.download_statuscode = self.statuscode_ok + self.logger = logger def handle_error_function(self, *args, **kwargs): message_prefix = args[0] @@ -218,7 +185,7 @@ def handle_error_function(self, *args, **kwargs): self.download_statuscode = self.statuscode_unavailable else: self.download_statuscode = self.statuscode_error - logging.error(*args, **kwargs) + self.logger.error(*args, **kwargs) def download_and_move( @@ -233,9 +200,11 @@ def download_and_move( dry_run=False, grid=None, remap_method="bil", - cds_kwds={}, + cds_kwds=None, stepsize="month", n_max_request=1000, + keep_prelim=True, + cds_token=None, ) -> int: """ Downloads the data from the ECMWF servers and moves them to the target @@ -253,33 +222,39 @@ def download_and_move( first date to download enddate: datetime last date to download - product : str, optional (default: ERA5) - Either ERA5 or ERA5Land + product : str, optional (default: era5) + Either era5 or era5-land variables : list, optional (default: None) - Name of variables to download + Name of variables to download, see the documentation for all variable + names. If None is chosen, then the 'default' variables are downloaded. keep_original: bool (default: False) - keep the original downloaded data - h_steps: list or tuple - List of full hours to download data at the selected dates e.g [0, 12] + If True, keep the original downloaded data stack as received from CDS + after slicing individual time stamps. + h_steps: tuple, optional (default: (0, 6, 12, 18)) + List of full hours to download data for at the selected dates e.g + [0, 12] would download at 0:00 and 12:00. Only full hours are possible. grb: bool, optional (default: False) - Download data as grib files + Download data as grib files instead of netcdf. + Note that downloading in grib format, does not allow on-the-fly + resampling (`grid` argument) dry_run: bool Do not download anything, this is just used for testing the functions grid : dict, optional (default: None) A grid on which to remap the data using CDO. This must be a dictionary using CDO's grid description format, e.g.:: - grid = { - "gridtype": "lonlat", - "xsize": 720, - "ysize": 360, - "xfirst": -179.75, - "yfirst": 89.75, - "xinc": 0.5, - "yinc": -0.5, - } + grid = { + "gridtype": "lonlat", + "xsize": 720, + "ysize": 360, + "xfirst": -179.75, + "yfirst": 89.75, + "xinc": 0.5, + "yinc": -0.5, + } Default is to use no regridding. + To use this option, it is necessary that CDO is installed. remap_method : str, optional (dafault: 'bil') Method to be used for regridding. Available methods are: - "bil": bilinear (default) @@ -289,22 +264,37 @@ def download_and_move( - "con": 1st order conservative remapping - "con2": 2nd order conservative remapping - "laf": largest area fraction remapping - cds_kwds: dict, optional (default: {}) - Additional arguments to be passed to the CDS API retrieve request. + cds_kwds: dict, optional (default: None) + Additional keyword arguments to be passed to the CDS API request. + This might be useful in the future, when new server-side options are + added which are not yet directly supported by this package. n_max_request : int, optional (default: 1000) Maximum size that a request can have to be processed by CDS. At the moment of writing this is 1000 (N_timstamps * N_variables in a request) but as this is a server side settings, it can change. + keep_prelim: bool, optional (default: True) + Keep preliminary data from ERA5T under a different file name. + These data are not yet final and might change if an issue is detected. + If False is chosen, then the preliminary data will be discarded and + not stored. + cds_token: str, optional (default: None) + To identify with the CDS. Required if no .cdsapirc file exists in + the home directory (see documentation). You can find your token/key + on your CDS user profile page. Alternatively, the CDSAPI_KEY + environment variable can be set manually instead of passing the token + here. Returns ------- status_code: int - 0 : Downloaded data ok - -1 : Error + Status code summary from all requests: + 0 : All Downloaded data ok + -1 : Error in at least one request -10 : No data available for requested time period """ h_steps = list(h_steps) product = product.lower() + cds_kwds = cds_kwds or dict() if variables is None: variables = default_variables(product=product) @@ -313,11 +303,19 @@ def download_and_move( variables = lookup(name=product, variables=variables) variables = variables["dl_name"].values.tolist() + # this logger name is also used by CDS API, don't change it + logger = logging.getLogger('cdsapi') + if dry_run: warnings.warn("Dry run does not create connection to CDS") c = None cds_status_tracker = None else: + if cds_token is not None: + os.environ["CDSAPI_KEY"] = cds_token + check_api_ready() + + os.makedirs(target_path, exist_ok=True) cds_status_tracker = CDSStatusTracker() c = cdsapi.Client( error_callback=cds_status_tracker.handle_error_function) @@ -328,22 +326,22 @@ def download_and_move( for t in pd.date_range(startdate, enddate, freq='D') ])) - daily_request = True if stepsize == "day" else False req_periods = split_chunk( timestamps, n_vars=len(variables), n_hsteps=len(h_steps), max_req_size=n_max_request, reduce=True, - daily_request=daily_request) + daily_request=True if stepsize == "day" else False) - # Since we download month/month or day/day we need to - # collect all the status codes to return a valid - # status code for the whole time period - all_status_codes = [] + logger.info(f"Request is split into {len(req_periods)} chunks") + logger.info(f"Target directory {target_path}") + + downloaded_data_path = os.path.join(target_path, "temp_downloaded") + if not os.path.exists(downloaded_data_path): + os.mkdir(downloaded_data_path) - pool = multiprocessing.Pool(1) - for curr_start, curr_end in req_periods: + def _download(curr_start, curr_end): curr_start = pd.to_datetime(curr_start).to_pydatetime() curr_end = pd.to_datetime(curr_end).to_pydatetime() @@ -352,12 +350,8 @@ def download_and_move( fname = "{start}_{end}.{ext}".format( start=curr_start.strftime("%Y%m%d"), end=curr_end.strftime("%Y%m%d"), - ext="grb" if grb else "nc", - ) + ext="grb" if grb else "nc") - downloaded_data_path = os.path.join(target_path, "temp_downloaded") - if not os.path.exists(downloaded_data_path): - os.mkdir(downloaded_data_path) dl_file = os.path.join(downloaded_data_path, fname) finished, i = False, 0 @@ -394,39 +388,42 @@ def download_and_move( i += 1 continue - if status_code == 0: + if status_code == 0 and os.path.exists(dl_file): if grb: - pool.apply_async( - save_gribs_from_grib, - args=(dl_file, target_path), - kwds=dict( - product_name=product.upper(), - keep_original=keep_original, - ), - ) + save_gribs_from_grib( + dl_file, + target_path, + product_name=product.upper(), + keep_original=keep_original, + keep_prelim=keep_prelim) else: - pool.apply_async( - save_ncs_from_nc, - args=( - dl_file, - target_path, - ), - kwds=dict( - product_name=product.upper(), - grid=grid, - remap_method=remap_method, - keep_original=keep_original, - ), - ) + save_ncs_from_nc( + dl_file, + target_path, + product_name=product.upper(), + grid=grid, + remap_method=remap_method, + keep_original=keep_original, + keep_prelim=keep_prelim) - all_status_codes.append(status_code) + return status_code - pool.close() - pool.join() + # Since we download month/month or day/day we need to + # collect all the status codes to return a valid + # status code for the whole time period + all_status_codes = parallel_process( + _download, + ITER_KWARGS={ + 'curr_start': [p[0] for p in req_periods], + 'curr_end': [p[1] for p in req_periods] + }, + logger_name='cdsapi', + loglevel='DEBUG') # remove temporary files if not keep_original: shutil.rmtree(downloaded_data_path) + if grid is not None: gridpath = os.path.join(target_path, "grid.txt") if os.path.exists(gridpath): @@ -435,152 +432,76 @@ def download_and_move( if os.path.exists(weightspath): os.unlink(weightspath) + delete_empty_directories(target_path) + + dl_settings = { + 'product': product, + 'variables': variables, + 'keep_original': keep_original, + 'h_steps': h_steps, + 'grb': grb, + 'grid': grid, + 'remap_method': remap_method, + 'cds_kwds': cds_kwds, + 'stepsize': stepsize, + 'n_max_request': n_max_request, + 'keep_prelim': keep_prelim, + } + + update_image_summary_file(target_path, dl_settings) + + handlers = logger.handlers[:] + + for handler in handlers: + logger.removeHandler(handler) + handler.close() + handlers.clear() + # if any of the sub-periods was successful we want the function to return 0 consolidated_status_code = max(all_status_codes) return consolidated_status_code -def parse_args(args): +def download_record_extension(path, dry_run=False, cds_token=None): """ - Parse command line parameters for recursive download + Uses information from an existing record to download additional data + from CDS. Parameters ---------- - args : list - Command line parameters as list of strings + path: str + Path where the image data to extend is stored. Must also contain + a `summary.yml` file. + dry_run: bool, optional + Do not download anything, this is just used for testing the functions + cds_token: str, optional (default: None) + To identify with the CDS. Required if no `.cdsapirc` file exists in + the home directory (see documentation). You can find your token/key + on your CDS user profile page. Alternatively, the CDSAPI_KEY + environment variable can be set manually instead of passing the token + here. Returns - ---------- - clparams : argparse.Namespace - Parsed command line parameters + ------- + status_code: int + Status code summary from all requests: + 0 : All Downloaded data ok + -1 : Error in at least one request + -10 : No data available for requested time period """ + props = read_summary_yml(path) - parser = argparse.ArgumentParser( - description="Download ERA 5 reanalysis data images between two " - "dates. Before this program can be used, you have to " - "register at the CDS and setup your .cdsapirc file " - "as described here: " - "https://cds.climate.copernicus.eu/api-how-to") - parser.add_argument( - "localroot", - help="Root of local filesystem where the downloaded data will be " - "stored.", - ) - parser.add_argument( - "-s", - "--start", - type=mkdate, - default=datetime(1979, 1, 1), - help=("Startdate in format YYYY-MM-DD. " - "If no data is found there then the first available date of the " - "product is used."), - ) - parser.add_argument( - "-e", - "--end", - type=mkdate, - default=datetime.now(), - help=("Enddate in format YYYY-MM-DD. " - "If not given then the current date is used."), - ) - parser.add_argument( - "-p", - "--product", - type=str, - default="ERA5", - help=("The ERA5 product to download. Choose either ERA5 or ERA5-Land. " - "Default is ERA5."), - ) - parser.add_argument( - "-var", - "--variables", - metavar="variables", - type=str, - default=None, - nargs="+", - help=("Name of variables to download. If None are passed, we use the " - "default ones from the " - "era5_lut.csv resp. era5-land_lut.csv files in this package. " - "See the ERA5/ERA5-LAND documentation for more variable names: " - " https://confluence.ecmwf.int/display/CKB/" - "ERA5+data+documentation " - " https://confluence.ecmwf.int/display/CKB/" - "ERA5-Land+data+documentation"), - ) - parser.add_argument( - "-keep", - "--keep_original", - type=str2bool, - default="False", - help=("Also keep the downloaded image stack as retrieved from CDS - " - "before slicing it into single images - instead of deleting it " - "after extracting all images. " - "Pass either True or False. Default is False."), - ) - parser.add_argument( - "-grb", - "--as_grib", - type=str2bool, - default="False", - help=("Download data in grib format instead of netcdf. " - "Pass either True or False. Default is False."), - ) - parser.add_argument( - "--h_steps", - type=int, - default=[0, 6, 12, 18], - nargs="+", - help=("Temporal resolution of downloaded images. " - "Pass a set of full hours here, like '--h_steps 0 12'. " - "By default 6H images (starting at 0:00 UTC, i.e. 0 6 12 18) " - "will be downloaded"), - ) - - parser.add_argument( - "--max_request_size", - type=int, - default=1000, - help=("Maximum number of requests that the CDS API allows. " - "The default is 1000, but depends on server side settings. " - "Server settings may change at some point. Change " - "accordingly here in case that 'the request is too large'. " - "A smaller number will results in smaller download chunks.")) - - args = parser.parse_args(args) - - print("Downloading {p} {f} files between {s} and {e} into folder {root}" - .format( - p=args.product, - f="grib" if args.as_grib is True else "netcdf", - s=args.start.isoformat(), - e=args.end.isoformat(), - root=args.localroot, - )) - return args - - -def main(args): - args = parse_args(args) - status_code = download_and_move( - target_path=args.localroot, - startdate=args.start, - enddate=args.end, - product=args.product, - variables=args.variables, - h_steps=args.h_steps, - grb=args.as_grib, - keep_original=args.keep_original, - stepsize='month', - n_max_request=args.max_request_size, - ) - return status_code - - -def run(): - status_code = main(sys.argv[1:]) - if status_code == -10: - return_code = errno.ENODATA # Default no data status code of 61 - else: - return_code = status_code + last_img = get_first_last_image_date(path) + + startdate = pd.to_datetime(last_img).to_pydatetime() + timedelta(days=1) + + enddate = pd.to_datetime(datetime.now().date()).to_pydatetime() \ + - timedelta(days=1) # yesterday - sys.exit(return_code) + return download_and_move( + path, + startdate=startdate, + enddate=enddate, + cds_token=cds_token, + dry_run=dry_run, + **props['download_settings']) diff --git a/src/ecmwf_models/era5/era5-land_lut.csv b/src/ecmwf_models/era5/era5-land_lut.csv index be6911d..ea14871 100644 --- a/src/ecmwf_models/era5/era5-land_lut.csv +++ b/src/ecmwf_models/era5/era5-land_lut.csv @@ -43,6 +43,7 @@ surface_sensible_heat_flux,Surface sensible heat flux,sshf,1 surface_solar_radiation_downwards,Surface solar radiation downwards,ssrd,1 surface_thermal_radiation_downwards,Surface thermal radiation downwards,strd,1 temperature_of_snow_layer,Temperature of snow layer,tsn,1 +total_evaporation,Evaporation,e,1 total_precipitation,Total precipitation,tp,1 volumetric_soil_water_layer_1,Volumetric soil water layer 1,swvl1,1 volumetric_soil_water_layer_2,Volumetric soil water layer 2,swvl2,1 diff --git a/src/ecmwf_models/era5/era5_lut.csv b/src/ecmwf_models/era5/era5_lut.csv index 48393e3..5038aec 100644 --- a/src/ecmwf_models/era5/era5_lut.csv +++ b/src/ecmwf_models/era5/era5_lut.csv @@ -5,15 +5,10 @@ dl_name,long_name,short_name,default 10m_u_component_of_wind,10 metre U wind component,u10,0 10m_v_component_of_neutral_wind,Neutral wind at 10 m v-component,v10n,0 10m_v_component_of_wind,10 metre V wind component,v10,0 -10m_wind_direction,10 metre wind direction,dwi,0 10m_wind_gust_since_previous_post_processing,10 metre wind gust since previous post-processing,fg10,0 -10m_wind_speed,10 metre wind speed,wind,0 2m_dewpoint_temperature,2 metre dewpoint temperature,d2m,0 2m_temperature,2 metre temperature,t2m,0 air_density_over_the_oceans,Air density over the oceans,p140209,0 -altimeter_corrected_wave_height,Altimeter corrected wave height,acwh,0 -altimeter_range_relative_correction,Altimeter range relative correction,arrc,0 -altimeter_wave_height,Altimeter wave height,awh,0 angle_of_sub_gridscale_orography,Angle of sub-gridscale orography,anor,0 anisotropy_of_sub_gridscale_orography,Anisotropy of sub-gridscale orography,isor,0 benjamin_feir_index,Benjamin-Feir index,bfi,0 @@ -39,6 +34,7 @@ forecast_logarithm_of_surface_roughness_for_heat,Forecast logarithm of surface r forecast_surface_roughness,Forecast surface roughness,fsr,0 free_convective_velocity_over_the_oceans,Free convective velocity over the oceans,p140208,0 friction_velocity,Friction velocity,zust,0 +geopotential,Geopotential,z,0 gravity_wave_dissipation,Gravity wave dissipation,gwd,0 high_cloud_cover,High cloud cover,hcc,0 high_vegetation_cover,High vegetation cover,cvh,0 @@ -147,7 +143,6 @@ normalized_energy_flux_into_waves,Normalized energy flux into waves,phiaw,0 normalized_stress_into_ocean,Normalized stress into ocean,tauoc,0 northward_gravity_wave_surface_stress,Northward gravity wave surface stress,mgws,0 northward_turbulent_surface_stress,Northward turbulent surface stress,nsss,0 -orography,Geopotential,z,0 peak_wave_period,Peak wave period,pp1d,0 period_corresponding_to_maximum_individual_wave_height,Period corresponding to maximum individual wave height,tmax,0 potential_evaporation,Potential evaporation,pev,1 diff --git a/src/ecmwf_models/era5/interface.py b/src/ecmwf_models/era5/reader.py similarity index 82% rename from src/ecmwf_models/era5/interface.py rename to src/ecmwf_models/era5/reader.py index 48c85a6..7992b6d 100644 --- a/src/ecmwf_models/era5/interface.py +++ b/src/ecmwf_models/era5/reader.py @@ -1,24 +1,15 @@ # -*- coding: utf-8 -*- - """ This module contains ERA5/ERA5-Land specific child classes of the netcdf and grib base classes, that are used for reading all ecmwf products. """ -from ecmwf_models.interface import ERANcImg, ERANcDs, ERAGrbImg, ERAGrbDs from typing import Optional, Collection from typing_extensions import Literal from pygeogrids.grids import CellGrid -# ERA5 products supported by the reader. -_supported_products = ['era5', 'era5-land'] - - -def _assert_product(product: str) -> str: - if product not in _supported_products: - raise ValueError(f"Got product {product} but expected one of " - f"{_supported_products}") - return product +from ecmwf_models.utils import assert_product +from ecmwf_models.interface import ERANcImg, ERANcDs, ERAGrbImg, ERAGrbDs class ERA5NcImg(ERANcImg): @@ -26,7 +17,7 @@ class ERA5NcImg(ERANcImg): def __init__( self, filename: str, - parameter: Optional[Collection[str]] = ("swvl1", "swvl2"), + parameter: Collection[str] = None, product: Literal['era5', 'era5-land'] = 'era5', subgrid: Optional[CellGrid] = None, mask_seapoints: Optional[bool] = False, @@ -39,8 +30,9 @@ def __init__( ---------- filename: str Path to the image file to read. - parameter: list or str, optional (default: ['swvl1', 'swvl2']) + parameter: list[str] or str, optional (default: None) Name of parameters to read from the image file. + None means all parameter product: str, optional (default: 'era5') What era5 product, either era5 or era5-land. subgrid: pygeogrids.CellGrid, optional (default: None) @@ -55,7 +47,7 @@ def __init__( super(ERA5NcImg, self).__init__( filename=filename, - product=_assert_product(product), + product=assert_product(product.lower()), parameter=parameter, subgrid=subgrid, mask_seapoints=mask_seapoints, @@ -71,10 +63,11 @@ class ERA5NcDs(ERANcDs): ---------- root_path: str Path to the image files to read. - parameter: list or str, optional (default: ('swvl1', 'swvl2')) + parameter: list[str] or str, optional (default: None) Name of parameters to read from the image file. + None means all parameter product: str, optional (default: 'era5') - What era5 product, either era5 or era5-land. + What era5 product, either `era5` or `era5-land`. h_steps : list, optional (default: (0,6,12,18)) List of full hours to read images for. subgrid: pygeogrids.CellGrid, optional (default: None) @@ -89,8 +82,8 @@ class ERA5NcDs(ERANcDs): def __init__( self, root_path: str, - parameter: Collection[str] = ("swvl1", "swvl2"), - product: Literal['era5', 'era5-land'] = 'era5', + parameter: Collection[str] = None, + product: str = 'era5', h_steps: Collection[int] = (0, 6, 12, 18), subgrid: Optional[CellGrid] = None, mask_seapoints: Optional[bool] = False, @@ -98,7 +91,7 @@ def __init__( ): super(ERA5NcDs, self).__init__( root_path=root_path, - product=_assert_product(product), + product=assert_product(product.lower()), parameter=parameter, subgrid=subgrid, h_steps=h_steps, @@ -112,7 +105,7 @@ class ERA5GrbImg(ERAGrbImg): def __init__( self, filename: str, - parameter: Optional[Collection[str]] = ("swvl1", "swvl2"), + parameter: Collection[str] = None, subgrid: Optional[CellGrid] = None, mask_seapoints: Optional[bool] = False, array_1D=False, @@ -124,8 +117,9 @@ def __init__( ---------- filename: str Path to the image file to read. - parameter: list or str, optional (default: ['swvl1', 'swvl2']) + parameter: list[str] or str, optional (default: None) Name of parameters to read from the image file. + None means all parameter subgrid: pygeogrids.CellGrid, optional (default: None) Read only data for points of this grid and not global values. mask_seapoints : bool, optional (default: False) @@ -150,9 +144,9 @@ class ERA5GrbDs(ERAGrbDs): def __init__( self, root_path: str, - parameter: Collection[str] = ("swvl1", "swvl2"), + parameter: Collection[str] = None, h_steps: Collection[int] = (0, 6, 12, 18), - product: Literal['era5', 'era5-land'] = "era5", + product: str = "era5", subgrid: Optional[CellGrid] = None, mask_seapoints: Optional[bool] = False, array_1D: Optional[bool] = False, @@ -164,12 +158,13 @@ def __init__( ---------- root_path: str Path to the image files to read. - parameter: list or str, optional (default: ['swvl1', 'swvl2']) + parameter: list[str] or str, optional (default: None) Name of parameters to read from the image file. + None means all parameter h_steps : list, optional (default: [0,6,12,18]) List of full hours to read images for. product: str, optional (default: 'era5') - What era5 product, either era5 or era5-land. + What era5 product, either `era5` or `era5-land`. subgrid: pygeogrids.CellGrid, optional (default: None) Read only data for points of this grid and not global values. mask_seapoints : bool, optional (default: False) @@ -181,7 +176,7 @@ def __init__( super(ERA5GrbDs, self).__init__( root_path=root_path, - product=_assert_product(product), + product=assert_product(product.lower()), parameter=parameter, subgrid=subgrid, h_steps=h_steps, diff --git a/src/ecmwf_models/era5/reshuffle.py b/src/ecmwf_models/era5/reshuffle.py index e0b10bd..2e21e39 100644 --- a/src/ecmwf_models/era5/reshuffle.py +++ b/src/ecmwf_models/era5/reshuffle.py @@ -1,28 +1,74 @@ # -*- coding: utf-8 -*- - """ -Module for a command line interface to convert the ERA Interim data into a -time series format using the repurpose package +Image to time series conversion tools. """ +import warnings import os -import sys -import argparse +import pandas as pd import numpy as np +from datetime import time, datetime, timedelta from repurpose.img2ts import Img2Ts -from ecmwf_models.era5.interface import ERA5NcDs, ERA5GrbDs +from c3s_sm.misc import update_ts_summary_file, read_summary_yml + +from ecmwf_models.era5.reader import ERA5NcDs, ERA5GrbDs from ecmwf_models.grid import ERA_RegularImgGrid, ERA5_RegularImgLandGrid -from ecmwf_models.utils import mkdate, parse_filetype, parse_product, str2bool -from datetime import time, datetime +from ecmwf_models.utils import parse_filetype, parse_product, get_first_last_image_date -def reshuffle( +def extend_ts(ts_path, **img2ts_kwargs): + """ + Append any new data from the image path to the time series data. + This function is only applied to time series file that were created + using the img2ts function. + This will use the start from the previously written metadata, + and process only parameters that are already present in the time series + files. + + Parameters + ---------- + ts_path: str + Directory containing time series files to extend. It is also + expected that there is a `overview.yml` file in this directory. + img2ts_kwargs: + All kwargs are optional, if they are not given, we use them from the + previously created `overview.yml` file. If they are passed here, + they will override the values from the yml file. + """ + if not os.path.isfile(os.path.join(ts_path, 'overview.yml')): + raise FileNotFoundError( + "No overview.yml file found in the passed directory. " + "This file is required to use the same settings to extend an " + "existing record. NOTE: Use the `era5 download` or " + "`era5land download` programs first.") + + ts_props = read_summary_yml(ts_path) + + for k, v in ts_props['img2ts_kwargs'].items(): + if k not in img2ts_kwargs: + if k == 'startdate': + old_enddate = pd.to_datetime( + ts_props["img2ts_kwargs"]["enddate"]).to_pydatetime() + startdate = old_enddate + timedelta(days=1) + img2ts_kwargs['startdate'] = startdate + elif k in ['enddate', 'outputpath']: # fields from yml to ignore + continue + else: + img2ts_kwargs[k] = v + + if 'enddate' not in img2ts_kwargs: + img2ts_kwargs['enddate'] = None # All available images + + img2ts(outputpath=ts_path, **img2ts_kwargs) + + +def img2ts( input_root, outputpath, startdate, - enddate, - variables, + enddate=None, + variables=None, product=None, bbox=None, h_steps=(0, 6, 12, 18), @@ -32,6 +78,8 @@ def reshuffle( """ Reshuffle method applied to ERA images for conversion into netcdf time series format. + Note: ERA5 and ERA5-Land files are preferred over their T-counterpart, + in case that multiple files for a time stamp are present! Parameters ---------- @@ -39,11 +87,14 @@ def reshuffle( Input path where ERA image data was downloaded to. outputpath : str Output path, where the reshuffled netcdf time series are stored. - startdate : datetime + startdate : str Start date, from which images are read and time series are generated. - enddate : datetime + Format YYYY-mm-dd + enddate : str, optional (default: None) End date, from which images are read and time series are generated. - variables: tuple or list or str + Format YYYY-mm-dd. If None is passed, then the last available image + date is used. + variables: tuple or str Variables to read from the passed images and convert into time series format. product : str, optional (default: None) @@ -60,42 +111,46 @@ def reshuffle( Reshuffle only land points. Uses the ERA5 land mask to create a land grid. The land grid is fixed to 0.25*0.25 or 0.1*0.1 deg for now. - imgbuffer: int, optional (default: 200) + imgbuffer: int, optional (default: 50) How many images to read at once before writing time series. This number affects how many images are stored in memory and should be chosen according to the available amount of memory and the size of a single image. """ + startdate = pd.to_datetime(startdate).to_pydatetime().date() + + if enddate is None: + enddate = get_first_last_image_date(input_root) - if h_steps is None: - h_steps = (0, 6, 12, 18) + enddate = pd.to_datetime(enddate).to_pydatetime().date() + + if startdate > enddate: + warnings.warn( + f"Start date {startdate} is greater than end date {enddate}. Abort." + ) + return filetype = parse_filetype(input_root) - product = parse_product(input_root) if not product else product + product: str = parse_product(input_root) if product is None else product if land_points: if product == "era5": - grid = ERA5_RegularImgLandGrid( - res_lat=0.25, res_lon=0.25, bbox=bbox) + grid = ERA5_RegularImgLandGrid(resolution=0.25, bbox=bbox) elif product == "era5-land": - grid = ERA5_RegularImgLandGrid(res_lat=0.1, res_lon=0.1, bbox=bbox) + grid = ERA5_RegularImgLandGrid(resolution=0.1, bbox=bbox) else: raise NotImplementedError( product, "Land grid not implemented for product.") else: - if product == "era5": - grid = ERA_RegularImgGrid(res_lat=0.25, res_lon=0.25, bbox=bbox) - elif product == "era5-land": - grid = ERA_RegularImgGrid(res_lat=0.1, res_lon=0.1, bbox=bbox) + if product.lower() in ['era5-land', 'era5-land-t']: + grid = ERA_RegularImgGrid(resolution=0.1, bbox=bbox) + elif product.lower() in ['era5', 'era5-t']: + grid = ERA_RegularImgGrid(resolution=0.25, bbox=bbox) else: raise NotImplementedError(product, "Grid not implemented for product.") if filetype == "grib": - if land_points: - raise NotImplementedError( - "Reshuffling land points only implemented for netcdf files") - input_dataset = ERA5GrbDs( root_path=input_root, parameter=variables, @@ -124,17 +179,37 @@ def reshuffle( global_attr = {f"product": f"{product.upper()} (from {filetype})"} # get time series attributes from first day of data. - first_date_time = datetime.combine(startdate.date(), time(h_steps[0], 0)) + first_date_time = datetime.combine(startdate, time(h_steps[0], 0)) # get time series attributes from first day of data. data = input_dataset.read(first_date_time) ts_attributes = data.metadata + props = { + 'product': product, + 'filetype': filetype, + 'last_update': datetime.now() + } + + kwargs = { + 'input_root': input_root, + 'outputpath': outputpath, + 'variables': None if variables is None else list(variables), + 'land_points': land_points, + 'startdate': startdate, + 'enddate': enddate, + 'h_steps': h_steps, + 'bbox': None if bbox is None else list(bbox), + 'imgbuffer': imgbuffer + } + + props["img2ts_kwargs"] = kwargs + reshuffler = Img2Ts( input_dataset=input_dataset, outputpath=outputpath, - startdate=startdate, - enddate=enddate, + startdate=str(startdate), + enddate=str(enddate), input_grid=grid, imgbuffer=imgbuffer, cellsize_lat=5.0, @@ -144,109 +219,10 @@ def reshuffle( zlib=True, unlim_chunksize=1000, ts_attributes=ts_attributes, - ) - reshuffler.calc() - - -def parse_args(args): - """ - Parse command line parameters for conversion from image to time series. - - Parameters - ---------- - args: list - command line parameters as list of strings - - Returns - ---------- - args : argparse.Namespace - Parsed command line parameters - """ - - parser = argparse.ArgumentParser( - description="Convert downloaded ERA5/ERA5-Land image data into time " - "series format.") - parser.add_argument( - "dataset_root", - help="Root of local filesystem where the image data is stored.", - ) - parser.add_argument( - "timeseries_root", - help="Root of local filesystem where the time series should " - "be stored.", - ) - parser.add_argument( - "start", type=mkdate, help=("Startdate in format YYYY-MM-DD")) - parser.add_argument( - "end", type=mkdate, help=("Enddate in format YYYY-MM-DD")) - parser.add_argument( - "variables", - metavar="variables", - nargs="+", - help=("Short name of variables as stored in the images, " - "which are reshuffled. " - "See documentation on image download for resp. ERA products, " - "for more information on variable names of the product. "), - ) - parser.add_argument( - "--land_points", - type=str2bool, - default="False", - help=("Store only time series for points that are over land. " - "Default is False."), - ) - parser.add_argument( - "--bbox", - type=float, - default=None, - nargs=4, - help=("min_lon min_lat max_lon max_lat. " - "Bounding Box (lower left and upper right corner) " - "of area to reshuffle (WGS84). By default all data is loaded."), - ) - parser.add_argument( - "--h_steps", - type=int, - default=None, - nargs="+", - help=( - "Time steps (full hours) of images that will be reshuffled " - "(must be in the images). " - "By default 6H images (starting at 0:00 UTC) will be reshuffled: " - "0 6 12 18"), - ) - parser.add_argument( - "--imgbuffer", - type=int, - default=50, - help=( - "How many images to read at once. Bigger numbers make the " - "conversion faster but consume more memory. Choose this according " - "to your system and the size of a single image. Default is 50."), - ) - args = parser.parse_args(args) - - print("Converting ERA5/ERA5-Land data from {} to {} into {}.".format( - args.start.isoformat(), args.end.isoformat(), args.timeseries_root)) - - return args - - -def main(args): - args = parse_args(args) - - reshuffle( - input_root=args.dataset_root, - outputpath=args.timeseries_root, - startdate=args.start, - enddate=args.end, - variables=args.variables, - bbox=args.bbox, - h_steps=args.h_steps, - land_points=args.land_points, - imgbuffer=args.imgbuffer, + backend='multiprocessing', + n_proc=1, ) + reshuffler.calc() -def run(): - main(sys.argv[1:]) + update_ts_summary_file(outputpath, props) diff --git a/src/ecmwf_models/erainterim/__init__.py b/src/ecmwf_models/erainterim/__init__.py deleted file mode 100644 index 40a96af..0000000 --- a/src/ecmwf_models/erainterim/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- diff --git a/src/ecmwf_models/erainterim/download.py b/src/ecmwf_models/erainterim/download.py deleted file mode 100644 index 07986d3..0000000 --- a/src/ecmwf_models/erainterim/download.py +++ /dev/null @@ -1,378 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Module to download ERA Interim from terminal. -""" - -from ecmwfapi import ECMWFDataServer -import argparse -import sys -from datetime import datetime, timedelta -import shutil -import os -import warnings - -from ecmwf_models.utils import ( - load_var_table, - save_ncs_from_nc, - save_gribs_from_grib, - lookup, - mkdate, - str2bool -) - - -def default_variables() -> list: - "These variables are being downloaded, when None are passed by the user" - lut = load_var_table(name="ERAINT") - defaults = lut.loc[lut["default"] == 1]["dl_name"].values - return defaults.tolist() - - -def download_eraint( - target_path, - start, - end, - variables, - grid_size=None, - type="fc", - h_steps=(0, 6, 12, 18), - grb=False, - dry_run=False, - steps=(0,), -): - """ - Download era interim data - - Parameters - ---------- - target_path : str - path at which to save the downloaded grib file - start : date - start date - end : date - end date - variables : list - parameter ids, see wiki - product : str, optional - Name of the model, "ERA-interim" (default) or "ERA5" - grid_size: [float,float], optional - size of the grid in form (lon, lat), which the data is resampled to - If None is passed the minimum grid for the accoring product is chosen - h_steps: tuple, optional (default: (0, 6, 12, 18)) - List of full hours to download data at the selected dates - grb: bool, optional (default: False) - Download data as grb files instead of nc files - dry_run: bool - Do not download anything, this is just used for testing the functions - """ - if dry_run: - warnings.warn("Dry run does not create connection to ECMWF") - server = None - else: - server = ECMWFDataServer() - - param_strings = [] - - dataset = "interim" - dataclass = "ei" - - for variable in variables: - param_strings.append(str(variable)) - - timestep_strings = [] - for timestep in h_steps: - timestep_strings.append("%02d" % timestep) - - param_string = "/".join(param_strings) - timestep_string = "/".join(timestep_strings) - date_string = "%s/to/%s" % ( - start.strftime("%Y-%m-%d"), - end.strftime("%Y-%m-%d"), - ) - - grid_size = "%f/%f" % (grid_size[0], grid_size[1]) if grid_size else None - - step = "/".join([str(s) for s in steps]) - # ATTENTION: When downloading netcdf files steps and times - # must not overlap!! see: - # https://software.ecmwf.int/wiki/display/CKB/What+to+do+with+ECCODES+ERROR+%3A+Try+using+the+-T+option # noqa: E501 - - dl_params = { - "class": dataclass, - "dataset": dataset, - "expver": "1", - "stream": "oper", - "type": type, - "levtype": "sfc", - "param": param_string, - "date": date_string, - "time": timestep_string, - "step": step, - "grid": grid_size, - "format": "grib1" if grb else "netcdf", - "target": target_path, - } - - if not grid_size: - if not grb: - grid_size = "%f/%f" % (0.75, 0.75) - dl_params["grid"] = grid_size - else: - del dl_params["grid"] - else: - if any(size < 0.75 for size in grid_size): - raise Warning( - "Custom grid smaller than original ERA Interim resolution. " - "See https://software.ecmwf.int/wiki/display/CKB/" - "Does+downloading+data+at+higher+resolution+improve+the+output" # noqa: E501 - ) - if not dry_run: - server.retrieve(dl_params) - - -def download_and_move( - target_path, - startdate, - enddate, - variables=None, - keep_original=False, - grid_size=None, - type="an", - h_steps=(0, 6, 12, 18), - steps=(0,), - grb=False, - dry_run=False, -): - """ - Downloads the data from the ECMWF servers and moves them to the target - path. This is done in 30 days increments between start and end date to - be efficient with the MARS system. - See the recommendation for doing it this way in - https://software.ecmwf.int/wiki/display/WEBAPI/ERA-Interim+daily+retrieval+efficiency - - The files are then extracted into separate grib/nc files and stored in - yearly folders under the target_path. - - Parameters - ---------- - target_path: str - Path to which to copy the extracted parameter files - startdate: datetime - First date to download - enddate: datetime - Last date to download - variables : list, optional (default: None) - List of variable ids to pass to the client, if None are passed, - the default variable ids will be downloaded. - keep_original: bool, optional (default: False) - Keep the original downloaded data - grid_size: list, optional (default: None) - [lon, lat] extent of the grid (regular for netcdf, at lat=0 for grib) - If None is passed, the default grid size for the data product is used. - type : str, optional (default: 'an') - Data stream, model to download data for (fc=forecase) - h_steps: list, optional (default: [0, 6, 12, 18]) - List of full hours to download data at the selected dates - grb: bool, optional (default: False) - Download data as grib files instead of netcdf files - dry_run: bool - Do not download anything, this is just used for testing the functions - """ - product = "eraint" - if variables is None: - variables = default_variables() - else: - # find the dl_names - variables = lookup(name=product, variables=variables) - variables = variables["dl_name"].values.tolist() - - td = timedelta(days=30) - current_start = startdate - - while current_start <= enddate: - current_end = current_start + td - if current_end >= enddate: - current_end = enddate - - fname = "{start}_{end}.{ext}".format( - start=current_start.strftime("%Y%m%d"), - end=current_end.strftime("%Y%m%d"), - ext="grb" if grb else "nc", - ) - - downloaded_data_path = os.path.join(target_path, "temp_downloaded") - if not os.path.exists(downloaded_data_path): - os.mkdir(downloaded_data_path) - - dl_file = os.path.join(downloaded_data_path, fname) - - download_eraint( - dl_file, - current_start, - current_end, - variables, - grid_size=grid_size, - h_steps=h_steps, - type=type, - steps=steps, - grb=grb, - dry_run=dry_run, - ) - - if grb: - save_gribs_from_grib(dl_file, target_path, product.upper()) - else: - save_ncs_from_nc(dl_file, target_path, product.upper()) - - if not keep_original: - shutil.rmtree(downloaded_data_path) - current_start = current_end + timedelta(days=1) - - -def parse_args(args): - """ - Parse command line parameters for recursive download - - Parameters - ---------- - args : list - Command line parameters as list of strings - - Returns - ---------- - clparams : argparse.Namespace - Parsed command line parameters - """ - - parser = argparse.ArgumentParser( - description="Download ERA Interim data (6H) between two dates. " - "Before this program can be used, you have to register at ECMWF " - "and setup your .ecmwfapirc file as described here: " - "https://confluence.ecmwf.int//display/WEBAPI/Access+ECMWF+Public+Datasets#AccessECMWFPublicDatasets-key" # noqa: E501 - ) - parser.add_argument( - "localroot", - help="Root of local filesystem where the downloaded data is stored.", - ) - parser.add_argument( - "-s", - "--start", - type=mkdate, - default=datetime(1979, 1, 1), - help=( - "Startdate in format YYYY-MM-DD. " - "If no data is found there then the first available date of " - "the product is used." - ), - ) - parser.add_argument( - "-e", - "--end", - type=mkdate, - default=datetime.now(), - help=( - "Enddate in format YYYY-MM-DD. " - "If not given then the current date is used." - ), - ) - parser.add_argument( - "-var", - "--variables", - metavar="variables", - type=str, - default=None, - nargs="+", - help=( - "Name of variables to download. " - "A list of possible IDs is available at " - "https://github.com/TUW-GEO/ecmwf_models/tree/master/ecmwf_models/erainterim/eraint_lut.csv " # noqa: E501 - "or by using the 'View MARS request' option in the web based " - "ordering system." - ), - ) - parser.add_argument( - "-keep", - "--keep_original", - type=str2bool, - default="False", - help=( - "Keep the originally, temporally downloaded file as it is " - "instead of deleting it afterwards" - ), - ) - parser.add_argument( - "-grb", - "--as_grib", - type=str2bool, - default="False", - help=( - "Download data in grib1 format instead of the default " - "netcdf format" - ), - ) - parser.add_argument( - "--h_steps", - type=int, - default=None, - nargs="+", - help=("Manually change the temporal resolution of donwloaded images"), - ) - parser.add_argument( - "--steps", - type=int, - default=None, - nargs="+", - help=("Manually change the steps"), - ) - parser.add_argument( - "--type", - type=str, - default="an", - help=("Manually set the data stream, e.g. 'an' (default) or 'fc'"), - ) - parser.add_argument( - "--grid_size", - type=float, - default=None, - nargs="+", - help=( - "lon lat. Size of the grid that the data is stored to. " - "Should be at least (and is by default) " - "(0.75, 0.75) for ERA-Interim " - ), - ) - - args = parser.parse_args(args) - - print("ERA Interim data is deprecated. Use ERA5 instead.") - print( - "Downloading ERA Interim {} data from {} to {} into folder {}".format( - "grib" if args.as_grib is True else "netcdf", - args.start.isoformat(), - args.end.isoformat(), - args.localroot, - ) - ) - - return args - - -def main(args): - args = parse_args(args) - - download_and_move( - target_path=args.localroot, - startdate=args.start, - enddate=args.end, - variables=args.variables, - keep_original=args.keep_original, - grid_size=args.grid_size, - h_steps=args.h_steps, - type=args.type, - grb=args.as_grib, - ) - - -def run(): - main(sys.argv[1:]) diff --git a/src/ecmwf_models/erainterim/eraint_lut.csv b/src/ecmwf_models/erainterim/eraint_lut.csv deleted file mode 100644 index 1f77147..0000000 --- a/src/ecmwf_models/erainterim/eraint_lut.csv +++ /dev/null @@ -1,80 +0,0 @@ -dl_name,long_name,levtype,short_name,default -31.128,Sea ice area fraction,an,ci, -32.128,Snow albedo,an,asn, -33.128,Snow density,an,rsn,1 -34.128,Sea surface temperature,an,sst, -35.128,Ice temperature layer 1,an,istl1, -36.128,Ice temperature layer 2,an,istl2, -37.128,Ice temperature layer 3,an,istl3, -38.128,Ice temperature layer 4,an,istl4, -39.128,Volumetric soil water layer 1,an,swvl1,1 -40.128,Volumetric soil water layer 2,an,swvl2,1 -41.128,Volumetric soil water layer 3,an,swvl3,1 -42.128,Volumetric soil water layer 4,an,swvl4,1 -53.162,Vertical integral of mass of atmosphere,an,p53.162, -54.162,Vertical integral of temperature,an,p54.162, -55.162,Vertical integral of water vapour,an,p55.162, -56.162,Vertical integral of cloud liquid water,an,p56.162, -57.162,Vertical integral of cloud frozen water,an,p57.162, -58.162,Vertical integral of ozone,an,p58.162, -59.162,Vertical integral of kinetic energy,an,p59.162, -60.162,Vertical integral of thermal energy,an,p60.162, -61.162,Vertical integral of potential+internal energy,an,p61.162, -62.162,Vertical integral of potential+internal+latent energy,an,p62.162, -63.162,Vertical integral of total energy,an,p63.162, -64.162,Vertical integral of energy conversion,an,p64.162, -65.162,Vertical integral of eastward mass flux,an,p65.162, -66.162,Vertical integral of northward mass flux,an,p66.162, -67.162,Vertical integral of eastward kinetic energy flux,an,p67.162, -68.162,Vertical integral of northward kinetic energy flux,an,p68.162, -69.162,Vertical integral of eastward heat flux,an,p69.162, -70.162,Vertical integral of northward heat flux,an,p70.162, -71.162,Vertical integral of eastward water vapour flux,an,p71.162, -72.162,Vertical integral of northward water vapour flux,an,p72.162, -73.162,Vertical integral of eastward geopotential flux,an,p73.162, -74.162,Vertical integral of northward geopotential flux,an,p74.162, -75.162,Vertical integral of eastward total energy flux,an,p75.162, -76.162,Vertical integral of northward total energy flux,an,p76.162, -77.162,Vertical integral of eastward ozone flux,an,p77.162, -78.162,Vertical integral of northward ozone flux,an,p78.162, -79.162,Vertical integral of divergence of cloud liquid water flux,an,p79.162, -80.162,Vertical integral of divergence of cloud frozen water flux,an,p80.162, -81.162,Vertical integral of divergence of mass flux,an,p81.162, -82.162,Vertical integral of divergence of kinetic energy flux,an,p82.162, -83.162,Vertical integral of divergence of thermal energy flux,an,p83.162, -84.162,Vertical integral of divergence of moisture flux,an,p84.162, -85.162,Vertical integral of divergence of geopotential flux,an,p85.162, -86.162,Vertical integral of divergence of total energy flux,an,p86.162, -87.162,Vertical integral of divergence of ozone flux,an,p87.162, -88.162,Vertical integral of eastward cloud liquid water flux,an,p88.162, -89.162,Vertical integral of northward cloud liquid water flux,an,p89.162, -90.162,Vertical integral of eastward cloud frozen water flux,an,p90.162, -91.162,Vertical integral of northward cloud frozen water flux,an,p91.162, -92.162,Vertical integral of mass tendency,an,p92.162, -134.128,Surface pressure,an,sp, -136.128,Total column water,an,tcw, -137.128,Total column water vapour,an,tcwv, -139.128,Soil temperature level 1,an,stl1,1 -141.128,Snow depth,an,sd,1 -148.128,Charnock,an,chnk, -151.128,Mean sea level pressure,an,msl, -164.128,Total cloud cover,an,tcc, -165.128,10 metre U wind component,an,u10, -166.128,10 metre V wind component,an,v10, -167.128,2 metre temperature,an,t2m, -168.128,2 metre dewpoint temperature,an,d2m, -169.128,Surface solar radiation downwards,fc,ssrd, -170.128,Soil temperature level 2,an,stl2,1 -172.128,Land-sea mask,an,lsm,1 -173.128,Surface roughness,an,sr, -174.128,Albedo,an,al, -183.128,Soil temperature level 3,an,stl3,1 -186.128,Low cloud cover,an,lcc, -187.128,Medium cloud cover,an,mcc, -188.128,High cloud cover,an,hcc, -198.128,Skin reservoir content,an,src, -206.128,Total column ozone,an,tco3, -234.128,Logarithm of surface roughness length for heat,an,lsrh, -235.128,Skin temperature,an,skt, -236.128,Soil temperature level 4,an,stl4,1 -238.128,Temperature of snow layer,an,tsn, \ No newline at end of file diff --git a/src/ecmwf_models/erainterim/interface.py b/src/ecmwf_models/erainterim/interface.py deleted file mode 100644 index 752202c..0000000 --- a/src/ecmwf_models/erainterim/interface.py +++ /dev/null @@ -1,193 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -This module contains ERA Interim specific child classes of the netcdf and grib -base classes, that are used for reading all ecmwf products. -""" - -from ecmwf_models.interface import ERANcImg, ERANcDs, ERAGrbImg, ERAGrbDs -import warnings -from typing import Collection, Optional -from pygeogrids.grids import CellGrid - - -class ERAIntNcImg(ERANcImg): - def __init__( - self, - filename: str, - parameter: Optional[Collection[str]] = ("swvl1", "swvl2"), - mode: Optional[str] = "r", - subgrid: Optional[CellGrid] = None, - mask_seapoints: Optional[bool] = False, - array_1D: Optional[bool] = False, - ): - """ - Reader for a single ERA INT netcdf image file. - - Parameters - ---------- - filename: str - Path to the image file to read. - parameter: list or str, optional (default: ('swvl1', 'swvl2')) - Name of parameters to read from the image file. - subgrid: CellGrid, optional (default: None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Read the land-sea mask to mask points over water and - set them to nan. This option needs the 'lsm' parameter to be - in the file! - array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. - """ - warnings.warn( - "ERA Interim data is deprecated. Use ERA5 instead.", - DeprecationWarning, - ) - product = "ERAINT" - super(ERAIntNcImg, self).__init__( - filename=filename, - product=product, - parameter=parameter, - mode=mode, - subgrid=subgrid, - mask_seapoints=mask_seapoints, - array_1D=array_1D, - ) - - -class ERAIntNcDs(ERANcDs): - def __init__( - self, - root_path: str, - parameter: Optional[Collection[str]] = ("swvl1", "swvl2"), - subgrid: Optional[CellGrid] = None, - mask_seapoints: Optional[bool] = False, - h_steps: Collection[int] = (0, 6, 12, 18), - array_1D: Optional[bool] = False, - ): - - """ - Reader for a stack of ERA INT netcdf image files. - - Parameters - ---------- - root_path: str - Path to the image files to read. - parameter: list or str, optional (default: ('swvl1', 'swvl2')) - Name of parameters to read from the image file. - subgrid: pygeogrids.CellGrid, optional (default: None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Read the land-sea mask to mask points over water and set them - to nan. This option needs the 'lsm' parameter to be in the file! - h_steps : list, optional (default: (0,6,12,18)) - List of full hours to read images for. - array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. - """ - - warnings.warn( - "ERA Interim data is deprecated. Use ERA5 instead.", - DeprecationWarning, - ) - product = "ERAINT" - super(ERAIntNcDs, self).__init__( - root_path=root_path, - product=product, - parameter=parameter, - subgrid=subgrid, - mask_seapoints=mask_seapoints, - h_steps=h_steps, - array_1D=array_1D, - ) - - -class ERAIntGrbImg(ERAGrbImg): - def __init__( - self, - filename: str, - parameter: Optional[Collection[str]] = ("swvl1", "swvl2"), - mode: Optional[str] = "r", - subgrid: Optional[CellGrid] = None, - mask_seapoints: Optional[bool] = False, - array_1D: Optional[bool] = False, - ): - """ - Reader for a single ERA5 grib image file. - - Parameters - ---------- - filename: str - Path to the image file to read. - parameter: list or str, optional (default: ['swvl1', 'swvl2']) - Name of parameters to read from the image file. - mode: str, optional (default: "r") - File mode, better don't change... - subgrid: pygeogrids.CellGrid, optional (default: None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Read the land-sea mask to mask points over water and set - them to nan. This option needs the 'lsm' parameter to be in - the file! - array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. - """ - warnings.warn( - "ERA Interim data is deprecated. Use ERA5 instead.", - DeprecationWarning, - ) - product = "ERAINT" - super(ERAIntGrbImg, self).__init__( - filename=filename, - product=product, - parameter=parameter, - mode=mode, - subgrid=subgrid, - mask_seapoints=mask_seapoints, - array_1D=array_1D, - ) - - -class ERAIntGrbDs(ERAGrbDs): - def __init__( - self, - root_path: str, - parameter: Collection[str] = ("swvl1", "swvl2"), - subgrid: Optional[CellGrid] = None, - mask_seapoints: Optional[bool] = False, - h_steps: Collection[int] = (0, 6, 12, 18), - array_1D: Optional[bool] = False, - ): - """ - Reader for a stack of ERA INT grib image file. - - Parameters - ---------- - root_path: str - Path to the image files to read. - parameter: list or str, optional (default: ['swvl1', 'swvl2']) - Name of parameters to read from the image file. - subgrid: CellGrid, optional (default: None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Read the land-sea mask to mask points over water and set them - to nan. This option needs the 'lsm' parameter to be in the file! - h_steps : list, optional (default: [0,6,12,18]) - List of full hours to read images for. - array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. - """ - warnings.warn( - "ERA Interim data is deprecated. Use ERA5 instead.", - DeprecationWarning, - ) - product = "ERAINT" - super(ERAIntGrbDs, self).__init__( - root_path=root_path, - product=product, - parameter=parameter, - subgrid=subgrid, - mask_seapoints=mask_seapoints, - h_steps=h_steps, - array_1D=array_1D, - ) diff --git a/src/ecmwf_models/erainterim/reshuffle.py b/src/ecmwf_models/erainterim/reshuffle.py deleted file mode 100644 index edfb4ef..0000000 --- a/src/ecmwf_models/erainterim/reshuffle.py +++ /dev/null @@ -1,218 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Module for a command line interface to convert the ERA Interim data into a -time series format using the repurpose package -""" - -import os -import sys -import argparse -import numpy as np - -from pygeogrids import BasicGrid - -from repurpose.img2ts import Img2Ts -from ecmwf_models.erainterim.interface import ERAIntGrbDs, ERAIntNcDs -from ecmwf_models.utils import mkdate, parse_filetype -from datetime import time, datetime - - -def reshuffle( - input_root, - outputpath, - startdate, - enddate, - variables, - mask_seapoints=False, - h_steps=(0, 6, 12, 18), - imgbuffer=50, -): - """ - Reshuffle method applied to ERA images for conversion into netcdf time - series format. - - Parameters - ---------- - input_root: str - Input path where ERA image data was downloaded to. - outputpath : str - Output path, where the reshuffled netcdf time series are stored. - startdate : datetime - Start date, from which images are read and time series are generated. - enddate : datetime - End date, from which images are read and time series are generated. - variables: list or str or tuple - Variables to read from the passed images and convert into time - series format. - mask_seapoints: bool, optional (default: False) - Mask points over sea, replace them with nan. - h_steps: tuple, optional (default: (0,6,12,18)) - Full hours for which images are available. - imgbuffer: int, optional (default: 50) - How many images to read at once before writing time series. This number - affects how many images are stored in memory and should be chosen according - to the available amount of memory and the size of a single image. - """ - - filetype = parse_filetype(input_root) - - if filetype == "grib": - input_dataset = ERAIntGrbDs( - root_path=input_root, - parameter=variables, - subgrid=None, - array_1D=True, - mask_seapoints=mask_seapoints, - h_steps=h_steps, - ) - elif filetype == "netcdf": - input_dataset = ERAIntNcDs( - root_path=input_root, - parameter=variables, - subgrid=None, - array_1D=True, - mask_seapoints=mask_seapoints, - h_steps=h_steps, - ) - else: - raise Exception("Unknown file format") - - if not os.path.exists(outputpath): - os.makedirs(outputpath) - - global_attr = {"product": "ERA Interim (from {})".format(filetype)} - - # get time series attributes from first day of data. - first_date_time = datetime.combine(startdate.date(), time(h_steps[0], 0)) - - data = input_dataset.read(first_date_time) - ts_attributes = data.metadata - - grid = BasicGrid(data.lon, data.lat) - - reshuffler = Img2Ts( - input_dataset=input_dataset, - outputpath=outputpath, - startdate=startdate, - enddate=enddate, - input_grid=grid, - imgbuffer=imgbuffer, - cellsize_lat=5.0, - cellsize_lon=5.0, - ts_dtypes=np.dtype("float32"), - global_attr=global_attr, - zlib=True, - unlim_chunksize=1000, - ts_attributes=ts_attributes, - ) - reshuffler.calc() - - -def parse_args(args): - """ - Parse command line parameters for conversion from image to time series. - - Parameters - ---------- - args: list - command line parameters as list of strings - - Returns - ---------- - args : argparse.Namespace - Parsed command line parameters - """ - - parser = argparse.ArgumentParser( - description="Convert downloaded ERA image data into time series " - "format." - ) - parser.add_argument( - "dataset_root", - help="Root of local filesystem where the image data is stored.", - ) - parser.add_argument( - "timeseries_root", - help="Root of local filesystem where the time series " - "should be stored.", - ) - parser.add_argument( - "start", type=mkdate, help=("Startdate in format YYYY-MM-DD") - ) - parser.add_argument( - "end", type=mkdate, help=("Enddate in format YYYY-MM-DD") - ) - parser.add_argument( - "variables", - metavar="variables", - nargs="+", - help=( - "Short name of variables as stored in the images, " - "which are reshuffled. " - "See documentation on image download for resp. ERA products, " - "for more information on variable names of the product. " - ), - ) - parser.add_argument( - "--mask_seapoints", - type=bool, - default=False, - help=( - "Replace points over water with nan. This option needs the " - "LandSeaMask (lsm) variable in the image data (will use mask " - "from first available file). " - "To use a dynamic LSM, reshuffle the LSM variable to time series." - ), - ) - parser.add_argument( - "--h_steps", - type=int, - default=None, - nargs="+", - help=( - "Time steps (full hours) of images that will be reshuffled " - "(must be in the images). " - "By default 6H images (starting at 0:00 UTC) will be reshuffled." - ), - ) - parser.add_argument( - "--imgbuffer", - type=int, - default=50, - help=( - "How many images to read at once. Bigger numbers make the " - "conversion faster but consume more memory. " - "Choose this according to your system and the size of a " - "single image." - ), - ) - args = parser.parse_args(args) - - print("ERA Interim data is deprecated. Use ERA5 instead.") - print( - "Converting data from {} to {} into {}.".format( - args.start.isoformat(), args.end.isoformat(), args.timeseries_root - ) - ) - - return args - - -def main(args): - args = parse_args(args) - - reshuffle( - input_root=args.dataset_root, - outputpath=args.timeseries_root, - startdate=args.start, - enddate=args.end, - variables=args.variables, - mask_seapoints=args.mask_seapoints, - h_steps=args.h_steps, - imgbuffer=args.imgbuffer, - ) - - -def run(): - main(sys.argv[1:]) diff --git a/src/ecmwf_models/extract.py b/src/ecmwf_models/extract.py new file mode 100644 index 0000000..e5648f6 --- /dev/null +++ b/src/ecmwf_models/extract.py @@ -0,0 +1,209 @@ +from datetime import datetime +import logging +import os +import pandas as pd +import xarray as xr +from datedown.fname_creator import create_dt_fpath + +from ecmwf_models.globals import (IMG_FNAME_TEMPLATE, + IMG_FNAME_DATETIME_FORMAT, EXPVER, SUBDIRS) +from ecmwf_models.globals import ( + Cdo, + cdo_available, + CdoNotFoundError, + pygrib, + pygrib_available, + PygribNotFoundError, +) + + +def save_ncs_from_nc( + input_nc, + output_path, + product_name, + grid=None, + keep_original=True, + remap_method="bil", + keep_prelim=True, +): + """ + Split the downloaded netcdf file into daily files and add to folder + structure necessary for reshuffling. + + Parameters + ---------- + input_nc : str + Filepath of the downloaded .nc file + output_path : str + Where to save the resulting netcdf files + product_name : str + Name of the ECMWF model (only for filename generation) + keep_original: bool + keep the original downloaded data too, before it is sliced into + individual images. + keep_prelim: bool, optional (default: True) + True to keep preliminary data from ERA5T with a different file name, or + False drop these files and only keep the final records. + """ + _filename_templ = IMG_FNAME_TEMPLATE.format( + product="{product}", + type='AN', + datetime=IMG_FNAME_DATETIME_FORMAT, + ext='nc') + + nc_in = xr.open_dataset(input_nc, mask_and_scale=True) + if 'valid_time' in nc_in.variables: + nc_in = nc_in.rename_vars({"valid_time": 'time'}) + + if grid is not None: + if not cdo_available: + raise CdoNotFoundError() + cdo = Cdo() + + gridpath = os.path.join(output_path, "grid.txt") + weightspath = os.path.join(output_path, "remap_weights.nc") + if not os.path.exists(gridpath): + with open(gridpath, "w") as f: + for k, v in grid.items(): + f.write(f"{k} = {v}\n") + + for time in nc_in["time"].values: + subset = nc_in.sel({"time": time}) + + # Expver identifies preliminary data + if 'expver' in subset: + expver = str(subset['expver'].values) + subset = subset.drop_vars('expver') + try: + ext = EXPVER[expver] + except KeyError: + ext = '' + else: + ext = '' + + if len(ext) > 0 and not keep_prelim: + logging.info(f"Dropping preliminary data {time}") + continue + + if len(ext) > 0: + filename_templ = _filename_templ.format(product=product_name + + '-' + ext) + else: + filename_templ = _filename_templ.format(product=product_name) + + if 'number' in subset.variables: + subset = subset.drop_vars('number') + + timestamp = pd.Timestamp(time).to_pydatetime() + filepath = create_dt_fpath( + timestamp, + root=output_path, + fname=filename_templ, + subdirs=SUBDIRS, + ) + + if not os.path.exists(os.path.dirname(filepath)): + os.makedirs(os.path.dirname(filepath)) + + if grid is not None: + if not os.path.exists(weightspath): + # create weights file + getattr(cdo, "gen" + remap_method)( + gridpath, input=subset, output=weightspath) + subset = cdo.remap( + ",".join([gridpath, weightspath]), + input=subset, + returnXDataset=True, + ) + + # same compression for all variables + var_encode = {"zlib": True, "complevel": 6} + subset.to_netcdf( + filepath, encoding={var: var_encode for var in subset.variables}) + + nc_in.close() + + if not keep_original: + os.remove(input_nc) + if grid is not None: + cdo.cleanTempDir() + + +def save_gribs_from_grib( + input_grib, + output_path, + product_name, + keep_original=True, + keep_prelim=True, +): + """ + Split the downloaded grib file into daily files and add to folder structure + necessary for reshuffling. + + Parameters + ---------- + input_grib : str + Filepath of the downloaded .grb file + output_path : str + Where to save the resulting grib files + product_name : str + Name of the ECMWF model (only for filename generation) + keep_original: bool + keep the original downloaded data too, before it is sliced into + individual images. + keep_prelim: bool, optional (default: True) + True to keep preliminary data from ERA5T with a different file name, or + False drop these files and only keep the final records. + """ + if not pygrib_available: + raise PygribNotFoundError() + grib_in = pygrib.open(input_grib) + + _filename_templ = IMG_FNAME_TEMPLATE.format( + product="{product}", + type='AN', + datetime=IMG_FNAME_DATETIME_FORMAT, + ext='grb') + + grib_in.seek(0) + prev_date = None + + for grb in grib_in: + filedate = datetime(grb["year"], grb["month"], grb["day"], grb["hour"]) + + expver = grb['expver'] + + try: + ext = EXPVER[expver] + except KeyError: + ext = '' + + if len(ext) > 0 and not keep_prelim: + logging.info(f"Dropping preliminary data {filedate}") + continue + + if len(ext) > 0: + filename_templ = _filename_templ.format(product=product_name + + '-' + ext) + else: + filename_templ = _filename_templ.format(product=product_name) + + filepath = create_dt_fpath( + filedate, root=output_path, fname=filename_templ, subdirs=SUBDIRS) + + if not os.path.exists(os.path.dirname(filepath)): + os.makedirs(os.path.dirname(filepath)) + + if prev_date != filedate: # to overwrite old files + mode = 'wb' + prev_date = filedate + else: + mode = "ab" + + with open(filepath, mode) as grb_out: + grb_out.write(grb.tostring()) + + grib_in.close() + + if not keep_original: + os.remove(input_grib) diff --git a/src/ecmwf_models/globals.py b/src/ecmwf_models/globals.py new file mode 100644 index 0000000..c573416 --- /dev/null +++ b/src/ecmwf_models/globals.py @@ -0,0 +1,50 @@ +import os +from pathlib import Path + +IMG_FNAME_TEMPLATE = "{product}_{type}_{datetime}.{ext}" +IMG_FNAME_DATETIME_FORMAT = "%Y%m%d_%H%M" + +# ERA5 products supported by the reader. +SUPPORTED_PRODUCTS = ['era5', 'era5-land'] + +CDS_API_URL = "https://cds.climate.copernicus.eu/api" + +# CDSAPI_RC variable must be set or we use home dir +DOTRC = os.environ.get('CDSAPI_RC', os.path.join(Path.home(), '.cdsapirc')) + +EXPVER = {'0005': 'T', '0001': ''} + +SUBDIRS = ["%Y", "%j"] + +try: + import pygrib + pygrib_available = True +except ImportError: + pygrib_available = False + pygrib = None + +try: + from cdo import Cdo + cdo_available = True +except ImportError: + cdo_available = False + Cdo = None + + +class PygribNotFoundError(ModuleNotFoundError): + + def __init__(self, msg=None): + _default_msg = ("pygrib could not be imported. " + "Pleas run `conda install -c conda-forge pygrib` to " + "install it.") + self.msg = _default_msg if msg is None else msg + + +class CdoNotFoundError(ModuleNotFoundError): + + def __init__(self, msg=None): + _default_msg = ( + "cdo and/or python-cdo not installed. " + "Pleas run `conda install -c conda-forge cdo` and also " + "`pip install cdo`.") + self.msg = _default_msg if msg is None else msg diff --git a/src/ecmwf_models/grid.py b/src/ecmwf_models/grid.py index b1d4ce7..a5fc178 100644 --- a/src/ecmwf_models/grid.py +++ b/src/ecmwf_models/grid.py @@ -4,10 +4,56 @@ """ import numpy as np -from pygeogrids.grids import BasicGrid, CellGrid -from netCDF4 import Dataset +from pygeogrids.grids import CellGrid, gridfromdims import os from typing import Tuple +import xarray as xr + + +def trafo_lon(lon): + """ + 0...360 -> 0...180...-180 + + Parameters + ---------- + lon: np.array + Longitude array + + Returns + ------- + lon_transformed: np.array + Transformed longitude array + """ + lons_gt_180 = np.where(lon > 180.) + lon[lons_gt_180] = lon[lons_gt_180] - 360.0 + return lon + + +def safe_arange(start, stop, step): + """ + Like numpy.arange, but floating point precision is kept. + Compare: `np.arange(0, 100, 0.01)[-1]` vs `safe_arange(0, 100, 0.01)[-1]` + + Parameters + ---------- + start: float + Start of interval + stop: float + End of interval (not included) + step: float + Stepsize + + Returns + ------- + arange: np.array + Range of values in interval at the given step size / sampling + """ + f_step = (1. / float(step)) + vals = np.arange( + float(start) * f_step, + float(stop) * f_step, + float(step) * f_step) + return vals / f_step def get_grid_resolution(lats: np.ndarray, lons: np.ndarray) -> (float, float): @@ -38,8 +84,7 @@ def get_grid_resolution(lats: np.ndarray, lons: np.ndarray) -> (float, float): def ERA5_RegularImgLandGrid( - res_lat: float = 0.25, - res_lon: float = 0.25, + resolution: float = 0.25, bbox: Tuple[float, float, float, float] = None, ) -> CellGrid: """ @@ -48,49 +93,52 @@ def ERA5_RegularImgLandGrid( Parameters ---------- - res_lat: float, optional (default: 0.25) - Grid resolution (in degrees) in latitude direction. - res_lon: float, optional (default: 0.25) - Grid resolution (in degrees) in longitude direction. + resolution: float, optional (default: 0.25) + Grid resolution in degrees. Either 0.25 (ERA5) or 0.1 (ERA5-Land) bbox: tuple, optional (default: None) - (min_lon, min_lat, max_lon, max_lat) - wgs84 - bbox to cut the global grid to. + WGS84 (min_lon, min_lat, max_lon, max_lat) + Values must be between -180 to 180 and -90 to 90 + bbox to cut the global grid to + + Returns + ------- + landgrid: CellGrid + ERA Land grid at the given resolution, cut to the given bounding box """ + if resolution not in [0.25, 0.1]: + raise ValueError("Unsupported resolution. Choose one of 0.25 or 0.1") try: - ds = Dataset( + ds = xr.open_dataset( os.path.join( os.path.abspath(os.path.dirname(__file__)), "era5", "land_definition_files", - f"landmask_{res_lat}_{res_lon}.nc", - )) + f"landmask_{resolution}_{resolution}.nc", + ))["land"] + ds = ds.assign_coords({'longitude': trafo_lon(ds['longitude'].values)}) + if bbox is not None: + ds = ds.sel(latitude=slice(bbox[3], bbox[1])) + ds = ds.isel( + longitude=np.where(((ds['longitude'].values >= bbox[0]) + & (ds['longitude'].values <= bbox[2])))[0]) + + land_mask = np.array(ds.values == 1.0) + except FileNotFoundError: raise FileNotFoundError( "Land definition for this grid resolution not yet available. " "Please create and add it.") - global_grid = ERA_RegularImgGrid(res_lat, res_lon, bbox=None) - - land_mask = ds.variables["land"][:].flatten().filled(0.0) == 1.0 - land_points = np.ma.masked_array(global_grid.get_grid_points()[0], - ~land_mask) + full_grid = ERA_RegularImgGrid(resolution, bbox=bbox) - land_grid = global_grid.subgrid_from_gpis( - land_points[~land_points.mask].filled().astype("int")) - - land_grid = land_grid.to_cell_grid(5.0, 5.0) - - if bbox is not None: - gpis = land_grid.get_bbox_grid_points( - lonmin=bbox[0], latmin=bbox[1], lonmax=bbox[2], latmax=bbox[3]) - land_grid = land_grid.subgrid_from_gpis(gpis) + land_gpis = full_grid.get_grid_points()[0][land_mask.flatten()] + land_grid = full_grid.subgrid_from_gpis(land_gpis) return land_grid def ERA_RegularImgGrid( - res_lat: float = 0.25, - res_lon: float = 0.25, + resolution: float = 0.25, bbox: Tuple[float, float, float, float] = None, ) -> CellGrid: """ @@ -99,12 +147,12 @@ def ERA_RegularImgGrid( Parameters ---------- - res_lat: float, optional (default: 0.25) - Grid resolution (in degrees) in latitude direction. - res_lon: float, optional (default: 0.25) - Grid resolution (in degrees) in longitude direction. + resolution: float, optional (default: 0.25) + Grid resolution (in degrees) in both directions. + Either 0.25 (ERA5) or 0.1 (ERA5-Land) bbox: tuple, optional (default: None) - (min_lon, min_lat, max_lon, max_lat) - wgs84 + (min_lon, min_lat, max_lon, max_lat) + wgs84 (Lon -180 to 180) bbox to cut the global grid to. Returns @@ -112,46 +160,19 @@ def ERA_RegularImgGrid( CellGrid : CellGrid Regular, CellGrid with 5DEG*5DEG cells for the passed bounding box. """ + # to get precise coordinates... + lon = safe_arange(-180, 180, resolution) + lat = safe_arange(-90, 90 + resolution, resolution)[::-1] - # np.arange is not precise... - f_lon = 1.0 / res_lon - f_lat = 1.0 / res_lat - res_lon = res_lon * f_lon - res_lat = res_lat * f_lat - lon = np.arange(0.0, 360.0 * f_lon, res_lon) - lat = np.arange(90.0 * f_lat, -90 * f_lat - res_lat, -1 * res_lat) - lons_gt_180 = np.where(lon > (180.0 * f_lon)) - lon[lons_gt_180] = lon[lons_gt_180] - (360.0 * f_lon) - - lon, lat = np.meshgrid(lon, lat) + # ERA grid LLC point has Lon=0 + lon = np.roll(lon, int(len(lon) * 0.5)) + grid = gridfromdims(lon, lat, origin='top') - glob_basic_grid = BasicGrid(lon.flatten() / f_lon, lat.flatten() / f_lat) - glob_cell_grid = glob_basic_grid.to_cell_grid(cellsize=5.0) - - if bbox is not None: - gpis = glob_cell_grid.get_bbox_grid_points( - lonmin=bbox[0], latmin=bbox[1], lonmax=bbox[2], latmax=bbox[3]) - glob_cell_grid = glob_cell_grid.subgrid_from_gpis(gpis) - - return glob_cell_grid - - -def ERA_IrregularImgGrid( - lons: np.ndarray, - lats: np.ndarray, - bbox: Tuple[float, float, float, float] = None, -) -> CellGrid: - """ - Create a irregular grid from the passed coordinates. - """ - lons_gt_180 = np.where(lons > 180.0) - lons[lons_gt_180] = lons[lons_gt_180] - 360 - grid = BasicGrid(lons.flatten(), lats.flatten())\ - .to_cell_grid(cellsize=5.0) + grid = grid.to_cell_grid(cellsize=5.0) if bbox is not None: - gpis = grid.get_bbox_grid_points( - lonmin=bbox[0], latmin=bbox[1], lonmax=bbox[2], latmax=bbox[3]) - grid = grid.subgrid_from_gpis(gpis) + subgpis = grid.get_bbox_grid_points( + latmin=bbox[1], latmax=bbox[3], lonmin=bbox[0], lonmax=bbox[2]) + grid = grid.subgrid_from_gpis(subgpis) return grid diff --git a/src/ecmwf_models/interface.py b/src/ecmwf_models/interface.py index 6d59486..3af58df 100644 --- a/src/ecmwf_models/interface.py +++ b/src/ecmwf_models/interface.py @@ -19,49 +19,51 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - +""" +Base classes for reading downloaded ERA netcdf and grib images and stacks +""" import warnings import os -from pygeobase.io_base import ImageBase, MultiTemporalImageBase -from pygeobase.object_base import Image -import numpy as np +import glob from datetime import timedelta, datetime # noqa: F401 +import numpy as np +import xarray as xr -from pygeogrids.netcdf import load_grid +from pygeobase.io_base import ImageBase, MultiTemporalImageBase +from pygeobase.object_base import Image from pynetcf.time_series import GriddedNcOrthoMultiTs -from ecmwf_models.grid import ( - ERA_RegularImgGrid, - get_grid_resolution, - ERA_IrregularImgGrid, -) -from ecmwf_models.utils import lookup -import glob - -import xarray as xr +from pygeogrids.grids import BasicGrid, gridfromdims +from pygeogrids.netcdf import load_grid -try: - import pygrib -except ImportError: - warnings.warn("pygrib has not been imported") -""" -Base classes for reading downloaded ERA netcdf and grib images and stacks -""" +from ecmwf_models.grid import trafo_lon +from ecmwf_models.utils import lookup +from ecmwf_models.globals import ( + IMG_FNAME_TEMPLATE, + IMG_FNAME_DATETIME_FORMAT, + SUPPORTED_PRODUCTS, + SUBDIRS, +) +from ecmwf_models.globals import (pygrib, pygrib_available, + PygribNotFoundError) class ERANcImg(ImageBase): """ - Reader for a single ERA netcdf file. + Reader for a single ERA netcdf file. The main purpose of this class is + to use it in the time series conversion routine. To read downloaded image + files, we recommend using xarray (https://docs.xarray.dev/en/stable/). Parameters ---------- filename: str Path to the image file to read. product : str - 'era5' or 'era5-land' or 'eraint' + 'era5' or 'era5-land' parameter: list or str, optional (default: ['swvl1', 'swvl2']) Name of parameters to read from the image file. - subgrid: pygeogrids.CellGrid, optional (default: None) - Read only data for points of this grid and not global values. + subgrid: ERA_RegularImgGrid or ERA_RegularImgLandGrid, optional (default: None) + Read only data for points of this grid. + If None is passed, we read all points from the file. mask_seapoints : bool, optional (default: False) Read the land-sea mask to mask points over water and set them to nan. This option needs the 'lsm' parameter to be in the file! @@ -70,27 +72,27 @@ class ERANcImg(ImageBase): mode : str, optional (default: 'r') Mode in which to open the file, changing this can cause data loss. This argument should not be changed! - """ def __init__( self, filename, product, - parameter=["swvl1", "swvl2"], + parameter=None, subgrid=None, mask_seapoints=False, array_1D=False, - mode="r", + mode='r', ): super(ERANcImg, self).__init__(filename, mode=mode) - if type(parameter) == str: - parameter = [parameter] - - # look up short names - self.parameter = lookup(product, parameter)["short_name"].values + if parameter is not None: + # look up short names + self.parameter = lookup( + product, np.atleast_1d(parameter))["short_name"].values + else: + self.parameter = None self.mask_seapoints = mask_seapoints self.array_1D = array_1D @@ -110,24 +112,16 @@ def read(self, timestamp=None): timestamp : datetime, optional (default: None) Specific date (time) to read the data for. """ - return_img = {} - return_metadata = {} try: dataset = xr.open_dataset( self.filename, engine="netcdf4", mask_and_scale=True) except IOError as e: - print(e) print(" ".join([self.filename, "can not be opened"])) raise e - res_lat, res_lon = get_grid_resolution( - dataset.variables["latitude"][:], - dataset.variables["longitude"][:]) - - grid = ( - ERA_RegularImgGrid(res_lat, res_lon) - if not self.subgrid else self.subgrid) + if self.parameter is None: + self.parameter = list(dataset.data_vars) if self.mask_seapoints: if "lsm" not in dataset.variables.keys(): @@ -135,66 +129,84 @@ def read(self, timestamp=None): " passed image for masking.") else: sea_mask = dataset.variables["lsm"].values + else: + sea_mask = None + + return_img = {} + return_metadata = {} - for name in dataset.variables: - if name in self.parameter: + grid = gridfromdims( + trafo_lon(dataset['longitude'].values), + dataset['latitude'].values, + origin='top') + + if self.subgrid is not None: + gpis = grid.find_nearest_gpi(self.subgrid.activearrlon, + self.subgrid.activearrlat)[0] + else: + gpis = None + + for name in self.parameter: + try: variable = dataset[name] + except KeyError: + path, f = os.path.split(self.filename) + warnings.warn(f"Cannot load variable {name} from file {f}. " + f"Filling image with NaNs.") + dat = np.full( + grid.shape if gpis is None else len(gpis), np.nan) + return_img[name] = dat + continue - if 'expver' in variable.dims and (variable.data.ndim == 3): - warnings.warn( - f"Found experimental data in {self.filename}") - param_data = variable.data[-1] - for vers_data in variable.data: - if not np.all(np.isnan(vers_data)): - param_data = vers_data - else: - param_data = variable.data - - if self.mask_seapoints: - param_data = np.ma.array( - param_data, - mask=np.logical_not(sea_mask), - fill_value=np.nan, - ) - param_data = param_data.filled() + if 'expver' in variable.dims and (variable.data.ndim == 3): + warnings.warn(f"Found experimental data in {self.filename}") + param_data = variable.data[-1] + for vers_data in variable.data: + if not np.all(np.isnan(vers_data)): + param_data = vers_data + else: + param_data = variable.data - param_data = param_data.flatten() + if self.mask_seapoints: + param_data = np.ma.array( + param_data, + mask=np.logical_not(sea_mask), + fill_value=np.nan, + ) + param_data = param_data.filled() - return_metadata[name] = variable.attrs - return_img.update( - dict([(str(name), param_data[grid.activegpis])])) + param_data = param_data.flatten() - try: - return_img[name] - except KeyError: - path, thefile = os.path.split(self.filename) - warnings.warn( - "Cannot load variable {var} from file {thefile}. " - "Filling image with NaNs.".format( - var=name, thefile=thefile)) - return_img[name] = np.empty(grid.activegpis.size).fill( - np.nan) + if gpis is not None: + param_data = param_data[gpis] + + return_metadata[name] = variable.attrs + return_img[name] = param_data dataset.close() + if self.subgrid is None: + self.subgrid = grid + if self.array_1D: return Image( - grid.activearrlon, - grid.activearrlat, + self.subgrid.activearrlon, + self.subgrid.activearrlat, return_img, return_metadata, timestamp, ) else: - nlat = np.unique(grid.activearrlat).size - nlon = np.unique(grid.activearrlon).size + if len(self.subgrid.shape) != 2: + raise ValueError("Passed subgrid does not have a 2d shape." + "Did you mean to read data as 1d arrays?") for key in return_img: - return_img[key] = return_img[key].reshape((nlat, nlon)) + return_img[key] = return_img[key].reshape(self.subgrid.shape) return Image( - grid.activearrlon.reshape(nlat, nlon), - grid.activearrlat.reshape(nlat, nlon), + self.subgrid.activearrlon.reshape(self.subgrid.shape), + self.subgrid.activearrlat.reshape(self.subgrid.shape), return_img, return_metadata, timestamp, @@ -212,29 +224,41 @@ def close(self): class ERANcDs(MultiTemporalImageBase): """ - Class for reading ERA 5 images in nc format. + Reader to extract individual images from a multi-image netcdf dataset. + The main purpose of this class is to use it in the time series conversion + routine. To read downloaded image files, we recommend using + xarray (https://docs.xarray.dev/en/stable/). Parameters ---------- root_path: str Root path where image data is stored. - parameter: list or str, optional (default: ['swvl1', 'swvl2']) - Parameter or list of parameters to read from image files. - subgrid: pygeogrids.CellGrid, optional (default: None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Use the land-sea-mask parameter in the file to mask points over water. - h_steps : list, optional (default: [0,6,12,18]) - List of full hours for which images exist. - array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. + product: str + ERA5 or ERA5-LAND + parameter: list[str] or str, optional (default: None) + Parameter or list of parameters to read. None reads all available + Parameters. + subgrid: ERA_RegularImgGrid or ERA_RegularImgLandGrid, optional + Read only data for points of this grid. + If None is passed, we read all points from the file. + mask_seapoints: bool, optional (default: False) + All points that are not over land are replaced with NaN values. + This requires that the land sea mask (lsm) parameter is included + in the image files! + h_steps: tuple, optional (default: (0, 6, 12, 18)) + Time stamps available for each day. Numbers refer to full hours. + array_1D: bool, optional (default: True) + Read data as 1d arrays. This is required when the passed subgrid + is 1-dimensional (e.g. when only landpoints are read). Otherwise + when a 2d (subgrid) is used, this switch means that the extracted + image data is also 2-dimensional (lon, lat). """ def __init__( self, root_path, product, - parameter=("swvl1", "swvl2"), + parameter=None, subgrid=None, mask_seapoints=False, h_steps=(0, 6, 12, 18), @@ -242,10 +266,13 @@ def __init__( ): self.h_steps = h_steps - subpath_templ = ["%Y", "%j"] - if type(parameter) == str: - parameter = [parameter] + if parameter is not None: + # look up short names + self.parameter = lookup( + product, np.atleast_1d(parameter))["short_name"].values + else: + self.parameter = None ioclass_kws = { 'product': product, @@ -255,17 +282,22 @@ def __init__( 'array_1D': array_1D } - # the goal is to use expERA5*.nc if necessary, but perfer ERA5*.nc + # the goal is to use ERA5-T*.nc if necessary, but prefer ERA5*.nc self.fn_templ_priority = [ - 'ERA5_*_{datetime}.nc', 'expERA5_*_{datetime}.nc' + IMG_FNAME_TEMPLATE.format( + product=(p + ext).upper(), + type='*', + datetime="{datetime}", + ext='nc') for ext in ['', '-T'] for p in SUPPORTED_PRODUCTS ] super(ERANcDs, self).__init__( root_path, ERANcImg, - fname_templ='*_{datetime}.nc', - datetime_format="%Y%m%d_%H%M", - subpath_templ=subpath_templ, + fname_templ=IMG_FNAME_TEMPLATE.format( + product='*', type='*', datetime='{datetime}', ext='nc'), + datetime_format=IMG_FNAME_DATETIME_FORMAT, + subpath_templ=SUBDIRS, exact_templ=False, ioclass_kws=ioclass_kws) @@ -351,47 +383,50 @@ def tstamps_for_daterange(self, start_date, end_date): class ERAGrbImg(ImageBase): - """ - Base class for reader for a single ERA Grib file. - Parameters - ---------- - filename: str - Path to the image file to read. - product : str - ERA5 or ERAINT - parameter: list or str, optional (default: ['swvl1', 'swvl2']) - Name of parameters to read from the image file. - subgrid: pygeogrids.CellGrid, optional (default:None) - Read only data for points of this grid and not global values. - mask_seapoints : bool, optional (default: False) - Read the land-sea mask to mask points over water and set them to nan. - This option needs the 'lsm' parameter to be in the file! - array_1D: bool, optional (default: False) - Read data as list, instead of 2D array, used for reshuffling. - mode : str, optional (default: 'r') - Mode in which to open the file, changing this can cause data loss. - This argument should not be changed! - """ - - def __init__( - self, - filename, - product, - parameter=("swvl1", "swvl2"), - subgrid=None, - mask_seapoints=False, - array_1D=True, - mode="r", - ): + def __init__(self, + filename, + product, + parameter=None, + subgrid=None, + mask_seapoints=False, + array_1D=True, + mode='r'): + """ + Reader for a single ERA grib file. The main purpose of this class is + to use it in the time series conversion routine. To read downloaded image + files, we recommend using xarray (https://docs.xarray.dev/en/stable/). + Parameters + ---------- + filename: str + Path to the image file to read. + product : str + ERA5 or ERA5-LAND + parameter: list or str, optional + Name of parameters to read from the image file. None means all + available data variables. + subgrid: ERA_RegularImgGrid or ERA_RegularImgLandGrid, optional (default: None) + Read only data for points of this grid. + If None is passed, we read all points from the file. + mask_seapoints : bool, optional (default: False) + Read the land-sea mask to mask points over water and set them to nan. + This option needs the 'lsm' parameter to be in the file! + array_1D: bool, optional (default: False) + Read data as list, instead of 2D array, used for reshuffling. + mode : str, optional (default: 'r') + Mode in which to open the file, changing this can cause data loss. + This argument should not be changed! + """ super(ERAGrbImg, self).__init__(filename, mode=mode) - if type(parameter) == str: - parameter = [parameter] + if parameter is None: + self.parameter = None + else: + # look up short names + self.parameter = lookup( + product, np.atleast_1d(parameter))["short_name"].values - self.parameter = lookup( - product, parameter)["short_name"].values # look up short names self.product = product self.mask_seapoints = mask_seapoints @@ -407,53 +442,52 @@ def read(self, timestamp=None): timestamp : datetime, optional (default: None) Specific date (time) to read the data for. """ + if not pygrib_available: + raise PygribNotFoundError() grbs = pygrib.open(self.filename) - grid = self.subgrid - return_img = {} return_metadata = {} - var_msg_lut = {p: None for p in self.parameter} - sea_mask = None - for N in range(grbs.messages): - n = N + 1 - message = grbs.message(n) - param_name = str(message.cfVarNameECMF) - - if param_name == "lsm": - if self.mask_seapoints and sea_mask is None: - sea_mask = message.values.flatten() + grid = None - if param_name not in self.parameter: - continue + for n in range(1, grbs.messages + 1): + message = grbs.message(n) + try: + param_name = str(message.cfVarNameECMF) # old field? + except RuntimeError: + param_name = str(message.shortName) + + if self.mask_seapoints and (param_name == "lsm"): + pass + elif self.parameter is None: + pass + elif param_name in self.parameter: + pass else: - var_msg_lut[param_name] = n - - # available variables - shape = None - for param_name, n in var_msg_lut.items(): - if n is None: continue return_metadata[param_name] = {} message = grbs.message(n) - - param_data = message.values.flatten() - if not shape: - shape = param_data.shape - return_img[param_name] = param_data + lats, lons = message.latlons() + param_data = message.values if grid is None: - lats, lons = message.latlons() - try: - res_lat, res_lon = get_grid_resolution(lats, lons) - grid = ERA_RegularImgGrid(res_lat, res_lon) - except ValueError: # when grid not regular - lons_gt_180 = np.where(lons > 180.0) - lons[lons_gt_180] = lons[lons_gt_180] - 360 - grid = ERA_IrregularImgGrid(lons, lats) + grid = BasicGrid( + trafo_lon(lons).flatten(), + lats.flatten(), + shape=param_data.shape) + + param_data = param_data.flatten() + + if self.subgrid is not None: + gpis = grid.find_nearest_gpi(self.subgrid.activearrlon, + self.subgrid.activearrlat)[0] + + param_data = param_data[gpis] + + return_img[param_name] = param_data return_metadata[param_name]["units"] = message["units"] return_metadata[param_name]["long_name"] = \ @@ -463,56 +497,66 @@ def read(self, timestamp=None): return_metadata[param_name]["depth"] = "{:} cm".format( message["levels"]) + grbs.close() + + # Set data for non-land points to NaN if self.mask_seapoints: - if sea_mask is None: + if 'lsm' not in return_img: raise IOError( "No land sea mask parameter (lsm) in passed image" " for masking.") else: # mask the loaded data + mask = np.logical_not(return_img['lsm'].flatten()) for name in return_img.keys(): param_data = return_img[name] param_data = np.ma.array( param_data, - mask=np.logical_not(sea_mask), + mask=mask, fill_value=np.nan, ) param_data = param_data.filled() return_img[name] = param_data - grbs.close() + if (self.parameter is not None) and ('lsm' not in self.parameter): + return_img.pop('lsm') + return_metadata.pop('lsm') - # missing variables - for param_name, n in var_msg_lut.items(): - if n is not None: - continue - param_data = np.full(shape, np.nan) - warnings.warn("Cannot load variable {var} from file {thefile}. " - "Filling image with NaNs.".format( - var=param_name, thefile=self.filename)) - return_img[param_name] = param_data - return_metadata[param_name] = {} - return_metadata[param_name]["long_name"] = lookup( - self.product, [param_name]).iloc[0]["long_name"] + if self.subgrid is None: + self.subgrid = grid + + # Add empty arrays for missing variables + if self.parameter is not None: + for p in self.parameter: + if p not in return_img: + param_data = np.full(np.prod(self.subgrid.shape), np.nan) + warnings.warn( + f"Cannot load variable {param_name} from file " + f"{self.filename}. Filling image with NaNs.") + return_img[param_name] = param_data + return_metadata[param_name] = {} + return_metadata[param_name]["long_name"] = lookup( + self.product, [param_name]).iloc[0]["long_name"] if self.array_1D: return Image( - grid.activearrlon, - grid.activearrlat, + self.subgrid.activearrlon, + self.subgrid.activearrlat, return_img, return_metadata, timestamp, ) else: - nlat = np.unique(grid.activearrlat).size - nlon = np.unique(grid.activearrlon).size + if len(self.subgrid.shape) != 2: + raise ValueError("Passed subgrid does not have a 2d shape." + "Did you mean to read data as 1d arrays?") for key in return_img: - return_img[key] = return_img[key].reshape((nlat, nlon)) + return_img[key] = return_img[key].reshape(self.subgrid.shape) return Image( - grid.activearrlon.reshape(nlat, nlon), - grid.activearrlat.reshape(nlat, nlon), + self.subgrid.activearrlon.reshape(self.subgrid.shape), + self.subgrid.activearrlat.reshape(self.subgrid.shape), return_img, return_metadata, timestamp, @@ -529,40 +573,52 @@ def close(self): class ERAGrbDs(MultiTemporalImageBase): - """ - Reader for a stack of ERA grib files. - - Parameters - ---------- - root_path: string - Root path where the data is stored - product : str - ERA5 or ERAINT - parameter: list or str, optional (default: ['swvl1', 'swvl2']) - Parameter or list of parameters to read - expand_grid: bool, optional (default: True) - If the reduced gaussian grid should be expanded to a full - gaussian grid. - """ def __init__( self, root_path, product, - parameter=("swvl1", "swvl2"), + parameter=None, subgrid=None, mask_seapoints=False, h_steps=(0, 6, 12, 18), array_1D=True, ): + """ + Reader to extract individual images from a multi-image grib dataset. + The main purpose of this class is to use it in the time series conversion + routine. To read downloaded image files, we recommend using + xarray (https://docs.xarray.dev/en/stable/). + Parameters + ---------- + root_path: str + Root path where the downloaded image data is stored in grib format. + We assume that image files are organized in subfolders by year, + with each year containing subfolders for individual days of the + year. + product : str + ERA5 or ERA5-Land + parameter: list[str] or str, optional (default: None) + Parameter or list of parameters to read. None reads all available + Parameters. + subgrid: ERA_RegularImgGrid or ERA_RegularImgLandGrid, optional + Read only data for points of this grid. + If None is passed, we read all points from the file. + mask_seapoints: bool, optional (default: False) + All points that are not over land are replaced with NaN values. + This requires that the land sea mask (lsm) parameter is included + in the image files! + h_steps: tuple, optional (default: (0, 6, 12, 18)) + Time stamps available for each day. Numbers refer to full hours. + array_1D: bool, optional (default: True) + Read data as 1d arrays. This is required when the passed subgrid + is 1-dimensional (e.g. when only landpoints are read). Otherwise + when a 2d (subgrid) is used, this switch means that the extracted + image data is also 2-dimensional (lon, lat). + """ self.h_steps = h_steps - subpath_templ = ["%Y", "%j"] - - if type(parameter) == str: - parameter = [parameter] - ioclass_kws = { "product": product, "parameter": parameter, @@ -571,12 +627,15 @@ def __init__( "array_1D": array_1D, } + fname_templ = IMG_FNAME_TEMPLATE.format( + product="*", type="*", datetime="{datetime}", ext="grb") + super(ERAGrbDs, self).__init__( root_path, ERAGrbImg, - fname_templ="*_{datetime}.grb", - datetime_format="%Y%m%d_%H%M", - subpath_templ=subpath_templ, + fname_templ=fname_templ, + datetime_format=IMG_FNAME_DATETIME_FORMAT, + subpath_templ=SUBDIRS, exact_templ=False, ioclass_kws=ioclass_kws, ) @@ -595,7 +654,8 @@ def tstamps_for_daterange(self, start_date, end_date): Returns ---------- timestamps : list - List of datetimes + List of datetime values (between start and end date) for all + required time stamps. """ img_offsets = np.array([timedelta(hours=h) for h in self.h_steps]) @@ -611,21 +671,24 @@ def tstamps_for_daterange(self, start_date, end_date): class ERATs(GriddedNcOrthoMultiTs): """ Time series reader for all reshuffled ERA reanalysis products in time - series format. + series format (pynetcf OrthoMultiTs format) Use the read_ts(lon, lat) resp. read_ts(gpi) function of this class - to read data for locations! + to read data for a location! + """ - Parameters - ---------- - ts_path : str - Directory where the netcdf time series files are stored - grid_path : str, optional (default: None) - Path to grid file, that is used to organize the location of time - series to read. If None is passed, grid.nc is searched for in the - ts_path. - - Optional keyword arguments that are passed to the Gridded Base when used - ------------------------------------------------------------------------ + def __init__(self, ts_path, grid_path=None, **kwargs): + """ + Parameters + ---------- + ts_path : str + Directory where the netcdf time series files are stored + grid_path : str, optional (default: None) + Path to grid file, that is used to organize the location of time + series to read. If None is passed, grid.nc is searched for in the + ts_path. + + Optional keyword arguments that are passed to the Gridded Base when used + ------------------------------------------------------------------------ parameters : list, optional (default: None) Specific variable names to read, if None are selected, all are read. @@ -645,12 +708,9 @@ class ERATs(GriddedNcOrthoMultiTs): If false, dates will not be read automatically but only on specific request useable for bulk reading because currently the netCDF num2date routine is very slow for big datasets. - """ - - def __init__(self, ts_path, grid_path=None, **kwargs): - + """ if grid_path is None: grid_path = os.path.join(ts_path, "grid.nc") - grid = load_grid(grid_path) + super(ERATs, self).__init__(ts_path, grid, **kwargs) diff --git a/src/ecmwf_models/utils.py b/src/ecmwf_models/utils.py index 8b573de..373aeb4 100644 --- a/src/ecmwf_models/utils.py +++ b/src/ecmwf_models/utils.py @@ -20,220 +20,25 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - """ Utility functions for all data products in this package. """ - -import warnings import os +import warnings from datetime import datetime -import xarray as xr import pandas as pd -from datedown.fname_creator import create_dt_fpath -import argparse import numpy as np from netCDF4 import Dataset from collections import OrderedDict +from parse import parse +import yaml -try: - from cdo import Cdo - cdo_available = True -except ImportError: - cdo_available = False - -try: - import pygrib - pygrib_available = True -except ImportError: - pygrib_available = False - - -class CdoNotFoundError(ModuleNotFoundError): - - def __init__(self, msg=None): - _default_msg = "cdo and/or python-cdo not installed. " \ - "Use conda to install it them under Linux." - self.msg = _default_msg if msg is None else msg - - -def str2bool(v): - """ - Parse a string to True/False - - Parameters - --------- - v : str - String to parse, must be part of the lists below. - - Return - --------- - str2bool : bool - The parsed bool from the passed string - """ - if v.lower() in ("yes", "true", "t", "y", "1"): - return True - elif v.lower() in ("no", "false", "f", "n", "0"): - return False - else: - raise argparse.ArgumentTypeError("Boolean value expected.") - - -def save_ncs_from_nc( - input_nc, - output_path, - product_name, - filename_templ="{product}_AN_%Y%m%d_%H%M.nc", - grid=None, - keep_original=True, - remap_method="bil", -): - """ - Split the downloaded netcdf file into daily files and add to folder - structure necessary for reshuffling. - - Parameters - ---------- - input_nc : str - Filepath of the downloaded .nc file - output_path : str - Where to save the resulting netcdf files - product_name : str - Name of the ECMWF model (only for filename generation) - filename_templ : str, optional (default: "{product}_AN_%Y%m%d_%H%M.nc") - Template for naming each separated nc file - keep_original: bool - keep the original downloaded data - """ - localsubdirs = ["%Y", "%j"] - - nc_in = xr.open_dataset(input_nc, mask_and_scale=True) - - if grid is not None: - if not cdo_available: - raise CdoNotFoundError() - - cdo = Cdo() - gridpath = os.path.join(output_path, "grid.txt") - weightspath = os.path.join(output_path, "remap_weights.nc") - if not os.path.exists(gridpath): - with open(gridpath, "w") as f: - for k, v in grid.items(): - f.write(f"{k} = {v}\n") - - for time in nc_in.time.values: - subset = nc_in.sel(time=time) - if (abs((datetime(2022, 1, 1) - datetime.now()).days) < 90): - warnings.warn( - f'Data for {time} may contain experimental versions of ' - f'variables') - if 'expver' in subset.dims: - subset_merge = subset.sel(expver=subset['expver'].values[0]) - for e in subset['expver'].values[1:]: - subset_merge = subset_merge.combine_first(subset.sel(expver=e)) - subset = subset_merge - else: - filename_templ = filename_templ.format(product=product_name) - - timestamp = pd.Timestamp(time).to_pydatetime() - filepath = create_dt_fpath( - timestamp, - root=output_path, - fname=filename_templ, - subdirs=localsubdirs, - ) - if not os.path.exists(os.path.dirname(filepath)): - os.makedirs(os.path.dirname(filepath)) - - if grid is not None: - if not os.path.exists(weightspath): - # create weights file - getattr(cdo, "gen" + remap_method)( - gridpath, input=subset, output=weightspath) - subset = cdo.remap( - ",".join([gridpath, weightspath]), - input=subset, - returnXDataset=True, - ) - - # same compression for all variables - var_encode = {"zlib": True, "complevel": 6} - subset.to_netcdf( - filepath, encoding={var: var_encode for var in subset.variables}) - - nc_in.close() - - if not keep_original: - os.remove(input_nc) - if grid is not None: - cdo.cleanTempDir() - - -def save_gribs_from_grib( - input_grib, - output_path, - product_name, - filename_templ="{product}_AN_%Y%m%d_%H%M.grb", - keep_original=True, -): - """ - Split the downloaded grib file into daily files and add to folder structure - necessary for reshuffling. - - Parameters - ---------- - input_grib : str - Filepath of the downloaded .grb file - output_path : str - Where to save the resulting grib files - product_name : str - Name of the ECMWF model (only for filename generation) - filename_templ : str, optional (default: product_OPER_0001_AN_date_time) - Template for naming each separated grb file - """ - localsubdirs = ["%Y", "%j"] - grib_in = pygrib.open(input_grib) - - grib_in.seek(0) - for grb in grib_in: - template = filename_templ - filedate = datetime(grb["year"], grb["month"], grb["day"], grb["hour"]) - - template = template.format(product=product_name) - - filepath = create_dt_fpath( - filedate, root=output_path, fname=template, subdirs=localsubdirs) - - if not os.path.exists(os.path.dirname(filepath)): - os.makedirs(os.path.dirname(filepath)) - - grb_out = open(filepath, "ab") - - grb_out.write(grb.tostring()) - grb_out.close() - grib_in.close() - if not keep_original: - os.remove(input_grib) - - -def mkdate(datestring): - """ - Turns a datetime string into a datetime object - - Parameters - ----------- - datestring: str - Input datetime string +from ecmwf_models.extract import save_gribs_from_grib +from repurpose.misc import find_first_at_depth - Returns - ----------- - datetime : datetime - Converted string - """ - if len(datestring) == 10: - return datetime.strptime(datestring, "%Y-%m-%d") - if len(datestring) == 16: - return datetime.strptime(datestring, "%Y-%m-%dT%H:%M") +from ecmwf_models.globals import (DOTRC, CDS_API_URL, IMG_FNAME_TEMPLATE, + IMG_FNAME_DATETIME_FORMAT, + SUPPORTED_PRODUCTS) def parse_product(inpath: str) -> str: @@ -244,63 +49,44 @@ def parse_product(inpath: str) -> str: Parameters ---------- inpath: str - Input path where ERA data was downloaded to + Input path where ERA data was downloaded to. Contains annual folders. Returns ------- product : str Product name """ - onedown = os.path.join(inpath, os.listdir(inpath)[0]) - twodown = os.path.join(onedown, os.listdir(onedown)[0]) - - for path, subdirs, files in os.walk(twodown): - for name in files: - filename, extension = os.path.splitext(name) - parts = filename.split("_") + props = img_infer_file_props(inpath) - if "ERA5-LAND" in parts: - return "era5-land" - elif "ERA5" in parts: - return "era5" - elif "ERAINT" in parts: - return "eraint" - else: - continue + if "era5-land" in props['product'].lower(): + return "era5-land" # also era5-land-t + elif "era5" in props['product'].lower(): + return "era5" # also era5-t + else: + raise ValueError( + f"Could not derive product name from data in {inpath}") -def parse_filetype(inpath): +def parse_filetype(inpath: str) -> str: """ - Tries to find out the file type by searching for - grib or nc files two subdirectories into the passed input path. - If function fails, grib is assumed. + Tries to find out the file type by parsing filenames in the passed + directory. Parameters ---------- inpath: str - Input path where ERA data was downloaded to + Input path where ERA data was downloaded to. Contains annual folders. Returns ------- - filetype : str - File type string. + product : str + Product name """ - onedown = os.path.join(inpath, os.listdir(inpath)[0]) - twodown = os.path.join(onedown, os.listdir(onedown)[0]) - - filelist = [] - for path, subdirs, files in os.walk(twodown): - for name in files: - filename, extension = os.path.splitext(name) - filelist.append(extension) - - if ".nc" in filelist and ".grb" not in filelist: - return "netcdf" - elif ".grb" in filelist and ".nc" not in filelist: - return "grib" + props = img_infer_file_props(inpath) + if props['ext'] == 'grb': + return 'grib' else: - # if file type cannot be detected, guess grib - return "grib" + return 'netcdf' def load_var_table(name="era5", lut=False): @@ -323,12 +109,6 @@ def load_var_table(name="era5", lut=False): "era5", "era5-land_lut.csv", ) - elif name == "eraint": - era_vars_csv = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - "erainterim", - "eraint_lut.csv", - ) else: raise ValueError(name, "No LUT for the selected dataset found.") @@ -343,7 +123,7 @@ def load_var_table(name="era5", lut=False): def lookup(name, variables): """ Search the passed elements in the lookup table, if one does not exists, - raise a Warning + print a Warning """ lut = load_var_table(name=name, lut=True) @@ -377,6 +157,25 @@ def get_default_params(name="era5"): return vars.loc[vars.default == 1.0] +def default_variables(product="era5", format='dl_name'): + """ + These variables are being downloaded, when None are passed by the user + + Parameters + --------- + product : str, optional (default: 'era5') + Name of the era5 product to read the default variables for. + Either 'era5' or 'era5-land'. + format: str, optional (default: 'dl_name') + 'dl_name' for name as in the downloaded image data + 'short_name' for short name + 'long_name' for long name + """ + lut = load_var_table(name=product) + defaults = lut.loc[lut["default"] == 1][format].values + return defaults.tolist() + + def make_era5_land_definition_file( data_file, out_file, @@ -392,15 +191,15 @@ def make_era5_land_definition_file( Parameters ---------- data_file : str - Path to the downloaded file that cotains the image that is used as the + Path to the downloaded file that contains the image that is used as the reference for creating the land definition file. out_file: str - Full output path to the land defintion file to create. + Full output path to the land definition file to create. data_file_y_res : float, optional (default: 0.25) The resolution of the data file in latitude direction. ref_var: str, optional (default: 'lsm') A variable in the data_file that is the reference for the - land definiton. + land definition. By default, we use the land-sea-mask variable. threshold: float, optional (default: 0.5) Threshold value below which a point is declared water, @@ -455,3 +254,169 @@ def make_era5_land_definition_file( ds_in.close() ds_out.close() + + +def split_array(array, chunk_size): + """ + Split an array into chunks of a given size. + + Parameters + ---------- + array : array-like + Array to split into chunks + chunk_size : int + Size of each chunk + + Returns + ------- + chunks : list + List of chunks + """ + chunks = [] + for i in range(0, len(array), chunk_size): + chunks.append(array[i:i + chunk_size]) + return chunks + + +def check_api_ready() -> bool: + """ + Verify that the API is ready to be used. Otherwise raise an Error. + + Returns: + -------- + api_ready: bool + True if api is ready + """ + if not os.path.isfile(DOTRC): + key = os.environ.get('CDSAPI_KEY') + if "CDSAPI_URL" not in os.environ: + os.environ['CDSAPI_URL'] = CDS_API_URL + + if key is None: + raise ValueError( + 'Neither CDSAPI_KEY variable nor .cdsapirc file found, ' + 'download will not work! ' + 'Please create a .cdsapirc file with your API key. ' + 'See: https://cds.climate.copernicus.eu/how-to-api') + else: + return True + else: + if "CDSAPI_URL" in os.environ: + os.environ.pop("CDSAPI_URL") # Use URL from file + return True + + +def img_infer_file_props(img_root_path: str, + fntempl: str = IMG_FNAME_TEMPLATE, + start_from_last=False) -> dict: + """ + Parse file names to retrieve properties from fntempl. + Does not open any files. + + Parameters + ---------- + img_root_path: str + Root directory where annual directories are located + fntempl: str, optional + Filename template to parse filenames with + start_from_last: bool, optional + Use the last available file instead of the first one. + """ + fname = find_first_at_depth(img_root_path, 2, reverse=start_from_last) + + if fname is None: + raise ValueError(f"No matching files for chosen template found in " + f"the directory {img_root_path}") + else: + file_args = parse(fntempl, fname) + return file_args.named + + +def get_first_last_image_date(path, start_from_last=True): + """ + Parse files in the given directory (or any subdir) using the passed + filename template. props will contain all fields specified in the template. + the `datetime` field is required and used to determine the last image date. + + Parameters + ---------- + path: str + Path to the directory containing the image files + start_from_last: bool, optional (default: True') + Get date from last available file instead of the first available one. + + Returns + ------- + date: str + Parse date from the last found image file that matches `fntempl`. + """ + try: + props = img_infer_file_props( + path, fntempl=IMG_FNAME_TEMPLATE, start_from_last=start_from_last) + dt = datetime.strptime(props['datetime'], IMG_FNAME_DATETIME_FORMAT) + except ValueError: + raise ValueError('Could not infer date from filenames. ' + 'Check filename template.') + + return str(dt) + + +def update_image_summary_file(data_path: str, + other_props: dict = None, + out_file=None): + """ + Summarize image metadata as yml file + + Parameters + ---------- + data_path: str + Root path to the image archive + other_props: dict, optional (default: None) + Other properties to write into the yml file. E.g. download + options to enable time series update. + out_file: str, optional (default: None) + Path to summary file. File will be created/updated. + If not specified, then `data_path` is used. If a file already exists, + it will be overwritten. + """ + try: + first_image_date = get_first_last_image_date( + data_path, start_from_last=False) + last_image_date = get_first_last_image_date( + data_path, start_from_last=True) + except ValueError: + warnings.warn(f"Could not infer date from filenames in {data_path}") + return + + props = img_infer_file_props(data_path, start_from_last=False) + _ = props.pop("datetime") + props['period_from'] = str(pd.to_datetime(first_image_date).date()) + props['period_to'] = str(pd.to_datetime(last_image_date).date()) + + props['last_update'] = str(datetime.now()) + + props['download_settings'] = {} + + if other_props is not None: + for k, v in other_props.items(): + props['download_settings'][k] = v + + if out_file is None: + out_file = os.path.join(data_path, f"overview.yml") + + with open(out_file, 'w') as f: + yaml.dump(props, f, default_flow_style=False) + + +def assert_product(product: str) -> str: + if product not in SUPPORTED_PRODUCTS: + raise ValueError(f"Got product {product} but expected one of " + f"{SUPPORTED_PRODUCTS}") + return product + + +if __name__ == '__main__': + save_gribs_from_grib( + "/tmp/era5/grb/temp_downloaded/20240730_20240731.grb", + output_path='/tmp/era5/grb', + product_name='ERA5') diff --git a/tests/test_grid.py b/tests/test_grid.py index 00e8911..c422d14 100644 --- a/tests/test_grid.py +++ b/tests/test_grid.py @@ -6,14 +6,13 @@ from ecmwf_models.grid import ( ERA_RegularImgGrid, get_grid_resolution, - ERA_IrregularImgGrid, ERA5_RegularImgLandGrid, ) import numpy as np def test_ERA_regular_grid(): - reg_grid = ERA_RegularImgGrid(0.3, 0.3) + reg_grid = ERA_RegularImgGrid(0.3) assert np.unique(reg_grid.activearrlat).size == 601 assert np.unique(reg_grid.activearrlon).size == 1200 assert get_grid_resolution(reg_grid.activearrlat, @@ -22,27 +21,15 @@ def test_ERA_regular_grid(): 0.3, ) - -def test_ERA_irregular_grid(): - # we test this with a regular grid, because it's easier - lon = np.arange(0, 360 - 1.0 / 2, 1.0) - lat = np.arange(90, -1 * 90 - 1.0 / 2, -1 * 1.0) - lons, lats = np.meshgrid(lon, lat) - - grid = ERA_IrregularImgGrid(lons, lats) - - assert grid == ERA_RegularImgGrid(1.0, 1.0) - - def test_ERA5_landgrid_025(): - grid = ERA5_RegularImgLandGrid(0.25, 0.25) # 0.25*0.25 + grid = ERA5_RegularImgLandGrid(0.25) # 0.25*0.25 assert grid.get_grid_points()[0].size == 244450 assert grid.find_nearest_gpi(16.25, 48.25) == (240545, 0.0) assert grid.gpi2cell(240545) == 1431 def test_ERA5_landgrid_01(): - grid = ERA5_RegularImgLandGrid(0.1, 0.1) # 0.1*0.1 + grid = ERA5_RegularImgLandGrid(0.1) # 0.1*0.1 assert grid.get_grid_points()[0].size == 1544191 assert grid.find_nearest_gpi(16.4, 48.1) == (1508564, 0.0) np.testing.assert_almost_equal(grid.gpi2lonlat(1508564)[0], 16.4) diff --git a/tests/test_utils.py b/tests/test_utils.py index e587fcb..6d4f2a8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,30 +3,19 @@ Testing the utility functions ''' +import os +import tempfile +import numpy as np +from netCDF4 import Dataset + from ecmwf_models.utils import ( - str2bool, - mkdate, parse_filetype, parse_product, load_var_table, lookup, get_default_params, - make_era5_land_definition_file) -import os -from datetime import datetime -import tempfile -import numpy as np -from netCDF4 import Dataset - - -def test_str2bool(): - assert str2bool('true') - assert not str2bool('false') - - -def test_mkdate(): - assert mkdate('2000-01-01') == datetime(2000, 1, 1) - assert mkdate('2000-01-01T06:00') == datetime(2000, 1, 1, 6) + make_era5_land_definition_file, +) def test_parse_product(): @@ -45,27 +34,21 @@ def test_parse_filetype(): def test_load_var_table(): table = load_var_table('era5') - assert table.index.size == 265 + assert table.index.size == 260 assert table.loc[ - 100].dl_name == 'mean_surface_direct_short_wave_radiation_flux' - assert table.loc[100].short_name == 'msdrswrf' + 100].dl_name == 'mean_surface_downward_short_wave_radiation_flux' + assert table.loc[100].short_name == 'msdwswrf' table = load_var_table('era5-land') - assert table.index.size == 49 - assert table.loc[45].dl_name == 'volumetric_soil_water_layer_1' - assert table.loc[45].short_name == 'swvl1' - - table = load_var_table('eraint') - assert table.index.size == 79 - assert table.loc[8].dl_name == 39.128 - assert table.loc[8].long_name == 'Volumetric soil water layer 1' - assert table.loc[8].short_name == 'swvl1' + assert table.index.size == 50 + assert table.loc[46].dl_name == 'volumetric_soil_water_layer_1' + assert table.loc[46].short_name == 'swvl1' def test_lookup(): lut = lookup('era5', ['swvl1', 'stl1']) - assert lut.loc[254].dl_name == 'volumetric_soil_water_layer_1' - assert lut.loc[171].dl_name == 'soil_temperature_level_1' + assert lut.loc[249].dl_name == 'volumetric_soil_water_layer_1' + assert lut.loc[166].dl_name == 'soil_temperature_level_1' def test_get_default_params(): diff --git a/tests/tests_era5/test_era5_download.py b/tests/tests_era5/test_era5_download.py index fe80a99..9f1d309 100644 --- a/tests/tests_era5/test_era5_download.py +++ b/tests/tests_era5/test_era5_download.py @@ -23,10 +23,6 @@ ''' Tests for transferring downloaded data to netcdf or grib files ''' - -from ecmwf_models.era5.download import download_and_move, save_ncs_from_nc -from ecmwf_models.utils import CdoNotFoundError, cdo_available - import os import shutil from datetime import datetime @@ -34,7 +30,15 @@ import xarray as xr import pytest import tempfile -import time + +from c3s_sm.misc import read_summary_yml + +from ecmwf_models.era5.download import download_and_move +from ecmwf_models.extract import save_ncs_from_nc +from ecmwf_models.globals import ( + cdo_available, + CdoNotFoundError +) grid = { "gridtype": "lonlat", @@ -58,7 +62,6 @@ def test_download_with_cdo_not_installed(): save_ncs_from_nc( infile, out_path, 'ERA5', grid=grid, keep_original=True) - def test_dry_download_nc_era5(): with tempfile.TemporaryDirectory() as dl_path: dl_path = os.path.join(dl_path, 'era5') @@ -75,20 +78,26 @@ def test_dry_download_nc_era5(): assert os.path.isfile(trgt) startdate = enddate = datetime(2010, 1, 1) - - download_and_move( - dl_path, - startdate, - enddate, - variables=['swvl1', 'swvl2', 'lsm'], - keep_original=False, - h_steps=[0, 12], - grb=False, - dry_run=True) - - assert (os.listdir(dl_path) == ['2010']) - assert (os.listdir(os.path.join(dl_path, '2010')) == ['001']) - assert (len(os.listdir(os.path.join(dl_path, '2010', '001'))) == 2) + variables, h_steps = ['swvl1', 'swvl2', 'lsm'], [0, 12] + + with pytest.warns(UserWarning, match='Dry run*'): + download_and_move( + dl_path, + startdate, + enddate, + variables=variables, + keep_original=False, + h_steps=h_steps, + grb=False, + dry_run=True) + + cont = os.listdir(dl_path) + assert len(cont) == 2 + assert 'overview.yml' in cont + assert '2010' in cont + + assert os.listdir(os.path.join(dl_path, '2010')) == ['001'] + assert len(os.listdir(os.path.join(dl_path, '2010', '001'))) == 2 should_dlfiles = [ 'ERA5_AN_20100101_0000.nc', 'ERA5_AN_20100101_1200.nc' @@ -97,9 +106,12 @@ def test_dry_download_nc_era5(): assert (sorted(os.listdir(os.path.join( dl_path, '2010', '001'))) == sorted(should_dlfiles)) - assert (os.listdir(dl_path) == ['2010']) - assert (os.listdir(os.path.join(dl_path, '2010')) == ['001']) - assert (len(os.listdir(os.path.join(dl_path, '2010', '001'))) == 2) + props = read_summary_yml(dl_path) + assert props['product'].lower() == 'era5' + assert props['download_settings']['grb'] is False + assert props['period_to'] == props['period_from'] == '2010-01-01' + assert len(props['download_settings']['variables']) == len(variables) + assert props['download_settings']['h_steps'] == h_steps def test_dry_download_grb_era5(): @@ -117,18 +129,24 @@ def test_dry_download_grb_era5(): os.path.join(dl_path, 'temp_downloaded', '20100101_20100101.grb')) startdate = enddate = datetime(2010, 1, 1) + variables, h_steps = ['swvl1', 'swvl2', 'lsm'], [0, 12] + + with pytest.warns(UserWarning, match="Dry run*"): + download_and_move( + dl_path, + startdate, + enddate, + variables=variables, + keep_original=False, + h_steps=h_steps, + grb=True, + dry_run=True) + + cont = os.listdir(dl_path) + assert len(cont) == 2 + assert 'overview.yml' in cont + assert '2010' in cont - download_and_move( - dl_path, - startdate, - enddate, - variables=['swvl1', 'swvl2', 'lsm'], - keep_original=False, - h_steps=[0, 12], - grb=True, - dry_run=True) - - assert (os.listdir(dl_path) == ['2010']) assert (os.listdir(os.path.join(dl_path, '2010')) == ['001']) assert (len(os.listdir(os.path.join(dl_path, '2010', '001'))) == 2) @@ -139,6 +157,13 @@ def test_dry_download_grb_era5(): assert (sorted(os.listdir(os.path.join( dl_path, '2010', '001'))) == sorted(should_dlfiles)) + props = read_summary_yml(dl_path) + assert props['product'].lower() == 'era5' + assert props['download_settings']['grb'] is True + assert props['period_to'] == props['period_from'] == '2010-01-01' + assert len(props['download_settings']['variables']) == len(variables) + assert props['download_settings']['h_steps'] == h_steps + @pytest.mark.skipif(not cdo_available, reason="CDO is not installed") def test_download_nc_era5_regridding(): @@ -156,18 +181,24 @@ def test_download_nc_era5_regridding(): os.path.join(dl_path, 'temp_downloaded', '20100101_20100101.nc')) startdate = enddate = datetime(2010, 1, 1) - download_and_move( - dl_path, - startdate, - enddate, - variables=['swvl1', 'swvl2', 'lsm'], - keep_original=False, - h_steps=[0, 12], - grb=False, - dry_run=True, - grid=grid) - - assert (os.listdir(dl_path) == ['2010']) + + with pytest.warns(UserWarning, match="Dry run*"): + download_and_move( + dl_path, + startdate, + enddate, + variables=['swvl1', 'swvl2', 'lsm'], + keep_original=False, + h_steps=[0, 12], + grb=False, + dry_run=True, + grid=grid) + + cont = os.listdir(dl_path) + assert len(cont) == 2 + assert 'overview.yml' in cont + assert '2010' in cont + assert (os.listdir(os.path.join(dl_path, '2010')) == ['001']) assert (len(os.listdir(os.path.join(dl_path, '2010', '001'))) == 2) @@ -180,9 +211,11 @@ def test_download_nc_era5_regridding(): for f in os.listdir(os.path.join(dl_path, "2010", "001")): ds = xr.open_dataset(os.path.join(dl_path, "2010", "001", f)) - assert dict(ds.dims) == {"lon": 720, "lat": 360} + assert ds['lon'].shape == (720,) + assert ds['lat'].shape == (360,) assert np.all(np.arange(89.75, -90, -0.5) == ds.lat) assert np.all(np.arange(-179.75, 180, 0.5) == ds.lon) + if __name__ == '__main__': test_dry_download_nc_era5() diff --git a/tests/tests_era5/test_era5_download_cds.py b/tests/tests_era5/test_era5_download_cds.py index 053e4a3..1f9d240 100644 --- a/tests/tests_era5/test_era5_download_cds.py +++ b/tests/tests_era5/test_era5_download_cds.py @@ -4,40 +4,44 @@ import os import tempfile - -from ecmwf_models.era5.download import main -from ecmwf_models.era5.interface import ERA5NcImg import unittest from datetime import datetime import pytest +import subprocess + +from ecmwf_models.era5.reader import ERA5NcImg +from ecmwf_models.utils import check_api_ready +from ecmwf_models.era5.download import download_and_move +try: + check_api_ready() + api_ready = True +except ValueError: + api_ready = False class DownloadTest(unittest.TestCase): # these tests only run if a username and pw are set in the environment # variables. To manually set them: `export USERNAME="my_username"` etc. + @unittest.skipIf( - os.environ.get('CDSAPI_KEY') is None, - 'CDSAPI_KEY not found. Make sure the environment variable exists.' + os.environ.get('CDSAPI_KEY') is None and not api_ready, + 'CDSAPI_KEY not found.' ) - @pytest.mark.wget - def test_full_download(self): - - home = os.path.expanduser('~') - - with open(os.path.join(home, '.cdsapirc'), 'w') as f: - f.write("url: https://cds.climate.copernicus.eu/api/v2\n") - f.write(f"key: {os.environ.get('CDSAPI_KEY')}") + def test_cli_download(self): with tempfile.TemporaryDirectory() as dl_path: startdate = enddate = "2023-01-01" args = [ - dl_path, '-s', startdate, '-e', enddate, '-p', 'ERA5', - '-var', 'swvl1', '--h_steps', '0' + dl_path, '-s', startdate, '-e', enddate, + '-v', 'swvl1', '--h_steps', '0' ] - main(args) + subprocess.call(['era5', 'download'] + args) + + # download_and_move(dl_path, startdate, enddate, variables=['swvl1'], + # h_steps=[0]) out_path = os.path.join(dl_path, '2023', '001') assert(os.path.exists(out_path)) @@ -47,3 +51,7 @@ def test_full_download(self): ds = ERA5NcImg(os.path.join(out_path, imgs[0]), parameter='swvl1') img = ds.read(datetime(2023, 1, 1)) assert img.data['swvl1'].shape == (721, 1440) + + +if __name__ == '__main__': + DownloadTest().test_cli_download() \ No newline at end of file diff --git a/tests/tests_era5/test_era5_interface.py b/tests/tests_era5/test_era5_interface.py index 6c63c30..b71579f 100644 --- a/tests/tests_era5/test_era5_interface.py +++ b/tests/tests_era5/test_era5_interface.py @@ -2,7 +2,7 @@ import os import numpy.testing as nptest -from ecmwf_models.era5.interface import ( +from ecmwf_models.era5.reader import ( ERA5NcDs, ERA5NcImg, ERA5GrbImg, ERA5GrbDs) import numpy as np from datetime import datetime @@ -14,7 +14,7 @@ def test_ERA5_nc_image_landpoints(): os.path.dirname(os.path.abspath(__file__)), '..', "ecmwf_models-test-data", "ERA5", "netcdf", "2010", "001", 'ERA5_AN_20100101_0000.nc') - subgrid = ERA5_RegularImgLandGrid(0.25, 0.25) + subgrid = ERA5_RegularImgLandGrid(0.25) dset = ERA5NcImg( fname, parameter=['swvl1', 'swvl2'], @@ -328,3 +328,6 @@ def test_ERA5_grb_ds(): nptest.assert_allclose(data.lat[-1], -90.0) nptest.assert_allclose(data.lon[0], 0.0) nptest.assert_allclose(data.lon[720], 180.0) # middle of image + +if __name__ == '__main__': + test_ERA5_grb_image() \ No newline at end of file diff --git a/tests/tests_era5/test_era5_reshuffle.py b/tests/tests_era5/test_era5_reshuffle.py index ad76f43..1461211 100644 --- a/tests/tests_era5/test_era5_reshuffle.py +++ b/tests/tests_era5/test_era5_reshuffle.py @@ -29,69 +29,87 @@ import tempfile import numpy as np import numpy.testing as nptest +import subprocess +import shutil +from pathlib import Path +import xarray as xr +import yaml from datetime import datetime -from ecmwf_models.era5.reshuffle import main +from c3s_sm.misc import read_summary_yml + +from ecmwf_models.era5.reshuffle import img2ts from ecmwf_models import ERATs -from ecmwf_models.era5.reshuffle import parse_args - - -def test_parse_args(): - - args = parse_args([ - "/in", - "/out", - "2000-01-01", - "2010-12-31", - "swvl1", - "swvl2", - "--land_points", - "True", - "--imgbuffer", - "1000", - "--bbox", - "12", - "46", - "17", - "50", - ]) - - assert isinstance(args.dataset_root, str) and args.dataset_root == "/in" - assert (isinstance(args.timeseries_root, str) and - args.timeseries_root == "/out") - assert isinstance(args.start, datetime) and args.start == datetime( - 2000, 1, 1) - assert isinstance(args.end, datetime) and args.end == datetime( - 2010, 12, 31) - assert isinstance(args.variables, list) and len(args.variables) == 2 - assert isinstance(args.land_points, bool) and args.land_points is True - assert isinstance(args.imgbuffer, int) and args.imgbuffer == 1000 - assert (isinstance(args.bbox, list) and len(args.bbox) == 4 and - all([isinstance(a, float) for a in args.bbox])) +inpath = os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "..", + "ecmwf_models-test-data", + "ERA5", +) + +def test_cli_reshuffle_and_update(): + testdata_path = Path(os.path.join(inpath, 'netcdf')) + with tempfile.TemporaryDirectory() as tempdir: + tempdir = Path(tempdir) + img_path = tempdir / 'images' + shutil.copytree(testdata_path, img_path) + + # we duplicate 1 file to check whether prioritizing final images over T images works + ds = xr.open_dataset(testdata_path / '2010' / '001' / "ERA5_AN_20100101_0000.nc") + ds['swvl1'].values = np.full(ds['swvl1'].values.shape, 99) + ds.to_netcdf(img_path / '2010' / '001' / "ERA5-T_AN_20100101_0000.nc") + + ts_path = tempdir / 'ts' + + subprocess.call(["era5", "reshuffle", img_path, ts_path, "2010-01-01", + "2010-01-01", "-v", "swvl1,swvl2", "-l", "True", + "--bbox", "12.0", "46.0", "17.0", "50.0", + "--h_steps", "0"]) + + ts_reader = ERATs(ts_path) + ts = ts_reader.read(15, 48) + assert 99 not in ts['swvl1'].values # verify ERA5-T was NOT used! + swvl1_values_should = np.array([0.402825], dtype=np.float32) + nptest.assert_allclose( + ts["swvl1"].values, swvl1_values_should, rtol=1e-5) + + ts_reader.close() + + # Manipulate settings to update with different time stamp for same day + props = read_summary_yml(ts_path) + props['img2ts_kwargs']['h_steps'] = [12] + props['img2ts_kwargs']['startdate'] = datetime(2009,12,31) + props['img2ts_kwargs']['enddate'] = datetime(2009,12,31) + + with open(ts_path / 'overview.yml', 'w') as f: + yaml.dump(props, f, default_flow_style=False, sort_keys=False) + + subprocess.call(["era5", "update_ts", ts_path]) + ts_reader = ERATs(ts_path) + ts = ts_reader.read(15, 48) + swvl1_values_should = np.array([0.402825, 0.390983], dtype=np.float32) + nptest.assert_allclose( + ts["swvl1"].values, swvl1_values_should, rtol=1e-5) + + assert 'swvl2' in ts.columns + + ts_reader.close() def test_ERA5_reshuffle_nc(): # test reshuffling era5 netcdf images to time series - inpath = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - "..", - "ecmwf_models-test-data", - "ERA5", - "netcdf", - ) - startdate = "2010-01-01" enddate = "2010-01-01" parameters = ["swvl1", "swvl2"] - h_steps = ["--h_steps", "0", "12"] - landpoints = ["--land_points", "True"] - bbox = ["--bbox", "12", "46", "17", "50"] + h_steps = [0, 12] + landpoints = True + bbox = (12, 46, 17, 50) with tempfile.TemporaryDirectory() as ts_path: - args = ([inpath, ts_path, startdate, enddate] + parameters + h_steps + - landpoints + bbox) - main(args) + img2ts(os.path.join(inpath, 'netcdf'), ts_path, startdate, enddate, + variables=parameters, h_steps=h_steps, land_points=landpoints, + bbox=bbox) assert (len(glob.glob(os.path.join(ts_path, "*.nc"))) == 5) # less files because only land points and bbox ds = ERATs(ts_path, ioclass_kws={"read_bulk": True}) @@ -108,26 +126,19 @@ def test_ERA5_reshuffle_nc(): def test_ERA5_reshuffle_grb(): # test reshuffling era5 netcdf images to time series - inpath = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - "..", - "ecmwf_models-test-data", - "ERA5", - "netcdf", - ) + startdate = "2010-01-01" enddate = "2010-01-01" parameters = ["swvl1", "swvl2"] - h_steps = ["--h_steps", "0", "12"] - landpoints = ["--land_points", "False"] - bbox = ["--bbox", "12", "46", "17", "50"] + h_steps = [0, 12] + landpoints = False + bbox = (12, 46, 17, 50) with tempfile.TemporaryDirectory() as ts_path: - args = ([inpath, ts_path, startdate, enddate] + parameters + h_steps + - landpoints + bbox) - - main(args) + img2ts(os.path.join(inpath, 'grib'), ts_path, startdate, enddate, + variables=parameters, + h_steps=h_steps, land_points=landpoints, bbox=bbox) assert len(glob.glob(os.path.join(ts_path, "*.nc"))) == 5 ds = ERATs(ts_path, ioclass_kws={"read_bulk": True}) @@ -138,4 +149,4 @@ def test_ERA5_reshuffle_grb(): ts["swvl1"].values, swvl1_values_should, rtol=1e-5) swvl2_values_should = np.array([0.390514, 0.390980], dtype=np.float32) nptest.assert_allclose( - ts["swvl2"].values, swvl2_values_should, rtol=1e-5) + ts["swvl2"].values, swvl2_values_should, rtol=1e-5) \ No newline at end of file diff --git a/tests/tests_eraint/test_eraint_download.py b/tests/tests_eraint/test_eraint_download.py deleted file mode 100644 index c137cfd..0000000 --- a/tests/tests_eraint/test_eraint_download.py +++ /dev/null @@ -1,92 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Tests for transferring downloaded data to netcdf or grib files -''' - -from ecmwf_models.erainterim.download import download_and_move - -import os -import shutil -from datetime import datetime -from tempfile import TemporaryDirectory - - -def test_dry_download_nc_eraint(): - - with TemporaryDirectory() as dl_path: - - dldir = os.path.join(dl_path, 'temp_downloaded') - os.makedirs(dldir, exist_ok=True) - - thefile = os.path.join(os.path.dirname(os.path.abspath(__file__)), - '..', "ecmwf_models-test-data", "download", - "eraint_example_downloaded_raw.nc") - shutil.copyfile(thefile, os.path.join(dl_path, 'temp_downloaded', - '20000101_20000101.nc')) - - startdate = enddate = datetime(2000, 1, 1) - - download_and_move( - dl_path, - startdate, - enddate, - variables=['swvl1', 'swvl2', 'lsm'], - keep_original=False, - h_steps=[0, 12], - grb=False, - dry_run=True) - - assert (os.listdir(dl_path) == ['2000']) - assert (os.listdir(os.path.join(dl_path, '2000')) == ['001']) - assert(len(os.listdir(os.path.join(dl_path, '2000', '001'))) == 2) - - should_dlfiles = [ - 'ERAINT_AN_20000101_0000.nc', - 'ERAINT_AN_20000101_1200.nc', - ] - - assert (len(os.listdir(os.path.join(dl_path, '2000', - '001'))) == len(should_dlfiles)) - - assert (sorted(os.listdir(os.path.join( - dl_path, '2000', '001'))) == sorted(should_dlfiles)) - - -def test_dry_download_grb_eraint(): - - with TemporaryDirectory() as dl_path: - - dldir = os.path.join(dl_path, 'temp_downloaded') - os.makedirs(dldir, exist_ok=True) - - thefile = os.path.join(os.path.dirname(os.path.abspath(__file__)), - '..', "ecmwf_models-test-data", "download", - "eraint_example_downloaded_raw.grb") - shutil.copyfile(thefile, os.path.join(dl_path, 'temp_downloaded', - '20000101_20000101.grb')) - - startdate = enddate = datetime(2000, 1, 1) - - download_and_move( - dl_path, - startdate, - enddate, - variables=['swvl1', 'swvl2', 'lsm'], - keep_original=False, - h_steps=[0, 6, 12, 18], - grb=True, - dry_run=True) - - assert (os.listdir(dl_path) == ['2000']) - assert (os.listdir(os.path.join(dl_path, '2000')) == ['001']) - assert(len(os.listdir(os.path.join(dl_path, '2000', '001'))) == 2) - - should_dlfiles = [ - 'ERAINT_AN_20000101_0000.grb', - 'ERAINT_AN_20000101_1200.grb', - ] - assert (len(os.listdir(os.path.join(dl_path, '2000', - '001'))) == len(should_dlfiles)) - - assert (sorted(os.listdir(os.path.join( - dl_path, '2000', '001'))) == sorted(should_dlfiles)) diff --git a/tests/tests_eraint/test_eraint_interface.py b/tests/tests_eraint/test_eraint_interface.py deleted file mode 100644 index 669c0b8..0000000 --- a/tests/tests_eraint/test_eraint_interface.py +++ /dev/null @@ -1,288 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -import numpy.testing as nptest -from ecmwf_models.erainterim.interface import (ERAIntGrbImg, ERAIntNcImg, - ERAIntGrbDs, ERAIntNcDs) -import numpy as np -from datetime import datetime - - -def test_ERAInt_nc_image(): - fname = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "netcdf", "2000", "001", - 'ERAINT_AN_20000101_0000.nc') - - dset = ERAIntNcImg( - fname, parameter=['swvl1', 'swvl2'], mask_seapoints=True) - data = dset.read() - assert data.data['swvl1'].shape == (241, 480) - assert data.data['swvl2'].shape == (241, 480) - assert data.lon.shape == (241, 480) - assert data.lat.shape == (241, 480) - metadata_should = { - 'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl1']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl1']['units'] == metadata_should['units'] - - metadata_should = { - 'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl2']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl2']['units'] == metadata_should['units'] - - # data over land - nptest.assert_allclose(data.lon[100, 64], 48.0, rtol=1e-5) - nptest.assert_allclose(data.lat[100, 64], 15.0, rtol=1e-5) - nptest.assert_allclose(data.data['swvl1'][100, 64], 0.17185359, rtol=1e-5) - nptest.assert_allclose(data.data['swvl2'][100, 64], 0.17981644, rtol=1e-5) - # data over water - nptest.assert_allclose(data.lon[136, 108], 81.0, rtol=1e-5) - nptest.assert_allclose(data.lat[136, 108], -12.0, rtol=1e-5) - nptest.assert_allclose(data.data['swvl1'][136, 108], np.nan) - nptest.assert_allclose(data.data['swvl2'][136, 108], np.nan) - - #boundaries - nptest.assert_allclose(data.lat[0, 0], 90.0) - nptest.assert_allclose(data.lat[-1, 0], -90.0) - nptest.assert_allclose(data.lon[0, 0], 0.0) - nptest.assert_allclose(data.lon[0, 240], 180.0) # middle of image - - -def test_ERAInt_nc_image_1d(): - fname = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "netcdf", "2000", "001", - 'ERAINT_AN_20000101_0000.nc') - - dset = ERAIntNcImg( - fname, - parameter=['swvl1', 'swvl2'], - mask_seapoints=True, - array_1D=True) - data = dset.read() - assert data.data['swvl1'].shape == (241 * 480,) - assert data.data['swvl2'].shape == (241 * 480,) - assert data.lon.shape == (241 * 480,) - assert data.lat.shape == (241 * 480,) - metadata_should = { - 'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl1']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl1']['units'] == metadata_should['units'] - - metadata_should = { - 'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl2']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl2']['units'] == metadata_should['units'] - - # data over land - nptest.assert_allclose(data.lon[100 * 480 + 64], 48.0) - nptest.assert_allclose(data.lat[100 * 480 + 64], 15.0) - nptest.assert_allclose( - data.data['swvl1'][100 * 480 + 64], 0.17185359, rtol=1e-5) - nptest.assert_allclose( - data.data['swvl2'][100 * 480 + 64], 0.17981644, rtol=1e-5) - # data over water - nptest.assert_allclose(data.lon[136 * 480 + 108], 81.0) - nptest.assert_allclose(data.lat[136 * 480 + 108], -12.0) - nptest.assert_allclose(data.data['swvl1'][136 * 480 + 108], np.nan) - nptest.assert_allclose(data.data['swvl2'][136 * 480 + 108], np.nan) - - #boundaries - nptest.assert_allclose(data.lat[0], 90.0) - nptest.assert_allclose(data.lat[-1], -90.0) - nptest.assert_allclose(data.lon[0], 0.0) - nptest.assert_allclose(data.lon[len(data.lon) // 2], - 180.0) # middle of image - - -def test_ERAInt_grb_image(): - fname = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "grib", "2000", "001", - 'ERAINT_AN_20000101_0000.grb') - - dset = ERAIntGrbImg( - fname, - parameter=['swvl1', 'swvl2'], - mask_seapoints=True, - array_1D=False) - data = dset.read() - assert data.data['swvl1'].shape == (256, 512) - assert data.data['swvl2'].shape == (256, 512) - assert data.lon.shape == (256, 512) - assert data.lat.shape == (256, 512) - metadata_should = { - 'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl1']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl1']['units'] == metadata_should['units'] - - metadata_should = { - 'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl2']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl2']['units'] == metadata_should['units'] - - # data over land - nptest.assert_allclose(data.data['swvl1'][106, 68], 0.171760559) - nptest.assert_allclose(data.data['swvl2'][106, 68], 0.178138732) - nptest.assert_allclose(data.lon[106, 68], 47.81238356, rtol=1e-5) - nptest.assert_allclose(data.lat[106, 68], 15.08768995, rtol=1e-5) - # data over water - nptest.assert_allclose(data.lon[145, 115], 80.85917808219178, rtol=1e-5) - nptest.assert_allclose(data.lat[145, 115], -12.280678058301, rtol=1e-5) - nptest.assert_allclose(data.data['swvl1'][145, 115], np.nan) - nptest.assert_allclose(data.data['swvl2'][145, 115], np.nan) - - # boundaries - nptest.assert_allclose(data.lat[0, 0], 89.46282157) - nptest.assert_allclose(data.lat[-1, 0], -89.4628215685774) - nptest.assert_allclose(data.lon[0, 0], 0.0) - nptest.assert_allclose(data.lon[0, 256], - 179.99956164383) # middle of image - - -def test_ERAInt_grb_image_1d(): - fname = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "grib", "2000", "001", - 'ERAINT_AN_20000101_0000.grb') - - dset = ERAIntGrbImg( - fname, - parameter=['swvl1', 'swvl2'], - mask_seapoints=True, - array_1D=True) - data = dset.read() - assert data.data['swvl1'].shape == (256 * 512,) - assert data.data['swvl2'].shape == (256 * 512,) - assert data.lon.shape == (256 * 512,) - assert data.lat.shape == (256 * 512,) - metadata_should = { - 'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl1']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl1']['units'] == metadata_should['units'] - - metadata_should = { - 'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3' - } - assert data.metadata['swvl2']['long_name'] == metadata_should['long_name'] - assert data.metadata['swvl2']['units'] == metadata_should['units'] - - # data over land - nptest.assert_allclose(data.lon[106 * 512 + 68], 47.81238356, rtol=1e-5) - nptest.assert_allclose(data.lat[106 * 512 + 68], 15.08768995, rtol=1e-5) - nptest.assert_allclose( - data.data['swvl1'][106 * 512 + 68], 0.171760559, rtol=1e-5) - nptest.assert_allclose( - data.data['swvl2'][106 * 512 + 68], 0.178138732, rtol=1e-5) - # data over water - nptest.assert_allclose(data.lon[145 * 512 + 115], 80.8591780, rtol=1e-5) - nptest.assert_allclose(data.lat[145 * 512 + 115], -12.280678058, rtol=1e-5) - nptest.assert_allclose(data.data['swvl1'][145 * 512 + 115], np.nan) - nptest.assert_allclose(data.data['swvl2'][145 * 512 + 115], np.nan) - - #boundaries - nptest.assert_allclose(data.lat[0], 89.46282157) - nptest.assert_allclose(data.lat[-1], -89.4628215685774) - nptest.assert_allclose(data.lon[0], 0.0) - nptest.assert_allclose(data.lon[256], 179.99956164383) # middle of image - - -def test_ERAInt_nc_ds(): - root_path = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "netcdf") - - tstamps_should = [datetime(2000, 1, 1), datetime(2000, 1, 1, 12)] - - ds = ERAIntNcDs( - root_path, - parameter=['swvl1', 'swvl2'], - array_1D=True, - mask_seapoints=True, - h_steps=[0, 12]) - - for data, tstamp_should in zip( - ds.iter_images(datetime(2000, 1, 1), datetime(2000, 1, 1)), - tstamps_should): - assert data.data['swvl1'].shape == (241 * 480,) - assert data.data['swvl2'].shape == (241 * 480,) - assert data.lon.shape == (241 * 480,) - assert data.lat.shape == (241 * 480,) - assert data.timestamp == tstamp_should - metadata_should = { - 'swvl1': { - 'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3' - }, - 'swvl2': { - 'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3' - } - } - for var in ['swvl1', 'swvl2']: - assert data.metadata[var]['long_name'] == metadata_should[var][ - 'long_name'] - assert data.metadata[var]['units'] == metadata_should[var]['units'] - - nptest.assert_allclose(data.lat[0], 90.0) - nptest.assert_allclose(data.lat[-1], -90.0) - nptest.assert_allclose(data.lon[0], 0.0) - nptest.assert_allclose(data.lon[240], 180.0) # middle of image - - -def test_ERAInt_grb_ds(): - root_path = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "grib") - - tstamps_should = [datetime(2000, 1, 1), datetime(2000, 1, 1, 12)] - - ds = ERAIntGrbDs( - root_path, - parameter=['swvl1', 'swvl2'], - array_1D=True, - mask_seapoints=True, - h_steps=[0, 12]) - - for data, tstamp_should in zip( - ds.iter_images(datetime(2000, 1, 1), datetime(2000, 1, 1)), - tstamps_should): - assert data.data['swvl1'].shape == (256 * 512,) - assert data.data['swvl2'].shape == (256 * 512,) - assert data.lon.shape == (256 * 512,) - assert data.lat.shape == (256 * 512,) - assert data.timestamp == tstamp_should - metadata_should = \ - { - 'swvl1': {'long_name': u'Volumetric soil water layer 1', - 'units': u'm**3 m**-3'}, - 'swvl2': {'long_name': u'Volumetric soil water layer 2', - 'units': u'm**3 m**-3'} - } - for var in ['swvl1', 'swvl2']: - assert data.metadata[var]['long_name'] == metadata_should[var][ - 'long_name'] - assert data.metadata[var]['units'] == metadata_should[var][ - 'units'] - - nptest.assert_allclose(data.lat[0], 89.46282157) - nptest.assert_allclose(data.lat[-1], -89.4628215685774) - nptest.assert_allclose(data.lon[0], 0.0) - nptest.assert_allclose(data.lon[256], - 179.99956164383) # middle of image diff --git a/tests/tests_eraint/test_eraint_reshuffle.py b/tests/tests_eraint/test_eraint_reshuffle.py deleted file mode 100644 index 038666e..0000000 --- a/tests/tests_eraint/test_eraint_reshuffle.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Test module for image to time series conversion. -''' - -import os -import glob -from tempfile import TemporaryDirectory -import numpy as np -import numpy.testing as nptest - -from ecmwf_models.erainterim.reshuffle import main -from ecmwf_models import ERATs - - -def test_ERAInterim_reshuffle_grb(): - # test reshuffling era interim grib images to time series - inpath = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "grib") - startdate = '2000-01-01' - enddate = '2000-01-01' - parameters = ["swvl1", "swvl2"] - h_steps = ['--h_steps', '0', '12'] - - with TemporaryDirectory() as ts_path: - args = [inpath, ts_path, startdate, enddate] + parameters + h_steps - - main(args) - - assert len(glob.glob(os.path.join(ts_path, "*.nc"))) == 2593 - ds = ERATs(ts_path, ioclass_kws={'read_bulk': True}) - ts = ds.read(48, 15) - ds.close() - swvl1_should = np.array([0.171761, 0.171738], dtype=np.float32) - nptest.assert_allclose(ts['swvl1'].values, swvl1_should, rtol=1e-5) - swvl2_should = np.array([0.178139, 0.178200], dtype=np.float32) - nptest.assert_allclose(ts['swvl2'].values, swvl2_should, rtol=1e-5) - - -def test_ERAInterim_reshuffle_nc(): - # test reshuffling era interim grib images to time series - inpath = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '..', - "ecmwf_models-test-data", "ERA-Interim", "netcdf") - startdate = '2000-01-01' - enddate = '2000-01-01' - parameters = ["swvl1", "swvl2"] - h_steps = ['--h_steps', '0', '12'] - - with TemporaryDirectory() as ts_path: - args = [inpath, ts_path, startdate, enddate] + parameters + h_steps - main(args) - assert len(glob.glob(os.path.join(ts_path, "*.nc"))) == 2593 - ds = ERATs(ts_path, ioclass_kws={'read_bulk': True}) - ts = ds.read(48, 15) - ds.close() - swvl1_should = np.array([0.171854, 0.171738], dtype=np.float32) - nptest.assert_allclose(ts['swvl1'].values, swvl1_should, rtol=1e-5) - swvl2_should = np.array([0.179816, 0.179860], dtype=np.float32) - nptest.assert_allclose(ts['swvl2'].values, swvl2_should, rtol=1e-5)