From 88a75539fed570561426da0c752e8526c56ebbbd Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Sat, 30 May 2020 15:23:47 +1200 Subject: [PATCH] :white_check_mark: Properly tested lonlat_to_xy function Collapse the geographic reprojection code into a one-liner! Basically wraps around pyproj, and handles lazy dask.DataFrame and xarray.DataArray objects by including the will-be released workaround for handling __array__ objects (scheduled for pyproj 3.0). Reinstated the 'catalog' variable in atl06_play.ipynb, as it's used further down the notebook. Also hashing python files in deepicedrain to check whether we should bust the CI cache to reinstall `deepicedrain`, instead of manually bumping dependencies each time. --- .github/workflows/python-app.yml | 2 +- atl06_play.ipynb | 29 +++++--------- atl06_play.py | 21 +++++----- atl11_play.ipynb | 17 +------- atl11_play.py | 11 +----- deepicedrain/__init__.py | 2 +- deepicedrain/spatiotemporal.py | 26 +++++++++++++ .../tests/test_spatiotemporal_conversions.py | 39 ++++++++++++++++++- 8 files changed, 89 insertions(+), 58 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 630692e..608beda 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -38,7 +38,7 @@ jobs: with: path: | /usr/share/miniconda3/envs/deepicedrain - key: cache-venv-${{ github.ref }}-${{ hashFiles('**/environment.yml') }}-${{ hashFiles('**/poetry.lock') }} + key: cache-venv-${{ github.ref }}-${{ hashFiles('**/environment.yml') }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('**/deepicedrain/*.py') }} restore-keys: | cache-venv-refs/heads/master- diff --git a/atl06_play.ipynb b/atl06_play.ipynb index 38b1836..0fa589a 100644 --- a/atl06_play.ipynb +++ b/atl06_play.ipynb @@ -109,7 +109,13 @@ "\n", "Use our [intake catalog](https://intake.readthedocs.io/en/latest/catalog.html) to get some sample ATL06 data\n", "(while making sure we have our Earthdata credentials set up properly),\n", - "and view it using [xarray](https://xarray.pydata.org) and [hvplot](https://hvplot.pyviz.org)." + "and view it using [xarray](https://xarray.pydata.org) and [hvplot](https://hvplot.pyviz.org).\n", + "\n", + "open the local intake data catalog file containing ICESat-2 stuff\n", + "catalog = intake.open_catalog(\"deepicedrain/atlas_catalog.yaml\")\n", + "or if the deepicedrain python package is installed, you can use either of the below:\n", + "catalog = deepicedrain.catalog\n", + "catalog = intake.cat.atlas_cat" ] }, { @@ -1041,10 +1047,8 @@ " )\n", " raise\n", "\n", - "# open the local intake data catalog file containing ICESat-2 stuff\n", "# data download will depend on having a .netrc file in home folder\n", - "# dataset = intake.cat.atlas_cat.icesat2atl06.to_dask().unify_chunks()\n", - "dataset = deepicedrain.catalog.icesat2atl06.to_dask().unify_chunks()\n", + "dataset = catalog.icesat2atl06.to_dask().unify_chunks()\n", "dataset" ] }, @@ -1744,21 +1748,8 @@ "metadata": {}, "outputs": [], "source": [ - "transformer = pyproj.Transformer.from_crs(\n", - " crs_from=pyproj.CRS.from_epsg(4326),\n", - " crs_to=pyproj.CRS.from_epsg(3031),\n", - " always_xy=True,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "dfs[\"x\"], dfs[\"y\"] = transformer.transform(\n", - " xx=dfs.longitude.values, yy=dfs.latitude.values\n", + "dfs[\"x\"], dfs[\"y\"] = deepicedrain.lonlat_to_xy(\n", + " longitude=dfs.longitude, latitude=dfs.latitude\n", ")" ] }, diff --git a/atl06_play.py b/atl06_play.py index 2a8ec67..4a95868 100644 --- a/atl06_play.py +++ b/atl06_play.py @@ -73,6 +73,12 @@ # (while making sure we have our Earthdata credentials set up properly), # and view it using [xarray](https://xarray.pydata.org) and [hvplot](https://hvplot.pyviz.org). +# open the local intake data catalog file containing ICESat-2 stuff +catalog = intake.open_catalog("deepicedrain/atlas_catalog.yaml") +# or if the deepicedrain python package is installed, you can use either of the below: +# catalog = deepicedrain.catalog +# catalog = intake.cat.atlas_cat + # %% try: netrc.netrc() @@ -84,10 +90,8 @@ ) raise -# open the local intake data catalog file containing ICESat-2 stuff # data download will depend on having a .netrc file in home folder -# dataset = intake.cat.atlas_cat.icesat2atl06.to_dask().unify_chunks() -dataset = deepicedrain.catalog.icesat2atl06.to_dask().unify_chunks() +dataset = catalog.icesat2atl06.to_dask().unify_chunks() dataset # %% @@ -343,15 +347,8 @@ def six_laser_beams(filepaths: list) -> dask.dataframe.DataFrame: # ### Transform from EPSG:4326 (lat/lon) to EPSG:3031 (Antarctic Polar Stereographic) # %% -transformer = pyproj.Transformer.from_crs( - crs_from=pyproj.CRS.from_epsg(4326), - crs_to=pyproj.CRS.from_epsg(3031), - always_xy=True, -) - -# %% -dfs["x"], dfs["y"] = transformer.transform( - xx=dfs.longitude.values, yy=dfs.latitude.values +dfs["x"], dfs["y"] = deepicedrain.lonlat_to_xy( + longitude=dfs.longitude, latitude=dfs.latitude ) # %% diff --git a/atl11_play.ipynb b/atl11_play.ipynb index 8769a21..c5b02c5 100644 --- a/atl11_play.ipynb +++ b/atl11_play.ipynb @@ -159,24 +159,11 @@ }, "outputs": [], "source": [ - "lonlat_to_xy = lambda longitude, latitude: pyproj.Proj(projparams=3031)(\n", - " longitude, latitude\n", + "ds[\"x\"], ds[\"y\"] = deepicedrain.lonlat_to_xy(\n", + " longitude=ds.longitude, latitude=ds.latitude\n", ")" ] }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "x, y = lonlat_to_xy(ds.longitude.values, ds.latitude.values)\n", - "ds[\"x\"] = xr.DataArray(data=x, coords=ds.longitude.coords)\n", - "ds[\"y\"] = xr.DataArray(data=y, coords=ds.latitude.coords)" - ] - }, { "cell_type": "code", "execution_count": 7, diff --git a/atl11_play.py b/atl11_play.py index ebe7035..fcb74c8 100644 --- a/atl11_play.py +++ b/atl11_play.py @@ -86,17 +86,10 @@ # to the Antarctic Polar Stereographic (EPSG:3031) projection. # %% -lonlat_to_xy = lambda longitude, latitude: pyproj.Proj(projparams=3031)( - longitude, latitude +ds["x"], ds["y"] = deepicedrain.lonlat_to_xy( + longitude=ds.longitude, latitude=ds.latitude ) - -# %% -x, y = lonlat_to_xy(ds.longitude.values, ds.latitude.values) -ds["x"] = xr.DataArray(data=x, coords=ds.longitude.coords) -ds["y"] = xr.DataArray(data=y, coords=ds.latitude.coords) - - # %% # Also set x, y as coordinates in xarray.Dataset ds = ds.set_coords(names=["x", "y"]) diff --git a/deepicedrain/__init__.py b/deepicedrain/__init__.py index c23e436..f695f62 100644 --- a/deepicedrain/__init__.py +++ b/deepicedrain/__init__.py @@ -5,7 +5,7 @@ import deepicedrain from deepicedrain.deltamath import calculate_delta -from deepicedrain.spatiotemporal import Region, deltatime_to_utctime +from deepicedrain.spatiotemporal import Region, deltatime_to_utctime, lonlat_to_xy __version__: str = "0.1.0" diff --git a/deepicedrain/spatiotemporal.py b/deepicedrain/spatiotemporal.py index 3bafaf2..3d95be4 100644 --- a/deepicedrain/spatiotemporal.py +++ b/deepicedrain/spatiotemporal.py @@ -6,6 +6,7 @@ import datetime import numpy as np +import pyproj import xarray as xr @@ -72,3 +73,28 @@ def deltatime_to_utctime( utc_time: xr.DataArray = dataarray.__class__(start_epoch) + dataarray return utc_time + + +def lonlat_to_xy( + longitude: xr.DataArray, latitude: xr.DataArray, epsg: int = 3031 +) -> (xr.DataArray, xr.DataArray): + """ + Reprojects longitude/latitude EPSG:4326 coordinates to x/y coordinates. + Default conversion is to Antarctic Stereographic Projection EPSG:3031. + """ + if hasattr(longitude, "__array__") and callable(longitude.__array__): + # TODO upgrade to PyProj 3.0 to remove this workaround for passing in + # dask.dataframe.core.Series or xarray.DataArray objects + # Based on https://github.com/pyproj4/pyproj/pull/625 + _longitude = longitude.__array__() + _latitude = latitude.__array__() + + x, y = pyproj.Proj(projparams=epsg)(_longitude, _latitude) + + if hasattr(longitude, "coords"): + return ( + xr.DataArray(data=x, coords=longitude.coords), + xr.DataArray(data=y, coords=latitude.coords), + ) + else: + return x, y diff --git a/deepicedrain/tests/test_spatiotemporal_conversions.py b/deepicedrain/tests/test_spatiotemporal_conversions.py index b863a37..d6f03e4 100644 --- a/deepicedrain/tests/test_spatiotemporal_conversions.py +++ b/deepicedrain/tests/test_spatiotemporal_conversions.py @@ -9,7 +9,7 @@ import pandas as pd import xarray as xr -from deepicedrain import catalog, deltatime_to_utctime +from deepicedrain import catalog, deltatime_to_utctime, lonlat_to_xy def test_deltatime_to_utctime(): @@ -45,3 +45,40 @@ def test_deltatime_to_utctime(): ) atl11_dataset.close() + + +def test_lonlat_to_xy_dask_series(): + """ + Test that converting from longitude/latitude to x/y in EPSG:3031 works when + passing them in as dask.dataframe.core.Series objects. + """ + atl11_dataset: xr.Dataset = catalog.test_data.atl11_test_case.to_dask() + atl11_dataframe: dask.dataframe.core.DataFrame = atl11_dataset.to_dask_dataframe() + + x, y = lonlat_to_xy( + longitude=atl11_dataframe.longitude, latitude=atl11_dataframe.latitude, + ) + npt.assert_equal(actual=x.mean(), desired=-56900105.00307033) + npt.assert_equal(actual=y.mean(), desired=48141607.48486084) + + atl11_dataset.close() + + +def test_lonlat_to_xy_xarray_dataarray(): + """ + Test that converting from longitude/latitude to x/y in EPSG:3031 works when + passing them in as xarray.DataArray objects. Ensure that the xarray + dimensions are preserved in the process. + """ + atl11_dataset: xr.Dataset = catalog.test_data.atl11_test_case.to_dask() + + x, y = lonlat_to_xy( + longitude=atl11_dataset.longitude, latitude=atl11_dataset.latitude + ) + + assert x.dims == y.dims == ("ref_pt",) + assert x.shape == y.shape == (1404,) + npt.assert_equal(actual=x.mean().data, desired=-56900105.00307034) + npt.assert_equal(actual=y.mean().data, desired=48141607.48486084) + + atl11_dataset.close()