Skip to content

Commit

Permalink
Merge pull request #1 from pangeo-data/master
Browse files Browse the repository at this point in the history
f
  • Loading branch information
charlesbluca authored Apr 2, 2020
2 parents e98e66f + 731ea6e commit eee836c
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 34 deletions.
9 changes: 1 addition & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ services:
sudo: false
env:
global:
- DOCKER_IMAGE=pangeo/pangeo-notebook:2019.04.19
- DOCKER_IMAGE=pangeo/pangeo-notebook:2019.11.15
- secure: "oDx29SLKJxzbSg9N9G713DcjLPzsgRbrloLdpt5H+pxCKxxY50h7iLgikcBOzDWFcJlfYopsDeIHVSq4iDQtQuhANKberjUsIp6gWozZVZu5g4wUgftvCaHgVTbzcTUwuWDlSPbY6tdnVOXGewGfy7R5Jz4I+AXwMzX1RM/LP5joEtluwBH7J87YWmUO8rN5sWzT9HdlhTHhwRkPKNSiFqpJYOz4QUyEJ13R3UEe6jxfqT6Vqw3alJdltam7WCYg9tfI3wzv3FS4lD7Ep6HkF/eYI2OJexY3VKPPTigm+qLfMz3RMi7TuT1OvoxCCEDKXWFzJpSd9mTozN0PTEUOb/a4K4n1s1UC/GGJ34sdC+wFx3Qfs89W1rFEA7cc+WeRQHJ8MvCfFkLtzcm/xqA+V7Pv3TTt4HrNFR5DM6/hoqytE2tU2p1P0XZZ29mZVhAVjgVUX0ELSxSfmhoqVzVfFJnqlbzMlz8xzg8LdDYWn4/d9n7Rz3FRN92ACM9TKv6DXbUFrjg4n81wkMX0MG1Zd0kQ9PcuMGS29Wws0iRgG61b7IdmazKjqJ9tj+FaaDCoDaOB11hc1QVE2hr5Y/wvqFMgOEjgdGB6GNw0TIldDsK4OYU2dEWvf/+zlzxZMb/t5IW9HG2yvaLrRc/sjDNPHlGzWCUauXST/gyjYWR45+c="

before_install:
Expand All @@ -28,10 +28,3 @@ script:
- >
docker exec -it pangeo bash -c
'py.test --durations=9999 -v tests/*.py'
- >
docker exec -it pangeo bash -c
'python build_catalog_rst.py'
- docker cp pangeo:/home/jovyan/catalog-docs/. docs/
- set -e
- cd docs && make html && cd ..
- doctr deploy . --built-docs docs/_build/html
15 changes: 15 additions & 0 deletions intake-catalogs/atmosphere.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,18 @@ sources:
consolidated: True
storage_options:
requester_pays: True

# Entry of the enclosing `sources:` mapping.
# Zarr store at a gs:// path, opened with consolidated metadata and
# requester_pays storage options.
era5_hourly_reanalysis_single_levels_sa:
  description: 'ERA5 hourly estimates of variables on single levels'
  driver: zarr
  args:
    urlpath: 'gs://pangeo-era5/reanalysis/spatial-analysis'
    consolidated: true
    storage_options:
      requester_pays: true
  metadata:
    url: 'https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels'
    # NOTE(review): 'ocean' and 'model' look carried over from the ocean
    # catalogs -- confirm they are intended for this reanalysis entry.
    tags:
      - ocean
      - model
      - atmosphere
14 changes: 14 additions & 0 deletions intake-catalogs/climate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,17 @@ sources:
description: 'CMIP6 in Google Cloud Storage'
driver: intake_esm.esm_datastore
metadata: {}

# Entry of the enclosing `sources:` mapping.
# intake-esm datastore driven by the collection file at `esmcol_path`.
GFDL_CM2_6:
  description: 'NOAA-GFDL CM2.6 in Google Cloud Storage'
  driver: intake_esm.esm_datastore
  args:
    esmcol_path: 'https://storage.googleapis.com/cmip6/gfdl_cm2_6.json'
  metadata: {}

# Entry of the enclosing `sources:` mapping.
# intake-esm datastore driven by the collection file at `esmcol_path`.
tracmip:
  description: 'TRACMIP in Pangeo Google Cloud Storage'
  driver: intake_esm.esm_datastore
  args:
    esmcol_path: 'https://storage.googleapis.com/cmip6/tracmip.json'
  metadata: {}
43 changes: 43 additions & 0 deletions intake-catalogs/hydro.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,46 @@ sources:
chunks: {'y': 6000, 'x': 6000}
storage_options:
requester_pays: True

# Entry of the enclosing `sources:` mapping.
# Single-level soil rasters opened with the rasterio driver; the `variable`
# parameter is substituted into the GeoTIFF file name.
soil_grids_single_level:
  description: Global gridded soil information in COG format
  parameters:
    variable:
      description: soil variable
      type: str
      default: TAXNWRB
  metadata:
    url: 'https://soilgrids.org/'
    # Kept as a list (not a bare scalar) so code iterating over tags sees
    # whole tag names rather than the characters of one string.
    tags:
      - Soil
  driver: rasterio
  args:
    urlpath: 'gs://pangeo-data/soilgrids/{{ variable }}_250m.tif'
    chunks:
      'y': 5120
      'x': 5120
    # TODO: remove, see GH#61
    storage_options:
      anon: true

# Entry of the enclosing `sources:` mapping.
# Multi-level soil rasters opened with the rasterio driver; `variable` and
# `level` are both substituted into the GeoTIFF file name.
soil_grids_multi_level:
  description: Global gridded soil information in COG format
  parameters:
    variable:
      description: soil variable
      type: str
      default: AWCh1_M
    level:
      description: soil level
      type: int
      default: 1
  metadata:
    url: 'https://soilgrids.org/'
    # Kept as a list (not a bare scalar) so code iterating over tags sees
    # whole tag names rather than the characters of one string.
    tags:
      - Soil
  driver: rasterio
  args:
    # Double-quoted because the template itself contains single quotes.
    urlpath: "gs://pangeo-data/soilgrids/{{ variable }}_sl{{ '%01d' % level }}_250m.tif"
    chunks:
      'y': 5120
      'x': 5120
    # TODO: remove, see GH#61
    storage_options:
      anon: true

# Entry of the enclosing `sources:` mapping.
# Nested catalog: the YAMLFileCatalog driver loads hydro/camels.yaml, located
# via the CATALOG_DIR template variable.
camels:
  description: 'Pangeo Camels Dataset Catalog'
  driver: intake.catalog.local.YAMLFileCatalog
  args:
    path: '{{CATALOG_DIR}}/hydro/camels.yaml'
  metadata: {}
94 changes: 94 additions & 0 deletions intake-catalogs/hydro/camels.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# CAMELS catalog: CSV-driver sources read from gs:// and s3:// text files.
# The *_gcp and *_aws entries differ only in their urlpath.
sources:
  # Basin-mean forcing files under gs://, selected by `source` and `basin_id`.
  basin_mean_forcing_gcp:
    description: Camels Basin Mean Forcing
    driver: csv
    parameters:  # User parameters
      basin_id:
        description: Basin ID
        type: str
        default: '05056000'  # quoted: the leading zero must survive parsing
      source:
        description: Source dataset
        type: str
        default: daymet
    args:
      urlpath: 'gs://pangeo-data/camels/basin_dataset_public_v1p2/basin_mean_forcing/{{ source }}/*/{{ basin_id }}_*.txt'
      csv_kwargs:
        sep: '\s+'
        header: 3
        parse_dates:
          date: [Year, Mnth, Day, Hr]
    metadata:
      origin_url: 'https://ral.ucar.edu/solutions/products/camels'

  # Basin-mean forcing files under s3://, selected by `source` and `basin_id`.
  basin_mean_forcing_aws:
    description: Camels Basin Mean Forcing
    driver: csv
    parameters:  # User parameters
      basin_id:
        description: Basin ID
        type: str
        default: '05056000'  # quoted: the leading zero must survive parsing
      source:
        description: Source dataset
        type: str
        default: daymet
    args:
      urlpath: 's3://pangeo-camels/basin_timeseries_v1p2_metForcing_obsFlow/basin_dataset_public_v1p2/basin_mean_forcing/{{ source }}/*/{{ basin_id }}_*.txt'
      csv_kwargs:
        sep: '\s+'
        header: 3
        parse_dates:
          date: [Year, Mnth, Day, Hr]
    metadata:
      origin_url: 'https://ral.ucar.edu/solutions/products/camels'

  # Streamflow files under gs://; column names are supplied explicitly and
  # -999.0 is read as missing.
  usgs_streamflow_gcp:
    description: Camels USGS Streamflow
    driver: csv
    parameters:  # User parameters
      basin_id:
        description: Basin ID
        type: str
        default: '05056000'  # quoted: the leading zero must survive parsing
    args:
      urlpath: 'gs://pangeo-data/camels/basin_dataset_public_v1p2/usgs_streamflow/*/{{ basin_id }}_streamflow_qc.txt'
      csv_kwargs:
        sep: '\s+'
        names: [basin, Year, Mnth, Day, QObs, flag]
        parse_dates:
          date: [Year, Mnth, Day]
        na_values: -999.0
    metadata:
      origin_url: 'https://ral.ucar.edu/solutions/products/camels'

  # Streamflow files under s3://; column names are supplied explicitly and
  # -999.0 is read as missing.
  usgs_streamflow_aws:
    description: Camels USGS Streamflow
    driver: csv
    parameters:  # User parameters
      basin_id:
        description: Basin ID
        type: str
        default: '05056000'  # quoted: the leading zero must survive parsing
    args:
      urlpath: 's3://pangeo-camels/basin_timeseries_v1p2_metForcing_obsFlow/basin_dataset_public_v1p2/usgs_streamflow/*/{{ basin_id }}_streamflow_qc.txt'
      csv_kwargs:
        sep: '\s+'
        names: [basin, Year, Mnth, Day, QObs, flag]
        parse_dates:
          date: [Year, Mnth, Day]
        na_values: -999.0
    metadata:
      origin_url: 'https://ral.ucar.edu/solutions/products/camels'

  # Semicolon-separated attribute tables under s3://; `type` picks the file.
  attributes_aws:
    description: Camels Attributes
    driver: csv
    parameters:  # User parameters
      type:
        description: type of attribute
        type: str
        default: 'topo'
    args:
      urlpath: 's3://pangeo-camels/camels_attributes_v2.0/camels_{{ type }}.txt'
      csv_kwargs:
        sep: ';'
    metadata:
      origin_url: 'https://ral.ucar.edu/solutions/products/camels'
50 changes: 39 additions & 11 deletions intake-catalogs/ocean/GFDL_CM2.6.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ sources:
- model
driver: zarr
args:
urlpath: gs://pangeo-gfdl-cm26/control/ocean
urlpath: gs://cmip6/GFDL_CM2_6/control/ocean
consolidated: True
storage_options:
requester_pays: True
Expand All @@ -27,7 +27,7 @@ sources:
- model
driver: zarr
args:
urlpath: gs://pangeo-gfdl-cm26/control/surface
urlpath: gs://cmip6/GFDL_CM2_6/control/surface
consolidated: True
storage_options:
requester_pays: True
Expand All @@ -41,7 +41,7 @@ sources:
- model
driver: zarr
args:
urlpath: gs://pangeo-gfdl-cm26/control/ocean_3d
urlpath: gs://cmip6/GFDL_CM2_6/control/ocean_3d
consolidated: True
storage_options:
requester_pays: True
Expand All @@ -55,7 +55,7 @@ sources:
- model
driver: zarr
args:
urlpath: gs://pangeo-gfdl-cm26/control/ocean_transport
urlpath: gs://cmip6/GFDL_CM2_6/control/ocean_transport
consolidated: True
storage_options:
requester_pays: True
Expand All @@ -69,7 +69,21 @@ sources:
- model
driver: zarr
args:
urlpath: gs://pangeo-gfdl-cm26/control/ocean_boundary
urlpath: gs://cmip6/GFDL_CM2_6/control/ocean_boundary
consolidated: True
storage_options:
requester_pays: True

# Entry of the enclosing `sources:` mapping.
# Zarr store at a gs:// path, opened with consolidated metadata and
# requester_pays storage options.
GFDL_CM2_6_control_ocean_budgets:
  description: 'GFDL CM2.6 climate model control run monthly ocean budgets fields'
  driver: zarr
  args:
    urlpath: 'gs://cmip6/GFDL_CM2_6/control/ocean_budgets'
    consolidated: true
    storage_options:
      requester_pays: true
  metadata:
    url: 'https://www.gfdl.noaa.gov/cm2-6/'
    tags:
      - ocean
      - model
Expand All @@ -83,7 +97,7 @@ sources:
- model
driver: zarr
args:
urlpath: gs://pangeo-gfdl-cm26/one_percent/ocean
urlpath: gs://cmip6/GFDL_CM2_6/one_percent/ocean
consolidated: True
storage_options:
requester_pays: True
Expand All @@ -97,7 +111,7 @@ sources:
- model
driver: zarr
args:
urlpath: gs://pangeo-gfdl-cm26/one_percent/surface
urlpath: gs://cmip6/GFDL_CM2_6/one_percent/surface
consolidated: True
storage_options:
requester_pays: True
Expand All @@ -111,7 +125,7 @@ sources:
- model
driver: zarr
args:
urlpath: gs://pangeo-gfdl-cm26/one_percent/ocean_3d
urlpath: gs://cmip6/GFDL_CM2_6/one_percent/ocean_3d
consolidated: True
storage_options:
requester_pays: True
Expand All @@ -125,7 +139,7 @@ sources:
- model
driver: zarr
args:
urlpath: gs://pangeo-gfdl-cm26/one_percent/ocean_transport
urlpath: gs://cmip6/GFDL_CM2_6/one_percent/ocean_transport
consolidated: True
storage_options:
requester_pays: True
Expand All @@ -139,7 +153,21 @@ sources:
- model
driver: zarr
args:
urlpath: gs://pangeo-gfdl-cm26/one_percent/ocean_boundary
urlpath: gs://cmip6/GFDL_CM2_6/one_percent/ocean_boundary
consolidated: True
storage_options:
requester_pays: True

# Entry of the enclosing `sources:` mapping.
# Zarr store at a gs:// path, opened with consolidated metadata and
# requester_pays storage options.
GFDL_CM2_6_one_percent_ocean_budgets:
  description: 'GFDL CM2.6 climate model one-percent CO2 increase monthly ocean budgets fields'
  driver: zarr
  args:
    urlpath: 'gs://cmip6/GFDL_CM2_6/one_percent/ocean_budgets'
    consolidated: true
    storage_options:
      requester_pays: true
  metadata:
    url: 'https://www.gfdl.noaa.gov/cm2-6/'
    tags:
      - ocean
      - model
Expand All @@ -153,7 +181,7 @@ sources:
- model
driver: zarr
args:
urlpath: gs://pangeo-gfdl-cm26/grid
urlpath: gs://cmip6/GFDL_CM2_6/grid
consolidated: True
storage_options:
requester_pays: True
26 changes: 11 additions & 15 deletions tests/validate_datasets.py → tests/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,24 @@ def get_master_catalog():
'../intake-catalogs/master.yaml')
return intake.Catalog(fname)

ALL_ENTRIES = list(get_master_catalog().walk(depth=10))

@pytest.fixture(scope="module")
def catalog(request):
return get_master_catalog()

def test_open_master_catalog(catalog):
pass

ALL_ENTRIES = list(get_master_catalog().walk(depth=10))
print(ALL_ENTRIES)

@pytest.fixture(scope="module", params=ALL_ENTRIES, ids=ALL_ENTRIES)
def dataset_name(request):
return request.param

#@pytest.mark.parametrize("dataset_name", ALL_ENTRIES, ids=['.'.join(name) for name in ALL_ENTRIES])
def test_get_intake_source(catalog, dataset_name):
item = catalog[dataset_name]

#@pytest.mark.parametrize("dataset_name", ALL_ENTRIES, ids=['.'.join(name) for name in ALL_ENTRIES])
def test_intake_dataset_to_dask(catalog, dataset_name):
item = catalog[dataset_name]
try:
ds = item.to_dask()
except NotImplementedError:
pytest.skip(f"Item {item} can't be loaded with `.to_dask()`")
if item.container == "catalog":
item.reload()
else:
if item._driver in ["csv", "rasterio", "zarr"]:
pytest.skip("need to resolve credentials issue for requester-pays data")
# ds = item.to_dask()
elif item._driver == "intake_esm.esm_datastore":
pytest.skip("need to resolve credentials issue for requester-pays data")
# col = item.get()

0 comments on commit eee836c

Please sign in to comment.