[pre-commit.ci] pre-commit autoupdate #72

Status: Open. pre-commit.ci wants to merge 2 commits into base `main`.

This automated PR bumps the hook revisions pinned in `.pre-commit-config.yaml`: pre-commit/pre-commit-hooks v4.6.0 → v5.0.0 and astral-sh/ruff-pre-commit v0.4.4 → v0.8.0. Re-running the updated hooks also rewrote several files: `trailing-whitespace` stripped trailing spaces and `end-of-file-fixer` normalized final newlines, so every other change below is whitespace-only. Because the changed characters are invisible, whitespace-only diff lines are annotated in square brackets, e.g. `[trailing whitespace]`.
Changes from all commits:
2 changes: 1 addition & 1 deletion .github/workflows/build_image.yaml

@@ -6,7 +6,7 @@ on:
       - main
     paths: # only run on changes to the Dockerfile
       - 'Dockerfile'
-  [blank line with trailing whitespace]
+
 jobs:
   build-and-push:
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml

@@ -3,14 +3,14 @@ default_language_version:
   python: python3.9
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-yaml
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.4.4
+    rev: v0.8.0
     hooks:
       # Run the linter.
       - id: ruff
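The two `rev:` bumps above are the whole substance of this PR; everything else in the diff is fallout from re-running the updated hooks. A minimal sketch of reproducing that fallout locally (assumes `pre-commit` is installed; `check=False` because pre-commit exits nonzero whenever a hook modifies files):

    import subprocess

    # Run every configured hook against the whole repo; trailing-whitespace
    # and end-of-file-fixer rewrite files in place, just as pre-commit.ci did.
    subprocess.run(["pre-commit", "run", "--all-files"], check=False)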
4 changes: 2 additions & 2 deletions Dockerfile

@@ -1,7 +1,7 @@
-FROM quay.io/pangeo/pangeo-notebook:2023.10.24 [trailing whitespace]
+FROM quay.io/pangeo/pangeo-notebook:2023.10.24
 LABEL maintainer="Julius Busecked"
 LABEL repo="https://github.com/ocean-transport/scale-aware-air-sea"


-RUN mamba install -n=notebook aerobulk-python -y [trailing whitespace]
+RUN mamba install -n=notebook aerobulk-python -y
 RUN pip install coiled
4 changes: 2 additions & 2 deletions README.md

@@ -14,11 +14,11 @@ pip install .
 ```

 ### Custom Docker image
-The science published from the repository is using a [custom Docker image](/Dockerfile) (installing additional dependencies on top of the [pangeo docker image](https://github.com/pangeo-data/pangeo-docker-images)). [trailing whitespace]
+The science published from the repository is using a [custom Docker image](/Dockerfile) (installing additional dependencies on top of the [pangeo docker image](https://github.com/pangeo-data/pangeo-docker-images)).
 You can find the image on [quay.io](https://quay.io/repository/jbusecke/scale-aware-air-sea?tab=tags) and refer to specific tags used in the notebooks in `./pipeline`.

 #### Pulling custom image on LEAP-Pangeo Jupyterhub
-To work with the custom image on the LEAP-Pangeo Jupyterhub, just follow the instructions to use a [custom image](), and enter `quay.io/jbusecke/scale-aware-air-sea:<tag>`, where you replace tag with the relevant version tag. [trailing whitespace]
+To work with the custom image on the LEAP-Pangeo Jupyterhub, just follow the instructions to use a [custom image](), and enter `quay.io/jbusecke/scale-aware-air-sea:<tag>`, where you replace tag with the relevant version tag.


 E.g. for tag `68b654d76dce`:
2 changes: 1 addition & 1 deletion notebooks/jbusecke/new-workspace.jupyterlab-workspace

@@ -1 +1 @@
Whitespace-only: the single-line JSON payload is removed and re-added unchanged, presumably `end-of-file-fixer` supplying the missing final newline. Shown once:

{"data":{"layout-restorer:data":{"main":{"dock":{"type":"split-area","orientation":"horizontal","sizes":[0.5,0.5],"children":[{"type":"tab-area","currentIndex":2,"widgets":["notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/aerobulk-python_performance.ipynb","terminal:1","notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/cm26_pipeline.ipynb","notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/reproduce_cm26_flux.ipynb","notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/testing/cm26_xesmf_hack.ipynb"]},{"type":"tab-area","currentIndex":3,"widgets":["dask-dashboard-launcher:/individual-progress","dask-dashboard-launcher:/individual-workers","dask-dashboard-launcher:/individual-task-stream","dask-dashboard-launcher:/individual-workers-memory"]}]},"current":"dask-dashboard-launcher:/individual-workers-memory"},"down":{"size":0,"widgets":[]},"left":{"collapsed":true,"widgets":["filebrowser","running-sessions","dask-dashboard-launcher","git-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0,1,0]},"file-browser-filebrowser:cwd":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke"},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/aerobulk-python_performance.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/aerobulk-python_performance.ipynb","factory":"Notebook"}},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/cm26_pipeline.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/cm26_pipeline.ipynb","factory":"Notebook"}},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/reproduce_cm26_flux.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/reproduce_cm26_flux.ipynb","factory":"Notebook"}},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/testing/cm26_xesmf_hack.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/testing/cm26_xesmf_hack.ipynb","factory":"Notebook"}},"terminal:1":{"data":{"name":"1"}},"dask-dashboard-launcher":{"url":"https://us-central1-b.gcp.pangeo.io/services/dask-gateway/clusters/prod.1e69eadfa71b4df0842dbaaeb5c7b01d/","cluster":""},"dask-dashboard-launcher:/individual-progress":{"data":{"route":"/individual-progress","label":"Progress","key":"Progress"}},"dask-dashboard-launcher:/individual-workers":{"data":{"route":"/individual-workers","label":"Workers","key":"Workers"}},"dask-dashboard-launcher:/individual-task-stream":{"data":{"route":"/individual-task-stream","label":"Task Stream","key":"Task Stream"}},"dask-dashboard-launcher:/individual-workers-memory":{"data":{"route":"/individual-workers-memory","label":"Workers Memory","key":"Workers Memory"}}},"metadata":{"id":"new-workspace","last_modified":"2022-06-23T17:40:03.102459+00:00","created":"2022-06-23T17:40:03.102459+00:00"}}
1 change: 0 additions & 1 deletion notebooks/jbusecke/testing/xesmf_test.py

@@ -12,4 +12,3 @@
 ds_atmos = xr.open_zarr('gs://cmip6/GFDL_CM2_6/control/atmos_daily.zarr', **kwargs)

 regridder = xe.Regridder(ds_atmos.olr.isel(time=0), ds.surface_temp.isel(time=0), 'bilinear', periodic=True) # all the atmos data is on the cell center AFAIK
-  [trailing blank line removed by end-of-file-fixer]
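For context, the `Regridder` built above computes bilinear interpolation weights from the CM2.6 atmosphere grid to the ocean tracer grid. A hedged sketch of applying it, using xesmf's standard callable interface and the names from the test script:

    # Apply the precomputed weights; xesmf regridders are callable on
    # DataArrays, returning the field on the destination (ocean) grid.
    olr_on_ocean_grid = regridder(ds_atmos.olr)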
15 changes: 7 additions & 8 deletions notebooks/jbusecke/workflow/cm26_pipeline-debug.py

@@ -2,7 +2,7 @@
 # coding: utf-8

 # # Debug issues with exceeding wind stress
-# [trailing whitespace]
+#
 # ## Tasks
 # - Find timestep(s) that exhibit the behavior
 # - Confirm that this behavior only affects certain algorithms

@@ -31,7 +31,7 @@
 from dask.diagnostics import ProgressBar
 from cm26_utils import write_split_zarr, noskin_ds_wrapper, load_and_merge_cm26

-# 👇 replace with your key [trailing whitespace]
+# 👇 replace with your key
 with open('/home/jovyan/keys/pangeo-forge-ocean-transport-4967-347e2048c5a1.json') as token_file:
     token = json.load(token_file)

@@ -63,7 +63,7 @@
 # I did `jupyter nbconvert --to python cm26_pipeline-debug.ipynb`
 # and then I get the error about the wind stress.

-# I have executed this with all algos and I get crashes for: [trailing whitespace]
+# I have executed this with all algos and I get crashes for:
 # 'coare3p6'
 # 'andreas'
 # 'coare3p0'

@@ -87,7 +87,7 @@
 # ## Investigate the max windstress values we are getting with the working algos
-# [trailing whitespace]
+#
 # Is there a correlation between max wind speeds and stresses? Yeah definitely!

 # In[ ]:

@@ -118,8 +118,8 @@
 # ## Ok can we actually get around this and get some results at all?
 # If not we need to raise the tau cut-off in aerobulk.
-# [trailing whitespace]
-# My simple approach right here is to set every wind value larger than `threshold` to zero. This is not a feasible solution for our processing, but I just want to see how low we have to go to get all algos to go through! [trailing whitespace]
+#
+# My simple approach right here is to set every wind value larger than `threshold` to zero. This is not a feasible solution for our processing, but I just want to see how low we have to go to get all algos to go through!

 # In[ ]:

@@ -133,7 +133,7 @@
     mask = ds_masked.wind>threshold
     ds_masked['u_ref'] = ds_masked['u_ref'].where(mask, 0)
     ds_masked['v_ref'] = ds_masked['v_ref'].where(mask, 0)
-  [blank line with trailing whitespace]
+
     break
 ds_out = noskin_ds_wrapper(ds_merged, algo=algo, input_range_check=False)
 with ProgressBar():

@@ -143,4 +143,3 @@
     stress_max = stress.max(['xt_ocean', 'yt_ocean']).assign_coords(algo=algo)
     print(stress_max)
     datasets.append(stress_max)
-  [trailing blank line removed by end-of-file-fixer]
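One thing worth flagging in the masking hunk above: xarray's `.where(cond, other)` keeps values where `cond` is True, so with `mask = ds_masked.wind > threshold` the code keeps the strong winds and zeroes everything at or below the threshold, the opposite of the stated intent. A sketch matching the comment ("set every wind value larger than `threshold` to zero"), reusing the script's names:

    # Keep winds at or below the threshold; replace the rest with 0.
    keep = ds_masked.wind <= threshold
    ds_masked['u_ref'] = ds_masked['u_ref'].where(keep, 0)
    ds_masked['v_ref'] = ds_masked['v_ref'].where(keep, 0)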
10 changes: 5 additions & 5 deletions notebooks/jbusecke/workflow/cm26_pipeline.py

@@ -18,7 +18,7 @@
 from dask.diagnostics import ProgressBar
 from cm26_utils import write_split_zarr, noskin_ds_wrapper

-# 👇 replace with your key [trailing whitespace]
+# 👇 replace with your key
 with open('/home/jovyan/keys/pangeo-forge-ocean-transport-4967-347e2048c5a1.json') as token_file:
     token = json.load(token_file)
 fs = gcsfs.GCSFileSystem(token=token)

@@ -87,7 +87,7 @@
 ############################# wind cutting ##############
 # ok so lets not add nans into fields like above. Instead, lets see in which timesteps this actually occurs and for now completely ignore these timesteps
-# This is not ideal in the long run, but maybe at least gives us a way to output [trailing whitespace]
+# This is not ideal in the long run, but maybe at least gives us a way to output

 # ds_cut = ds_merged.isel(time=slice(0,500))
 # wind = ds_cut.wind

@@ -96,7 +96,7 @@
 threshold = 30
 with ProgressBar():
     strong_wind_cells = (wind > threshold).sum(['xt_ocean','yt_ocean']).load()
-  [blank line with trailing whitespace]
+
 strong_wind_index = strong_wind_cells > 0

 # double check that these events are still rare in space and time

@@ -121,6 +121,6 @@
 if fs.exists(path) and overwrite:
     # # # delete the mapper (only uncomment if you want to start from scratch!)
     print("DELETE existing store")
-    fs.rm(path, recursive=True) [trailing whitespace]
+    fs.rm(path, recursive=True)

-write_split_zarr(mapper, ds_out, split_interval=64) [missing final newline]
+write_split_zarr(mapper, ds_out, split_interval=64)
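For context, `strong_wind_index` above flags every timestep that contains at least one grid cell with wind over the threshold. A hedged sketch of the "completely ignore these timesteps" step the comments describe (assumes `ds_merged` and `strong_wind_index` as defined in the script):

    # Keep only timesteps with no strong-wind cells: strong_wind_index is a
    # boolean DataArray along 'time', so invert it and index positionally.
    ds_filtered = ds_merged.isel(time=(~strong_wind_index).values)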
24 changes: 12 additions & 12 deletions notebooks/jbusecke/workflow/cm26_utils.py

Whitespace-only: `trailing-whitespace` stripped trailing spaces from twelve lines (mostly blank lines) of this fully commented-out module. The affected hunks are shown once each, without the duplicate -/+ pairs:

@@ -14,37 +14,37 @@
 # - Adjust units for aerobulk input
 # - Calculate relative wind components
 # """
 # kwargs = dict(consolidated=True, use_cftime=True, inline_array=inline_array, engine='zarr')#,
 # print('Load Data')
 # mapper = filesystem.get_mapper("gs://cmip6/GFDL_CM2_6/control/surface")
 # # ds_ocean = xr.open_dataset(mapper, chunks='auto', **kwargs)
 # # ds_ocean = xr.open_dataset(mapper, chunks={'time':2}, **kwargs)
 # ds_ocean = xr.open_dataset(mapper, chunks={'time':3}, **kwargs)
 # # cat = open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean/GFDL_CM2.6.yaml")
 # # ds_ocean = cat["GFDL_CM2_6_control_ocean_surface"].to_dask()

 # # ds_flux = cat["GFDL_CM2_6_control_ocean_boundary_flux"].to_dask()
 # mapper = filesystem.get_mapper("gs://cmip6/GFDL_CM2_6/control/ocean_boundary")
 # # ds_flux = xr.open_dataset(mapper, chunks='auto', **kwargs)
 # # ds_flux = xr.open_dataset(mapper, chunks={'time':2}, **kwargs)
 # ds_flux = xr.open_dataset(mapper, chunks={'time':3}, **kwargs)

 # # xarray says not to do this
 # # ds_atmos = xr.open_zarr('gs://cmip6/GFDL_CM2_6/control/atmos_daily.zarr', chunks={'time':1}, **kwargs)
 # mapper = filesystem.get_mapper("gs://cmip6/GFDL_CM2_6/control/atmos_daily.zarr")
 # # ds_atmos = xr.open_dataset(mapper, chunks={'time':120}, **kwargs)
 # # ds_atmos = xr.open_dataset(mapper, chunks={'time':2}, **kwargs)
 # # ds_atmos = xr.open_dataset(mapper, chunks={'time':3}, **kwargs)
 # ds_atmos = xr.open_dataset(mapper, chunks={'time':120}, **kwargs).chunk({'time':3})

 # # # instead do this
 # # ds_atmos = ds_atmos.chunk({'time':1})

 # mapper = filesystem.get_mapper("gs://cmip6/GFDL_CM2_6/grid")
 # ds_oc_grid = xr.open_dataset(mapper, chunks={}, **kwargs)
 # # ds_oc_grid = cat["GFDL_CM2_6_grid"].to_dask()

 # print('Align in time')
 # # cut to same time
 # all_dims = set(list(ds_ocean.dims)+list(ds_atmos.dims))

@@ -78,7 +78,7 @@
 # mapper = filesystem.get_mapper(path)
 # ds_regridder = xr.open_zarr(mapper).load()
 # regridder = xe.Regridder(
 #     ds_atmos.olr.to_dataset(name='dummy').isel(time=0).reset_coords(drop=True), # this is the same dumb problem I keep having with
 #     ds_ocean.surface_temp.to_dataset(name='dummy').isel(time=0).reset_coords(drop=True),
 #     'bilinear',
 #     weights=ds_regridder,

@@ -100,17 +100,17 @@
 # # fix units for aerobulk
 # ds_merged['surface_temp'] = ds_merged['surface_temp'] + 273.15
 # ds_merged['slp'] = ds_merged['slp'] * 100 # check this

 # print('Mask nans')
 # # atmos missing values are filled with 0s, which causes issues with the filtering
 # # Ideally this should be masked before the regridding, but xesmf fills with 0 again...
 # mask = ~np.isnan(ds_merged['surface_temp'])
 # for mask_var in ['slp', 't_ref', 'q_ref']:
 #     ds_merged[mask_var] = ds_merged[mask_var].where(mask)

 # # Calculate relative wind
 # print('Calculate relative wind')
 # ds_merged['u_relative'] = ds_merged['u_ref'] - ds_merged['u_ocean']
 # ds_merged['v_relative'] = ds_merged['v_ref'] - ds_merged['v_ocean']
 # return ds_merged
36 changes: 18 additions & 18 deletions notebooks/jbusecke/workflow/cm26_utils_old.py

Whitespace-only: `trailing-whitespace` stripped trailing spaces from blank lines and a few code lines throughout, and `end-of-file-fixer` added a final newline after the closing `return ds_merged`. The affected hunks are shown once each, without the duplicate -/+ pairs:

@@ -11,15 +11,15 @@ def write_split_zarr(store, ds, split_dim='time', chunks=1, split_interval=180):
     This can be helpful to e.g. avoid problems with overly eager dask schedulers
     """
     # Got my inspiration for this mostly here: https://github.com/pydata/xarray/issues/6069

    # determine the variables and coordinates that depend on the split_dim
     other_dims = [di for di in ds.dims if di != split_dim]
     split_vars_coords = [va for va in ds.variables if split_dim in ds[va].dims and va not in ds.dims]
     non_split_vars_coords = [va for va in ds.variables if va not in split_vars_coords and va not in ds.dims]

     # Generate a stripped dataset that only contains variables/coordinates that do not depend on `split_dim`
     ds_stripped = ds.drop_vars(split_vars_coords+[split_dim])

     # initialize the store without writing values
     print('initializing store')
     ds.to_zarr(

@@ -28,45 +28,45 @@ def write_split_zarr(store, ds, split_dim='time', chunks=1, split_interval=180):
         encoding={split_dim:{"chunks":[chunks]}},
         consolidated=True, # TODO: Not sure if this is proper. Might have to consolidate the whole thing as a last step?
     )

     # Write out only the non-split variables/coordinates
     if len(non_split_vars_coords) > 0:
         print('Writing coordinates')
         ds_stripped.to_zarr(store, mode='a') # I guess a is 'add'. This is honestly not clear enough in the xarray docs.
         # with `w` there are issues with the shape.

     # TODO: what about the attrs?

     # Populate split chunks as regions
     n = len(ds[split_dim])
     splits = list(range(0,n,split_interval))

     # Make sure the last item in the list covers the full length of the time on our dataset
     if splits[-1] != n:
         splits = splits + [n]

     for ii in tqdm(range(len(splits)-1)):
         print(f'Writing split {ii}')
         # TODO: put some retry logic in here...
         start = splits[ii]
         stop = splits[ii+1]

         ds_write = ds.isel({split_dim:slice(start, stop)})
         print(f'Start: {ds_write[split_dim][0].data}')
         print(f'Stop: {ds_write[split_dim][-1].data}')

         # strip everything except the values
         drop_vars = non_split_vars_coords+other_dims
         ds_write = ds_write.drop_vars(drop_vars)

         with ProgressBar():
             ds_write.to_zarr(store, region={split_dim:slice(start, stop)}, mode='a') # why are the variables not instantiated in the init step

 # TODO: This is model agnostic and should live somewhere else?
 def noskin_ds_wrapper(ds_in, algo='ecmwf', **kwargs):
     ds_out = xr.Dataset()
     ds_in = ds_in.copy(deep=False)

     sst = ds_in.surface_temp + 273.15
     t_zt = ds_in.t_ref
     hum_zt = ds_in.q_ref
@@ -75,7 +75,7 @@ def noskin_ds_wrapper(ds_in, algo='ecmwf', **kwargs):
     slp = ds_in.slp * 100 # check this
     zu = 10
     zt = 2

     ql, qh, taux, tauy, evap = noskin(
         sst,
         t_zt,

@@ -91,7 +91,7 @@ def noskin_ds_wrapper(ds_in, algo='ecmwf', **kwargs):
     ds_out['ql'] = ql
     ds_out['qh'] = qh
     ds_out['evap'] = evap
     ds_out['taux'] = taux
     ds_out['tauy'] = tauy
     return ds_out
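`noskin_ds_wrapper` is a thin xarray adapter around aerobulk-python's `noskin`: it converts SST from degC to K and sea-level pressure to Pa, fixes the measurement heights (`zu=10` m, `zt=2` m), and repacks the returned fluxes into a Dataset. A hedged usage sketch (variable names and units are what the conversions inside the wrapper imply, not independently verified):

    # ds_merged needs: surface_temp [degC], t_ref, q_ref, u/v_relative, slp.
    # Returns latent/sensible heat flux, wind stress components, evaporation.
    ds_fluxes = noskin_ds_wrapper(ds_merged, algo='ecmwf')
    print(ds_fluxes[['ql', 'qh', 'taux', 'tauy', 'evap']])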

Expand All @@ -114,7 +114,7 @@ def load_and_merge_cm26(regridder_token):
)
# instead do this
ds_atmos = ds_atmos.chunk({'time':1})

fs = gcsfs.GCSFileSystem(token=regridder_token)
path = 'ocean-transport-group/scale-aware-air-sea/regridding_weights/CM26_atmos2ocean.zarr'
mapper = fs.get_mapper(path)
Expand All @@ -138,4 +138,4 @@ def load_and_merge_cm26(regridder_token):
ds_merged = ds_merged.transpose(
'xt_ocean', 'yt_ocean', 'time'
)
return ds_merged
return ds_merged
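For reference, the core trick in `write_split_zarr` above is: initialize the zarr store lazily, then fill it slab by slab with region writes so only one slice is ever loaded. A minimal sketch of that pattern in plain xarray (assumes every variable in `ds` depends on 'time'; the real function above also handles time-independent variables and coordinates):

    import xarray as xr

    def region_write_sketch(ds: xr.Dataset, store, interval: int = 180) -> None:
        # 1) Lay out the full store without computing any values.
        ds.to_zarr(store, compute=False)
        # 2) Fill it piecewise; each region write loads only one time slab.
        n = ds.sizes['time']
        for start in range(0, n, interval):
            stop = min(start + interval, n)
            ds.isel(time=slice(start, stop)).to_zarr(
                store, region={'time': slice(start, stop)}
            )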