diff --git a/jupyter-images/uah24s/.condarc b/jupyter-images/uah24s/.condarc new file mode 100644 index 00000000..7c536d94 --- /dev/null +++ b/jupyter-images/uah24s/.condarc @@ -0,0 +1,2 @@ +envs_dirs: + - /home/jovyan/additional-envs diff --git a/jupyter-images/uah24s/Acknowledgements.ipynb b/jupyter-images/uah24s/Acknowledgements.ipynb new file mode 100644 index 00000000..19cb249b --- /dev/null +++ b/jupyter-images/uah24s/Acknowledgements.ipynb @@ -0,0 +1,43 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c86cd54f-b73c-4781-b6eb-89c79d3d3b22", + "metadata": {}, + "source": [ + "## Acknowledgements\n", + "\n", + "Launching this JupyterHub server is the result of a collaboration between several research and academic institutions and their staff. For Jetstream2 and JupyterHub expertise, we thank Andrea Zonca (San Diego Supercomputer Center), Jeremy Fischer, Mike Lowe (Indiana University), and the NSF Jetstream2 (`doi:10.1145/3437359.3465565`) team.\n", + "\n", + "This work employs the NSF Jetstream2 Cloud at Indiana University through allocation EES220002 from the Advanced Cyberinfrastructure Coordination Ecosystem: Services & Support (ACCESS) program, which is supported by National Science Foundation grants #2138259, #2138286, #2138307, #2137603, and #2138296.\n", + "\n", + "Unidata is one of the University Corporation for Atmospheric Research (UCAR)'s Community Programs (UCP), and is funded primarily by the National Science Foundation (AGS-1901712).\n", + "\n", + "## To Acknowledge This JupyterHub and the Unidata Science Gateway\n", + "\n", + "If you have benefited from the Unidata Science Gateway, please cite `doi:10.5065/688s-2w73`. 
Additional citation information can be found in this [Citation File Format file](https://raw.githubusercontent.com/Unidata/science-gateway/master/CITATION.cff).\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyter-images/uah24s/Dockerfile b/jupyter-images/uah24s/Dockerfile new file mode 100644 index 00000000..bf45b851 --- /dev/null +++ b/jupyter-images/uah24s/Dockerfile @@ -0,0 +1,42 @@ +# Heavily borrowed from docker-stacks/minimal-notebook/ +# https://github.com/jupyter/docker-stacks/blob/main/minimal-notebook/Dockerfile + +ARG BASE_CONTAINER=jupyter/minimal-notebook +FROM $BASE_CONTAINER + +ENV DEFAULT_ENV_NAME=uah24s + +LABEL maintainer="Unidata " + +USER root + +RUN apt-get update && \ + apt-get install -y --no-install-recommends vim curl ffmpeg && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +USER $NB_UID + +# With several sources, the COPY destination must be a directory ending in "/" +COPY environment-tf.yaml environment-numba.yaml additional-env.yaml /tmp/ + +RUN mamba install --quiet --yes \ + 'conda-forge::nb_conda_kernels' \ + 'conda-forge::jupyterlab-git' \ + 'conda-forge::ipywidgets' && \ + mamba env update --name $DEFAULT_ENV_NAME -f /tmp/environment-tf.yaml && \ + mamba env update --name $DEFAULT_ENV_NAME -f /tmp/additional-env.yaml && \ + mamba env update --name ${DEFAULT_ENV_NAME}-numba -f /tmp/environment-numba.yaml && \ + mamba env update --name ${DEFAULT_ENV_NAME}-numba -f /tmp/additional-env.yaml && \ + pip install --no-cache-dir nbgitpuller && \ + mamba clean --all -f -y && \ + jupyter lab clean -y && \ + npm cache clean --force && \ + rm -rf /home/$NB_USER/.cache/yarn && \ + rm -rf /home/$NB_USER/.node-gyp 
&& \ + fix-permissions $CONDA_DIR && \ + fix-permissions /home/$NB_USER + +COPY update_material.ipynb Acknowledgements.ipynb default_kernel.py / + +USER $NB_UID diff --git a/jupyter-images/uah24s/additional-env.yaml b/jupyter-images/uah24s/additional-env.yaml new file mode 100644 index 00000000..fff65773 --- /dev/null +++ b/jupyter-images/uah24s/additional-env.yaml @@ -0,0 +1,11 @@ +name: additional-env +channels: + - conda-forge +dependencies: + # User requested packages + - seaborn + - pip: + # It is recommended to install a package using pip as a last resort, i.e. + # when it is not found in the conda repos + - optuna + - jupyterlab-optuna diff --git a/jupyter-images/uah24s/additional_kernels.ipynb b/jupyter-images/uah24s/additional_kernels.ipynb new file mode 100644 index 00000000..658cb6fb --- /dev/null +++ b/jupyter-images/uah24s/additional_kernels.ipynb @@ -0,0 +1,52 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a9d9cf3f-590d-40ef-8421-a9789a03bb07", + "metadata": {}, + "source": [ + "### Creating Additional Kernels\n", + "\n", + "You can also create additional kernels and have them be available via the kernel menu. Your kernel must contain the `nb_conda_kernels` and `ipykernel` packages for this to work. For example, if you wish to have a kernel with the `seaborn` package, you can create the following `environment.yml` from the terminal with the `pico` editor:\n", + "\n", + "```yaml\n", + " name: myenv\n", + " channels:\n", + " - conda-forge\n", + " dependencies:\n", + " - python=3\n", + " - seaborn\n", + " - nb_conda_kernels\n", + " - ipykernel\n", + "```\n", + "\n", + "followed by\n", + "\n", + "`conda env update --name myenv -f environment.yml`\n", + "\n", + "at this point `myenv` will be available via the `Kernel → Change kernel...` menu." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyter-images/uah24s/build.sh b/jupyter-images/uah24s/build.sh new file mode 100755 index 00000000..2b6fad0a --- /dev/null +++ b/jupyter-images/uah24s/build.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# Check if an image name is provided +if [ -z "$1" ]; then + echo "Error: No image name provided." + echo "Usage: $0 " + exit 1 +fi + +IMAGE_NAME=$1 + +DATE_TAG=$(date "+%Y%b%d_%H%M%S") +RANDOM_HEX=$(openssl rand -hex 2) +TAG="${DATE_TAG}_${RANDOM_HEX}" + +FULL_TAG="unidata/$IMAGE_NAME:$TAG" + +echo "Building Docker image with tag: $FULL_TAG" + +docker build --no-cache --pull --tag "$FULL_TAG" . + +# Check if the build was successful +if [ $? -eq 0 ]; then + echo "Docker image built successfully: $FULL_TAG" +else + echo "Error: Docker build failed." + exit 1 +fi diff --git a/jupyter-images/uah24s/default_kernel.py b/jupyter-images/uah24s/default_kernel.py new file mode 100755 index 00000000..f4d6d81e --- /dev/null +++ b/jupyter-images/uah24s/default_kernel.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python + +import argparse +import glob +import json +import os +import re + + +def update_kernelspec_in_notebooks(directory, new_name): + """ + Updates the kernelspec in all Jupyter Notebook files within the specified + directory and its subdirectories, while preserving the original file + formatting. + + Args: + directory (str): The path to the directory containing .ipynb files. + new_name (str): The new name to set in the kernelspec. 
+ """ + for file_path in glob.glob(f'{directory}/**/*.ipynb', recursive=True): + try: + with open(file_path, 'r', encoding='utf-8') as file: + file_contents = file.read() + notebook = json.loads(file_contents) + + if 'kernelspec' not in notebook.get('metadata', {}): + print(f"No kernelspec found in {file_path}. Skipping file.") + continue + + kernelspec = notebook['metadata']['kernelspec'] + kernelspec['display_name'] = f"Python [conda env:{new_name}]" + kernelspec['name'] = f"conda-env-{new_name}-py" + + # Convert the updated kernelspec dictionary to a JSON-formatted + # string with indentation + updated_kernelspec = json.dumps(kernelspec, indent=4) + + # Replace the existing kernelspec section in the original file + # contents with the updated JSON string. The non-greedy pattern ends + # at the first closing brace, so it assumes the kernelspec holds no + # nested objects. The replacement is a callable so that backslash + # escapes emitted by json.dumps are inserted literally. + updated_contents = re.sub( + r'"kernelspec": \{.*?\}', + lambda _match: f'"kernelspec": {updated_kernelspec}', + file_contents, flags=re.DOTALL + ) + + with open(file_path, 'w', encoding='utf-8') as file: + file.write(updated_contents) + + except Exception as e: + print(f"Error processing file {file_path}: {e}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Update the kernel name in " + "Jupyter Notebook files in directory " + "tree.") + parser.add_argument("new_kernel_name", help="New kernel name to set.") + parser.add_argument("directory_path", nargs='?', default=os.getcwd(), + help="Directory containing .ipynb files (default: " + "current directory).") + + args = parser.parse_args() + + update_kernelspec_in_notebooks(args.directory_path, args.new_kernel_name) diff --git a/jupyter-images/uah24s/environment-numba.yaml b/jupyter-images/uah24s/environment-numba.yaml new file mode 100644 index 00000000..91a9fe11 --- /dev/null +++ b/jupyter-images/uah24s/environment-numba.yaml @@ -0,0 +1,33 @@ +name: 
uah24s-numba +channels: + - conda-forge +dependencies: + # Required by JupyterLab + - python=3 + - nb_conda_kernels + - ipykernel + # User requested packages + - numpy + - matplotlib + - cartopy + - metpy + - siphon + - pandas + - pip + - xarray + - ipywidgets + - python-awips + - scikit-learn + - tobac + - s3fs + - arm_pyart + - netCDF4 + - zarr + # numba and cuda + # See https://numba.pydata.org/numba-doc/latest/user/installing.html + - numba + - cudatoolkit + - pip: + # It is recommended to install a package using pip as a last resort, i.e. + # when it is not found in the conda repos + - palmerpenguins diff --git a/jupyter-images/uah24s/environment-tf.yaml b/jupyter-images/uah24s/environment-tf.yaml new file mode 100644 index 00000000..ec61c645 --- /dev/null +++ b/jupyter-images/uah24s/environment-tf.yaml @@ -0,0 +1,42 @@ +name: uah24s +channels: + - conda-forge +dependencies: + # Required by JupyterLab + - python=3 + - nb_conda_kernels + - ipykernel + # User requested packages + - numpy + - matplotlib + - cartopy + - metpy + - siphon + - pandas + - pip + - xarray + - ipywidgets + - python-awips + - scikit-learn + - tobac + - s3fs + - arm_pyart + - netCDF4 + - zarr + - pip: + # It is recommended to install a package using pip as a last resort, i.e. 
+ # when it is not found in the conda repos + - palmerpenguins + - tensorflow==2.15.post1 + - nvidia-cublas-cu12==12.2.5.6 + - nvidia-cuda-cupti-cu12==12.2.142 + - nvidia-cuda-nvcc-cu12==12.2.140 + - nvidia-cuda-nvrtc-cu12==12.2.140 + - nvidia-cuda-runtime-cu12==12.2.140 + - nvidia-cudnn-cu12==8.9.4.25 + - nvidia-cufft-cu12==11.0.8.103 + - nvidia-curand-cu12==10.3.3.141 + - nvidia-cusolver-cu12==11.5.2.141 + - nvidia-cusparse-cu12==12.1.2.141 + - nvidia-nccl-cu12==2.16.5 + - nvidia-nvjitlink-cu12==12.2.140 diff --git a/jupyter-images/uah24s/gpu/Dockerfile b/jupyter-images/uah24s/gpu/Dockerfile new file mode 100644 index 00000000..8a3bebbe --- /dev/null +++ b/jupyter-images/uah24s/gpu/Dockerfile @@ -0,0 +1,66 @@ +# Heavily borrowed from docker-stacks +# https://github.com/jupyter/docker-stacks/blob/main/docker-stacks-foundation/Dockerfile + +ARG BASE_CONTAINER=nvcr.io/nvidia/tensorflow:22.04-tf2-py3 +FROM $BASE_CONTAINER + +LABEL maintainer="Unidata " + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +ENV DEBIAN_FRONTEND noninteractive +RUN apt-get update --yes && \ + apt-get upgrade --yes && \ + apt-get install --yes --no-install-recommends \ + bzip2 ca-certificates locales sudo wget software-properties-common \ + libproj-dev proj-data proj-bin libgeos-dev ffmpeg && \ + # updating Python messes up Tensorflow from base container, unfort. 
+ # add-apt-repository ppa:deadsnakes/ppa && apt-get update --yes && \ + # apt-get install -y python3.10 python3.10-distutils && \ + # curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 && \ + # ln -sfn /usr/bin/python3.10 /usr/bin/python3 && \ + # ln -sfn /usr/bin/python3 /usr/bin/python && \ + # ln -sfn /usr/bin/pip3 /usr/bin/pip && \ + python3 -m pip install --no-cache-dir jupyterhub==3.0.0 'jupyterlab>=3' \ + notebook jupyter_server cartopy catboost metpy minisom netCDF4 pillow \ + 'pyvista[all,trame]' pyvista-xarray seaborn shapely torch torchaudio \ + torchvision verde xarray ipywidgets jupyterlab_widgets \ + jupyter-server-proxy --upgrade && \ + python3 -m pip uninstall vtk -y && \ + python3 -m pip install --no-cache-dir --upgrade --extra-index-url \ + https://wheels.vtk.org vtk-osmesa --extra-index-url \ + https://download.pytorch.org/whl/cu112 && \ + apt-get clean && rm -rf /var/lib/apt/lists/* && \ + echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ + locale-gen + +ARG NB_USER="jovyan" +ARG NB_UID="1000" +ARG NB_GID="100" + +COPY fix-permissions /usr/local/bin/fix-permissions +RUN chmod a+rx /usr/local/bin/fix-permissions + +ENV HOME="/home/${NB_USER}" + +# Enable prompt color in the skeleton .bashrc before creating the default NB_USER +# hadolint ignore=SC2016 +RUN sed -i 's/^#force_color_prompt=yes/force_color_prompt=yes/' /etc/skel/.bashrc + +# Create NB_USER with name jovyan user with UID=1000 and in the 'users' group +# and make sure these dirs are writable by the `users` group. 
+RUN echo "auth requisite pam_deny.so" >> /etc/pam.d/su && \ + useradd -l -m -s /bin/bash -N -u "${NB_UID}" "${NB_USER}" && \ + chmod g+w /etc/passwd && \ + fix-permissions "${HOME}" + +COPY Acknowledgements.ipynb / +COPY gpu.ipynb / +COPY weatherbench_TF.ipynb / +COPY MNIST_Example_PyTorch.ipynb / + +USER ${NB_UID} + +WORKDIR "${HOME}" diff --git a/jupyter-images/uah24s/gpu/MNIST_Example_PyTorch.ipynb b/jupyter-images/uah24s/gpu/MNIST_Example_PyTorch.ipynb new file mode 100644 index 00000000..47858cbd --- /dev/null +++ b/jupyter-images/uah24s/gpu/MNIST_Example_PyTorch.ipynb @@ -0,0 +1,356 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fa74adca-4591-40c5-b182-2db3df1ac7b0", + "metadata": {}, + "source": [ + "# MNIST CNN Example in PyTorch" + ] + }, + { + "cell_type": "markdown", + "id": "96b89b0e-146f-463c-922a-13041ca76478", + "metadata": {}, + "source": [ + "This example is borrowed from this [book](https://www.packtpub.com/product/machine-learning-with-pytorch-and-scikit-learn/9781801819312) found [here](https://github.com/rasbt/machine-learning-book) shared under an MIT license.\n", + "\n", + "This is a great book! Check out a short blog post on some ML books I enjoy. [Blog post here]( https://www.unidata.ucar.edu/blogs/news/entry/recommended-books-for-machine-learning)\n", + "\n", + "-Thomas Martin, AI/ML Software Engineer, Unidata" + ] + }, + { + "cell_type": "markdown", + "id": "75943822-c798-470c-908b-10a2bdc20764", + "metadata": { + "tags": [] + }, + "source": [ + "This notebook should take less than 5 minutes to run from top to bottom. This is not designed to be a true tutorial, just to show that GPU-enabled workflows using PyTorch are possible on a JS2 virtual machine. If you are unfamiliar with CNNs, check out the video [here](https://www.youtube.com/watch?v=HGwBXDKFk9I) by StatQuest. 
" + ] + }, + { + "cell_type": "markdown", + "id": "fb61c34e-3ca9-4215-a2a3-df6991c1d4ef", + "metadata": {}, + "source": [ + "### Importing Packages " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b339b3cd-4af0-4fff-8cb8-100fbb237887", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "## Load in the necessary python packages to train a CNN\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "from torch.utils.data import Subset, DataLoader\n", + "from torch.autograd import Variable\n", + "from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d\n", + "\n", + "import torchvision \n", + "from torchvision import transforms \n", + "\n", + "print('torch version:', torch.__version__ )\n", + "print('torchvision version:', torchvision.__version__ )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "edb2dcdd-fed8-4b50-9dfb-f31be62c0ff0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "image_path = './'\n", + "transform = transforms.Compose([transforms.ToTensor()])\n", + "\n", + "mnist_dataset = torchvision.datasets.MNIST(root=image_path, \n", + " train=True, \n", + " transform=transform, \n", + " download=True)\n", + "\n", + "mnist_valid_dataset = Subset(mnist_dataset, torch.arange(10000)) \n", + "mnist_train_dataset = Subset(mnist_dataset, torch.arange(10000, len(mnist_dataset)))\n", + "mnist_test_dataset = torchvision.datasets.MNIST(root=image_path, \n", + " train=False, \n", + " transform=transform, \n", + " download=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dbe1766-e434-433a-9cd7-1ef801fd09c6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "batch_size = 64\n", + "torch.manual_seed(1)\n", + "train_dl = DataLoader(mnist_train_dataset, batch_size, shuffle=True)\n", + "valid_dl = DataLoader(mnist_valid_dataset, batch_size, 
shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "id": "ef10fe5b-f507-4dd5-9e8a-6771f890a2df", + "metadata": {}, + "source": [ + "### Build Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23460620-8aa0-4b17-8122-cd24a4b69d23", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "model = nn.Sequential()\n", + "model.add_module('conv1', nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2))\n", + "model.add_module('relu1', nn.ReLU()) \n", + "model.add_module('pool1', nn.MaxPool2d(kernel_size=2)) \n", + "model.add_module('conv2', nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2))\n", + "model.add_module('relu2', nn.ReLU()) \n", + "model.add_module('pool2', nn.MaxPool2d(kernel_size=2)) \n", + "\n", + "x = torch.ones((4, 1, 28, 28))\n", + "model(x).shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d25c5dcb-59a1-4646-bbd6-94d6d49f8a96", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "model.add_module('flatten', nn.Flatten()) \n", + "\n", + "x = torch.ones((4, 1, 28, 28))\n", + "model(x).shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08596270-07c6-41bc-82d4-5c84672eb98b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "model.add_module('fc1', nn.Linear(3136, 1024)) \n", + "model.add_module('relu3', nn.ReLU()) \n", + "model.add_module('dropout', nn.Dropout(p=0.5)) \n", + "\n", + "model.add_module('fc2', nn.Linear(1024, 10)) " + ] + }, + { + "cell_type": "markdown", + "id": "532a3e07-7e97-4f0a-b2e3-44aba16d306d", + "metadata": {}, + "source": [ + "Pick if you want to use GPU or CPU. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e620ae9-bd39-4579-a7a8-3d93aa0d8e26", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "device = torch.device(\"cuda:0\")\n", + "# device = torch.device(\"cpu\") \n", + "\n", + "model = model.to(device) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9898c2b8-c833-41bd-84be-aa455188de5f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "loss_fn = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n", + "\n", + "def train(model, num_epochs, train_dl, valid_dl):\n", + " loss_hist_train = [0] * num_epochs\n", + " accuracy_hist_train = [0] * num_epochs\n", + " loss_hist_valid = [0] * num_epochs\n", + " accuracy_hist_valid = [0] * num_epochs\n", + " for epoch in range(num_epochs):\n", + " model.train()\n", + " for x_batch, y_batch in train_dl:\n", + " x_batch = x_batch.to(device) \n", + " y_batch = y_batch.to(device) \n", + " pred = model(x_batch)\n", + " loss = loss_fn(pred, y_batch)\n", + " loss.backward()\n", + " optimizer.step()\n", + " optimizer.zero_grad()\n", + " loss_hist_train[epoch] += loss.item()*y_batch.size(0)\n", + " is_correct = (torch.argmax(pred, dim=1) == y_batch).float()\n", + " accuracy_hist_train[epoch] += is_correct.sum().cpu()\n", + "\n", + " loss_hist_train[epoch] /= len(train_dl.dataset)\n", + " accuracy_hist_train[epoch] /= len(train_dl.dataset)\n", + " \n", + " model.eval()\n", + " with torch.no_grad():\n", + " for x_batch, y_batch in valid_dl:\n", + " x_batch = x_batch.to(device) \n", + " y_batch = y_batch.to(device) \n", + " pred = model(x_batch)\n", + " loss = loss_fn(pred, y_batch)\n", + " loss_hist_valid[epoch] += loss.item()*y_batch.size(0) \n", + " is_correct = (torch.argmax(pred, dim=1) == y_batch).float() \n", + " accuracy_hist_valid[epoch] += is_correct.sum().cpu()\n", + "\n", + " loss_hist_valid[epoch] /= len(valid_dl.dataset)\n", + " accuracy_hist_valid[epoch] 
/= len(valid_dl.dataset)\n", + " \n", + " print(f'Epoch {epoch+1} accuracy: {accuracy_hist_train[epoch]:.4f} val_accuracy: {accuracy_hist_valid[epoch]:.4f}')\n", + " return loss_hist_train, loss_hist_valid, accuracy_hist_train, accuracy_hist_valid\n", + "\n", + "torch.manual_seed(1)\n", + "num_epochs = 12\n", + "hist = train(model, num_epochs, train_dl, valid_dl)" + ] + }, + { + "cell_type": "markdown", + "id": "67ae6100-922b-4b6a-bdb4-fc68b6626763", + "metadata": {}, + "source": [ + "### Plot Results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7624f45a-bb0a-4d42-b357-3efb1f76dfee", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "x_arr = np.arange(len(hist[0])) + 1\n", + "\n", + "fig = plt.figure(figsize=(12, 4))\n", + "ax = fig.add_subplot(1, 2, 1)\n", + "ax.plot(x_arr, hist[0], '-o', label='Train loss')\n", + "ax.plot(x_arr, hist[1], '--<', label='Validation loss')\n", + "ax.set_xlabel('Epoch', size=15)\n", + "ax.set_ylabel('Loss', size=15)\n", + "ax.legend(fontsize=15)\n", + "ax = fig.add_subplot(1, 2, 2)\n", + "ax.plot(x_arr, hist[2], '-o', label='Train acc.')\n", + "ax.plot(x_arr, hist[3], '--<', label='Validation acc.')\n", + "ax.legend(fontsize=15)\n", + "ax.set_xlabel('Epoch', size=15)\n", + "ax.set_ylabel('Accuracy', size=15)\n", + "\n", + "#plt.savefig('figures/14_13.png')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14658d9d-2999-435a-b9e3-215adff3b5e1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "torch.cuda.synchronize()\n", + "model_cpu = model.cpu()\n", + "pred = model(mnist_test_dataset.data.unsqueeze(1) / 255.)\n", + "is_correct = (torch.argmax(pred, dim=1) == mnist_test_dataset.targets).float()\n", + "print(f'Test accuracy: {is_correct.mean():.4f}') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c79e581-3643-4cd5-9f9c-f25a6a1afb00", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "fig 
= plt.figure(figsize=(12, 4))\n", + "for i in range(12):\n", + " ax = fig.add_subplot(2, 6, i+1)\n", + " ax.set_xticks([]); ax.set_yticks([])\n", + " img = mnist_test_dataset[i][0][0, :, :]\n", + " pred = model(img.unsqueeze(0).unsqueeze(1))\n", + " y_pred = torch.argmax(pred)\n", + " ax.imshow(img, cmap='gray_r')\n", + " ax.text(0.9, 0.1, y_pred.item(), \n", + " size=15, color='blue',\n", + " horizontalalignment='center',\n", + " verticalalignment='center', \n", + " transform=ax.transAxes)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "b1108e15-58e9-4d44-96ce-305656188500", + "metadata": {}, + "source": [ + "The end! Enjoy" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyter-images/uah24s/gpu/fix-permissions b/jupyter-images/uah24s/gpu/fix-permissions new file mode 100644 index 00000000..d167578b --- /dev/null +++ b/jupyter-images/uah24s/gpu/fix-permissions @@ -0,0 +1,35 @@ +#!/bin/bash +# set permissions on a directory +# after any installation, if a directory needs to be (human) user-writable, +# run this script on it. +# It will make everything in the directory owned by the group ${NB_GID} +# and writable by that group. +# Deployments that want to set a specific user id can preserve permissions +# by adding the `--group-add users` line to `docker run`. 
+ +# uses find to avoid touching files that already have the right permissions, +# which would cause massive image explosion + +# right permissions are: +# group=${NB_GID} +# AND permissions include group rwX (directory-execute) +# AND directories have setuid,setgid bits set + +set -e + +for d in "$@"; do + find "${d}" \ + ! \( \ + -group "${NB_GID}" \ + -a -perm -g+rwX \ + \) \ + -exec chgrp "${NB_GID}" -- {} \+ \ + -exec chmod g+rwX -- {} \+ + # setuid, setgid *on directories only* + find "${d}" \ + \( \ + -type d \ + -a ! -perm -6000 \ + \) \ + -exec chmod +6000 -- {} \+ +done diff --git a/jupyter-images/uah24s/gpu/gpu.ipynb b/jupyter-images/uah24s/gpu/gpu.ipynb new file mode 100644 index 00000000..b5729078 --- /dev/null +++ b/jupyter-images/uah24s/gpu/gpu.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "09fedf5c-6271-4c53-bf2f-52dc06f4ba79", + "metadata": {}, + "source": [ + "# GPU Testing Notebook" + ] + }, + { + "cell_type": "markdown", + "id": "906a989f-1064-438c-95a3-81dbf3d30161", + "metadata": {}, + "source": [ + "Small notebook to do a quick 'sniff test' of CUDA/GPU capabilities for Jetstream2 VM's. This is not an exhaustive test! Whole notebook should run in less than a minute, and might return some errors, but every cell should run. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fbac73b7-12e7-40f2-9bf2-f7a579eff656", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "!nvidia-smi" + ] + }, + { + "cell_type": "markdown", + "id": "9f3730b8-e660-42fe-9b95-29e892ca8d96", + "metadata": {}, + "source": [ + "In a terminal window, if you want to watch/monitor this information, use the below code. This will refresh the output every second. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f61118e-d724-4762-ac0b-2abd9db2c04a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# watch -n 1 nvidia-smi" + ] + }, + { + "cell_type": "markdown", + "id": "6002a433-c946-4850-8a9d-3a6f076c9ba3", + "metadata": { + "tags": [] + }, + "source": [ + "## Tensorflow Check" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3644b3d4-95d3-43d6-a42e-0a69df65bb7f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73038352-88b2-4298-b864-049e1ed3bfbc", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "tf.test.is_built_with_cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "180eb0e3-25d6-4568-89f4-1907a4f73b86", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "is_cuda_gpu_available = tf.config.list_physical_devices('GPU')\n", + "is_cuda_gpu_available" + ] + }, + { + "cell_type": "markdown", + "id": "f4afe606-9244-475d-8303-d5d601bbf518", + "metadata": {}, + "source": [ + "## PyTorch Check" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37dae2ae-1e57-49c4-9e07-3af13caa97d8", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import torch\n", + "torch.cuda.is_available()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f554ddee-7c86-431f-b99d-6489113c4d22", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "torch.cuda.device_count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab8a33e4-dfeb-41f7-8ef5-e327a2495290", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "torch.cuda.device(0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0d14f82-00c8-421b-afca-881719e96a9b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": 
[ + "# setting device on GPU if available, else CPU\n", + "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", + "print('Using device:', device)\n", + "print()\n", + "\n", + "\n", + "#Additional Info when using cuda\n", + "if device.type == 'cuda':\n", + " print(torch.cuda.get_device_name(0))\n", + " print('Memory Usage:')\n", + " print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50ec833c-9a4e-40c1-b72f-0da885041dde", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "torch.cuda.get_device_properties(0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22e8abec-62bf-40b5-9dd1-23a555418144", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "t = torch.randn(9,9)\n", + "t.is_cuda # returns False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0109ecbd-95b2-4286-aa4e-0817c3ef3182", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "t = torch.randn(2,2).cuda()\n", + "t.is_cuda # returns True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12df8c7d-6c7f-495f-9c05-4582dc2e0a24", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "t = t.cpu()\n", + "t.is_cuda # returns False" + ] + }, + { + "cell_type": "markdown", + "id": "994253b9-cab0-41f5-b28d-fd4c67557fd2", + "metadata": {}, + "source": [ + "## XGBoost Check" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec8cf6ca-ab3f-4c21-b911-e7fc063b5b17", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import xgboost as xgb\n", + "\n", + "# Dataset we will load in\n", + "from sklearn.datasets import load_iris" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e8c43ec-ebf3-49f3-8a58-c1b288249976", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "iris = load_iris()\n", + "X = 
iris[\"data\"]\n", + "y = iris[\"target\"]\n", + "xgb_model = xgb.XGBRegressor(\n", + " # If there is no GPU, the tree_method kwarg will cause either\n", + " # - an error in `xgb_model.fit(X, y)` (seen with pytest) or\n", + " # - a warning printed to the console (seen in Spyder)\n", + " # It's unclear which of the two happens under what circumstances.\n", + " tree_method=\"gpu_hist\" # GPU!!\n", + " )\n", + "xgb_model.fit(X, y)\n", + "# Check that no warning was printed." + ] + }, + { + "cell_type": "markdown", + "id": "ce5144d0-68d8-43d6-8662-43632900636a", + "metadata": {}, + "source": [ + "## Catboost Check" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6f83eff-3983-4203-b6e1-16aa1d51364a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import catboost\n", + "\n", + "from catboost.utils import get_gpu_device_count\n", + "from catboost import CatBoostRegressor\n", + "catboost.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3648168-7a49-4e6a-8445-91d87bc731d8", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "print('I see %i GPU devices' % get_gpu_device_count())" + ] + }, + { + "cell_type": "markdown", + "id": "e3a9d518-cc65-4af9-958d-3bc4340c5802", + "metadata": {}, + "source": [ + " I see 1 GPU devices\n", + "\n", + "\n", + "Below seems like a catboost issue, not a GPU one" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00430762-ed11-4e0a-97ae-cf4f69bdefa6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "model = CatBoostRegressor(iterations=1000,\n", + " task_type=\"GPU\",\n", + " gpu_ram_part=0.5\n", + " # n_models = 0.05\n", + " # devices='1'\n", + " )\n", + "model.fit(X,\n", + " y,\n", + " verbose=False)" + ] + }, + { + "cell_type": "markdown", + "id": "609ddbd6-1b2b-403e-97bc-8df7b686d597", + "metadata": {}, + "source": [ + "## CuPy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"6c0e1d27-9897-439b-8a50-ec6914b96cb0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import cupy as cp\n", + "x = cp.arange(6).reshape(2, 3).astype('f')\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53a21cf1-88cd-43c0-b72d-a593760b48c7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "x.sum(axis=1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyter-images/uah24s/gpu/gpu/jupyterhub_gpu.yaml b/jupyter-images/uah24s/gpu/gpu/jupyterhub_gpu.yaml new file mode 100644 index 00000000..5eecf972 --- /dev/null +++ b/jupyter-images/uah24s/gpu/gpu/jupyterhub_gpu.yaml @@ -0,0 +1,45 @@ +singleuser: + profileList: + - display_name: "GPU Server" + description: "Spawns a notebook server with access to a GPU" + kubespawner_override: + extra_resource_limits: + nvidia.com/gpu: "1" + - display_name: "CPU Server" + description: "Spawns a standard notebook server" + kubespawner_override: + image: "unidata/unidatahub:xxxxx" + image: + # name: zonca/nvidia-tensorflow-jupyterhub + # tag: "23.1.5" + name: unidata/jupyter-gpu + tag: "xxxxx" + # very small limits for testing on small instances + memory: + guarantee: 5G + limit: 5G + cpu: + guarantee: 0.3 + limit: 1 + storage: + type: dynamic + capacity: 10Gi + # default is 300s, sometimes Jetstream volumes are slow to attach + startTimeout: 600 + # See https://github.com/zonca/jupyterhub-deploy-kubernetes-jetstream/issues/38 + lifecycleHooks: + postStart: + exec: + command: + - "sh" + - "-c" + - > + chmod 700 .ssh; + chmod g-s .ssh; + chmod 600 .ssh/*; + cp 
/Acknowledgements.ipynb /home/jovyan; + cp /gpu.ipynb /home/jovyan; + cp /weatherbench_TF.ipynb /home/jovyan; + cp /MNIST_Example_PyTorch.ipynb /home/jovyan; + [ ! -h /home/jovyan/shared ] && ln -s /shared/shared /home/jovyan/shared || echo "sym link exists"; + exit 0 diff --git a/jupyter-images/uah24s/gpu/weatherbench_TF.ipynb b/jupyter-images/uah24s/gpu/weatherbench_TF.ipynb new file mode 100644 index 00000000..0911cd3d --- /dev/null +++ b/jupyter-images/uah24s/gpu/weatherbench_TF.ipynb @@ -0,0 +1,588 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "usMyjJA8-p7r" + }, + "source": [ + "# Simple example of a WeatherBench model\n", + "\n", + "In this notebook we will build a simple neural network on the WeatherBench dataset. We will walk you through all the code so if you are not familiar with python you can simply click through the code boxes (see \"How to run this exercise\" below). If you are familiar with python and machine learning, there is a stretch exercise at the end of the notebook.\n", + "\n", + "The aim of this example is to predict the geopotential at the 500hPa pressure level in the atmosphere and compare your solution with the benchmark dataset. This variable is important for identifying weather systems such as cyclones and anticyclones.\n", + "\n", + "With the data you will download, you can make a forecast for any number of days ahead, but in this exercise we focus on forecasting the geopotential five days ahead.\n", + "\n", + "We evaluate our model using the Root Mean Squared Error." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lWFzgKq3AUEx" + }, + "source": [ + "## How to run this exercise\n", + "\n", + "This exercise is in the form of a [Jupyter notebook](https://jupyter.org/). It can be \"run\" in a number of free cloud based environments (see two options below). These require no installation. 
When you click on one of the links below (\"Open in Colab\" or \"Open in Kaggle\") you will be prompted to create a free account, after which you will see the same page you see here. Follow the instructions below to connect to a GPU. After that you can run each block of code by pressing Shift+Enter repeatedly, or by selecting the \"play\" icon. \n", + "\n", + "Advanced users may wish to run this exercise on their own computers by first installing Python and Jupyter, in addition to the packages listed below (xarray, numpy and tensorflow), or as listed in the \"environment.yml\" file in https://github.com/ecmwf-projects/mooc-machine-learning-weather-climate/tree/main/tier_1." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tGGur2rfAUEy" + }, + "source": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Run the tutorial via free cloud platforms: \n", + " \"Colab\"\n", + " \"Kaggle\"
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wDDNQgp796BP" + }, + "source": [ + "Note that to use Kaggle, you need to enable an option on the notebook. Please follow the instructions here to do this https://stackoverflow.com/questions/68142524/cannot-access-internet-on-kaggle-notebook. \n", + "\n", + "Since we will train a neural network later, it might make sense to connect to a GPU runtime:\n", + "\n", + "* In Google Colab this can be done under Runtime --> Change runtime type.\n", + "\n", + "* In Kaggle this can be done under Accelerator." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lL0pJLu_AUEz" + }, + "source": [ + "## Import packages\n", + "\n", + "Below is the very first code block to \"run\". This will import software packages needed to run the exercise. They include [numpy](https://numpy.org/doc/stable/) and [xarray](https://docs.xarray.dev/en/stable/), which are used for handling multidimensional arrays of data. They also include [tensorflow](https://www.tensorflow.org/) which is a popular Machine Learning package." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CsBr8fpZhDnu" + }, + "outputs": [], + "source": [ + "# Import packages\n", + "import xarray as xr\n", + "import numpy as np\n", + "\n", + "## Load in the necessary python packages to train a neural network\n", + "\n", + "import tensorflow.keras as keras\n", + "import tensorflow as tf" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "E2TJnQs1-_s9" + }, + "source": [ + "## Download data\n", + "\n", + "We need to download the WeatherBench data from the public repository. We will only download the 500hPa geopotential data at 5.625 degrees, because training on the whole benchmark dataset using colab would take a very long time and is thus beyond the scope of this exercise. \n", + "\n", + "#### Note this download may take over 20 minutes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-5lP1dbj0zjF", + "outputId": "dab95b90-7fc9-47b0-e0ef-b9560d76fac4" + }, + "outputs": [], + "source": [ + "# Download from the ECMWF repository; fall back to the TUM mirror if that fails.\n", + "# A `!` shell escape does not raise a Python exception when the command fails, so a\n", + "# try/except cannot trigger the fallback -- chain it in the shell with `||` instead.\n", + "!wget https://get.ecmwf.int/repository/mooc-machine-learning-weather-climate/geopotential_500_5.625deg.zip || wget --no-check-certificate \"https://dataserv.ub.tum.de/s/m1524895/download?path=%2F5.625deg%2Fgeopotential_500&files=geopotential_500_5.625deg.zip\" -O geopotential_500_5.625deg.zip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k5Dys1d6n44X", + "outputId": "c1379204-953a-485f-816d-b5a47126aec7" + }, + "outputs": [], + "source": [ + "# Unzip the data\n", + "!unzip geopotential_500_5.625deg.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MGhPyQz__RbC" + }, + "source": [ + "Next we open the dataset using xarray. We will also, for the sake of speed and simplicity, load data only every 12 hours." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 322 + }, + "id": "qF8xFfBfhCZG", + "outputId": "57902c7a-6f0f-4387-85ec-271552b78a9d" + }, + "outputs": [], + "source": [ + "z500 = xr.open_mfdataset('geopotential_500*.nc', combine='by_coords').isel(time=slice(None, None, 12))\n", + "z500" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "u_11kVFcjYD3" + }, + "outputs": [], + "source": [ + "# Now we can load the data into memory to speed up accessing data. 
This should take <30s\n", + "z500.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nHD8mLNjhKcq" + }, + "outputs": [], + "source": [ + "# Plot the geopotential at an example date-time\n", + "z500.z.isel(time=0).plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AOrv4B8yh164" + }, + "source": [ + "## Compute baselines\n", + "\n", + "Before training an ML model it is important to have some baselines. Here, we will compute a climatology baseline. For this, we will use the training time period (1979 to 2015) and compute a climatology for each day of the year. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O3jl5Gd1jl5Y" + }, + "outputs": [], + "source": [ + "# training dataset selection\n", + "train_years = slice('1979', '2015')\n", + "# validation dataset selection (this dataset helps with overfitting)\n", + "valid_years = slice('2016', '2016')\n", + "# test dataset selection\n", + "test_years = slice('2017', '2018')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4uUbB6mDiE9x" + }, + "outputs": [], + "source": [ + "def computed_weighted_rmse(fc, gt):\n", + " \"\"\"Error metric to compute the area averaged RMSE.\"\"\"\n", + " error = fc - gt\n", + " weights_lat = np.cos(np.deg2rad(error.lat))\n", + " weights_lat /= weights_lat.mean()\n", + " rmse = np.sqrt(((error)**2 * weights_lat).mean(('time', 'lat', 'lon')))\n", + " return rmse" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PB6zoeIZZO-2" + }, + "source": [ + "Here we consider two baselines: the persistence and the climatology. 
The persistence is calculated as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7QAqzAykZW-9" + }, + "outputs": [], + "source": [ + "# pick the forecast lead time\n", + "lead_time_steps = 10 # 5 day forecast because considering midday and midnight\n", + "\n", + "# compute persistent forecast \n", + "persistence_fc = z500.sel(time=test_years).isel(time=slice(0, -lead_time_steps))\n", + "persistence_fc['time'] = persistence_fc.time + np.timedelta64(5, 'D').astype('timedelta64[ns]')\n", + "\n", + "# target data\n", + "target = z500.sel(time=test_years)['z']\n", + "# compute RMSE\n", + "computed_weighted_rmse(persistence_fc, target)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zKeHi02PATZr" + }, + "source": [ + "The climatology is calculated for each day of year from the training time period" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Dip-vm9Ci7AK" + }, + "outputs": [], + "source": [ + "clim = z500.sel(time=train_years).groupby('time.dayofyear').mean()\n", + "# compute RMSE\n", + "computed_weighted_rmse(clim.sel(dayofyear=z500.sel(time=test_years).time.dt.dayofyear), z500)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6kenbc82knEU" + }, + "source": [ + "## Train a simple CNN\n", + "\n", + "Now we can train a simple convolutional neural network. We will use Keras for this. First though we need to prepare the data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m_svOFJvkpdL" + }, + "outputs": [], + "source": [ + "def get_train_valid_test_dataset(lead_steps, z500_dataset):\n", + " # Split train, valid and test dataset\n", + " train_data = z500_dataset.sel(time=train_years)\n", + " valid_data = z500_dataset.sel(time=valid_years)\n", + " test_data = z500_dataset.sel(time=test_years)\n", + "\n", + " # Normalize the data using the mean and standard deviation of the training data\n", + " mean = train_data.mean()\n", + " std = train_data.std()\n", + "\n", + " train_data = (train_data - mean) / std\n", + " valid_data = (valid_data - mean) / std\n", + " test_data = (test_data - mean) / std\n", + "\n", + " mean = mean['z'].values # extract numerical value from xarray Dataset\n", + " std = std['z'].values # extract numerical value from xarray Dataset\n", + "\n", + " # Create inputs and outputs that are shifted by lead_steps\n", + " X_train = train_data.z.isel(time=slice(None, -lead_steps)).values[..., None]\n", + " Y_train = train_data.z.isel(time=slice(lead_steps, None)).values[..., None]\n", + " X_valid = valid_data.z.isel(time=slice(None, -lead_steps)).values[..., None]\n", + " Y_valid = valid_data.z.isel(time=slice(lead_steps, None)).values[..., None] \n", + " X_test = test_data.z.isel(time=slice(None, -lead_steps)).values[..., None]\n", + " Y_test = test_data.z.isel(time=slice(lead_steps, None)).values[..., None]\n", + " return X_train, Y_train, X_valid, Y_valid, X_test, Y_test, mean, std" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xUXE9Z4jmkzG" + }, + "outputs": [], + "source": [ + "X_train, Y_train, X_valid, Y_valid, X_test, Y_test, mean, std = get_train_valid_test_dataset(lead_time_steps, z500)\n", + "print(X_train.shape)\n", + "print(Y_train.shape)\n", + "print(X_valid.shape)\n", + "print(Y_valid.shape)\n", + "print(X_test.shape)\n", + "print(Y_test.shape)" + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "id": "va694KRDChmV" + }, + "source": [ + "### Build model\n", + "\n", + "Next we will build the model using Keras. There are many guides for Keras out there, for example [this](https://keras.io/examples/vision/mnist_convnet/). Here we will build a convolutional neural network which we briefly discussed at the end of the module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XOiL3zzynV96" + }, + "outputs": [], + "source": [ + "# CNN\n", + "\n", + "model = keras.Sequential([\n", + " keras.layers.Conv2D(32, 5, padding='same'), # 32 channels with a 5x5 convolution\n", + " keras.layers.ELU(), # Slightly smoother alternative to ReLU\n", + " keras.layers.Conv2D(32, 5, padding='same'), # Same padding keeps the size identical.\n", + " keras.layers.ELU(),\n", + " keras.layers.Conv2D(1, 5, padding='same'),\n", + " # No activation since we are solving a regression problem\n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dj1bTrroEtz_" + }, + "source": [ + "Next we need to build the model using an example batch and compile it. As an optimizer we will use the standard Adam optimizer combined with a Mean Squared Error Loss. Details on the Adam optimizer can be found here: Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SPR_Jxn4qFgb" + }, + "outputs": [], + "source": [ + "model.build(X_train[:32].shape)\n", + "model.compile(keras.optimizers.Adam(1e-4), 'mse')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M9SGCnl3o5ZQ" + }, + "outputs": [], + "source": [ + "# With .summary() we can check the shape of the model\n", + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iSw_xTEwp9lc" + }, + "outputs": [], + "source": [ + "# Finally we can fit the model.\n", + "# For each epoch, the entire training dataset has passed through the neural network exactly once\n", + "# Each epoch should take about 10s\n", + "\n", + "model.fit(X_train, Y_train, batch_size=32, epochs=30)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FqMibmtJ1zh3" + }, + "outputs": [], + "source": [ + "# Collect the predictions in a numpy array (they are converted back to xarray further below)\n", + "pred_test = X_test[:, :, :, 0].copy()\n", + "pred_test[:] = model.predict(X_test).squeeze() # To remove channel dimension which is 1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lmBSJMtKFKTw" + }, + "source": [ + "For network training we normalized the data by subtracting the mean and dividing by the standard deviation. To evaluate the predictions, we now need to un-normalize the data using the mean and standard deviation we used to normalize it. Then we can compute the RMSE of the predictions. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FLFR7JklyVJz" + }, + "outputs": [], + "source": [ + "# Unnormalize\n", + "pred_test = pred_test * std + mean\n", + "# compute RMSE\n", + "computed_weighted_rmse(pred_test, target.isel(time=slice(lead_time_steps, None)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xq1O6BaX15YG" + }, + "source": [ + "How does the skill compare to the climatology?" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pAFm8Ez839q3" + }, + "source": [ + "It is always important to visualize your models predictions. Here we take a time from the test period and visualize the ground truth, the climatology and the neural networks' predictions. What do you notice? How about if you try a different time?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vN9pX_IV37VW" + }, + "outputs": [], + "source": [ + "# Note first you need to modify your predictions so they are an xarray instead of a numpy array\n", + "# This way you can access the latitude, longitude and time for each point in the array\n", + "\n", + "# We do this by taking a copy of the original z500 object which has the correct time, \n", + "# latitude and longitude, and replacing the data in this array with the predictions\n", + "pred_xarray = z500.z.sel(time=test_years).isel(time=slice(lead_time_steps, None)).copy()\n", + "pred_xarray.data = pred_test\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "t = xr.DataArray(np.datetime64('2017-10-01T00'))\n", + "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 3))\n", + "\n", + "z500.z.sel(time=t).plot(ax=ax1)\n", + "ax1.set_title('Ground truth')\n", + "\n", + "clim.z.sel(dayofyear=t.dt.dayofyear).plot(ax=ax2)\n", + "ax2.set_title('Climatology')\n", + "\n", + "pred_xarray.sel(time=t).plot(ax=ax3)\n", + "ax3.set_title('Prediction')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZmM8EQtcAUE8" + }, 
+ "source": [ + "The prediction is a lot smoother compared to the ground truth, almost as smooth as climatology. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2NA8xTj3HOj4" + }, + "source": [ + "## Ideas and Additional Exercises\n", + "\n", + "The model we built isn't particularly good. How could we make it better? Here are some ideas to try.\n", + "\n", + "- Make the neural net bigger/smaller? \n", + "\n", + " We provide the following function for you to easily construct a neural network with more layers:\n", + "\n", + "```\n", + "def build_model_cnn(no_of_layers):\n", + "    \"\"\"Build and compile a fully convolutional network with `no_of_layers` hidden Conv2D + ReLU layers.\"\"\"\n", + "    inputs = keras.layers.Input(shape=(32, 64, 1))\n", + "    x = keras.layers.Conv2D(64, 5, padding='same')(inputs)\n", + "    for _ in range(no_of_layers):\n", + "        x = keras.layers.Conv2D(64, 5, padding='same')(x)\n", + "        x = keras.layers.Activation('relu')(x)\n", + "    outputs = keras.layers.Conv2D(1, 5, padding='same')(x)\n", + "    model = keras.models.Model(inputs, outputs)\n", + "    model.compile(keras.optimizers.Adam(1e-4), 'mse')\n", + "    model.summary()  # summary() prints the table itself and returns None\n", + "    return model\n", + "```\n", + "\n", + "- Add more variables, for example 850hPa temperature, which is available, check: https://github.com/pangeo-data/WeatherBench\n", + "- Currently, the convolutions do not wrap around the Earth. You could implement periodic convolutions, see [here](https://github.com/pangeo-data/WeatherBench/blob/master/src/train_nn.py#L102). 
" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/jupyter-images/uah24s/secrets.yaml b/jupyter-images/uah24s/secrets.yaml new file mode 100644 index 00000000..75ac3311 --- /dev/null +++ b/jupyter-images/uah24s/secrets.yaml @@ -0,0 +1,102 @@ +hub: + cookieSecret: "xxx" + config: + Authenticator: + admin_users: + - admins + #If you have a large list of users, consider using allowed_users.yaml + allowed_users: + - users + GitHubOAuthenticator: + client_id: "xxx" + client_secret: "xxx" + oauth_callback_url: "https://uah24s-1.ees220002.projects.jetstream-cloud.org:443/oauth_callback" + JupyterHub: + authenticator_class: github + extraConfig: + 01-no-labels: | + from kubespawner import KubeSpawner + class CustomSpawner(KubeSpawner): + def _build_common_labels(self, extra_labels): + labels = super()._build_common_labels(extra_labels) + # Until https://github.com/jupyterhub/kubespawner/issues/498 + # is fixed + del labels['hub.jupyter.org/username'] + return labels + c.JupyterHub.spawner_class = CustomSpawner + + +proxy: + secretToken: "xxx" + +ingress: + enabled: true + annotations: + kubernetes.io/ingress.class: "nginx" + cert-manager.io/cluster-issuer: "letsencrypt" + #For manually issuing certificates: see vms/jupyter/readme.md + #cert-manager.io/issuer: "incommon" + nginx.ingress.kubernetes.io/proxy-body-size: 500m + hosts: + - "uah24s-1.ees220002.projects.jetstream-cloud.org" + tls: + - hosts: + - "uah24s-1.ees220002.projects.jetstream-cloud.org" + secretName: certmanager-tls-jupyterhub + +#For having a 
dedicated core node: see vms/jupyter/readme.md +#scheduling: +# corePods: +# tolerations: +# - key: hub.jupyter.org/dedicated +# operator: Equal +# value: core +# effect: NoSchedule +# - key: hub.jupyter.org_dedicated +# operator: Equal +# value: core +# effect: NoSchedule +# nodeAffinity: +# matchNodePurpose: require + +singleuser: + extraEnv: + NBGITPULLER_DEPTH: "0" + storage: + capacity: 20Gi + startTimeout: 600 + memory: + guarantee: 6G + limit: 6G + cpu: + guarantee: 1 + limit: 4 + defaultUrl: "/lab" + image: + name: "unidata/uah24s" + tag: "xxx" + lifecycleHooks: # gitpuller takes the clonable repository URL (no /tree/<branch> suffix); the branch is its own argument + postStart: + exec: + command: + - "bash" + - "-c" + - > + dir="/home/jovyan/.ssh"; [ -d $dir ] && { chmod 700 $dir && \ + chmod -f 600 $dir/* && chmod -f 644 $dir/*.pub; } || true; + cp -t /home/jovyan /Acknowledgements.ipynb /update_material.ipynb; + gitpuller https://github.com/freemansw1/AES690ST-SP24-Public main aes690st-sp24-public; + python /default_kernel.py $DEFAULT_ENV_NAME /home/jovyan; + +# Multiple profiles: see vms/jupyter/readme.md +profileList: +- display_name: "CPU Server (default)" + description: "A standard JupyterLab server" + default: true +- display_name: "GPU Server" + description: "A GPU Enabled JupyterLab Server" + kubespawner_override: + extra_resource_limits: + nvidia.com/gpu: "1" + image: "unidata/uah24s-gpu:" + node_selector: {'nodetype': 'gpu'} diff --git a/jupyter-images/uah24s/update_material.ipynb b/jupyter-images/uah24s/update_material.ipynb new file mode 100644 index 00000000..e99eda13 --- /dev/null +++ b/jupyter-images/uah24s/update_material.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "
\n", + "\n", + "
\n", + "\"Unidata\n", + "
\n", + "\n", + "

Update Notebook Material

\n", + "

by Unidata

\n", + "\n", + "
\n", + "
\n", + "\n", + "---\n", + "\n", + "This notebook can be used to update material whenever updates are posted.\n", + "\n", + "---\n", + "\n", + "## Running the Update\n", + "\n", + "When you run the following cell, any changes from the workshop GitHub repository will be applied to the material under the directory defined by `gitdir` in your workspace.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "branch = \"main\"\n", + "repourl = \"https://github.com/freemansw1/AES690ST-SP24-Public\"  # clonable repository URL; the branch is passed separately\n", + "gitdir = \"aes690st-sp24-public\"\n", + "\n", + "!gitpuller {repourl} {branch} {gitdir}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## What to expect?\n", + "\n", + "### No Updates Available\n", + "\n", + "If there are no updates to the material, the output from the cell will look something like the following:\n", + "\n", + "~~~bash\n", + "$ git fetch\n", + "\n", + "$ git reset --mixed\n", + "\n", + "$ git -c user.email=nbgitpuller@nbgitpuller.link -c user.name=nbgitpuller merge -Xours origin/main\n", + "\n", + "Already up to date.\n", + "~~~\n", + "\n", + "### Updates are Available\n", + "If there are updates to the material, the output will show the updated files being pulled, for example:\n", + "\n", + "~~~bash\n", + "$ git fetch\n", + "\n", + "From https://github.com/Unidata/users-workshop-2023\n", + "\n", + " 193f5d9..392761e main -> origin/main\n", + "\n", + "$ git reset --mixed\n", + "\n", + "$ git -c user.email=nbgitpuller@nbgitpuller.link -c user.name=nbgitpuller merge -Xours origin/main\n", + "\n", + "Updating 193f5d9..392761e\n", + "\n", + "Fast-forward\n", + "\n", + " {monday => 1_monday}/martin_tabular/TabularSummer2023.ipynb | 0\n", + "\n", + " .../breakout_sessions/Intro_to_sklearn_2023.ipynb | 0\n", + "\n", + " {thursday => 4_thursday}/Grover_Open_Science/placeholder.rtf | 0\n", + 
"\n", + " README.md | 2 ++\n", +
"\n", + " 4 files changed, 2 insertions(+)\n", + "\n", + " rename {monday => 1_monday}/martin_tabular/TabularSummer2023.ipynb (100%)\n", + "\n", + " rename {wednesday => 3_wednesday}/breakout_sessions/Intro_to_sklearn_2023.ipynb (100%)\n", + "\n", + " rename {thursday => 4_thursday}/Grover_Open_Science/placeholder.rtf (100%)\n", + "~~~\n", + "\n", + "That's it!\n", + "Now you have the latest and greatest material." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}