From 4395463890ad4f28416e59c1b88831f00826cded Mon Sep 17 00:00:00 2001 From: Brendt Wohlberg Date: Tue, 23 Nov 2021 09:54:36 -0800 Subject: [PATCH] Clean up notebooks build script (#108) * Initial version of pure python conversion script * Improve script capabilities * Minor improvement * Improve control flow logic * Update notes on tools in examples directory * Remove Makefile * Fix some bugs * Remove bash script to notebook converter * Rename notebook build script * Add flag indicating processing of all scripts * Better handling of check for ray presence * Fix list of selected scripts --- examples/Makefile | 24 ----- examples/README.rst | 14 +-- examples/makejnb.py | 87 ---------------- examples/makenotebooks.py | 212 ++++++++++++++++++++++++++++++++++++++ examples/pytojnb.sh | 30 ------ 5 files changed, 216 insertions(+), 151 deletions(-) delete mode 100644 examples/Makefile delete mode 100755 examples/makejnb.py create mode 100755 examples/makenotebooks.py delete mode 100755 examples/pytojnb.sh diff --git a/examples/Makefile b/examples/Makefile deleted file mode 100644 index 2b9571f25..000000000 --- a/examples/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -# Makefile for Jypyter notebooks -# To rebuild the notebooks without executing them, do -# make [target] fast - -SRCPTH = scripts -DSTPTH = notebooks -SRC = $(SRCPTH)/*.py -SRCBASE = $(shell echo $(SRC) | xargs -n 1 basename) -DSTBASE = $(SRCBASE:%.py=%.ipynb) -DST = $(shell echo $(DSTBASE) | xargs -n 1 printf "notebooks/%s ") - -.PHONY: all fast -.SUFFIXES: .py .ipynb - -all: $(DST) -fast: $(DST) - -$(DSTPTH)/%.ipynb: $(SRCPTH)/%.py -ifneq (,$(findstring fast,$(MAKECMDGOALS))) # source: https://www.gnu.org/software/make/manual/html_node/Testing-Flags.html and https://www.gnu.org/software/make/manual/html_node/Goals.html - ./pytojnb $< $@ -else - ./pytojnb $< $@ - jupyter nbconvert --to=notebook --output $(shell basename $@) --execute $@ -endif diff --git a/examples/README.rst b/examples/README.rst index 
69cf9f3fa..54766aa72 100644 --- a/examples/README.rst +++ b/examples/README.rst @@ -16,14 +16,14 @@ The procedure for adding a adding a new notebook to the documentation is: 2. Run ``makeindex.py`` to update the example scripts README file, the notebook index file, and the examples index in the docs. -3. Run ``makejnb.py`` to build the new notebook, as well as any other notebooks that are out of date with respect to their source scripts, as determined by the respective file timestamps. +3. Run ``makenotebooks.py`` to build the new notebook, as well as any other notebooks that are out of date with respect to their source scripts, as determined by the respective file timestamps. 4. Add and commit the new script, the ``scripts/index.rst`` script index file, the auto-generated ``scripts/README.rst`` file and ``docs/source/examples.rst`` index file, and the new or updated notebooks and the auto-generated ``notebooks/index.ipynb`` file in the notebooks directory (following the submodule handling procedure as described in the developer docs). The procedure for rebuilding notebook(s) after the source file(s) have been modified is: -1. Run ``makejnb.py`` to build the new notebook, as well as any other notebooks that are out of date with respect to their source scripts, as determined by the respective file timestamps. Note that timestamps for files retrieved from version control may not be meaningful for this purpose. In such cases, ``touch`` the relevant source scripts to force updating on the next run of ``makejnb.py``. +1. Run ``makenotebooks.py`` to build the new notebook, as well as any other notebooks that are out of date with respect to their source scripts, as determined by the respective file timestamps. Note that timestamps for files retrieved from version control may not be meaningful for this purpose. In such cases, ``touch`` the relevant source scripts to force updating on the next run of ``makenotebooks.py``. 2. 
Add and commit the modified script(s), and the updated notebooks (following the submodule handling procedure as described in the developer docs). @@ -40,17 +40,11 @@ A number of files in this directory assist in the mangement of the usage example `notebooks_requirements.txt `_ Requirements file (as used by ``pip``) listing additional dependencies for building the Jupyter notebooks from the usage example scripts. -`makejnb.py `_ - An alternative to the makefile for updating the auto-generated Jupyter notebooks. Notebooks are executed in parallel using the ``ray`` package. +`makenotebooks.py `_ + Auto-generate Jupyter notebooks from the example scripts. `makeindex.py `_ Auto-generate the docs example index ``docs/source/examples.rst`` from the example scripts index ``scripts/index.rst``. -`Makefile `_ - A makefile allowing use of the command ``make`` to update auto-generated Jupyter notebooks. Run as ``make no-execute=true`` to update the notebooks without executing them. Use of `makejnb.py` rather than this makefile is recommended. - -`pytojnb.sh `_ - Low-level python to Jupyter notebook conversion script. Used by both the makefile and `makejnb.py `_. - `scriptcheck.sh `_ Run all example scripts with a reduced number of iterations as a rapid check that they are functioning correctly. diff --git a/examples/makejnb.py b/examples/makejnb.py deleted file mode 100755 index 0d7a715f3..000000000 --- a/examples/makejnb.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python - -# Extract a list of Python scripts from "scripts/index.rst" and -# create/update and execute any Jupyter notebooks that are out -# of date with respect to their source Python scripts. If script -# names specified on command line, process them instead. 
-# Run as -# python makejnb.py [script_name_1 [script_name_2 [...]]] - -import os -import re -import sys -from pathlib import Path - -import nbformat -from nbconvert.preprocessors import ExecutePreprocessor - -try: - import ray -except ImportError: - raise RuntimeError("The ray package is required to run this script") - - -if sys.argv[1:]: - # Script names specified on command line - scriptnames = [os.path.basename(s) for s in sys.argv[1:]] -else: - # Read script names from index file - scriptnames = [] - srcidx = "scripts/index.rst" - with open(srcidx, "r") as idxfile: - for line in idxfile: - m = re.match(r"(\s+)- ([^\s]+.py)", line) - if m: - scriptnames.append(m.group(2)) - -# Ensure list entries are unique -scriptnames = list(set(scriptnames)) - -# Construct script paths -scripts = [Path("scripts") / Path(s) for s in scriptnames] - - -# Construct list of notebooks that are out of date with respect to the corresponding -# script, or that have not yet been constructed from the corresponding script, and -# construct/update each of them -notebooks = [] -for s in scripts: - nb = Path("notebooks") / (s.stem + ".ipynb") - if not nb.is_file() or s.stat().st_mtime > nb.stat().st_mtime: - # Make notebook file - os.popen(f"./pytojnb.sh {s} {nb}") - # Add it to the list for execution - notebooks.append(nb) -if sys.argv[1:]: - # If scripts specified on command line, add all corresonding notebooks to the - # list for execution - notebooks = [Path("notebooks") / (s.stem + ".ipynb") for s in scripts] - -ray.init() - -nproc = len(notebooks) -ngpu = 0 -ar = ray.available_resources() -ncpu = max(int(ar["CPU"]) // nproc, 1) -if "GPU" in ar: - ngpu = max(int(ar["GPU"]) // nproc, 1) -print(f"Running on {ncpu} CPUs and {ngpu} GPUs per process") - -# Function to execute each notebook with one GPU -@ray.remote(num_cpus=ncpu, num_gpus=ngpu) -def run_nb(fname): - with open(fname) as f: - nb = nbformat.read(f, as_version=4) - - ep = ExecutePreprocessor() - try: - out = ep.preprocess(nb) 
#!/usr/bin/env python

# Extract a list of Python scripts from "scripts/index.rst" and
# create/update and execute any Jupyter notebooks that are out
# of date with respect to their source Python scripts. If script
# names specified on command line, process them instead.
# Run
#     python makenotebooks.py -h
# for usage details.

import argparse
import os
import re
import sys
from pathlib import Path
from timeit import default_timer as timer

# ray is optional: it is only needed when notebooks are executed in parallel.
have_ray = True
try:
    import ray
except ImportError:
    have_ray = False

# NOTE: the notebook tooling (nbformat, nbconvert, py2jn) is imported inside the
# functions that use it, so that the text-processing helpers in this file can be
# imported without the notebook stack installed.

# Line starting a raw docstring, i.e. r"""
RAW_DOCSTRING_RE = re.compile(r'^r"""')
# Sphinx citation role, e.g. :cite:`key`
CITE_RE = re.compile(r":cite:`([^`]+)`")
# NOTE(review): the replacement string in the reviewed text was empty, which
# deletes citations rather than converting them as the accompanying comments
# describe; this is presumably markup lost in extraction. The HTML form below is
# the one recognized by nbsphinx -- confirm against the repository.
CITE_HTML = r'<cite data-cite="\1"/>'
# Line starting an import statement
IMPORT_RE = re.compile(r"^(import|from)")
# Line that may belong to the import section: an import statement, a blank line,
# a line starting with non-word characters (e.g. an indented continuation line),
# a closing parenthesis, or part of a try/except construction.
IMPORT_SECTION_RE = re.compile(r"(^import|^from|^\n$|^\W+[^\W]|^\)$|^try:$|^except)")
# Entry in the script index file, e.g. "   - script_name.py"
INDEX_ENTRY_RE = re.compile(r"\s+- (\S+\.py)")

SCRIPT_INDEX = "scripts/index.rst"
SCRIPT_DIR = Path("scripts")
NOTEBOOK_DIR = Path("notebooks")
PLOT_CONFIG_LINE = "plot.config_notebook_plotting()\n"


def clean_line(line):
    """Apply per-line fixes: remove r from r\"\"\" and convert the cite format."""

    line = RAW_DOCSTRING_RE.sub('"""', line)
    return CITE_RE.sub(CITE_HTML, line)


def plot_config_index(lines):
    """Return the index in `lines` at which to insert the plot config line.

    The index directly follows the last import statement in `lines`. If that
    statement is extended over multiple lines using parentheses, the index
    follows its closing parenthesis. If there is no import statement, the
    index is 0.
    """

    for idx in range(len(lines) - 1, -1, -1):
        if IMPORT_RE.match(lines[idx]):
            break
    else:
        return 0
    # Skip over the continuation lines of a parenthesized import so that the
    # inserted line does not end up inside the import statement.
    depth = lines[idx].count("(") - lines[idx].count(")")
    while depth > 0 and idx + 1 < len(lines):
        idx += 1
        depth += lines[idx].count("(") - lines[idx].count(")")
    return idx + 1


def py_file_to_string(src):
    """Preprocess example script file and return result as a string.

    The header comment (and the line following it) is dropped, notebook
    plotting configuration is inserted after the last import statement,
    processing stops at the first line starting with ``input``, and trailing
    blank lines are removed.

    Args:
        src: Path of the example script.

    Returns:
        The preprocessed script source.
    """

    with open(src, "r", encoding="utf-8") as srcfile:
        # Drop header comment
        for line in srcfile:
            if not line.startswith("#"):
                break  # assume first non-comment line is a newline that can be dropped

        # Read up to and including the first line after the import section
        lines = []
        import_seen = False
        for line in srcfile:
            line = clean_line(line)
            lines.append(line)
            if import_seen:
                # Once an import statement has been seen, stop at the first line that
                # cannot be part of the import section (note that the handling of
                # multi-line imports and try/except constructions is probably not
                # very robust).
                if not IMPORT_SECTION_RE.match(line):
                    break
            elif IMPORT_RE.match(line):
                import_seen = True

        # Insert notebook plotting config directly after last import statement
        lines.insert(plot_config_index(lines), PLOT_CONFIG_LINE)

        # Process remainder of source file
        for line in srcfile:
            if line.startswith("input"):  # end processing when input statement encountered
                break
            lines.append(clean_line(line))

    # Remove trailing blank lines. (Slicing with lines[0:-n] would discard every
    # line when there are no trailing blank lines, i.e. n == 0.)
    while lines and lines[-1] == "\n":
        lines.pop()

    return "".join(lines)


def script_to_notebook(src, dst):
    """Convert a Python example script into a Jupyter notebook.

    Args:
        src: Path of the example script.
        dst: Path of the notebook file to write.
    """

    from py2jn.tools import py_string_to_notebook, write_notebook

    text = py_file_to_string(src)
    nb = py_string_to_notebook(text)
    write_notebook(nb, dst)


def execute_notebook(fname):
    """Execute the specified notebook file.

    The notebook file is overwritten with the executed notebook, and the
    execution time is printed.

    Args:
        fname: Path of the notebook file.

    Raises:
        Exception: If execution of a notebook cell fails.
    """

    import nbformat
    from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor

    with open(fname, encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)
    ep = ExecutePreprocessor()
    try:
        t0 = timer()
        ep.preprocess(nb)
        t1 = timer()
    except CellExecutionError as err:
        raise Exception(f"Error executing the notebook {fname}") from err
    with open(fname, "w", encoding="utf-8") as f:
        nbformat.write(nb, f)
    print(f"{fname} done in {(t1 - t0):.1e} s")


def divide_resources(available, nproc):
    """Divide the available CPUs and GPUs between `nproc` processes.

    Args:
        available: Dict of available resources with a "CPU" entry and
            optionally a "GPU" entry.
        nproc: Number of processes; values below 1 are treated as 1.

    Returns:
        Tuple `(ncpu, ngpu)` of the number of CPUs and GPUs per process.
        Each process gets at least one CPU, and at least one GPU if any
        GPUs are available.
    """

    nproc = max(nproc, 1)  # avoid division by zero
    ncpu = max(int(available["CPU"]) // nproc, 1)
    ngpu = 0
    if "GPU" in available:
        ngpu = max(int(available["GPU"]) // nproc, 1)
    return ncpu, ngpu


def parse_args(argv=None):
    """Parse command line arguments (`sys.argv[1:]` if `argv` is ``None``)."""

    argparser = argparse.ArgumentParser(
        description="Convert Python example scripts to Jupyter notebooks."
    )
    argparser.add_argument(
        "--all",
        action="store_true",
        help="Process all notebooks, without checking timestamps. "
        "Has no effect when files to process are explicitly specified.",
    )
    argparser.add_argument(
        "--no-exec", action="store_true", help="Create/update notebooks but don't execute them"
    )
    argparser.add_argument(
        "--no-ray",
        action="store_true",
        help="Execute notebooks serially, without the use of ray parallelization",
    )
    argparser.add_argument(
        "filename", nargs="*", help="Optional Python example script filenames"
    )
    return argparser.parse_args(argv)


def read_script_index(srcidx=SCRIPT_INDEX):
    """Return the list of script names listed in index file `srcidx`."""

    names = []
    with open(srcidx, "r") as idxfile:
        for line in idxfile:
            m = INDEX_ENTRY_RE.match(line)
            if m:
                names.append(m.group(1))
    return names


def main(argv=None):
    """Convert the selected example scripts and execute the notebooks."""

    args = parse_args(argv)

    # Raise error if ray needed but not present. It is only needed when
    # notebooks are to be executed in parallel.
    if not have_ray and not args.no_ray and not args.no_exec:
        raise RuntimeError("The ray package is required to run this script")

    if args.filename:
        # Script names specified on command line
        scriptnames = [os.path.basename(s) for s in args.filename]
    else:
        # Read script names from index file
        scriptnames = read_script_index()

    # Ensure list entries are unique, while retaining their order so that the
    # processing order is the same from run to run
    scriptnames = list(dict.fromkeys(scriptnames))

    # Create list of selected scripts and corresponding notebooks.
    selected = []
    for name in scriptnames:
        stem = Path(name).stem
        spath = SCRIPT_DIR / (stem + ".py")
        npath = NOTEBOOK_DIR / (stem + ".ipynb")
        # If scripts specified on command line or --all flag specified, convert all
        # scripts. Otherwise, only convert scripts that have a newer timestamp than
        # their corresponding notebooks, or that have not previously been converted
        # (i.e. corresponding notebook file does not exist).
        if (
            args.all
            or args.filename
            or not npath.is_file()
            or spath.stat().st_mtime > npath.stat().st_mtime
        ):
            selected.append((spath, npath))

    # Nothing to do: return before attempting to divide resources between
    # zero processes.
    if not selected:
        print("No scripts selected for processing")
        return

    # Display status information
    print(f"Processing scripts {', '.join(spath.name for spath, _ in selected)}")

    # Convert selected scripts to corresponding notebooks.
    for spath, npath in selected:
        script_to_notebook(spath, npath)

    # Run relevant notebooks unless the no-execution flag is specified
    if args.no_exec:
        return

    notebooks = [npath for _, npath in selected]

    # Execute notebooks serially if requested to avoid use of ray
    if args.no_ray:
        for nbfile in notebooks:
            execute_notebook(nbfile)
        return

    # Execute notebooks in parallel using ray
    ray.init()
    ncpu, ngpu = divide_resources(ray.available_resources(), len(notebooks))
    print(f"Running on {ncpu} CPUs and {ngpu} GPUs per process")

    # Function to execute each notebook with available resources suitably divided
    @ray.remote(num_cpus=ncpu, num_gpus=ngpu)
    def ray_run_nb(fname):
        execute_notebook(fname)

    # Execute relevant notebooks in parallel; blocks until all are done
    ray.get([ray_run_nb.remote(nbfile) for nbfile in notebooks])


if __name__ == "__main__":
    main()
/bin/bash - -# Convert a Python script into a Jupyter notebook -# Run as -# pytojnb.sh - -src=$1 -dst=$2 - -tmp=$(mktemp /tmp/pytojnb.XXXXXX) -trap "rm -f $tmp" 0 2 3 15 - -# sed usage from: -# https://stackoverflow.com/questions/37909388 - -# Remove trailing input -sed '${/^input(/d;}' $src > $tmp -# Remove header comment -sed -i '1,/^$/d' $tmp -# Remove r from r""" -sed -i 's/^r"""$/"""/' $tmp -# Insert notebook plot config after last import -sed -E -i '/^(from|import)[^\n]*/,$!b;//{x;//p;g};//!H;$!d;x;s//&\nplot.config_notebook_plotting()/' $tmp -# Convert citations to nbsphinx-recognized format -sed -E -i 's/:cite:`([^`]+)`//g' $tmp - -# Convert modified script to notebook -python -m py2jn $tmp $dst - -exit 0