Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve notebook build script #300

Merged
merged 5 commits into from
May 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 113 additions & 43 deletions examples/makenotebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import os
import re
import signal
import sys
from pathlib import Path
from timeit import default_timer as timer

Expand All @@ -20,6 +21,8 @@
from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor
from py2jn.tools import py_string_to_notebook, write_notebook

examples_dir = Path(__file__).resolve().parent # absolute path to ../scico/examples/

have_ray = True
try:
import ray
Expand Down Expand Up @@ -113,6 +116,28 @@ def execute_notebook(fname):
return True


# Matches a top-level (column 0) import of ``ray`` or ``scico.ray``:
#   import ray / import ray.tune / import scico.ray as ray
#   from ray import ... / from scico.ray import ...
#   from scico import ray
# The ``\b`` after ``ray`` keeps unrelated modules whose names merely start
# with "ray" (e.g. ``raytracer``) from matching, and the escaped dot keeps
# ``scico.ray`` from matching names such as ``scicoxray``.
_RAY_IMPORT_RE = re.compile(
    r"^(?:"
    r"import[ \t]+(?:scico\.)?ray\b"
    r"|from[ \t]+(?:scico\.)?ray\b[\w.]*[ \t]+import\b"
    r"|from[ \t]+scico[ \t]+import[ \t]+(?:[^#\n]*[ \t,(])?ray\b"
    r")",
    re.MULTILINE,
)


def script_uses_ray(fname):
    """Determine whether a script uses ray.

    The script text is scanned for an import of ``ray`` or ``scico.ray``
    that starts at the beginning of a line. Both ``import ...`` and
    ``from ... import ...`` forms are recognized.

    Args:
        fname: Path of the Python script to inspect.

    Returns:
        ``True`` if a matching import statement is found, otherwise
        ``False``.
    """

    # Python source files are UTF-8 by default, so do not depend on the
    # locale's preferred encoding when reading them.
    with open(fname, "r", encoding="utf-8") as f:
        text = f.read()
    return _RAY_IMPORT_RE.search(text) is not None


def script_path(sname):
    """Return the location of script `sname` within the examples "scripts" directory."""

    scripts_dir = examples_dir / "scripts"
    return scripts_dir.joinpath(sname)


def notebook_path(sname):
    """Return the notebook location corresponding to script `sname`.

    Only the stem of `sname` is used: the notebook has the same base name
    with an ".ipynb" extension and lives in the examples "notebooks"
    directory.
    """

    nbname = f"{Path(sname).stem}.ipynb"
    return examples_dir.joinpath("notebooks", nbname)


argparser = argparse.ArgumentParser(
description="Convert Python example scripts to Jupyter notebooks."
)
Expand All @@ -123,19 +148,30 @@ def execute_notebook(fname):
"Has no effect when files to process are explicitly specified.",
)
argparser.add_argument(
"--no-exec", action="store_true", help="Create/update notebooks but don't execute them"
"--no-exec", action="store_true", help="Create/update notebooks but don't execute them."
)
argparser.add_argument(
"--no-ray",
action="store_true",
help="Execute notebooks serially, without the use of ray parallelization",
help="Execute notebooks serially, without the use of ray parallelization.",
)
argparser.add_argument(
"--verbose",
action="store_true",
help="Verbose operation.",
)
argparser.add_argument(
"--test",
action="store_true",
help="Show actions that would be taken but don't do anything.",
)
argparser.add_argument("filename", nargs="*", help="Optional Python example script filenames")
args = argparser.parse_args()


# Raise error if ray needed but not present
if not have_ray and not args.no_ray:
raise RuntimeError("The ray package is required to run this script")
raise RuntimeError("The ray package is required to run this script, try --no-ray")


if args.filename:
Expand All @@ -144,7 +180,7 @@ def execute_notebook(fname):
else:
# Read script names from index file
scriptnames = []
srcidx = "scripts/index.rst"
srcidx = examples_dir / "scripts" / "index.rst"
with open(srcidx, "r") as idxfile:
for line in idxfile:
m = re.match(r"(\s+)- ([^\s]+.py)", line)
Expand All @@ -154,13 +190,11 @@ def execute_notebook(fname):
# Ensure list entries are unique
scriptnames = sorted(list(set(scriptnames)))

# Create list of selected scripts and corresponding notebooks.
# Create list of selected scripts.
scripts = []
notebooks = []
for s in scriptnames:
sb = Path(s).stem
spath = Path("scripts") / Path(sb + ".py")
npath = Path("notebooks") / Path(sb + ".ipynb")
spath = script_path(s)
npath = notebook_path(s)
# If scripts specified on command line or --all flag specified, convert all scripts.
# Otherwise, only convert scripts that have a newer timestamp than their corresponding
# notebooks, or that have not previously been converted (i.e. corresponding notebook
Expand All @@ -172,47 +206,83 @@ def execute_notebook(fname):
or spath.stat().st_mtime > npath.stat().st_mtime
):
# Add to the list of selected scripts
scripts.append(spath)
# Add to the list of selected notebooks
notebooks.append(npath)
scripts.append(s)

# Display status information
print(f"Processing scripts {', '.join([os.path.basename(s) for s in scriptnames])}")
if not scripts:
if args.verbose:
print("No scripts require conversion")
sys.exit(0)

# Convert selected scripts to corresponding notebooks and create list of new/modified notebooks.
for spath in scripts:
npath = Path("notebooks") / Path(spath.stem + ".ipynb")
# Display status information
if args.verbose:
print(f"Processing scripts {', '.join(scripts)}")

# Convert selected scripts to corresponding notebooks and determine which can be run in parallel
serial_scripts = []
parallel_scripts = []
for s in scripts:
spath = script_path(s)
npath = notebook_path(s)
# Determine how script should be executed
if script_uses_ray(spath):
serial_scripts.append(s)
else:
parallel_scripts.append(s)
# Make notebook file
script_to_notebook(spath, npath)

# Run relevant notebooks if the no-execution flag is not specified and the notebooks list is not empty
if not args.no_exec and notebooks:
nproc = len(notebooks)

# Execute notebooks serially if requested to avoid use of ray, or if only one notebook
if args.no_ray or nproc < 2:

for nbfile in notebooks:
execute_notebook(nbfile)
if args.verbose or args.test:
print(f"Converting script {s} to notebook")
if not args.test:
script_to_notebook(spath, npath)

if args.no_exec:
if args.verbose:
print("Notebooks will not be executed")
sys.exit(0)


# If ray disabled or not worth using, run all serially
if args.no_ray or len(parallel_scripts) < 2:
serial_scripts.extend(parallel_scripts)
parallel_scripts = []

# Execute notebooks corresponding to serial_scripts
for s in serial_scripts:
npath = notebook_path(s)
if args.verbose or args.test:
print(f"Executing notebook corresponding to script {s}")
if not args.test:
execute_notebook(npath)


# Execute notebooks corresponding to parallel_scripts
if parallel_scripts:
if args.verbose or args.test:
print(
f"Notebooks corresponding to scripts {', '.join(parallel_scripts)} will "
"be executed in parallel"
)

# Execute notebooks in parallel using ray
else:
ray.init()

ngpu = 0
ar = ray.available_resources()
ncpu = max(int(ar["CPU"]) // nproc, 1)
if "GPU" in ar:
ngpu = max(int(ar["GPU"]) // nproc, 1)
print(f"Running on {ncpu} CPUs and {ngpu} GPUs per process")

# Function to execute each notebook with available resources suitably divided
@ray.remote(num_cpus=ncpu, num_gpus=ngpu)
def ray_run_nb(fname):
execute_notebook(fname)

nproc = len(parallel_scripts)
ray.init()

ngpu = 0
ar = ray.available_resources()
ncpu = max(int(ar["CPU"]) // nproc, 1)
if "GPU" in ar:
ngpu = max(int(ar["GPU"]) // nproc, 1)
if args.verbose or args.test:
print(f" Running on {ncpu} CPUs and {ngpu} GPUs per process")

# Function to execute each notebook with available resources suitably divided
@ray.remote(num_cpus=ncpu, num_gpus=ngpu)
def ray_run_nb(fname):
execute_notebook(fname)

if not args.test:
# Execute relevant notebooks in parallel
try:
notebooks = [notebook_path(s) for s in parallel_scripts]
objrefs = [ray_run_nb.remote(nbfile) for nbfile in notebooks]
ray.wait(objrefs, num_returns=len(objrefs))
except KeyboardInterrupt:
Expand Down
2 changes: 1 addition & 1 deletion examples/scripts/deconv_tv_admm.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
This example demonstrates the use of class
[admm.ADMM](../_autosummary/scico.optimize.rst#scico.optimize.ADMM) to solve
an image deconvolution problem with anisotropic total variation (TV)
regularization.
regularization

$$\mathrm{argmin}_{\mathbf{x}} \; \| \mathbf{y} - A \mathbf{x} \|_2^2
+ \lambda \| C \mathbf{x} \|_1 \;,$$
Expand Down