Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXAPPS-805] Notebook execution failures in CI. #12068

Merged
merged 2 commits into from
Aug 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tests/nightly/straight_dope/test_notebooks_multi_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@
This file tests that the notebooks requiring multi GPUs run without
warning or exception.
"""
import logging
import unittest
from straight_dope_test_utils import _test_notebook
from straight_dope_test_utils import _download_straight_dope_notebooks

class StraightDopeMultiGpuTests(unittest.TestCase):
# One-time fixture for the whole test class (unittest calls setUpClass before any test runs).
@classmethod
def setUpClass(self):
# Configure the root logger at INFO level so logging.info() messages are emitted.
logging.basicConfig(level=logging.INFO)
# Fail the class set-up if the helper returns a falsy value
# (presumably meaning the notebooks could not be downloaded -- confirm in straight_dope_test_utils).
assert _download_straight_dope_notebooks()

# Chapter 7
Expand Down
3 changes: 2 additions & 1 deletion tests/nightly/straight_dope/test_notebooks_single_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
warning or exception.
"""
import glob
import logging
import re
import os
import unittest
Expand Down Expand Up @@ -51,9 +52,9 @@
class StraightDopeSingleGpuTests(unittest.TestCase):
# One-time fixture for the whole test class (unittest calls setUpClass before any test runs).
@classmethod
def setUpClass(self):
# Configure the root logger at INFO level so logging.info() messages are emitted.
logging.basicConfig(level=logging.INFO)
# Fail the class set-up if the helper returns a falsy value
# (presumably meaning the notebooks could not be downloaded -- confirm in straight_dope_test_utils).
assert _download_straight_dope_notebooks()


def test_completeness(self):
"""
Make sure that every tutorial that isn't in the whitelist is considered for testing by this
Expand Down
26 changes: 21 additions & 5 deletions tests/utils/notebook_test/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@

IPYTHON_VERSION = 4 # Pin to ipython version 4.
TIME_OUT = 10*60 # Maximum 10 mins/test. Reaching timeout causes test failure.
# Maximum number of attempts made to execute a notebook when the kernel fails to start.
RETRIES = 8
# Exact RuntimeError text treated as a kernel start-up failure; only errors whose
# message equals this string are retried, any other RuntimeError is re-raised.
KERNEL_ERROR_MSG = 'Kernel died before replying to kernel_info'


def run_notebook(notebook, notebook_dir, kernel=None, no_cache=False, temp_dir='tmp_notebook'):
"""Run tutorial Jupyter notebook to catch any execution error.
Expand Down Expand Up @@ -72,15 +75,28 @@ def run_notebook(notebook, notebook_dir, kernel=None, no_cache=False, temp_dir='
os.makedirs(working_dir)
try:
notebook = nbformat.read(notebook_path + '.ipynb', as_version=IPYTHON_VERSION)
# Add a small delay to allow time for sockets to be freed. This is a
# stop-gap measure to work around the 1000 ms socket linger that is
# hard-coded in the kernel API code.
time.sleep(1.1)
if kernel is not None:
eprocessor = ExecutePreprocessor(timeout=TIME_OUT, kernel_name=kernel)
else:
eprocessor = ExecutePreprocessor(timeout=TIME_OUT)
nb, _ = eprocessor.preprocess(notebook, {'metadata': {'path': working_dir}})

# There is a low (< 1%) chance that starting a notebook executor will fail due to the kernel
# taking too long to start, or a port collision, etc.
for i in range(RETRIES):
try:
nb, _ = eprocessor.preprocess(notebook, {'metadata': {'path': working_dir}})
except RuntimeError as rte:
# We check whether the exception has to do with the Jupyter kernel failing to start. If
# not, we re-raise it to prevent the notebook from failing RETRIES times. It is not ideal
# to inspect the exception message, but it is necessary for the retry logic, as the Jupyter
# client raises a generic RuntimeError that can be confused with other RuntimeErrors.
if str(rte) != KERNEL_ERROR_MSG:
raise rte

logging.info("Error starting preprocessor: {}. Attempt {}/{}".format(str(rte), i+1, RETRIES))
time.sleep(1)
continue
break
except Exception as err:
err_msg = str(err)
errors.append(err_msg)
Expand Down