import os
import subprocess
import tempfile
import nbformat
import pytest
import warnings
import markdown
import re
import urllib.request
from enchant.checker import SpellChecker
from enchant.tokenize import Filter, EmailFilter, URLFilter
from enchant import DictWithPWL
from lxml.html import document_fromstring, etree
from urllib.request import urlopen, URLError, Request
"""
run all tests:
pytest -v --tb=short
run python tests:
pytest -v --tb=short tests/test_notebooks.py::Test_notebooks::test_python_notebook
run r tests:
pytest -v --tb=short tests/test_notebooks.py::Test_notebooks::test_r_notebook
run specific Python test:
pytest -v --tb=short tests/test_notebooks.py::Test_notebooks::test_python_notebook[00_Setup.ipynb]
run specific R test:
pytest -v --tb=short tests/test_notebooks.py::Test_notebooks::test_r_notebook[00_Setup.ipynb]
-s : disable all capturing of output.
"""
class Test_notebooks(object):
    """
    Testing of SimpleITK Jupyter notebooks:
    1. Static analysis:
       Check that the notebooks do not contain output (sanity check, as output
       should not have been pushed to the repository).
       Check that all the URLs in the markdown cells are not broken.
    2. Dynamic analysis:
       Run the notebook and check for errors. In some notebooks we
       intentionally cause errors to illustrate certain features of the toolkit.
       All code cells that intentionally generate an error are expected to be
       marked using the cell's metadata. In the notebook go to
       "View->Cell Toolbar->Edit Metadata" and add the following JSON entry:
       "simpleitk_error_expected": simpleitk_error_message
       with the appropriate "simpleitk_error_message" text.
       Cells where an error is allowed, but not necessarily expected, should be
       marked with the following JSON:
       "simpleitk_error_allowed": simpleitk_error_message
       The simpleitk_error_message is a substring of the generated error
       message, such as 'Exception thrown in SimpleITK Show:'.

    To test notebooks that use too much memory (exceed the 4GB allocated for the
    testing machine):
    1. Create an environment variable named SIMPLE_ITK_MEMORY_CONSTRAINED_ENVIRONMENT.
    2. Import setup_for_testing.py at the top of the notebook. This module
       decorates sitk.ReadImage so that after reading the initial image it is
       resampled by a factor of 4 in each dimension.

    Adding a test:
    Simply add the new notebook file name to the list of files decorating the
    test_python_notebook or test_r_notebook functions. DON'T FORGET THE COMMA.
    """
    _allowed_error_markup = 'simpleitk_error_allowed'
    _expected_error_markup = 'simpleitk_error_expected'
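    # A minimal sketch of the kind of monkey-patching the docstring attributes to
    # setup_for_testing.py (an assumption for illustration, not the actual
    # module): wrap sitk.ReadImage so that every image read is shrunk by a factor
    # of 4 in each dimension.
    #
    #   import SimpleITK as sitk
    #   _original_read_image = sitk.ReadImage
    #   def _read_and_shrink(*args, **kwargs):
    #       image = _original_read_image(*args, **kwargs)
    #       return sitk.Shrink(image, [4] * image.GetDimension())
    #   sitk.ReadImage = _read_and_shrink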
    @pytest.mark.parametrize(
        'notebook_file_name',
        ['00_Setup.ipynb',
         '01_Image_Basics.ipynb',
         '02_Pythonic_Image.ipynb',
         pytest.param('03_Image_Details.ipynb',
                      marks=pytest.mark.skipif(
                          os.environ.get('GITHUB_ACTIONS') == 'true',
                          reason="on GitHub runners, nbconvert intermittently fails with dead kernel, even after reducing notebook memory usage")),
         '04_Image_Display.ipynb',
         '05_Results_Visualization.ipynb',
         '10_matplotlibs_imshow.ipynb',
         '20_Expand_With_Interpolators.ipynb',
         '21_Transforms_and_Resampling.ipynb',
         '22_Transforms.ipynb',
         '300_Segmentation_Overview.ipynb',
         '30_Segmentation_Region_Growing.ipynb',
         '31_Levelset_Segmentation.ipynb',
         '32_Watersheds_Segmentation.ipynb',
         '33_Segmentation_Thresholding_Edge_Detection.ipynb',
         '34_Segmentation_Evaluation.ipynb',
         pytest.param('35_Segmentation_Shape_Analysis.ipynb',
                      marks=pytest.mark.skipif(
                          os.environ.get('CIRCLECI') == 'true',
                          reason="runtime too long for CircleCI")),
         '36_Microscopy_Colocalization_Distance_Analysis.ipynb',
         # '11_Progress.ipynb',  # This notebook times out when run with nbconvert, due to javascript issues, so not tested.
         '51_VH_Segmentation1.ipynb',
         '55_VH_Resample.ipynb',
         '56_VH_Registration1.ipynb',
         '60_Registration_Introduction.ipynb',
         '61_Registration_Introduction_Continued.ipynb',
         '62_Registration_Tuning.ipynb',
         '63_Registration_Initialization.ipynb',
         '64_Registration_Memory_Time_Tradeoff.ipynb',
         '65_Registration_FFD.ipynb',
         '66_Registration_Demons.ipynb',
         '67_Registration_Semiautomatic_Homework.ipynb',
         '68_Registration_Errors.ipynb',
         '69_x-ray-panorama.ipynb',
         '70_Data_Augmentation.ipynb',
         '71_Trust_But_Verify.ipynb'])
    def test_python_notebook(self, notebook_file_name):
        self.evaluate_notebook(self.absolute_path_python(notebook_file_name), 'python')

    @pytest.mark.parametrize(
        'notebook_file_name',
        ['00_Setup.ipynb',
         'Image_Basics.ipynb',
         'R_style_image.ipynb',
         '33_Segmentation_Thresholding_Edge_Detection.ipynb',
         '34_Segmentation_Evaluation.ipynb',
         '35_Cell_Segmentation.ipynb',
         '22_Transforms.ipynb',
         '300_Segmentation_Overview.ipynb',
         '60_Registration_Introduction.ipynb',
         '61_Registration_Introduction_Continued.ipynb',
         '65_Registration_FFD.ipynb',
         '66_Registration_Demons.ipynb',
         '70_Data_Augmentation.ipynb'])
    def test_r_notebook(self, notebook_file_name):
        self.evaluate_notebook(self.absolute_path_r(notebook_file_name), 'ir')

    def evaluate_notebook(self, path, kernel_name):
        """
        Perform static and dynamic analysis of the notebook.
        Execute the notebook via nbconvert and print the results of the test
        (errors etc.).
        Args:
            path (string): Name of notebook to run.
            kernel_name (string): Which jupyter kernel to use to run the test.
                                  Relevant values are: 'python', 'ir'.
        """
        dir_name, file_name = os.path.split(path)
        if dir_name:
            os.chdir(dir_name)
        print('-------- begin (kernel {0}) {1} --------'.format(kernel_name, file_name))
        no_static_errors = self.static_analysis(path)
        no_dynamic_errors = self.dynamic_analysis(path, kernel_name)
        print('-------- end (kernel {0}) {1} --------'.format(kernel_name, file_name))
        assert no_static_errors and no_dynamic_errors

    def static_analysis(self, path):
        """
        Perform static analysis of the notebook.
        Read the notebook and check that there is no output and that the links
        in the markdown cells are not broken.
        Args:
            path (string): Name of notebook.
        Return:
            boolean: True if static analysis succeeded, otherwise False.
        """
        nb = nbformat.read(path, nbformat.current_nbformat)

        #######################
        # Check that the notebook does not contain output from code cells
        # (should not be in the repository, but well...).
        #######################
        no_unexpected_output = True

        # Check that the cell dictionary has an 'outputs' key and that it is
        # not empty. Relies on Python's short circuit evaluation so that we
        # don't get a KeyError when retrieving the 'outputs' entry.
        cells_with_output = [c.source for c in nb.cells if 'outputs' in c and c.outputs]
        if cells_with_output:
            no_unexpected_output = False
            print('Cells with unexpected output:\n_____________________________')
            for cell in cells_with_output:
                print(cell + '\n---')
        else:
            print('no unexpected output')

        #######################
        # Check that all the links in the markdown cells are valid/accessible.
        #######################
        no_broken_links = True
        cells_and_broken_links = []
        for c in nb.cells:
            if c.cell_type == 'markdown':
                html_tree = document_fromstring(markdown.markdown(c.source))
                broken_links = []
                # iterlinks() returns tuples of the form (element, attribute, link, pos).
                for document_link in html_tree.iterlinks():
                    try:
                        if all(prefix not in document_link[2] for prefix in ['http', 'https']):  # Local file.
                            url = 'file:' + urllib.request.pathname2url(document_link[2])
                        else:  # Remote file.
                            url = document_link[2]
                        # Mimic a web browser request, otherwise some sites return
                        # "HTTP Error 403: Forbidden" to prevent web scraping.
                        urlopen(Request(url, headers={'User-Agent': 'Mozilla/5.0'}))
                    except URLError:
                        broken_links.append(url)
                if broken_links:
                    cells_and_broken_links.append((broken_links, c.source))
        if cells_and_broken_links:
            no_broken_links = False
            broken_links_list = []
            print('Cells with broken links:\n________________________')
            for links, cell in cells_and_broken_links:
                print(cell + '\n')
                print('\tBroken links:')
                print('\t' + '\n\t'.join(links) + '\n---')
                broken_links_list.extend(links)
        else:
            print('no broken links')

        #######################
        # Spell check all markdown cells and comments in code cells using the
        # pyenchant spell checker.
        #######################
        no_spelling_mistakes = True
        simpleitk_notebooks_dictionary = DictWithPWL(
            'en_US',
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'additional_dictionary.txt'))
        spell_checker = SpellChecker(simpleitk_notebooks_dictionary, filters=[EmailFilter, URLFilter])
        cells_and_spelling_mistakes = []
        for c in nb.cells:
            spelling_mistakes = []
            if c.cell_type == 'markdown':
                # Get the text from the html as a single string, with the markup replaced by spaces.
                spell_checker.set_text(' '.join(etree.XPath('//text()')(document_fromstring(markdown.markdown(c.source)))))
            elif c.cell_type == 'code':
                # Get all the comments and concatenate them into a single string separated by newlines.
                comment_lines = re.findall('#+.*', c.source)
                spell_checker.set_text('\n'.join(comment_lines))
            for error in spell_checker:
                error_message = "error: '" + error.word + "', suggestions: " + str(spell_checker.suggest())
                spelling_mistakes.append(error_message)
            if spelling_mistakes:
                cells_and_spelling_mistakes.append((spelling_mistakes, c.source))
        if cells_and_spelling_mistakes:
            no_spelling_mistakes = False
            print('Cells with spelling mistakes:\n________________________')
            for misspelled_words, cell in cells_and_spelling_mistakes:
                print(cell + '\n')
                print('\tMisspelled words and suggestions:')
                print('\t' + '\n\t'.join(misspelled_words) + '\n---')
        else:
            print('no spelling mistakes')

        # Broken links only generate a warning; they do not cause the test to
        # fail, so they are not included in the returned status.
        if not no_broken_links:
            warnings.warn('Broken links:\n' + '\n\t'.join(broken_links_list))
        return no_unexpected_output and no_spelling_mistakes

    def dynamic_analysis(self, path, kernel_name):
        """
        Perform dynamic analysis of the notebook.
        Execute the notebook via nbconvert and print the results of the test
        (errors etc.).
        Args:
            path (string): Name of notebook to run.
            kernel_name (string): Which jupyter kernel to use to run the test.
                                  Relevant values are: 'python', 'ir'.
        Return:
            boolean: True if dynamic analysis succeeded, otherwise False.
        """
        # Execute the notebook and allow errors (run all cells). The output is
        # written to a temporary file. delete=False means the file is not
        # automatically removed on close; this works around an issue Windows
        # seems to have with temporary files (see https://bugs.python.org/issue14243).
        with tempfile.NamedTemporaryFile(suffix='.ipynb', delete=False) as fout:
            args = ['jupyter', 'nbconvert',
                    '--to', 'notebook',
                    '--execute',
                    '--ExecutePreprocessor.kernel_name=' + kernel_name,
                    '--ExecutePreprocessor.allow_errors=True',
                    '--ExecutePreprocessor.timeout=6000',  # seconds till timeout
                    '--output', fout.name, path]
            subprocess.check_call(args)
            nb = nbformat.read(fout.name, nbformat.current_nbformat)

        # Get all of the unexpected errors (logic: the cell has output with an
        # error and either no error is expected, or the allowed/expected error
        # is not the one which was generated).
        unexpected_errors = [(output.evalue, c.source) for c in nb.cells
                             if 'outputs' in c for output in c.outputs
                             if (output.output_type == 'error') and
                             (((Test_notebooks._allowed_error_markup not in c.metadata) and
                               (Test_notebooks._expected_error_markup not in c.metadata)) or
                              ((Test_notebooks._allowed_error_markup in c.metadata) and
                               (c.metadata[Test_notebooks._allowed_error_markup] not in output.evalue)) or
                              ((Test_notebooks._expected_error_markup in c.metadata) and
                               (c.metadata[Test_notebooks._expected_error_markup] not in output.evalue)))]
        no_unexpected_errors = True
        if unexpected_errors:
            no_unexpected_errors = False
            print('Cells with unexpected errors:\n_____________________________')
            for e, src in unexpected_errors:
                print(src)
                print('unexpected error: ' + e)
        else:
            print('no unexpected errors')

        # Get all of the missing expected errors (logic: the cell is marked as
        # expecting an error, but that error was not generated).
        missing_expected_errors = []
        for c in nb.cells:
            if Test_notebooks._expected_error_markup in c.metadata:
                missing_error = True
                if 'outputs' in c:
                    for output in c.outputs:
                        if (output.output_type == 'error') and (c.metadata[Test_notebooks._expected_error_markup] in output.evalue):
                            missing_error = False
                if missing_error:
                    missing_expected_errors.append((c.metadata[Test_notebooks._expected_error_markup], c.source))
        no_missing_expected_errors = True
        if missing_expected_errors:
            no_missing_expected_errors = False
            print('\nCells with missing expected errors:\n___________________________________')
            for e, src in missing_expected_errors:
                print(src)
                print('missing expected error: ' + e)
        else:
            print('no missing expected errors')

        return no_unexpected_errors and no_missing_expected_errors

    def absolute_path_python(self, notebook_file_name):
        return os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), '../Python', notebook_file_name))

    def absolute_path_r(self, notebook_file_name):
        return os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), '../R', notebook_file_name))