Commit

Create Benchmarking Jupytext.py
mwouts committed Sep 22, 2019
1 parent 057d626 commit 4c1f817
Showing 1 changed file with 92 additions and 0 deletions.
92 changes: 92 additions & 0 deletions demo/Benchmarking Jupytext.py
@@ -0,0 +1,92 @@
# In this notebook, we benchmark the Jupytext formats for Jupyter notebooks against the base format
# Open this script as a notebook in Jupyter to run it and see the plots

import time
import copy

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.colors import DEFAULT_PLOTLY_COLORS

import nbformat
import jupytext

# The notebook to be tested
notebook = jupytext.read('World population.ipynb')

# Same notebook, with no outputs, for a fair comparison
notebook_no_outputs = copy.deepcopy(notebook)
for cell in notebook_no_outputs.cells:
    cell.outputs = []
    cell.execution_count = None

# +
JUPYTEXT_FORMATS = ['ipynb', 'md', 'py:light', 'py:percent', 'py:sphinx']

# Let's see if we have pandoc here
try:
    jupytext.writes(notebook, fmt='md:pandoc')
    JUPYTEXT_FORMATS.append('md:pandoc')
except jupytext.formats.JupytextFormatError as err:
    print(str(err))


# -


def sample_perf(nb, n=30):
    # Measure the text size, write time and read time of the notebook
    # for each implementation (nbformat and every Jupytext format), n times each
    samples = pd.DataFrame(
        np.nan,
        index=pd.MultiIndex.from_product(
            (range(n), ['nbformat'] + JUPYTEXT_FORMATS), names=['sample', 'implementation']),
        columns=pd.Index(['size', 'read', 'write'], name='measure'))

    for i, fmt in samples.index:
        t0 = time.time()
        if fmt == 'nbformat':
            text = nbformat.writes(nb)
        else:
            text = jupytext.writes(nb, fmt)
        t1 = time.time()
        samples.loc[(i, fmt), 'write'] = t1 - t0
        samples.loc[(i, fmt), 'size'] = len(text)
        t0 = time.time()
        if fmt == 'nbformat':
            nbformat.reads(text, as_version=4)
        else:
            jupytext.reads(text, fmt)
        t1 = time.time()
        samples.loc[(i, fmt), 'read'] = t1 - t0
    return samples


def performance_plot(perf, title):
    # Plot the mean read time, write time and text size per implementation,
    # with standard deviations as error bars on the timing panels
    formats = ['nbformat'] + JUPYTEXT_FORMATS
    mean = perf.groupby('implementation').mean().loc[formats]
    std = perf.groupby('implementation').std().loc[formats]
    data = [go.Bar(x=mean.index,
                   y=mean[col],
                   error_y=dict(
                       type='data',
                       array=std[col],
                       color=color,
                       thickness=0.5
                   ) if col != 'size' else dict(),
                   name=col,
                   yaxis={'read': 'y1', 'write': 'y2', 'size': 'y3'}[col])
            for col, color in zip(mean.columns, DEFAULT_PLOTLY_COLORS)]
    layout = go.Layout(title=title,
                       xaxis=dict(title='Implementation', anchor='y3'),
                       yaxis=dict(domain=[0.7, 1], title='Read (secs)'),
                       yaxis2=dict(domain=[0.35, .65], title='Write (secs)'),
                       yaxis3=dict(domain=[0, .3], title='Size'))
    return go.Figure(data=data, layout=layout)


perf_no_outputs = sample_perf(notebook_no_outputs, 30)

performance_plot(perf_no_outputs, 'Benchmarking Jupytext on the World Population notebook<br>(Outputs filtered)')

perf = sample_perf(notebook, 30)

performance_plot(perf, 'Benchmarking Jupytext on the World Population notebook<br>(With outputs)')
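
As the header comment notes, this script is meant to be opened as a notebook. A minimal sketch of one way to convert it using jupytext's Python API (the output file name below is an assumption, not part of this commit):

import jupytext

# Parse the py:light script into an in-memory notebook object
nb = jupytext.read('Benchmarking Jupytext.py')
# Save it as a regular .ipynb file (hypothetical output name)
jupytext.write(nb, 'Benchmarking Jupytext.ipynb')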
