Skip to content

Commit

Permalink
Refactor parsing process (#59)
Browse files Browse the repository at this point in the history
* Add initial implementation
* add some more comments
* Update parser.py
* add comments
* update mistletoe/myst_parser versions
* Update basic.ipynb
* fix pre-commit
* try adding widget state after metadata
  • Loading branch information
chrisjsewell authored Mar 12, 2020
1 parent 2207e30 commit 688e421
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 64 deletions.
6 changes: 4 additions & 2 deletions docs/use/basic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
"![](../_static/logo.png)\n",
"\n",
"because MyST-NB is using the MyST-markdown parser, you can include rich markdown with Sphinx\n",
"in your notebook. For example, here's a note block:\n",
"in your notebook.[^note] For example, here's a note block:\n",
"\n",
"[^note]: Even footnotes!\n",
"\n",
"`````{note}\n",
"Wow, a note! It was generated with this code:\n",
Expand Down Expand Up @@ -256,7 +258,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.6-final"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
Expand Down
190 changes: 129 additions & 61 deletions myst_nb/parser.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
from docutils import nodes
import nbformat as nbf
from pathlib import Path
from sphinx.util import logging

from myst_parser.docutils_renderer import SphinxRenderer, dict_to_docinfo
from myst_parser.block_tokens import Document
from myst_parser.docutils_renderer import SphinxRenderer
from myst_parser.sphinx_parser import MystParser

from mistletoe.base_elements import BlockToken, Position, SourceLines
from mistletoe.parse_context import ParseContext, get_parse_context, set_parse_context
from mistletoe.block_tokenizer import tokenize_block
from mistletoe.block_tokens import Document, FrontMatter

from jupyter_sphinx.ast import get_widgets, JupyterWidgetStateNode
from jupyter_sphinx.execute import contains_widgets, write_notebook_output


SPHINX_LOGGER = logging.getLogger(__name__)


class NotebookParser(MystParser):
"""Docutils parser for IPynb + CommonMark + Math + Tables + RST Extensions """

Expand All @@ -21,6 +30,92 @@ class NotebookParser(MystParser):
config_section_dependencies = ("parsers",)

def parse(self, inputstring, document):

# de-serialize the notebook
ntbk = nbf.reads(inputstring, nbf.NO_CONVERT)

# This is a container for top-level markdown tokens,
# which we will add to as we walk the document
mkdown_tokens = [] # type: list[BlockToken]

# First we ensure that we are using a 'clean' global context
# for parsing, which is set up with the MyST parsing tokens;
# the logger will report on duplicate link/footnote definitions, etc
parse_context = ParseContext(
find_blocks=SphinxNBRenderer.default_block_tokens,
find_spans=SphinxNBRenderer.default_span_tokens,
logger=SPHINX_LOGGER,
)
set_parse_context(parse_context)

for cell_index, nb_cell in enumerate(ntbk.cells):

# Skip empty cells
if len(nb_cell["source"].strip()) == 0:
continue

# skip cells tagged for removal
tags = nb_cell.metadata.get("tags", [])
if "remove_cell" in tags:
continue

if nb_cell["cell_type"] == "markdown":

# we add the document path and cell index
# to the source lines, so they can be included in the error logging
# NOTE: currently the logic to report metadata is not written
# into SphinxRenderer, but this will be introduced in a later update
lines = SourceLines(
nb_cell["source"],
uri=document["source"],
metadata={"cell_index": cell_index},
standardize_ends=True,
)

# parse the source markdown text;
# at this point span/inline level tokens are not yet processed, but
# link/footnote definitions are collected/stored in the global context
mkdown_tokens.extend(tokenize_block(lines))

# TODO for md cells, think of a way to implement the previous
# `if "hide_input" in tags:` logic

elif nb_cell["cell_type"] == "code":
# here we do nothing but store the cell as a custom token
mkdown_tokens.append(
NbCodeCell(
cell=nb_cell,
position=Position(
line_start=0,
uri=document["source"],
data={"cell_index": cell_index},
),
)
)

# Now all definitions have been gathered, we walk the tokens and
# process any inline text
for token in mkdown_tokens + list(
get_parse_context().foot_definitions.values()
):
token.expand_spans()

# If there are widgets, this will embed the state of all widgets in a script
if contains_widgets(ntbk):
mkdown_tokens.insert(0, JupyterWidgetState(state=get_widgets(ntbk)))

# create the front matter token
front_matter = FrontMatter(content=ntbk.metadata, position=None)

# Finally, we create the top-level markdown document
markdown_doc = Document(
children=mkdown_tokens,
front_matter=front_matter,
link_definitions=parse_context.link_definitions,
footnotes=parse_context.foot_definitions,
footref_order=parse_context.foot_references,
)

self.reporter = document.reporter
self.config = self.default_config.copy()
try:
Expand All @@ -29,8 +124,6 @@ def parse(self, inputstring, document):
except AttributeError:
pass

ntbk = nbf.reads(inputstring, nbf.NO_CONVERT)

# Write the notebook's output to disk
path_doc = Path(document.settings.env.docname)
doc_relpath = path_doc.parent
Expand All @@ -39,69 +132,44 @@ def parse(self, inputstring, document):
output_dir = build_dir.joinpath("jupyter_execute", doc_relpath)
write_notebook_output(ntbk, str(output_dir), doc_filename)

# Parse notebook-level metadata as front-matter
# For now, only keep key/val pairs that point to int/float/string
metadata = ntbk.metadata
docinfo = dict_to_docinfo(metadata)
document += docinfo
# render the Markdown AST to docutils AST
renderer = SphinxNBRenderer(
parse_context=parse_context, document=document, current_node=None
)
renderer.render(markdown_doc)


class JupyterWidgetState(BlockToken):
    """Block token that carries the notebook's Jupyter widget state.

    The parser inserts one of these at the front of the token list when the
    notebook contains widgets; the renderer turns it into a
    ``JupyterWidgetStateNode`` built from ``state``.
    """

    def __init__(self, state):
        # Stored as-is; the renderer reads it back via ``token.state``.
        self.state = state


class NbCodeCell(BlockToken):
    """Block token wrapping a notebook code cell.

    The parser does not tokenize code cells; it stores the cell object
    unchanged so the renderer can convert it later.

    ``cell`` is the notebook cell; ``position`` is the source position the
    parser builds for it (document URI plus the cell index).
    """

    def __init__(self, cell, position):
        # Keep both values untouched for the render step.
        self.cell, self.position = cell, position


class SphinxNBRenderer(SphinxRenderer):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.render_map["NbCodeCell"] = self.render_nb_code_cell
self.render_map["JupyterWidgetState"] = self.render_jupyter_widget_state

def render_jupyter_widget_state(self, token):
self.document.append(JupyterWidgetStateNode(state=token.state))

def render_nb_code_cell(self, token: NbCodeCell):
"""Render a Jupyter notebook cell."""
cell = token.cell
tags = cell.metadata.get("tags", [])

# If there are widgets, this will embed the state of all widgets in a script
if contains_widgets(ntbk):
document.append(JupyterWidgetStateNode(state=get_widgets(ntbk)))
renderer = SphinxRenderer(document=document, current_node=None)
with renderer:
# Loop through cells and render them
for ii, cell in enumerate(ntbk.cells):
# Skip empty cells
if len(cell["source"]) == 0:
continue
try:
_render_cell(cell, renderer)
except Exception as exc:
source = cell["source"][:50]
if len(cell["source"]) > 50:
source = source + "..."
msg_node = self.reporter.error(
(
f"\nError parsing notebook cell #{ii+1}: {exc}\n"
f"Type: {cell['cell_type']}\n"
f"Source:\n{source}\n\n"
)
)
msg_node += nodes.literal_block(cell["source"], cell["source"])
renderer.current_node += [msg_node]
continue


def _render_cell(cell, renderer):
"""Render a cell with a SphinxRenderer instance.
Returns nothing because the renderer updates itself.
"""
tags = cell.metadata.get("tags", [])
if "remove_cell" in tags:
return

# If a markdown cell, simply call the Myst parser and append children
if cell["cell_type"] == "markdown":
document = Document.read(cell["source"], front_matter=False)
# Check for tag-specific behavior because markdown isn't wrapped in a cell
if "hide_input" in tags:
container = nodes.container()
container["classes"].extend(["toggle"])
with renderer.current_node_context(container, append=True):
renderer.render(document)
else:
renderer.render(document)

# If a code cell, convert the code + outputs
elif cell["cell_type"] == "code":
# Cell container will wrap whatever is in the cell
classes = ["cell"]
for tag in tags:
classes.append(f"tag_{tag}")
sphinx_cell = CellNode(classes=classes, cell_type=cell["cell_type"])
renderer.current_node += sphinx_cell
self.current_node += sphinx_cell
if "remove_input" not in tags:
cell_input = CellInputNode(classes=["cell_input"])
sphinx_cell += cell_input
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
python_requires=">=3.6",
package_data={"myst_nb": ["_static/mystnb.css"]},
install_requires=[
"myst-parser~=0.5",
"myst-parser~=0.6.0",
"docutils>=0.15",
"sphinx>=2,<3",
"jupyter_sphinx==0.2.4a1",
Expand Down

0 comments on commit 688e421

Please sign in to comment.