Skip to content

Commit

Permalink
Add notebook (.ipynb) file support for yaml generator
Browse files Browse the repository at this point in the history
  • Loading branch information
Juha Kiili committed Jun 29, 2021
1 parent 12fa08e commit 8c5bee3
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 16 deletions.
61 changes: 61 additions & 0 deletions valohai/internals/notebooks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import json
import os
import shlex
from typing import Union, List


# TODO: This file is copy-pasted from https://github.com/valohai/jupyhai
# TODO: Deduplicate the shared code between the two libraries (DRY)


def parse_ipynb(content_or_str: Union[str, dict]) -> dict:
    """
    "Smartly" parse content that contains a notebook.

    * If a string, it's first JSON deserialized.
    * If it's a "wrapped" dict (i.e. contains "type" == "notebook" and
      "content"), the inner content is unwrapped.
    * Checks the content smells like a notebook (integer "nbformat").

    :param content_or_str: Notebook data, either as a JSON string or as an
        already deserialized dict.
    :return: Notebook data (the unwrapped dict; not copied).
    :raises ValueError: If the content, or the unwrapped inner content, is
        not a dict, or if its "nbformat" value is not an integer. Malformed
        JSON raises ``json.JSONDecodeError``, itself a ``ValueError``.
    """
    if isinstance(content_or_str, str):
        content = json.loads(content_or_str)
    else:
        content = content_or_str
    if not isinstance(content, dict):
        raise ValueError('Ipynb not a dict')

    if content.get('type') == 'notebook':
        # Wrapped form: the actual notebook lives under the "content" key.
        # It must be validated again, otherwise a missing or non-dict value
        # would surface as a KeyError/AttributeError instead of ValueError.
        content = content.get('content')
        if not isinstance(content, dict):
            raise ValueError('Ipynb not a dict')

    nbformat = content.get('nbformat')
    if not isinstance(nbformat, int):
        # One-element tuple so that a tuple-valued nbformat formats safely.
        raise ValueError('Nbformat value %s invalid' % (nbformat,))
    return content


def get_notebook_source_code(contents: dict) -> str:
    """
    Collect the source of all code cells of a notebook into a single string.

    Rows starting with ``!`` (shell escapes such as "!pip install tensorflow")
    or ``%`` (IPython line/cell magics such as "%matplotlib inline") are
    dropped, because they are not Python. Only the marker row itself is
    removed: the body of a ``%%`` cell magic is kept as-is.

    :param contents: Parsed notebook data. Must contain a "cells" list whose
        items have "cell_type" and "source" keys.
    :return: The remaining rows of all code cells, in notebook order, with
        trailing whitespace stripped and joined with newlines.
    :raises KeyError: If "cells", "cell_type" or "source" is missing.
    """
    rows = []
    for cell in contents['cells']:
        if cell['cell_type'] != 'code':
            continue
        cell_source = cell['source']
        # Some notebook versions store the source as a list of rows already,
        # some as a single string.
        if isinstance(cell_source, list):
            cell_rows = cell_source
        else:
            cell_rows = cell_source.split('\n')
        # Even when it was a list, the linefeeds are still there.
        rows.extend(row.rstrip() for row in cell_rows)

    # Only column-0 markers are matched; indented rows are left untouched.
    non_python_prefixes = ('!', '%')
    return '\n'.join(row for row in rows if not row.startswith(non_python_prefixes))


def get_notebook_command(notebook_relative_path: str) -> List[str]:
    """
    Build the step commands that execute a notebook with papermill.

    :param notebook_relative_path: Path of the notebook relative to the
        repository root, using the local path separator.
    :return: Two shell commands: the requirements install, and a papermill
        call that reads the notebook from under /valohai/repository/ and
        writes the result to /valohai/outputs/ under the same file name.
    """
    # Paths in the command always use forward slashes, whatever the local
    # separator is.
    repository_path = "/valohai/repository/{}".format(notebook_relative_path.replace(os.sep, "/"))
    # Only the file name is kept for the output; a basename has no separators.
    output_path = "/valohai/outputs/{}".format(os.path.basename(notebook_relative_path))

    papermill_command = " ".join([
        "papermill -k python3 -f /valohai/config/parameters.yaml",
        shlex.quote(repository_path),
        shlex.quote(output_path),
    ])
    return [
        "pip install -r requirements.txt",
        papermill_command,
    ]
53 changes: 37 additions & 16 deletions valohai/internals/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from valohai_yaml.objs.step import Step

from valohai.consts import DEFAULT_DOCKER_IMAGE
from valohai.internals.notebooks import parse_ipynb, get_notebook_source_code, get_notebook_command
from valohai.internals.parsing import parse

ParameterDict = Dict[str, Any]
Expand All @@ -24,10 +25,7 @@ def generate_step(
config_step = Step(
name=step,
image=image,
command=[
"pip install -r requirements.txt",
"python %s {parameters}" % relative_source_path,
],
command=get_command(relative_source_path),
)

for key, value in parameters.items():
Expand Down Expand Up @@ -93,18 +91,17 @@ def get_source_relative_path(source_path: str, config_path: str) -> str:


def parse_config_from_source(source_path: str, config_path: str) -> Config:
with open(source_path) as source_file:
parsed = parse(source_file.read())
if not parsed.step:
raise ValueError("Source is missing a call to valohai.prepare()")
relative_source_path = get_source_relative_path(source_path, config_path)
return generate_config(
relative_source_path=relative_source_path,
step=parsed.step,
image=DEFAULT_DOCKER_IMAGE if parsed.image is None else parsed.image,
parameters=parsed.parameters,
inputs=parsed.inputs,
)
parsed = parse(get_source_code(source_path))
if not parsed.step:
raise ValueError("Source is missing a call to valohai.prepare()")
relative_source_path = get_source_relative_path(source_path, config_path)
return generate_config(
relative_source_path=relative_source_path,
step=parsed.step,
image=DEFAULT_DOCKER_IMAGE if parsed.image is None else parsed.image,
parameters=parsed.parameters,
inputs=parsed.inputs,
)


def get_parameter_type_name(name: str, value: Any) -> str:
Expand All @@ -121,3 +118,27 @@ def get_parameter_type_name(name: str, value: Any) -> str:
"Unrecognized parameter type for %s=%s. Supported Python types are float, int, string and bool."
% (name, value)
)


def get_command(relative_source_path: str) -> List[str]:
    """
    Return the list of step commands for the given source file.

    Notebooks are delegated to ``get_notebook_command``. Any other file is
    run with ``python`` after installing the requirements.

    :param relative_source_path: Source file path, relative to the repository.
    :return: Commands to put into the generated step.
    """
    if not is_notebook_path(relative_source_path):
        # "{parameters}" is left in the command verbatim (presumably a
        # placeholder filled in at execution time - confirm).
        run_script = "python %s {parameters}" % relative_source_path
        return ["pip install -r requirements.txt", run_script]

    return get_notebook_command(relative_source_path)


def get_source_code(source_path: str) -> str:
    """
    Read the source code contained in a script or a notebook file.

    :param source_path: Path of the file to read.
    :return: For notebook paths (see ``is_notebook_path``), the code
        extracted from the notebook. For any other file, its contents as-is.
    :raises ValueError: If a notebook file does not parse as a notebook.
    """
    # Decode explicitly as UTF-8 rather than with the platform's locale
    # encoding: notebooks are UTF-8 JSON and Python source defaults to UTF-8,
    # so the locale default can mis-decode them on non-UTF-8 systems.
    with open(source_path, encoding="utf-8") as source_file:
        file_contents = source_file.read()
    if is_notebook_path(source_path):
        notebook_content = parse_ipynb(file_contents)
        return get_notebook_source_code(notebook_content)
    return file_contents


def is_notebook_path(source_path: str) -> bool:
    """
    Tell whether the path has the notebook file extension.

    The comparison is case-sensitive and looks only at the extension as
    split off by ``os.path.splitext``.

    :param source_path: Path to check.
    :return: True if the extension is exactly ".ipynb".
    """
    extension = os.path.splitext(source_path)[1]
    return ".ipynb" == extension

0 comments on commit 8c5bee3

Please sign in to comment.