Skip to content

Commit

Permalink
Replace raw containers example (flyteorg#492)
Browse files Browse the repository at this point in the history
* Replace raw containers example

Signed-off-by: Eduardo Apolinario <[email protected]>

* rename workflow file to raw_container.py

Signed-off-by: Eduardo Apolinario <[email protected]>

* Remove unnecessary files

Signed-off-by: Eduardo Apolinario <[email protected]>

* Add makefile

Signed-off-by: Eduardo Apolinario <[email protected]>

* Add panel

Signed-off-by: Eduardo Apolinario <[email protected]>

* Test literalinclude outside panel

Signed-off-by: Eduardo Apolinario <[email protected]>

* Fix path

Signed-off-by: Eduardo Apolinario <[email protected]>

* Fix typo

Signed-off-by: Eduardo Apolinario <[email protected]>

* Pass a different path to literalinclude

Signed-off-by: Eduardo Apolinario <[email protected]>

* Add caveats section and fix references to each panel.

Signed-off-by: Eduardo Apolinario <[email protected]>

* Remove extraneous literalinclude

Signed-off-by: Eduardo Apolinario <[email protected]>

* Review feedback

Signed-off-by: Eduardo Apolinario <[email protected]>

* De-indent literalinclude

Signed-off-by: Eduardo Apolinario <[email protected]>

* Lint - black

Signed-off-by: Eduardo Apolinario <[email protected]>

Co-authored-by: Eduardo Apolinario <[email protected]>
  • Loading branch information
eapolinario and eapolinario authored Dec 9, 2021
1 parent 6874282 commit ba07e51
Show file tree
Hide file tree
Showing 14 changed files with 300 additions and 36 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.PHONY: build_images
build_images:
ls per-language | xargs -n 1 -I {} docker build ./per-language/{} -f ./per-language/{}/Dockerfile --tag "rawcontainers-{}:v1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# raw-containers-demo

This directory holds the Dockerfiles and supporting files needed to run the example described in `raw_container.py`, split by language.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
FROM haskell:9

WORKDIR /root

COPY calculate-ellipse-area.hs /root

RUN ghc calculate-ellipse-area.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import System.IO
import System.Environment
import Text.Read
import Text.Printf

calculateEllipseArea :: Float -> Float -> Float
calculateEllipseArea a b = pi * a * b

main = do
args <- getArgs
let input_a = args!!0 ++ "/a"
input_b = args!!0 ++ "/b"
a <- readFile input_a
b <- readFile input_b

let area = calculateEllipseArea (read a::Float) (read b::Float)

let output_area = args!!1 ++ "/area"
output_metadata = args!!1 ++ "/metadata"
writeFile output_area (show area)
writeFile output_metadata "[from haskell rawcontainer]"
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
FROM julia:1.6.4-buster

WORKDIR /root

COPY calculate-ellipse-area.jl /root
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

using Printf

function calculate_area(a, b)
π * a * b
end

function read_input(input_dir, v)
open(@sprintf "%s/%s" input_dir v) do file
parse.(Float64, read(file, String))
end
end

function write_output(output_dir, output_file, v)
output_path = @sprintf "%s/%s" output_dir output_file
open(output_path, "w") do file
write(file, string(v))
end
end

function main(input_dir, output_dir)
a = read_input(input_dir, 'a')
b = read_input(input_dir, 'b')

area = calculate_area(a, b)

write_output(output_dir, "area", area)
write_output(output_dir, "metadata", "[from julia rawcontainer]")
end

# the keyword ARGS is a special value that contains the command-line arguments
# julia arrays are 1-indexed
input_dir = ARGS[1]
output_dir = ARGS[2]

main(input_dir, output_dir)
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
FROM python:3.10-slim-buster

WORKDIR /root

COPY *.py /root/
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import math
import sys

def read_input(input_dir, v):
with open(f'{input_dir}/{v}', 'r') as f:
return float(f.read())

def write_output(output_dir, output_file, v):
with open(f'{output_dir}/{output_file}', 'w') as f:
f.write(str(v))

def calculate_area(a, b):
return math.pi * a * b


def main(input_dir, output_dir):
a = read_input(input_dir, 'a')
b = read_input(input_dir, 'b')

area = calculate_area(a, b)

write_output(output_dir, 'area', area)
write_output(output_dir, 'metadata', '[from python rawcontainer]')


if __name__ == '__main__':
input_dir = sys.argv[1]
output_dir = sys.argv[2]

main(input_dir, output_dir)
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
FROM r-base

WORKDIR /root

COPY *.R /root/

# Not sure whether this a horrible hack. I couldn't
# find a better way to install a package via command
# line
RUN Rscript --save install-readr.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
library(readr)

args = commandArgs(trailingOnly=TRUE)

input_dir = args[1]
output_dir = args[2]

a = read_lines(sprintf("%s/%s", input_dir, 'a'))
b = read_lines(sprintf("%s/%s", input_dir, 'b'))

area <- pi * as.double(a) * as.double(b)
print(area)

writeLines(as.character(area), sprintf("%s/%s", output_dir, 'area'))
writeLines("[from R rawcontainer]", sprintf("%s/%s", output_dir, 'metadata'))
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
install.packages("readr")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
FROM alpine

WORKDIR /root

COPY calculate-ellipse-area.sh /root
RUN chmod +x /root/calculate-ellipse-area.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#! /usr/bin/env sh

a=$(cat $1/a)
b=$(cat $1/b)

echo "4*a(1) * $a * $b" | bc -l | tee $2/area

echo "[from shell rawcontainer]" | tee $2/metadata
186 changes: 150 additions & 36 deletions cookbook/core/containerization/raw_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,71 +4,185 @@
Using Raw Containers
---------------------
This example shows how it is possible to use arbitrary containers and pass data between them using Flyte.
This example demonstrates how to use arbitrary containers in 5 different languages, all orchestrated in flytekit seamlessly.
Flyte mounts an input data volume where all the data needed by the container is available and an output data volume
for the container to write all the data which will be stored away.
The data is written as separate files, one per input variable. The format of the file is serialized strings.
Refer to the raw protocol to understand how to leverage this
Refer to the raw protocol to understand how to leverage this.
"""
import logging
from typing import Tuple, Any, Mapping, List, Set
from flytekit import task, workflow
from flytekit import ContainerTask, kwtypes, workflow

logger = logging.getLogger(__file__)

# %%
# Container Tasks
# ===============
#
# A :py:class:`flytekit.ContainerTask` denotes an arbitrary container. In the following example, the name of the task
# is square. This name has to be unique in the entire project. Users can specify some
# is ``calculate_ellipse_area_shell``. This name has to be unique in the entire project. Users can specify:
#
# - ``input_data_dir`` -> where inputs will be written to
# - ``output_data_dir`` -> where Flyte will expect the outputs to exist.
#
# inputs and outputs specify the interface for the task, thus it should be an ordered dictionary of typed input and
# output variables
square = ContainerTask(
name="square",
# output variables.
calculate_ellipse_area_shell = ContainerTask(
name="ellipse-area-metadata-shell",
input_data_dir="/var/inputs",
output_data_dir="/var/outputs",
inputs=kwtypes(val=int),
outputs=kwtypes(out=int),
image="alpine",
inputs=kwtypes(a=float, b=float),
outputs=kwtypes(area=float, metadata=str),
image="rawcontainers-shell:v1",
command=[
"sh",
"-c",
"echo $(( {{.Inputs.val}} * {{.Inputs.val}} )) | tee /var/outputs/out",
"./calculate-ellipse-area.sh",
"/var/inputs",
"/var/outputs",
],
)

calculate_ellipse_area_python = ContainerTask(
name="ellipse-area-metadata-python",
input_data_dir="/var/inputs",
output_data_dir="/var/outputs",
inputs=kwtypes(a=float, b=float),
outputs=kwtypes(area=float, metadata=str),
image="rawcontainers-python:v1",
command=[
"python",
"calculate-ellipse-area.py",
"/var/inputs",
"/var/outputs",
],
)

calculate_ellipse_area_r = ContainerTask(
name="ellipse-area-metadata-r",
input_data_dir="/var/inputs",
output_data_dir="/var/outputs",
inputs=kwtypes(a=float, b=float),
outputs=kwtypes(area=float, metadata=str),
image="rawcontainers-r:v1",
command=[
"Rscript",
"--vanilla",
"calculate-ellipse-area.R",
"/var/inputs",
"/var/outputs",
],
)

calculate_ellipse_area_haskell = ContainerTask(
name="ellipse-area-metadata-haskell",
input_data_dir="/var/inputs",
output_data_dir="/var/outputs",
inputs=kwtypes(a=float, b=float),
outputs=kwtypes(area=float, metadata=str),
image="rawcontainers-haskell:v1",
command=[
"./calculate-ellipse-area",
"/var/inputs",
"/var/outputs",
],
)

sum = ContainerTask(
name="sum",
input_data_dir="/var/flyte/inputs",
output_data_dir="/var/flyte/outputs",
inputs=kwtypes(x=int, y=int),
outputs=kwtypes(out=int),
image="alpine",
calculate_ellipse_area_julia = ContainerTask(
name="ellipse-area-metadata-julia",
input_data_dir="/var/inputs",
output_data_dir="/var/outputs",
inputs=kwtypes(a=float, b=float),
outputs=kwtypes(area=float, metadata=str),
image="rawcontainers-julia:v1",
command=[
"sh",
"-c",
"echo $(( {{.Inputs.x}} + {{.Inputs.y}} )) | tee /var/flyte/outputs/out",
"julia",
"calculate-ellipse-area.jl",
"/var/inputs",
"/var/outputs",
],
)


@task
def report_all_calculated_areas(
area_shell: float,
metadata_shell: str,
area_python: float,
metadata_python: str,
area_r: float,
metadata_r: str,
area_haskell: float,
metadata_haskell: str,
area_julia: float,
metadata_julia: str,
):
logger.info(f"shell: area={area_shell}, metadata={metadata_shell}")
logger.info(f"python: area={area_python}, metadata={metadata_python}")
logger.info(f"r: area={area_r}, metadata={metadata_r}")
logger.info(f"haskell: area={area_haskell}, metadata={metadata_haskell}")
logger.info(f"julia: area={area_julia}, metadata={metadata_julia}")


# %%
# As can be seen in this example, ContainerTasks can be interacted with like normal python functions, whose inputs
# correspond to the declared input variables
# correspond to the declared input variables. All data returned by
@workflow
def raw_container_wf(val1: int, val2: int) -> int:
"""
These tasks can be invoked like simple python methods. But running them locally performs no execution, unless
the execution is mocked.
"""
return sum(x=square(val=val1), y=square(val=val2))
def wf(a: float, b: float):
# Calculate area in all languages
area_shell, metadata_shell = calculate_ellipse_area_shell(a=a, b=b)
area_python, metadata_python = calculate_ellipse_area_python(a=a, b=b)
area_r, metadata_r = calculate_ellipse_area_r(a=a, b=b)
area_haskell, metadata_haskell = calculate_ellipse_area_haskell(a=a, b=b)
area_julia, metadata_julia = calculate_ellipse_area_julia(a=a, b=b)

# Report on all results in a single task to simplify comparison
report_all_calculated_areas(
area_shell=area_shell,
metadata_shell=metadata_shell,
area_python=area_python,
metadata_python=metadata_python,
area_r=area_r,
metadata_r=metadata_r,
area_haskell=area_haskell,
metadata_haskell=metadata_haskell,
area_julia=area_julia,
metadata_julia=metadata_julia,
)


# %%
# ContainerTasks cannot really be executed locally as Flytekit is incapable of executing Containers currently.
# but it is possible to mock the execution.
if __name__ == "__main__":
print(f"Running {__file__} main...")
print(
f"Running raw_container_wf(val1=5, val2=5) {raw_container_wf(val1=5, val2=5)}"
)
#
# .. note::
# Raw containers cannot be run locally at the moment.
#
# Scripts
# =======
#
# The contents of each script mentioned above:
#
# calculate-ellipse-area.sh
# ^^^^^^^^^^^^^^^^^^^^^^^^^
# .. literalinclude:: ../../../../core/containerization/raw-containers-supporting-files/per-language/shell/calculate-ellipse-area.sh
# :language: shell
#
# calculate-ellipse-area.py
# ^^^^^^^^^^^^^^^^^^^^^^^^^
# .. literalinclude:: ../../../../core/containerization/raw-containers-supporting-files/per-language/python/calculate-ellipse-area.py
# :language: python
#
# calculate-ellipse-area.R
# ^^^^^^^^^^^^^^^^^^^^^^^^^
# .. literalinclude:: ../../../../core/containerization/raw-containers-supporting-files/per-language/r/calculate-ellipse-area.R
# :language: r
#
# calculate-ellipse-area.hs
# ^^^^^^^^^^^^^^^^^^^^^^^^^
# .. literalinclude:: ../../../../core/containerization/raw-containers-supporting-files/per-language/haskell/calculate-ellipse-area.hs
# :language: haskell
#
# calculate-ellipse-area.jl
# ^^^^^^^^^^^^^^^^^^^^^^^^^
# .. literalinclude:: ../../../../core/containerization/raw-containers-supporting-files/per-language/julia/calculate-ellipse-area.jl
# :language: julia

0 comments on commit ba07e51

Please sign in to comment.