diff --git a/cookbook/core/containerization/raw-containers-supporting-files/Makefile b/cookbook/core/containerization/raw-containers-supporting-files/Makefile new file mode 100644 index 0000000000..e7f64d81b0 --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/Makefile @@ -0,0 +1,3 @@ +.PHONY: build_images +build_images: + ls per-language | xargs -n 1 -I {} docker build ./per-language/{} -f ./per-language/{}/Dockerfile --tag "rawcontainers-{}:v1" diff --git a/cookbook/core/containerization/raw-containers-supporting-files/README.md b/cookbook/core/containerization/raw-containers-supporting-files/README.md new file mode 100644 index 0000000000..296f22f0e4 --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/README.md @@ -0,0 +1,3 @@ +# raw-containers-demo + +This directory holds the Dockerfiles and supporting files needed to run the example described in `raw_container.py`, split by language. diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/haskell/Dockerfile b/cookbook/core/containerization/raw-containers-supporting-files/per-language/haskell/Dockerfile new file mode 100644 index 0000000000..8d9679088d --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/haskell/Dockerfile @@ -0,0 +1,7 @@ +FROM haskell:9 + +WORKDIR /root + +COPY calculate-ellipse-area.hs /root + +RUN ghc calculate-ellipse-area.hs diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/haskell/calculate-ellipse-area.hs b/cookbook/core/containerization/raw-containers-supporting-files/per-language/haskell/calculate-ellipse-area.hs new file mode 100644 index 0000000000..fa50d0293f --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/haskell/calculate-ellipse-area.hs @@ -0,0 +1,21 @@ +import System.IO +import System.Environment +import Text.Read +import Text.Printf + +calculateEllipseArea :: Float -> Float -> Float +calculateEllipseArea a b = pi * a * b + +main = do + args <- getArgs + let input_a = args!!0 ++ "/a" + input_b = args!!0 ++ "/b" + a <- readFile input_a + b <- readFile input_b + + let area = calculateEllipseArea (read a::Float) (read b::Float) + + let output_area = args!!1 ++ "/area" + output_metadata = args!!1 ++ "/metadata" + writeFile output_area (show area) + writeFile output_metadata "[from haskell rawcontainer]" diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/julia/Dockerfile b/cookbook/core/containerization/raw-containers-supporting-files/per-language/julia/Dockerfile new file mode 100644 index 0000000000..caaf85a2ab --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/julia/Dockerfile @@ -0,0 +1,5 @@ +FROM julia:1.6.4-buster + +WORKDIR /root + +COPY calculate-ellipse-area.jl /root diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/julia/calculate-ellipse-area.jl b/cookbook/core/containerization/raw-containers-supporting-files/per-language/julia/calculate-ellipse-area.jl new file mode 100644 index 0000000000..35bb75cb2b --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/julia/calculate-ellipse-area.jl @@ -0,0 +1,36 @@ + +using Printf + +function calculate_area(a, b) + π * a * b +end + +function read_input(input_dir, v) + open(@sprintf "%s/%s" input_dir v) do file + parse.(Float64, read(file, String)) + end +end + +function write_output(output_dir, output_file, v) + output_path = @sprintf "%s/%s" output_dir output_file + open(output_path, "w") do file + write(file, string(v)) + end +end + +function main(input_dir, output_dir) + a = read_input(input_dir, 'a') + b = read_input(input_dir, 'b') + + area = calculate_area(a, b) + + write_output(output_dir, "area", area) + write_output(output_dir, "metadata", "[from julia rawcontainer]") +end + +# the keyword ARGS is a special value that contains the command-line arguments +# julia arrays are 1-indexed +input_dir = ARGS[1] +output_dir = ARGS[2] + +main(input_dir, output_dir) diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/python/Dockerfile b/cookbook/core/containerization/raw-containers-supporting-files/per-language/python/Dockerfile new file mode 100644 index 0000000000..2174b2d997 --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/python/Dockerfile @@ -0,0 +1,5 @@ +FROM python:3.10-slim-buster + +WORKDIR /root + +COPY *.py /root/ diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/python/calculate-ellipse-area.py b/cookbook/core/containerization/raw-containers-supporting-files/per-language/python/calculate-ellipse-area.py new file mode 100644 index 0000000000..a523ca1037 --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/python/calculate-ellipse-area.py @@ -0,0 +1,30 @@ +import math +import sys + +def read_input(input_dir, v): + with open(f'{input_dir}/{v}', 'r') as f: + return float(f.read()) + +def write_output(output_dir, output_file, v): + with open(f'{output_dir}/{output_file}', 'w') as f: + f.write(str(v)) + +def calculate_area(a, b): + return math.pi * a * b + + +def main(input_dir, output_dir): + a = read_input(input_dir, 'a') + b = read_input(input_dir, 'b') + + area = calculate_area(a, b) + + write_output(output_dir, 'area', area) + write_output(output_dir, 'metadata', '[from python rawcontainer]') + + +if __name__ == '__main__': + input_dir = sys.argv[1] + output_dir = sys.argv[2] + + main(input_dir, output_dir) diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/r/Dockerfile b/cookbook/core/containerization/raw-containers-supporting-files/per-language/r/Dockerfile new file mode 100644 index 0000000000..9258d7eeea --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/r/Dockerfile @@ -0,0 +1,10 @@ +FROM r-base + +WORKDIR /root + +COPY *.R /root/ + +# Not sure whether this a horrible hack. I couldn't +# find a better way to install a package via command +# line +RUN Rscript --save install-readr.R diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/r/calculate-ellipse-area.R b/cookbook/core/containerization/raw-containers-supporting-files/per-language/r/calculate-ellipse-area.R new file mode 100644 index 0000000000..98488c2129 --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/r/calculate-ellipse-area.R @@ -0,0 +1,15 @@ +library(readr) + +args = commandArgs(trailingOnly=TRUE) + +input_dir = args[1] +output_dir = args[2] + +a = read_lines(sprintf("%s/%s", input_dir, 'a')) +b = read_lines(sprintf("%s/%s", input_dir, 'b')) + +area <- pi * as.double(a) * as.double(b) +print(area) + +writeLines(as.character(area), sprintf("%s/%s", output_dir, 'area')) +writeLines("[from R rawcontainer]", sprintf("%s/%s", output_dir, 'metadata')) diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/r/install-readr.R b/cookbook/core/containerization/raw-containers-supporting-files/per-language/r/install-readr.R new file mode 100644 index 0000000000..3308314e4e --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/r/install-readr.R @@ -0,0 +1 @@ +install.packages("readr") diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/shell/Dockerfile b/cookbook/core/containerization/raw-containers-supporting-files/per-language/shell/Dockerfile new file mode 100644 index 0000000000..856160ba11 --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/shell/Dockerfile @@ -0,0 +1,6 @@ +FROM alpine + +WORKDIR /root + +COPY calculate-ellipse-area.sh /root +RUN chmod +x /root/calculate-ellipse-area.sh diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/shell/calculate-ellipse-area.sh b/cookbook/core/containerization/raw-containers-supporting-files/per-language/shell/calculate-ellipse-area.sh new file mode 100755 index 0000000000..25048c5de8 --- /dev/null +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/shell/calculate-ellipse-area.sh @@ -0,0 +1,8 @@ +#! /usr/bin/env sh + +a=$(cat $1/a) +b=$(cat $1/b) + +echo "4*a(1) * $a * $b" | bc -l | tee $2/area + +echo "[from shell rawcontainer]" | tee $2/metadata diff --git a/cookbook/core/containerization/raw_container.py b/cookbook/core/containerization/raw_container.py index a98ca99125..81391f3a3a 100644 --- a/cookbook/core/containerization/raw_container.py +++ b/cookbook/core/containerization/raw_container.py @@ -4,71 +4,185 @@ Using Raw Containers --------------------- -This example shows how it is possible to use arbitrary containers and pass data between them using Flyte. +This example demonstrates how to use arbitrary containers in 5 different languages, all orchestrated in flytekit seamlessly. Flyte mounts an input data volume where all the data needed by the container is available and an output data volume for the container to write all the data which will be stored away. The data is written as separate files, one per input variable. The format of the file is serialized strings. -Refer to the raw protocol to understand how to leverage this +Refer to the raw protocol to understand how to leverage this. """ +import logging +from typing import Tuple, Any, Mapping, List, Set +from flytekit import task, workflow from flytekit import ContainerTask, kwtypes, workflow +logger = logging.getLogger(__file__) + # %% +# Container Tasks +# =============== +# # A :py:class:`flytekit.ContainerTask` denotes an arbitrary container. In the following example, the name of the task -# is square. This name has to be unique in the entire project. Users can specify some +# is ``calculate_ellipse_area_shell``. This name has to be unique in the entire project. Users can specify: # # - ``input_data_dir`` -> where inputs will be written to # - ``output_data_dir`` -> where Flyte will expect the outputs to exist. # # inputs and outputs specify the interface for the task, thus it should be an ordered dictionary of typed input and -# output variables -square = ContainerTask( - name="square", +# output variables. +calculate_ellipse_area_shell = ContainerTask( + name="ellipse-area-metadata-shell", input_data_dir="/var/inputs", output_data_dir="/var/outputs", - inputs=kwtypes(val=int), - outputs=kwtypes(out=int), - image="alpine", + inputs=kwtypes(a=float, b=float), + outputs=kwtypes(area=float, metadata=str), + image="rawcontainers-shell:v1", command=[ - "sh", - "-c", - "echo $(( {{.Inputs.val}} * {{.Inputs.val}} )) | tee /var/outputs/out", + "./calculate-ellipse-area.sh", + "/var/inputs", + "/var/outputs", ], ) +calculate_ellipse_area_python = ContainerTask( + name="ellipse-area-metadata-python", + input_data_dir="/var/inputs", + output_data_dir="/var/outputs", + inputs=kwtypes(a=float, b=float), + outputs=kwtypes(area=float, metadata=str), + image="rawcontainers-python:v1", + command=[ + "python", + "calculate-ellipse-area.py", + "/var/inputs", + "/var/outputs", + ], +) + +calculate_ellipse_area_r = ContainerTask( + name="ellipse-area-metadata-r", + input_data_dir="/var/inputs", + output_data_dir="/var/outputs", + inputs=kwtypes(a=float, b=float), + outputs=kwtypes(area=float, metadata=str), + image="rawcontainers-r:v1", + command=[ + "Rscript", + "--vanilla", + "calculate-ellipse-area.R", + "/var/inputs", + "/var/outputs", + ], +) + +calculate_ellipse_area_haskell = ContainerTask( + name="ellipse-area-metadata-haskell", + input_data_dir="/var/inputs", + output_data_dir="/var/outputs", + inputs=kwtypes(a=float, b=float), + outputs=kwtypes(area=float, metadata=str), + image="rawcontainers-haskell:v1", + command=[ + "./calculate-ellipse-area", + "/var/inputs", + "/var/outputs", + ], +) -sum = ContainerTask( - name="sum", - input_data_dir="/var/flyte/inputs", - output_data_dir="/var/flyte/outputs", - inputs=kwtypes(x=int, y=int), - outputs=kwtypes(out=int), - image="alpine", +calculate_ellipse_area_julia = ContainerTask( + name="ellipse-area-metadata-julia", + input_data_dir="/var/inputs", + output_data_dir="/var/outputs", + inputs=kwtypes(a=float, b=float), + outputs=kwtypes(area=float, metadata=str), + image="rawcontainers-julia:v1", command=[ - "sh", - "-c", - "echo $(( {{.Inputs.x}} + {{.Inputs.y}} )) | tee /var/flyte/outputs/out", + "julia", + "calculate-ellipse-area.jl", + "/var/inputs", + "/var/outputs", ], ) +@task +def report_all_calculated_areas( + area_shell: float, + metadata_shell: str, + area_python: float, + metadata_python: str, + area_r: float, + metadata_r: str, + area_haskell: float, + metadata_haskell: str, + area_julia: float, + metadata_julia: str, +): + logger.info(f"shell: area={area_shell}, metadata={metadata_shell}") + logger.info(f"python: area={area_python}, metadata={metadata_python}") + logger.info(f"r: area={area_r}, metadata={metadata_r}") + logger.info(f"haskell: area={area_haskell}, metadata={metadata_haskell}") + logger.info(f"julia: area={area_julia}, metadata={metadata_julia}") + + # %% # As can be seen in this example, ContainerTasks can be interacted with like normal python functions, whose inputs -# correspond to the declared input variables +# correspond to the declared input variables. All data returned by @workflow -def raw_container_wf(val1: int, val2: int) -> int: - """ - These tasks can be invoked like simple python methods. But running them locally performs no execution, unless - the execution is mocked. - """ - return sum(x=square(val=val1), y=square(val=val2)) +def wf(a: float, b: float): + # Calculate area in all languages + area_shell, metadata_shell = calculate_ellipse_area_shell(a=a, b=b) + area_python, metadata_python = calculate_ellipse_area_python(a=a, b=b) + area_r, metadata_r = calculate_ellipse_area_r(a=a, b=b) + area_haskell, metadata_haskell = calculate_ellipse_area_haskell(a=a, b=b) + area_julia, metadata_julia = calculate_ellipse_area_julia(a=a, b=b) + + # Report on all results in a single task to simplify comparison + report_all_calculated_areas( + area_shell=area_shell, + metadata_shell=metadata_shell, + area_python=area_python, + metadata_python=metadata_python, + area_r=area_r, + metadata_r=metadata_r, + area_haskell=area_haskell, + metadata_haskell=metadata_haskell, + area_julia=area_julia, + metadata_julia=metadata_julia, + ) # %% -# ContainerTasks cannot really be executed locally as Flytekit is incapable of executing Containers currently. -# but it is possible to mock the execution. -if __name__ == "__main__": - print(f"Running {__file__} main...") - print( - f"Running raw_container_wf(val1=5, val2=5) {raw_container_wf(val1=5, val2=5)}" - ) +# +# .. note:: +# Raw containers cannot be run locally at the moment. +# +# Scripts +# ======= +# +# The contents of each script mentioned above: +# +# calculate-ellipse-area.sh +# ^^^^^^^^^^^^^^^^^^^^^^^^^ +# .. literalinclude:: ../../../../core/containerization/raw-containers-supporting-files/per-language/shell/calculate-ellipse-area.sh +# :language: shell +# +# calculate-ellipse-area.py +# ^^^^^^^^^^^^^^^^^^^^^^^^^ +# .. literalinclude:: ../../../../core/containerization/raw-containers-supporting-files/per-language/python/calculate-ellipse-area.py +# :language: python +# +# calculate-ellipse-area.R +# ^^^^^^^^^^^^^^^^^^^^^^^^^ +# .. literalinclude:: ../../../../core/containerization/raw-containers-supporting-files/per-language/r/calculate-ellipse-area.R +# :language: r +# +# calculate-ellipse-area.hs +# ^^^^^^^^^^^^^^^^^^^^^^^^^ +# .. literalinclude:: ../../../../core/containerization/raw-containers-supporting-files/per-language/haskell/calculate-ellipse-area.hs +# :language: haskell +# +# calculate-ellipse-area.jl +# ^^^^^^^^^^^^^^^^^^^^^^^^^ +# .. literalinclude:: ../../../../core/containerization/raw-containers-supporting-files/per-language/julia/calculate-ellipse-area.jl +# :language: julia