From 0ab9a8e9e8775814435f4c6207bb2ced7e97c09f Mon Sep 17 00:00:00 2001 From: statwonk Date: Sat, 13 Jun 2015 22:14:16 +0000 Subject: [PATCH 1/7] Clarify the term constructor --- docs/tutorial.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index bc6417467e45e..b67c138e7e00c 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -40,6 +40,7 @@ complicated, a line by line explanation follows below. dag = DAG('tutorial', default_args=default_args) + # t1, t2 and t3 are examples of a constructor t1 = BashOperator( task_id='print_date', bash_command='date', From 5aa82ad8a957d512d95bf788aa2875e7b8de6380 Mon Sep 17 00:00:00 2001 From: statwonk Date: Sat, 13 Jun 2015 22:25:37 +0000 Subject: [PATCH 2/7] Define a constructor --- docs/tutorial.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index b67c138e7e00c..97f6de3ff05af 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -124,8 +124,9 @@ We also pass the default argument dictionary that we just define. Tasks ----- -Tasks are generated when instantiating objects from operators. The first -argument ``task_id`` acts as a unique identifier for the task. +Tasks are generated when instantiating objects from operators. An object +instantiated from an operator is called a constructor. The first argument +``task_id`` acts as a unique identifier for the task. .. 
code:: python From 61092601f2a800dd9523f84e84e42bc48404ac90 Mon Sep 17 00:00:00 2001 From: statwonk Date: Sat, 13 Jun 2015 22:54:02 +0000 Subject: [PATCH 3/7] The term task is more clear than constructor --- airflow/example_dags/tutorial.py | 1 + docs/tutorial.rst | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/airflow/example_dags/tutorial.py b/airflow/example_dags/tutorial.py index 2040fa6a44b12..b306b52c0e985 100644 --- a/airflow/example_dags/tutorial.py +++ b/airflow/example_dags/tutorial.py @@ -27,6 +27,7 @@ dag = DAG('tutorial', default_args=default_args) +# t1, t2 and t3 are examples of tasks created by instantiating operators t1 = BashOperator( task_id='print_date', bash_command='date', diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 97f6de3ff05af..abf339d04c3d8 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -40,7 +40,7 @@ complicated, a line by line explanation follows below. dag = DAG('tutorial', default_args=default_args) - # t1, t2 and t3 are examples of a constructor + # t1, t2 and t3 are examples of tasks created by instantiating operators t1 = BashOperator( task_id='print_date', bash_command='date', From b65a47fb1300b52ac4d1d01b6d04d6bdc629f3e5 Mon Sep 17 00:00:00 2001 From: statwonk Date: Sat, 13 Jun 2015 22:55:32 +0000 Subject: [PATCH 4/7] Typo --- docs/tutorial.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index abf339d04c3d8..ee779735b4ba0 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -143,7 +143,7 @@ instatiated from an operator is called a constructor. The first argument Notice how we pass a mix of operator specific arguments (``bash_command``) and an argument common to all operators (``email_on_failure``) inherited -from BaseOperator to the operators constructor. This is simpler than +from BaseOperator to the operator's constructor. This is simpler than passing every argument for every constructor call. 
Also, notice that in the second call we override ``email_on_failure`` parameter with ``False``. From e11c3a9fa6da8d0993207b353ffa1847b4e2d265 Mon Sep 17 00:00:00 2001 From: statwonk Date: Sat, 13 Jun 2015 22:56:54 +0000 Subject: [PATCH 5/7] Using more plain language and making the reference to a task consistent --- docs/tutorial.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index ee779735b4ba0..0379961d84352 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -145,15 +145,16 @@ Notice how we pass a mix of operator specific arguments (``bash_command``) and an argument common to all operators (``email_on_failure``) inherited from BaseOperator to the operator's constructor. This is simpler than passing every argument for every constructor call. Also, notice that in -the second call we override ``email_on_failure`` parameter with ``False``. +the second task we override the ``email_on_failure`` parameter with ``False``. -The precedence rules for operator is: +The precedence rules for a task are as follows: -* Use the argument explicitly passed to the constructor -* Look in the default_args dictonary, use the value from there if it exists -* Use the operator's default, if any -* If none of these are defined, Airflow raises an exception +1. Explicitly passed arguments +2. Values that exist in the ``default_args`` dictionary +3. The operator's default value, if one exists +A task must include or inherit the arguments ``task_id`` and ``owner``, +otherwise Airflow will raise an exception. 
Templating with Jinja --------------------- From dfaa8c4a329ce44c74cfc3969f342aba7689ac4f Mon Sep 17 00:00:00 2001 From: statwonk Date: Sat, 13 Jun 2015 23:20:51 +0000 Subject: [PATCH 6/7] Small typo and clarity fixes --- docs/tutorial.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 0379961d84352..624a5e8e3070b 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -161,7 +161,7 @@ Templating with Jinja Airflow leverages the power of `Jinja Templating `_ and provides the pipeline author -with a set of builtin parameters and macros. Airflow also provides +with a set of built-in parameters and macros. Airflow also provides hooks for the pipeline author to define their own parameters, macros and templates. @@ -175,7 +175,7 @@ curly brackets, and point to the most common template variable: ``{{ ds }}``. templated_command = """ {% for i in range(5) %} echo "{{ ds }}" - echo "{{ macros.ds_add(ds, 7)}}" + echo "{{ macros.ds_add(ds, 7) }}" echo "{{ params.my_param }}" {% endfor %} """ @@ -188,10 +188,10 @@ curly brackets, and point to the most common template variable: ``{{ ds }}``. Notice that the ``templated_command`` contains code logic in ``{% %}`` blocks, references parameters like ``{{ ds }}``, calls a function as in -``{{ macros.ds_add(ds, 7)}}``, and references a user defined parameter +``{{ macros.ds_add(ds, 7) }}``, and references a user-defined parameter in ``{{ params.my_param }}``. -The ``params`` hook in BaseOperator allows you to pass a dictionary of +The ``params`` hook in ``BaseOperator`` allows you to pass a dictionary of parameters and/or objects to your templates. Please take the time to understand how the parameter ``my_param`` makes it through to the template. 
From 20f773b0bedd9fb7a9c3ce231e8809da4e60327e Mon Sep 17 00:00:00 2001 From: statwonk Date: Sat, 13 Jun 2015 23:22:20 +0000 Subject: [PATCH 7/7] Made more concise and attempted to improve readability --- docs/tutorial.rst | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 624a5e8e3070b..7b008e2f48136 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -195,18 +195,13 @@ The ``params`` hook in ``BaseOperator`` allows you to pass a dictionary of parameters and/or objects to your templates. Please take the time to understand how the parameter ``my_param`` makes it through to the template. -Note that templated fields can point to files if you prefer. -It may be desirable for many reasons, like keeping your scripts logic -outside of your pipeline code, getting proper code highlighting in files, -and just generally allowing you to organize your pipeline's logic as you -please. - -In the above example, we could have -had a file ``templated_command.sh``, and referenced it in the ``bash_command`` -parameter, as in -``bash_command='templated_command.sh'`` where the file location is relative -to the pipeline's (``tutorial.py``) location. Note that it is also possible -to define your ``template_searchpath`` pointing to any folder +Files can also be passed to the ``bash_command`` argument, like +``bash_command='templated_command.sh'`` where the file location is relative to +the directory containing the pipeline file (``tutorial.py`` in this case). This +may be desirable for many reasons, like separating your script's logic and +pipeline code, allowing for proper code highlighting in files composed in +different languages, and general flexibility in structuring pipelines. It is +also possible to define your ``template_searchpath`` pointing to any folder locations in the DAG constructor call. Setting up Dependencies