From cdca75740dc6f49b3a240cfb027b471a71f1b365 Mon Sep 17 00:00:00 2001 From: Tim Pillinger <26465611+wxtim@users.noreply.github.com> Date: Wed, 3 Jul 2024 08:53:52 +0100 Subject: [PATCH] Add slides for a presentation on "the simplest useful workflow: a reminder of some handy basic Cylc features" --- simplest-useful/slides.html | 374 ++++++++++++++++++++++++++++++++++++ simplest-useful/slides.md | 102 ++++++++++ 2 files changed, 476 insertions(+) create mode 100644 simplest-useful/slides.html create mode 100644 simplest-useful/slides.md diff --git a/simplest-useful/slides.html b/simplest-useful/slides.html new file mode 100644 index 0000000..98dade1 --- /dev/null +++ b/simplest-useful/slides.html @@ -0,0 +1,374 @@ + + + + + + slides + + + + + + + + + +
+
+ + +
+
+

The Simplest Useful Cylc Workflow

+ +
+
+

Aim

+

Show you how to replace a very simple script with a very simple +workflow and get some (nearly) free:

+
    +
  • Error handling
  • +
  • Efficiency
  • +
+
+
+

The problem…

+
+

I want to get some data…

+
+
    +
  • 🕰️ Time
  • +
  • 💔 Reliability
  • +
+
+

… and do some analysis

+
+
    +
  • ๐Ÿ Memory
  • +
  • ๐ŸŸ Processor Power
  • +
+
+
+

The original Script:

+
#!/bin/bash
+#@supercomputer --time 300
+#@supercomputer --memory LOTS
+#@supercomputer --CPU MANY
+
+./bin/get_data.sh
+
+./bin/process_data.sh
+
+
+

The Workflow

+
+
+

graph

+
[scheduling]
+    [[graph]]
+        R1 = get_data => process_data
+
+
+

get_data

+
[runtime]
+    [[get_data]]
+        script = get_data.sh
+        platform = any_old_server
+
+
+

process_data

+
[runtime]
+    [[process_data]]
+        script = process_data.sh
+        platform = supercomputer
+        [[[directives]]]
+            --time 300   # DONT!
+            --memory LOTS
+            --CPU MANY
+
+
+

Gains so far

+
    +
  • 💰 get_data fails => no supercomputer resource request
  • +
  • ๐Ÿƒ cylc install => run dir
  • +
  • 📕 Cylc’s logging facilities
  • +
+

But there’s more…

+
+
+

A Cylc Anti Pattern

+
[[process_data]]
++     execution time limit = PT5M
+    [[[directives]]]
+-         --time 300   # DONT!
+

Cylc will know the task has timed out even +without communication with the platform!

+
+
+

Retries

+
[[get_data]]
+    script = get_data.sh
+    platform = any_old_server
++     execution retry delays = 4*PT15M, PT1D
+
+
+

Aim

+

Show you how to replace a very simple script with a very simple +workflow and get some (nearly) free:

+
    +
  • Error handling
  • +
  • Efficiency
  • +
+
+
+
+ + + + + + + + + + + diff --git a/simplest-useful/slides.md b/simplest-useful/slides.md new file mode 100644 index 0000000..827812a --- /dev/null +++ b/simplest-useful/slides.md @@ -0,0 +1,102 @@ +# The Simplest Useful Cylc Workflow + +## Aim + +Show you how to replace a very simple script with a very simple workflow +and get some (nearly) free: + +* Error handling +* Efficiency + +## The problem... + +> I want to get some data... + +* 🕰️ Time +* 💔 Reliability + +> ... and do some analysis + +* 🐏 Memory +* 🍟 Processor Power + +## The original Script: + +```bash +#!/bin/bash +#@supercomputer --time 300 +#@supercomputer --memory LOTS +#@supercomputer --CPU MANY + +./bin/get_data.sh + +./bin/process_data.sh +``` + +## The Workflow + +## graph + +``` +[scheduling] + [[graph]] + R1 = get_data => process_data +``` + +## get_data + +``` +[runtime] + [[get_data]] + script = get_data.sh + platform = any_old_server +``` + +## process_data +``` +[runtime] + [[process_data]] + script = process_data.sh + platform = supercomputer + [[[directives]]] + --time 300 # DONT! + --memory LOTS + --CPU MANY +``` + +## Gains so far + +* 💰 get_data fails => no supercomputer resource request +* 🏃 ``cylc install`` => run dir +* 📕 Cylc's logging facilities + +But there's more... + +## A Cylc Anti Pattern + +```diff +[[process_data]] ++ execution time limit = PT5M + [[[directives]]] +- --time 300 # DONT! +``` + +**Cylc will know the task has timed out** +**even without communication with the platform!** + +## Retries + +```diff +[[get_data]] + script = get_data.sh + platform = any_old_server ++ execution retry delays = 4*PT15M, PT1D +``` + +## Aim + +Show you how to replace a very simple script with a very simple workflow +and get some (nearly) free: + +* Error handling +* Efficiency