caracal-pipeline · SpheMakh · Apr 5, 2022 · Mar 2, 2022 · Mar 2, 2022 · Mar 2, 2022
diff --git a/DEVNOTES.md b/DEVNOTES.md
@@ -0,0 +1,61 @@
+
+
+
+Problem with globs:
+-------------------
+
+* prevalidate() calls validation of inputs and outputs. This is the only chance for skipped recipes to get aliases propagated down. I.e. if a sub-recipe has an output that is an alias of a sub-step, and the sub-recipe is skipped, this is the only chance to evaluate the glob (presumably, to existing outputs on disk).
+
+* validate_inputs() called before running a step. Currently this does not evaluate globs.
+
+* validate_outputs() called after running. Here we must re-expand the globs, since running the step may have changed the content.
+
+The current scheme where the glob is expanded and substituted as ``params[name] = [files]`` creates a problem. Expansion needs to happen at prevalidation. Then it needs to happen again at the output stage. So we can't replace the glob with a filelist. Somehow we must retain knowledge that this is a glob, otherwise we won't know to re-evaluate it.
+
+I tried creating a Glob class, but pydantic won't allow that, it expects a list of strings. So we need to retain this information externally (in Cargo, perhaps?)
+
+So: keep a copy of the original params dict, and re-evaluate all globs when asked to.
+
+Consider adding an explicit "glob:" prefix to glob values, so that we know not to re-evaluate explicitly specified files?
+
+
+
+
+Problem with aliases:
+---------------------
+
+Let's clean up the logic. First, things to check during finalization:
+
+* an input alias may refer to multiple steps' inputs, but then their schema.dtype must be consistent
+
+* an output alias may refer to only one step's output, aliasing multiple outputs is nonsensical
+
+* an alias's schema is copied from the step, so implicit outputs are implicit for the recipe as well (but mark implicit=True, 
+since we don't want to copy the value literally until the step has validated it in its own {}-substitution context)
+
+During prevalidation (when parameter values are available), they must be propagated up or down
+
+* before prevalidating recipe parameters
+
+    * go over all aliases -- if a recipe's parameter is not set:
+
+        * check if a substep has a default or implicit -- propagate that value down to the recipe
+
+
+* before prevalidating steps -- if a recipe has a parameter (or default)
+
+  * propagate that value up to the step aliases, overriding their values
+
+  * implicit output values can't be set, so this will be caught at this point
+
+* after prevalidating the steps -- if the recipe does not have a parameter value
+
+  * check if a substep has a parameter value or default or implicit -- propagate that value down to the recipe
+
+  * if multiple substeps are aliased, check that they're not set to conflicting values, throw an error if so. Otherwise propagate up.
+
+  * make a list of parameters that have been propagated down
+
+After running the step
+
+* outputs may have changed compared to prevalidation. Propagate their value down to the recipe again (using the list compiled in prevalidation)
diff --git a/setup.py b/setup.py
@@ -9,13 +9,19 @@
 requirements = ["pyyaml",
                 "nose>=1.3.7",
                 "future-fstrings",
-                "scabha @ git+https://github.com/caracal-pipeline/scabha2",
+                "scabha >= 0.7.0", 
+                ## OMS: not a fan of this:
+                # @ git+https://github.com/caracal-pipeline/scabha2",
+                ## ...because it interferes with running scabha2 off a dev branch (i.e. if you have a local dev install of scabha,
+                ## pip install stimela will blow it away and replace with master branch...)
                 "ruamel.yaml",
                 "munch",
                 "omegaconf>=2.1pre1",
                 "click",
                 "pydantic",
                 "pathos",
+                "psutil",
+                "rich"
                 ],
 
 PACKAGE_NAME = "stimela"

diff --git a/stimela/backends/native.py b/stimela/backends/native.py
@@ -5,9 +5,9 @@
 from collections import OrderedDict
 from contextlib import redirect_stderr, redirect_stdout
 
-from scabha.cargo import Cab
+from scabha.cargo import Cab, Parameter
 from stimela import logger
-from stimela.utils.xrun_poll import xrun, dispatch_to_log
+from stimela.utils.xrun_asyncio import xrun, dispatch_to_log
 from stimela.exceptions import StimelaCabRuntimeError
 import click
 
@@ -30,7 +30,7 @@ def write(self, s):
         return len(s)
 
 
-def run(cab: Cab, log, subst: Optional[Dict[str, Any]] = None, batch=None):
+def run(cab: Cab, params: Dict[str, Any], log, subst: Optional[Dict[str, Any]] = None, batch=None):
     """Runs cab contents
 
     Args:
@@ -45,13 +45,13 @@ def run(cab: Cab, log, subst: Optional[Dict[str, Any]] = None, batch=None):
     # commands of form "(module)function" are a Python call
     match = re.match("^\((.+)\)(.+)$", cab.command)
     if match:
-        return run_callable(match.group(1), match.group(2), cab, log, subst)
+        return run_callable(match.group(1), match.group(2), cab, params, log, subst)
     # everything else is a shell command
     else:
-        return run_command(cab, log, subst)
+        return run_command(cab, params, log, subst)
 
 
-def run_callable(modulename: str, funcname: str, cab: Cab, log, subst: Optional[Dict[str, Any]] = None):
+def run_callable(modulename: str, funcname: str,  cab: Cab, params: Dict[str, Any], log, subst: Optional[Dict[str, Any]] = None):
     """Runs a cab corresponding to a Python callable. Intercepts stdout/stderr into the logger.
 
     Args:
@@ -69,10 +69,14 @@ def run_callable(modulename: str, funcname: str, cab: Cab, log, subst: Optional[
     """
 
     # import module and get function object
+    path0 = sys.path.copy()
+    sys.path.append('.')
     try:
         mod = importlib.import_module(modulename)
     except ImportError as exc:
         raise StimelaCabRuntimeError(f"can't import {modulename}: {exc}", log=log)
+    finally:
+        sys.path = path0
 
     func = getattr(mod, funcname, None)
 
@@ -89,8 +93,8 @@ def run_callable(modulename: str, funcname: str, cab: Cab, log, subst: Optional[
     args = OrderedDict()
     for key, schema in cab.inputs_outputs.items():
         if not schema.policies.skip:
-            if key in cab.params:
-                args[key] = cab.params
+            if key in params:
+                args[key] = params[key]
             elif cab.get_schema_policy(schema, 'pass_missing_as_none'):
                 args[key] = None
 
@@ -114,7 +118,7 @@ def run_callable(modulename: str, funcname: str, cab: Cab, log, subst: Optional[
     return retval
 
 
-def run_command(cab: Cab, log, subst: Optional[Dict[str, Any]] = None, batch=None):
+def run_command(cab: Cab, params: Dict[str, Any], log, subst: Optional[Dict[str, Any]] = None, batch=None):
     """Runns command represented by cab.
 
     Args:
@@ -129,11 +133,11 @@ def run_command(cab: Cab, log, subst: Optional[Dict[str, Any]] = None, batch=Non
         int: return value (e.g. exit code) of command
     """
     # build command line from parameters
-    args, venv = cab.build_command_line(subst)
+    args, venv = cab.build_command_line(params, subst)
 
     if batch:
         batch = SlurmBatch(**batch)
-        batch.__init_cab__(cab, subst, log)
+        batch.__init_cab__(cab, params, subst, log)
         runcmd = "/bin/bash -c" + " ".join(args)
         jobfile = "foo-bar.job"
         batch.name = "foo-bar"
@@ -155,7 +159,7 @@ def run_command(cab: Cab, log, subst: Optional[Dict[str, Any]] = None, batch=Non
 
     retcode = xrun(args[0], args[1:], shell=False, log=log, 
                 output_wrangler=cab.apply_output_wranglers, 
-                return_errcode=True, command_name=command_name)
+                return_errcode=True, command_name=command_name, progress_bar=True)
 
     # if retcode is not zero, raise error, unless cab declared itself a success (via the wrangler)
     if retcode:

diff --git a/stimela/cargo/cab/wsclean.yaml b/stimela/cargo/cab/wsclean.yaml
@@ -21,21 +21,26 @@ outputs:
   dirty:
     info: Dirty images
     dtype: List[File]
-    implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-dirty.fits"
+    implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-dirty.fits)"
     must_exist: false
   restored: 
     info: Restored images
     dtype: List[File]
-    implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-image.fits"
+    implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-image.fits)"
+    must_exist: false
+  restored_timeint: 
+    info: Restored images per time interval
+    dtype: List[File]
+    implicit: "=GLOB({current.prefix}-t[0-9][0-9][0-9][0-9]-image.fits)"
     must_exist: false
   residual:
     dtype: List[File]
-    implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-residual.fits"
+    implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-residual.fits)"
     must_exist: false
   model:
     info: Model images
     dtype: List[File]
-    implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-model.fits"
+    implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-model.fits)"
     must_exist: false
   restored_mfs:
     info: Restored MFS image

diff --git a/stimela/cargo/cab/wsclean_pol.yaml b/stimela/cargo/cab/wsclean_pol.yaml
@@ -27,44 +27,44 @@ outputs:
   dirty:
     info: Dirty images
     dtype: List[File]
-    implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-dirty.fits"
+    implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-dirty.fits)"
     must_exist: false
   restored: 
     info: Restored images
     dtype: List[File]
-    implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-image.fits"
+    implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-image.fits)"
     must_exist: false
   residual:
     dtype: List[File]
-    implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-residual.fits"
+    implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-residual.fits)"
     must_exist: false
   model:
     info: Model images
     dtype: List[File]
-    implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-model.fits"
+    implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-model.fits)"
     must_exist: false
   restored_mfs_i:
     info: Restored MFS images
     dtype: File
-    implicit: "{current.prefix}-MFS-I-image.fits"
+    implicit: "=GLOB({current.prefix}-MFS-I-image.fits)"
     must_exist: false
   restored_mfs:
     info: Restored MFS images
     dtype: List[File]
-    implicit: "{current.prefix}-MFS-[IQUV]-image.fits"
+    implicit: "=GLOB({current.prefix}-MFS-[IQUV]-image.fits)"
     must_exist: false
   residual_mfs:
     info: Residual MFS images
     dtype: List[File]
-    implicit: "{current.prefix}-MFS-[IQUV]-residual.fits"
+    implicit: "=GLOB({current.prefix}-MFS-[IQUV]-residual.fits)"
     must_exist: false
   model_mfs:
     info: Model MFS images
     dtype: List[File]
-    implicit: "{current.prefix}-MFS-[IQUV]-model.fits"
+    implicit: "=GLOB({current.prefix}-MFS-[IQUV]-model.fits)"
     must_exist: false
   dirty_mfs:
     info: Dirty MFS images
     dtype: List[File]
-    implicit: "{current.prefix}-MFS-[IQUV]-dirty.fits"
+    implicit: "=GLOB({current.prefix}-MFS-[IQUV]-dirty.fits)"
     must_exist: false
diff --git a/stimela/commands/help.py b/stimela/commands/help.py
@@ -0,0 +1,120 @@
+import fnmatch
+import os, sys
+import click
+from omegaconf import OmegaConf
+
+import stimela
+from scabha import configuratt
+from stimela import logger
+from stimela.main import cli
+from scabha.cargo import Cab, ParameterCategory
+from stimela.kitchen.recipe import Recipe, Step, join_quote
+from stimela.config import ConfigExceptionTypes
+from typing import *
+from rich.tree import Tree
+from rich.table import Table
+from rich import box
+from rich import print as rich_print
+
+@cli.command("help",
+    help="""
+    Print help on a cab or a recipe.
+    """)
+@click.option("do_list", "-l", "--list", is_flag=True,
+                help="""Lists the available cabs and recipes, including custom-defined ones.""")
+@click.option("-I", "--implicit", is_flag=True,
+                help="""Increases level of detail to include implicit inputs/outputs.""")
+@click.option("-O", "--obscure", is_flag=True,
+                help="""Increases level of detail to include implicit and obscure inputs/outputs.""")
+@click.option("-A", "--all", is_flag=True,
+                help="""Increases level of detail to include all inputs/outputs.""")
+@click.option("-R", "--required", is_flag=True,
+                help="""Decreases level of detail to include required inputs/outputs only.""")
+@click.argument("items", nargs=-1, metavar="filename.yml|cab name|recipe name|...") 
+def help(items: List[str] = [], do_list=False, implicit=False, obscure=False, all=False, required=False):
+
+    log = logger()
+    top_tree = Tree(f"stimela help {' '.join(items)}", guide_style="dim")
+    found_something = False
+
+    if required:
+        max_category = ParameterCategory.Required
+    else:
+        max_category = ParameterCategory.Optional
+    if all:
+        max_category = ParameterCategory.Hidden
+    elif obscure:
+        max_category = ParameterCategory.Obscure
+    elif implicit:
+        max_category = ParameterCategory.Implicit
+
+
+    for item in items:
+        # a filename -- treat it as a config
+        if os.path.isfile(item):
+            log.info(f"loading recipe/config {item}")
+
+            # if file contains a recipe entry, treat it as a full config (that can include cabs etc.)
+            try:
+                conf = configuratt.load(item, use_sources=[stimela.CONFIG])
+            except ConfigExceptionTypes as exc:
+                log.error(f"error loading {item}: {exc}")
+                sys.exit(2)
+
+            # anything that is not a standard config section will be treated as a recipe
+            recipes = [name for name in conf if name not in stimela.CONFIG]
+
+            for name in recipes:
+                # cast section to Recipe and remove from loaded conf
+                recipe = OmegaConf.create(Recipe)
+                recipe = OmegaConf.unsafe_merge(recipe, conf[name])
+                del conf[name]
+                # add to global namespace
+                stimela.CONFIG.lib.recipes[name] = recipe
+
+            # the rest is safe to merge into config as is
+            stimela.CONFIG = OmegaConf.merge(stimela.CONFIG, conf)
+
+        # else treat as a wildcard for recipe names or cab names
+        else:
+            recipe_names = fnmatch.filter(stimela.CONFIG.lib.recipes.keys(), item)
+            cab_names = fnmatch.filter(stimela.CONFIG.cabs.keys(), item)
+            if not recipe_names and not cab_names:
+                log.error(f"'{item}' does not match any files, recipes or cab names. Try -l/--list")
+                sys.exit(2)
+
+            for name in recipe_names:
+                recipe = Recipe(**stimela.CONFIG.lib.recipes[name])
+                recipe.finalize(fqname=name)
+                tree = top_tree.add(f"Recipe: [bold]{name}[/bold]")
+                recipe.rich_help(tree, max_category=max_category)
+
+            for name in cab_names:
+                cab = Cab(**stimela.CONFIG.cabs[name])
+                cab.finalize()
+                tree = top_tree.add(f"Cab: [bold]{name}[/bold]")
+                cab.rich_help(tree, max_category=max_category)
+
+            found_something = True
+
+    if do_list or not found_something:
+        if stimela.CONFIG.lib.recipes:
+            subtree = top_tree.add("Recipes:")
+            table = Table.grid("", "", padding=(0,2))
+            for name, recipe in stimela.CONFIG.lib.recipes.items():
+                table.add_row(f"[bold]{name}[/bold]", recipe.info)
+            subtree.add(table)
+        elif not do_list and not found_something:
+            log.error(f"nothing particular to help on, perhaps specify a recipe name or a cab name, or use -l/--list")
+            sys.exit(2)
+
+    if do_list:
+        subtree = top_tree.add("Cabs:")
+        table = Table.grid("", "", padding=(0,2))
+        for name, cab in stimela.CONFIG.cabs.items():
+            table.add_row(f"[bold]{name}[/bold]", cab.info)
+        subtree.add(table)            
+
+    rich_print(top_tree)
+
+